math/big: use clearer loop bounds check elimination

Checking that the lengths are equal, and panicking if they are not, teaches
the compiler that it can assume “i in range for z” implies “i in range for x”,
letting us simplify the actual loops a bit.

It also turns up a few places in math/big that were playing maybe a little
too fast and loose with slice lengths. Update those to explicitly set all the
input slices to the same length.

These speedups are basically irrelevant, since they only happen
in real code if people are compiling with -tags math_big_pure_go.
But at least the code is clearer.

benchmark \ system                   c3h88    c2s16       s7      386   s7-386   c4as16      mac      arm  loong64  ppc64le  riscv64    s390x
AddVV/words=1/impl=go                    ~  +11.20%   +5.11%   -7.67%   -7.77%   +1.90%  +10.76%  -33.22%        ~  +10.98%        ~   +6.60%
AddVV/words=10/impl=go             -22.12%  -13.48%  -10.37%  -17.95%  -18.07%  -24.58%  -22.04%  -29.95%  -14.22%        ~   -6.33%   +3.66%
AddVV/words=16/impl=go              -9.75%  -13.73%        ~  -21.90%  -18.66%  -30.03%  -20.45%  -28.09%  -17.33%   -7.15%   -8.96%  +12.55%
AddVV/words=100/impl=go             -5.91%   -1.02%        ~  -29.23%  -22.18%  -25.62%   -6.49%  -23.59%  -22.31%   -1.88%  -14.13%   +9.23%
AddVV/words=1000/impl=go            -0.52%   -0.19%   -3.58%  -33.89%  -23.46%  -22.46%        ~  -24.00%  -24.73%   +0.93%  -15.79%  +12.32%
AddVV/words=10000/impl=go                ~        ~        ~  -33.79%  -23.72%  -23.79%   -5.98%  -23.92%        ~   +0.78%  -15.45%   +8.59%
AddVV/words=100000/impl=go               ~        ~        ~  -33.90%  -24.25%  -22.82%   -4.09%  -24.63%        ~   +1.00%  -13.56%        ~
SubVV/words=1/impl=go                    ~  +11.64%  +14.05%        ~   -4.07%        ~  +10.79%  -33.69%        ~        ~   +3.89%  +12.33%
SubVV/words=10/impl=go             -10.31%  -14.09%   -7.38%  +13.76%  -13.25%  -18.05%  -20.08%  -24.97%  -14.15%  +10.13%   -0.97%   -2.51%
SubVV/words=16/impl=go              -8.06%  -13.73%   -5.70%  +17.00%  -12.83%  -23.76%  -17.52%  -25.25%  -17.30%   -2.80%   -4.96%  -18.25%
SubVV/words=100/impl=go             -9.22%   -1.30%   -2.76%  +20.88%  -14.35%  -15.29%   -8.49%  -19.64%  -22.31%   -0.68%  -14.30%   -9.04%
SubVV/words=1000/impl=go            -0.60%        ~   -3.43%  +23.08%  -16.14%  -11.96%        ~  -28.52%  -24.73%        ~  -15.95%   -9.91%
SubVV/words=10000/impl=go                ~        ~        ~  +26.01%  -15.24%  -11.92%        ~  -28.26%   +4.25%        ~  -15.42%   -5.95%
SubVV/words=100000/impl=go               ~        ~        ~  +25.71%  -15.83%  -12.13%        ~  -27.88%   -1.27%        ~  -13.57%   -6.72%
LshVU/words=1/impl=go               +0.56%   +0.36%        ~        ~        ~        ~        ~        ~        ~        ~        ~        ~
LshVU/words=10/impl=go             +13.37%   +4.63%        ~        ~        ~        ~        ~   -2.90%        ~        ~        ~        ~
LshVU/words=16/impl=go             +22.83%   +6.47%        ~        ~        ~        ~        ~        ~   +0.80%        ~        ~   +5.88%
LshVU/words=100/impl=go             +7.56%  +13.95%        ~        ~        ~        ~        ~        ~   +0.33%   -2.50%        ~        ~
LshVU/words=1000/impl=go            +0.64%  +17.92%        ~        ~        ~        ~        ~   -6.52%        ~   -2.58%        ~        ~
LshVU/words=10000/impl=go                ~  +17.60%        ~        ~        ~        ~        ~   -6.64%   -6.22%   -1.40%        ~        ~
LshVU/words=100000/impl=go               ~  +14.57%        ~        ~        ~        ~        ~        ~   -5.47%        ~        ~        ~
RshVU/words=1/impl=go                    ~        ~        ~        ~        ~        ~        ~        ~        ~        ~        ~   +2.72%
RshVU/words=10/impl=go                   ~        ~        ~        ~        ~        ~        ~   +2.50%        ~        ~        ~        ~
RshVU/words=16/impl=go                   ~   +0.53%        ~        ~        ~        ~        ~   +3.82%        ~        ~        ~        ~
RshVU/words=100/impl=go                  ~        ~        ~        ~        ~        ~        ~   +6.18%        ~        ~        ~        ~
RshVU/words=1000/impl=go                 ~        ~        ~        ~        ~        ~        ~   +7.00%        ~        ~        ~        ~
RshVU/words=10000/impl=go                ~        ~        ~        ~        ~        ~        ~        ~        ~        ~        ~        ~
RshVU/words=100000/impl=go               ~        ~        ~        ~        ~        ~        ~   +7.05%        ~        ~        ~        ~
MulAddVWW/words=1/impl=go          -10.34%   +4.43%  +10.62%   -1.62%   -4.74%   -2.86%  +11.75%        ~   -8.00%   +8.89%   +3.87%        ~
MulAddVWW/words=10/impl=go          -1.61%   -5.87%        ~   -8.30%   -4.55%   +0.87%        ~   -5.28%  -20.82%        ~        ~   -2.32%
MulAddVWW/words=16/impl=go          -2.96%   -5.28%        ~   -9.22%   -5.28%        ~        ~   -3.74%  -19.52%   -1.48%   -2.53%   -9.52%
MulAddVWW/words=100/impl=go         -3.89%   -7.53%   +1.93%  -10.49%   -4.87%   -8.27%        ~        ~   -0.65%   -0.61%   -7.59%  -20.61%
MulAddVWW/words=1000/impl=go        -0.45%   -3.91%   +4.54%  -11.46%   -4.69%   -8.53%        ~        ~   -0.05%        ~   -8.88%  -19.77%
MulAddVWW/words=10000/impl=go            ~   -3.30%   +4.10%  -11.34%   -4.10%   -9.43%        ~   -0.61%        ~   -0.55%   -8.21%  -18.48%
MulAddVWW/words=100000/impl=go      -0.30%   -3.03%   +4.31%  -11.55%   -4.41%   -9.74%        ~   -0.75%   +0.63%        ~   -7.80%  -19.82%
AddMulVVWW/words=1/impl=go               ~  +13.09%  +12.50%   -7.05%  -10.41%   +2.53%  +13.32%   -3.49%        ~  +15.56%   +3.62%        ~
AddMulVVWW/words=10/impl=go        -15.96%   -9.06%   -5.06%  -14.56%  -11.83%   -5.44%  -26.30%  -14.23%  -11.44%   -1.79%   -5.93%   -6.60%
AddMulVVWW/words=16/impl=go        -19.05%  -12.43%   -6.19%  -14.24%  -12.67%   -8.65%  -18.64%  -16.56%  -10.64%   -3.00%   -7.61%  -12.80%
AddMulVVWW/words=100/impl=go       -22.13%  -16.59%  -13.04%  -13.79%  -11.46%  -12.01%   -6.46%  -21.80%   -5.08%   -3.13%  -13.60%  -22.53%
AddMulVVWW/words=1000/impl=go      -17.07%  -17.05%  -14.08%  -13.59%  -12.13%  -11.21%        ~  -22.81%   -4.27%   -1.27%  -16.35%  -23.47%
AddMulVVWW/words=10000/impl=go     -15.03%  -16.78%  -14.23%  -13.86%  -11.84%  -11.69%        ~  -22.75%  -13.39%   -1.10%  -14.37%  -22.01%
AddMulVVWW/words=100000/impl=go    -13.70%  -14.90%  -14.26%  -13.55%  -12.04%  -11.63%        ~  -22.61%        ~   -2.53%  -10.42%  -23.16%

Change-Id: Ic6f64344484a762b818c7090d1396afceb638607
Reviewed-on: https://go-review.googlesource.com/c/go/+/665155
Auto-Submit: Russ Cox <rsc@golang.org>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Alan Donovan <adonovan@google.com>
This commit is contained in:
Russ Cox 2025-04-11 08:54:58 -04:00 committed by Gopher Robot
parent 7f516a31b0
commit a4d0269a4f
3 changed files with 47 additions and 30 deletions

View File

@ -26,17 +26,13 @@ const (
_M = _B - 1 // digit mask
)
// Many of the loops in this file are of the form
// for i := 0; i < len(z) && i < len(x) && i < len(y); i++
// i < len(z) is the real condition.
// However, checking i < len(x) && i < len(y) as well is faster than
// having the compiler do a bounds check in the body of the loop;
// remarkably it is even faster than hoisting the bounds check
// out of the loop, by doing something like
// _, _ = x[len(z)-1], y[len(z)-1]
// There are other ways to hoist the bounds check out of the loop,
// but the compiler's BCE isn't powerful enough for them (yet?).
// See the discussion in CL 164966.
// In these routines, it is the caller's responsibility to arrange for
// x, y, and z to all have the same length. We check this and panic.
// The assembly versions of these routines do not include that check.
//
// The check+panic also has the effect of teaching the compiler that
// “i in range for z” implies “i in range for x and y” (and vice versa),
// eliminating all bounds checks in loops from 0 to len(z).
// ----------------------------------------------------------------------------
// Elementary operations on words
@ -65,8 +61,11 @@ func nlz(x Word) uint {
// The resulting carry c is either 0 or 1.
func addVV_g(z, x, y []Word) (c Word) {
// The comment near the top of this file discusses this for loop condition.
for i := 0; i < len(z) && i < len(x) && i < len(y); i++ {
if len(x) != len(z) || len(y) != len(z) {
panic("addVV len")
}
for i := range z {
zi, cc := bits.Add(uint(x[i]), uint(y[i]), uint(c))
z[i] = Word(zi)
c = Word(cc)
@ -76,8 +75,11 @@ func addVV_g(z, x, y []Word) (c Word) {
// The resulting carry c is either 0 or 1.
func subVV_g(z, x, y []Word) (c Word) {
// The comment near the top of this file discusses this for loop condition.
for i := 0; i < len(z) && i < len(x) && i < len(y); i++ {
if len(x) != len(z) || len(y) != len(z) {
panic("subVV len")
}
for i := range z {
zi, cc := bits.Sub(uint(x[i]), uint(y[i]), uint(c))
z[i] = Word(zi)
c = Word(cc)
@ -99,7 +101,10 @@ func subVV_g(z, x, y []Word) (c Word) {
//
//go:linkname addVW
func addVW(z, x []Word, y Word) (c Word) {
x = x[:len(z)]
if len(x) != len(z) {
panic("addVW len")
}
if len(z) == 0 {
return y
}
@ -150,7 +155,10 @@ func addVW_ref(z, x []Word, y Word) (c Word) {
//
//go:linkname subVW
func subVW(z, x []Word, y Word) (c Word) {
x = x[:len(z)]
if len(x) != len(z) {
panic("subVW len")
}
if len(z) == 0 {
return y
}
@ -188,6 +196,10 @@ func subVW_ref(z, x []Word, y Word) (c Word) {
}
func lshVU_g(z, x []Word, s uint) (c Word) {
if len(x) != len(z) {
panic("lshVU len")
}
if s == 0 {
copy(z, x)
return
@ -207,6 +219,10 @@ func lshVU_g(z, x []Word, s uint) (c Word) {
}
func rshVU_g(z, x []Word, s uint) (c Word) {
if len(x) != len(z) {
panic("rshVU len")
}
if s == 0 {
copy(z, x)
return
@ -214,10 +230,6 @@ func rshVU_g(z, x []Word, s uint) (c Word) {
if len(z) == 0 {
return
}
if len(x) != len(z) {
// This is an invariant guaranteed by the caller.
panic("len(x) != len(z)")
}
s &= _W - 1 // hint to the compiler that shifts by s don't need guard code
ŝ := _W - s
ŝ &= _W - 1 // ditto
@ -230,18 +242,23 @@ func rshVU_g(z, x []Word, s uint) (c Word) {
}
func mulAddVWW_g(z, x []Word, y, r Word) (c Word) {
if len(x) != len(z) {
panic("mulAddVWW len")
}
c = r
// The comment near the top of this file discusses this for loop condition.
for i := 0; i < len(z) && i < len(x); i++ {
for i := range z {
c, z[i] = mulAddWWW_g(x[i], y, c)
}
return
}
func addMulVVWW_g(z, x, y []Word, m, a Word) (c Word) {
if len(x) != len(z) || len(y) != len(z) {
panic("addMulVVWW len")
}
c = a
// The comment near the top of this file discusses this for loop condition.
for i := 0; i < len(z) && i < len(x) && i < len(y); i++ {
for i := range z {
z1, z0 := mulAddWWW_g(y[i], m, x[i])
lo, cc := bits.Add(uint(z0), uint(c), 0)
c, z[i] = Word(cc), Word(lo)

View File

@ -111,7 +111,7 @@ func (z nat) add(x, y nat) nat {
// m > 0
z = z.make(m + 1)
c := addVV(z[0:n], x, y)
c := addVV(z[:n], x[:n], y[:n])
if m > n {
c = addVW(z[n:m], x[n:], c)
}
@ -137,7 +137,7 @@ func (z nat) sub(x, y nat) nat {
// m > 0
z = z.make(m)
c := subVV(z[0:n], x, y)
c := subVV(z[:n], x[:n], y[:n])
if m > n {
c = subVW(z[n:], x[n:], c)
}
@ -232,7 +232,7 @@ func alias(x, y nat) bool {
// slice, and we don't need to normalize z after each addition)
func addTo(z, x nat) {
if n := len(x); n > 0 {
if c := addVV(z[:n], z, x); c != 0 {
if c := addVV(z[:n], z[:n], x[:n]); c != 0 {
if n < len(z) {
addVW(z[n:], z[n:], c)
}

View File

@ -699,9 +699,9 @@ func (q nat) divBasic(stk *stack, u, v nat) {
// Subtract q̂·v from the current section of u.
// If it underflows, q̂·v > u, which we fix up
// by decrementing q̂ and adding v back.
c := subVV(u[j:j+qhl], u[j:], qhatv)
c := subVV(u[j:j+qhl], u[j:j+qhl], qhatv[:qhl])
if c != 0 {
c := addVV(u[j:j+n], u[j:], v)
c := addVV(u[j:j+n], u[j:j+n], v)
// If n == qhl, the carry from subVV and the carry from addVV
// cancel out and don't affect u[j+n].
if n < qhl {