mirror of
https://github.com/golang/go.git
synced 2025-05-05 15:43:04 +00:00
math/big: use clearer loop bounds check elimination
Checking that the lengths are equal and panicking teaches the compiler that it can assume “i in range for z” implies “i in range for x”, letting us simplify the actual loops a bit. It also turns up a few places in math/big that were playing maybe a little too fast and loose with slice lengths. Update those to explicitly set all the input slices to the same length. These speedups are basically irrelevant, since they only happen in real code if people are compiling with -tags math_big_pure_go. But at least the code is clearer. benchmark \ system c3h88 c2s16 s7 386 s7-386 c4as16 mac arm loong64 ppc64le riscv64 s390x AddVV/words=1/impl=go ~ +11.20% +5.11% -7.67% -7.77% +1.90% +10.76% -33.22% ~ +10.98% ~ +6.60% AddVV/words=10/impl=go -22.12% -13.48% -10.37% -17.95% -18.07% -24.58% -22.04% -29.95% -14.22% ~ -6.33% +3.66% AddVV/words=16/impl=go -9.75% -13.73% ~ -21.90% -18.66% -30.03% -20.45% -28.09% -17.33% -7.15% -8.96% +12.55% AddVV/words=100/impl=go -5.91% -1.02% ~ -29.23% -22.18% -25.62% -6.49% -23.59% -22.31% -1.88% -14.13% +9.23% AddVV/words=1000/impl=go -0.52% -0.19% -3.58% -33.89% -23.46% -22.46% ~ -24.00% -24.73% +0.93% -15.79% +12.32% AddVV/words=10000/impl=go ~ ~ ~ -33.79% -23.72% -23.79% -5.98% -23.92% ~ +0.78% -15.45% +8.59% AddVV/words=100000/impl=go ~ ~ ~ -33.90% -24.25% -22.82% -4.09% -24.63% ~ +1.00% -13.56% ~ SubVV/words=1/impl=go ~ +11.64% +14.05% ~ -4.07% ~ +10.79% -33.69% ~ ~ +3.89% +12.33% SubVV/words=10/impl=go -10.31% -14.09% -7.38% +13.76% -13.25% -18.05% -20.08% -24.97% -14.15% +10.13% -0.97% -2.51% SubVV/words=16/impl=go -8.06% -13.73% -5.70% +17.00% -12.83% -23.76% -17.52% -25.25% -17.30% -2.80% -4.96% -18.25% SubVV/words=100/impl=go -9.22% -1.30% -2.76% +20.88% -14.35% -15.29% -8.49% -19.64% -22.31% -0.68% -14.30% -9.04% SubVV/words=1000/impl=go -0.60% ~ -3.43% +23.08% -16.14% -11.96% ~ -28.52% -24.73% ~ -15.95% -9.91% SubVV/words=10000/impl=go ~ ~ ~ +26.01% -15.24% -11.92% ~ -28.26% +4.25% ~ -15.42% -5.95% SubVV/words=100000/impl=go ~ ~ ~ +25.71% -15.83% -12.13% ~ -27.88% -1.27% ~ -13.57% -6.72% LshVU/words=1/impl=go +0.56% +0.36% ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ LshVU/words=10/impl=go +13.37% +4.63% ~ ~ ~ ~ ~ -2.90% ~ ~ ~ ~ LshVU/words=16/impl=go +22.83% +6.47% ~ ~ ~ ~ ~ ~ +0.80% ~ ~ +5.88% LshVU/words=100/impl=go +7.56% +13.95% ~ ~ ~ ~ ~ ~ +0.33% -2.50% ~ ~ LshVU/words=1000/impl=go +0.64% +17.92% ~ ~ ~ ~ ~ -6.52% ~ -2.58% ~ ~ LshVU/words=10000/impl=go ~ +17.60% ~ ~ ~ ~ ~ -6.64% -6.22% -1.40% ~ ~ LshVU/words=100000/impl=go ~ +14.57% ~ ~ ~ ~ ~ ~ -5.47% ~ ~ ~ RshVU/words=1/impl=go ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ +2.72% RshVU/words=10/impl=go ~ ~ ~ ~ ~ ~ ~ +2.50% ~ ~ ~ ~ RshVU/words=16/impl=go ~ +0.53% ~ ~ ~ ~ ~ +3.82% ~ ~ ~ ~ RshVU/words=100/impl=go ~ ~ ~ ~ ~ ~ ~ +6.18% ~ ~ ~ ~ RshVU/words=1000/impl=go ~ ~ ~ ~ ~ ~ ~ +7.00% ~ ~ ~ ~ RshVU/words=10000/impl=go ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ RshVU/words=100000/impl=go ~ ~ ~ ~ ~ ~ ~ +7.05% ~ ~ ~ ~ MulAddVWW/words=1/impl=go -10.34% +4.43% +10.62% -1.62% -4.74% -2.86% +11.75% ~ -8.00% +8.89% +3.87% ~ MulAddVWW/words=10/impl=go -1.61% -5.87% ~ -8.30% -4.55% +0.87% ~ -5.28% -20.82% ~ ~ -2.32% MulAddVWW/words=16/impl=go -2.96% -5.28% ~ -9.22% -5.28% ~ ~ -3.74% -19.52% -1.48% -2.53% -9.52% MulAddVWW/words=100/impl=go -3.89% -7.53% +1.93% -10.49% -4.87% -8.27% ~ ~ -0.65% -0.61% -7.59% -20.61% MulAddVWW/words=1000/impl=go -0.45% -3.91% +4.54% -11.46% -4.69% -8.53% ~ ~ -0.05% ~ -8.88% -19.77% MulAddVWW/words=10000/impl=go ~ -3.30% +4.10% -11.34% -4.10% -9.43% ~ -0.61% ~ -0.55% -8.21% -18.48% MulAddVWW/words=100000/impl=go -0.30% -3.03% +4.31% -11.55% -4.41% -9.74% ~ -0.75% +0.63% ~ -7.80% -19.82% AddMulVVWW/words=1/impl=go ~ +13.09% +12.50% -7.05% -10.41% +2.53% +13.32% -3.49% ~ +15.56% +3.62% ~ AddMulVVWW/words=10/impl=go -15.96% -9.06% -5.06% -14.56% -11.83% -5.44% -26.30% -14.23% -11.44% -1.79% -5.93% -6.60% AddMulVVWW/words=16/impl=go -19.05% -12.43% -6.19% -14.24% -12.67% -8.65% -18.64% -16.56% -10.64% -3.00% -7.61% -12.80% AddMulVVWW/words=100/impl=go -22.13% -16.59% -13.04% -13.79% -11.46% -12.01% -6.46% -21.80% -5.08% -3.13% -13.60% -22.53% AddMulVVWW/words=1000/impl=go -17.07% -17.05% -14.08% -13.59% -12.13% -11.21% ~ -22.81% -4.27% -1.27% -16.35% -23.47% AddMulVVWW/words=10000/impl=go -15.03% -16.78% -14.23% -13.86% -11.84% -11.69% ~ -22.75% -13.39% -1.10% -14.37% -22.01% AddMulVVWW/words=100000/impl=go -13.70% -14.90% -14.26% -13.55% -12.04% -11.63% ~ -22.61% ~ -2.53% -10.42% -23.16% Change-Id: Ic6f64344484a762b818c7090d1396afceb638607 Reviewed-on: https://go-review.googlesource.com/c/go/+/665155 Auto-Submit: Russ Cox <rsc@golang.org> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: Alan Donovan <adonovan@google.com>
This commit is contained in:
parent
7f516a31b0
commit
a4d0269a4f
@ -26,17 +26,13 @@ const (
|
||||
_M = _B - 1 // digit mask
|
||||
)
|
||||
|
||||
// Many of the loops in this file are of the form
|
||||
// for i := 0; i < len(z) && i < len(x) && i < len(y); i++
|
||||
// i < len(z) is the real condition.
|
||||
// However, checking i < len(x) && i < len(y) as well is faster than
|
||||
// having the compiler do a bounds check in the body of the loop;
|
||||
// remarkably it is even faster than hoisting the bounds check
|
||||
// out of the loop, by doing something like
|
||||
// _, _ = x[len(z)-1], y[len(z)-1]
|
||||
// There are other ways to hoist the bounds check out of the loop,
|
||||
// but the compiler's BCE isn't powerful enough for them (yet?).
|
||||
// See the discussion in CL 164966.
|
||||
// In these routines, it is the caller's responsibility to arrange for
|
||||
// x, y, and z to all have the same length. We check this and panic.
|
||||
// The assembly versions of these routines do not include that check.
|
||||
//
|
||||
// The check+panic also has the effect of teaching the compiler that
|
||||
// “i in range for z” implies “i in range for x and y”, eliminating all
|
||||
// bounds checks in loops from 0 to len(z) and vice versa.
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Elementary operations on words
|
||||
@ -65,8 +61,11 @@ func nlz(x Word) uint {
|
||||
|
||||
// The resulting carry c is either 0 or 1.
|
||||
func addVV_g(z, x, y []Word) (c Word) {
|
||||
// The comment near the top of this file discusses this for loop condition.
|
||||
for i := 0; i < len(z) && i < len(x) && i < len(y); i++ {
|
||||
if len(x) != len(z) || len(y) != len(z) {
|
||||
panic("addVV len")
|
||||
}
|
||||
|
||||
for i := range z {
|
||||
zi, cc := bits.Add(uint(x[i]), uint(y[i]), uint(c))
|
||||
z[i] = Word(zi)
|
||||
c = Word(cc)
|
||||
@ -76,8 +75,11 @@ func addVV_g(z, x, y []Word) (c Word) {
|
||||
|
||||
// The resulting carry c is either 0 or 1.
|
||||
func subVV_g(z, x, y []Word) (c Word) {
|
||||
// The comment near the top of this file discusses this for loop condition.
|
||||
for i := 0; i < len(z) && i < len(x) && i < len(y); i++ {
|
||||
if len(x) != len(z) || len(y) != len(z) {
|
||||
panic("subVV len")
|
||||
}
|
||||
|
||||
for i := range z {
|
||||
zi, cc := bits.Sub(uint(x[i]), uint(y[i]), uint(c))
|
||||
z[i] = Word(zi)
|
||||
c = Word(cc)
|
||||
@ -99,7 +101,10 @@ func subVV_g(z, x, y []Word) (c Word) {
|
||||
//
|
||||
//go:linkname addVW
|
||||
func addVW(z, x []Word, y Word) (c Word) {
|
||||
x = x[:len(z)]
|
||||
if len(x) != len(z) {
|
||||
panic("addVW len")
|
||||
}
|
||||
|
||||
if len(z) == 0 {
|
||||
return y
|
||||
}
|
||||
@ -150,7 +155,10 @@ func addVW_ref(z, x []Word, y Word) (c Word) {
|
||||
//
|
||||
//go:linkname subVW
|
||||
func subVW(z, x []Word, y Word) (c Word) {
|
||||
x = x[:len(z)]
|
||||
if len(x) != len(z) {
|
||||
panic("subVW len")
|
||||
}
|
||||
|
||||
if len(z) == 0 {
|
||||
return y
|
||||
}
|
||||
@ -188,6 +196,10 @@ func subVW_ref(z, x []Word, y Word) (c Word) {
|
||||
}
|
||||
|
||||
func lshVU_g(z, x []Word, s uint) (c Word) {
|
||||
if len(x) != len(z) {
|
||||
panic("lshVU len")
|
||||
}
|
||||
|
||||
if s == 0 {
|
||||
copy(z, x)
|
||||
return
|
||||
@ -207,6 +219,10 @@ func lshVU_g(z, x []Word, s uint) (c Word) {
|
||||
}
|
||||
|
||||
func rshVU_g(z, x []Word, s uint) (c Word) {
|
||||
if len(x) != len(z) {
|
||||
panic("rshVU len")
|
||||
}
|
||||
|
||||
if s == 0 {
|
||||
copy(z, x)
|
||||
return
|
||||
@ -214,10 +230,6 @@ func rshVU_g(z, x []Word, s uint) (c Word) {
|
||||
if len(z) == 0 {
|
||||
return
|
||||
}
|
||||
if len(x) != len(z) {
|
||||
// This is an invariant guaranteed by the caller.
|
||||
panic("len(x) != len(z)")
|
||||
}
|
||||
s &= _W - 1 // hint to the compiler that shifts by s don't need guard code
|
||||
ŝ := _W - s
|
||||
ŝ &= _W - 1 // ditto
|
||||
@ -230,18 +242,23 @@ func rshVU_g(z, x []Word, s uint) (c Word) {
|
||||
}
|
||||
|
||||
func mulAddVWW_g(z, x []Word, y, r Word) (c Word) {
|
||||
if len(x) != len(z) {
|
||||
panic("mulAddVWW len")
|
||||
}
|
||||
c = r
|
||||
// The comment near the top of this file discusses this for loop condition.
|
||||
for i := 0; i < len(z) && i < len(x); i++ {
|
||||
for i := range z {
|
||||
c, z[i] = mulAddWWW_g(x[i], y, c)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func addMulVVWW_g(z, x, y []Word, m, a Word) (c Word) {
|
||||
if len(x) != len(z) || len(y) != len(z) {
|
||||
panic("rshVU len")
|
||||
}
|
||||
|
||||
c = a
|
||||
// The comment near the top of this file discusses this for loop condition.
|
||||
for i := 0; i < len(z) && i < len(x) && i < len(y); i++ {
|
||||
for i := range z {
|
||||
z1, z0 := mulAddWWW_g(y[i], m, x[i])
|
||||
lo, cc := bits.Add(uint(z0), uint(c), 0)
|
||||
c, z[i] = Word(cc), Word(lo)
|
||||
|
@ -111,7 +111,7 @@ func (z nat) add(x, y nat) nat {
|
||||
// m > 0
|
||||
|
||||
z = z.make(m + 1)
|
||||
c := addVV(z[0:n], x, y)
|
||||
c := addVV(z[:n], x[:n], y[:n])
|
||||
if m > n {
|
||||
c = addVW(z[n:m], x[n:], c)
|
||||
}
|
||||
@ -137,7 +137,7 @@ func (z nat) sub(x, y nat) nat {
|
||||
// m > 0
|
||||
|
||||
z = z.make(m)
|
||||
c := subVV(z[0:n], x, y)
|
||||
c := subVV(z[:n], x[:n], y[:n])
|
||||
if m > n {
|
||||
c = subVW(z[n:], x[n:], c)
|
||||
}
|
||||
@ -232,7 +232,7 @@ func alias(x, y nat) bool {
|
||||
// slice, and we don't need to normalize z after each addition)
|
||||
func addTo(z, x nat) {
|
||||
if n := len(x); n > 0 {
|
||||
if c := addVV(z[:n], z, x); c != 0 {
|
||||
if c := addVV(z[:n], z[:n], x[:n]); c != 0 {
|
||||
if n < len(z) {
|
||||
addVW(z[n:], z[n:], c)
|
||||
}
|
||||
|
@ -699,9 +699,9 @@ func (q nat) divBasic(stk *stack, u, v nat) {
|
||||
// Subtract q̂·v from the current section of u.
|
||||
// If it underflows, q̂·v > u, which we fix up
|
||||
// by decrementing q̂ and adding v back.
|
||||
c := subVV(u[j:j+qhl], u[j:], qhatv)
|
||||
c := subVV(u[j:j+qhl], u[j:j+qhl], qhatv[:qhl])
|
||||
if c != 0 {
|
||||
c := addVV(u[j:j+n], u[j:], v)
|
||||
c := addVV(u[j:j+n], u[j:j+n], v)
|
||||
// If n == qhl, the carry from subVV and the carry from addVV
|
||||
// cancel out and don't affect u[j+n].
|
||||
if n < qhl {
|
||||
|
Loading…
x
Reference in New Issue
Block a user