mirror of
https://github.com/golang/go.git
synced 2025-05-05 23:53:05 +00:00
math/big: use clearer loop bounds check elimination
Checking that the lengths are equal and panicking teaches the compiler that it can assume “i in range for z” implies “i in range for x”, letting us simplify the actual loops a bit.

It also turns up a few places in math/big that were playing maybe a little too fast and loose with slice lengths. Update those to explicitly set all the input slices to the same length.

These speedups are basically irrelevant, since they only happen in real code if people are compiling with -tags math_big_pure_go. But at least the code is clearer.

benchmark \ system              c3h88   c2s16   s7      386     s7-386  c4as16  mac     arm     loong64 ppc64le riscv64 s390x
AddVV/words=1/impl=go           ~       +11.20% +5.11%  -7.67%  -7.77%  +1.90%  +10.76% -33.22% ~       +10.98% ~       +6.60%
AddVV/words=10/impl=go          -22.12% -13.48% -10.37% -17.95% -18.07% -24.58% -22.04% -29.95% -14.22% ~       -6.33%  +3.66%
AddVV/words=16/impl=go          -9.75%  -13.73% ~       -21.90% -18.66% -30.03% -20.45% -28.09% -17.33% -7.15%  -8.96%  +12.55%
AddVV/words=100/impl=go         -5.91%  -1.02%  ~       -29.23% -22.18% -25.62% -6.49%  -23.59% -22.31% -1.88%  -14.13% +9.23%
AddVV/words=1000/impl=go        -0.52%  -0.19%  -3.58%  -33.89% -23.46% -22.46% ~       -24.00% -24.73% +0.93%  -15.79% +12.32%
AddVV/words=10000/impl=go       ~       ~       ~       -33.79% -23.72% -23.79% -5.98%  -23.92% ~       +0.78%  -15.45% +8.59%
AddVV/words=100000/impl=go      ~       ~       ~       -33.90% -24.25% -22.82% -4.09%  -24.63% ~       +1.00%  -13.56% ~
SubVV/words=1/impl=go           ~       +11.64% +14.05% ~       -4.07%  ~       +10.79% -33.69% ~       ~       +3.89%  +12.33%
SubVV/words=10/impl=go          -10.31% -14.09% -7.38%  +13.76% -13.25% -18.05% -20.08% -24.97% -14.15% +10.13% -0.97%  -2.51%
SubVV/words=16/impl=go          -8.06%  -13.73% -5.70%  +17.00% -12.83% -23.76% -17.52% -25.25% -17.30% -2.80%  -4.96%  -18.25%
SubVV/words=100/impl=go         -9.22%  -1.30%  -2.76%  +20.88% -14.35% -15.29% -8.49%  -19.64% -22.31% -0.68%  -14.30% -9.04%
SubVV/words=1000/impl=go        -0.60%  ~       -3.43%  +23.08% -16.14% -11.96% ~       -28.52% -24.73% ~       -15.95% -9.91%
SubVV/words=10000/impl=go       ~       ~       ~       +26.01% -15.24% -11.92% ~       -28.26% +4.25%  ~       -15.42% -5.95%
SubVV/words=100000/impl=go      ~       ~       ~       +25.71% -15.83% -12.13% ~       -27.88% -1.27%  ~       -13.57% -6.72%
LshVU/words=1/impl=go           +0.56%  +0.36%  ~       ~       ~       ~       ~       ~       ~       ~       ~       ~
LshVU/words=10/impl=go          +13.37% +4.63%  ~       ~       ~       ~       ~       -2.90%  ~       ~       ~       ~
LshVU/words=16/impl=go          +22.83% +6.47%  ~       ~       ~       ~       ~       ~       +0.80%  ~       ~       +5.88%
LshVU/words=100/impl=go         +7.56%  +13.95% ~       ~       ~       ~       ~       ~       +0.33%  -2.50%  ~       ~
LshVU/words=1000/impl=go        +0.64%  +17.92% ~       ~       ~       ~       ~       -6.52%  ~       -2.58%  ~       ~
LshVU/words=10000/impl=go       ~       +17.60% ~       ~       ~       ~       ~       -6.64%  -6.22%  -1.40%  ~       ~
LshVU/words=100000/impl=go      ~       +14.57% ~       ~       ~       ~       ~       ~       -5.47%  ~       ~       ~
RshVU/words=1/impl=go           ~       ~       ~       ~       ~       ~       ~       ~       ~       ~       ~       +2.72%
RshVU/words=10/impl=go          ~       ~       ~       ~       ~       ~       ~       +2.50%  ~       ~       ~       ~
RshVU/words=16/impl=go          ~       +0.53%  ~       ~       ~       ~       ~       +3.82%  ~       ~       ~       ~
RshVU/words=100/impl=go         ~       ~       ~       ~       ~       ~       ~       +6.18%  ~       ~       ~       ~
RshVU/words=1000/impl=go        ~       ~       ~       ~       ~       ~       ~       +7.00%  ~       ~       ~       ~
RshVU/words=10000/impl=go       ~       ~       ~       ~       ~       ~       ~       ~       ~       ~       ~       ~
RshVU/words=100000/impl=go      ~       ~       ~       ~       ~       ~       ~       +7.05%  ~       ~       ~       ~
MulAddVWW/words=1/impl=go       -10.34% +4.43%  +10.62% -1.62%  -4.74%  -2.86%  +11.75% ~       -8.00%  +8.89%  +3.87%  ~
MulAddVWW/words=10/impl=go      -1.61%  -5.87%  ~       -8.30%  -4.55%  +0.87%  ~       -5.28%  -20.82% ~       ~       -2.32%
MulAddVWW/words=16/impl=go      -2.96%  -5.28%  ~       -9.22%  -5.28%  ~       ~       -3.74%  -19.52% -1.48%  -2.53%  -9.52%
MulAddVWW/words=100/impl=go     -3.89%  -7.53%  +1.93%  -10.49% -4.87%  -8.27%  ~       ~       -0.65%  -0.61%  -7.59%  -20.61%
MulAddVWW/words=1000/impl=go    -0.45%  -3.91%  +4.54%  -11.46% -4.69%  -8.53%  ~       ~       -0.05%  ~       -8.88%  -19.77%
MulAddVWW/words=10000/impl=go   ~       -3.30%  +4.10%  -11.34% -4.10%  -9.43%  ~       -0.61%  ~       -0.55%  -8.21%  -18.48%
MulAddVWW/words=100000/impl=go  -0.30%  -3.03%  +4.31%  -11.55% -4.41%  -9.74%  ~       -0.75%  +0.63%  ~       -7.80%  -19.82%
AddMulVVWW/words=1/impl=go      ~       +13.09% +12.50% -7.05%  -10.41% +2.53%  +13.32% -3.49%  ~       +15.56% +3.62%  ~
AddMulVVWW/words=10/impl=go     -15.96% -9.06%  -5.06%  -14.56% -11.83% -5.44%  -26.30% -14.23% -11.44% -1.79%  -5.93%  -6.60%
AddMulVVWW/words=16/impl=go     -19.05% -12.43% -6.19%  -14.24% -12.67% -8.65%  -18.64% -16.56% -10.64% -3.00%  -7.61%  -12.80%
AddMulVVWW/words=100/impl=go    -22.13% -16.59% -13.04% -13.79% -11.46% -12.01% -6.46%  -21.80% -5.08%  -3.13%  -13.60% -22.53%
AddMulVVWW/words=1000/impl=go   -17.07% -17.05% -14.08% -13.59% -12.13% -11.21% ~       -22.81% -4.27%  -1.27%  -16.35% -23.47%
AddMulVVWW/words=10000/impl=go  -15.03% -16.78% -14.23% -13.86% -11.84% -11.69% ~       -22.75% -13.39% -1.10%  -14.37% -22.01%
AddMulVVWW/words=100000/impl=go -13.70% -14.90% -14.26% -13.55% -12.04% -11.63% ~       -22.61% ~       -2.53%  -10.42% -23.16%

Change-Id: Ic6f64344484a762b818c7090d1396afceb638607
Reviewed-on: https://go-review.googlesource.com/c/go/+/665155
Auto-Submit: Russ Cox <rsc@golang.org>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Alan Donovan <adonovan@google.com>
This commit is contained in:
parent
7f516a31b0
commit
a4d0269a4f
@ -26,17 +26,13 @@ const (
|
|||||||
_M = _B - 1 // digit mask
|
_M = _B - 1 // digit mask
|
||||||
)
|
)
|
||||||
|
|
||||||
// Many of the loops in this file are of the form
|
// In these routines, it is the caller's responsibility to arrange for
|
||||||
// for i := 0; i < len(z) && i < len(x) && i < len(y); i++
|
// x, y, and z to all have the same length. We check this and panic.
|
||||||
// i < len(z) is the real condition.
|
// The assembly versions of these routines do not include that check.
|
||||||
// However, checking i < len(x) && i < len(y) as well is faster than
|
//
|
||||||
// having the compiler do a bounds check in the body of the loop;
|
// The check+panic also has the effect of teaching the compiler that
|
||||||
// remarkably it is even faster than hoisting the bounds check
|
// “i in range for z” implies “i in range for x and y”, eliminating all
|
||||||
// out of the loop, by doing something like
|
// bounds checks in loops from 0 to len(z) and vice versa.
|
||||||
// _, _ = x[len(z)-1], y[len(z)-1]
|
|
||||||
// There are other ways to hoist the bounds check out of the loop,
|
|
||||||
// but the compiler's BCE isn't powerful enough for them (yet?).
|
|
||||||
// See the discussion in CL 164966.
|
|
||||||
|
|
||||||
// ----------------------------------------------------------------------------
|
// ----------------------------------------------------------------------------
|
||||||
// Elementary operations on words
|
// Elementary operations on words
|
||||||
@ -65,8 +61,11 @@ func nlz(x Word) uint {
|
|||||||
|
|
||||||
// The resulting carry c is either 0 or 1.
|
// The resulting carry c is either 0 or 1.
|
||||||
func addVV_g(z, x, y []Word) (c Word) {
|
func addVV_g(z, x, y []Word) (c Word) {
|
||||||
// The comment near the top of this file discusses this for loop condition.
|
if len(x) != len(z) || len(y) != len(z) {
|
||||||
for i := 0; i < len(z) && i < len(x) && i < len(y); i++ {
|
panic("addVV len")
|
||||||
|
}
|
||||||
|
|
||||||
|
for i := range z {
|
||||||
zi, cc := bits.Add(uint(x[i]), uint(y[i]), uint(c))
|
zi, cc := bits.Add(uint(x[i]), uint(y[i]), uint(c))
|
||||||
z[i] = Word(zi)
|
z[i] = Word(zi)
|
||||||
c = Word(cc)
|
c = Word(cc)
|
||||||
@ -76,8 +75,11 @@ func addVV_g(z, x, y []Word) (c Word) {
|
|||||||
|
|
||||||
// The resulting carry c is either 0 or 1.
|
// The resulting carry c is either 0 or 1.
|
||||||
func subVV_g(z, x, y []Word) (c Word) {
|
func subVV_g(z, x, y []Word) (c Word) {
|
||||||
// The comment near the top of this file discusses this for loop condition.
|
if len(x) != len(z) || len(y) != len(z) {
|
||||||
for i := 0; i < len(z) && i < len(x) && i < len(y); i++ {
|
panic("subVV len")
|
||||||
|
}
|
||||||
|
|
||||||
|
for i := range z {
|
||||||
zi, cc := bits.Sub(uint(x[i]), uint(y[i]), uint(c))
|
zi, cc := bits.Sub(uint(x[i]), uint(y[i]), uint(c))
|
||||||
z[i] = Word(zi)
|
z[i] = Word(zi)
|
||||||
c = Word(cc)
|
c = Word(cc)
|
||||||
@ -99,7 +101,10 @@ func subVV_g(z, x, y []Word) (c Word) {
|
|||||||
//
|
//
|
||||||
//go:linkname addVW
|
//go:linkname addVW
|
||||||
func addVW(z, x []Word, y Word) (c Word) {
|
func addVW(z, x []Word, y Word) (c Word) {
|
||||||
x = x[:len(z)]
|
if len(x) != len(z) {
|
||||||
|
panic("addVW len")
|
||||||
|
}
|
||||||
|
|
||||||
if len(z) == 0 {
|
if len(z) == 0 {
|
||||||
return y
|
return y
|
||||||
}
|
}
|
||||||
@ -150,7 +155,10 @@ func addVW_ref(z, x []Word, y Word) (c Word) {
|
|||||||
//
|
//
|
||||||
//go:linkname subVW
|
//go:linkname subVW
|
||||||
func subVW(z, x []Word, y Word) (c Word) {
|
func subVW(z, x []Word, y Word) (c Word) {
|
||||||
x = x[:len(z)]
|
if len(x) != len(z) {
|
||||||
|
panic("subVW len")
|
||||||
|
}
|
||||||
|
|
||||||
if len(z) == 0 {
|
if len(z) == 0 {
|
||||||
return y
|
return y
|
||||||
}
|
}
|
||||||
@ -188,6 +196,10 @@ func subVW_ref(z, x []Word, y Word) (c Word) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func lshVU_g(z, x []Word, s uint) (c Word) {
|
func lshVU_g(z, x []Word, s uint) (c Word) {
|
||||||
|
if len(x) != len(z) {
|
||||||
|
panic("lshVU len")
|
||||||
|
}
|
||||||
|
|
||||||
if s == 0 {
|
if s == 0 {
|
||||||
copy(z, x)
|
copy(z, x)
|
||||||
return
|
return
|
||||||
@ -207,6 +219,10 @@ func lshVU_g(z, x []Word, s uint) (c Word) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func rshVU_g(z, x []Word, s uint) (c Word) {
|
func rshVU_g(z, x []Word, s uint) (c Word) {
|
||||||
|
if len(x) != len(z) {
|
||||||
|
panic("rshVU len")
|
||||||
|
}
|
||||||
|
|
||||||
if s == 0 {
|
if s == 0 {
|
||||||
copy(z, x)
|
copy(z, x)
|
||||||
return
|
return
|
||||||
@ -214,10 +230,6 @@ func rshVU_g(z, x []Word, s uint) (c Word) {
|
|||||||
if len(z) == 0 {
|
if len(z) == 0 {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
if len(x) != len(z) {
|
|
||||||
// This is an invariant guaranteed by the caller.
|
|
||||||
panic("len(x) != len(z)")
|
|
||||||
}
|
|
||||||
s &= _W - 1 // hint to the compiler that shifts by s don't need guard code
|
s &= _W - 1 // hint to the compiler that shifts by s don't need guard code
|
||||||
ŝ := _W - s
|
ŝ := _W - s
|
||||||
ŝ &= _W - 1 // ditto
|
ŝ &= _W - 1 // ditto
|
||||||
@ -230,18 +242,23 @@ func rshVU_g(z, x []Word, s uint) (c Word) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func mulAddVWW_g(z, x []Word, y, r Word) (c Word) {
|
func mulAddVWW_g(z, x []Word, y, r Word) (c Word) {
|
||||||
|
if len(x) != len(z) {
|
||||||
|
panic("mulAddVWW len")
|
||||||
|
}
|
||||||
c = r
|
c = r
|
||||||
// The comment near the top of this file discusses this for loop condition.
|
for i := range z {
|
||||||
for i := 0; i < len(z) && i < len(x); i++ {
|
|
||||||
c, z[i] = mulAddWWW_g(x[i], y, c)
|
c, z[i] = mulAddWWW_g(x[i], y, c)
|
||||||
}
|
}
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
func addMulVVWW_g(z, x, y []Word, m, a Word) (c Word) {
|
func addMulVVWW_g(z, x, y []Word, m, a Word) (c Word) {
|
||||||
|
if len(x) != len(z) || len(y) != len(z) {
|
||||||
|
panic("addMulVVWW len")
|
||||||
|
}
|
||||||
|
|
||||||
c = a
|
c = a
|
||||||
// The comment near the top of this file discusses this for loop condition.
|
for i := range z {
|
||||||
for i := 0; i < len(z) && i < len(x) && i < len(y); i++ {
|
|
||||||
z1, z0 := mulAddWWW_g(y[i], m, x[i])
|
z1, z0 := mulAddWWW_g(y[i], m, x[i])
|
||||||
lo, cc := bits.Add(uint(z0), uint(c), 0)
|
lo, cc := bits.Add(uint(z0), uint(c), 0)
|
||||||
c, z[i] = Word(cc), Word(lo)
|
c, z[i] = Word(cc), Word(lo)
|
||||||
|
@ -111,7 +111,7 @@ func (z nat) add(x, y nat) nat {
|
|||||||
// m > 0
|
// m > 0
|
||||||
|
|
||||||
z = z.make(m + 1)
|
z = z.make(m + 1)
|
||||||
c := addVV(z[0:n], x, y)
|
c := addVV(z[:n], x[:n], y[:n])
|
||||||
if m > n {
|
if m > n {
|
||||||
c = addVW(z[n:m], x[n:], c)
|
c = addVW(z[n:m], x[n:], c)
|
||||||
}
|
}
|
||||||
@ -137,7 +137,7 @@ func (z nat) sub(x, y nat) nat {
|
|||||||
// m > 0
|
// m > 0
|
||||||
|
|
||||||
z = z.make(m)
|
z = z.make(m)
|
||||||
c := subVV(z[0:n], x, y)
|
c := subVV(z[:n], x[:n], y[:n])
|
||||||
if m > n {
|
if m > n {
|
||||||
c = subVW(z[n:], x[n:], c)
|
c = subVW(z[n:], x[n:], c)
|
||||||
}
|
}
|
||||||
@ -232,7 +232,7 @@ func alias(x, y nat) bool {
|
|||||||
// slice, and we don't need to normalize z after each addition)
|
// slice, and we don't need to normalize z after each addition)
|
||||||
func addTo(z, x nat) {
|
func addTo(z, x nat) {
|
||||||
if n := len(x); n > 0 {
|
if n := len(x); n > 0 {
|
||||||
if c := addVV(z[:n], z, x); c != 0 {
|
if c := addVV(z[:n], z[:n], x[:n]); c != 0 {
|
||||||
if n < len(z) {
|
if n < len(z) {
|
||||||
addVW(z[n:], z[n:], c)
|
addVW(z[n:], z[n:], c)
|
||||||
}
|
}
|
||||||
|
@ -699,9 +699,9 @@ func (q nat) divBasic(stk *stack, u, v nat) {
|
|||||||
// Subtract q̂·v from the current section of u.
|
// Subtract q̂·v from the current section of u.
|
||||||
// If it underflows, q̂·v > u, which we fix up
|
// If it underflows, q̂·v > u, which we fix up
|
||||||
// by decrementing q̂ and adding v back.
|
// by decrementing q̂ and adding v back.
|
||||||
c := subVV(u[j:j+qhl], u[j:], qhatv)
|
c := subVV(u[j:j+qhl], u[j:j+qhl], qhatv[:qhl])
|
||||||
if c != 0 {
|
if c != 0 {
|
||||||
c := addVV(u[j:j+n], u[j:], v)
|
c := addVV(u[j:j+n], u[j:j+n], v)
|
||||||
// If n == qhl, the carry from subVV and the carry from addVV
|
// If n == qhl, the carry from subVV and the carry from addVV
|
||||||
// cancel out and don't affect u[j+n].
|
// cancel out and don't affect u[j+n].
|
||||||
if n < qhl {
|
if n < qhl {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user