mirror of
https://github.com/golang/go.git
synced 2025-05-05 15:43:04 +00:00
math/big: faster FloatPrec implementation
Based on observations by Cherry Mui (see comments in CL 539299). Add new benchmark FloatPrecMixed. For #50489. name old time/op new time/op delta FloatPrecExact/1-12 129ns ± 0% 105ns ±11% -18.51% (p=0.008 n=5+5) FloatPrecExact/10-12 317ns ± 2% 283ns ± 1% -10.65% (p=0.008 n=5+5) FloatPrecExact/100-12 1.80µs ±15% 1.35µs ± 0% -25.09% (p=0.008 n=5+5) FloatPrecExact/1000-12 9.48µs ±14% 8.32µs ± 1% -12.25% (p=0.008 n=5+5) FloatPrecExact/10000-12 195µs ± 1% 191µs ± 0% -1.73% (p=0.008 n=5+5) FloatPrecExact/100000-12 7.31ms ± 1% 7.24ms ± 1% -0.99% (p=0.032 n=5+5) FloatPrecExact/1000000-12 301ms ± 3% 302ms ± 2% ~ (p=0.841 n=5+5) FloatPrecMixed/1-12 141ns ± 0% 110ns ± 3% -21.88% (p=0.008 n=5+5) FloatPrecMixed/10-12 767ns ± 0% 739ns ± 5% ~ (p=0.151 n=5+5) FloatPrecMixed/100-12 4.93µs ± 2% 3.73µs ± 1% -24.33% (p=0.008 n=5+5) FloatPrecMixed/1000-12 90.9µs ±11% 70.3µs ± 2% -22.66% (p=0.008 n=5+5) FloatPrecMixed/10000-12 2.30ms ± 0% 1.92ms ± 1% -16.41% (p=0.008 n=5+5) FloatPrecMixed/100000-12 87.1ms ± 1% 68.5ms ± 1% -21.42% (p=0.008 n=5+5) FloatPrecMixed/1000000-12 4.09s ± 1% 3.58s ± 1% -12.35% (p=0.008 n=5+5) FloatPrecInexact/1-12 92.4ns ± 0% 66.1ns ± 5% -28.41% (p=0.008 n=5+5) FloatPrecInexact/10-12 118ns ± 0% 91ns ± 1% -23.14% (p=0.016 n=5+4) FloatPrecInexact/100-12 310ns ±10% 244ns ± 1% -21.32% (p=0.008 n=5+5) FloatPrecInexact/1000-12 952ns ± 1% 828ns ± 1% -12.96% (p=0.016 n=4+5) FloatPrecInexact/10000-12 6.71µs ± 1% 6.25µs ± 3% -6.83% (p=0.008 n=5+5) FloatPrecInexact/100000-12 66.1µs ± 1% 61.2µs ± 1% -7.45% (p=0.008 n=5+5) FloatPrecInexact/1000000-12 635µs ± 2% 584µs ± 1% -7.97% (p=0.008 n=5+5) Change-Id: I3aa67b49a042814a3286ee8306fbed36709cbb6e Reviewed-on: https://go-review.googlesource.com/c/go/+/542756 Reviewed-by: Cherry Mui <cherryyz@google.com> Run-TryBot: Robert Griesemer <gri@google.com> TryBot-Result: Gopher Robot <gobot@golang.org> Reviewed-by: Robert Griesemer <gri@google.com> Auto-Submit: Robert Griesemer <gri@google.com>
This commit is contained in:
parent
2f5bd4e4f2
commit
22278e3835
@ -415,62 +415,45 @@ func (x *Rat) FloatPrec() (n int, exact bool) {
|
||||
q = q.shr(d, p2)
|
||||
|
||||
// Determine p5 by counting factors of 5.
|
||||
|
||||
// Build a table starting with an initial power of 5,
|
||||
// and using repeated squaring until the factor doesn't
|
||||
// and use repeated squaring until the factor doesn't
|
||||
// divide q anymore. Then use the table to determine
|
||||
// the power of 5 in q.
|
||||
//
|
||||
// Setting the table limit to 0 turns this off;
|
||||
// a limit of 1 uses just one factor 5^fp.
|
||||
// Larger values build up a more comprehensive table.
|
||||
const fp = 13 // f == 5^fp
|
||||
const limit = 100 // table size limit
|
||||
var tab []nat // tab[i] == 5^(fp·2^i)
|
||||
var tab []nat // tab[i] == (5^fp)^(2^i) == 5^(fp·2^i)
|
||||
f := nat{1220703125} // == 5^fp (must fit into a uint32 Word)
|
||||
var t, r nat // temporaries
|
||||
for len(tab) < limit {
|
||||
for {
|
||||
if _, r = t.div(r, q, f); len(r) != 0 {
|
||||
break // f doesn't divide q evenly
|
||||
}
|
||||
tab = append(tab, f)
|
||||
f = f.sqr(f)
|
||||
f = nat(nil).sqr(f) // nat(nil) to ensure a new f for each table entry
|
||||
}
|
||||
|
||||
// TODO(gri) Optimization: don't waste the successful
|
||||
// division q/f above; instead reduce q and
|
||||
// count the multiples.
|
||||
|
||||
// Factor q using the table entries, if any.
|
||||
var p5, p uint
|
||||
// We start with the largest factor f = tab[len(tab)-1]
|
||||
// that evenly divides q. It does so at most once because
|
||||
// otherwise f·f would also divide q. That can't be true
|
||||
// because f·f is the next higher table entry, contradicting
|
||||
// how f was chosen in the first place.
|
||||
// The same reasoning applies to the subsequent factors.
|
||||
var p5 uint
|
||||
for i := len(tab) - 1; i >= 0; i-- {
|
||||
q, p = multiples(q, tab[i])
|
||||
p5 += p << i * fp
|
||||
if t, r = t.div(r, q, tab[i]); len(r) == 0 {
|
||||
p5 += fp * (1 << i) // tab[i] == 5^(fp·2^i)
|
||||
q = q.set(t)
|
||||
}
|
||||
}
|
||||
|
||||
q, p = multiples(q, natFive)
|
||||
p5 += p
|
||||
// If fp != 1, we may still have multiples of 5 left.
|
||||
for {
|
||||
if t, r = t.div(r, q, natFive); len(r) != 0 {
|
||||
break
|
||||
}
|
||||
p5++
|
||||
q = q.set(t)
|
||||
}
|
||||
|
||||
return int(max(p2, p5)), q.cmp(natOne) == 0
|
||||
}
|
||||
|
||||
// multiples returns d and largest p such that x = d·f^p.
|
||||
// x and f must not be 0.
|
||||
func multiples(x, f nat) (d nat, p uint) {
|
||||
// Determine p through repeated division.
|
||||
d = d.set(x)
|
||||
// p == 0
|
||||
var q, r nat
|
||||
for {
|
||||
// invariant x == d·f^p
|
||||
q, r = q.div(r, d, f)
|
||||
if len(r) != 0 {
|
||||
return
|
||||
}
|
||||
// q == d/f
|
||||
// x == q·f·f^p
|
||||
p++
|
||||
// x == q·f^p
|
||||
d = d.set(q)
|
||||
}
|
||||
}
|
||||
|
@ -719,6 +719,28 @@ func BenchmarkFloatPrecExact(b *testing.B) {
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkFloatPrecMixed(b *testing.B) {
|
||||
for _, n := range []int{1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6} {
|
||||
// d := (3·5·7·11)^n
|
||||
d := NewInt(3 * 5 * 7 * 11)
|
||||
p := NewInt(int64(n))
|
||||
d.Exp(d, p, nil)
|
||||
|
||||
// r := 1/d
|
||||
var r Rat
|
||||
r.SetFrac(NewInt(1), d)
|
||||
|
||||
b.Run(fmt.Sprint(n), func(b *testing.B) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
prec, ok := r.FloatPrec()
|
||||
if prec != n || ok {
|
||||
b.Fatalf("got exact, ok = %d, %v; want %d, %v", prec, ok, uint64(n), false)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkFloatPrecInexact(b *testing.B) {
|
||||
for _, n := range []int{1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6} {
|
||||
// d := 5^n + 1
|
||||
|
Loading…
x
Reference in New Issue
Block a user