mirror of
https://github.com/golang/go.git
synced 2025-05-05 15:43:04 +00:00
math/big: optimize subVV function for loong64
Benchmark results on Loongson 3C5000 (which is an LA464 implementation):

goos: linux
goarch: loong64
pkg: math/big
cpu: Loongson-3C5000 @ 2200.00MHz
             │ test/old_3c5000_subvv.log │ test/new_3c5000_subvv.log │
             │ sec/op │ sec/op vs base │
SubVV/1      10.920n ± 0%   7.657n ± 0%  -29.88% (p=0.000 n=20)
SubVV/2      14.100n ± 0%   8.841n ± 0%  -37.30% (p=0.000 n=20)
SubVV/3      16.38n ± 0%    11.06n ± 0%  -32.48% (p=0.000 n=20)
SubVV/4      18.65n ± 0%    12.85n ± 0%  -31.10% (p=0.000 n=20)
SubVV/5      20.93n ± 0%    14.79n ± 0%  -29.34% (p=0.000 n=20)
SubVV/10     32.30n ± 0%    22.29n ± 0%  -30.99% (p=0.000 n=20)
SubVV/100    244.3n ± 0%    149.2n ± 0%  -38.93% (p=0.000 n=20)
SubVV/1000   2.292µ ± 0%    1.378µ ± 0%  -39.88% (p=0.000 n=20)
SubVV/10000  26.26µ ± 0%    25.64µ ± 0%  -2.33% (p=0.000 n=20)
SubVV/100000 341.3µ ± 0%    238.0µ ± 0%  -30.26% (p=0.000 n=20)
geomean      209.1n         144.5n       -30.86%

Change-Id: I3863c2c6728f1b0f8fecbf77de13254299c5b1cb
Reviewed-on: https://go-review.googlesource.com/c/go/+/659877
Reviewed-by: abner chenc <chenguoqi@loongson.cn>
Reviewed-by: Carlos Amedee <carlos@golang.org>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
parent
72fa8adbdc
commit
396a48bea6
@ -12,8 +12,35 @@
|
||||
// addVV transfers control directly to ·addVV_g; no other instructions are
// visible for it in this view.
TEXT ·addVV(SB),NOSPLIT,$0
|
||||
JMP ·addVV_g(SB)
|
||||
|
||||
// func subVV(z, x, y []Word) (c Word)
// subVV walks x and y one 8-byte element at a time, stores
// x[i] - y[i] - borrow into z[i], and writes the final borrow (0 or 1) to c.
// Only z's length is read; x and y are indexed with the same byte offsets.
|
||||
TEXT ·subVV(SB),NOSPLIT,$0
|
||||
// NOTE(review): this unconditional JMP sits in front of the body below. In
// this diff view it looks like the removed pre-change line (the old body that
// deferred to ·subVV_g) — confirm against the committed file.
JMP ·subVV_g(SB)
|
||||
// input:
|
||||
// R4: z
|
||||
||||
// R5: z_len
|
||||
// R7: x
|
||||
||||
// R10: y
|
||||
MOVV z+0(FP), R4
|
||||
MOVV z_len+8(FP), R5
|
||||
MOVV x+24(FP), R7
|
||||
MOVV y+48(FP), R10
|
||||
// R6: byte offset of the current element, starting at 0.
MOVV $0, R6
|
||||
// R5 = z_len << 3: the byte offset at which the loop stops.
SLLV $3, R5
|
||||
// R8: borrow carried from one element to the next, initially 0.
MOVV $0, R8
|
||||
loop:
|
||||
// Leave the loop once the offset reaches the end bound.
BEQ R5, R6, done
|
||||
// Load the x element into R9 and the y element into R11 at offset R6.
MOVV (R6)(R7), R9
|
||||
MOVV (R6)(R10), R11
|
||||
SUBV R11, R9, R11 // x1 - y1 = z1', if z1' > x1 then overflow
|
||||
SUBV R8, R11, R12 // z1' - c0 = z1, if z1 > z1' then overflow
|
||||
// R9 = 1 if the first subtraction wrapped (z1' > x1, unsigned), else 0.
SGTU R11, R9, R9
|
||||
// R11 = 1 if subtracting the incoming borrow wrapped (z1 > z1'), else 0.
SGTU R12, R11, R11
|
||||
// Store the result element into z at the same offset.
MOVV R12, (R6)(R4)
|
||||
// Outgoing borrow: set if either subtraction wrapped.
OR R9, R11, R8
|
||||
// Advance to the next 8-byte element.
ADDV $8, R6
|
||||
JMP loop
|
||||
done:
|
||||
// Return the final borrow in c.
MOVV R8, c+72(FP)
|
||||
RET
|
||||
|
||||
// func addVW(z, x []Word, y Word) (c Word)
|
||||
TEXT ·addVW(SB),NOSPLIT,$0
|
||||
|
Loading…
x
Reference in New Issue
Block a user