mirror of
https://github.com/golang/go.git
synced 2025-05-05 15:43:04 +00:00
math/big: optimize mulAddVWW function for loong64
Benchmark results on Loongson 3A5000 (which is an LA464 implementation): goos: linux goarch: loong64 pkg: math/big cpu: Loongson-3A5000-HV @ 2500.00MHz │ test/old_3a5000_muladdvww.log │ test/new_3a5000_muladdvww.log │ │ sec/op │ sec/op vs base │ MulAddVWW/1 7.606n ± 0% 6.987n ± 0% -8.14% (p=0.000 n=20) MulAddVWW/2 9.207n ± 0% 8.567n ± 0% -6.95% (p=0.000 n=20) MulAddVWW/3 10.810n ± 0% 9.223n ± 0% -14.68% (p=0.000 n=20) MulAddVWW/4 13.01n ± 0% 12.41n ± 0% -4.61% (p=0.000 n=20) MulAddVWW/5 15.79n ± 0% 12.99n ± 0% -17.73% (p=0.000 n=20) MulAddVWW/10 25.62n ± 0% 20.02n ± 0% -21.86% (p=0.000 n=20) MulAddVWW/100 217.0n ± 0% 170.9n ± 0% -21.24% (p=0.000 n=20) MulAddVWW/1000 2.064µ ± 0% 1.612µ ± 0% -21.90% (p=0.000 n=20) MulAddVWW/10000 24.50µ ± 0% 16.74µ ± 0% -31.66% (p=0.000 n=20) MulAddVWW/100000 239.1µ ± 0% 171.1µ ± 0% -28.45% (p=0.000 n=20) geomean 159.2n 130.3n -18.18% Change-Id: I063434bc382f4f1234f879172ab671a3d6f2eb80 Reviewed-on: https://go-review.googlesource.com/c/go/+/659881 Reviewed-by: Dmitri Shuralyov <dmitshur@google.com> Reviewed-by: abner chenc <chenguoqi@loongson.cn> Reviewed-by: Carlos Amedee <carlos@golang.org> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
parent
24daaeea09
commit
72fa8adbdc
@ -71,8 +71,35 @@ TEXT ·lshVU(SB),NOSPLIT,$0
|
||||
TEXT ·rshVU(SB),NOSPLIT,$0
|
||||
JMP ·rshVU_g(SB)
|
||||
|
||||
// func mulAddVWW(z, x []Word, y, r Word) (c Word)
|
||||
TEXT ·mulAddVWW(SB),NOSPLIT,$0
|
||||
JMP ·mulAddVWW_g(SB)
|
||||
// input:
|
||||
// R4: z
|
||||
// R5: z_len
|
||||
// R7: x
|
||||
// R10: y
|
||||
// R11: r
|
||||
MOVV z+0(FP), R4
|
||||
MOVV z_len+8(FP), R5
|
||||
MOVV x+24(FP), R7
|
||||
MOVV y+48(FP), R10
|
||||
MOVV r+56(FP), R11
|
||||
SLLV $3, R5
|
||||
MOVV $0, R6
|
||||
loop:
|
||||
BEQ R5, R6, done
|
||||
MOVV (R6)(R7), R8
|
||||
MULV R8, R10, R9
|
||||
MULHVU R8, R10, R12
|
||||
ADDV R9, R11, R8
|
||||
SGTU R9, R8, R11 // if (c' = lo + c) < lo then overflow
|
||||
MOVV R8, (R6)(R4)
|
||||
ADDV R12, R11
|
||||
ADDV $8, R6
|
||||
JMP loop
|
||||
done:
|
||||
MOVV R11, c+64(FP)
|
||||
RET
|
||||
|
||||
TEXT ·addMulVVWW(SB),NOSPLIT,$0
|
||||
JMP ·addMulVVWW_g(SB)
|
||||
|
Loading…
x
Reference in New Issue
Block a user