mirror of
https://github.com/golang/go.git
synced 2025-05-05 15:43:04 +00:00
math/big: remove copy responsibility from, rename shlVU, shrVU
It is annoying that non-x86 implementations of shlVU and shrVU have to go out of their way to handle the trivial case shift==0 with their own copy loops. Instead, arrange to never call them with shift==0, so that the code can be removed. Unfortunately, there are linknames of shlVU, so we cannot change that function. But we can rename the functions and then leave behind a shlVU wrapper, so do that. Since the big.Int API calls the operations Lsh and Rsh, rename shlVU/shrVU to lshVU/rshVU. Also rename various other shl/shr methods and functions to lsh/rsh. Change-Id: Ieaf54e0110a298730aa3e4566ce5be57ba7fc121 Reviewed-on: https://go-review.googlesource.com/c/go/+/664896 Reviewed-by: Alan Donovan <adonovan@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
parent
4dffdd797b
commit
432fd9c60f
@ -143,7 +143,7 @@ func subVWlarge(z, x []Word, y Word) (c Word) {
|
||||
return
|
||||
}
|
||||
|
||||
func shlVU_g(z, x []Word, s uint) (c Word) {
|
||||
func lshVU_g(z, x []Word, s uint) (c Word) {
|
||||
if s == 0 {
|
||||
copy(z, x)
|
||||
return
|
||||
@ -162,7 +162,7 @@ func shlVU_g(z, x []Word, s uint) (c Word) {
|
||||
return
|
||||
}
|
||||
|
||||
func shrVU_g(z, x []Word, s uint) (c Word) {
|
||||
func rshVU_g(z, x []Word, s uint) (c Word) {
|
||||
if s == 0 {
|
||||
copy(z, x)
|
||||
return
|
||||
|
@ -105,8 +105,8 @@ E4: CMPL BX, BP // i < n
|
||||
RET
|
||||
|
||||
|
||||
// func shlVU(z, x []Word, s uint) (c Word)
|
||||
TEXT ·shlVU(SB),NOSPLIT,$0
|
||||
// func lshVU(z, x []Word, s uint) (c Word)
|
||||
TEXT ·lshVU(SB),NOSPLIT,$0
|
||||
MOVL z_len+4(FP), BX // i = len(z)
|
||||
SUBL $1, BX // i--
|
||||
JL X8b // i < 0 (n <= 0)
|
||||
@ -140,8 +140,8 @@ X8b: MOVL $0, c+28(FP)
|
||||
RET
|
||||
|
||||
|
||||
// func shrVU(z, x []Word, s uint) (c Word)
|
||||
TEXT ·shrVU(SB),NOSPLIT,$0
|
||||
// func rshVU(z, x []Word, s uint) (c Word)
|
||||
TEXT ·rshVU(SB),NOSPLIT,$0
|
||||
MOVL z_len+4(FP), BP
|
||||
SUBL $1, BP // n--
|
||||
JL X9b // n < 0 (n <= 0)
|
||||
|
@ -234,8 +234,8 @@ large:
|
||||
JMP ·subVWlarge(SB)
|
||||
|
||||
|
||||
// func shlVU(z, x []Word, s uint) (c Word)
|
||||
TEXT ·shlVU(SB),NOSPLIT,$0
|
||||
// func lshVU(z, x []Word, s uint) (c Word)
|
||||
TEXT ·lshVU(SB),NOSPLIT,$0
|
||||
MOVQ z_len+8(FP), BX // i = len(z)
|
||||
SUBQ $1, BX // i--
|
||||
JL X8b // i < 0 (n <= 0)
|
||||
@ -269,8 +269,8 @@ X8b: MOVQ $0, c+56(FP)
|
||||
RET
|
||||
|
||||
|
||||
// func shrVU(z, x []Word, s uint) (c Word)
|
||||
TEXT ·shrVU(SB),NOSPLIT,$0
|
||||
// func rshVU(z, x []Word, s uint) (c Word)
|
||||
TEXT ·rshVU(SB),NOSPLIT,$0
|
||||
MOVQ z_len+8(FP), R11
|
||||
SUBQ $1, R11 // n--
|
||||
JL X9b // n < 0 (n <= 0)
|
||||
|
@ -118,8 +118,8 @@ E4:
|
||||
RET
|
||||
|
||||
|
||||
// func shlVU(z, x []Word, s uint) (c Word)
|
||||
TEXT ·shlVU(SB),NOSPLIT,$0
|
||||
// func lshVU(z, x []Word, s uint) (c Word)
|
||||
TEXT ·lshVU(SB),NOSPLIT,$0
|
||||
MOVW z_len+4(FP), R5
|
||||
TEQ $0, R5
|
||||
BEQ X7
|
||||
@ -129,8 +129,6 @@ TEXT ·shlVU(SB),NOSPLIT,$0
|
||||
ADD R5<<2, R2, R2
|
||||
ADD R5<<2, R1, R5
|
||||
MOVW s+24(FP), R3
|
||||
TEQ $0, R3 // shift 0 is special
|
||||
BEQ Y7
|
||||
ADD $4, R1 // stop one word early
|
||||
MOVW $32, R4
|
||||
SUB R3, R4
|
||||
@ -154,20 +152,15 @@ E7:
|
||||
MOVW R7, -4(R5)
|
||||
RET
|
||||
|
||||
Y7: // copy loop, because shift 0 == shift 32
|
||||
MOVW.W -4(R2), R6
|
||||
MOVW.W R6, -4(R5)
|
||||
TEQ R1, R5
|
||||
BNE Y7
|
||||
|
||||
X7:
|
||||
MOVW $0, R1
|
||||
MOVW R1, c+28(FP)
|
||||
RET
|
||||
|
||||
|
||||
// func shrVU(z, x []Word, s uint) (c Word)
|
||||
TEXT ·shrVU(SB),NOSPLIT,$0
|
||||
|
||||
// func rshVU(z, x []Word, s uint) (c Word)
|
||||
TEXT ·rshVU(SB),NOSPLIT,$0
|
||||
MOVW z_len+4(FP), R5
|
||||
TEQ $0, R5
|
||||
BEQ X6
|
||||
@ -176,8 +169,6 @@ TEXT ·shrVU(SB),NOSPLIT,$0
|
||||
MOVW x+12(FP), R2
|
||||
ADD R5<<2, R1, R5
|
||||
MOVW s+24(FP), R3
|
||||
TEQ $0, R3 // shift 0 is special
|
||||
BEQ Y6
|
||||
SUB $4, R5 // stop one word early
|
||||
MOVW $32, R4
|
||||
SUB R3, R4
|
||||
@ -203,18 +194,11 @@ E6:
|
||||
MOVW R7, 0(R1)
|
||||
RET
|
||||
|
||||
Y6: // copy loop, because shift 0 == shift 32
|
||||
MOVW.P 4(R2), R6
|
||||
MOVW.P R6, 4(R1)
|
||||
TEQ R1, R5
|
||||
BNE Y6
|
||||
|
||||
X6:
|
||||
MOVW $0, R1
|
||||
MOVW R1, c+28(FP)
|
||||
RET
|
||||
|
||||
|
||||
// func mulAddVWW(z, x []Word, m, a Word) (c Word)
|
||||
TEXT ·mulAddVWW(SB),NOSPLIT,$0
|
||||
MOVW $0, R0
|
||||
|
@ -251,19 +251,18 @@ copy_4: // no carry flag, copy the rest
|
||||
vwOneIterCopy(R0, done)
|
||||
B copy_4
|
||||
|
||||
// func shlVU(z, x []Word, s uint) (c Word)
|
||||
// func lshVU(z, x []Word, s uint) (c Word)
|
||||
// This implementation handles the shift operation from the high word to the low word,
|
||||
// which may be an error for the case where the low word of x overlaps with the high
|
||||
// word of z. When calling this function directly, you need to pay attention to this
|
||||
// situation.
|
||||
TEXT ·shlVU(SB),NOSPLIT,$0
|
||||
TEXT ·lshVU(SB),NOSPLIT,$0
|
||||
LDP z+0(FP), (R0, R1) // R0 = z.ptr, R1 = len(z)
|
||||
MOVD x+24(FP), R2
|
||||
MOVD s+48(FP), R3
|
||||
ADD R1<<3, R0 // R0 = &z[n]
|
||||
ADD R1<<3, R2 // R2 = &x[n]
|
||||
CBZ R1, len0
|
||||
CBZ R3, copy // if the number of shift is 0, just copy x to z
|
||||
MOVD $64, R4
|
||||
SUB R3, R4
|
||||
// handling the most significant element x[n-1]
|
||||
@ -313,36 +312,16 @@ done:
|
||||
MOVD.W R8, -8(R0) // the first element x[0]
|
||||
MOVD R5, c+56(FP) // the part moved out from x[n-1]
|
||||
RET
|
||||
copy:
|
||||
CMP R0, R2
|
||||
BEQ len0
|
||||
TBZ $0, R1, ctwo
|
||||
MOVD.W -8(R2), R4
|
||||
MOVD.W R4, -8(R0)
|
||||
SUB $1, R1
|
||||
ctwo:
|
||||
TBZ $1, R1, cloop
|
||||
LDP.W -16(R2), (R4, R5)
|
||||
STP.W (R4, R5), -16(R0)
|
||||
SUB $2, R1
|
||||
cloop:
|
||||
CBZ R1, len0
|
||||
LDP.W -32(R2), (R4, R5)
|
||||
LDP 16(R2), (R6, R7)
|
||||
STP.W (R4, R5), -32(R0)
|
||||
STP (R6, R7), 16(R0)
|
||||
SUB $4, R1
|
||||
B cloop
|
||||
len0:
|
||||
MOVD $0, c+56(FP)
|
||||
RET
|
||||
|
||||
// func shrVU(z, x []Word, s uint) (c Word)
|
||||
// func rshVU(z, x []Word, s uint) (c Word)
|
||||
// This implementation handles the shift operation from the low word to the high word,
|
||||
// which may be an error for the case where the high word of x overlaps with the low
|
||||
// word of z. When calling this function directly, you need to pay attention to this
|
||||
// situation.
|
||||
TEXT ·shrVU(SB),NOSPLIT,$0
|
||||
TEXT ·rshVU(SB),NOSPLIT,$0
|
||||
MOVD z+0(FP), R0
|
||||
MOVD z_len+8(FP), R1
|
||||
MOVD x+24(FP), R2
|
||||
@ -351,7 +330,6 @@ TEXT ·shrVU(SB),NOSPLIT,$0
|
||||
MOVD $64, R4
|
||||
SUB R3, R4
|
||||
CBZ R1, len0
|
||||
CBZ R3, copy // if the number of shift is 0, just copy x to z
|
||||
|
||||
MOVD.P 8(R2), R20
|
||||
LSR R3, R20, R8
|
||||
@ -400,26 +378,6 @@ loop:
|
||||
done:
|
||||
MOVD R8, (R0) // deal with the last element
|
||||
RET
|
||||
copy:
|
||||
CMP R0, R2
|
||||
BEQ len0
|
||||
TBZ $0, R1, ctwo
|
||||
MOVD.P 8(R2), R3
|
||||
MOVD.P R3, 8(R0)
|
||||
SUB $1, R1
|
||||
ctwo:
|
||||
TBZ $1, R1, cloop
|
||||
LDP.P 16(R2), (R4, R5)
|
||||
STP.P (R4, R5), 16(R0)
|
||||
SUB $2, R1
|
||||
cloop:
|
||||
CBZ R1, len0
|
||||
LDP.P 32(R2), (R4, R5)
|
||||
LDP -16(R2), (R6, R7)
|
||||
STP.P (R4, R5), 32(R0)
|
||||
STP (R6, R7), -16(R0)
|
||||
SUB $4, R1
|
||||
B cloop
|
||||
len0:
|
||||
MOVD $0, c+56(FP)
|
||||
RET
|
||||
|
@ -58,7 +58,7 @@ func addVW(z, x []Word, y Word) (c Word)
|
||||
//go:noescape
|
||||
func subVW(z, x []Word, y Word) (c Word)
|
||||
|
||||
// shlVU should be an internal detail,
|
||||
// shlVU should be an internal detail (and a stale one at that),
|
||||
// but widely used packages access it using linkname.
|
||||
// Notable members of the hall of shame include:
|
||||
// - github.com/remyoudompheng/bigfft
|
||||
@ -67,11 +67,23 @@ func subVW(z, x []Word, y Word) (c Word)
|
||||
// See go.dev/issue/67401.
|
||||
//
|
||||
//go:linkname shlVU
|
||||
//go:noescape
|
||||
func shlVU(z, x []Word, s uint) (c Word)
|
||||
func shlVU(z, x []Word, s uint) (c Word) {
|
||||
if s == 0 {
|
||||
copy(z, x)
|
||||
return 0
|
||||
}
|
||||
return lshVU(z, x, s)
|
||||
}
|
||||
|
||||
// lshVU sets z = x<<s, returning the high bits c. 1 ≤ s ≤ _B-1.
|
||||
//
|
||||
//go:noescape
|
||||
func shrVU(z, x []Word, s uint) (c Word)
|
||||
func lshVU(z, x []Word, s uint) (c Word)
|
||||
|
||||
// rshVU sets z = x>>s, returning the low bits c. 1 ≤ s ≤ _B-1.
|
||||
//
|
||||
//go:noescape
|
||||
func rshVU(z, x []Word, s uint) (c Word)
|
||||
|
||||
// mulAddVWW should be an internal detail,
|
||||
// but widely used packages access it using linkname.
|
||||
|
@ -32,12 +32,12 @@ func subVW(z, x []Word, y Word) (c Word) {
|
||||
return fn(z, x, y)
|
||||
}
|
||||
|
||||
func shlVU(z, x []Word, s uint) (c Word) {
|
||||
return shlVU_g(z, x, s)
|
||||
func lshVU(z, x []Word, s uint) (c Word) {
|
||||
return lshVU_g(z, x, s)
|
||||
}
|
||||
|
||||
func shrVU(z, x []Word, s uint) (c Word) {
|
||||
return shrVU_g(z, x, s)
|
||||
func rshVU(z, x []Word, s uint) (c Word) {
|
||||
return rshVU_g(z, x, s)
|
||||
}
|
||||
|
||||
func mulAddVWW(z, x []Word, y, r Word) (c Word) {
|
||||
|
@ -21,11 +21,11 @@ TEXT ·addVW(SB),NOSPLIT,$0
|
||||
TEXT ·subVW(SB),NOSPLIT,$0
|
||||
JMP ·subVW_g(SB)
|
||||
|
||||
TEXT ·shlVU(SB),NOSPLIT,$0
|
||||
JMP ·shlVU_g(SB)
|
||||
TEXT ·lshVU(SB),NOSPLIT,$0
|
||||
JMP ·lshVU_g(SB)
|
||||
|
||||
TEXT ·shrVU(SB),NOSPLIT,$0
|
||||
JMP ·shrVU_g(SB)
|
||||
TEXT ·rshVU(SB),NOSPLIT,$0
|
||||
JMP ·rshVU_g(SB)
|
||||
|
||||
TEXT ·mulAddVWW(SB),NOSPLIT,$0
|
||||
JMP ·mulAddVWW_g(SB)
|
||||
|
@ -21,11 +21,11 @@ TEXT ·addVW(SB),NOSPLIT,$0
|
||||
TEXT ·subVW(SB),NOSPLIT,$0
|
||||
JMP ·subVW_g(SB)
|
||||
|
||||
TEXT ·shlVU(SB),NOSPLIT,$0
|
||||
JMP ·shlVU_g(SB)
|
||||
TEXT ·lshVU(SB),NOSPLIT,$0
|
||||
JMP ·lshVU_g(SB)
|
||||
|
||||
TEXT ·shrVU(SB),NOSPLIT,$0
|
||||
JMP ·shrVU_g(SB)
|
||||
TEXT ·rshVU(SB),NOSPLIT,$0
|
||||
JMP ·rshVU_g(SB)
|
||||
|
||||
TEXT ·mulAddVWW(SB),NOSPLIT,$0
|
||||
JMP ·mulAddVWW_g(SB)
|
||||
|
@ -21,11 +21,11 @@ TEXT ·addVW(SB),NOSPLIT,$0
|
||||
TEXT ·subVW(SB),NOSPLIT,$0
|
||||
JMP ·subVW_g(SB)
|
||||
|
||||
TEXT ·shlVU(SB),NOSPLIT,$0
|
||||
JMP ·shlVU_g(SB)
|
||||
TEXT ·lshVU(SB),NOSPLIT,$0
|
||||
JMP ·lshVU_g(SB)
|
||||
|
||||
TEXT ·shrVU(SB),NOSPLIT,$0
|
||||
JMP ·shrVU_g(SB)
|
||||
TEXT ·rshVU(SB),NOSPLIT,$0
|
||||
JMP ·rshVU_g(SB)
|
||||
|
||||
TEXT ·mulAddVWW(SB),NOSPLIT,$0
|
||||
JMP ·mulAddVWW_g(SB)
|
||||
|
@ -339,15 +339,13 @@ done:
|
||||
MOVD R4, c+56(FP)
|
||||
RET
|
||||
|
||||
//func shlVU(z, x []Word, s uint) (c Word)
|
||||
TEXT ·shlVU(SB), NOSPLIT, $0
|
||||
//func lshVU(z, x []Word, s uint) (c Word)
|
||||
TEXT ·lshVU(SB), NOSPLIT, $0
|
||||
MOVD z+0(FP), R3
|
||||
MOVD x+24(FP), R6
|
||||
MOVD s+48(FP), R9
|
||||
MOVD z_len+8(FP), R4
|
||||
MOVD x_len+32(FP), R7
|
||||
CMP R9, $0 // s==0 copy(z,x)
|
||||
BEQ zeroshift
|
||||
CMP R4, $0 // len(z)==0 return
|
||||
BEQ done
|
||||
|
||||
@ -378,51 +376,18 @@ loopexit:
|
||||
MOVD R4, 0(R3) // z[0]=x[0]<<s
|
||||
MOVD R7, c+56(FP) // store pre-computed x[len(z)-1]>>ŝ into c
|
||||
RET
|
||||
|
||||
zeroshift:
|
||||
CMP R6, $0 // x is null, nothing to copy
|
||||
BEQ done
|
||||
CMP R6, R3 // if x is same as z, nothing to copy
|
||||
BEQ done
|
||||
CMP R7, R4
|
||||
ISEL $0, R7, R4, R7 // Take the lower bound of lengths of x,z
|
||||
SLD $3, R7, R7
|
||||
SUB R6, R3, R11 // dest - src
|
||||
CMPU R11, R7, CR2 // < len?
|
||||
BLT CR2, backward // there is overlap, copy backwards
|
||||
MOVD $0, R14
|
||||
// shlVU processes backwards, but added a forward copy option
|
||||
// since its faster on POWER
|
||||
repeat:
|
||||
MOVD (R6)(R14), R15 // Copy 8 bytes at a time
|
||||
MOVD R15, (R3)(R14)
|
||||
ADD $8, R14
|
||||
CMP R14, R7 // More 8 bytes left?
|
||||
BLT repeat
|
||||
BR done
|
||||
backward:
|
||||
ADD $-8,R7, R14
|
||||
repeatback:
|
||||
MOVD (R6)(R14), R15 // copy x into z backwards
|
||||
MOVD R15, (R3)(R14) // copy 8 bytes at a time
|
||||
SUB $8, R14
|
||||
CMP R14, $-8 // More 8 bytes left?
|
||||
BGT repeatback
|
||||
|
||||
done:
|
||||
MOVD R0, c+56(FP) // c=0
|
||||
RET
|
||||
|
||||
//func shrVU(z, x []Word, s uint) (c Word)
|
||||
TEXT ·shrVU(SB), NOSPLIT, $0
|
||||
//func rshVU(z, x []Word, s uint) (c Word)
|
||||
TEXT ·rshVU(SB), NOSPLIT, $0
|
||||
MOVD z+0(FP), R3
|
||||
MOVD x+24(FP), R6
|
||||
MOVD s+48(FP), R9
|
||||
MOVD z_len+8(FP), R4
|
||||
MOVD x_len+32(FP), R7
|
||||
|
||||
CMP R9, $0 // s==0, copy(z,x)
|
||||
BEQ zeroshift
|
||||
CMP R4, $0 // len(z)==0 return
|
||||
BEQ done
|
||||
SUBC R9, $64, R5 // ŝ=_W-s, we skip & by _W-1 as the caller ensures s < _W(64)
|
||||
@ -476,22 +441,6 @@ loopexit:
|
||||
MOVD R5, (R3)(R4) // z[len(z)-1]=x[len(z)-1]>>s
|
||||
MOVD R7, c+56(FP) // store pre-computed x[0]<<ŝ into c
|
||||
RET
|
||||
|
||||
zeroshift:
|
||||
CMP R6, $0 // x is null, nothing to copy
|
||||
BEQ done
|
||||
CMP R6, R3 // if x is same as z, nothing to copy
|
||||
BEQ done
|
||||
CMP R7, R4
|
||||
ISEL $0, R7, R4, R7 // Take the lower bounds of lengths of x, z
|
||||
SLD $3, R7, R7
|
||||
MOVD $0, R14
|
||||
repeat:
|
||||
MOVD (R6)(R14), R15 // copy 8 bytes at a time
|
||||
MOVD R15, (R3)(R14) // shrVU processes bytes only forwards
|
||||
ADD $8, R14
|
||||
CMP R14, R7 // More 8 bytes left?
|
||||
BLT repeat
|
||||
done:
|
||||
MOVD R0, c+56(FP)
|
||||
RET
|
||||
|
@ -293,11 +293,11 @@ done:
|
||||
MOV X29, c+56(FP) // return b
|
||||
RET
|
||||
|
||||
TEXT ·shlVU(SB),NOSPLIT,$0
|
||||
JMP ·shlVU_g(SB)
|
||||
TEXT ·lshVU(SB),NOSPLIT,$0
|
||||
JMP ·lshVU_g(SB)
|
||||
|
||||
TEXT ·shrVU(SB),NOSPLIT,$0
|
||||
JMP ·shrVU_g(SB)
|
||||
TEXT ·rshVU(SB),NOSPLIT,$0
|
||||
JMP ·rshVU_g(SB)
|
||||
|
||||
TEXT ·mulAddVWW(SB),NOSPLIT,$0
|
||||
MOV x+24(FP), X5
|
||||
|
@ -682,13 +682,13 @@ returnC:
|
||||
MOVD R7, c+56(FP)
|
||||
RET
|
||||
|
||||
// func shlVU(z, x []Word, s uint) (c Word)
|
||||
TEXT ·shlVU(SB), NOSPLIT, $0
|
||||
BR ·shlVU_g(SB)
|
||||
// func lshVU(z, x []Word, s uint) (c Word)
|
||||
TEXT ·lshVU(SB), NOSPLIT, $0
|
||||
BR ·lshVU_g(SB)
|
||||
|
||||
// func shrVU(z, x []Word, s uint) (c Word)
|
||||
TEXT ·shrVU(SB), NOSPLIT, $0
|
||||
BR ·shrVU_g(SB)
|
||||
// func rshVU(z, x []Word, s uint) (c Word)
|
||||
TEXT ·rshVU(SB), NOSPLIT, $0
|
||||
BR ·rshVU_g(SB)
|
||||
|
||||
// CX = R4, r8 = r8, r9=r9, r10 = r2, r11 = r5, DX = r3, AX = r6, BX = R1, (R0 set to 0) + use R11 + use R7 for i
|
||||
// func mulAddVWW(z, x []Word, m, a Word) (c Word)
|
||||
|
@ -136,32 +136,26 @@ var sumVW = []argVW{
|
||||
{nat{585}, nat{314}, 271, 0},
|
||||
}
|
||||
|
||||
var lshVW = []argVW{
|
||||
var lshVWTests = []argVW{
|
||||
{},
|
||||
{nat{0}, nat{0}, 0, 0},
|
||||
{nat{0}, nat{0}, 1, 0},
|
||||
{nat{0}, nat{0}, 20, 0},
|
||||
|
||||
{nat{_M}, nat{_M}, 0, 0},
|
||||
{nat{_M << 1 & _M}, nat{_M}, 1, 1},
|
||||
{nat{_M << 20 & _M}, nat{_M}, 20, _M >> (_W - 20)},
|
||||
|
||||
{nat{_M, _M, _M}, nat{_M, _M, _M}, 0, 0},
|
||||
{nat{_M << 1 & _M, _M, _M}, nat{_M, _M, _M}, 1, 1},
|
||||
{nat{_M << 20 & _M, _M, _M}, nat{_M, _M, _M}, 20, _M >> (_W - 20)},
|
||||
}
|
||||
|
||||
var rshVW = []argVW{
|
||||
var rshVWTests = []argVW{
|
||||
{},
|
||||
{nat{0}, nat{0}, 0, 0},
|
||||
{nat{0}, nat{0}, 1, 0},
|
||||
{nat{0}, nat{0}, 20, 0},
|
||||
|
||||
{nat{_M}, nat{_M}, 0, 0},
|
||||
{nat{_M >> 1}, nat{_M}, 1, _M << (_W - 1) & _M},
|
||||
{nat{_M >> 20}, nat{_M}, 20, _M << (_W - 20) & _M},
|
||||
|
||||
{nat{_M, _M, _M}, nat{_M, _M, _M}, 0, 0},
|
||||
{nat{_M, _M, _M >> 1}, nat{_M, _M, _M}, 1, _M << (_W - 1) & _M},
|
||||
{nat{_M, _M, _M >> 20}, nat{_M, _M, _M}, 20, _M << (_W - 20) & _M},
|
||||
}
|
||||
@ -214,20 +208,20 @@ func TestFunVW(t *testing.T) {
|
||||
testFunVW(t, "subVW", subVW, arg)
|
||||
}
|
||||
|
||||
shlVW_g := makeFunVW(shlVU_g)
|
||||
shlVW := makeFunVW(shlVU)
|
||||
for _, a := range lshVW {
|
||||
lshVW_g := makeFunVW(lshVU_g)
|
||||
lshVW := makeFunVW(lshVU)
|
||||
for _, a := range lshVWTests {
|
||||
arg := a
|
||||
testFunVW(t, "shlVU_g", shlVW_g, arg)
|
||||
testFunVW(t, "shlVU", shlVW, arg)
|
||||
testFunVW(t, "lshVU_g", lshVW_g, arg)
|
||||
testFunVW(t, "lshVU", lshVW, arg)
|
||||
}
|
||||
|
||||
shrVW_g := makeFunVW(shrVU_g)
|
||||
shrVW := makeFunVW(shrVU)
|
||||
for _, a := range rshVW {
|
||||
rshVW_g := makeFunVW(rshVU_g)
|
||||
rshVW := makeFunVW(rshVU)
|
||||
for _, a := range rshVWTests {
|
||||
arg := a
|
||||
testFunVW(t, "shrVU_g", shrVW_g, arg)
|
||||
testFunVW(t, "shrVU", shrVW, arg)
|
||||
testFunVW(t, "rshVU_g", rshVW_g, arg)
|
||||
testFunVW(t, "rshVU", rshVW, arg)
|
||||
}
|
||||
}
|
||||
|
||||
@ -285,56 +279,48 @@ type argVU struct {
|
||||
m string // message.
|
||||
}
|
||||
|
||||
var argshlVUIn = []Word{1, 2, 4, 8, 16, 32, 64, 0, 0, 0}
|
||||
var argshlVUr0 = []Word{1, 2, 4, 8, 16, 32, 64}
|
||||
var argshlVUr1 = []Word{2, 4, 8, 16, 32, 64, 128}
|
||||
var argshlVUrWm1 = []Word{1 << (_W - 1), 0, 1, 2, 4, 8, 16}
|
||||
var arglshVUIn = []Word{1, 2, 4, 8, 16, 32, 64, 0, 0, 0}
|
||||
var arglshVUr0 = []Word{1, 2, 4, 8, 16, 32, 64}
|
||||
var arglshVUr1 = []Word{2, 4, 8, 16, 32, 64, 128}
|
||||
var arglshVUrWm1 = []Word{1 << (_W - 1), 0, 1, 2, 4, 8, 16}
|
||||
|
||||
var argshlVU = []argVU{
|
||||
// test cases for shlVU
|
||||
{[]Word{1, _M, _M, _M, _M, _M, 3 << (_W - 2), 0}, 7, 0, 0, 1, []Word{2, _M - 1, _M, _M, _M, _M, 1<<(_W-1) + 1}, 1, "complete overlap of shlVU"},
|
||||
{[]Word{1, _M, _M, _M, _M, _M, 3 << (_W - 2), 0, 0, 0, 0}, 7, 0, 3, 1, []Word{2, _M - 1, _M, _M, _M, _M, 1<<(_W-1) + 1}, 1, "partial overlap by half of shlVU"},
|
||||
{[]Word{1, _M, _M, _M, _M, _M, 3 << (_W - 2), 0, 0, 0, 0, 0, 0, 0}, 7, 0, 6, 1, []Word{2, _M - 1, _M, _M, _M, _M, 1<<(_W-1) + 1}, 1, "partial overlap by 1 Word of shlVU"},
|
||||
{[]Word{1, _M, _M, _M, _M, _M, 3 << (_W - 2), 0, 0, 0, 0, 0, 0, 0, 0}, 7, 0, 7, 1, []Word{2, _M - 1, _M, _M, _M, _M, 1<<(_W-1) + 1}, 1, "no overlap of shlVU"},
|
||||
// additional test cases with shift values of 0, 1 and (_W-1)
|
||||
{argshlVUIn, 7, 0, 0, 0, argshlVUr0, 0, "complete overlap of shlVU and shift of 0"},
|
||||
{argshlVUIn, 7, 0, 0, 1, argshlVUr1, 0, "complete overlap of shlVU and shift of 1"},
|
||||
{argshlVUIn, 7, 0, 0, _W - 1, argshlVUrWm1, 32, "complete overlap of shlVU and shift of _W - 1"},
|
||||
{argshlVUIn, 7, 0, 1, 0, argshlVUr0, 0, "partial overlap by 6 Words of shlVU and shift of 0"},
|
||||
{argshlVUIn, 7, 0, 1, 1, argshlVUr1, 0, "partial overlap by 6 Words of shlVU and shift of 1"},
|
||||
{argshlVUIn, 7, 0, 1, _W - 1, argshlVUrWm1, 32, "partial overlap by 6 Words of shlVU and shift of _W - 1"},
|
||||
{argshlVUIn, 7, 0, 2, 0, argshlVUr0, 0, "partial overlap by 5 Words of shlVU and shift of 0"},
|
||||
{argshlVUIn, 7, 0, 2, 1, argshlVUr1, 0, "partial overlap by 5 Words of shlVU and shift of 1"},
|
||||
{argshlVUIn, 7, 0, 2, _W - 1, argshlVUrWm1, 32, "partial overlap by 5 Words of shlVU abd shift of _W - 1"},
|
||||
{argshlVUIn, 7, 0, 3, 0, argshlVUr0, 0, "partial overlap by 4 Words of shlVU and shift of 0"},
|
||||
{argshlVUIn, 7, 0, 3, 1, argshlVUr1, 0, "partial overlap by 4 Words of shlVU and shift of 1"},
|
||||
{argshlVUIn, 7, 0, 3, _W - 1, argshlVUrWm1, 32, "partial overlap by 4 Words of shlVU and shift of _W - 1"},
|
||||
var arglshVU = []argVU{
|
||||
// test cases for lshVU
|
||||
{[]Word{1, _M, _M, _M, _M, _M, 3 << (_W - 2), 0}, 7, 0, 0, 1, []Word{2, _M - 1, _M, _M, _M, _M, 1<<(_W-1) + 1}, 1, "complete overlap of lshVU"},
|
||||
{[]Word{1, _M, _M, _M, _M, _M, 3 << (_W - 2), 0, 0, 0, 0}, 7, 0, 3, 1, []Word{2, _M - 1, _M, _M, _M, _M, 1<<(_W-1) + 1}, 1, "partial overlap by half of lshVU"},
|
||||
{[]Word{1, _M, _M, _M, _M, _M, 3 << (_W - 2), 0, 0, 0, 0, 0, 0, 0}, 7, 0, 6, 1, []Word{2, _M - 1, _M, _M, _M, _M, 1<<(_W-1) + 1}, 1, "partial overlap by 1 Word of lshVU"},
|
||||
{[]Word{1, _M, _M, _M, _M, _M, 3 << (_W - 2), 0, 0, 0, 0, 0, 0, 0, 0}, 7, 0, 7, 1, []Word{2, _M - 1, _M, _M, _M, _M, 1<<(_W-1) + 1}, 1, "no overlap of lshVU"},
|
||||
// additional test cases with shift values of 1 and (_W-1)
|
||||
{arglshVUIn, 7, 0, 0, 1, arglshVUr1, 0, "complete overlap of lshVU and shift of 1"},
|
||||
{arglshVUIn, 7, 0, 0, _W - 1, arglshVUrWm1, 32, "complete overlap of lshVU and shift of _W - 1"},
|
||||
{arglshVUIn, 7, 0, 1, 1, arglshVUr1, 0, "partial overlap by 6 Words of lshVU and shift of 1"},
|
||||
{arglshVUIn, 7, 0, 1, _W - 1, arglshVUrWm1, 32, "partial overlap by 6 Words of lshVU and shift of _W - 1"},
|
||||
{arglshVUIn, 7, 0, 2, 1, arglshVUr1, 0, "partial overlap by 5 Words of lshVU and shift of 1"},
|
||||
{arglshVUIn, 7, 0, 2, _W - 1, arglshVUrWm1, 32, "partial overlap by 5 Words of lshVU abd shift of _W - 1"},
|
||||
{arglshVUIn, 7, 0, 3, 1, arglshVUr1, 0, "partial overlap by 4 Words of lshVU and shift of 1"},
|
||||
{arglshVUIn, 7, 0, 3, _W - 1, arglshVUrWm1, 32, "partial overlap by 4 Words of lshVU and shift of _W - 1"},
|
||||
}
|
||||
|
||||
var argshrVUIn = []Word{0, 0, 0, 1, 2, 4, 8, 16, 32, 64}
|
||||
var argshrVUr0 = []Word{1, 2, 4, 8, 16, 32, 64}
|
||||
var argshrVUr1 = []Word{0, 1, 2, 4, 8, 16, 32}
|
||||
var argshrVUrWm1 = []Word{4, 8, 16, 32, 64, 128, 0}
|
||||
var argrshVUIn = []Word{0, 0, 0, 1, 2, 4, 8, 16, 32, 64}
|
||||
var argrshVUr0 = []Word{1, 2, 4, 8, 16, 32, 64}
|
||||
var argrshVUr1 = []Word{0, 1, 2, 4, 8, 16, 32}
|
||||
var argrshVUrWm1 = []Word{4, 8, 16, 32, 64, 128, 0}
|
||||
|
||||
var argshrVU = []argVU{
|
||||
// test cases for shrVU
|
||||
{[]Word{0, 3, _M, _M, _M, _M, _M, 1 << (_W - 1)}, 7, 1, 1, 1, []Word{1<<(_W-1) + 1, _M, _M, _M, _M, _M >> 1, 1 << (_W - 2)}, 1 << (_W - 1), "complete overlap of shrVU"},
|
||||
{[]Word{0, 0, 0, 0, 3, _M, _M, _M, _M, _M, 1 << (_W - 1)}, 7, 4, 1, 1, []Word{1<<(_W-1) + 1, _M, _M, _M, _M, _M >> 1, 1 << (_W - 2)}, 1 << (_W - 1), "partial overlap by half of shrVU"},
|
||||
{[]Word{0, 0, 0, 0, 0, 0, 0, 3, _M, _M, _M, _M, _M, 1 << (_W - 1)}, 7, 7, 1, 1, []Word{1<<(_W-1) + 1, _M, _M, _M, _M, _M >> 1, 1 << (_W - 2)}, 1 << (_W - 1), "partial overlap by 1 Word of shrVU"},
|
||||
{[]Word{0, 0, 0, 0, 0, 0, 0, 0, 3, _M, _M, _M, _M, _M, 1 << (_W - 1)}, 7, 8, 1, 1, []Word{1<<(_W-1) + 1, _M, _M, _M, _M, _M >> 1, 1 << (_W - 2)}, 1 << (_W - 1), "no overlap of shrVU"},
|
||||
var argrshVU = []argVU{
|
||||
// test cases for rshVU
|
||||
{[]Word{0, 3, _M, _M, _M, _M, _M, 1 << (_W - 1)}, 7, 1, 1, 1, []Word{1<<(_W-1) + 1, _M, _M, _M, _M, _M >> 1, 1 << (_W - 2)}, 1 << (_W - 1), "complete overlap of rshVU"},
|
||||
{[]Word{0, 0, 0, 0, 3, _M, _M, _M, _M, _M, 1 << (_W - 1)}, 7, 4, 1, 1, []Word{1<<(_W-1) + 1, _M, _M, _M, _M, _M >> 1, 1 << (_W - 2)}, 1 << (_W - 1), "partial overlap by half of rshVU"},
|
||||
{[]Word{0, 0, 0, 0, 0, 0, 0, 3, _M, _M, _M, _M, _M, 1 << (_W - 1)}, 7, 7, 1, 1, []Word{1<<(_W-1) + 1, _M, _M, _M, _M, _M >> 1, 1 << (_W - 2)}, 1 << (_W - 1), "partial overlap by 1 Word of rshVU"},
|
||||
{[]Word{0, 0, 0, 0, 0, 0, 0, 0, 3, _M, _M, _M, _M, _M, 1 << (_W - 1)}, 7, 8, 1, 1, []Word{1<<(_W-1) + 1, _M, _M, _M, _M, _M >> 1, 1 << (_W - 2)}, 1 << (_W - 1), "no overlap of rshVU"},
|
||||
// additional test cases with shift values of 1 and (_W-1)
|
||||
{argshrVUIn, 7, 3, 3, 0, argshrVUr0, 0, "complete overlap of shrVU and shift of 0"},
|
||||
{argshrVUIn, 7, 3, 3, 1, argshrVUr1, 1 << (_W - 1), "complete overlap of shrVU and shift of 1"},
|
||||
{argshrVUIn, 7, 3, 3, _W - 1, argshrVUrWm1, 2, "complete overlap of shrVU and shift of _W - 1"},
|
||||
{argshrVUIn, 7, 3, 2, 0, argshrVUr0, 0, "partial overlap by 6 Words of shrVU and shift of 0"},
|
||||
{argshrVUIn, 7, 3, 2, 1, argshrVUr1, 1 << (_W - 1), "partial overlap by 6 Words of shrVU and shift of 1"},
|
||||
{argshrVUIn, 7, 3, 2, _W - 1, argshrVUrWm1, 2, "partial overlap by 6 Words of shrVU and shift of _W - 1"},
|
||||
{argshrVUIn, 7, 3, 1, 0, argshrVUr0, 0, "partial overlap by 5 Words of shrVU and shift of 0"},
|
||||
{argshrVUIn, 7, 3, 1, 1, argshrVUr1, 1 << (_W - 1), "partial overlap by 5 Words of shrVU and shift of 1"},
|
||||
{argshrVUIn, 7, 3, 1, _W - 1, argshrVUrWm1, 2, "partial overlap by 5 Words of shrVU and shift of _W - 1"},
|
||||
{argshrVUIn, 7, 3, 0, 0, argshrVUr0, 0, "partial overlap by 4 Words of shrVU and shift of 0"},
|
||||
{argshrVUIn, 7, 3, 0, 1, argshrVUr1, 1 << (_W - 1), "partial overlap by 4 Words of shrVU and shift of 1"},
|
||||
{argshrVUIn, 7, 3, 0, _W - 1, argshrVUrWm1, 2, "partial overlap by 4 Words of shrVU and shift of _W - 1"},
|
||||
{argrshVUIn, 7, 3, 3, 1, argrshVUr1, 1 << (_W - 1), "complete overlap of rshVU and shift of 1"},
|
||||
{argrshVUIn, 7, 3, 3, _W - 1, argrshVUrWm1, 2, "complete overlap of rshVU and shift of _W - 1"},
|
||||
{argrshVUIn, 7, 3, 2, 1, argrshVUr1, 1 << (_W - 1), "partial overlap by 6 Words of rshVU and shift of 1"},
|
||||
{argrshVUIn, 7, 3, 2, _W - 1, argrshVUrWm1, 2, "partial overlap by 6 Words of rshVU and shift of _W - 1"},
|
||||
{argrshVUIn, 7, 3, 1, 1, argrshVUr1, 1 << (_W - 1), "partial overlap by 5 Words of rshVU and shift of 1"},
|
||||
{argrshVUIn, 7, 3, 1, _W - 1, argrshVUrWm1, 2, "partial overlap by 5 Words of rshVU and shift of _W - 1"},
|
||||
{argrshVUIn, 7, 3, 0, 1, argrshVUr1, 1 << (_W - 1), "partial overlap by 4 Words of rshVU and shift of 1"},
|
||||
{argrshVUIn, 7, 3, 0, _W - 1, argrshVUrWm1, 2, "partial overlap by 4 Words of rshVU and shift of _W - 1"},
|
||||
}
|
||||
|
||||
func testShiftFunc(t *testing.T, f func(z, x []Word, s uint) Word, a argVU) {
|
||||
@ -346,24 +332,24 @@ func testShiftFunc(t *testing.T, f func(z, x []Word, s uint) Word, a argVU) {
|
||||
c := f(z, x, a.s)
|
||||
for i, zi := range z {
|
||||
if zi != a.r[i] {
|
||||
t.Errorf("d := %v, %s(d[%d:%d], d[%d:%d], %d)\n\tgot z[%d] = %#x; want %#x", a.d, a.m, a.zp, a.zp+a.l, a.xp, a.xp+a.l, a.s, i, zi, a.r[i])
|
||||
t.Errorf("d := %v, %s (d[%d:%d], d[%d:%d], %d)\n\tgot z[%d] = %#x; want %#x", a.d, a.m, a.zp, a.zp+a.l, a.xp, a.xp+a.l, a.s, i, zi, a.r[i])
|
||||
break
|
||||
}
|
||||
}
|
||||
if c != a.c {
|
||||
t.Errorf("d := %v, %s(d[%d:%d], d[%d:%d], %d)\n\tgot c = %#x; want %#x", a.d, a.m, a.zp, a.zp+a.l, a.xp, a.xp+a.l, a.s, c, a.c)
|
||||
t.Errorf("d := %v, %s (d[%d:%d], d[%d:%d], %d)\n\tgot c = %#x; want %#x", a.d, a.m, a.zp, a.zp+a.l, a.xp, a.xp+a.l, a.s, c, a.c)
|
||||
}
|
||||
}
|
||||
|
||||
func TestShiftOverlap(t *testing.T) {
|
||||
for _, a := range argshlVU {
|
||||
for _, a := range arglshVU {
|
||||
arg := a
|
||||
testShiftFunc(t, shlVU, arg)
|
||||
testShiftFunc(t, lshVU, arg)
|
||||
}
|
||||
|
||||
for _, a := range argshrVU {
|
||||
for _, a := range argrshVU {
|
||||
arg := a
|
||||
testShiftFunc(t, shrVU, arg)
|
||||
testShiftFunc(t, rshVU, arg)
|
||||
}
|
||||
}
|
||||
|
||||
@ -374,11 +360,11 @@ func TestIssue31084(t *testing.T) {
|
||||
// compute 10^n via 5^n << n.
|
||||
const n = 165
|
||||
p := nat(nil).expNN(stk, nat{5}, nat{n}, nil, false)
|
||||
p = p.shl(p, n)
|
||||
p = p.lsh(p, n)
|
||||
got := string(p.utoa(10))
|
||||
want := "1" + strings.Repeat("0", n)
|
||||
if got != want {
|
||||
t.Errorf("shl(%v, %v)\n\tgot %s\n\twant %s", p, n, got, want)
|
||||
t.Errorf("lsh(%v, %v)\n\tgot %s\n\twant %s", p, n, got, want)
|
||||
}
|
||||
}
|
||||
|
||||
@ -387,11 +373,11 @@ const issue42838Value = "1593091911132452277028880397767711805591104555192618786
|
||||
func TestIssue42838(t *testing.T) {
|
||||
const s = 192
|
||||
z, _, _, _ := nat(nil).scan(strings.NewReader(issue42838Value), 0, false)
|
||||
z = z.shl(z, s)
|
||||
z = z.lsh(z, s)
|
||||
got := string(z.utoa(10))
|
||||
want := "1" + strings.Repeat("0", s)
|
||||
if got != want {
|
||||
t.Errorf("shl(%v, %v)\n\tgot %s\n\twant %s", z, s, got, want)
|
||||
t.Errorf("lsh(%v, %v)\n\tgot %s\n\twant %s", z, s, got, want)
|
||||
}
|
||||
}
|
||||
|
||||
@ -687,14 +673,14 @@ func BenchmarkNonZeroShifts(b *testing.B) {
|
||||
z := make([]Word, n)
|
||||
b.Run(fmt.Sprint(n), func(b *testing.B) {
|
||||
b.SetBytes(int64(n * _W))
|
||||
b.Run("shrVU", func(b *testing.B) {
|
||||
b.Run("rshVU", func(b *testing.B) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
_ = shrVU(z, x, s)
|
||||
_ = rshVU(z, x, s)
|
||||
}
|
||||
})
|
||||
b.Run("shlVU", func(b *testing.B) {
|
||||
b.Run("lshVU", func(b *testing.B) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
_ = shlVU(z, x, s)
|
||||
_ = lshVU(z, x, s)
|
||||
}
|
||||
})
|
||||
})
|
||||
|
@ -18,11 +18,11 @@ TEXT ·addVW(SB),NOSPLIT,$0
|
||||
TEXT ·subVW(SB),NOSPLIT,$0
|
||||
JMP ·subVW_g(SB)
|
||||
|
||||
TEXT ·shlVU(SB),NOSPLIT,$0
|
||||
JMP ·shlVU_g(SB)
|
||||
TEXT ·lshVU(SB),NOSPLIT,$0
|
||||
JMP ·lshVU_g(SB)
|
||||
|
||||
TEXT ·shrVU(SB),NOSPLIT,$0
|
||||
JMP ·shrVU_g(SB)
|
||||
TEXT ·rshVU(SB),NOSPLIT,$0
|
||||
JMP ·rshVU_g(SB)
|
||||
|
||||
TEXT ·mulAddVWW(SB),NOSPLIT,$0
|
||||
JMP ·mulAddVWW_g(SB)
|
||||
|
@ -69,13 +69,13 @@ func (x *decimal) init(m nat, shift int) {
|
||||
if s >= ntz {
|
||||
s = ntz // shift at most ntz bits
|
||||
}
|
||||
m = nat(nil).shr(m, s)
|
||||
m = nat(nil).rsh(m, s)
|
||||
shift += int(s)
|
||||
}
|
||||
|
||||
// Do any shift left in binary representation.
|
||||
if shift > 0 {
|
||||
m = nat(nil).shl(m, uint(shift))
|
||||
m = nat(nil).lsh(m, uint(shift))
|
||||
shift = 0
|
||||
}
|
||||
|
||||
@ -93,15 +93,15 @@ func (x *decimal) init(m nat, shift int) {
|
||||
// Do any (remaining) shift right in decimal representation.
|
||||
if shift < 0 {
|
||||
for shift < -maxShift {
|
||||
shr(x, maxShift)
|
||||
rsh(x, maxShift)
|
||||
shift += maxShift
|
||||
}
|
||||
shr(x, uint(-shift))
|
||||
rsh(x, uint(-shift))
|
||||
}
|
||||
}
|
||||
|
||||
// shr implements x >> s, for s <= maxShift.
|
||||
func shr(x *decimal, s uint) {
|
||||
// rsh implements x >> s, for s <= maxShift.
|
||||
func rsh(x *decimal, s uint) {
|
||||
// Division by 1<<s using shift-and-subtract algorithm.
|
||||
|
||||
// pick up enough leading digits to cover first shift
|
||||
|
@ -488,7 +488,7 @@ func (z *Float) round(sbit uint) {
|
||||
}
|
||||
z.exp++
|
||||
// adjust mantissa: divide by 2 to compensate for exponent adjustment
|
||||
shrVU(z.mant, z.mant, 1)
|
||||
rshVU(z.mant, z.mant, 1)
|
||||
// set msb == carry == 1 from the mantissa overflow above
|
||||
const msb = 1 << (_W - 1)
|
||||
z.mant[n-1] |= msb
|
||||
@ -585,9 +585,9 @@ func fnorm(m nat) int64 {
|
||||
}
|
||||
s := nlz(m[len(m)-1])
|
||||
if s > 0 {
|
||||
c := shlVU(m, m, s)
|
||||
c := lshVU(m, m, s)
|
||||
if debugFloat && c != 0 {
|
||||
panic("nlz or shlVU incorrect")
|
||||
panic("nlz or lshVU incorrect")
|
||||
}
|
||||
}
|
||||
return int64(s)
|
||||
@ -1110,11 +1110,11 @@ func (x *Float) Int(z *Int) (*Int, Accuracy) {
|
||||
z.neg = x.neg
|
||||
switch {
|
||||
case exp > allBits:
|
||||
z.abs = z.abs.shl(x.mant, exp-allBits)
|
||||
z.abs = z.abs.lsh(x.mant, exp-allBits)
|
||||
default:
|
||||
z.abs = z.abs.set(x.mant)
|
||||
case exp < allBits:
|
||||
z.abs = z.abs.shr(x.mant, allBits-exp)
|
||||
z.abs = z.abs.rsh(x.mant, allBits-exp)
|
||||
}
|
||||
return z, acc
|
||||
|
||||
@ -1150,7 +1150,7 @@ func (x *Float) Rat(z *Rat) (*Rat, Accuracy) {
|
||||
z.a.neg = x.neg
|
||||
switch {
|
||||
case x.exp > allBits:
|
||||
z.a.abs = z.a.abs.shl(x.mant, uint(x.exp-allBits))
|
||||
z.a.abs = z.a.abs.lsh(x.mant, uint(x.exp-allBits))
|
||||
z.b.abs = z.b.abs[:0] // == 1 (see Rat)
|
||||
// z already in normal form
|
||||
default:
|
||||
@ -1160,7 +1160,7 @@ func (x *Float) Rat(z *Rat) (*Rat, Accuracy) {
|
||||
case x.exp < allBits:
|
||||
z.a.abs = z.a.abs.set(x.mant)
|
||||
t := z.b.abs.setUint64(1)
|
||||
z.b.abs = t.shl(t, uint(allBits-x.exp))
|
||||
z.b.abs = t.lsh(t, uint(allBits-x.exp))
|
||||
z.norm()
|
||||
}
|
||||
return z, Exact
|
||||
@ -1234,10 +1234,10 @@ func (z *Float) uadd(x, y *Float) {
|
||||
switch {
|
||||
case ex < ey:
|
||||
if al {
|
||||
t := nat(nil).shl(y.mant, uint(ey-ex))
|
||||
t := nat(nil).lsh(y.mant, uint(ey-ex))
|
||||
z.mant = z.mant.add(x.mant, t)
|
||||
} else {
|
||||
z.mant = z.mant.shl(y.mant, uint(ey-ex))
|
||||
z.mant = z.mant.lsh(y.mant, uint(ey-ex))
|
||||
z.mant = z.mant.add(x.mant, z.mant)
|
||||
}
|
||||
default:
|
||||
@ -1245,10 +1245,10 @@ func (z *Float) uadd(x, y *Float) {
|
||||
z.mant = z.mant.add(x.mant, y.mant)
|
||||
case ex > ey:
|
||||
if al {
|
||||
t := nat(nil).shl(x.mant, uint(ex-ey))
|
||||
t := nat(nil).lsh(x.mant, uint(ex-ey))
|
||||
z.mant = z.mant.add(t, y.mant)
|
||||
} else {
|
||||
z.mant = z.mant.shl(x.mant, uint(ex-ey))
|
||||
z.mant = z.mant.lsh(x.mant, uint(ex-ey))
|
||||
z.mant = z.mant.add(z.mant, y.mant)
|
||||
}
|
||||
ex = ey
|
||||
@ -1279,10 +1279,10 @@ func (z *Float) usub(x, y *Float) {
|
||||
switch {
|
||||
case ex < ey:
|
||||
if al {
|
||||
t := nat(nil).shl(y.mant, uint(ey-ex))
|
||||
t := nat(nil).lsh(y.mant, uint(ey-ex))
|
||||
z.mant = t.sub(x.mant, t)
|
||||
} else {
|
||||
z.mant = z.mant.shl(y.mant, uint(ey-ex))
|
||||
z.mant = z.mant.lsh(y.mant, uint(ey-ex))
|
||||
z.mant = z.mant.sub(x.mant, z.mant)
|
||||
}
|
||||
default:
|
||||
@ -1290,10 +1290,10 @@ func (z *Float) usub(x, y *Float) {
|
||||
z.mant = z.mant.sub(x.mant, y.mant)
|
||||
case ex > ey:
|
||||
if al {
|
||||
t := nat(nil).shl(x.mant, uint(ex-ey))
|
||||
t := nat(nil).lsh(x.mant, uint(ex-ey))
|
||||
z.mant = t.sub(t, y.mant)
|
||||
} else {
|
||||
z.mant = z.mant.shl(x.mant, uint(ex-ey))
|
||||
z.mant = z.mant.lsh(x.mant, uint(ex-ey))
|
||||
z.mant = z.mant.sub(z.mant, y.mant)
|
||||
}
|
||||
ex = ey
|
||||
|
@ -188,9 +188,9 @@ func roundShortest(d *decimal, x *Float) {
|
||||
s := mant.bitLen() - int(x.prec+1)
|
||||
switch {
|
||||
case s < 0:
|
||||
mant = mant.shl(mant, uint(-s))
|
||||
mant = mant.lsh(mant, uint(-s))
|
||||
case s > 0:
|
||||
mant = mant.shr(mant, uint(+s))
|
||||
mant = mant.rsh(mant, uint(+s))
|
||||
}
|
||||
exp += s
|
||||
// x = mant * 2**exp with lsb(mant) == 1/2 ulp of x.prec
|
||||
@ -329,9 +329,9 @@ func (x *Float) fmtB(buf []byte) []byte {
|
||||
m := x.mant
|
||||
switch w := uint32(len(x.mant)) * _W; {
|
||||
case w < x.prec:
|
||||
m = nat(nil).shl(m, uint(x.prec-w))
|
||||
m = nat(nil).lsh(m, uint(x.prec-w))
|
||||
case w > x.prec:
|
||||
m = nat(nil).shr(m, uint(w-x.prec))
|
||||
m = nat(nil).rsh(m, uint(w-x.prec))
|
||||
}
|
||||
|
||||
buf = append(buf, m.utoa(10)...)
|
||||
@ -380,9 +380,9 @@ func (x *Float) fmtX(buf []byte, prec int) []byte {
|
||||
m := x.mant
|
||||
switch w := uint(len(x.mant)) * _W; {
|
||||
case w < n:
|
||||
m = nat(nil).shl(m, n-w)
|
||||
m = nat(nil).lsh(m, n-w)
|
||||
case w > n:
|
||||
m = nat(nil).shr(m, w-n)
|
||||
m = nat(nil).rsh(m, w-n)
|
||||
}
|
||||
exp64 := int64(x.exp) - 1 // avoid wrap-around
|
||||
|
||||
|
@ -1097,7 +1097,7 @@ func (z *Int) ModSqrt(x, p *Int) *Int {
|
||||
|
||||
// Lsh sets z = x << n and returns z.
|
||||
func (z *Int) Lsh(x *Int, n uint) *Int {
|
||||
z.abs = z.abs.shl(x.abs, n)
|
||||
z.abs = z.abs.lsh(x.abs, n)
|
||||
z.neg = x.neg
|
||||
return z
|
||||
}
|
||||
@ -1107,13 +1107,13 @@ func (z *Int) Rsh(x *Int, n uint) *Int {
|
||||
if x.neg {
|
||||
// (-x) >> s == ^(x-1) >> s == ^((x-1) >> s) == -(((x-1) >> s) + 1)
|
||||
t := z.abs.sub(x.abs, natOne) // no underflow because |x| > 0
|
||||
t = t.shr(t, n)
|
||||
t = t.rsh(t, n)
|
||||
z.abs = t.add(t, natOne)
|
||||
z.neg = true // z cannot be zero if x is negative
|
||||
return z
|
||||
}
|
||||
|
||||
z.abs = z.abs.shr(x.abs, n)
|
||||
z.abs = z.abs.rsh(x.abs, n)
|
||||
z.neg = false
|
||||
return z
|
||||
}
|
||||
|
@ -1614,7 +1614,7 @@ func TestModInverse(t *testing.T) {
|
||||
|
||||
func BenchmarkModInverse(b *testing.B) {
|
||||
p := new(Int).SetInt64(1) // Mersenne prime 2**1279 -1
|
||||
p.abs = p.abs.shl(p.abs, 1279)
|
||||
p.abs = p.abs.lsh(p.abs, 1279)
|
||||
p.Sub(p, intOne)
|
||||
x := new(Int).Sub(p, intOne)
|
||||
z := new(Int)
|
||||
|
@ -380,7 +380,7 @@ func same(x, y nat) bool {
|
||||
}
|
||||
|
||||
// z = x << s
|
||||
func (z nat) shl(x nat, s uint) nat {
|
||||
func (z nat) lsh(x nat, s uint) nat {
|
||||
if s == 0 {
|
||||
if same(z, x) {
|
||||
return z
|
||||
@ -398,14 +398,19 @@ func (z nat) shl(x nat, s uint) nat {
|
||||
|
||||
n := m + int(s/_W)
|
||||
z = z.make(n + 1)
|
||||
z[n] = shlVU(z[n-m:n], x, s%_W)
|
||||
if s %= _W; s == 0 {
|
||||
copy(z[n-m:n], x)
|
||||
z[n] = 0
|
||||
} else {
|
||||
z[n] = lshVU(z[n-m:n], x, s)
|
||||
}
|
||||
clear(z[0 : n-m])
|
||||
|
||||
return z.norm()
|
||||
}
|
||||
|
||||
// z = x >> s
|
||||
func (z nat) shr(x nat, s uint) nat {
|
||||
func (z nat) rsh(x nat, s uint) nat {
|
||||
if s == 0 {
|
||||
if same(z, x) {
|
||||
return z
|
||||
@ -423,7 +428,11 @@ func (z nat) shr(x nat, s uint) nat {
|
||||
// n > 0
|
||||
|
||||
z = z.make(n)
|
||||
shrVU(z, x[m-n:], s%_W)
|
||||
if s %= _W; s == 0 {
|
||||
copy(z, x[m-n:])
|
||||
} else {
|
||||
rshVU(z, x[m-n:], s)
|
||||
}
|
||||
|
||||
return z.norm()
|
||||
}
|
||||
@ -745,8 +754,8 @@ func (z nat) expNN(stk *stack, x, y, m nat, slow bool) nat {
|
||||
func (z nat) expNNMontgomeryEven(stk *stack, x, y, m nat) nat {
|
||||
// Split m = m₁ × m₂ where m₁ = 2ⁿ
|
||||
n := m.trailingZeroBits()
|
||||
m1 := nat(nil).shl(natOne, n)
|
||||
m2 := nat(nil).shr(m, n)
|
||||
m1 := nat(nil).lsh(natOne, n)
|
||||
m2 := nat(nil).rsh(m, n)
|
||||
|
||||
// We want z = x**y mod m.
|
||||
// z₁ = x**y mod m1 = (x**y mod m) mod m1 = z mod m1
|
||||
@ -906,7 +915,7 @@ func (z nat) expNNMontgomery(stk *stack, x, y, m nat) nat {
|
||||
|
||||
// RR = 2**(2*_W*len(m)) mod m
|
||||
RR := nat(nil).setWord(1)
|
||||
zz := nat(nil).shl(RR, uint(2*numWords*_W))
|
||||
zz := nat(nil).lsh(RR, uint(2*numWords*_W))
|
||||
_, RR = nat(nil).div(stk, RR, zz, m)
|
||||
if len(RR) < numWords {
|
||||
zz = zz.make(numWords)
|
||||
@ -1053,11 +1062,11 @@ func (z nat) sqrt(stk *stack, x nat) nat {
|
||||
var z1, z2 nat
|
||||
z1 = z
|
||||
z1 = z1.setUint64(1)
|
||||
z1 = z1.shl(z1, uint(x.bitLen()+1)/2) // must be ≥ √x
|
||||
z1 = z1.lsh(z1, uint(x.bitLen()+1)/2) // must be ≥ √x
|
||||
for n := 0; ; n++ {
|
||||
z2, _ = z2.div(stk, nil, x, z1)
|
||||
z2 = z2.add(z2, z1)
|
||||
z2 = z2.shr(z2, 1)
|
||||
z2 = z2.rsh(z2, 1)
|
||||
if z2.cmp(z1) >= 0 {
|
||||
// z1 is answer.
|
||||
// Figure out whether z1 or z2 is currently aliased to z by looking at loop count.
|
||||
|
@ -430,7 +430,7 @@ var leftShiftTests = []shiftTest{
|
||||
func TestShiftLeft(t *testing.T) {
|
||||
for i, test := range leftShiftTests {
|
||||
var z nat
|
||||
z = z.shl(test.in, test.shift)
|
||||
z = z.lsh(test.in, test.shift)
|
||||
for j, d := range test.out {
|
||||
if j >= len(z) || z[j] != d {
|
||||
t.Errorf("#%d: got: %v want: %v", i, z, test.out)
|
||||
@ -453,7 +453,7 @@ var rightShiftTests = []shiftTest{
|
||||
func TestShiftRight(t *testing.T) {
|
||||
for i, test := range rightShiftTests {
|
||||
var z nat
|
||||
z = z.shr(test.in, test.shift)
|
||||
z = z.rsh(test.in, test.shift)
|
||||
for j, d := range test.out {
|
||||
if j >= len(z) || z[j] != d {
|
||||
t.Errorf("#%d: got: %v want: %v", i, z, test.out)
|
||||
@ -469,24 +469,24 @@ func BenchmarkZeroShifts(b *testing.B) {
|
||||
b.Run("Shl", func(b *testing.B) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
var z nat
|
||||
z.shl(x, 0)
|
||||
z.lsh(x, 0)
|
||||
}
|
||||
})
|
||||
b.Run("ShlSame", func(b *testing.B) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
x.shl(x, 0)
|
||||
x.lsh(x, 0)
|
||||
}
|
||||
})
|
||||
|
||||
b.Run("Shr", func(b *testing.B) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
var z nat
|
||||
z.shr(x, 0)
|
||||
z.rsh(x, 0)
|
||||
}
|
||||
})
|
||||
b.Run("ShrSame", func(b *testing.B) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
x.shr(x, 0)
|
||||
x.rsh(x, 0)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
@ -268,7 +268,7 @@ func (z nat) scan(r io.ByteScanner, base int, fracOk bool) (res nat, b, count in
|
||||
slices.Reverse(z)
|
||||
z = z.norm()
|
||||
if i > 0 {
|
||||
z = z.shr(z, uint(n-i)*uint(_W/n))
|
||||
z = z.rsh(z, uint(n-i)*uint(_W/n))
|
||||
}
|
||||
} else {
|
||||
if i > 0 {
|
||||
|
@ -605,9 +605,15 @@ func (z nat) divLarge(stk *stack, u, uIn, vIn nat) (q, r nat) {
|
||||
defer stk.restore(stk.save())
|
||||
shift := nlz(vIn[n-1])
|
||||
v := stk.nat(n)
|
||||
shlVU(v, vIn, shift)
|
||||
u = u.make(len(uIn) + 1)
|
||||
u[len(uIn)] = shlVU(u[:len(uIn)], uIn, shift)
|
||||
if shift == 0 {
|
||||
copy(v, vIn)
|
||||
copy(u[:len(uIn)], uIn)
|
||||
u[len(uIn)] = 0
|
||||
} else {
|
||||
lshVU(v, vIn, shift)
|
||||
u[len(uIn)] = lshVU(u[:len(uIn)], uIn, shift)
|
||||
}
|
||||
|
||||
// The caller should not pass aliased z and u, since those are
|
||||
// the two different outputs, but correct just in case.
|
||||
@ -626,7 +632,9 @@ func (z nat) divLarge(stk *stack, u, uIn, vIn nat) (q, r nat) {
|
||||
q = q.norm()
|
||||
|
||||
// Undo scaling of remainder.
|
||||
shrVU(u, u, shift)
|
||||
if shift != 0 {
|
||||
rshVU(u, u, shift)
|
||||
}
|
||||
r = u.norm()
|
||||
|
||||
return q, r
|
||||
|
@ -128,7 +128,7 @@ func basicSqr(stk *stack, z, x nat) {
|
||||
// t collects the products x[i] * x[j] where j < i
|
||||
t[2*i] = addMulVVWW(t[i:2*i], t[i:2*i], x[0:i], d, 0)
|
||||
}
|
||||
t[2*n-1] = shlVU(t[1:2*n-1], t[1:2*n-1], 1) // double the j < i products
|
||||
t[2*n-1] = lshVU(t[1:2*n-1], t[1:2*n-1], 1) // double the j < i products
|
||||
addVV(z, z, t) // combine the result
|
||||
}
|
||||
|
||||
|
@ -89,7 +89,7 @@ func (n nat) probablyPrimeMillerRabin(stk *stack, reps int, force2 bool) bool {
|
||||
nm1 := nat(nil).sub(n, natOne)
|
||||
// determine q, k such that nm1 = q << k
|
||||
k := nm1.trailingZeroBits()
|
||||
q := nat(nil).shr(nm1, k)
|
||||
q := nat(nil).rsh(nm1, k)
|
||||
|
||||
nm3 := nat(nil).sub(nm1, natTwo)
|
||||
rand := rand.New(rand.NewSource(int64(n[0])))
|
||||
@ -217,7 +217,7 @@ func (n nat) probablyPrimeLucas(stk *stack) bool {
|
||||
// Arrange s = (n - Jacobi(Δ, n)) / 2^r = (n+1) / 2^r.
|
||||
s := nat(nil).add(n, natOne)
|
||||
r := int(s.trailingZeroBits())
|
||||
s = s.shr(s, uint(r))
|
||||
s = s.rsh(s, uint(r))
|
||||
nm2 := nat(nil).sub(n, natTwo) // n-2
|
||||
|
||||
// We apply the "almost extra strong" test, which checks the above conditions
|
||||
@ -288,7 +288,7 @@ func (n nat) probablyPrimeLucas(stk *stack) bool {
|
||||
// Since we are checking for U(k) == 0 it suffices to check 2 V(k+1) == P V(k) mod n,
|
||||
// or P V(k) - 2 V(k+1) == 0 mod n.
|
||||
t1 := t1.mul(stk, vk, natP)
|
||||
t2 := t2.shl(vk1, 1)
|
||||
t2 := t2.lsh(vk1, 1)
|
||||
if t1.cmp(t2) < 0 {
|
||||
t1, t2 = t2, t1
|
||||
}
|
||||
|
@ -112,9 +112,9 @@ func quotToFloat32(stk *stack, a, b nat) (f float32, exact bool) {
|
||||
a2 = a2.set(a)
|
||||
b2 = b2.set(b)
|
||||
if shift := Msize2 - exp; shift > 0 {
|
||||
a2 = a2.shl(a2, uint(shift))
|
||||
a2 = a2.lsh(a2, uint(shift))
|
||||
} else if shift < 0 {
|
||||
b2 = b2.shl(b2, uint(-shift))
|
||||
b2 = b2.lsh(b2, uint(-shift))
|
||||
}
|
||||
|
||||
// 2. Compute quotient and remainder (q, r). NB: due to the
|
||||
@ -210,9 +210,9 @@ func quotToFloat64(stk *stack, a, b nat) (f float64, exact bool) {
|
||||
a2 = a2.set(a)
|
||||
b2 = b2.set(b)
|
||||
if shift := Msize2 - exp; shift > 0 {
|
||||
a2 = a2.shl(a2, uint(shift))
|
||||
a2 = a2.lsh(a2, uint(shift))
|
||||
} else if shift < 0 {
|
||||
b2 = b2.shl(b2, uint(-shift))
|
||||
b2 = b2.lsh(b2, uint(-shift))
|
||||
}
|
||||
|
||||
// 2. Compute quotient and remainder (q, r). NB: due to the
|
||||
|
@ -197,9 +197,9 @@ func (z *Rat) SetString(s string) (*Rat, bool) {
|
||||
return nil, false // avoid excessively large exponents
|
||||
}
|
||||
if exp2 > 0 {
|
||||
z.a.abs = z.a.abs.shl(z.a.abs, uint(exp2))
|
||||
z.a.abs = z.a.abs.lsh(z.a.abs, uint(exp2))
|
||||
} else if exp2 < 0 {
|
||||
z.b.abs = z.b.abs.shl(z.b.abs, uint(-exp2))
|
||||
z.b.abs = z.b.abs.lsh(z.b.abs, uint(-exp2))
|
||||
}
|
||||
|
||||
z.a.neg = neg && len(z.a.abs) > 0 // 0 has no sign
|
||||
@ -421,7 +421,7 @@ func (x *Rat) FloatPrec() (n int, exact bool) {
|
||||
// Do this first to reduce q as much as possible.
|
||||
var q nat
|
||||
p2 := d.trailingZeroBits()
|
||||
q = q.shr(d, p2)
|
||||
q = q.rsh(d, p2)
|
||||
|
||||
// Determine p5 by counting factors of 5.
|
||||
// Build a table starting with an initial power of 5,
|
||||
|
Loading…
x
Reference in New Issue
Block a user