mirror of
https://github.com/golang/go.git
synced 2025-05-05 15:43:04 +00:00
math/big: fix incorrect register allocation for mipsx/mips64x
According to the MIPS ABI, R26 and R27 are reserved for the OS kernel and may be clobbered by it at any time. They must not be used by user-mode code.

See Figure 3-18 of the MIPS ELF ABI specification: https://refspecs.linuxfoundation.org/elf/mipsabi.pdf

Fixes #73472

Change-Id: Ifda692a803176bfaab2c70d6623636c5d135f42e
Reviewed-on: https://go-review.googlesource.com/c/go/+/667816
Reviewed-by: Alan Donovan <adonovan@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
Reviewed-by: Keith Randall <khr@google.com>
This commit is contained in:
parent
b48e52b428
commit
6109185cf9
@ -17,18 +17,18 @@ TEXT ·addVV(SB), NOSPLIT, $0
|
|||||||
// compute unrolled loop lengths
|
// compute unrolled loop lengths
|
||||||
AND $3, R1, R5
|
AND $3, R1, R5
|
||||||
SRLV $2, R1
|
SRLV $2, R1
|
||||||
XOR R26, R26 // clear carry
|
XOR R24, R24 // clear carry
|
||||||
loop1:
|
loop1:
|
||||||
BEQ R5, loop1done
|
BEQ R5, loop1done
|
||||||
loop1cont:
|
loop1cont:
|
||||||
// unroll 1X
|
// unroll 1X
|
||||||
MOVV 0(R2), R6
|
MOVV 0(R2), R6
|
||||||
MOVV 0(R3), R7
|
MOVV 0(R3), R7
|
||||||
ADDVU R7, R6 // ADCS R7, R6, R6 (cr=R26)
|
ADDVU R7, R6 // ADCS R7, R6, R6 (cr=R24)
|
||||||
SGTU R7, R6, R23 // ...
|
SGTU R7, R6, R23 // ...
|
||||||
ADDVU R26, R6 // ...
|
ADDVU R24, R6 // ...
|
||||||
SGTU R26, R6, R26 // ...
|
SGTU R24, R6, R24 // ...
|
||||||
ADDVU R23, R26 // ...
|
ADDVU R23, R24 // ...
|
||||||
MOVV R6, 0(R4)
|
MOVV R6, 0(R4)
|
||||||
ADDVU $8, R2
|
ADDVU $8, R2
|
||||||
ADDVU $8, R3
|
ADDVU $8, R3
|
||||||
@ -48,26 +48,26 @@ loop4cont:
|
|||||||
MOVV 8(R3), R10
|
MOVV 8(R3), R10
|
||||||
MOVV 16(R3), R11
|
MOVV 16(R3), R11
|
||||||
MOVV 24(R3), R12
|
MOVV 24(R3), R12
|
||||||
ADDVU R9, R5 // ADCS R9, R5, R5 (cr=R26)
|
ADDVU R9, R5 // ADCS R9, R5, R5 (cr=R24)
|
||||||
SGTU R9, R5, R23 // ...
|
SGTU R9, R5, R23 // ...
|
||||||
ADDVU R26, R5 // ...
|
ADDVU R24, R5 // ...
|
||||||
SGTU R26, R5, R26 // ...
|
SGTU R24, R5, R24 // ...
|
||||||
ADDVU R23, R26 // ...
|
ADDVU R23, R24 // ...
|
||||||
ADDVU R10, R6 // ADCS R10, R6, R6 (cr=R26)
|
ADDVU R10, R6 // ADCS R10, R6, R6 (cr=R24)
|
||||||
SGTU R10, R6, R23 // ...
|
SGTU R10, R6, R23 // ...
|
||||||
ADDVU R26, R6 // ...
|
ADDVU R24, R6 // ...
|
||||||
SGTU R26, R6, R26 // ...
|
SGTU R24, R6, R24 // ...
|
||||||
ADDVU R23, R26 // ...
|
ADDVU R23, R24 // ...
|
||||||
ADDVU R11, R7 // ADCS R11, R7, R7 (cr=R26)
|
ADDVU R11, R7 // ADCS R11, R7, R7 (cr=R24)
|
||||||
SGTU R11, R7, R23 // ...
|
SGTU R11, R7, R23 // ...
|
||||||
ADDVU R26, R7 // ...
|
ADDVU R24, R7 // ...
|
||||||
SGTU R26, R7, R26 // ...
|
SGTU R24, R7, R24 // ...
|
||||||
ADDVU R23, R26 // ...
|
ADDVU R23, R24 // ...
|
||||||
ADDVU R12, R8 // ADCS R12, R8, R8 (cr=R26)
|
ADDVU R12, R8 // ADCS R12, R8, R8 (cr=R24)
|
||||||
SGTU R12, R8, R23 // ...
|
SGTU R12, R8, R23 // ...
|
||||||
ADDVU R26, R8 // ...
|
ADDVU R24, R8 // ...
|
||||||
SGTU R26, R8, R26 // ...
|
SGTU R24, R8, R24 // ...
|
||||||
ADDVU R23, R26 // ...
|
ADDVU R23, R24 // ...
|
||||||
MOVV R5, 0(R4)
|
MOVV R5, 0(R4)
|
||||||
MOVV R6, 8(R4)
|
MOVV R6, 8(R4)
|
||||||
MOVV R7, 16(R4)
|
MOVV R7, 16(R4)
|
||||||
@ -78,7 +78,7 @@ loop4cont:
|
|||||||
SUBVU $1, R1
|
SUBVU $1, R1
|
||||||
BNE R1, loop4cont
|
BNE R1, loop4cont
|
||||||
loop4done:
|
loop4done:
|
||||||
MOVV R26, c+72(FP)
|
MOVV R24, c+72(FP)
|
||||||
RET
|
RET
|
||||||
|
|
||||||
// func subVV(z, x, y []Word) (c Word)
|
// func subVV(z, x, y []Word) (c Word)
|
||||||
@ -90,18 +90,18 @@ TEXT ·subVV(SB), NOSPLIT, $0
|
|||||||
// compute unrolled loop lengths
|
// compute unrolled loop lengths
|
||||||
AND $3, R1, R5
|
AND $3, R1, R5
|
||||||
SRLV $2, R1
|
SRLV $2, R1
|
||||||
XOR R26, R26 // clear carry
|
XOR R24, R24 // clear carry
|
||||||
loop1:
|
loop1:
|
||||||
BEQ R5, loop1done
|
BEQ R5, loop1done
|
||||||
loop1cont:
|
loop1cont:
|
||||||
// unroll 1X
|
// unroll 1X
|
||||||
MOVV 0(R2), R6
|
MOVV 0(R2), R6
|
||||||
MOVV 0(R3), R7
|
MOVV 0(R3), R7
|
||||||
SGTU R26, R6, R23 // SBCS R7, R6, R6
|
SGTU R24, R6, R23 // SBCS R7, R6, R6
|
||||||
SUBVU R26, R6 // ...
|
SUBVU R24, R6 // ...
|
||||||
SGTU R7, R6, R26 // ...
|
SGTU R7, R6, R24 // ...
|
||||||
SUBVU R7, R6 // ...
|
SUBVU R7, R6 // ...
|
||||||
ADDVU R23, R26 // ...
|
ADDVU R23, R24 // ...
|
||||||
MOVV R6, 0(R4)
|
MOVV R6, 0(R4)
|
||||||
ADDVU $8, R2
|
ADDVU $8, R2
|
||||||
ADDVU $8, R3
|
ADDVU $8, R3
|
||||||
@ -121,26 +121,26 @@ loop4cont:
|
|||||||
MOVV 8(R3), R10
|
MOVV 8(R3), R10
|
||||||
MOVV 16(R3), R11
|
MOVV 16(R3), R11
|
||||||
MOVV 24(R3), R12
|
MOVV 24(R3), R12
|
||||||
SGTU R26, R5, R23 // SBCS R9, R5, R5
|
SGTU R24, R5, R23 // SBCS R9, R5, R5
|
||||||
SUBVU R26, R5 // ...
|
SUBVU R24, R5 // ...
|
||||||
SGTU R9, R5, R26 // ...
|
SGTU R9, R5, R24 // ...
|
||||||
SUBVU R9, R5 // ...
|
SUBVU R9, R5 // ...
|
||||||
ADDVU R23, R26 // ...
|
ADDVU R23, R24 // ...
|
||||||
SGTU R26, R6, R23 // SBCS R10, R6, R6
|
SGTU R24, R6, R23 // SBCS R10, R6, R6
|
||||||
SUBVU R26, R6 // ...
|
SUBVU R24, R6 // ...
|
||||||
SGTU R10, R6, R26 // ...
|
SGTU R10, R6, R24 // ...
|
||||||
SUBVU R10, R6 // ...
|
SUBVU R10, R6 // ...
|
||||||
ADDVU R23, R26 // ...
|
ADDVU R23, R24 // ...
|
||||||
SGTU R26, R7, R23 // SBCS R11, R7, R7
|
SGTU R24, R7, R23 // SBCS R11, R7, R7
|
||||||
SUBVU R26, R7 // ...
|
SUBVU R24, R7 // ...
|
||||||
SGTU R11, R7, R26 // ...
|
SGTU R11, R7, R24 // ...
|
||||||
SUBVU R11, R7 // ...
|
SUBVU R11, R7 // ...
|
||||||
ADDVU R23, R26 // ...
|
ADDVU R23, R24 // ...
|
||||||
SGTU R26, R8, R23 // SBCS R12, R8, R8
|
SGTU R24, R8, R23 // SBCS R12, R8, R8
|
||||||
SUBVU R26, R8 // ...
|
SUBVU R24, R8 // ...
|
||||||
SGTU R12, R8, R26 // ...
|
SGTU R12, R8, R24 // ...
|
||||||
SUBVU R12, R8 // ...
|
SUBVU R12, R8 // ...
|
||||||
ADDVU R23, R26 // ...
|
ADDVU R23, R24 // ...
|
||||||
MOVV R5, 0(R4)
|
MOVV R5, 0(R4)
|
||||||
MOVV R6, 8(R4)
|
MOVV R6, 8(R4)
|
||||||
MOVV R7, 16(R4)
|
MOVV R7, 16(R4)
|
||||||
@ -151,7 +151,7 @@ loop4cont:
|
|||||||
SUBVU $1, R1
|
SUBVU $1, R1
|
||||||
BNE R1, loop4cont
|
BNE R1, loop4cont
|
||||||
loop4done:
|
loop4done:
|
||||||
MOVV R26, c+72(FP)
|
MOVV R24, c+72(FP)
|
||||||
RET
|
RET
|
||||||
|
|
||||||
// func lshVU(z, x []Word, s uint) (c Word)
|
// func lshVU(z, x []Word, s uint) (c Word)
|
||||||
@ -316,9 +316,9 @@ loop1cont:
|
|||||||
MULVU R1, R7
|
MULVU R1, R7
|
||||||
MOVV LO, R8
|
MOVV LO, R8
|
||||||
MOVV HI, R9
|
MOVV HI, R9
|
||||||
ADDVU R2, R8, R7 // ADDS R2, R8, R7 (cr=R26)
|
ADDVU R2, R8, R7 // ADDS R2, R8, R7 (cr=R24)
|
||||||
SGTU R2, R7, R26 // ...
|
SGTU R2, R7, R24 // ...
|
||||||
ADDVU R26, R9, R2 // ADC $0, R9, R2
|
ADDVU R24, R9, R2 // ADC $0, R9, R2
|
||||||
MOVV R7, 0(R5)
|
MOVV R7, 0(R5)
|
||||||
ADDVU $8, R4
|
ADDVU $8, R4
|
||||||
ADDVU $8, R5
|
ADDVU $8, R5
|
||||||
@ -337,27 +337,27 @@ loop4cont:
|
|||||||
MULVU R1, R6
|
MULVU R1, R6
|
||||||
MOVV LO, R10
|
MOVV LO, R10
|
||||||
MOVV HI, R11
|
MOVV HI, R11
|
||||||
ADDVU R2, R10, R6 // ADDS R2, R10, R6 (cr=R26)
|
ADDVU R2, R10, R6 // ADDS R2, R10, R6 (cr=R24)
|
||||||
SGTU R2, R6, R26 // ...
|
SGTU R2, R6, R24 // ...
|
||||||
ADDVU R26, R11, R2 // ADC $0, R11, R2
|
ADDVU R24, R11, R2 // ADC $0, R11, R2
|
||||||
MULVU R1, R7
|
MULVU R1, R7
|
||||||
MOVV LO, R10
|
MOVV LO, R10
|
||||||
MOVV HI, R11
|
MOVV HI, R11
|
||||||
ADDVU R2, R10, R7 // ADDS R2, R10, R7 (cr=R26)
|
ADDVU R2, R10, R7 // ADDS R2, R10, R7 (cr=R24)
|
||||||
SGTU R2, R7, R26 // ...
|
SGTU R2, R7, R24 // ...
|
||||||
ADDVU R26, R11, R2 // ADC $0, R11, R2
|
ADDVU R24, R11, R2 // ADC $0, R11, R2
|
||||||
MULVU R1, R8
|
MULVU R1, R8
|
||||||
MOVV LO, R10
|
MOVV LO, R10
|
||||||
MOVV HI, R11
|
MOVV HI, R11
|
||||||
ADDVU R2, R10, R8 // ADDS R2, R10, R8 (cr=R26)
|
ADDVU R2, R10, R8 // ADDS R2, R10, R8 (cr=R24)
|
||||||
SGTU R2, R8, R26 // ...
|
SGTU R2, R8, R24 // ...
|
||||||
ADDVU R26, R11, R2 // ADC $0, R11, R2
|
ADDVU R24, R11, R2 // ADC $0, R11, R2
|
||||||
MULVU R1, R9
|
MULVU R1, R9
|
||||||
MOVV LO, R10
|
MOVV LO, R10
|
||||||
MOVV HI, R11
|
MOVV HI, R11
|
||||||
ADDVU R2, R10, R9 // ADDS R2, R10, R9 (cr=R26)
|
ADDVU R2, R10, R9 // ADDS R2, R10, R9 (cr=R24)
|
||||||
SGTU R2, R9, R26 // ...
|
SGTU R2, R9, R24 // ...
|
||||||
ADDVU R26, R11, R2 // ADC $0, R11, R2
|
ADDVU R24, R11, R2 // ADC $0, R11, R2
|
||||||
MOVV R6, 0(R5)
|
MOVV R6, 0(R5)
|
||||||
MOVV R7, 8(R5)
|
MOVV R7, 8(R5)
|
||||||
MOVV R8, 16(R5)
|
MOVV R8, 16(R5)
|
||||||
@ -391,12 +391,12 @@ loop1cont:
|
|||||||
MULVU R1, R9
|
MULVU R1, R9
|
||||||
MOVV LO, R10
|
MOVV LO, R10
|
||||||
MOVV HI, R11
|
MOVV HI, R11
|
||||||
ADDVU R8, R10 // ADDS R8, R10, R10 (cr=R26)
|
ADDVU R8, R10 // ADDS R8, R10, R10 (cr=R24)
|
||||||
SGTU R8, R10, R26 // ...
|
SGTU R8, R10, R24 // ...
|
||||||
ADDVU R26, R11 // ADC $0, R11, R11
|
ADDVU R24, R11 // ADC $0, R11, R11
|
||||||
ADDVU R2, R10, R9 // ADDS R2, R10, R9 (cr=R26)
|
ADDVU R2, R10, R9 // ADDS R2, R10, R9 (cr=R24)
|
||||||
SGTU R2, R9, R26 // ...
|
SGTU R2, R9, R24 // ...
|
||||||
ADDVU R26, R11, R2 // ADC $0, R11, R2
|
ADDVU R24, R11, R2 // ADC $0, R11, R2
|
||||||
MOVV R9, 0(R6)
|
MOVV R9, 0(R6)
|
||||||
ADDVU $8, R4
|
ADDVU $8, R4
|
||||||
ADDVU $8, R5
|
ADDVU $8, R5
|
||||||
@ -420,39 +420,39 @@ loop4cont:
|
|||||||
MULVU R1, R11
|
MULVU R1, R11
|
||||||
MOVV LO, R15
|
MOVV LO, R15
|
||||||
MOVV HI, R16
|
MOVV HI, R16
|
||||||
ADDVU R7, R15 // ADDS R7, R15, R15 (cr=R26)
|
ADDVU R7, R15 // ADDS R7, R15, R15 (cr=R24)
|
||||||
SGTU R7, R15, R26 // ...
|
SGTU R7, R15, R24 // ...
|
||||||
ADDVU R26, R16 // ADC $0, R16, R16
|
ADDVU R24, R16 // ADC $0, R16, R16
|
||||||
ADDVU R2, R15, R11 // ADDS R2, R15, R11 (cr=R26)
|
ADDVU R2, R15, R11 // ADDS R2, R15, R11 (cr=R24)
|
||||||
SGTU R2, R11, R26 // ...
|
SGTU R2, R11, R24 // ...
|
||||||
ADDVU R26, R16, R2 // ADC $0, R16, R2
|
ADDVU R24, R16, R2 // ADC $0, R16, R2
|
||||||
MULVU R1, R12
|
MULVU R1, R12
|
||||||
MOVV LO, R15
|
MOVV LO, R15
|
||||||
MOVV HI, R16
|
MOVV HI, R16
|
||||||
ADDVU R8, R15 // ADDS R8, R15, R15 (cr=R26)
|
ADDVU R8, R15 // ADDS R8, R15, R15 (cr=R24)
|
||||||
SGTU R8, R15, R26 // ...
|
SGTU R8, R15, R24 // ...
|
||||||
ADDVU R26, R16 // ADC $0, R16, R16
|
ADDVU R24, R16 // ADC $0, R16, R16
|
||||||
ADDVU R2, R15, R12 // ADDS R2, R15, R12 (cr=R26)
|
ADDVU R2, R15, R12 // ADDS R2, R15, R12 (cr=R24)
|
||||||
SGTU R2, R12, R26 // ...
|
SGTU R2, R12, R24 // ...
|
||||||
ADDVU R26, R16, R2 // ADC $0, R16, R2
|
ADDVU R24, R16, R2 // ADC $0, R16, R2
|
||||||
MULVU R1, R13
|
MULVU R1, R13
|
||||||
MOVV LO, R15
|
MOVV LO, R15
|
||||||
MOVV HI, R16
|
MOVV HI, R16
|
||||||
ADDVU R9, R15 // ADDS R9, R15, R15 (cr=R26)
|
ADDVU R9, R15 // ADDS R9, R15, R15 (cr=R24)
|
||||||
SGTU R9, R15, R26 // ...
|
SGTU R9, R15, R24 // ...
|
||||||
ADDVU R26, R16 // ADC $0, R16, R16
|
ADDVU R24, R16 // ADC $0, R16, R16
|
||||||
ADDVU R2, R15, R13 // ADDS R2, R15, R13 (cr=R26)
|
ADDVU R2, R15, R13 // ADDS R2, R15, R13 (cr=R24)
|
||||||
SGTU R2, R13, R26 // ...
|
SGTU R2, R13, R24 // ...
|
||||||
ADDVU R26, R16, R2 // ADC $0, R16, R2
|
ADDVU R24, R16, R2 // ADC $0, R16, R2
|
||||||
MULVU R1, R14
|
MULVU R1, R14
|
||||||
MOVV LO, R15
|
MOVV LO, R15
|
||||||
MOVV HI, R16
|
MOVV HI, R16
|
||||||
ADDVU R10, R15 // ADDS R10, R15, R15 (cr=R26)
|
ADDVU R10, R15 // ADDS R10, R15, R15 (cr=R24)
|
||||||
SGTU R10, R15, R26 // ...
|
SGTU R10, R15, R24 // ...
|
||||||
ADDVU R26, R16 // ADC $0, R16, R16
|
ADDVU R24, R16 // ADC $0, R16, R16
|
||||||
ADDVU R2, R15, R14 // ADDS R2, R15, R14 (cr=R26)
|
ADDVU R2, R15, R14 // ADDS R2, R15, R14 (cr=R24)
|
||||||
SGTU R2, R14, R26 // ...
|
SGTU R2, R14, R24 // ...
|
||||||
ADDVU R26, R16, R2 // ADC $0, R16, R2
|
ADDVU R24, R16, R2 // ADC $0, R16, R2
|
||||||
MOVV R11, 0(R6)
|
MOVV R11, 0(R6)
|
||||||
MOVV R12, 8(R6)
|
MOVV R12, 8(R6)
|
||||||
MOVV R13, 16(R6)
|
MOVV R13, 16(R6)
|
||||||
|
@ -17,18 +17,18 @@ TEXT ·addVV(SB), NOSPLIT, $0
|
|||||||
// compute unrolled loop lengths
|
// compute unrolled loop lengths
|
||||||
AND $3, R1, R5
|
AND $3, R1, R5
|
||||||
SRL $2, R1
|
SRL $2, R1
|
||||||
XOR R26, R26 // clear carry
|
XOR R24, R24 // clear carry
|
||||||
loop1:
|
loop1:
|
||||||
BEQ R5, loop1done
|
BEQ R5, loop1done
|
||||||
loop1cont:
|
loop1cont:
|
||||||
// unroll 1X
|
// unroll 1X
|
||||||
MOVW 0(R2), R6
|
MOVW 0(R2), R6
|
||||||
MOVW 0(R3), R7
|
MOVW 0(R3), R7
|
||||||
ADDU R7, R6 // ADCS R7, R6, R6 (cr=R26)
|
ADDU R7, R6 // ADCS R7, R6, R6 (cr=R24)
|
||||||
SGTU R7, R6, R23 // ...
|
SGTU R7, R6, R23 // ...
|
||||||
ADDU R26, R6 // ...
|
ADDU R24, R6 // ...
|
||||||
SGTU R26, R6, R26 // ...
|
SGTU R24, R6, R24 // ...
|
||||||
ADDU R23, R26 // ...
|
ADDU R23, R24 // ...
|
||||||
MOVW R6, 0(R4)
|
MOVW R6, 0(R4)
|
||||||
ADDU $4, R2
|
ADDU $4, R2
|
||||||
ADDU $4, R3
|
ADDU $4, R3
|
||||||
@ -48,26 +48,26 @@ loop4cont:
|
|||||||
MOVW 4(R3), R10
|
MOVW 4(R3), R10
|
||||||
MOVW 8(R3), R11
|
MOVW 8(R3), R11
|
||||||
MOVW 12(R3), R12
|
MOVW 12(R3), R12
|
||||||
ADDU R9, R5 // ADCS R9, R5, R5 (cr=R26)
|
ADDU R9, R5 // ADCS R9, R5, R5 (cr=R24)
|
||||||
SGTU R9, R5, R23 // ...
|
SGTU R9, R5, R23 // ...
|
||||||
ADDU R26, R5 // ...
|
ADDU R24, R5 // ...
|
||||||
SGTU R26, R5, R26 // ...
|
SGTU R24, R5, R24 // ...
|
||||||
ADDU R23, R26 // ...
|
ADDU R23, R24 // ...
|
||||||
ADDU R10, R6 // ADCS R10, R6, R6 (cr=R26)
|
ADDU R10, R6 // ADCS R10, R6, R6 (cr=R24)
|
||||||
SGTU R10, R6, R23 // ...
|
SGTU R10, R6, R23 // ...
|
||||||
ADDU R26, R6 // ...
|
ADDU R24, R6 // ...
|
||||||
SGTU R26, R6, R26 // ...
|
SGTU R24, R6, R24 // ...
|
||||||
ADDU R23, R26 // ...
|
ADDU R23, R24 // ...
|
||||||
ADDU R11, R7 // ADCS R11, R7, R7 (cr=R26)
|
ADDU R11, R7 // ADCS R11, R7, R7 (cr=R24)
|
||||||
SGTU R11, R7, R23 // ...
|
SGTU R11, R7, R23 // ...
|
||||||
ADDU R26, R7 // ...
|
ADDU R24, R7 // ...
|
||||||
SGTU R26, R7, R26 // ...
|
SGTU R24, R7, R24 // ...
|
||||||
ADDU R23, R26 // ...
|
ADDU R23, R24 // ...
|
||||||
ADDU R12, R8 // ADCS R12, R8, R8 (cr=R26)
|
ADDU R12, R8 // ADCS R12, R8, R8 (cr=R24)
|
||||||
SGTU R12, R8, R23 // ...
|
SGTU R12, R8, R23 // ...
|
||||||
ADDU R26, R8 // ...
|
ADDU R24, R8 // ...
|
||||||
SGTU R26, R8, R26 // ...
|
SGTU R24, R8, R24 // ...
|
||||||
ADDU R23, R26 // ...
|
ADDU R23, R24 // ...
|
||||||
MOVW R5, 0(R4)
|
MOVW R5, 0(R4)
|
||||||
MOVW R6, 4(R4)
|
MOVW R6, 4(R4)
|
||||||
MOVW R7, 8(R4)
|
MOVW R7, 8(R4)
|
||||||
@ -78,7 +78,7 @@ loop4cont:
|
|||||||
SUBU $1, R1
|
SUBU $1, R1
|
||||||
BNE R1, loop4cont
|
BNE R1, loop4cont
|
||||||
loop4done:
|
loop4done:
|
||||||
MOVW R26, c+36(FP)
|
MOVW R24, c+36(FP)
|
||||||
RET
|
RET
|
||||||
|
|
||||||
// func subVV(z, x, y []Word) (c Word)
|
// func subVV(z, x, y []Word) (c Word)
|
||||||
@ -90,18 +90,18 @@ TEXT ·subVV(SB), NOSPLIT, $0
|
|||||||
// compute unrolled loop lengths
|
// compute unrolled loop lengths
|
||||||
AND $3, R1, R5
|
AND $3, R1, R5
|
||||||
SRL $2, R1
|
SRL $2, R1
|
||||||
XOR R26, R26 // clear carry
|
XOR R24, R24 // clear carry
|
||||||
loop1:
|
loop1:
|
||||||
BEQ R5, loop1done
|
BEQ R5, loop1done
|
||||||
loop1cont:
|
loop1cont:
|
||||||
// unroll 1X
|
// unroll 1X
|
||||||
MOVW 0(R2), R6
|
MOVW 0(R2), R6
|
||||||
MOVW 0(R3), R7
|
MOVW 0(R3), R7
|
||||||
SGTU R26, R6, R23 // SBCS R7, R6, R6
|
SGTU R24, R6, R23 // SBCS R7, R6, R6
|
||||||
SUBU R26, R6 // ...
|
SUBU R24, R6 // ...
|
||||||
SGTU R7, R6, R26 // ...
|
SGTU R7, R6, R24 // ...
|
||||||
SUBU R7, R6 // ...
|
SUBU R7, R6 // ...
|
||||||
ADDU R23, R26 // ...
|
ADDU R23, R24 // ...
|
||||||
MOVW R6, 0(R4)
|
MOVW R6, 0(R4)
|
||||||
ADDU $4, R2
|
ADDU $4, R2
|
||||||
ADDU $4, R3
|
ADDU $4, R3
|
||||||
@ -121,26 +121,26 @@ loop4cont:
|
|||||||
MOVW 4(R3), R10
|
MOVW 4(R3), R10
|
||||||
MOVW 8(R3), R11
|
MOVW 8(R3), R11
|
||||||
MOVW 12(R3), R12
|
MOVW 12(R3), R12
|
||||||
SGTU R26, R5, R23 // SBCS R9, R5, R5
|
SGTU R24, R5, R23 // SBCS R9, R5, R5
|
||||||
SUBU R26, R5 // ...
|
SUBU R24, R5 // ...
|
||||||
SGTU R9, R5, R26 // ...
|
SGTU R9, R5, R24 // ...
|
||||||
SUBU R9, R5 // ...
|
SUBU R9, R5 // ...
|
||||||
ADDU R23, R26 // ...
|
ADDU R23, R24 // ...
|
||||||
SGTU R26, R6, R23 // SBCS R10, R6, R6
|
SGTU R24, R6, R23 // SBCS R10, R6, R6
|
||||||
SUBU R26, R6 // ...
|
SUBU R24, R6 // ...
|
||||||
SGTU R10, R6, R26 // ...
|
SGTU R10, R6, R24 // ...
|
||||||
SUBU R10, R6 // ...
|
SUBU R10, R6 // ...
|
||||||
ADDU R23, R26 // ...
|
ADDU R23, R24 // ...
|
||||||
SGTU R26, R7, R23 // SBCS R11, R7, R7
|
SGTU R24, R7, R23 // SBCS R11, R7, R7
|
||||||
SUBU R26, R7 // ...
|
SUBU R24, R7 // ...
|
||||||
SGTU R11, R7, R26 // ...
|
SGTU R11, R7, R24 // ...
|
||||||
SUBU R11, R7 // ...
|
SUBU R11, R7 // ...
|
||||||
ADDU R23, R26 // ...
|
ADDU R23, R24 // ...
|
||||||
SGTU R26, R8, R23 // SBCS R12, R8, R8
|
SGTU R24, R8, R23 // SBCS R12, R8, R8
|
||||||
SUBU R26, R8 // ...
|
SUBU R24, R8 // ...
|
||||||
SGTU R12, R8, R26 // ...
|
SGTU R12, R8, R24 // ...
|
||||||
SUBU R12, R8 // ...
|
SUBU R12, R8 // ...
|
||||||
ADDU R23, R26 // ...
|
ADDU R23, R24 // ...
|
||||||
MOVW R5, 0(R4)
|
MOVW R5, 0(R4)
|
||||||
MOVW R6, 4(R4)
|
MOVW R6, 4(R4)
|
||||||
MOVW R7, 8(R4)
|
MOVW R7, 8(R4)
|
||||||
@ -151,7 +151,7 @@ loop4cont:
|
|||||||
SUBU $1, R1
|
SUBU $1, R1
|
||||||
BNE R1, loop4cont
|
BNE R1, loop4cont
|
||||||
loop4done:
|
loop4done:
|
||||||
MOVW R26, c+36(FP)
|
MOVW R24, c+36(FP)
|
||||||
RET
|
RET
|
||||||
|
|
||||||
// func lshVU(z, x []Word, s uint) (c Word)
|
// func lshVU(z, x []Word, s uint) (c Word)
|
||||||
@ -316,9 +316,9 @@ loop1cont:
|
|||||||
MULU R1, R7
|
MULU R1, R7
|
||||||
MOVW LO, R8
|
MOVW LO, R8
|
||||||
MOVW HI, R9
|
MOVW HI, R9
|
||||||
ADDU R2, R8, R7 // ADDS R2, R8, R7 (cr=R26)
|
ADDU R2, R8, R7 // ADDS R2, R8, R7 (cr=R24)
|
||||||
SGTU R2, R7, R26 // ...
|
SGTU R2, R7, R24 // ...
|
||||||
ADDU R26, R9, R2 // ADC $0, R9, R2
|
ADDU R24, R9, R2 // ADC $0, R9, R2
|
||||||
MOVW R7, 0(R5)
|
MOVW R7, 0(R5)
|
||||||
ADDU $4, R4
|
ADDU $4, R4
|
||||||
ADDU $4, R5
|
ADDU $4, R5
|
||||||
@ -337,27 +337,27 @@ loop4cont:
|
|||||||
MULU R1, R6
|
MULU R1, R6
|
||||||
MOVW LO, R10
|
MOVW LO, R10
|
||||||
MOVW HI, R11
|
MOVW HI, R11
|
||||||
ADDU R2, R10, R6 // ADDS R2, R10, R6 (cr=R26)
|
ADDU R2, R10, R6 // ADDS R2, R10, R6 (cr=R24)
|
||||||
SGTU R2, R6, R26 // ...
|
SGTU R2, R6, R24 // ...
|
||||||
ADDU R26, R11, R2 // ADC $0, R11, R2
|
ADDU R24, R11, R2 // ADC $0, R11, R2
|
||||||
MULU R1, R7
|
MULU R1, R7
|
||||||
MOVW LO, R10
|
MOVW LO, R10
|
||||||
MOVW HI, R11
|
MOVW HI, R11
|
||||||
ADDU R2, R10, R7 // ADDS R2, R10, R7 (cr=R26)
|
ADDU R2, R10, R7 // ADDS R2, R10, R7 (cr=R24)
|
||||||
SGTU R2, R7, R26 // ...
|
SGTU R2, R7, R24 // ...
|
||||||
ADDU R26, R11, R2 // ADC $0, R11, R2
|
ADDU R24, R11, R2 // ADC $0, R11, R2
|
||||||
MULU R1, R8
|
MULU R1, R8
|
||||||
MOVW LO, R10
|
MOVW LO, R10
|
||||||
MOVW HI, R11
|
MOVW HI, R11
|
||||||
ADDU R2, R10, R8 // ADDS R2, R10, R8 (cr=R26)
|
ADDU R2, R10, R8 // ADDS R2, R10, R8 (cr=R24)
|
||||||
SGTU R2, R8, R26 // ...
|
SGTU R2, R8, R24 // ...
|
||||||
ADDU R26, R11, R2 // ADC $0, R11, R2
|
ADDU R24, R11, R2 // ADC $0, R11, R2
|
||||||
MULU R1, R9
|
MULU R1, R9
|
||||||
MOVW LO, R10
|
MOVW LO, R10
|
||||||
MOVW HI, R11
|
MOVW HI, R11
|
||||||
ADDU R2, R10, R9 // ADDS R2, R10, R9 (cr=R26)
|
ADDU R2, R10, R9 // ADDS R2, R10, R9 (cr=R24)
|
||||||
SGTU R2, R9, R26 // ...
|
SGTU R2, R9, R24 // ...
|
||||||
ADDU R26, R11, R2 // ADC $0, R11, R2
|
ADDU R24, R11, R2 // ADC $0, R11, R2
|
||||||
MOVW R6, 0(R5)
|
MOVW R6, 0(R5)
|
||||||
MOVW R7, 4(R5)
|
MOVW R7, 4(R5)
|
||||||
MOVW R8, 8(R5)
|
MOVW R8, 8(R5)
|
||||||
@ -391,12 +391,12 @@ loop1cont:
|
|||||||
MULU R1, R9
|
MULU R1, R9
|
||||||
MOVW LO, R10
|
MOVW LO, R10
|
||||||
MOVW HI, R11
|
MOVW HI, R11
|
||||||
ADDU R8, R10 // ADDS R8, R10, R10 (cr=R26)
|
ADDU R8, R10 // ADDS R8, R10, R10 (cr=R24)
|
||||||
SGTU R8, R10, R26 // ...
|
SGTU R8, R10, R24 // ...
|
||||||
ADDU R26, R11 // ADC $0, R11, R11
|
ADDU R24, R11 // ADC $0, R11, R11
|
||||||
ADDU R2, R10, R9 // ADDS R2, R10, R9 (cr=R26)
|
ADDU R2, R10, R9 // ADDS R2, R10, R9 (cr=R24)
|
||||||
SGTU R2, R9, R26 // ...
|
SGTU R2, R9, R24 // ...
|
||||||
ADDU R26, R11, R2 // ADC $0, R11, R2
|
ADDU R24, R11, R2 // ADC $0, R11, R2
|
||||||
MOVW R9, 0(R6)
|
MOVW R9, 0(R6)
|
||||||
ADDU $4, R4
|
ADDU $4, R4
|
||||||
ADDU $4, R5
|
ADDU $4, R5
|
||||||
@ -420,39 +420,39 @@ loop4cont:
|
|||||||
MULU R1, R11
|
MULU R1, R11
|
||||||
MOVW LO, R15
|
MOVW LO, R15
|
||||||
MOVW HI, R16
|
MOVW HI, R16
|
||||||
ADDU R7, R15 // ADDS R7, R15, R15 (cr=R26)
|
ADDU R7, R15 // ADDS R7, R15, R15 (cr=R24)
|
||||||
SGTU R7, R15, R26 // ...
|
SGTU R7, R15, R24 // ...
|
||||||
ADDU R26, R16 // ADC $0, R16, R16
|
ADDU R24, R16 // ADC $0, R16, R16
|
||||||
ADDU R2, R15, R11 // ADDS R2, R15, R11 (cr=R26)
|
ADDU R2, R15, R11 // ADDS R2, R15, R11 (cr=R24)
|
||||||
SGTU R2, R11, R26 // ...
|
SGTU R2, R11, R24 // ...
|
||||||
ADDU R26, R16, R2 // ADC $0, R16, R2
|
ADDU R24, R16, R2 // ADC $0, R16, R2
|
||||||
MULU R1, R12
|
MULU R1, R12
|
||||||
MOVW LO, R15
|
MOVW LO, R15
|
||||||
MOVW HI, R16
|
MOVW HI, R16
|
||||||
ADDU R8, R15 // ADDS R8, R15, R15 (cr=R26)
|
ADDU R8, R15 // ADDS R8, R15, R15 (cr=R24)
|
||||||
SGTU R8, R15, R26 // ...
|
SGTU R8, R15, R24 // ...
|
||||||
ADDU R26, R16 // ADC $0, R16, R16
|
ADDU R24, R16 // ADC $0, R16, R16
|
||||||
ADDU R2, R15, R12 // ADDS R2, R15, R12 (cr=R26)
|
ADDU R2, R15, R12 // ADDS R2, R15, R12 (cr=R24)
|
||||||
SGTU R2, R12, R26 // ...
|
SGTU R2, R12, R24 // ...
|
||||||
ADDU R26, R16, R2 // ADC $0, R16, R2
|
ADDU R24, R16, R2 // ADC $0, R16, R2
|
||||||
MULU R1, R13
|
MULU R1, R13
|
||||||
MOVW LO, R15
|
MOVW LO, R15
|
||||||
MOVW HI, R16
|
MOVW HI, R16
|
||||||
ADDU R9, R15 // ADDS R9, R15, R15 (cr=R26)
|
ADDU R9, R15 // ADDS R9, R15, R15 (cr=R24)
|
||||||
SGTU R9, R15, R26 // ...
|
SGTU R9, R15, R24 // ...
|
||||||
ADDU R26, R16 // ADC $0, R16, R16
|
ADDU R24, R16 // ADC $0, R16, R16
|
||||||
ADDU R2, R15, R13 // ADDS R2, R15, R13 (cr=R26)
|
ADDU R2, R15, R13 // ADDS R2, R15, R13 (cr=R24)
|
||||||
SGTU R2, R13, R26 // ...
|
SGTU R2, R13, R24 // ...
|
||||||
ADDU R26, R16, R2 // ADC $0, R16, R2
|
ADDU R24, R16, R2 // ADC $0, R16, R2
|
||||||
MULU R1, R14
|
MULU R1, R14
|
||||||
MOVW LO, R15
|
MOVW LO, R15
|
||||||
MOVW HI, R16
|
MOVW HI, R16
|
||||||
ADDU R10, R15 // ADDS R10, R15, R15 (cr=R26)
|
ADDU R10, R15 // ADDS R10, R15, R15 (cr=R24)
|
||||||
SGTU R10, R15, R26 // ...
|
SGTU R10, R15, R24 // ...
|
||||||
ADDU R26, R16 // ADC $0, R16, R16
|
ADDU R24, R16 // ADC $0, R16, R16
|
||||||
ADDU R2, R15, R14 // ADDS R2, R15, R14 (cr=R26)
|
ADDU R2, R15, R14 // ADDS R2, R15, R14 (cr=R24)
|
||||||
SGTU R2, R14, R26 // ...
|
SGTU R2, R14, R24 // ...
|
||||||
ADDU R26, R16, R2 // ADC $0, R16, R2
|
ADDU R24, R16, R2 // ADC $0, R16, R2
|
||||||
MOVW R11, 0(R6)
|
MOVW R11, 0(R6)
|
||||||
MOVW R12, 4(R6)
|
MOVW R12, 4(R6)
|
||||||
MOVW R13, 8(R6)
|
MOVW R13, 8(R6)
|
||||||
|
@ -14,19 +14,19 @@ var ArchMIPS = &Arch{
|
|||||||
regs: []string{
|
regs: []string{
|
||||||
// R0 is 0
|
// R0 is 0
|
||||||
// R23 is the assembler/linker temporary (which we use too).
|
// R23 is the assembler/linker temporary (which we use too).
|
||||||
// R26 and R27 are our virtual carry flags.
|
// R24 and R25 are our virtual carry flags.
|
||||||
// R28 is SB.
|
// R28 is SB.
|
||||||
// R29 is SP.
|
// R29 is SP.
|
||||||
// R30 is g.
|
// R30 is g.
|
||||||
// R31 is LR.
|
// R31 is LR.
|
||||||
"R1", "R2", "R3", "R4", "R5", "R6", "R7", "R8", "R9",
|
"R1", "R2", "R3", "R4", "R5", "R6", "R7", "R8", "R9",
|
||||||
"R10", "R11", "R12", "R13", "R14", "R15", "R16", "R17", "R18", "R19",
|
"R10", "R11", "R12", "R13", "R14", "R15", "R16", "R17", "R18", "R19",
|
||||||
"R20", "R21", "R22", "R24", "R25", "R26", "R27",
|
"R20", "R21", "R22", "R24", "R25",
|
||||||
},
|
},
|
||||||
reg0: "R0",
|
reg0: "R0",
|
||||||
regTmp: "R23",
|
regTmp: "R23",
|
||||||
regCarry: "R26",
|
regCarry: "R24",
|
||||||
regAltCarry: "R27",
|
regAltCarry: "R25",
|
||||||
|
|
||||||
mov: "MOVW",
|
mov: "MOVW",
|
||||||
add: "ADDU",
|
add: "ADDU",
|
||||||
|
@ -14,19 +14,19 @@ var ArchMIPS64x = &Arch{
|
|||||||
regs: []string{
|
regs: []string{
|
||||||
// R0 is 0
|
// R0 is 0
|
||||||
// R23 is the assembler/linker temporary (which we use too).
|
// R23 is the assembler/linker temporary (which we use too).
|
||||||
// R26 and R27 are our virtual carry flags.
|
// R24 and R25 are our virtual carry flags.
|
||||||
// R28 is SB.
|
// R28 is SB.
|
||||||
// R29 is SP.
|
// R29 is SP.
|
||||||
// R30 is g.
|
// R30 is g.
|
||||||
// R31 is LR.
|
// R31 is LR.
|
||||||
"R1", "R2", "R3", "R4", "R5", "R6", "R7", "R8", "R9",
|
"R1", "R2", "R3", "R4", "R5", "R6", "R7", "R8", "R9",
|
||||||
"R10", "R11", "R12", "R13", "R14", "R15", "R16", "R17", "R18", "R19",
|
"R10", "R11", "R12", "R13", "R14", "R15", "R16", "R17", "R18", "R19",
|
||||||
"R20", "R21", "R22", "R24", "R25", "R26", "R27",
|
"R20", "R21", "R22", "R24", "R25",
|
||||||
},
|
},
|
||||||
reg0: "R0",
|
reg0: "R0",
|
||||||
regTmp: "R23",
|
regTmp: "R23",
|
||||||
regCarry: "R26",
|
regCarry: "R24",
|
||||||
regAltCarry: "R27",
|
regAltCarry: "R25",
|
||||||
|
|
||||||
mov: "MOVV",
|
mov: "MOVV",
|
||||||
add: "ADDVU",
|
add: "ADDVU",
|
||||||
|
Loading…
x
Reference in New Issue
Block a user