mirror of
https://github.com/golang/go.git
synced 2025-05-05 15:43:04 +00:00
cmd/internal/obj: continue to optimize ARM's constant pool
Both Keith's https://go-review.googlesource.com/c/41612/ and and Ben's https://go-review.googlesource.com/c/41679/ optimized ARM's constant pool. But neither was complete. First, BIC was forgotten. 1. "BIC $0xff00ff00, Reg" can be optimized to "BIC $0xff000000, Reg BIC $0x0000ff00, Reg" 2. "BIC $0xffff00ff, Reg" can be optimized to "AND $0x0000ff00, Reg" 3. "AND $0xffff00ff, Reg" can be optimized to "BIC $0x0000ff00, Reg" Second, break a non-ARMImmRot to the subtraction of two ARMImmRots was left as TODO. 1. "ADD $0x00fffff0, Reg" can be optimized to "ADD $0x01000000, Reg SUB $0x00000010, Reg" 2. "SUB $0x00fffff0, Reg" can be optimized to "SUB $0x01000000, Reg ADD $0x00000010, Reg" This patch fixes them and issue #19844. The go1 benchmark shows improvements. name old time/op new time/op delta BinaryTree17-4 41.4s ± 1% 41.7s ± 1% +0.54% (p=0.000 n=50+49) Fannkuch11-4 24.7s ± 1% 25.1s ± 0% +1.70% (p=0.000 n=50+49) FmtFprintfEmpty-4 853ns ± 1% 852ns ± 1% ~ (p=0.833 n=50+50) FmtFprintfString-4 1.33µs ± 1% 1.33µs ± 1% ~ (p=0.163 n=50+50) FmtFprintfInt-4 1.40µs ± 1% 1.40µs ± 0% ~ (p=0.293 n=50+35) FmtFprintfIntInt-4 2.09µs ± 1% 2.08µs ± 1% -0.39% (p=0.000 n=50+49) FmtFprintfPrefixedInt-4 2.43µs ± 1% 2.43µs ± 1% ~ (p=0.552 n=50+50) FmtFprintfFloat-4 4.57µs ± 1% 4.42µs ± 1% -3.18% (p=0.000 n=50+50) FmtManyArgs-4 8.62µs ± 1% 8.52µs ± 0% -1.08% (p=0.000 n=50+50) GobDecode-4 101ms ± 1% 101ms ± 2% +0.45% (p=0.001 n=49+49) GobEncode-4 90.7ms ± 1% 91.1ms ± 2% +0.51% (p=0.001 n=50+50) Gzip-4 4.23s ± 1% 4.21s ± 1% -0.62% (p=0.000 n=50+50) Gunzip-4 623ms ± 1% 619ms ± 0% -0.63% (p=0.000 n=50+42) HTTPClientServer-4 721µs ± 5% 683µs ± 3% -5.25% (p=0.000 n=50+47) JSONEncode-4 251ms ± 1% 253ms ± 1% +0.54% (p=0.000 n=49+50) JSONDecode-4 941ms ± 1% 944ms ± 1% +0.30% (p=0.001 n=49+50) Mandelbrot200-4 49.3ms ± 1% 49.3ms ± 0% ~ (p=0.918 n=50+48) GoParse-4 47.1ms ± 1% 47.2ms ± 1% +0.18% (p=0.025 n=50+50) RegexpMatchEasy0_32-4 1.23µs ± 1% 1.24µs ± 1% +0.30% (p=0.000 n=49+50) RegexpMatchEasy0_1K-4 7.74µs ± 7% 7.76µs ± 5% ~ (p=0.888 n=50+50) RegexpMatchEasy1_32-4 1.32µs ± 1% 1.32µs ± 1% +0.23% (p=0.003 n=50+50) RegexpMatchEasy1_1K-4 10.6µs ± 2% 10.5µs ± 3% -1.29% (p=0.000 n=49+50) RegexpMatchMedium_32-4 2.19µs ± 1% 2.10µs ± 1% -3.79% (p=0.000 n=49+49) RegexpMatchMedium_1K-4 544µs ± 0% 545µs ± 0% ~ (p=0.123 n=41+50) RegexpMatchHard_32-4 28.8µs ± 0% 28.8µs ± 1% ~ (p=0.580 n=46+50) RegexpMatchHard_1K-4 863µs ± 1% 865µs ± 1% +0.31% (p=0.027 n=47+50) Revcomp-4 82.2ms ± 2% 82.3ms ± 2% ~ (p=0.894 n=48+49) Template-4 1.06s ± 1% 1.04s ± 1% -1.18% (p=0.000 n=50+49) TimeParse-4 7.25µs ± 1% 7.35µs ± 0% +1.48% (p=0.000 n=50+50) TimeFormat-4 13.3µs ± 1% 13.2µs ± 1% -0.13% (p=0.007 n=50+50) [Geo mean] 736µs 733µs -0.37% name old speed new speed delta GobDecode-4 7.60MB/s ± 1% 7.56MB/s ± 2% -0.46% (p=0.001 n=49+49) GobEncode-4 8.47MB/s ± 1% 8.42MB/s ± 2% -0.50% (p=0.001 n=50+50) Gzip-4 4.58MB/s ± 1% 4.61MB/s ± 1% +0.59% (p=0.000 n=50+50) Gunzip-4 31.2MB/s ± 1% 31.4MB/s ± 0% +0.63% (p=0.000 n=50+42) JSONEncode-4 7.73MB/s ± 1% 7.69MB/s ± 1% -0.53% (p=0.000 n=49+50) JSONDecode-4 2.06MB/s ± 1% 2.06MB/s ± 1% ~ (p=0.052 n=44+50) GoParse-4 1.23MB/s ± 0% 1.23MB/s ± 2% ~ (p=0.526 n=26+50) RegexpMatchEasy0_32-4 25.9MB/s ± 1% 25.9MB/s ± 1% -0.30% (p=0.000 n=49+50) RegexpMatchEasy0_1K-4 132MB/s ± 7% 132MB/s ± 6% ~ (p=0.885 n=50+50) RegexpMatchEasy1_32-4 24.2MB/s ± 1% 24.1MB/s ± 1% -0.22% (p=0.003 n=50+50) RegexpMatchEasy1_1K-4 96.4MB/s ± 2% 97.8MB/s ± 3% +1.36% (p=0.000 n=50+50) RegexpMatchMedium_32-4 460kB/s ± 0% 476kB/s ± 1% +3.43% (p=0.000 n=49+50) RegexpMatchMedium_1K-4 1.88MB/s ± 0% 1.88MB/s ± 0% ~ (all equal) RegexpMatchHard_32-4 1.11MB/s ± 0% 1.11MB/s ± 1% +0.34% (p=0.000 n=45+50) RegexpMatchHard_1K-4 1.19MB/s ± 1% 1.18MB/s ± 1% -0.34% (p=0.033 n=50+50) Revcomp-4 30.9MB/s ± 2% 30.9MB/s ± 2% ~ (p=0.894 n=48+49) Template-4 1.84MB/s ± 1% 1.86MB/s ± 2% +1.19% (p=0.000 n=48+50) [Geo mean] 6.63MB/s 6.65MB/s +0.26% Fixes #19844. Change-Id: I5ad16cc0b29267bb4579aca3dcc10a0b8ade1aa4 Reviewed-on: https://go-review.googlesource.com/42430 Run-TryBot: Cherry Zhang <cherryyz@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Cherry Zhang <cherryyz@google.com>
This commit is contained in:
parent
19b05acd13
commit
6897030fe3
@ -798,6 +798,8 @@
|
|||||||
// generic constant folding
|
// generic constant folding
|
||||||
(ADDconst [c] x) && !isARMImmRot(uint32(c)) && isARMImmRot(uint32(-c)) -> (SUBconst [int64(int32(-c))] x)
|
(ADDconst [c] x) && !isARMImmRot(uint32(c)) && isARMImmRot(uint32(-c)) -> (SUBconst [int64(int32(-c))] x)
|
||||||
(SUBconst [c] x) && !isARMImmRot(uint32(c)) && isARMImmRot(uint32(-c)) -> (ADDconst [int64(int32(-c))] x)
|
(SUBconst [c] x) && !isARMImmRot(uint32(c)) && isARMImmRot(uint32(-c)) -> (ADDconst [int64(int32(-c))] x)
|
||||||
|
(ANDconst [c] x) && !isARMImmRot(uint32(c)) && isARMImmRot(^uint32(c)) -> (BICconst [int64(^uint32(c))] x)
|
||||||
|
(BICconst [c] x) && !isARMImmRot(uint32(c)) && isARMImmRot(^uint32(c)) -> (ANDconst [int64(^uint32(c))] x)
|
||||||
(ADDconst [c] (MOVWconst [d])) -> (MOVWconst [int64(int32(c+d))])
|
(ADDconst [c] (MOVWconst [d])) -> (MOVWconst [int64(int32(c+d))])
|
||||||
(ADDconst [c] (ADDconst [d] x)) -> (ADDconst [int64(int32(c+d))] x)
|
(ADDconst [c] (ADDconst [d] x)) -> (ADDconst [int64(int32(c+d))] x)
|
||||||
(ADDconst [c] (SUBconst [d] x)) -> (ADDconst [int64(int32(c-d))] x)
|
(ADDconst [c] (SUBconst [d] x)) -> (ADDconst [int64(int32(c-d))] x)
|
||||||
|
@ -3223,6 +3223,20 @@ func rewriteValueARM_OpARMANDconst_0(v *Value) bool {
|
|||||||
v.AddArg(x)
|
v.AddArg(x)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
// match: (ANDconst [c] x)
|
||||||
|
// cond: !isARMImmRot(uint32(c)) && isARMImmRot(^uint32(c))
|
||||||
|
// result: (BICconst [int64(^uint32(c))] x)
|
||||||
|
for {
|
||||||
|
c := v.AuxInt
|
||||||
|
x := v.Args[0]
|
||||||
|
if !(!isARMImmRot(uint32(c)) && isARMImmRot(^uint32(c))) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
v.reset(OpARMBICconst)
|
||||||
|
v.AuxInt = int64(^uint32(c))
|
||||||
|
v.AddArg(x)
|
||||||
|
return true
|
||||||
|
}
|
||||||
// match: (ANDconst [c] (MOVWconst [d]))
|
// match: (ANDconst [c] (MOVWconst [d]))
|
||||||
// cond:
|
// cond:
|
||||||
// result: (MOVWconst [c&d])
|
// result: (MOVWconst [c&d])
|
||||||
@ -3722,6 +3736,20 @@ func rewriteValueARM_OpARMBICconst_0(v *Value) bool {
|
|||||||
v.AuxInt = 0
|
v.AuxInt = 0
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
// match: (BICconst [c] x)
|
||||||
|
// cond: !isARMImmRot(uint32(c)) && isARMImmRot(^uint32(c))
|
||||||
|
// result: (ANDconst [int64(^uint32(c))] x)
|
||||||
|
for {
|
||||||
|
c := v.AuxInt
|
||||||
|
x := v.Args[0]
|
||||||
|
if !(!isARMImmRot(uint32(c)) && isARMImmRot(^uint32(c))) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
v.reset(OpARMANDconst)
|
||||||
|
v.AuxInt = int64(^uint32(c))
|
||||||
|
v.AddArg(x)
|
||||||
|
return true
|
||||||
|
}
|
||||||
// match: (BICconst [c] (MOVWconst [d]))
|
// match: (BICconst [c] (MOVWconst [d]))
|
||||||
// cond:
|
// cond:
|
||||||
// result: (MOVWconst [d&^c])
|
// result: (MOVWconst [d&^c])
|
||||||
|
@ -121,10 +121,11 @@ const (
|
|||||||
C_PSR
|
C_PSR
|
||||||
C_FCR
|
C_FCR
|
||||||
|
|
||||||
C_RCON /* 0xff rotated */
|
C_RCON /* 0xff rotated */
|
||||||
C_NCON /* ~RCON */
|
C_NCON /* ~RCON */
|
||||||
C_RCON2 /* OR of two disjoint C_RCON constants */
|
C_RCON2A /* OR of two disjoint C_RCON constants */
|
||||||
C_SCON /* 0xffff */
|
C_RCON2S /* subtraction of two disjoint C_RCON constants */
|
||||||
|
C_SCON /* 0xffff */
|
||||||
C_LCON
|
C_LCON
|
||||||
C_LCONADDR
|
C_LCONADDR
|
||||||
C_ZFCON
|
C_ZFCON
|
||||||
|
@ -16,7 +16,8 @@ var cnames5 = []string{
|
|||||||
"FCR",
|
"FCR",
|
||||||
"RCON",
|
"RCON",
|
||||||
"NCON",
|
"NCON",
|
||||||
"RCON2",
|
"RCON2A",
|
||||||
|
"RCON2S",
|
||||||
"SCON",
|
"SCON",
|
||||||
"LCON",
|
"LCON",
|
||||||
"LCONADDR",
|
"LCONADDR",
|
||||||
|
@ -88,6 +88,8 @@ var optab = []Optab{
|
|||||||
{AADD, C_REG, C_NONE, C_REG, 1, 4, 0, 0, 0},
|
{AADD, C_REG, C_NONE, C_REG, 1, 4, 0, 0, 0},
|
||||||
{AAND, C_REG, C_REG, C_REG, 1, 4, 0, 0, 0},
|
{AAND, C_REG, C_REG, C_REG, 1, 4, 0, 0, 0},
|
||||||
{AAND, C_REG, C_NONE, C_REG, 1, 4, 0, 0, 0},
|
{AAND, C_REG, C_NONE, C_REG, 1, 4, 0, 0, 0},
|
||||||
|
{AORR, C_REG, C_REG, C_REG, 1, 4, 0, 0, 0},
|
||||||
|
{AORR, C_REG, C_NONE, C_REG, 1, 4, 0, 0, 0},
|
||||||
{AMOVW, C_REG, C_NONE, C_REG, 1, 4, 0, 0, 0},
|
{AMOVW, C_REG, C_NONE, C_REG, 1, 4, 0, 0, 0},
|
||||||
{AMVN, C_REG, C_NONE, C_REG, 1, 4, 0, 0, 0},
|
{AMVN, C_REG, C_NONE, C_REG, 1, 4, 0, 0, 0},
|
||||||
{ACMP, C_REG, C_REG, C_NONE, 1, 4, 0, 0, 0},
|
{ACMP, C_REG, C_REG, C_NONE, 1, 4, 0, 0, 0},
|
||||||
@ -95,6 +97,8 @@ var optab = []Optab{
|
|||||||
{AADD, C_RCON, C_NONE, C_REG, 2, 4, 0, 0, 0},
|
{AADD, C_RCON, C_NONE, C_REG, 2, 4, 0, 0, 0},
|
||||||
{AAND, C_RCON, C_REG, C_REG, 2, 4, 0, 0, 0},
|
{AAND, C_RCON, C_REG, C_REG, 2, 4, 0, 0, 0},
|
||||||
{AAND, C_RCON, C_NONE, C_REG, 2, 4, 0, 0, 0},
|
{AAND, C_RCON, C_NONE, C_REG, 2, 4, 0, 0, 0},
|
||||||
|
{AORR, C_RCON, C_REG, C_REG, 2, 4, 0, 0, 0},
|
||||||
|
{AORR, C_RCON, C_NONE, C_REG, 2, 4, 0, 0, 0},
|
||||||
{AMOVW, C_RCON, C_NONE, C_REG, 2, 4, 0, 0, 0},
|
{AMOVW, C_RCON, C_NONE, C_REG, 2, 4, 0, 0, 0},
|
||||||
{AMVN, C_RCON, C_NONE, C_REG, 2, 4, 0, 0, 0},
|
{AMVN, C_RCON, C_NONE, C_REG, 2, 4, 0, 0, 0},
|
||||||
{ACMP, C_RCON, C_REG, C_NONE, 2, 4, 0, 0, 0},
|
{ACMP, C_RCON, C_REG, C_NONE, 2, 4, 0, 0, 0},
|
||||||
@ -102,6 +106,8 @@ var optab = []Optab{
|
|||||||
{AADD, C_SHIFT, C_NONE, C_REG, 3, 4, 0, 0, 0},
|
{AADD, C_SHIFT, C_NONE, C_REG, 3, 4, 0, 0, 0},
|
||||||
{AAND, C_SHIFT, C_REG, C_REG, 3, 4, 0, 0, 0},
|
{AAND, C_SHIFT, C_REG, C_REG, 3, 4, 0, 0, 0},
|
||||||
{AAND, C_SHIFT, C_NONE, C_REG, 3, 4, 0, 0, 0},
|
{AAND, C_SHIFT, C_NONE, C_REG, 3, 4, 0, 0, 0},
|
||||||
|
{AORR, C_SHIFT, C_REG, C_REG, 3, 4, 0, 0, 0},
|
||||||
|
{AORR, C_SHIFT, C_NONE, C_REG, 3, 4, 0, 0, 0},
|
||||||
{AMVN, C_SHIFT, C_NONE, C_REG, 3, 4, 0, 0, 0},
|
{AMVN, C_SHIFT, C_NONE, C_REG, 3, 4, 0, 0, 0},
|
||||||
{ACMP, C_SHIFT, C_REG, C_NONE, 3, 4, 0, 0, 0},
|
{ACMP, C_SHIFT, C_REG, C_NONE, 3, 4, 0, 0, 0},
|
||||||
{AMOVW, C_RACON, C_NONE, C_REG, 4, 4, REGSP, 0, 0},
|
{AMOVW, C_RACON, C_NONE, C_REG, 4, 4, REGSP, 0, 0},
|
||||||
@ -136,20 +142,27 @@ var optab = []Optab{
|
|||||||
{AADD, C_NCON, C_NONE, C_REG, 13, 8, 0, 0, 0},
|
{AADD, C_NCON, C_NONE, C_REG, 13, 8, 0, 0, 0},
|
||||||
{AAND, C_NCON, C_REG, C_REG, 13, 8, 0, 0, 0},
|
{AAND, C_NCON, C_REG, C_REG, 13, 8, 0, 0, 0},
|
||||||
{AAND, C_NCON, C_NONE, C_REG, 13, 8, 0, 0, 0},
|
{AAND, C_NCON, C_NONE, C_REG, 13, 8, 0, 0, 0},
|
||||||
|
{AORR, C_NCON, C_REG, C_REG, 13, 8, 0, 0, 0},
|
||||||
|
{AORR, C_NCON, C_NONE, C_REG, 13, 8, 0, 0, 0},
|
||||||
{AMVN, C_NCON, C_NONE, C_REG, 13, 8, 0, 0, 0},
|
{AMVN, C_NCON, C_NONE, C_REG, 13, 8, 0, 0, 0},
|
||||||
{ACMP, C_NCON, C_REG, C_NONE, 13, 8, 0, 0, 0},
|
{ACMP, C_NCON, C_REG, C_NONE, 13, 8, 0, 0, 0},
|
||||||
{AADD, C_SCON, C_REG, C_REG, 13, 8, 0, 0, 0},
|
{AADD, C_SCON, C_REG, C_REG, 13, 8, 0, 0, 0},
|
||||||
{AADD, C_SCON, C_NONE, C_REG, 13, 8, 0, 0, 0},
|
{AADD, C_SCON, C_NONE, C_REG, 13, 8, 0, 0, 0},
|
||||||
{AAND, C_SCON, C_REG, C_REG, 13, 8, 0, 0, 0},
|
{AAND, C_SCON, C_REG, C_REG, 13, 8, 0, 0, 0},
|
||||||
{AAND, C_SCON, C_NONE, C_REG, 13, 8, 0, 0, 0},
|
{AAND, C_SCON, C_NONE, C_REG, 13, 8, 0, 0, 0},
|
||||||
|
{AORR, C_SCON, C_REG, C_REG, 13, 8, 0, 0, 0},
|
||||||
|
{AORR, C_SCON, C_NONE, C_REG, 13, 8, 0, 0, 0},
|
||||||
{AMVN, C_SCON, C_NONE, C_REG, 13, 8, 0, 0, 0},
|
{AMVN, C_SCON, C_NONE, C_REG, 13, 8, 0, 0, 0},
|
||||||
{ACMP, C_SCON, C_REG, C_NONE, 13, 8, 0, 0, 0},
|
{ACMP, C_SCON, C_REG, C_NONE, 13, 8, 0, 0, 0},
|
||||||
{AADD, C_RCON2, C_REG, C_REG, 106, 8, 0, 0, 0},
|
{AADD, C_RCON2A, C_REG, C_REG, 106, 8, 0, 0, 0},
|
||||||
// TODO: RCON2: how to do AND and BIC?
|
{AORR, C_RCON2A, C_REG, C_REG, 106, 8, 0, 0, 0},
|
||||||
|
{AADD, C_RCON2S, C_REG, C_REG, 107, 8, 0, 0, 0},
|
||||||
{AADD, C_LCON, C_REG, C_REG, 13, 8, 0, LFROM, 0},
|
{AADD, C_LCON, C_REG, C_REG, 13, 8, 0, LFROM, 0},
|
||||||
{AADD, C_LCON, C_NONE, C_REG, 13, 8, 0, LFROM, 0},
|
{AADD, C_LCON, C_NONE, C_REG, 13, 8, 0, LFROM, 0},
|
||||||
{AAND, C_LCON, C_REG, C_REG, 13, 8, 0, LFROM, 0},
|
{AAND, C_LCON, C_REG, C_REG, 13, 8, 0, LFROM, 0},
|
||||||
{AAND, C_LCON, C_NONE, C_REG, 13, 8, 0, LFROM, 0},
|
{AAND, C_LCON, C_NONE, C_REG, 13, 8, 0, LFROM, 0},
|
||||||
|
{AORR, C_LCON, C_REG, C_REG, 13, 8, 0, LFROM, 0},
|
||||||
|
{AORR, C_LCON, C_NONE, C_REG, 13, 8, 0, LFROM, 0},
|
||||||
{AMVN, C_LCON, C_NONE, C_REG, 13, 8, 0, LFROM, 0},
|
{AMVN, C_LCON, C_NONE, C_REG, 13, 8, 0, LFROM, 0},
|
||||||
{ACMP, C_LCON, C_REG, C_NONE, 13, 8, 0, LFROM, 0},
|
{ACMP, C_LCON, C_REG, C_NONE, 13, 8, 0, LFROM, 0},
|
||||||
{AMOVB, C_REG, C_NONE, C_REG, 1, 4, 0, 0, 0},
|
{AMOVB, C_REG, C_NONE, C_REG, 1, 4, 0, 0, 0},
|
||||||
@ -970,10 +983,10 @@ func immrot(v uint32) int32 {
|
|||||||
return 0
|
return 0
|
||||||
}
|
}
|
||||||
|
|
||||||
// immrot2 returns bits encoding the immediate constant fields of two instructions,
|
// immrot2a returns bits encoding the immediate constant fields of two instructions,
|
||||||
// such that the encoded constants x, y satisfy x|y==v, x&y==0.
|
// such that the encoded constants x, y satisfy x|y==v, x&y==0.
|
||||||
// Returns 0,0 if no such decomposition of v exists.
|
// Returns 0,0 if no such decomposition of v exists.
|
||||||
func immrot2(v uint32) (uint32, uint32) {
|
func immrot2a(v uint32) (uint32, uint32) {
|
||||||
for i := uint(1); i < 32; i++ {
|
for i := uint(1); i < 32; i++ {
|
||||||
m := uint32(1<<i - 1)
|
m := uint32(1<<i - 1)
|
||||||
if x, y := immrot(v&m), immrot(v&^m); x != 0 && y != 0 {
|
if x, y := immrot(v&m), immrot(v&^m); x != 0 && y != 0 {
|
||||||
@ -985,6 +998,32 @@ func immrot2(v uint32) (uint32, uint32) {
|
|||||||
return 0, 0
|
return 0, 0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// immrot2s returns bits encoding the immediate constant fields of two instructions,
|
||||||
|
// such that the encoded constants y, x satisfy y-x==v, y&x==0.
|
||||||
|
// Returns 0,0 if no such decomposition of v exists.
|
||||||
|
func immrot2s(v uint32) (uint32, uint32) {
|
||||||
|
if immrot(v) == 0 {
|
||||||
|
return v, 0
|
||||||
|
}
|
||||||
|
// suppose v in the form of {leading 00, upper effective bits, lower 8 effective bits, trailing 00}
|
||||||
|
// omit trailing 00
|
||||||
|
var i uint32
|
||||||
|
for i = 2; i < 32; i += 2 {
|
||||||
|
if v&(1<<i-1) != 0 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// i must be <= 24, then adjust i just above lower 8 effective bits of v
|
||||||
|
i += 6
|
||||||
|
// let x = {the complement of lower 8 effective bits, trailing 00}, y = x + v
|
||||||
|
x := 1<<i - v&(1<<i-1)
|
||||||
|
y := v + x
|
||||||
|
if y, x = uint32(immrot(y)), uint32(immrot(x)); y != 0 && x != 0 {
|
||||||
|
return y, x
|
||||||
|
}
|
||||||
|
return 0, 0
|
||||||
|
}
|
||||||
|
|
||||||
func immaddr(v int32) int32 {
|
func immaddr(v int32) int32 {
|
||||||
if v >= 0 && v <= 0xfff {
|
if v >= 0 && v <= 0xfff {
|
||||||
return v&0xfff | 1<<24 | 1<<23 /* pre indexing */ /* pre indexing, up */
|
return v&0xfff | 1<<24 | 1<<23 /* pre indexing */ /* pre indexing, up */
|
||||||
@ -1159,8 +1198,11 @@ func (c *ctxt5) aclass(a *obj.Addr) int {
|
|||||||
if uint32(c.instoffset) <= 0xffff && objabi.GOARM == 7 {
|
if uint32(c.instoffset) <= 0xffff && objabi.GOARM == 7 {
|
||||||
return C_SCON
|
return C_SCON
|
||||||
}
|
}
|
||||||
if x, y := immrot2(uint32(c.instoffset)); x != 0 && y != 0 {
|
if x, y := immrot2a(uint32(c.instoffset)); x != 0 && y != 0 {
|
||||||
return C_RCON2
|
return C_RCON2A
|
||||||
|
}
|
||||||
|
if y, x := immrot2s(uint32(c.instoffset)); x != 0 && y != 0 {
|
||||||
|
return C_RCON2S
|
||||||
}
|
}
|
||||||
return C_LCON
|
return C_LCON
|
||||||
|
|
||||||
@ -1226,13 +1268,12 @@ func (c *ctxt5) oplook(p *obj.Prog) *Optab {
|
|||||||
a2 = C_REG
|
a2 = C_REG
|
||||||
}
|
}
|
||||||
|
|
||||||
// If Scond != 0, we must use the constant pool instead of
|
// If current instruction has a .S suffix (flags update),
|
||||||
// splitting the instruction in two. The most common reason is
|
// we must use the constant pool instead of splitting it.
|
||||||
// .S (flag updating) instructions. There may be others.
|
if (a1 == C_RCON2A || a1 == C_RCON2S) && p.Scond&C_SBIT != 0 {
|
||||||
if a1 == C_RCON2 && p.Scond != 0 {
|
|
||||||
a1 = C_LCON
|
a1 = C_LCON
|
||||||
}
|
}
|
||||||
if a3 == C_RCON2 && p.Scond != 0 {
|
if (a3 == C_RCON2A || a3 == C_RCON2S) && p.Scond&C_SBIT != 0 {
|
||||||
a3 = C_LCON
|
a3 = C_LCON
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1266,7 +1307,7 @@ func cmp(a int, b int) bool {
|
|||||||
}
|
}
|
||||||
switch a {
|
switch a {
|
||||||
case C_LCON:
|
case C_LCON:
|
||||||
if b == C_RCON || b == C_NCON || b == C_SCON || b == C_RCON2 {
|
if b == C_RCON || b == C_NCON || b == C_SCON || b == C_RCON2A || b == C_RCON2S {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1406,16 +1447,14 @@ func buildop(ctxt *obj.Link) {
|
|||||||
log.Fatalf("bad code")
|
log.Fatalf("bad code")
|
||||||
|
|
||||||
case AADD:
|
case AADD:
|
||||||
opset(AEOR, r0)
|
|
||||||
opset(ASUB, r0)
|
opset(ASUB, r0)
|
||||||
opset(ARSB, r0)
|
opset(ARSB, r0)
|
||||||
opset(AADC, r0)
|
opset(AADC, r0)
|
||||||
opset(ASBC, r0)
|
opset(ASBC, r0)
|
||||||
opset(ARSC, r0)
|
opset(ARSC, r0)
|
||||||
opset(AORR, r0)
|
|
||||||
|
|
||||||
case AAND:
|
case AORR:
|
||||||
opset(AAND, r0)
|
opset(AEOR, r0)
|
||||||
opset(ABIC, r0)
|
opset(ABIC, r0)
|
||||||
|
|
||||||
case ACMP:
|
case ACMP:
|
||||||
@ -1541,6 +1580,7 @@ func buildop(ctxt *obj.Link) {
|
|||||||
ALDREXD,
|
ALDREXD,
|
||||||
ASTREXD,
|
ASTREXD,
|
||||||
APLD,
|
APLD,
|
||||||
|
AAND,
|
||||||
obj.AUNDEF,
|
obj.AUNDEF,
|
||||||
obj.AFUNCDATA,
|
obj.AFUNCDATA,
|
||||||
obj.APCDATA,
|
obj.APCDATA,
|
||||||
@ -1609,11 +1649,11 @@ func (c *ctxt5) asmout(p *obj.Prog, o *Optab, out []uint32) {
|
|||||||
c.aclass(&p.From)
|
c.aclass(&p.From)
|
||||||
r := int(p.Reg)
|
r := int(p.Reg)
|
||||||
rt := int(p.To.Reg)
|
rt := int(p.To.Reg)
|
||||||
x, y := immrot2(uint32(c.instoffset))
|
x, y := immrot2a(uint32(c.instoffset))
|
||||||
var as2 obj.As
|
var as2 obj.As
|
||||||
switch p.As {
|
switch p.As {
|
||||||
case AADD, ASUB, AORR, AEOR:
|
case AADD, ASUB, AORR, AEOR, ABIC:
|
||||||
as2 = p.As // ADD, SUB, ORR, EOR
|
as2 = p.As // ADD, SUB, ORR, EOR, BIC
|
||||||
case ARSB:
|
case ARSB:
|
||||||
as2 = AADD // RSB -> RSB/ADD pair
|
as2 = AADD // RSB -> RSB/ADD pair
|
||||||
case AADC:
|
case AADC:
|
||||||
@ -1632,6 +1672,35 @@ func (c *ctxt5) asmout(p *obj.Prog, o *Optab, out []uint32) {
|
|||||||
o1 |= x
|
o1 |= x
|
||||||
o2 |= y
|
o2 |= y
|
||||||
|
|
||||||
|
case 107: /* op $I,R,R where I can be decomposed into 2 immediates */
|
||||||
|
c.aclass(&p.From)
|
||||||
|
r := int(p.Reg)
|
||||||
|
rt := int(p.To.Reg)
|
||||||
|
y, x := immrot2s(uint32(c.instoffset))
|
||||||
|
var as2 obj.As
|
||||||
|
switch p.As {
|
||||||
|
case AADD:
|
||||||
|
as2 = ASUB // ADD -> ADD/SUB pair
|
||||||
|
case ASUB:
|
||||||
|
as2 = AADD // SUB -> SUB/ADD pair
|
||||||
|
case ARSB:
|
||||||
|
as2 = ASUB // RSB -> RSB/SUB pair
|
||||||
|
case AADC:
|
||||||
|
as2 = ASUB // ADC -> ADC/SUB pair
|
||||||
|
case ASBC:
|
||||||
|
as2 = AADD // SBC -> SBC/ADD pair
|
||||||
|
case ARSC:
|
||||||
|
as2 = ASUB // RSC -> RSC/SUB pair
|
||||||
|
default:
|
||||||
|
c.ctxt.Diag("unknown second op for %v", p)
|
||||||
|
}
|
||||||
|
o1 = c.oprrr(p, p.As, int(p.Scond))
|
||||||
|
o2 = c.oprrr(p, as2, int(p.Scond))
|
||||||
|
o1 |= (uint32(r)&15)<<16 | (uint32(rt)&15)<<12
|
||||||
|
o2 |= (uint32(rt)&15)<<16 | (uint32(rt)&15)<<12
|
||||||
|
o1 |= y
|
||||||
|
o2 |= x
|
||||||
|
|
||||||
case 3: /* add R<<[IR],[R],R */
|
case 3: /* add R<<[IR],[R],R */
|
||||||
o1 = c.mov(p)
|
o1 = c.mov(p)
|
||||||
|
|
||||||
|
134
test/armimm.go
134
test/armimm.go
@ -11,57 +11,99 @@ package main
|
|||||||
|
|
||||||
import "fmt"
|
import "fmt"
|
||||||
|
|
||||||
const c32 = 0xaa00dd
|
const c32a = 0x00aa00dd
|
||||||
const c64 = 0xaa00dd55000066
|
const c32s = 0x00ffff00
|
||||||
|
const c64a = 0x00aa00dd55000066
|
||||||
|
const c64s = 0x00ffff00004fff00
|
||||||
|
|
||||||
//go:noinline
|
//go:noinline
|
||||||
func add32(x uint32) uint32 {
|
func add32a(x uint32) uint32 {
|
||||||
return x + c32
|
return x + c32a
|
||||||
}
|
}
|
||||||
|
|
||||||
//go:noinline
|
//go:noinline
|
||||||
func sub32(x uint32) uint32 {
|
func add32s(x uint32) uint32 {
|
||||||
return x - c32
|
return x + c32s
|
||||||
|
}
|
||||||
|
|
||||||
|
//go:noinline
|
||||||
|
func sub32a(x uint32) uint32 {
|
||||||
|
return x - c32a
|
||||||
|
}
|
||||||
|
|
||||||
|
//go:noinline
|
||||||
|
func sub32s(x uint32) uint32 {
|
||||||
|
return x - c32s
|
||||||
}
|
}
|
||||||
|
|
||||||
//go:noinline
|
//go:noinline
|
||||||
func or32(x uint32) uint32 {
|
func or32(x uint32) uint32 {
|
||||||
return x | c32
|
return x | c32a
|
||||||
}
|
}
|
||||||
|
|
||||||
//go:noinline
|
//go:noinline
|
||||||
func xor32(x uint32) uint32 {
|
func xor32(x uint32) uint32 {
|
||||||
return x ^ c32
|
return x ^ c32a
|
||||||
}
|
}
|
||||||
|
|
||||||
//go:noinline
|
//go:noinline
|
||||||
func subr32(x uint32) uint32 {
|
func subr32a(x uint32) uint32 {
|
||||||
return c32 - x
|
return c32a - x
|
||||||
}
|
}
|
||||||
|
|
||||||
//go:noinline
|
//go:noinline
|
||||||
func add64(x uint64) uint64 {
|
func subr32s(x uint32) uint32 {
|
||||||
return x + c64
|
return c32s - x
|
||||||
}
|
}
|
||||||
|
|
||||||
//go:noinline
|
//go:noinline
|
||||||
func sub64(x uint64) uint64 {
|
func bic32(x uint32) uint32 {
|
||||||
return x - c64
|
return x &^ c32a
|
||||||
|
}
|
||||||
|
|
||||||
|
//go:noinline
|
||||||
|
func add64a(x uint64) uint64 {
|
||||||
|
return x + c64a
|
||||||
|
}
|
||||||
|
|
||||||
|
//go:noinline
|
||||||
|
func add64s(x uint64) uint64 {
|
||||||
|
return x + c64s
|
||||||
|
}
|
||||||
|
|
||||||
|
//go:noinline
|
||||||
|
func sub64a(x uint64) uint64 {
|
||||||
|
return x - c64a
|
||||||
|
}
|
||||||
|
|
||||||
|
//go:noinline
|
||||||
|
func sub64s(x uint64) uint64 {
|
||||||
|
return x - c64s
|
||||||
}
|
}
|
||||||
|
|
||||||
//go:noinline
|
//go:noinline
|
||||||
func or64(x uint64) uint64 {
|
func or64(x uint64) uint64 {
|
||||||
return x | c64
|
return x | c64a
|
||||||
}
|
}
|
||||||
|
|
||||||
//go:noinline
|
//go:noinline
|
||||||
func xor64(x uint64) uint64 {
|
func xor64(x uint64) uint64 {
|
||||||
return x ^ c64
|
return x ^ c64a
|
||||||
}
|
}
|
||||||
|
|
||||||
//go:noinline
|
//go:noinline
|
||||||
func subr64(x uint64) uint64 {
|
func subr64a(x uint64) uint64 {
|
||||||
return c64 - x
|
return c64a - x
|
||||||
|
}
|
||||||
|
|
||||||
|
//go:noinline
|
||||||
|
func subr64s(x uint64) uint64 {
|
||||||
|
return c64s - x
|
||||||
|
}
|
||||||
|
|
||||||
|
//go:noinline
|
||||||
|
func bic64(x uint64) uint64 {
|
||||||
|
return x &^ c64a
|
||||||
}
|
}
|
||||||
|
|
||||||
// Note: x-c gets rewritten to x+(-c), so SUB and SBC are not directly testable.
|
// Note: x-c gets rewritten to x+(-c), so SUB and SBC are not directly testable.
|
||||||
@ -75,39 +117,63 @@ func main() {
|
|||||||
func test32() {
|
func test32() {
|
||||||
var a uint32 = 0x11111111
|
var a uint32 = 0x11111111
|
||||||
var want, got uint32
|
var want, got uint32
|
||||||
if want, got = a+c32, add32(a); got != want {
|
if want, got = a+c32a, add32a(a); got != want {
|
||||||
panic(fmt.Sprintf("add32(%x) = %x, want %x", a, got, want))
|
panic(fmt.Sprintf("add32a(%x) = %x, want %x", a, got, want))
|
||||||
}
|
}
|
||||||
if want, got = a-c32, sub32(a); got != want {
|
if want, got = a+c32s, add32s(a); got != want {
|
||||||
panic(fmt.Sprintf("sub32(%x) = %x, want %x", a, got, want))
|
panic(fmt.Sprintf("add32s(%x) = %x, want %x", a, got, want))
|
||||||
}
|
}
|
||||||
if want, got = a|c32, or32(a); got != want {
|
if want, got = a-c32a, sub32a(a); got != want {
|
||||||
|
panic(fmt.Sprintf("sub32a(%x) = %x, want %x", a, got, want))
|
||||||
|
}
|
||||||
|
if want, got = a-c32s, sub32s(a); got != want {
|
||||||
|
panic(fmt.Sprintf("sub32s(%x) = %x, want %x", a, got, want))
|
||||||
|
}
|
||||||
|
if want, got = a|c32a, or32(a); got != want {
|
||||||
panic(fmt.Sprintf("or32(%x) = %x, want %x", a, got, want))
|
panic(fmt.Sprintf("or32(%x) = %x, want %x", a, got, want))
|
||||||
}
|
}
|
||||||
if want, got = a^c32, xor32(a); got != want {
|
if want, got = a^c32a, xor32(a); got != want {
|
||||||
panic(fmt.Sprintf("xor32(%x) = %x, want %x", a, got, want))
|
panic(fmt.Sprintf("xor32(%x) = %x, want %x", a, got, want))
|
||||||
}
|
}
|
||||||
if want, got = c32-a, subr32(a); got != want {
|
if want, got = c32a-a, subr32a(a); got != want {
|
||||||
panic(fmt.Sprintf("subr32(%x) = %x, want %x", a, got, want))
|
panic(fmt.Sprintf("subr32a(%x) = %x, want %x", a, got, want))
|
||||||
|
}
|
||||||
|
if want, got = c32s-a, subr32s(a); got != want {
|
||||||
|
panic(fmt.Sprintf("subr32s(%x) = %x, want %x", a, got, want))
|
||||||
|
}
|
||||||
|
if want, got = a&^c32a, bic32(a); got != want {
|
||||||
|
panic(fmt.Sprintf("bic32(%x) = %x, want %x", a, got, want))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func test64() {
|
func test64() {
|
||||||
var a uint64 = 0x1111111111111111
|
var a uint64 = 0x1111111111111111
|
||||||
var want, got uint64
|
var want, got uint64
|
||||||
if want, got = a+c64, add64(a); got != want {
|
if want, got = a+c64a, add64a(a); got != want {
|
||||||
panic(fmt.Sprintf("add64(%x) = %x, want %x", a, got, want))
|
panic(fmt.Sprintf("add64a(%x) = %x, want %x", a, got, want))
|
||||||
}
|
}
|
||||||
if want, got = a-c64, sub64(a); got != want {
|
if want, got = a+c64s, add64s(a); got != want {
|
||||||
panic(fmt.Sprintf("sub64(%x) = %x, want %x", a, got, want))
|
panic(fmt.Sprintf("add64s(%x) = %x, want %x", a, got, want))
|
||||||
}
|
}
|
||||||
if want, got = a|c64, or64(a); got != want {
|
if want, got = a-c64a, sub64a(a); got != want {
|
||||||
|
panic(fmt.Sprintf("sub64a(%x) = %x, want %x", a, got, want))
|
||||||
|
}
|
||||||
|
if want, got = a-c64s, sub64s(a); got != want {
|
||||||
|
panic(fmt.Sprintf("sub64s(%x) = %x, want %x", a, got, want))
|
||||||
|
}
|
||||||
|
if want, got = a|c64a, or64(a); got != want {
|
||||||
panic(fmt.Sprintf("or64(%x) = %x, want %x", a, got, want))
|
panic(fmt.Sprintf("or64(%x) = %x, want %x", a, got, want))
|
||||||
}
|
}
|
||||||
if want, got = a^c64, xor64(a); got != want {
|
if want, got = a^c64a, xor64(a); got != want {
|
||||||
panic(fmt.Sprintf("xor64(%x) = %x, want %x", a, got, want))
|
panic(fmt.Sprintf("xor64(%x) = %x, want %x", a, got, want))
|
||||||
}
|
}
|
||||||
if want, got = c64-a, subr64(a); got != want {
|
if want, got = c64a-a, subr64a(a); got != want {
|
||||||
panic(fmt.Sprintf("subr64(%x) = %x, want %x", a, got, want))
|
panic(fmt.Sprintf("subr64a(%x) = %x, want %x", a, got, want))
|
||||||
|
}
|
||||||
|
if want, got = c64s-a, subr64s(a); got != want {
|
||||||
|
panic(fmt.Sprintf("subr64s(%x) = %x, want %x", a, got, want))
|
||||||
|
}
|
||||||
|
if want, got = a&^c64a, bic64(a); got != want {
|
||||||
|
panic(fmt.Sprintf("bic64(%x) = %x, want %x", a, got, want))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user