cmd/compile: don't use BTS when OR works, add direct memory BTS operations

Stop using BTSconst and friends when ORLconst can be used instead.
OR can be issued by more functional units than BTS can, so it could
lead to better IPC. OR might take a few more bytes to encode, but
not a lot more.

Still use BTSconst for cases where the constant otherwise wouldn't
fit and would require a separate movabs instruction to materialize
the constant. This happens when setting bits 31-63 of 64-bit targets.

Add BTS-to-memory operations so we don't need to load/bts/store.

Fixes #61694

Change-Id: I00379608df8fb0167cb01466e97d11dec7c1596c
Reviewed-on: https://go-review.googlesource.com/c/go/+/515755
Reviewed-by: Keith Randall <khr@google.com>
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Cherry Mui <cherryyz@google.com>
This commit is contained in:
Keith Randall 2023-08-01 14:32:56 -07:00 committed by Keith Randall
parent 51cb12e83b
commit 611706b171
8 changed files with 223 additions and 683 deletions

View File

@ -714,9 +714,9 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
p.To.Offset = v.AuxInt
case ssa.OpAMD64BTLconst, ssa.OpAMD64BTQconst,
ssa.OpAMD64TESTQconst, ssa.OpAMD64TESTLconst, ssa.OpAMD64TESTWconst, ssa.OpAMD64TESTBconst,
ssa.OpAMD64BTSLconst, ssa.OpAMD64BTSQconst,
ssa.OpAMD64BTCLconst, ssa.OpAMD64BTCQconst,
ssa.OpAMD64BTRLconst, ssa.OpAMD64BTRQconst:
ssa.OpAMD64BTSQconst,
ssa.OpAMD64BTCQconst,
ssa.OpAMD64BTRQconst:
op := v.Op
if op == ssa.OpAMD64BTQconst && v.AuxInt < 32 {
// Emit 32-bit version because it's shorter
@ -851,7 +851,8 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
}
fallthrough
case ssa.OpAMD64ANDQconstmodify, ssa.OpAMD64ANDLconstmodify, ssa.OpAMD64ORQconstmodify, ssa.OpAMD64ORLconstmodify,
ssa.OpAMD64XORQconstmodify, ssa.OpAMD64XORLconstmodify:
ssa.OpAMD64XORQconstmodify, ssa.OpAMD64XORLconstmodify,
ssa.OpAMD64BTSQconstmodify, ssa.OpAMD64BTRQconstmodify, ssa.OpAMD64BTCQconstmodify:
sc := v.AuxValAndOff()
off := sc.Off64()
val := sc.Val64()

View File

@ -82,8 +82,8 @@
(Ctz32 x) && buildcfg.GOAMD64 >= 3 => (TZCNTL x)
(Ctz64 <t> x) && buildcfg.GOAMD64 < 3 => (CMOVQEQ (Select0 <t> (BSFQ x)) (MOVQconst <t> [64]) (Select1 <types.TypeFlags> (BSFQ x)))
(Ctz32 x) && buildcfg.GOAMD64 < 3 => (Select0 (BSFQ (BTSQconst <typ.UInt64> [32] x)))
(Ctz16 x) => (BSFL (BTSLconst <typ.UInt32> [16] x))
(Ctz8 x) => (BSFL (BTSLconst <typ.UInt32> [ 8] x))
(Ctz16 x) => (BSFL (ORLconst <typ.UInt32> [1<<16] x))
(Ctz8 x) => (BSFL (ORLconst <typ.UInt32> [1<<8 ] x))
(Ctz64NonZero x) && buildcfg.GOAMD64 >= 3 => (TZCNTQ x)
(Ctz32NonZero x) && buildcfg.GOAMD64 >= 3 => (TZCNTL x)
@ -659,29 +659,16 @@
// Recognize bit setting (a |= 1<<b) and toggling (a ^= 1<<b)
(OR(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y) x) => (BTS(Q|L) x y)
(XOR(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y) x) => (BTC(Q|L) x y)
// Convert ORconst into BTS, if the code gets smaller, with boundary being
// (ORL $40,AX is 3 bytes, ORL $80,AX is 6 bytes).
((ORQ|XORQ)const [c] x) && isUint64PowerOfTwo(int64(c)) && uint64(c) >= 128
=> (BT(S|C)Qconst [int8(log32(c))] x)
((ORL|XORL)const [c] x) && isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128
=> (BT(S|C)Lconst [int8(log32(c))] x)
((ORQ|XORQ) (MOVQconst [c]) x) && isUint64PowerOfTwo(c) && uint64(c) >= 128
=> (BT(S|C)Qconst [int8(log64(c))] x)
((ORL|XORL) (MOVLconst [c]) x) && isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128
=> (BT(S|C)Lconst [int8(log32(c))] x)
// Note: only convert OR/XOR to BTS/BTC if the constant wouldn't fit in
// the constant field of the OR/XOR instruction. See issue 61694.
((OR|XOR)Q (MOVQconst [c]) x) && isUint64PowerOfTwo(c) && uint64(c) >= 1<<31 => (BT(S|C)Qconst [int8(log64(c))] x)
// Recognize bit clearing: a &^= 1<<b
(AND(Q|L) (NOT(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y)) x) => (BTR(Q|L) x y)
(ANDN(Q|L) x (SHL(Q|L) (MOV(Q|L)const [1]) y)) => (BTR(Q|L) x y)
(ANDQconst [c] x) && isUint64PowerOfTwo(int64(^c)) && uint64(^c) >= 128
=> (BTRQconst [int8(log32(^c))] x)
(ANDLconst [c] x) && isUint32PowerOfTwo(int64(^c)) && uint64(^c) >= 128
=> (BTRLconst [int8(log32(^c))] x)
(ANDQ (MOVQconst [c]) x) && isUint64PowerOfTwo(^c) && uint64(^c) >= 128
=> (BTRQconst [int8(log64(^c))] x)
(ANDL (MOVLconst [c]) x) && isUint32PowerOfTwo(int64(^c)) && uint64(^c) >= 128
=> (BTRLconst [int8(log32(^c))] x)
// Note: only convert AND to BTR if the constant wouldn't fit in
// the constant field of the AND instruction. See issue 61694.
(ANDQ (MOVQconst [c]) x) && isUint64PowerOfTwo(^c) && uint64(^c) >= 1<<31 => (BTRQconst [int8(log64(^c))] x)
// Special-case bit patterns on first/last bit.
// generic.rules changes ANDs of high-part/low-part masks into a couple of shifts,
@ -695,9 +682,9 @@
// Special case resetting first/last bit
(SHL(L|Q)const [1] (SHR(L|Q)const [1] x))
=> (BTR(L|Q)const [0] x)
=> (AND(L|Q)const [-2] x)
(SHRLconst [1] (SHLLconst [1] x))
=> (BTRLconst [31] x)
=> (ANDLconst [0x7fffffff] x)
(SHRQconst [1] (SHLQconst [1] x))
=> (BTRQconst [63] x)
@ -731,10 +718,10 @@
=> (SET(B|AE)store [off] {sym} ptr (BTLconst [31] x) mem)
// Fold combinations of bit ops on same bit. An example is math.Copysign(c,-1)
(BTS(Q|L)const [c] (BTR(Q|L)const [c] x)) => (BTS(Q|L)const [c] x)
(BTS(Q|L)const [c] (BTC(Q|L)const [c] x)) => (BTS(Q|L)const [c] x)
(BTR(Q|L)const [c] (BTS(Q|L)const [c] x)) => (BTR(Q|L)const [c] x)
(BTR(Q|L)const [c] (BTC(Q|L)const [c] x)) => (BTR(Q|L)const [c] x)
(BTSQconst [c] (BTRQconst [c] x)) => (BTSQconst [c] x)
(BTSQconst [c] (BTCQconst [c] x)) => (BTSQconst [c] x)
(BTRQconst [c] (BTSQconst [c] x)) => (BTRQconst [c] x)
(BTRQconst [c] (BTCQconst [c] x)) => (BTRQconst [c] x)
// Fold boolean negation into SETcc.
(XORLconst [1] (SETNE x)) => (SETEQ x)
@ -778,31 +765,6 @@
(XOR(L|Q)const [c] (XOR(L|Q)const [d] x)) => (XOR(L|Q)const [c ^ d] x)
(OR(L|Q)const [c] (OR(L|Q)const [d] x)) => (OR(L|Q)const [c | d] x)
(BTRLconst [c] (ANDLconst [d] x)) => (ANDLconst [d &^ (1<<uint32(c))] x)
(ANDLconst [c] (BTRLconst [d] x)) => (ANDLconst [c &^ (1<<uint32(d))] x)
(BTRLconst [c] (BTRLconst [d] x)) => (ANDLconst [^(1<<uint32(c) | 1<<uint32(d))] x)
(BTCLconst [c] (XORLconst [d] x)) => (XORLconst [d ^ 1<<uint32(c)] x)
(XORLconst [c] (BTCLconst [d] x)) => (XORLconst [c ^ 1<<uint32(d)] x)
(BTCLconst [c] (BTCLconst [d] x)) => (XORLconst [1<<uint32(c) | 1<<uint32(d)] x)
(BTSLconst [c] (ORLconst [d] x)) => (ORLconst [d | 1<<uint32(c)] x)
(ORLconst [c] (BTSLconst [d] x)) => (ORLconst [c | 1<<uint32(d)] x)
(BTSLconst [c] (BTSLconst [d] x)) => (ORLconst [1<<uint32(c) | 1<<uint32(d)] x)
(BTRQconst [c] (ANDQconst [d] x)) && is32Bit(int64(d) &^ (1<<uint32(c))) => (ANDQconst [d &^ (1<<uint32(c))] x)
(ANDQconst [c] (BTRQconst [d] x)) && is32Bit(int64(c) &^ (1<<uint32(d))) => (ANDQconst [c &^ (1<<uint32(d))] x)
(BTRQconst [c] (BTRQconst [d] x)) && is32Bit(^(1<<uint32(c) | 1<<uint32(d))) => (ANDQconst [^(1<<uint32(c) | 1<<uint32(d))] x)
(BTCQconst [c] (XORQconst [d] x)) && is32Bit(int64(d) ^ 1<<uint32(c)) => (XORQconst [d ^ 1<<uint32(c)] x)
(XORQconst [c] (BTCQconst [d] x)) && is32Bit(int64(c) ^ 1<<uint32(d)) => (XORQconst [c ^ 1<<uint32(d)] x)
(BTCQconst [c] (BTCQconst [d] x)) && is32Bit(1<<uint32(c) ^ 1<<uint32(d)) => (XORQconst [1<<uint32(c) ^ 1<<uint32(d)] x)
(BTSQconst [c] (ORQconst [d] x)) && is32Bit(int64(d) | 1<<uint32(c)) => (ORQconst [d | 1<<uint32(c)] x)
(ORQconst [c] (BTSQconst [d] x)) && is32Bit(int64(c) | 1<<uint32(d)) => (ORQconst [c | 1<<uint32(d)] x)
(BTSQconst [c] (BTSQconst [d] x)) && is32Bit(1<<uint32(c) | 1<<uint32(d)) => (ORQconst [1<<uint32(c) | 1<<uint32(d)] x)
(MULLconst [c] (MULLconst [d] x)) => (MULLconst [c * d] x)
(MULQconst [c] (MULQconst [d] x)) && is32Bit(int64(c)*int64(d)) => (MULQconst [c * d] x)
@ -1422,11 +1384,8 @@
(NOTQ (MOVQconst [c])) => (MOVQconst [^c])
(NOTL (MOVLconst [c])) => (MOVLconst [^c])
(BTSQconst [c] (MOVQconst [d])) => (MOVQconst [d|(1<<uint32(c))])
(BTSLconst [c] (MOVLconst [d])) => (MOVLconst [d|(1<<uint32(c))])
(BTRQconst [c] (MOVQconst [d])) => (MOVQconst [d&^(1<<uint32(c))])
(BTRLconst [c] (MOVLconst [d])) => (MOVLconst [d&^(1<<uint32(c))])
(BTCQconst [c] (MOVQconst [d])) => (MOVQconst [d^(1<<uint32(c))])
(BTCLconst [c] (MOVLconst [d])) => (MOVLconst [d^(1<<uint32(c))])
// If c or d doesn't fit into 32 bits, then we can't construct ORQconst,
// but we can still constant-fold.
@ -1513,6 +1472,8 @@
(MOVQstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Qload x [off] {sym} ptr mem) mem) && y.Uses==1 && clobber(y) => ((ADD|AND|OR|XOR)Qmodify [off] {sym} ptr x mem)
(MOVQstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR)Q l:(MOVQload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y, l) =>
((ADD|SUB|AND|OR|XOR)Qmodify [off] {sym} ptr x mem)
(MOVQstore {sym} [off] ptr x:(BT(S|R|C)Qconst [c] l:(MOVQload {sym} [off] ptr mem)) mem) && x.Uses == 1 && l.Uses == 1 && clobber(x, l) =>
(BT(S|R|C)Qconstmodify {sym} [makeValAndOff(int32(c),off)] ptr mem)
// Merge ADDQconst and LEAQ into atomic loads.
(MOV(Q|L|B)atomicload [off1] {sym} (ADDQconst [off2] ptr) mem) && is32Bit(int64(off1)+int64(off2)) =>

View File

@ -399,12 +399,27 @@ func init() {
{name: "BTSQ", argLength: 2, reg: gp21, asm: "BTSQ", resultInArg0: true, clobberFlags: true}, // set bit arg1%64 in arg0
{name: "BTLconst", argLength: 1, reg: gp1flags, asm: "BTL", typ: "Flags", aux: "Int8"}, // test whether bit auxint in arg0 is set, 0 <= auxint < 32
{name: "BTQconst", argLength: 1, reg: gp1flags, asm: "BTQ", typ: "Flags", aux: "Int8"}, // test whether bit auxint in arg0 is set, 0 <= auxint < 64
{name: "BTCLconst", argLength: 1, reg: gp11, asm: "BTCL", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // complement bit auxint in arg0, 0 <= auxint < 32
{name: "BTCQconst", argLength: 1, reg: gp11, asm: "BTCQ", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // complement bit auxint in arg0, 0 <= auxint < 64
{name: "BTRLconst", argLength: 1, reg: gp11, asm: "BTRL", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // reset bit auxint in arg0, 0 <= auxint < 32
{name: "BTRQconst", argLength: 1, reg: gp11, asm: "BTRQ", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // reset bit auxint in arg0, 0 <= auxint < 64
{name: "BTSLconst", argLength: 1, reg: gp11, asm: "BTSL", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // set bit auxint in arg0, 0 <= auxint < 32
{name: "BTSQconst", argLength: 1, reg: gp11, asm: "BTSQ", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // set bit auxint in arg0, 0 <= auxint < 64
{name: "BTCQconst", argLength: 1, reg: gp11, asm: "BTCQ", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // complement bit auxint in arg0, 31 <= auxint < 64
{name: "BTRQconst", argLength: 1, reg: gp11, asm: "BTRQ", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // reset bit auxint in arg0, 31 <= auxint < 64
{name: "BTSQconst", argLength: 1, reg: gp11, asm: "BTSQ", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // set bit auxint in arg0, 31 <= auxint < 64
// BT[SRC]Qconstmodify
//
// S: set bit
// R: reset (clear) bit
// C: complement bit
//
// Apply operation to bit ValAndOff(AuxInt).Val() in the 64 bits at
// memory address arg0+ValAndOff(AuxInt).Off()+aux
// Bit index must be in the range 31-63 (inclusive).
// (We use OR/AND/XOR for narrower targets and lower bit indexes.)
// arg1=mem, returns mem
//
// Note that there aren't non-const versions of these instructions.
// Well, there are such instructions, but they are slow and weird so we don't use them.
{name: "BTSQconstmodify", argLength: 2, reg: gpstoreconst, asm: "BTSQ", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"},
{name: "BTRQconstmodify", argLength: 2, reg: gpstoreconst, asm: "BTRQ", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"},
{name: "BTCQconstmodify", argLength: 2, reg: gpstoreconst, asm: "BTCQ", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"},
// TESTx: compare (arg0 & arg1) to 0
{name: "TESTQ", argLength: 2, reg: gp2flags, commutative: true, asm: "TESTQ", typ: "Flags"},

View File

@ -716,12 +716,12 @@ const (
OpAMD64BTSQ
OpAMD64BTLconst
OpAMD64BTQconst
OpAMD64BTCLconst
OpAMD64BTCQconst
OpAMD64BTRLconst
OpAMD64BTRQconst
OpAMD64BTSLconst
OpAMD64BTSQconst
OpAMD64BTSQconstmodify
OpAMD64BTRQconstmodify
OpAMD64BTCQconstmodify
OpAMD64TESTQ
OpAMD64TESTL
OpAMD64TESTW
@ -8778,22 +8778,6 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "BTCLconst",
auxType: auxInt8,
argLen: 1,
resultInArg0: true,
clobberFlags: true,
asm: x86.ABTCL,
reg: regInfo{
inputs: []inputInfo{
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
outputs: []outputInfo{
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
},
},
{
name: "BTCQconst",
auxType: auxInt8,
@ -8810,22 +8794,6 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "BTRLconst",
auxType: auxInt8,
argLen: 1,
resultInArg0: true,
clobberFlags: true,
asm: x86.ABTRL,
reg: regInfo{
inputs: []inputInfo{
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
outputs: []outputInfo{
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
},
},
{
name: "BTRQconst",
auxType: auxInt8,
@ -8842,22 +8810,6 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "BTSLconst",
auxType: auxInt8,
argLen: 1,
resultInArg0: true,
clobberFlags: true,
asm: x86.ABTSL,
reg: regInfo{
inputs: []inputInfo{
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
outputs: []outputInfo{
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
},
},
{
name: "BTSQconst",
auxType: auxInt8,
@ -8874,6 +8826,48 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "BTSQconstmodify",
auxType: auxSymValAndOff,
argLen: 2,
clobberFlags: true,
faultOnNilArg0: true,
symEffect: SymRead | SymWrite,
asm: x86.ABTSQ,
reg: regInfo{
inputs: []inputInfo{
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
},
},
},
{
name: "BTRQconstmodify",
auxType: auxSymValAndOff,
argLen: 2,
clobberFlags: true,
faultOnNilArg0: true,
symEffect: SymRead | SymWrite,
asm: x86.ABTRQ,
reg: regInfo{
inputs: []inputInfo{
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
},
},
},
{
name: "BTCQconstmodify",
auxType: auxSymValAndOff,
argLen: 2,
clobberFlags: true,
faultOnNilArg0: true,
symEffect: SymRead | SymWrite,
asm: x86.ABTCQ,
reg: regInfo{
inputs: []inputInfo{
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
},
},
},
{
name: "TESTQ",
argLen: 2,

View File

@ -73,20 +73,14 @@ func rewriteValueAMD64(v *Value) bool {
return rewriteValueAMD64_OpAMD64BSWAPL(v)
case OpAMD64BSWAPQ:
return rewriteValueAMD64_OpAMD64BSWAPQ(v)
case OpAMD64BTCLconst:
return rewriteValueAMD64_OpAMD64BTCLconst(v)
case OpAMD64BTCQconst:
return rewriteValueAMD64_OpAMD64BTCQconst(v)
case OpAMD64BTLconst:
return rewriteValueAMD64_OpAMD64BTLconst(v)
case OpAMD64BTQconst:
return rewriteValueAMD64_OpAMD64BTQconst(v)
case OpAMD64BTRLconst:
return rewriteValueAMD64_OpAMD64BTRLconst(v)
case OpAMD64BTRQconst:
return rewriteValueAMD64_OpAMD64BTRQconst(v)
case OpAMD64BTSLconst:
return rewriteValueAMD64_OpAMD64BTSLconst(v)
case OpAMD64BTSQconst:
return rewriteValueAMD64_OpAMD64BTSQconst(v)
case OpAMD64CMOVLCC:
@ -2626,26 +2620,6 @@ func rewriteValueAMD64_OpAMD64ANDL(v *Value) bool {
}
break
}
// match: (ANDL (MOVLconst [c]) x)
// cond: isUint32PowerOfTwo(int64(^c)) && uint64(^c) >= 128
// result: (BTRLconst [int8(log32(^c))] x)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpAMD64MOVLconst {
continue
}
c := auxIntToInt32(v_0.AuxInt)
x := v_1
if !(isUint32PowerOfTwo(int64(^c)) && uint64(^c) >= 128) {
continue
}
v.reset(OpAMD64BTRLconst)
v.AuxInt = int8ToAuxInt(int8(log32(^c)))
v.AddArg(x)
return true
}
break
}
// match: (ANDL x (MOVLconst [c]))
// result: (ANDLconst [c] x)
for {
@ -2754,20 +2728,6 @@ func rewriteValueAMD64_OpAMD64ANDL(v *Value) bool {
}
func rewriteValueAMD64_OpAMD64ANDLconst(v *Value) bool {
v_0 := v.Args[0]
// match: (ANDLconst [c] x)
// cond: isUint32PowerOfTwo(int64(^c)) && uint64(^c) >= 128
// result: (BTRLconst [int8(log32(^c))] x)
for {
c := auxIntToInt32(v.AuxInt)
x := v_0
if !(isUint32PowerOfTwo(int64(^c)) && uint64(^c) >= 128) {
break
}
v.reset(OpAMD64BTRLconst)
v.AuxInt = int8ToAuxInt(int8(log32(^c)))
v.AddArg(x)
return true
}
// match: (ANDLconst [c] (ANDLconst [d] x))
// result: (ANDLconst [c & d] x)
for {
@ -2782,20 +2742,6 @@ func rewriteValueAMD64_OpAMD64ANDLconst(v *Value) bool {
v.AddArg(x)
return true
}
// match: (ANDLconst [c] (BTRLconst [d] x))
// result: (ANDLconst [c &^ (1<<uint32(d))] x)
for {
c := auxIntToInt32(v.AuxInt)
if v_0.Op != OpAMD64BTRLconst {
break
}
d := auxIntToInt8(v_0.AuxInt)
x := v_0.Args[0]
v.reset(OpAMD64ANDLconst)
v.AuxInt = int32ToAuxInt(c &^ (1 << uint32(d)))
v.AddArg(x)
return true
}
// match: (ANDLconst [ 0xFF] x)
// result: (MOVBQZX x)
for {
@ -3099,7 +3045,7 @@ func rewriteValueAMD64_OpAMD64ANDQ(v *Value) bool {
break
}
// match: (ANDQ (MOVQconst [c]) x)
// cond: isUint64PowerOfTwo(^c) && uint64(^c) >= 128
// cond: isUint64PowerOfTwo(^c) && uint64(^c) >= 1<<31
// result: (BTRQconst [int8(log64(^c))] x)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
@ -3108,7 +3054,7 @@ func rewriteValueAMD64_OpAMD64ANDQ(v *Value) bool {
}
c := auxIntToInt64(v_0.AuxInt)
x := v_1
if !(isUint64PowerOfTwo(^c) && uint64(^c) >= 128) {
if !(isUint64PowerOfTwo(^c) && uint64(^c) >= 1<<31) {
continue
}
v.reset(OpAMD64BTRQconst)
@ -3230,20 +3176,6 @@ func rewriteValueAMD64_OpAMD64ANDQ(v *Value) bool {
}
func rewriteValueAMD64_OpAMD64ANDQconst(v *Value) bool {
v_0 := v.Args[0]
// match: (ANDQconst [c] x)
// cond: isUint64PowerOfTwo(int64(^c)) && uint64(^c) >= 128
// result: (BTRQconst [int8(log32(^c))] x)
for {
c := auxIntToInt32(v.AuxInt)
x := v_0
if !(isUint64PowerOfTwo(int64(^c)) && uint64(^c) >= 128) {
break
}
v.reset(OpAMD64BTRQconst)
v.AuxInt = int8ToAuxInt(int8(log32(^c)))
v.AddArg(x)
return true
}
// match: (ANDQconst [c] (ANDQconst [d] x))
// result: (ANDQconst [c & d] x)
for {
@ -3258,24 +3190,6 @@ func rewriteValueAMD64_OpAMD64ANDQconst(v *Value) bool {
v.AddArg(x)
return true
}
// match: (ANDQconst [c] (BTRQconst [d] x))
// cond: is32Bit(int64(c) &^ (1<<uint32(d)))
// result: (ANDQconst [c &^ (1<<uint32(d))] x)
for {
c := auxIntToInt32(v.AuxInt)
if v_0.Op != OpAMD64BTRQconst {
break
}
d := auxIntToInt8(v_0.AuxInt)
x := v_0.Args[0]
if !(is32Bit(int64(c) &^ (1 << uint32(d)))) {
break
}
v.reset(OpAMD64ANDQconst)
v.AuxInt = int32ToAuxInt(c &^ (1 << uint32(d)))
v.AddArg(x)
return true
}
// match: (ANDQconst [ 0xFF] x)
// result: (MOVBQZX x)
for {
@ -3677,88 +3591,8 @@ func rewriteValueAMD64_OpAMD64BSWAPQ(v *Value) bool {
}
return false
}
// rewriteValueAMD64_OpAMD64BTCLconst tries, in order, the rewrite rules whose
// root is BTCLconst (complement bit AuxInt of a 32-bit value) against v.
//
// Each rule is written as a `for` block that runs at most once: `break`
// means "this rule does not match, fall through to the next one", and a
// successful match rewrites v in place and returns true. The function
// returns false when no rule applies, leaving v untouched.
//
// The rule shapes follow the `// match:` / `// result:` comments; the
// originating rule text must be kept in sync with any change made here.
func rewriteValueAMD64_OpAMD64BTCLconst(v *Value) bool {
	v_0 := v.Args[0]
	// Complementing bit c after XORing with d is a single XOR with d
	// and bit c merged into the mask.
	// match: (BTCLconst [c] (XORLconst [d] x))
	// result: (XORLconst [d ^ 1<<uint32(c)] x)
	for {
		c := auxIntToInt8(v.AuxInt)
		if v_0.Op != OpAMD64XORLconst {
			break
		}
		d := auxIntToInt32(v_0.AuxInt)
		x := v_0.Args[0]
		v.reset(OpAMD64XORLconst)
		v.AuxInt = int32ToAuxInt(d ^ 1<<uint32(c))
		v.AddArg(x)
		return true
	}
	// Two bit complements fold into one XOR. The masks must be combined
	// with ^ rather than |: when c == d the two complements cancel, so the
	// mask has to be 0, whereas 1<<c | 1<<c would still flip the bit once.
	// For c != d the two operators give the same mask.
	// match: (BTCLconst [c] (BTCLconst [d] x))
	// result: (XORLconst [1<<uint32(c) ^ 1<<uint32(d)] x)
	for {
		c := auxIntToInt8(v.AuxInt)
		if v_0.Op != OpAMD64BTCLconst {
			break
		}
		d := auxIntToInt8(v_0.AuxInt)
		x := v_0.Args[0]
		v.reset(OpAMD64XORLconst)
		v.AuxInt = int32ToAuxInt(1<<uint32(c) ^ 1<<uint32(d))
		v.AddArg(x)
		return true
	}
	// Constant-fold: complementing bit c of a known constant.
	// match: (BTCLconst [c] (MOVLconst [d]))
	// result: (MOVLconst [d^(1<<uint32(c))])
	for {
		c := auxIntToInt8(v.AuxInt)
		if v_0.Op != OpAMD64MOVLconst {
			break
		}
		d := auxIntToInt32(v_0.AuxInt)
		v.reset(OpAMD64MOVLconst)
		v.AuxInt = int32ToAuxInt(d ^ (1 << uint32(c)))
		return true
	}
	return false
}
func rewriteValueAMD64_OpAMD64BTCQconst(v *Value) bool {
v_0 := v.Args[0]
// match: (BTCQconst [c] (XORQconst [d] x))
// cond: is32Bit(int64(d) ^ 1<<uint32(c))
// result: (XORQconst [d ^ 1<<uint32(c)] x)
for {
c := auxIntToInt8(v.AuxInt)
if v_0.Op != OpAMD64XORQconst {
break
}
d := auxIntToInt32(v_0.AuxInt)
x := v_0.Args[0]
if !(is32Bit(int64(d) ^ 1<<uint32(c))) {
break
}
v.reset(OpAMD64XORQconst)
v.AuxInt = int32ToAuxInt(d ^ 1<<uint32(c))
v.AddArg(x)
return true
}
// match: (BTCQconst [c] (BTCQconst [d] x))
// cond: is32Bit(1<<uint32(c) ^ 1<<uint32(d))
// result: (XORQconst [1<<uint32(c) ^ 1<<uint32(d)] x)
for {
c := auxIntToInt8(v.AuxInt)
if v_0.Op != OpAMD64BTCQconst {
break
}
d := auxIntToInt8(v_0.AuxInt)
x := v_0.Args[0]
if !(is32Bit(1<<uint32(c) ^ 1<<uint32(d))) {
break
}
v.reset(OpAMD64XORQconst)
v.AuxInt = int32ToAuxInt(1<<uint32(c) ^ 1<<uint32(d))
v.AddArg(x)
return true
}
// match: (BTCQconst [c] (MOVQconst [d]))
// result: (MOVQconst [d^(1<<uint32(c))])
for {
@ -3953,76 +3787,6 @@ func rewriteValueAMD64_OpAMD64BTQconst(v *Value) bool {
}
return false
}
// rewriteValueAMD64_OpAMD64BTRLconst tries, in order, the rewrite rules whose
// root is BTRLconst (reset bit AuxInt of a 32-bit value) against v.
//
// Each rule is a `for` block that runs at most once: `break` means the rule
// did not match and the next one is tried; a match rewrites v in place and
// returns true. The function returns false when no rule applies.
func rewriteValueAMD64_OpAMD64BTRLconst(v *Value) bool {
v_0 := v.Args[0]
// Resetting bit c makes a preceding set of the same bit irrelevant,
// so the inner BTSLconst is skipped.
// match: (BTRLconst [c] (BTSLconst [c] x))
// result: (BTRLconst [c] x)
for {
c := auxIntToInt8(v.AuxInt)
if v_0.Op != OpAMD64BTSLconst || auxIntToInt8(v_0.AuxInt) != c {
break
}
x := v_0.Args[0]
v.reset(OpAMD64BTRLconst)
v.AuxInt = int8ToAuxInt(c)
v.AddArg(x)
return true
}
// Likewise, a preceding complement of the same bit is overwritten by the reset.
// match: (BTRLconst [c] (BTCLconst [c] x))
// result: (BTRLconst [c] x)
for {
c := auxIntToInt8(v.AuxInt)
if v_0.Op != OpAMD64BTCLconst || auxIntToInt8(v_0.AuxInt) != c {
break
}
x := v_0.Args[0]
v.reset(OpAMD64BTRLconst)
v.AuxInt = int8ToAuxInt(c)
v.AddArg(x)
return true
}
// Resetting bit c after an AND with d is one AND with bit c removed from d.
// match: (BTRLconst [c] (ANDLconst [d] x))
// result: (ANDLconst [d &^ (1<<uint32(c))] x)
for {
c := auxIntToInt8(v.AuxInt)
if v_0.Op != OpAMD64ANDLconst {
break
}
d := auxIntToInt32(v_0.AuxInt)
x := v_0.Args[0]
v.reset(OpAMD64ANDLconst)
v.AuxInt = int32ToAuxInt(d &^ (1 << uint32(c)))
v.AddArg(x)
return true
}
// Two bit resets become one AND with both bits cleared in the mask.
// The | is correct even when c == d (the mask then clears that one bit).
// match: (BTRLconst [c] (BTRLconst [d] x))
// result: (ANDLconst [^(1<<uint32(c) | 1<<uint32(d))] x)
for {
c := auxIntToInt8(v.AuxInt)
if v_0.Op != OpAMD64BTRLconst {
break
}
d := auxIntToInt8(v_0.AuxInt)
x := v_0.Args[0]
v.reset(OpAMD64ANDLconst)
v.AuxInt = int32ToAuxInt(^(1<<uint32(c) | 1<<uint32(d)))
v.AddArg(x)
return true
}
// Constant-fold: resetting bit c of a known constant.
// match: (BTRLconst [c] (MOVLconst [d]))
// result: (MOVLconst [d&^(1<<uint32(c))])
for {
c := auxIntToInt8(v.AuxInt)
if v_0.Op != OpAMD64MOVLconst {
break
}
d := auxIntToInt32(v_0.AuxInt)
v.reset(OpAMD64MOVLconst)
v.AuxInt = int32ToAuxInt(d &^ (1 << uint32(c)))
return true
}
return false
}
func rewriteValueAMD64_OpAMD64BTRQconst(v *Value) bool {
v_0 := v.Args[0]
// match: (BTRQconst [c] (BTSQconst [c] x))
@ -4051,42 +3815,6 @@ func rewriteValueAMD64_OpAMD64BTRQconst(v *Value) bool {
v.AddArg(x)
return true
}
// match: (BTRQconst [c] (ANDQconst [d] x))
// cond: is32Bit(int64(d) &^ (1<<uint32(c)))
// result: (ANDQconst [d &^ (1<<uint32(c))] x)
for {
c := auxIntToInt8(v.AuxInt)
if v_0.Op != OpAMD64ANDQconst {
break
}
d := auxIntToInt32(v_0.AuxInt)
x := v_0.Args[0]
if !(is32Bit(int64(d) &^ (1 << uint32(c)))) {
break
}
v.reset(OpAMD64ANDQconst)
v.AuxInt = int32ToAuxInt(d &^ (1 << uint32(c)))
v.AddArg(x)
return true
}
// match: (BTRQconst [c] (BTRQconst [d] x))
// cond: is32Bit(^(1<<uint32(c) | 1<<uint32(d)))
// result: (ANDQconst [^(1<<uint32(c) | 1<<uint32(d))] x)
for {
c := auxIntToInt8(v.AuxInt)
if v_0.Op != OpAMD64BTRQconst {
break
}
d := auxIntToInt8(v_0.AuxInt)
x := v_0.Args[0]
if !(is32Bit(^(1<<uint32(c) | 1<<uint32(d)))) {
break
}
v.reset(OpAMD64ANDQconst)
v.AuxInt = int32ToAuxInt(^(1<<uint32(c) | 1<<uint32(d)))
v.AddArg(x)
return true
}
// match: (BTRQconst [c] (MOVQconst [d]))
// result: (MOVQconst [d&^(1<<uint32(c))])
for {
@ -4101,76 +3829,6 @@ func rewriteValueAMD64_OpAMD64BTRQconst(v *Value) bool {
}
return false
}
// rewriteValueAMD64_OpAMD64BTSLconst tries, in order, the rewrite rules whose
// root is BTSLconst (set bit AuxInt of a 32-bit value) against v.
//
// Each rule is a `for` block that runs at most once: `break` means the rule
// did not match and the next one is tried; a match rewrites v in place and
// returns true. The function returns false when no rule applies.
func rewriteValueAMD64_OpAMD64BTSLconst(v *Value) bool {
v_0 := v.Args[0]
// Setting bit c makes a preceding reset of the same bit irrelevant,
// so the inner BTRLconst is skipped.
// match: (BTSLconst [c] (BTRLconst [c] x))
// result: (BTSLconst [c] x)
for {
c := auxIntToInt8(v.AuxInt)
if v_0.Op != OpAMD64BTRLconst || auxIntToInt8(v_0.AuxInt) != c {
break
}
x := v_0.Args[0]
v.reset(OpAMD64BTSLconst)
v.AuxInt = int8ToAuxInt(c)
v.AddArg(x)
return true
}
// Likewise, a preceding complement of the same bit is overwritten by the set.
// match: (BTSLconst [c] (BTCLconst [c] x))
// result: (BTSLconst [c] x)
for {
c := auxIntToInt8(v.AuxInt)
if v_0.Op != OpAMD64BTCLconst || auxIntToInt8(v_0.AuxInt) != c {
break
}
x := v_0.Args[0]
v.reset(OpAMD64BTSLconst)
v.AuxInt = int8ToAuxInt(c)
v.AddArg(x)
return true
}
// Setting bit c after an OR with d is one OR with bit c added to d.
// match: (BTSLconst [c] (ORLconst [d] x))
// result: (ORLconst [d | 1<<uint32(c)] x)
for {
c := auxIntToInt8(v.AuxInt)
if v_0.Op != OpAMD64ORLconst {
break
}
d := auxIntToInt32(v_0.AuxInt)
x := v_0.Args[0]
v.reset(OpAMD64ORLconst)
v.AuxInt = int32ToAuxInt(d | 1<<uint32(c))
v.AddArg(x)
return true
}
// Two bit sets become one OR with both bits in the mask.
// The | is correct even when c == d (the mask then sets that one bit).
// match: (BTSLconst [c] (BTSLconst [d] x))
// result: (ORLconst [1<<uint32(c) | 1<<uint32(d)] x)
for {
c := auxIntToInt8(v.AuxInt)
if v_0.Op != OpAMD64BTSLconst {
break
}
d := auxIntToInt8(v_0.AuxInt)
x := v_0.Args[0]
v.reset(OpAMD64ORLconst)
v.AuxInt = int32ToAuxInt(1<<uint32(c) | 1<<uint32(d))
v.AddArg(x)
return true
}
// Constant-fold: setting bit c of a known constant.
// match: (BTSLconst [c] (MOVLconst [d]))
// result: (MOVLconst [d|(1<<uint32(c))])
for {
c := auxIntToInt8(v.AuxInt)
if v_0.Op != OpAMD64MOVLconst {
break
}
d := auxIntToInt32(v_0.AuxInt)
v.reset(OpAMD64MOVLconst)
v.AuxInt = int32ToAuxInt(d | (1 << uint32(c)))
return true
}
return false
}
func rewriteValueAMD64_OpAMD64BTSQconst(v *Value) bool {
v_0 := v.Args[0]
// match: (BTSQconst [c] (BTRQconst [c] x))
@ -4199,42 +3857,6 @@ func rewriteValueAMD64_OpAMD64BTSQconst(v *Value) bool {
v.AddArg(x)
return true
}
// match: (BTSQconst [c] (ORQconst [d] x))
// cond: is32Bit(int64(d) | 1<<uint32(c))
// result: (ORQconst [d | 1<<uint32(c)] x)
for {
c := auxIntToInt8(v.AuxInt)
if v_0.Op != OpAMD64ORQconst {
break
}
d := auxIntToInt32(v_0.AuxInt)
x := v_0.Args[0]
if !(is32Bit(int64(d) | 1<<uint32(c))) {
break
}
v.reset(OpAMD64ORQconst)
v.AuxInt = int32ToAuxInt(d | 1<<uint32(c))
v.AddArg(x)
return true
}
// match: (BTSQconst [c] (BTSQconst [d] x))
// cond: is32Bit(1<<uint32(c) | 1<<uint32(d))
// result: (ORQconst [1<<uint32(c) | 1<<uint32(d)] x)
for {
c := auxIntToInt8(v.AuxInt)
if v_0.Op != OpAMD64BTSQconst {
break
}
d := auxIntToInt8(v_0.AuxInt)
x := v_0.Args[0]
if !(is32Bit(1<<uint32(c) | 1<<uint32(d))) {
break
}
v.reset(OpAMD64ORQconst)
v.AuxInt = int32ToAuxInt(1<<uint32(c) | 1<<uint32(d))
v.AddArg(x)
return true
}
// match: (BTSQconst [c] (MOVQconst [d]))
// result: (MOVQconst [d|(1<<uint32(c))])
for {
@ -12306,6 +11928,84 @@ func rewriteValueAMD64_OpAMD64MOVQstore(v *Value) bool {
}
break
}
// match: (MOVQstore {sym} [off] ptr x:(BTSQconst [c] l:(MOVQload {sym} [off] ptr mem)) mem)
// cond: x.Uses == 1 && l.Uses == 1 && clobber(x, l)
// result: (BTSQconstmodify {sym} [makeValAndOff(int32(c),off)] ptr mem)
for {
off := auxIntToInt32(v.AuxInt)
sym := auxToSym(v.Aux)
ptr := v_0
x := v_1
if x.Op != OpAMD64BTSQconst {
break
}
c := auxIntToInt8(x.AuxInt)
l := x.Args[0]
if l.Op != OpAMD64MOVQload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym {
break
}
mem := l.Args[1]
if ptr != l.Args[0] || mem != v_2 || !(x.Uses == 1 && l.Uses == 1 && clobber(x, l)) {
break
}
v.reset(OpAMD64BTSQconstmodify)
v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(c), off))
v.Aux = symToAux(sym)
v.AddArg2(ptr, mem)
return true
}
// match: (MOVQstore {sym} [off] ptr x:(BTRQconst [c] l:(MOVQload {sym} [off] ptr mem)) mem)
// cond: x.Uses == 1 && l.Uses == 1 && clobber(x, l)
// result: (BTRQconstmodify {sym} [makeValAndOff(int32(c),off)] ptr mem)
for {
off := auxIntToInt32(v.AuxInt)
sym := auxToSym(v.Aux)
ptr := v_0
x := v_1
if x.Op != OpAMD64BTRQconst {
break
}
c := auxIntToInt8(x.AuxInt)
l := x.Args[0]
if l.Op != OpAMD64MOVQload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym {
break
}
mem := l.Args[1]
if ptr != l.Args[0] || mem != v_2 || !(x.Uses == 1 && l.Uses == 1 && clobber(x, l)) {
break
}
v.reset(OpAMD64BTRQconstmodify)
v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(c), off))
v.Aux = symToAux(sym)
v.AddArg2(ptr, mem)
return true
}
// match: (MOVQstore {sym} [off] ptr x:(BTCQconst [c] l:(MOVQload {sym} [off] ptr mem)) mem)
// cond: x.Uses == 1 && l.Uses == 1 && clobber(x, l)
// result: (BTCQconstmodify {sym} [makeValAndOff(int32(c),off)] ptr mem)
for {
off := auxIntToInt32(v.AuxInt)
sym := auxToSym(v.Aux)
ptr := v_0
x := v_1
if x.Op != OpAMD64BTCQconst {
break
}
c := auxIntToInt8(x.AuxInt)
l := x.Args[0]
if l.Op != OpAMD64MOVQload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym {
break
}
mem := l.Args[1]
if ptr != l.Args[0] || mem != v_2 || !(x.Uses == 1 && l.Uses == 1 && clobber(x, l)) {
break
}
v.reset(OpAMD64BTCQconstmodify)
v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(c), off))
v.Aux = symToAux(sym)
v.AddArg2(ptr, mem)
return true
}
// match: (MOVQstore [off] {sym} ptr a:(ADDQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
// cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && clobber(l, a)
// result: (ADDQconstmodify {sym} [makeValAndOff(int32(c),off)] ptr mem)
@ -14643,26 +14343,6 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
}
break
}
// match: (ORL (MOVLconst [c]) x)
// cond: isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128
// result: (BTSLconst [int8(log32(c))] x)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpAMD64MOVLconst {
continue
}
c := auxIntToInt32(v_0.AuxInt)
x := v_1
if !(isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128) {
continue
}
v.reset(OpAMD64BTSLconst)
v.AuxInt = int8ToAuxInt(int8(log32(c)))
v.AddArg(x)
return true
}
break
}
// match: (ORL x (MOVLconst [c]))
// result: (ORLconst [c] x)
for {
@ -14718,20 +14398,6 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
}
func rewriteValueAMD64_OpAMD64ORLconst(v *Value) bool {
v_0 := v.Args[0]
// match: (ORLconst [c] x)
// cond: isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128
// result: (BTSLconst [int8(log32(c))] x)
for {
c := auxIntToInt32(v.AuxInt)
x := v_0
if !(isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128) {
break
}
v.reset(OpAMD64BTSLconst)
v.AuxInt = int8ToAuxInt(int8(log32(c)))
v.AddArg(x)
return true
}
// match: (ORLconst [c] (ORLconst [d] x))
// result: (ORLconst [c | d] x)
for {
@ -14746,20 +14412,6 @@ func rewriteValueAMD64_OpAMD64ORLconst(v *Value) bool {
v.AddArg(x)
return true
}
// match: (ORLconst [c] (BTSLconst [d] x))
// result: (ORLconst [c | 1<<uint32(d)] x)
for {
c := auxIntToInt32(v.AuxInt)
if v_0.Op != OpAMD64BTSLconst {
break
}
d := auxIntToInt8(v_0.AuxInt)
x := v_0.Args[0]
v.reset(OpAMD64ORLconst)
v.AuxInt = int32ToAuxInt(c | 1<<uint32(d))
v.AddArg(x)
return true
}
// match: (ORLconst [c] x)
// cond: c==0
// result: x
@ -14993,7 +14645,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
break
}
// match: (ORQ (MOVQconst [c]) x)
// cond: isUint64PowerOfTwo(c) && uint64(c) >= 128
// cond: isUint64PowerOfTwo(c) && uint64(c) >= 1<<31
// result: (BTSQconst [int8(log64(c))] x)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
@ -15002,7 +14654,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
}
c := auxIntToInt64(v_0.AuxInt)
x := v_1
if !(isUint64PowerOfTwo(c) && uint64(c) >= 128) {
if !(isUint64PowerOfTwo(c) && uint64(c) >= 1<<31) {
continue
}
v.reset(OpAMD64BTSQconst)
@ -15201,20 +14853,6 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
}
func rewriteValueAMD64_OpAMD64ORQconst(v *Value) bool {
v_0 := v.Args[0]
// match: (ORQconst [c] x)
// cond: isUint64PowerOfTwo(int64(c)) && uint64(c) >= 128
// result: (BTSQconst [int8(log32(c))] x)
for {
c := auxIntToInt32(v.AuxInt)
x := v_0
if !(isUint64PowerOfTwo(int64(c)) && uint64(c) >= 128) {
break
}
v.reset(OpAMD64BTSQconst)
v.AuxInt = int8ToAuxInt(int8(log32(c)))
v.AddArg(x)
return true
}
// match: (ORQconst [c] (ORQconst [d] x))
// result: (ORQconst [c | d] x)
for {
@ -15229,24 +14867,6 @@ func rewriteValueAMD64_OpAMD64ORQconst(v *Value) bool {
v.AddArg(x)
return true
}
// match: (ORQconst [c] (BTSQconst [d] x))
// cond: is32Bit(int64(c) | 1<<uint32(d))
// result: (ORQconst [c | 1<<uint32(d)] x)
for {
c := auxIntToInt32(v.AuxInt)
if v_0.Op != OpAMD64BTSQconst {
break
}
d := auxIntToInt8(v_0.AuxInt)
x := v_0.Args[0]
if !(is32Bit(int64(c) | 1<<uint32(d))) {
break
}
v.reset(OpAMD64ORQconst)
v.AuxInt = int32ToAuxInt(c | 1<<uint32(d))
v.AddArg(x)
return true
}
// match: (ORQconst [0] x)
// result: x
for {
@ -21058,14 +20678,14 @@ func rewriteValueAMD64_OpAMD64SHLL(v *Value) bool {
func rewriteValueAMD64_OpAMD64SHLLconst(v *Value) bool {
v_0 := v.Args[0]
// match: (SHLLconst [1] (SHRLconst [1] x))
// result: (BTRLconst [0] x)
// result: (ANDLconst [-2] x)
for {
if auxIntToInt8(v.AuxInt) != 1 || v_0.Op != OpAMD64SHRLconst || auxIntToInt8(v_0.AuxInt) != 1 {
break
}
x := v_0.Args[0]
v.reset(OpAMD64BTRLconst)
v.AuxInt = int8ToAuxInt(0)
v.reset(OpAMD64ANDLconst)
v.AuxInt = int32ToAuxInt(-2)
v.AddArg(x)
return true
}
@ -21314,14 +20934,14 @@ func rewriteValueAMD64_OpAMD64SHLQ(v *Value) bool {
func rewriteValueAMD64_OpAMD64SHLQconst(v *Value) bool {
v_0 := v.Args[0]
// match: (SHLQconst [1] (SHRQconst [1] x))
// result: (BTRQconst [0] x)
// result: (ANDQconst [-2] x)
for {
if auxIntToInt8(v.AuxInt) != 1 || v_0.Op != OpAMD64SHRQconst || auxIntToInt8(v_0.AuxInt) != 1 {
break
}
x := v_0.Args[0]
v.reset(OpAMD64BTRQconst)
v.AuxInt = int8ToAuxInt(0)
v.reset(OpAMD64ANDQconst)
v.AuxInt = int32ToAuxInt(-2)
v.AddArg(x)
return true
}
@ -21741,14 +21361,14 @@ func rewriteValueAMD64_OpAMD64SHRL(v *Value) bool {
func rewriteValueAMD64_OpAMD64SHRLconst(v *Value) bool {
v_0 := v.Args[0]
// match: (SHRLconst [1] (SHLLconst [1] x))
// result: (BTRLconst [31] x)
// result: (ANDLconst [0x7fffffff] x)
for {
if auxIntToInt8(v.AuxInt) != 1 || v_0.Op != OpAMD64SHLLconst || auxIntToInt8(v_0.AuxInt) != 1 {
break
}
x := v_0.Args[0]
v.reset(OpAMD64BTRLconst)
v.AuxInt = int8ToAuxInt(31)
v.reset(OpAMD64ANDLconst)
v.AuxInt = int32ToAuxInt(0x7fffffff)
v.AddArg(x)
return true
}
@ -23450,26 +23070,6 @@ func rewriteValueAMD64_OpAMD64XORL(v *Value) bool {
}
break
}
// match: (XORL (MOVLconst [c]) x)
// cond: isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128
// result: (BTCLconst [int8(log32(c))] x)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpAMD64MOVLconst {
continue
}
c := auxIntToInt32(v_0.AuxInt)
x := v_1
if !(isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128) {
continue
}
v.reset(OpAMD64BTCLconst)
v.AuxInt = int8ToAuxInt(int8(log32(c)))
v.AddArg(x)
return true
}
break
}
// match: (XORL x (MOVLconst [c]))
// result: (XORLconst [c] x)
for {
@ -23541,20 +23141,6 @@ func rewriteValueAMD64_OpAMD64XORL(v *Value) bool {
}
func rewriteValueAMD64_OpAMD64XORLconst(v *Value) bool {
v_0 := v.Args[0]
// match: (XORLconst [c] x)
// cond: isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128
// result: (BTCLconst [int8(log32(c))] x)
for {
c := auxIntToInt32(v.AuxInt)
x := v_0
if !(isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128) {
break
}
v.reset(OpAMD64BTCLconst)
v.AuxInt = int8ToAuxInt(int8(log32(c)))
v.AddArg(x)
return true
}
// match: (XORLconst [1] (SETNE x))
// result: (SETEQ x)
for {
@ -23679,20 +23265,6 @@ func rewriteValueAMD64_OpAMD64XORLconst(v *Value) bool {
v.AddArg(x)
return true
}
// match: (XORLconst [c] (BTCLconst [d] x))
// result: (XORLconst [c ^ 1<<uint32(d)] x)
for {
c := auxIntToInt32(v.AuxInt)
if v_0.Op != OpAMD64BTCLconst {
break
}
d := auxIntToInt8(v_0.AuxInt)
x := v_0.Args[0]
v.reset(OpAMD64XORLconst)
v.AuxInt = int32ToAuxInt(c ^ 1<<uint32(d))
v.AddArg(x)
return true
}
// match: (XORLconst [c] x)
// cond: c==0
// result: x
@ -23914,7 +23486,7 @@ func rewriteValueAMD64_OpAMD64XORQ(v *Value) bool {
break
}
// match: (XORQ (MOVQconst [c]) x)
// cond: isUint64PowerOfTwo(c) && uint64(c) >= 128
// cond: isUint64PowerOfTwo(c) && uint64(c) >= 1<<31
// result: (BTCQconst [int8(log64(c))] x)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
@ -23923,7 +23495,7 @@ func rewriteValueAMD64_OpAMD64XORQ(v *Value) bool {
}
c := auxIntToInt64(v_0.AuxInt)
x := v_1
if !(isUint64PowerOfTwo(c) && uint64(c) >= 128) {
if !(isUint64PowerOfTwo(c) && uint64(c) >= 1<<31) {
continue
}
v.reset(OpAMD64BTCQconst)
@ -24008,20 +23580,6 @@ func rewriteValueAMD64_OpAMD64XORQ(v *Value) bool {
}
func rewriteValueAMD64_OpAMD64XORQconst(v *Value) bool {
v_0 := v.Args[0]
// match: (XORQconst [c] x)
// cond: isUint64PowerOfTwo(int64(c)) && uint64(c) >= 128
// result: (BTCQconst [int8(log32(c))] x)
for {
c := auxIntToInt32(v.AuxInt)
x := v_0
if !(isUint64PowerOfTwo(int64(c)) && uint64(c) >= 128) {
break
}
v.reset(OpAMD64BTCQconst)
v.AuxInt = int8ToAuxInt(int8(log32(c)))
v.AddArg(x)
return true
}
// match: (XORQconst [c] (XORQconst [d] x))
// result: (XORQconst [c ^ d] x)
for {
@ -24036,24 +23594,6 @@ func rewriteValueAMD64_OpAMD64XORQconst(v *Value) bool {
v.AddArg(x)
return true
}
// match: (XORQconst [c] (BTCQconst [d] x))
// cond: is32Bit(int64(c) ^ 1<<uint32(d))
// result: (XORQconst [c ^ 1<<uint32(d)] x)
for {
c := auxIntToInt32(v.AuxInt)
if v_0.Op != OpAMD64BTCQconst {
break
}
d := auxIntToInt8(v_0.AuxInt)
x := v_0.Args[0]
if !(is32Bit(int64(c) ^ 1<<uint32(d))) {
break
}
v.reset(OpAMD64XORQconst)
v.AuxInt = int32ToAuxInt(c ^ 1<<uint32(d))
v.AddArg(x)
return true
}
// match: (XORQconst [0] x)
// result: x
for {
@ -25670,12 +25210,12 @@ func rewriteValueAMD64_OpCtz16(v *Value) bool {
b := v.Block
typ := &b.Func.Config.Types
// match: (Ctz16 x)
// result: (BSFL (BTSLconst <typ.UInt32> [16] x))
// result: (BSFL (ORLconst <typ.UInt32> [1<<16] x))
for {
x := v_0
v.reset(OpAMD64BSFL)
v0 := b.NewValue0(v.Pos, OpAMD64BTSLconst, typ.UInt32)
v0.AuxInt = int8ToAuxInt(16)
v0 := b.NewValue0(v.Pos, OpAMD64ORLconst, typ.UInt32)
v0.AuxInt = int32ToAuxInt(1 << 16)
v0.AddArg(x)
v.AddArg(v0)
return true
@ -25848,12 +25388,12 @@ func rewriteValueAMD64_OpCtz8(v *Value) bool {
b := v.Block
typ := &b.Func.Config.Types
// match: (Ctz8 x)
// result: (BSFL (BTSLconst <typ.UInt32> [ 8] x))
// result: (BSFL (ORLconst <typ.UInt32> [1<<8 ] x))
for {
x := v_0
v.reset(OpAMD64BSFL)
v0 := b.NewValue0(v.Pos, OpAMD64BTSLconst, typ.UInt32)
v0.AuxInt = int8ToAuxInt(8)
v0 := b.NewValue0(v.Pos, OpAMD64ORLconst, typ.UInt32)
v0.AuxInt = int32ToAuxInt(1 << 8)
v0.AddArg(x)
v.AddArg(v0)
return true

View File

@ -220,10 +220,10 @@ func biton32(a, b uint32) (n uint32) {
// amd64:"BTSL"
n += b | (1 << (a & 31))
// amd64:"BTSL\t[$]31"
// amd64:"ORL\t[$]-2147483648"
n += a | (1 << 31)
// amd64:"BTSL\t[$]28"
// amd64:"ORL\t[$]268435456"
n += a | (1 << 28)
// amd64:"ORL\t[$]1"
@ -236,10 +236,10 @@ func bitoff32(a, b uint32) (n uint32) {
// amd64:"BTRL"
n += b &^ (1 << (a & 31))
// amd64:"BTRL\t[$]31"
// amd64:"ANDL\t[$]2147483647"
n += a &^ (1 << 31)
// amd64:"BTRL\t[$]28"
// amd64:"ANDL\t[$]-268435457"
n += a &^ (1 << 28)
// amd64:"ANDL\t[$]-2"
@ -252,10 +252,10 @@ func bitcompl32(a, b uint32) (n uint32) {
// amd64:"BTCL"
n += b ^ (1 << (a & 31))
// amd64:"BTCL\t[$]31"
// amd64:"XORL\t[$]-2147483648"
n += a ^ (1 << 31)
// amd64:"BTCL\t[$]28"
// amd64:"XORL\t[$]268435456"
n += a ^ (1 << 28)
// amd64:"XORL\t[$]1"

View File

@ -335,7 +335,7 @@ func TrailingZeros32(n uint32) int {
}
func TrailingZeros16(n uint16) int {
// amd64:"BSFL","BTSL\\t\\$16"
// amd64:"BSFL","ORL\\t\\$65536"
// 386:"BSFL\t"
// arm:"ORR\t\\$65536","CLZ",-"MOVHU\tR"
// arm64:"ORR\t\\$65536","RBITW","CLZW",-"MOVHU\tR",-"RBIT\t",-"CLZ\t"
@ -347,7 +347,7 @@ func TrailingZeros16(n uint16) int {
}
func TrailingZeros8(n uint8) int {
// amd64:"BSFL","BTSL\\t\\$8"
// amd64:"BSFL","ORL\\t\\$256"
// 386:"BSFL"
// arm:"ORR\t\\$256","CLZ",-"MOVBU\tR"
// arm64:"ORR\t\\$256","RBITW","CLZW",-"MOVBU\tR",-"RBIT\t",-"CLZ\t"

View File

@ -372,3 +372,32 @@ func storeTest(a []bool, v int, i int) {
// amd64: `BTL\t\$1,`,`SETCS\t3\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
a[3+i] = v&2 != 0
}
// bitOps is a codegen check for constant single-bit updates applied
// directly to 64-bit memory operands. Each statement sets (|=), clears (&^=)
// or toggles (^=) exactly one bit of one element of p, and the check comment
// above each statement pins the instruction expected on amd64.
//
// Bits 3 and 30 are expected to use ORQ/ANDQ/XORQ with an immediate operand.
// Bits 31 and 63 are expected to use BTSQ/BTRQ/BTCQ instead; per the change
// that introduced this test, those masks would otherwise need a separate
// instruction to materialize the constant.
//
// The expected operands are written relative to AX, with the offset of
// element i being 8*i bytes.
func bitOps(p *[12]uint64) {
// Set one bit in memory.
// amd64: `ORQ\t\$8, \(AX\)`
p[0] |= 8
// amd64: `ORQ\t\$1073741824, 8\(AX\)`
p[1] |= 1 << 30
// amd64: `BTSQ\t\$31, 16\(AX\)`
p[2] |= 1 << 31
// amd64: `BTSQ\t\$63, 24\(AX\)`
p[3] |= 1 << 63
// Clear one bit in memory.
// amd64: `ANDQ\t\$-9, 32\(AX\)`
p[4] &^= 8
// amd64: `ANDQ\t\$-1073741825, 40\(AX\)`
p[5] &^= 1 << 30
// amd64: `BTRQ\t\$31, 48\(AX\)`
p[6] &^= 1 << 31
// amd64: `BTRQ\t\$63, 56\(AX\)`
p[7] &^= 1 << 63
// Toggle one bit in memory.
// amd64: `XORQ\t\$8, 64\(AX\)`
p[8] ^= 8
// amd64: `XORQ\t\$1073741824, 72\(AX\)`
p[9] ^= 1 << 30
// amd64: `BTCQ\t\$31, 80\(AX\)`
p[10] ^= 1 << 31
// amd64: `BTCQ\t\$63, 88\(AX\)`
p[11] ^= 1 << 63
}