cmd/compile: don't use BTS when OR works, add direct memory BTS operations

Stop using BTSconst and friends when ORLconst can be used instead.
OR can be issued by more function units than BTS can, so it could
lead to better IPC. OR might take a few more bytes to encode, but
not a lot more.

Still use BTSconst for cases where the constant otherwise wouldn't
fit and would require a separate movabs instruction to materialize
the constant. This happens when setting bits 31-63 of 64-bit targets.

Add BTS-to-memory operations so we don't need to load/bts/store.

Fixes #61694

Change-Id: I00379608df8fb0167cb01466e97d11dec7c1596c
Reviewed-on: https://go-review.googlesource.com/c/go/+/515755
Reviewed-by: Keith Randall <khr@google.com>
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Cherry Mui <cherryyz@google.com>
This commit is contained in:
Keith Randall 2023-08-01 14:32:56 -07:00 committed by Keith Randall
parent 51cb12e83b
commit 611706b171
8 changed files with 223 additions and 683 deletions

View File

@ -714,9 +714,9 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
p.To.Offset = v.AuxInt p.To.Offset = v.AuxInt
case ssa.OpAMD64BTLconst, ssa.OpAMD64BTQconst, case ssa.OpAMD64BTLconst, ssa.OpAMD64BTQconst,
ssa.OpAMD64TESTQconst, ssa.OpAMD64TESTLconst, ssa.OpAMD64TESTWconst, ssa.OpAMD64TESTBconst, ssa.OpAMD64TESTQconst, ssa.OpAMD64TESTLconst, ssa.OpAMD64TESTWconst, ssa.OpAMD64TESTBconst,
ssa.OpAMD64BTSLconst, ssa.OpAMD64BTSQconst, ssa.OpAMD64BTSQconst,
ssa.OpAMD64BTCLconst, ssa.OpAMD64BTCQconst, ssa.OpAMD64BTCQconst,
ssa.OpAMD64BTRLconst, ssa.OpAMD64BTRQconst: ssa.OpAMD64BTRQconst:
op := v.Op op := v.Op
if op == ssa.OpAMD64BTQconst && v.AuxInt < 32 { if op == ssa.OpAMD64BTQconst && v.AuxInt < 32 {
// Emit 32-bit version because it's shorter // Emit 32-bit version because it's shorter
@ -851,7 +851,8 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
} }
fallthrough fallthrough
case ssa.OpAMD64ANDQconstmodify, ssa.OpAMD64ANDLconstmodify, ssa.OpAMD64ORQconstmodify, ssa.OpAMD64ORLconstmodify, case ssa.OpAMD64ANDQconstmodify, ssa.OpAMD64ANDLconstmodify, ssa.OpAMD64ORQconstmodify, ssa.OpAMD64ORLconstmodify,
ssa.OpAMD64XORQconstmodify, ssa.OpAMD64XORLconstmodify: ssa.OpAMD64XORQconstmodify, ssa.OpAMD64XORLconstmodify,
ssa.OpAMD64BTSQconstmodify, ssa.OpAMD64BTRQconstmodify, ssa.OpAMD64BTCQconstmodify:
sc := v.AuxValAndOff() sc := v.AuxValAndOff()
off := sc.Off64() off := sc.Off64()
val := sc.Val64() val := sc.Val64()

View File

@ -82,8 +82,8 @@
(Ctz32 x) && buildcfg.GOAMD64 >= 3 => (TZCNTL x) (Ctz32 x) && buildcfg.GOAMD64 >= 3 => (TZCNTL x)
(Ctz64 <t> x) && buildcfg.GOAMD64 < 3 => (CMOVQEQ (Select0 <t> (BSFQ x)) (MOVQconst <t> [64]) (Select1 <types.TypeFlags> (BSFQ x))) (Ctz64 <t> x) && buildcfg.GOAMD64 < 3 => (CMOVQEQ (Select0 <t> (BSFQ x)) (MOVQconst <t> [64]) (Select1 <types.TypeFlags> (BSFQ x)))
(Ctz32 x) && buildcfg.GOAMD64 < 3 => (Select0 (BSFQ (BTSQconst <typ.UInt64> [32] x))) (Ctz32 x) && buildcfg.GOAMD64 < 3 => (Select0 (BSFQ (BTSQconst <typ.UInt64> [32] x)))
(Ctz16 x) => (BSFL (BTSLconst <typ.UInt32> [16] x)) (Ctz16 x) => (BSFL (ORLconst <typ.UInt32> [1<<16] x))
(Ctz8 x) => (BSFL (BTSLconst <typ.UInt32> [ 8] x)) (Ctz8 x) => (BSFL (ORLconst <typ.UInt32> [1<<8 ] x))
(Ctz64NonZero x) && buildcfg.GOAMD64 >= 3 => (TZCNTQ x) (Ctz64NonZero x) && buildcfg.GOAMD64 >= 3 => (TZCNTQ x)
(Ctz32NonZero x) && buildcfg.GOAMD64 >= 3 => (TZCNTL x) (Ctz32NonZero x) && buildcfg.GOAMD64 >= 3 => (TZCNTL x)
@ -659,29 +659,16 @@
// Recognize bit setting (a |= 1<<b) and toggling (a ^= 1<<b) // Recognize bit setting (a |= 1<<b) and toggling (a ^= 1<<b)
(OR(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y) x) => (BTS(Q|L) x y) (OR(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y) x) => (BTS(Q|L) x y)
(XOR(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y) x) => (BTC(Q|L) x y) (XOR(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y) x) => (BTC(Q|L) x y)
// Note: only convert OR/XOR to BTS/BTC if the constant wouldn't fit in
// Convert ORconst into BTS, if the code gets smaller, with boundary being // the constant field of the OR/XOR instruction. See issue 61694.
// (ORL $40,AX is 3 bytes, ORL $80,AX is 6 bytes). ((OR|XOR)Q (MOVQconst [c]) x) && isUint64PowerOfTwo(c) && uint64(c) >= 1<<31 => (BT(S|C)Qconst [int8(log64(c))] x)
((ORQ|XORQ)const [c] x) && isUint64PowerOfTwo(int64(c)) && uint64(c) >= 128
=> (BT(S|C)Qconst [int8(log32(c))] x)
((ORL|XORL)const [c] x) && isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128
=> (BT(S|C)Lconst [int8(log32(c))] x)
((ORQ|XORQ) (MOVQconst [c]) x) && isUint64PowerOfTwo(c) && uint64(c) >= 128
=> (BT(S|C)Qconst [int8(log64(c))] x)
((ORL|XORL) (MOVLconst [c]) x) && isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128
=> (BT(S|C)Lconst [int8(log32(c))] x)
// Recognize bit clearing: a &^= 1<<b // Recognize bit clearing: a &^= 1<<b
(AND(Q|L) (NOT(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y)) x) => (BTR(Q|L) x y) (AND(Q|L) (NOT(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y)) x) => (BTR(Q|L) x y)
(ANDN(Q|L) x (SHL(Q|L) (MOV(Q|L)const [1]) y)) => (BTR(Q|L) x y) (ANDN(Q|L) x (SHL(Q|L) (MOV(Q|L)const [1]) y)) => (BTR(Q|L) x y)
(ANDQconst [c] x) && isUint64PowerOfTwo(int64(^c)) && uint64(^c) >= 128 // Note: only convert AND to BTR if the constant wouldn't fit in
=> (BTRQconst [int8(log32(^c))] x) // the constant field of the AND instruction. See issue 61694.
(ANDLconst [c] x) && isUint32PowerOfTwo(int64(^c)) && uint64(^c) >= 128 (ANDQ (MOVQconst [c]) x) && isUint64PowerOfTwo(^c) && uint64(^c) >= 1<<31 => (BTRQconst [int8(log64(^c))] x)
=> (BTRLconst [int8(log32(^c))] x)
(ANDQ (MOVQconst [c]) x) && isUint64PowerOfTwo(^c) && uint64(^c) >= 128
=> (BTRQconst [int8(log64(^c))] x)
(ANDL (MOVLconst [c]) x) && isUint32PowerOfTwo(int64(^c)) && uint64(^c) >= 128
=> (BTRLconst [int8(log32(^c))] x)
// Special-case bit patterns on first/last bit. // Special-case bit patterns on first/last bit.
// generic.rules changes ANDs of high-part/low-part masks into a couple of shifts, // generic.rules changes ANDs of high-part/low-part masks into a couple of shifts,
@ -695,9 +682,9 @@
// Special case resetting first/last bit // Special case resetting first/last bit
(SHL(L|Q)const [1] (SHR(L|Q)const [1] x)) (SHL(L|Q)const [1] (SHR(L|Q)const [1] x))
=> (BTR(L|Q)const [0] x) => (AND(L|Q)const [-2] x)
(SHRLconst [1] (SHLLconst [1] x)) (SHRLconst [1] (SHLLconst [1] x))
=> (BTRLconst [31] x) => (ANDLconst [0x7fffffff] x)
(SHRQconst [1] (SHLQconst [1] x)) (SHRQconst [1] (SHLQconst [1] x))
=> (BTRQconst [63] x) => (BTRQconst [63] x)
@ -731,10 +718,10 @@
=> (SET(B|AE)store [off] {sym} ptr (BTLconst [31] x) mem) => (SET(B|AE)store [off] {sym} ptr (BTLconst [31] x) mem)
// Fold combinations of bit ops on same bit. An example is math.Copysign(c,-1) // Fold combinations of bit ops on same bit. An example is math.Copysign(c,-1)
(BTS(Q|L)const [c] (BTR(Q|L)const [c] x)) => (BTS(Q|L)const [c] x) (BTSQconst [c] (BTRQconst [c] x)) => (BTSQconst [c] x)
(BTS(Q|L)const [c] (BTC(Q|L)const [c] x)) => (BTS(Q|L)const [c] x) (BTSQconst [c] (BTCQconst [c] x)) => (BTSQconst [c] x)
(BTR(Q|L)const [c] (BTS(Q|L)const [c] x)) => (BTR(Q|L)const [c] x) (BTRQconst [c] (BTSQconst [c] x)) => (BTRQconst [c] x)
(BTR(Q|L)const [c] (BTC(Q|L)const [c] x)) => (BTR(Q|L)const [c] x) (BTRQconst [c] (BTCQconst [c] x)) => (BTRQconst [c] x)
// Fold boolean negation into SETcc. // Fold boolean negation into SETcc.
(XORLconst [1] (SETNE x)) => (SETEQ x) (XORLconst [1] (SETNE x)) => (SETEQ x)
@ -778,31 +765,6 @@
(XOR(L|Q)const [c] (XOR(L|Q)const [d] x)) => (XOR(L|Q)const [c ^ d] x) (XOR(L|Q)const [c] (XOR(L|Q)const [d] x)) => (XOR(L|Q)const [c ^ d] x)
(OR(L|Q)const [c] (OR(L|Q)const [d] x)) => (OR(L|Q)const [c | d] x) (OR(L|Q)const [c] (OR(L|Q)const [d] x)) => (OR(L|Q)const [c | d] x)
(BTRLconst [c] (ANDLconst [d] x)) => (ANDLconst [d &^ (1<<uint32(c))] x)
(ANDLconst [c] (BTRLconst [d] x)) => (ANDLconst [c &^ (1<<uint32(d))] x)
(BTRLconst [c] (BTRLconst [d] x)) => (ANDLconst [^(1<<uint32(c) | 1<<uint32(d))] x)
(BTCLconst [c] (XORLconst [d] x)) => (XORLconst [d ^ 1<<uint32(c)] x)
(XORLconst [c] (BTCLconst [d] x)) => (XORLconst [c ^ 1<<uint32(d)] x)
(BTCLconst [c] (BTCLconst [d] x)) => (XORLconst [1<<uint32(c) | 1<<uint32(d)] x)
(BTSLconst [c] (ORLconst [d] x)) => (ORLconst [d | 1<<uint32(c)] x)
(ORLconst [c] (BTSLconst [d] x)) => (ORLconst [c | 1<<uint32(d)] x)
(BTSLconst [c] (BTSLconst [d] x)) => (ORLconst [1<<uint32(c) | 1<<uint32(d)] x)
(BTRQconst [c] (ANDQconst [d] x)) && is32Bit(int64(d) &^ (1<<uint32(c))) => (ANDQconst [d &^ (1<<uint32(c))] x)
(ANDQconst [c] (BTRQconst [d] x)) && is32Bit(int64(c) &^ (1<<uint32(d))) => (ANDQconst [c &^ (1<<uint32(d))] x)
(BTRQconst [c] (BTRQconst [d] x)) && is32Bit(^(1<<uint32(c) | 1<<uint32(d))) => (ANDQconst [^(1<<uint32(c) | 1<<uint32(d))] x)
(BTCQconst [c] (XORQconst [d] x)) && is32Bit(int64(d) ^ 1<<uint32(c)) => (XORQconst [d ^ 1<<uint32(c)] x)
(XORQconst [c] (BTCQconst [d] x)) && is32Bit(int64(c) ^ 1<<uint32(d)) => (XORQconst [c ^ 1<<uint32(d)] x)
(BTCQconst [c] (BTCQconst [d] x)) && is32Bit(1<<uint32(c) ^ 1<<uint32(d)) => (XORQconst [1<<uint32(c) ^ 1<<uint32(d)] x)
(BTSQconst [c] (ORQconst [d] x)) && is32Bit(int64(d) | 1<<uint32(c)) => (ORQconst [d | 1<<uint32(c)] x)
(ORQconst [c] (BTSQconst [d] x)) && is32Bit(int64(c) | 1<<uint32(d)) => (ORQconst [c | 1<<uint32(d)] x)
(BTSQconst [c] (BTSQconst [d] x)) && is32Bit(1<<uint32(c) | 1<<uint32(d)) => (ORQconst [1<<uint32(c) | 1<<uint32(d)] x)
(MULLconst [c] (MULLconst [d] x)) => (MULLconst [c * d] x) (MULLconst [c] (MULLconst [d] x)) => (MULLconst [c * d] x)
(MULQconst [c] (MULQconst [d] x)) && is32Bit(int64(c)*int64(d)) => (MULQconst [c * d] x) (MULQconst [c] (MULQconst [d] x)) && is32Bit(int64(c)*int64(d)) => (MULQconst [c * d] x)
@ -1422,11 +1384,8 @@
(NOTQ (MOVQconst [c])) => (MOVQconst [^c]) (NOTQ (MOVQconst [c])) => (MOVQconst [^c])
(NOTL (MOVLconst [c])) => (MOVLconst [^c]) (NOTL (MOVLconst [c])) => (MOVLconst [^c])
(BTSQconst [c] (MOVQconst [d])) => (MOVQconst [d|(1<<uint32(c))]) (BTSQconst [c] (MOVQconst [d])) => (MOVQconst [d|(1<<uint32(c))])
(BTSLconst [c] (MOVLconst [d])) => (MOVLconst [d|(1<<uint32(c))])
(BTRQconst [c] (MOVQconst [d])) => (MOVQconst [d&^(1<<uint32(c))]) (BTRQconst [c] (MOVQconst [d])) => (MOVQconst [d&^(1<<uint32(c))])
(BTRLconst [c] (MOVLconst [d])) => (MOVLconst [d&^(1<<uint32(c))])
(BTCQconst [c] (MOVQconst [d])) => (MOVQconst [d^(1<<uint32(c))]) (BTCQconst [c] (MOVQconst [d])) => (MOVQconst [d^(1<<uint32(c))])
(BTCLconst [c] (MOVLconst [d])) => (MOVLconst [d^(1<<uint32(c))])
// If c or d doesn't fit into 32 bits, then we can't construct ORQconst, // If c or d doesn't fit into 32 bits, then we can't construct ORQconst,
// but we can still constant-fold. // but we can still constant-fold.
@ -1513,6 +1472,8 @@
(MOVQstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Qload x [off] {sym} ptr mem) mem) && y.Uses==1 && clobber(y) => ((ADD|AND|OR|XOR)Qmodify [off] {sym} ptr x mem) (MOVQstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Qload x [off] {sym} ptr mem) mem) && y.Uses==1 && clobber(y) => ((ADD|AND|OR|XOR)Qmodify [off] {sym} ptr x mem)
(MOVQstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR)Q l:(MOVQload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y, l) => (MOVQstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR)Q l:(MOVQload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y, l) =>
((ADD|SUB|AND|OR|XOR)Qmodify [off] {sym} ptr x mem) ((ADD|SUB|AND|OR|XOR)Qmodify [off] {sym} ptr x mem)
(MOVQstore {sym} [off] ptr x:(BT(S|R|C)Qconst [c] l:(MOVQload {sym} [off] ptr mem)) mem) && x.Uses == 1 && l.Uses == 1 && clobber(x, l) =>
(BT(S|R|C)Qconstmodify {sym} [makeValAndOff(int32(c),off)] ptr mem)
// Merge ADDQconst and LEAQ into atomic loads. // Merge ADDQconst and LEAQ into atomic loads.
(MOV(Q|L|B)atomicload [off1] {sym} (ADDQconst [off2] ptr) mem) && is32Bit(int64(off1)+int64(off2)) => (MOV(Q|L|B)atomicload [off1] {sym} (ADDQconst [off2] ptr) mem) && is32Bit(int64(off1)+int64(off2)) =>

View File

@ -399,12 +399,27 @@ func init() {
{name: "BTSQ", argLength: 2, reg: gp21, asm: "BTSQ", resultInArg0: true, clobberFlags: true}, // set bit arg1%64 in arg0 {name: "BTSQ", argLength: 2, reg: gp21, asm: "BTSQ", resultInArg0: true, clobberFlags: true}, // set bit arg1%64 in arg0
{name: "BTLconst", argLength: 1, reg: gp1flags, asm: "BTL", typ: "Flags", aux: "Int8"}, // test whether bit auxint in arg0 is set, 0 <= auxint < 32 {name: "BTLconst", argLength: 1, reg: gp1flags, asm: "BTL", typ: "Flags", aux: "Int8"}, // test whether bit auxint in arg0 is set, 0 <= auxint < 32
{name: "BTQconst", argLength: 1, reg: gp1flags, asm: "BTQ", typ: "Flags", aux: "Int8"}, // test whether bit auxint in arg0 is set, 0 <= auxint < 64 {name: "BTQconst", argLength: 1, reg: gp1flags, asm: "BTQ", typ: "Flags", aux: "Int8"}, // test whether bit auxint in arg0 is set, 0 <= auxint < 64
{name: "BTCLconst", argLength: 1, reg: gp11, asm: "BTCL", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // complement bit auxint in arg0, 0 <= auxint < 32 {name: "BTCQconst", argLength: 1, reg: gp11, asm: "BTCQ", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // complement bit auxint in arg0, 31 <= auxint < 64
{name: "BTCQconst", argLength: 1, reg: gp11, asm: "BTCQ", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // complement bit auxint in arg0, 0 <= auxint < 64 {name: "BTRQconst", argLength: 1, reg: gp11, asm: "BTRQ", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // reset bit auxint in arg0, 31 <= auxint < 64
{name: "BTRLconst", argLength: 1, reg: gp11, asm: "BTRL", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // reset bit auxint in arg0, 0 <= auxint < 32 {name: "BTSQconst", argLength: 1, reg: gp11, asm: "BTSQ", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // set bit auxint in arg0, 31 <= auxint < 64
{name: "BTRQconst", argLength: 1, reg: gp11, asm: "BTRQ", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // reset bit auxint in arg0, 0 <= auxint < 64
{name: "BTSLconst", argLength: 1, reg: gp11, asm: "BTSL", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // set bit auxint in arg0, 0 <= auxint < 32 // BT[SRC]Qconstmodify
{name: "BTSQconst", argLength: 1, reg: gp11, asm: "BTSQ", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // set bit auxint in arg0, 0 <= auxint < 64 //
// S: set bit
// R: reset (clear) bit
// C: complement bit
//
// Apply operation to bit ValAndOff(AuxInt).Val() in the 64 bits at
// memory address arg0+ValAndOff(AuxInt).Off()+aux
// Bit index must be in range (31-63).
// (We use OR/AND/XOR for thinner targets and lower bit indexes.)
// arg1=mem, returns mem
//
// Note that there aren't non-const versions of these instructions.
// Well, there are such instructions, but they are slow and weird so we don't use them.
{name: "BTSQconstmodify", argLength: 2, reg: gpstoreconst, asm: "BTSQ", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"},
{name: "BTRQconstmodify", argLength: 2, reg: gpstoreconst, asm: "BTRQ", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"},
{name: "BTCQconstmodify", argLength: 2, reg: gpstoreconst, asm: "BTCQ", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"},
// TESTx: compare (arg0 & arg1) to 0 // TESTx: compare (arg0 & arg1) to 0
{name: "TESTQ", argLength: 2, reg: gp2flags, commutative: true, asm: "TESTQ", typ: "Flags"}, {name: "TESTQ", argLength: 2, reg: gp2flags, commutative: true, asm: "TESTQ", typ: "Flags"},

View File

@ -716,12 +716,12 @@ const (
OpAMD64BTSQ OpAMD64BTSQ
OpAMD64BTLconst OpAMD64BTLconst
OpAMD64BTQconst OpAMD64BTQconst
OpAMD64BTCLconst
OpAMD64BTCQconst OpAMD64BTCQconst
OpAMD64BTRLconst
OpAMD64BTRQconst OpAMD64BTRQconst
OpAMD64BTSLconst
OpAMD64BTSQconst OpAMD64BTSQconst
OpAMD64BTSQconstmodify
OpAMD64BTRQconstmodify
OpAMD64BTCQconstmodify
OpAMD64TESTQ OpAMD64TESTQ
OpAMD64TESTL OpAMD64TESTL
OpAMD64TESTW OpAMD64TESTW
@ -8778,22 +8778,6 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "BTCLconst",
auxType: auxInt8,
argLen: 1,
resultInArg0: true,
clobberFlags: true,
asm: x86.ABTCL,
reg: regInfo{
inputs: []inputInfo{
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
outputs: []outputInfo{
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
},
},
{ {
name: "BTCQconst", name: "BTCQconst",
auxType: auxInt8, auxType: auxInt8,
@ -8810,22 +8794,6 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "BTRLconst",
auxType: auxInt8,
argLen: 1,
resultInArg0: true,
clobberFlags: true,
asm: x86.ABTRL,
reg: regInfo{
inputs: []inputInfo{
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
outputs: []outputInfo{
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
},
},
{ {
name: "BTRQconst", name: "BTRQconst",
auxType: auxInt8, auxType: auxInt8,
@ -8842,22 +8810,6 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "BTSLconst",
auxType: auxInt8,
argLen: 1,
resultInArg0: true,
clobberFlags: true,
asm: x86.ABTSL,
reg: regInfo{
inputs: []inputInfo{
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
outputs: []outputInfo{
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
},
},
{ {
name: "BTSQconst", name: "BTSQconst",
auxType: auxInt8, auxType: auxInt8,
@ -8874,6 +8826,48 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "BTSQconstmodify",
auxType: auxSymValAndOff,
argLen: 2,
clobberFlags: true,
faultOnNilArg0: true,
symEffect: SymRead | SymWrite,
asm: x86.ABTSQ,
reg: regInfo{
inputs: []inputInfo{
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
},
},
},
{
name: "BTRQconstmodify",
auxType: auxSymValAndOff,
argLen: 2,
clobberFlags: true,
faultOnNilArg0: true,
symEffect: SymRead | SymWrite,
asm: x86.ABTRQ,
reg: regInfo{
inputs: []inputInfo{
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
},
},
},
{
name: "BTCQconstmodify",
auxType: auxSymValAndOff,
argLen: 2,
clobberFlags: true,
faultOnNilArg0: true,
symEffect: SymRead | SymWrite,
asm: x86.ABTCQ,
reg: regInfo{
inputs: []inputInfo{
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
},
},
},
{ {
name: "TESTQ", name: "TESTQ",
argLen: 2, argLen: 2,

View File

@ -73,20 +73,14 @@ func rewriteValueAMD64(v *Value) bool {
return rewriteValueAMD64_OpAMD64BSWAPL(v) return rewriteValueAMD64_OpAMD64BSWAPL(v)
case OpAMD64BSWAPQ: case OpAMD64BSWAPQ:
return rewriteValueAMD64_OpAMD64BSWAPQ(v) return rewriteValueAMD64_OpAMD64BSWAPQ(v)
case OpAMD64BTCLconst:
return rewriteValueAMD64_OpAMD64BTCLconst(v)
case OpAMD64BTCQconst: case OpAMD64BTCQconst:
return rewriteValueAMD64_OpAMD64BTCQconst(v) return rewriteValueAMD64_OpAMD64BTCQconst(v)
case OpAMD64BTLconst: case OpAMD64BTLconst:
return rewriteValueAMD64_OpAMD64BTLconst(v) return rewriteValueAMD64_OpAMD64BTLconst(v)
case OpAMD64BTQconst: case OpAMD64BTQconst:
return rewriteValueAMD64_OpAMD64BTQconst(v) return rewriteValueAMD64_OpAMD64BTQconst(v)
case OpAMD64BTRLconst:
return rewriteValueAMD64_OpAMD64BTRLconst(v)
case OpAMD64BTRQconst: case OpAMD64BTRQconst:
return rewriteValueAMD64_OpAMD64BTRQconst(v) return rewriteValueAMD64_OpAMD64BTRQconst(v)
case OpAMD64BTSLconst:
return rewriteValueAMD64_OpAMD64BTSLconst(v)
case OpAMD64BTSQconst: case OpAMD64BTSQconst:
return rewriteValueAMD64_OpAMD64BTSQconst(v) return rewriteValueAMD64_OpAMD64BTSQconst(v)
case OpAMD64CMOVLCC: case OpAMD64CMOVLCC:
@ -2626,26 +2620,6 @@ func rewriteValueAMD64_OpAMD64ANDL(v *Value) bool {
} }
break break
} }
// match: (ANDL (MOVLconst [c]) x)
// cond: isUint32PowerOfTwo(int64(^c)) && uint64(^c) >= 128
// result: (BTRLconst [int8(log32(^c))] x)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpAMD64MOVLconst {
continue
}
c := auxIntToInt32(v_0.AuxInt)
x := v_1
if !(isUint32PowerOfTwo(int64(^c)) && uint64(^c) >= 128) {
continue
}
v.reset(OpAMD64BTRLconst)
v.AuxInt = int8ToAuxInt(int8(log32(^c)))
v.AddArg(x)
return true
}
break
}
// match: (ANDL x (MOVLconst [c])) // match: (ANDL x (MOVLconst [c]))
// result: (ANDLconst [c] x) // result: (ANDLconst [c] x)
for { for {
@ -2754,20 +2728,6 @@ func rewriteValueAMD64_OpAMD64ANDL(v *Value) bool {
} }
func rewriteValueAMD64_OpAMD64ANDLconst(v *Value) bool { func rewriteValueAMD64_OpAMD64ANDLconst(v *Value) bool {
v_0 := v.Args[0] v_0 := v.Args[0]
// match: (ANDLconst [c] x)
// cond: isUint32PowerOfTwo(int64(^c)) && uint64(^c) >= 128
// result: (BTRLconst [int8(log32(^c))] x)
for {
c := auxIntToInt32(v.AuxInt)
x := v_0
if !(isUint32PowerOfTwo(int64(^c)) && uint64(^c) >= 128) {
break
}
v.reset(OpAMD64BTRLconst)
v.AuxInt = int8ToAuxInt(int8(log32(^c)))
v.AddArg(x)
return true
}
// match: (ANDLconst [c] (ANDLconst [d] x)) // match: (ANDLconst [c] (ANDLconst [d] x))
// result: (ANDLconst [c & d] x) // result: (ANDLconst [c & d] x)
for { for {
@ -2782,20 +2742,6 @@ func rewriteValueAMD64_OpAMD64ANDLconst(v *Value) bool {
v.AddArg(x) v.AddArg(x)
return true return true
} }
// match: (ANDLconst [c] (BTRLconst [d] x))
// result: (ANDLconst [c &^ (1<<uint32(d))] x)
for {
c := auxIntToInt32(v.AuxInt)
if v_0.Op != OpAMD64BTRLconst {
break
}
d := auxIntToInt8(v_0.AuxInt)
x := v_0.Args[0]
v.reset(OpAMD64ANDLconst)
v.AuxInt = int32ToAuxInt(c &^ (1 << uint32(d)))
v.AddArg(x)
return true
}
// match: (ANDLconst [ 0xFF] x) // match: (ANDLconst [ 0xFF] x)
// result: (MOVBQZX x) // result: (MOVBQZX x)
for { for {
@ -3099,7 +3045,7 @@ func rewriteValueAMD64_OpAMD64ANDQ(v *Value) bool {
break break
} }
// match: (ANDQ (MOVQconst [c]) x) // match: (ANDQ (MOVQconst [c]) x)
// cond: isUint64PowerOfTwo(^c) && uint64(^c) >= 128 // cond: isUint64PowerOfTwo(^c) && uint64(^c) >= 1<<31
// result: (BTRQconst [int8(log64(^c))] x) // result: (BTRQconst [int8(log64(^c))] x)
for { for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
@ -3108,7 +3054,7 @@ func rewriteValueAMD64_OpAMD64ANDQ(v *Value) bool {
} }
c := auxIntToInt64(v_0.AuxInt) c := auxIntToInt64(v_0.AuxInt)
x := v_1 x := v_1
if !(isUint64PowerOfTwo(^c) && uint64(^c) >= 128) { if !(isUint64PowerOfTwo(^c) && uint64(^c) >= 1<<31) {
continue continue
} }
v.reset(OpAMD64BTRQconst) v.reset(OpAMD64BTRQconst)
@ -3230,20 +3176,6 @@ func rewriteValueAMD64_OpAMD64ANDQ(v *Value) bool {
} }
func rewriteValueAMD64_OpAMD64ANDQconst(v *Value) bool { func rewriteValueAMD64_OpAMD64ANDQconst(v *Value) bool {
v_0 := v.Args[0] v_0 := v.Args[0]
// match: (ANDQconst [c] x)
// cond: isUint64PowerOfTwo(int64(^c)) && uint64(^c) >= 128
// result: (BTRQconst [int8(log32(^c))] x)
for {
c := auxIntToInt32(v.AuxInt)
x := v_0
if !(isUint64PowerOfTwo(int64(^c)) && uint64(^c) >= 128) {
break
}
v.reset(OpAMD64BTRQconst)
v.AuxInt = int8ToAuxInt(int8(log32(^c)))
v.AddArg(x)
return true
}
// match: (ANDQconst [c] (ANDQconst [d] x)) // match: (ANDQconst [c] (ANDQconst [d] x))
// result: (ANDQconst [c & d] x) // result: (ANDQconst [c & d] x)
for { for {
@ -3258,24 +3190,6 @@ func rewriteValueAMD64_OpAMD64ANDQconst(v *Value) bool {
v.AddArg(x) v.AddArg(x)
return true return true
} }
// match: (ANDQconst [c] (BTRQconst [d] x))
// cond: is32Bit(int64(c) &^ (1<<uint32(d)))
// result: (ANDQconst [c &^ (1<<uint32(d))] x)
for {
c := auxIntToInt32(v.AuxInt)
if v_0.Op != OpAMD64BTRQconst {
break
}
d := auxIntToInt8(v_0.AuxInt)
x := v_0.Args[0]
if !(is32Bit(int64(c) &^ (1 << uint32(d)))) {
break
}
v.reset(OpAMD64ANDQconst)
v.AuxInt = int32ToAuxInt(c &^ (1 << uint32(d)))
v.AddArg(x)
return true
}
// match: (ANDQconst [ 0xFF] x) // match: (ANDQconst [ 0xFF] x)
// result: (MOVBQZX x) // result: (MOVBQZX x)
for { for {
@ -3677,88 +3591,8 @@ func rewriteValueAMD64_OpAMD64BSWAPQ(v *Value) bool {
} }
return false return false
} }
func rewriteValueAMD64_OpAMD64BTCLconst(v *Value) bool {
v_0 := v.Args[0]
// match: (BTCLconst [c] (XORLconst [d] x))
// result: (XORLconst [d ^ 1<<uint32(c)] x)
for {
c := auxIntToInt8(v.AuxInt)
if v_0.Op != OpAMD64XORLconst {
break
}
d := auxIntToInt32(v_0.AuxInt)
x := v_0.Args[0]
v.reset(OpAMD64XORLconst)
v.AuxInt = int32ToAuxInt(d ^ 1<<uint32(c))
v.AddArg(x)
return true
}
// match: (BTCLconst [c] (BTCLconst [d] x))
// result: (XORLconst [1<<uint32(c) | 1<<uint32(d)] x)
for {
c := auxIntToInt8(v.AuxInt)
if v_0.Op != OpAMD64BTCLconst {
break
}
d := auxIntToInt8(v_0.AuxInt)
x := v_0.Args[0]
v.reset(OpAMD64XORLconst)
v.AuxInt = int32ToAuxInt(1<<uint32(c) | 1<<uint32(d))
v.AddArg(x)
return true
}
// match: (BTCLconst [c] (MOVLconst [d]))
// result: (MOVLconst [d^(1<<uint32(c))])
for {
c := auxIntToInt8(v.AuxInt)
if v_0.Op != OpAMD64MOVLconst {
break
}
d := auxIntToInt32(v_0.AuxInt)
v.reset(OpAMD64MOVLconst)
v.AuxInt = int32ToAuxInt(d ^ (1 << uint32(c)))
return true
}
return false
}
func rewriteValueAMD64_OpAMD64BTCQconst(v *Value) bool { func rewriteValueAMD64_OpAMD64BTCQconst(v *Value) bool {
v_0 := v.Args[0] v_0 := v.Args[0]
// match: (BTCQconst [c] (XORQconst [d] x))
// cond: is32Bit(int64(d) ^ 1<<uint32(c))
// result: (XORQconst [d ^ 1<<uint32(c)] x)
for {
c := auxIntToInt8(v.AuxInt)
if v_0.Op != OpAMD64XORQconst {
break
}
d := auxIntToInt32(v_0.AuxInt)
x := v_0.Args[0]
if !(is32Bit(int64(d) ^ 1<<uint32(c))) {
break
}
v.reset(OpAMD64XORQconst)
v.AuxInt = int32ToAuxInt(d ^ 1<<uint32(c))
v.AddArg(x)
return true
}
// match: (BTCQconst [c] (BTCQconst [d] x))
// cond: is32Bit(1<<uint32(c) ^ 1<<uint32(d))
// result: (XORQconst [1<<uint32(c) ^ 1<<uint32(d)] x)
for {
c := auxIntToInt8(v.AuxInt)
if v_0.Op != OpAMD64BTCQconst {
break
}
d := auxIntToInt8(v_0.AuxInt)
x := v_0.Args[0]
if !(is32Bit(1<<uint32(c) ^ 1<<uint32(d))) {
break
}
v.reset(OpAMD64XORQconst)
v.AuxInt = int32ToAuxInt(1<<uint32(c) ^ 1<<uint32(d))
v.AddArg(x)
return true
}
// match: (BTCQconst [c] (MOVQconst [d])) // match: (BTCQconst [c] (MOVQconst [d]))
// result: (MOVQconst [d^(1<<uint32(c))]) // result: (MOVQconst [d^(1<<uint32(c))])
for { for {
@ -3953,76 +3787,6 @@ func rewriteValueAMD64_OpAMD64BTQconst(v *Value) bool {
} }
return false return false
} }
func rewriteValueAMD64_OpAMD64BTRLconst(v *Value) bool {
v_0 := v.Args[0]
// match: (BTRLconst [c] (BTSLconst [c] x))
// result: (BTRLconst [c] x)
for {
c := auxIntToInt8(v.AuxInt)
if v_0.Op != OpAMD64BTSLconst || auxIntToInt8(v_0.AuxInt) != c {
break
}
x := v_0.Args[0]
v.reset(OpAMD64BTRLconst)
v.AuxInt = int8ToAuxInt(c)
v.AddArg(x)
return true
}
// match: (BTRLconst [c] (BTCLconst [c] x))
// result: (BTRLconst [c] x)
for {
c := auxIntToInt8(v.AuxInt)
if v_0.Op != OpAMD64BTCLconst || auxIntToInt8(v_0.AuxInt) != c {
break
}
x := v_0.Args[0]
v.reset(OpAMD64BTRLconst)
v.AuxInt = int8ToAuxInt(c)
v.AddArg(x)
return true
}
// match: (BTRLconst [c] (ANDLconst [d] x))
// result: (ANDLconst [d &^ (1<<uint32(c))] x)
for {
c := auxIntToInt8(v.AuxInt)
if v_0.Op != OpAMD64ANDLconst {
break
}
d := auxIntToInt32(v_0.AuxInt)
x := v_0.Args[0]
v.reset(OpAMD64ANDLconst)
v.AuxInt = int32ToAuxInt(d &^ (1 << uint32(c)))
v.AddArg(x)
return true
}
// match: (BTRLconst [c] (BTRLconst [d] x))
// result: (ANDLconst [^(1<<uint32(c) | 1<<uint32(d))] x)
for {
c := auxIntToInt8(v.AuxInt)
if v_0.Op != OpAMD64BTRLconst {
break
}
d := auxIntToInt8(v_0.AuxInt)
x := v_0.Args[0]
v.reset(OpAMD64ANDLconst)
v.AuxInt = int32ToAuxInt(^(1<<uint32(c) | 1<<uint32(d)))
v.AddArg(x)
return true
}
// match: (BTRLconst [c] (MOVLconst [d]))
// result: (MOVLconst [d&^(1<<uint32(c))])
for {
c := auxIntToInt8(v.AuxInt)
if v_0.Op != OpAMD64MOVLconst {
break
}
d := auxIntToInt32(v_0.AuxInt)
v.reset(OpAMD64MOVLconst)
v.AuxInt = int32ToAuxInt(d &^ (1 << uint32(c)))
return true
}
return false
}
func rewriteValueAMD64_OpAMD64BTRQconst(v *Value) bool { func rewriteValueAMD64_OpAMD64BTRQconst(v *Value) bool {
v_0 := v.Args[0] v_0 := v.Args[0]
// match: (BTRQconst [c] (BTSQconst [c] x)) // match: (BTRQconst [c] (BTSQconst [c] x))
@ -4051,42 +3815,6 @@ func rewriteValueAMD64_OpAMD64BTRQconst(v *Value) bool {
v.AddArg(x) v.AddArg(x)
return true return true
} }
// match: (BTRQconst [c] (ANDQconst [d] x))
// cond: is32Bit(int64(d) &^ (1<<uint32(c)))
// result: (ANDQconst [d &^ (1<<uint32(c))] x)
for {
c := auxIntToInt8(v.AuxInt)
if v_0.Op != OpAMD64ANDQconst {
break
}
d := auxIntToInt32(v_0.AuxInt)
x := v_0.Args[0]
if !(is32Bit(int64(d) &^ (1 << uint32(c)))) {
break
}
v.reset(OpAMD64ANDQconst)
v.AuxInt = int32ToAuxInt(d &^ (1 << uint32(c)))
v.AddArg(x)
return true
}
// match: (BTRQconst [c] (BTRQconst [d] x))
// cond: is32Bit(^(1<<uint32(c) | 1<<uint32(d)))
// result: (ANDQconst [^(1<<uint32(c) | 1<<uint32(d))] x)
for {
c := auxIntToInt8(v.AuxInt)
if v_0.Op != OpAMD64BTRQconst {
break
}
d := auxIntToInt8(v_0.AuxInt)
x := v_0.Args[0]
if !(is32Bit(^(1<<uint32(c) | 1<<uint32(d)))) {
break
}
v.reset(OpAMD64ANDQconst)
v.AuxInt = int32ToAuxInt(^(1<<uint32(c) | 1<<uint32(d)))
v.AddArg(x)
return true
}
// match: (BTRQconst [c] (MOVQconst [d])) // match: (BTRQconst [c] (MOVQconst [d]))
// result: (MOVQconst [d&^(1<<uint32(c))]) // result: (MOVQconst [d&^(1<<uint32(c))])
for { for {
@ -4101,76 +3829,6 @@ func rewriteValueAMD64_OpAMD64BTRQconst(v *Value) bool {
} }
return false return false
} }
func rewriteValueAMD64_OpAMD64BTSLconst(v *Value) bool {
v_0 := v.Args[0]
// match: (BTSLconst [c] (BTRLconst [c] x))
// result: (BTSLconst [c] x)
for {
c := auxIntToInt8(v.AuxInt)
if v_0.Op != OpAMD64BTRLconst || auxIntToInt8(v_0.AuxInt) != c {
break
}
x := v_0.Args[0]
v.reset(OpAMD64BTSLconst)
v.AuxInt = int8ToAuxInt(c)
v.AddArg(x)
return true
}
// match: (BTSLconst [c] (BTCLconst [c] x))
// result: (BTSLconst [c] x)
for {
c := auxIntToInt8(v.AuxInt)
if v_0.Op != OpAMD64BTCLconst || auxIntToInt8(v_0.AuxInt) != c {
break
}
x := v_0.Args[0]
v.reset(OpAMD64BTSLconst)
v.AuxInt = int8ToAuxInt(c)
v.AddArg(x)
return true
}
// match: (BTSLconst [c] (ORLconst [d] x))
// result: (ORLconst [d | 1<<uint32(c)] x)
for {
c := auxIntToInt8(v.AuxInt)
if v_0.Op != OpAMD64ORLconst {
break
}
d := auxIntToInt32(v_0.AuxInt)
x := v_0.Args[0]
v.reset(OpAMD64ORLconst)
v.AuxInt = int32ToAuxInt(d | 1<<uint32(c))
v.AddArg(x)
return true
}
// match: (BTSLconst [c] (BTSLconst [d] x))
// result: (ORLconst [1<<uint32(c) | 1<<uint32(d)] x)
for {
c := auxIntToInt8(v.AuxInt)
if v_0.Op != OpAMD64BTSLconst {
break
}
d := auxIntToInt8(v_0.AuxInt)
x := v_0.Args[0]
v.reset(OpAMD64ORLconst)
v.AuxInt = int32ToAuxInt(1<<uint32(c) | 1<<uint32(d))
v.AddArg(x)
return true
}
// match: (BTSLconst [c] (MOVLconst [d]))
// result: (MOVLconst [d|(1<<uint32(c))])
for {
c := auxIntToInt8(v.AuxInt)
if v_0.Op != OpAMD64MOVLconst {
break
}
d := auxIntToInt32(v_0.AuxInt)
v.reset(OpAMD64MOVLconst)
v.AuxInt = int32ToAuxInt(d | (1 << uint32(c)))
return true
}
return false
}
func rewriteValueAMD64_OpAMD64BTSQconst(v *Value) bool { func rewriteValueAMD64_OpAMD64BTSQconst(v *Value) bool {
v_0 := v.Args[0] v_0 := v.Args[0]
// match: (BTSQconst [c] (BTRQconst [c] x)) // match: (BTSQconst [c] (BTRQconst [c] x))
@ -4199,42 +3857,6 @@ func rewriteValueAMD64_OpAMD64BTSQconst(v *Value) bool {
v.AddArg(x) v.AddArg(x)
return true return true
} }
// match: (BTSQconst [c] (ORQconst [d] x))
// cond: is32Bit(int64(d) | 1<<uint32(c))
// result: (ORQconst [d | 1<<uint32(c)] x)
for {
c := auxIntToInt8(v.AuxInt)
if v_0.Op != OpAMD64ORQconst {
break
}
d := auxIntToInt32(v_0.AuxInt)
x := v_0.Args[0]
if !(is32Bit(int64(d) | 1<<uint32(c))) {
break
}
v.reset(OpAMD64ORQconst)
v.AuxInt = int32ToAuxInt(d | 1<<uint32(c))
v.AddArg(x)
return true
}
// match: (BTSQconst [c] (BTSQconst [d] x))
// cond: is32Bit(1<<uint32(c) | 1<<uint32(d))
// result: (ORQconst [1<<uint32(c) | 1<<uint32(d)] x)
for {
c := auxIntToInt8(v.AuxInt)
if v_0.Op != OpAMD64BTSQconst {
break
}
d := auxIntToInt8(v_0.AuxInt)
x := v_0.Args[0]
if !(is32Bit(1<<uint32(c) | 1<<uint32(d))) {
break
}
v.reset(OpAMD64ORQconst)
v.AuxInt = int32ToAuxInt(1<<uint32(c) | 1<<uint32(d))
v.AddArg(x)
return true
}
// match: (BTSQconst [c] (MOVQconst [d])) // match: (BTSQconst [c] (MOVQconst [d]))
// result: (MOVQconst [d|(1<<uint32(c))]) // result: (MOVQconst [d|(1<<uint32(c))])
for { for {
@ -12306,6 +11928,84 @@ func rewriteValueAMD64_OpAMD64MOVQstore(v *Value) bool {
} }
break break
} }
// match: (MOVQstore {sym} [off] ptr x:(BTSQconst [c] l:(MOVQload {sym} [off] ptr mem)) mem)
// cond: x.Uses == 1 && l.Uses == 1 && clobber(x, l)
// result: (BTSQconstmodify {sym} [makeValAndOff(int32(c),off)] ptr mem)
for {
off := auxIntToInt32(v.AuxInt)
sym := auxToSym(v.Aux)
ptr := v_0
x := v_1
if x.Op != OpAMD64BTSQconst {
break
}
c := auxIntToInt8(x.AuxInt)
l := x.Args[0]
if l.Op != OpAMD64MOVQload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym {
break
}
mem := l.Args[1]
if ptr != l.Args[0] || mem != v_2 || !(x.Uses == 1 && l.Uses == 1 && clobber(x, l)) {
break
}
v.reset(OpAMD64BTSQconstmodify)
v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(c), off))
v.Aux = symToAux(sym)
v.AddArg2(ptr, mem)
return true
}
// match: (MOVQstore {sym} [off] ptr x:(BTRQconst [c] l:(MOVQload {sym} [off] ptr mem)) mem)
// cond: x.Uses == 1 && l.Uses == 1 && clobber(x, l)
// result: (BTRQconstmodify {sym} [makeValAndOff(int32(c),off)] ptr mem)
for {
off := auxIntToInt32(v.AuxInt)
sym := auxToSym(v.Aux)
ptr := v_0
x := v_1
if x.Op != OpAMD64BTRQconst {
break
}
c := auxIntToInt8(x.AuxInt)
l := x.Args[0]
if l.Op != OpAMD64MOVQload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym {
break
}
mem := l.Args[1]
if ptr != l.Args[0] || mem != v_2 || !(x.Uses == 1 && l.Uses == 1 && clobber(x, l)) {
break
}
v.reset(OpAMD64BTRQconstmodify)
v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(c), off))
v.Aux = symToAux(sym)
v.AddArg2(ptr, mem)
return true
}
// match: (MOVQstore {sym} [off] ptr x:(BTCQconst [c] l:(MOVQload {sym} [off] ptr mem)) mem)
// cond: x.Uses == 1 && l.Uses == 1 && clobber(x, l)
// result: (BTCQconstmodify {sym} [makeValAndOff(int32(c),off)] ptr mem)
for {
off := auxIntToInt32(v.AuxInt)
sym := auxToSym(v.Aux)
ptr := v_0
x := v_1
if x.Op != OpAMD64BTCQconst {
break
}
c := auxIntToInt8(x.AuxInt)
l := x.Args[0]
if l.Op != OpAMD64MOVQload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym {
break
}
mem := l.Args[1]
if ptr != l.Args[0] || mem != v_2 || !(x.Uses == 1 && l.Uses == 1 && clobber(x, l)) {
break
}
v.reset(OpAMD64BTCQconstmodify)
v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(c), off))
v.Aux = symToAux(sym)
v.AddArg2(ptr, mem)
return true
}
// match: (MOVQstore [off] {sym} ptr a:(ADDQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem) // match: (MOVQstore [off] {sym} ptr a:(ADDQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
// cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && clobber(l, a) // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && clobber(l, a)
// result: (ADDQconstmodify {sym} [makeValAndOff(int32(c),off)] ptr mem) // result: (ADDQconstmodify {sym} [makeValAndOff(int32(c),off)] ptr mem)
@ -14643,26 +14343,6 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
} }
break break
} }
// match: (ORL (MOVLconst [c]) x)
// cond: isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128
// result: (BTSLconst [int8(log32(c))] x)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpAMD64MOVLconst {
continue
}
c := auxIntToInt32(v_0.AuxInt)
x := v_1
if !(isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128) {
continue
}
v.reset(OpAMD64BTSLconst)
v.AuxInt = int8ToAuxInt(int8(log32(c)))
v.AddArg(x)
return true
}
break
}
// match: (ORL x (MOVLconst [c])) // match: (ORL x (MOVLconst [c]))
// result: (ORLconst [c] x) // result: (ORLconst [c] x)
for { for {
@ -14718,20 +14398,6 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
} }
func rewriteValueAMD64_OpAMD64ORLconst(v *Value) bool { func rewriteValueAMD64_OpAMD64ORLconst(v *Value) bool {
v_0 := v.Args[0] v_0 := v.Args[0]
// match: (ORLconst [c] x)
// cond: isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128
// result: (BTSLconst [int8(log32(c))] x)
for {
c := auxIntToInt32(v.AuxInt)
x := v_0
if !(isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128) {
break
}
v.reset(OpAMD64BTSLconst)
v.AuxInt = int8ToAuxInt(int8(log32(c)))
v.AddArg(x)
return true
}
// match: (ORLconst [c] (ORLconst [d] x)) // match: (ORLconst [c] (ORLconst [d] x))
// result: (ORLconst [c | d] x) // result: (ORLconst [c | d] x)
for { for {
@ -14746,20 +14412,6 @@ func rewriteValueAMD64_OpAMD64ORLconst(v *Value) bool {
v.AddArg(x) v.AddArg(x)
return true return true
} }
// match: (ORLconst [c] (BTSLconst [d] x))
// result: (ORLconst [c | 1<<uint32(d)] x)
for {
c := auxIntToInt32(v.AuxInt)
if v_0.Op != OpAMD64BTSLconst {
break
}
d := auxIntToInt8(v_0.AuxInt)
x := v_0.Args[0]
v.reset(OpAMD64ORLconst)
v.AuxInt = int32ToAuxInt(c | 1<<uint32(d))
v.AddArg(x)
return true
}
// match: (ORLconst [c] x) // match: (ORLconst [c] x)
// cond: c==0 // cond: c==0
// result: x // result: x
@ -14993,7 +14645,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
break break
} }
// match: (ORQ (MOVQconst [c]) x) // match: (ORQ (MOVQconst [c]) x)
// cond: isUint64PowerOfTwo(c) && uint64(c) >= 128 // cond: isUint64PowerOfTwo(c) && uint64(c) >= 1<<31
// result: (BTSQconst [int8(log64(c))] x) // result: (BTSQconst [int8(log64(c))] x)
for { for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
@ -15002,7 +14654,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
} }
c := auxIntToInt64(v_0.AuxInt) c := auxIntToInt64(v_0.AuxInt)
x := v_1 x := v_1
if !(isUint64PowerOfTwo(c) && uint64(c) >= 128) { if !(isUint64PowerOfTwo(c) && uint64(c) >= 1<<31) {
continue continue
} }
v.reset(OpAMD64BTSQconst) v.reset(OpAMD64BTSQconst)
@ -15201,20 +14853,6 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
} }
func rewriteValueAMD64_OpAMD64ORQconst(v *Value) bool { func rewriteValueAMD64_OpAMD64ORQconst(v *Value) bool {
v_0 := v.Args[0] v_0 := v.Args[0]
// match: (ORQconst [c] x)
// cond: isUint64PowerOfTwo(int64(c)) && uint64(c) >= 128
// result: (BTSQconst [int8(log32(c))] x)
for {
c := auxIntToInt32(v.AuxInt)
x := v_0
if !(isUint64PowerOfTwo(int64(c)) && uint64(c) >= 128) {
break
}
v.reset(OpAMD64BTSQconst)
v.AuxInt = int8ToAuxInt(int8(log32(c)))
v.AddArg(x)
return true
}
// match: (ORQconst [c] (ORQconst [d] x)) // match: (ORQconst [c] (ORQconst [d] x))
// result: (ORQconst [c | d] x) // result: (ORQconst [c | d] x)
for { for {
@ -15229,24 +14867,6 @@ func rewriteValueAMD64_OpAMD64ORQconst(v *Value) bool {
v.AddArg(x) v.AddArg(x)
return true return true
} }
// match: (ORQconst [c] (BTSQconst [d] x))
// cond: is32Bit(int64(c) | 1<<uint32(d))
// result: (ORQconst [c | 1<<uint32(d)] x)
for {
c := auxIntToInt32(v.AuxInt)
if v_0.Op != OpAMD64BTSQconst {
break
}
d := auxIntToInt8(v_0.AuxInt)
x := v_0.Args[0]
if !(is32Bit(int64(c) | 1<<uint32(d))) {
break
}
v.reset(OpAMD64ORQconst)
v.AuxInt = int32ToAuxInt(c | 1<<uint32(d))
v.AddArg(x)
return true
}
// match: (ORQconst [0] x) // match: (ORQconst [0] x)
// result: x // result: x
for { for {
@ -21058,14 +20678,14 @@ func rewriteValueAMD64_OpAMD64SHLL(v *Value) bool {
func rewriteValueAMD64_OpAMD64SHLLconst(v *Value) bool { func rewriteValueAMD64_OpAMD64SHLLconst(v *Value) bool {
v_0 := v.Args[0] v_0 := v.Args[0]
// match: (SHLLconst [1] (SHRLconst [1] x)) // match: (SHLLconst [1] (SHRLconst [1] x))
// result: (BTRLconst [0] x) // result: (ANDLconst [-2] x)
for { for {
if auxIntToInt8(v.AuxInt) != 1 || v_0.Op != OpAMD64SHRLconst || auxIntToInt8(v_0.AuxInt) != 1 { if auxIntToInt8(v.AuxInt) != 1 || v_0.Op != OpAMD64SHRLconst || auxIntToInt8(v_0.AuxInt) != 1 {
break break
} }
x := v_0.Args[0] x := v_0.Args[0]
v.reset(OpAMD64BTRLconst) v.reset(OpAMD64ANDLconst)
v.AuxInt = int8ToAuxInt(0) v.AuxInt = int32ToAuxInt(-2)
v.AddArg(x) v.AddArg(x)
return true return true
} }
@ -21314,14 +20934,14 @@ func rewriteValueAMD64_OpAMD64SHLQ(v *Value) bool {
func rewriteValueAMD64_OpAMD64SHLQconst(v *Value) bool { func rewriteValueAMD64_OpAMD64SHLQconst(v *Value) bool {
v_0 := v.Args[0] v_0 := v.Args[0]
// match: (SHLQconst [1] (SHRQconst [1] x)) // match: (SHLQconst [1] (SHRQconst [1] x))
// result: (BTRQconst [0] x) // result: (ANDQconst [-2] x)
for { for {
if auxIntToInt8(v.AuxInt) != 1 || v_0.Op != OpAMD64SHRQconst || auxIntToInt8(v_0.AuxInt) != 1 { if auxIntToInt8(v.AuxInt) != 1 || v_0.Op != OpAMD64SHRQconst || auxIntToInt8(v_0.AuxInt) != 1 {
break break
} }
x := v_0.Args[0] x := v_0.Args[0]
v.reset(OpAMD64BTRQconst) v.reset(OpAMD64ANDQconst)
v.AuxInt = int8ToAuxInt(0) v.AuxInt = int32ToAuxInt(-2)
v.AddArg(x) v.AddArg(x)
return true return true
} }
@ -21741,14 +21361,14 @@ func rewriteValueAMD64_OpAMD64SHRL(v *Value) bool {
func rewriteValueAMD64_OpAMD64SHRLconst(v *Value) bool { func rewriteValueAMD64_OpAMD64SHRLconst(v *Value) bool {
v_0 := v.Args[0] v_0 := v.Args[0]
// match: (SHRLconst [1] (SHLLconst [1] x)) // match: (SHRLconst [1] (SHLLconst [1] x))
// result: (BTRLconst [31] x) // result: (ANDLconst [0x7fffffff] x)
for { for {
if auxIntToInt8(v.AuxInt) != 1 || v_0.Op != OpAMD64SHLLconst || auxIntToInt8(v_0.AuxInt) != 1 { if auxIntToInt8(v.AuxInt) != 1 || v_0.Op != OpAMD64SHLLconst || auxIntToInt8(v_0.AuxInt) != 1 {
break break
} }
x := v_0.Args[0] x := v_0.Args[0]
v.reset(OpAMD64BTRLconst) v.reset(OpAMD64ANDLconst)
v.AuxInt = int8ToAuxInt(31) v.AuxInt = int32ToAuxInt(0x7fffffff)
v.AddArg(x) v.AddArg(x)
return true return true
} }
@ -23450,26 +23070,6 @@ func rewriteValueAMD64_OpAMD64XORL(v *Value) bool {
} }
break break
} }
// match: (XORL (MOVLconst [c]) x)
// cond: isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128
// result: (BTCLconst [int8(log32(c))] x)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpAMD64MOVLconst {
continue
}
c := auxIntToInt32(v_0.AuxInt)
x := v_1
if !(isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128) {
continue
}
v.reset(OpAMD64BTCLconst)
v.AuxInt = int8ToAuxInt(int8(log32(c)))
v.AddArg(x)
return true
}
break
}
// match: (XORL x (MOVLconst [c])) // match: (XORL x (MOVLconst [c]))
// result: (XORLconst [c] x) // result: (XORLconst [c] x)
for { for {
@ -23541,20 +23141,6 @@ func rewriteValueAMD64_OpAMD64XORL(v *Value) bool {
} }
func rewriteValueAMD64_OpAMD64XORLconst(v *Value) bool { func rewriteValueAMD64_OpAMD64XORLconst(v *Value) bool {
v_0 := v.Args[0] v_0 := v.Args[0]
// match: (XORLconst [c] x)
// cond: isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128
// result: (BTCLconst [int8(log32(c))] x)
for {
c := auxIntToInt32(v.AuxInt)
x := v_0
if !(isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128) {
break
}
v.reset(OpAMD64BTCLconst)
v.AuxInt = int8ToAuxInt(int8(log32(c)))
v.AddArg(x)
return true
}
// match: (XORLconst [1] (SETNE x)) // match: (XORLconst [1] (SETNE x))
// result: (SETEQ x) // result: (SETEQ x)
for { for {
@ -23679,20 +23265,6 @@ func rewriteValueAMD64_OpAMD64XORLconst(v *Value) bool {
v.AddArg(x) v.AddArg(x)
return true return true
} }
// match: (XORLconst [c] (BTCLconst [d] x))
// result: (XORLconst [c ^ 1<<uint32(d)] x)
for {
c := auxIntToInt32(v.AuxInt)
if v_0.Op != OpAMD64BTCLconst {
break
}
d := auxIntToInt8(v_0.AuxInt)
x := v_0.Args[0]
v.reset(OpAMD64XORLconst)
v.AuxInt = int32ToAuxInt(c ^ 1<<uint32(d))
v.AddArg(x)
return true
}
// match: (XORLconst [c] x) // match: (XORLconst [c] x)
// cond: c==0 // cond: c==0
// result: x // result: x
@ -23914,7 +23486,7 @@ func rewriteValueAMD64_OpAMD64XORQ(v *Value) bool {
break break
} }
// match: (XORQ (MOVQconst [c]) x) // match: (XORQ (MOVQconst [c]) x)
// cond: isUint64PowerOfTwo(c) && uint64(c) >= 128 // cond: isUint64PowerOfTwo(c) && uint64(c) >= 1<<31
// result: (BTCQconst [int8(log64(c))] x) // result: (BTCQconst [int8(log64(c))] x)
for { for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
@ -23923,7 +23495,7 @@ func rewriteValueAMD64_OpAMD64XORQ(v *Value) bool {
} }
c := auxIntToInt64(v_0.AuxInt) c := auxIntToInt64(v_0.AuxInt)
x := v_1 x := v_1
if !(isUint64PowerOfTwo(c) && uint64(c) >= 128) { if !(isUint64PowerOfTwo(c) && uint64(c) >= 1<<31) {
continue continue
} }
v.reset(OpAMD64BTCQconst) v.reset(OpAMD64BTCQconst)
@ -24008,20 +23580,6 @@ func rewriteValueAMD64_OpAMD64XORQ(v *Value) bool {
} }
func rewriteValueAMD64_OpAMD64XORQconst(v *Value) bool { func rewriteValueAMD64_OpAMD64XORQconst(v *Value) bool {
v_0 := v.Args[0] v_0 := v.Args[0]
// match: (XORQconst [c] x)
// cond: isUint64PowerOfTwo(int64(c)) && uint64(c) >= 128
// result: (BTCQconst [int8(log32(c))] x)
for {
c := auxIntToInt32(v.AuxInt)
x := v_0
if !(isUint64PowerOfTwo(int64(c)) && uint64(c) >= 128) {
break
}
v.reset(OpAMD64BTCQconst)
v.AuxInt = int8ToAuxInt(int8(log32(c)))
v.AddArg(x)
return true
}
// match: (XORQconst [c] (XORQconst [d] x)) // match: (XORQconst [c] (XORQconst [d] x))
// result: (XORQconst [c ^ d] x) // result: (XORQconst [c ^ d] x)
for { for {
@ -24036,24 +23594,6 @@ func rewriteValueAMD64_OpAMD64XORQconst(v *Value) bool {
v.AddArg(x) v.AddArg(x)
return true return true
} }
// match: (XORQconst [c] (BTCQconst [d] x))
// cond: is32Bit(int64(c) ^ 1<<uint32(d))
// result: (XORQconst [c ^ 1<<uint32(d)] x)
for {
c := auxIntToInt32(v.AuxInt)
if v_0.Op != OpAMD64BTCQconst {
break
}
d := auxIntToInt8(v_0.AuxInt)
x := v_0.Args[0]
if !(is32Bit(int64(c) ^ 1<<uint32(d))) {
break
}
v.reset(OpAMD64XORQconst)
v.AuxInt = int32ToAuxInt(c ^ 1<<uint32(d))
v.AddArg(x)
return true
}
// match: (XORQconst [0] x) // match: (XORQconst [0] x)
// result: x // result: x
for { for {
@ -25670,12 +25210,12 @@ func rewriteValueAMD64_OpCtz16(v *Value) bool {
b := v.Block b := v.Block
typ := &b.Func.Config.Types typ := &b.Func.Config.Types
// match: (Ctz16 x) // match: (Ctz16 x)
// result: (BSFL (BTSLconst <typ.UInt32> [16] x)) // result: (BSFL (ORLconst <typ.UInt32> [1<<16] x))
for { for {
x := v_0 x := v_0
v.reset(OpAMD64BSFL) v.reset(OpAMD64BSFL)
v0 := b.NewValue0(v.Pos, OpAMD64BTSLconst, typ.UInt32) v0 := b.NewValue0(v.Pos, OpAMD64ORLconst, typ.UInt32)
v0.AuxInt = int8ToAuxInt(16) v0.AuxInt = int32ToAuxInt(1 << 16)
v0.AddArg(x) v0.AddArg(x)
v.AddArg(v0) v.AddArg(v0)
return true return true
@ -25848,12 +25388,12 @@ func rewriteValueAMD64_OpCtz8(v *Value) bool {
b := v.Block b := v.Block
typ := &b.Func.Config.Types typ := &b.Func.Config.Types
// match: (Ctz8 x) // match: (Ctz8 x)
// result: (BSFL (BTSLconst <typ.UInt32> [ 8] x)) // result: (BSFL (ORLconst <typ.UInt32> [1<<8 ] x))
for { for {
x := v_0 x := v_0
v.reset(OpAMD64BSFL) v.reset(OpAMD64BSFL)
v0 := b.NewValue0(v.Pos, OpAMD64BTSLconst, typ.UInt32) v0 := b.NewValue0(v.Pos, OpAMD64ORLconst, typ.UInt32)
v0.AuxInt = int8ToAuxInt(8) v0.AuxInt = int32ToAuxInt(1 << 8)
v0.AddArg(x) v0.AddArg(x)
v.AddArg(v0) v.AddArg(v0)
return true return true

View File

@ -220,10 +220,10 @@ func biton32(a, b uint32) (n uint32) {
// amd64:"BTSL" // amd64:"BTSL"
n += b | (1 << (a & 31)) n += b | (1 << (a & 31))
// amd64:"BTSL\t[$]31" // amd64:"ORL\t[$]-2147483648"
n += a | (1 << 31) n += a | (1 << 31)
// amd64:"BTSL\t[$]28" // amd64:"ORL\t[$]268435456"
n += a | (1 << 28) n += a | (1 << 28)
// amd64:"ORL\t[$]1" // amd64:"ORL\t[$]1"
@ -236,10 +236,10 @@ func bitoff32(a, b uint32) (n uint32) {
// amd64:"BTRL" // amd64:"BTRL"
n += b &^ (1 << (a & 31)) n += b &^ (1 << (a & 31))
// amd64:"BTRL\t[$]31" // amd64:"ANDL\t[$]2147483647"
n += a &^ (1 << 31) n += a &^ (1 << 31)
// amd64:"BTRL\t[$]28" // amd64:"ANDL\t[$]-268435457"
n += a &^ (1 << 28) n += a &^ (1 << 28)
// amd64:"ANDL\t[$]-2" // amd64:"ANDL\t[$]-2"
@ -252,10 +252,10 @@ func bitcompl32(a, b uint32) (n uint32) {
// amd64:"BTCL" // amd64:"BTCL"
n += b ^ (1 << (a & 31)) n += b ^ (1 << (a & 31))
// amd64:"BTCL\t[$]31" // amd64:"XORL\t[$]-2147483648"
n += a ^ (1 << 31) n += a ^ (1 << 31)
// amd64:"BTCL\t[$]28" // amd64:"XORL\t[$]268435456"
n += a ^ (1 << 28) n += a ^ (1 << 28)
// amd64:"XORL\t[$]1" // amd64:"XORL\t[$]1"

View File

@ -335,7 +335,7 @@ func TrailingZeros32(n uint32) int {
} }
func TrailingZeros16(n uint16) int { func TrailingZeros16(n uint16) int {
// amd64:"BSFL","BTSL\\t\\$16" // amd64:"BSFL","ORL\\t\\$65536"
// 386:"BSFL\t" // 386:"BSFL\t"
// arm:"ORR\t\\$65536","CLZ",-"MOVHU\tR" // arm:"ORR\t\\$65536","CLZ",-"MOVHU\tR"
// arm64:"ORR\t\\$65536","RBITW","CLZW",-"MOVHU\tR",-"RBIT\t",-"CLZ\t" // arm64:"ORR\t\\$65536","RBITW","CLZW",-"MOVHU\tR",-"RBIT\t",-"CLZ\t"
@ -347,7 +347,7 @@ func TrailingZeros16(n uint16) int {
} }
func TrailingZeros8(n uint8) int { func TrailingZeros8(n uint8) int {
// amd64:"BSFL","BTSL\\t\\$8" // amd64:"BSFL","ORL\\t\\$256"
// 386:"BSFL" // 386:"BSFL"
// arm:"ORR\t\\$256","CLZ",-"MOVBU\tR" // arm:"ORR\t\\$256","CLZ",-"MOVBU\tR"
// arm64:"ORR\t\\$256","RBITW","CLZW",-"MOVBU\tR",-"RBIT\t",-"CLZ\t" // arm64:"ORR\t\\$256","RBITW","CLZW",-"MOVBU\tR",-"RBIT\t",-"CLZ\t"

View File

@ -372,3 +372,32 @@ func storeTest(a []bool, v int, i int) {
// amd64: `BTL\t\$1,`,`SETCS\t3\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)` // amd64: `BTL\t\$1,`,`SETCS\t3\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
a[3+i] = v&2 != 0 a[3+i] = v&2 != 0
} }
// bitOps verifies instruction selection for read-modify-write bit
// operations on memory. Constants representable as a signed 32-bit
// immediate should compile to the ORQ/ANDQ/XORQ $imm, mem forms
// (issuable on more function units); bits 31-63, whose masks would
// otherwise need a separate movabs to materialize, should compile to
// the BTSQ/BTRQ/BTCQ memory forms instead.
// NOTE: each "// amd64:" comment is a codegen-test directive checked
// against the assembly generated for the statement that follows it;
// do not reorder or reword these lines.
func bitOps(p *[12]uint64) {
	// amd64: `ORQ\t\$8, \(AX\)`
	p[0] |= 8
	// amd64: `ORQ\t\$1073741824, 8\(AX\)`
	p[1] |= 1 << 30
	// amd64: `BTSQ\t\$31, 16\(AX\)`
	p[2] |= 1 << 31
	// amd64: `BTSQ\t\$63, 24\(AX\)`
	p[3] |= 1 << 63
	// amd64: `ANDQ\t\$-9, 32\(AX\)`
	p[4] &^= 8
	// amd64: `ANDQ\t\$-1073741825, 40\(AX\)`
	p[5] &^= 1 << 30
	// amd64: `BTRQ\t\$31, 48\(AX\)`
	p[6] &^= 1 << 31
	// amd64: `BTRQ\t\$63, 56\(AX\)`
	p[7] &^= 1 << 63
	// amd64: `XORQ\t\$8, 64\(AX\)`
	p[8] ^= 8
	// amd64: `XORQ\t\$1073741824, 72\(AX\)`
	p[9] ^= 1 << 30
	// amd64: `BTCQ\t\$31, 80\(AX\)`
	p[10] ^= 1 << 31
	// amd64: `BTCQ\t\$63, 88\(AX\)`
	p[11] ^= 1 << 63
}