diff --git a/src/cmd/compile/internal/amd64/ssa.go b/src/cmd/compile/internal/amd64/ssa.go
index d32ea7ec16..ab762c24f6 100644
--- a/src/cmd/compile/internal/amd64/ssa.go
+++ b/src/cmd/compile/internal/amd64/ssa.go
@@ -714,9 +714,9 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
 		p.To.Offset = v.AuxInt
 	case ssa.OpAMD64BTLconst, ssa.OpAMD64BTQconst,
 		ssa.OpAMD64TESTQconst, ssa.OpAMD64TESTLconst, ssa.OpAMD64TESTWconst, ssa.OpAMD64TESTBconst,
-		ssa.OpAMD64BTSLconst, ssa.OpAMD64BTSQconst,
-		ssa.OpAMD64BTCLconst, ssa.OpAMD64BTCQconst,
-		ssa.OpAMD64BTRLconst, ssa.OpAMD64BTRQconst:
+		ssa.OpAMD64BTSQconst,
+		ssa.OpAMD64BTCQconst,
+		ssa.OpAMD64BTRQconst:
 		op := v.Op
 		if op == ssa.OpAMD64BTQconst && v.AuxInt < 32 {
 			// Emit 32-bit version because it's shorter
@@ -851,7 +851,8 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
 		}
 		fallthrough
 	case ssa.OpAMD64ANDQconstmodify, ssa.OpAMD64ANDLconstmodify, ssa.OpAMD64ORQconstmodify, ssa.OpAMD64ORLconstmodify,
-		ssa.OpAMD64XORQconstmodify, ssa.OpAMD64XORLconstmodify:
+		ssa.OpAMD64XORQconstmodify, ssa.OpAMD64XORLconstmodify,
+		ssa.OpAMD64BTSQconstmodify, ssa.OpAMD64BTRQconstmodify, ssa.OpAMD64BTCQconstmodify:
 		sc := v.AuxValAndOff()
 		off := sc.Off64()
 		val := sc.Val64()
diff --git a/src/cmd/compile/internal/ssa/_gen/AMD64.rules b/src/cmd/compile/internal/ssa/_gen/AMD64.rules
index b6937de800..c4f74bb0d9 100644
--- a/src/cmd/compile/internal/ssa/_gen/AMD64.rules
+++ b/src/cmd/compile/internal/ssa/_gen/AMD64.rules
@@ -82,8 +82,8 @@
 (Ctz32 x) && buildcfg.GOAMD64 >= 3 => (TZCNTL x)
 (Ctz64 x) && buildcfg.GOAMD64 < 3 => (CMOVQEQ (Select0 (BSFQ x)) (MOVQconst [64]) (Select1 (BSFQ x)))
 (Ctz32 x) && buildcfg.GOAMD64 < 3 => (Select0 (BSFQ (BTSQconst [32] x)))
-(Ctz16 x) => (BSFL (BTSLconst [16] x))
-(Ctz8 x) => (BSFL (BTSLconst [ 8] x))
+(Ctz16 x) => (BSFL (ORLconst [1<<16] x))
+(Ctz8 x) => (BSFL (ORLconst [1<<8 ] x))
 (Ctz64NonZero x) && buildcfg.GOAMD64 >= 3 => (TZCNTQ x)
 (Ctz32NonZero x) && buildcfg.GOAMD64 >= 3 => (TZCNTL x)
@@ -659,29 +659,16 @@
 // Recognize bit setting (a |= 1<<b) and toggling (a ^= 1<<b)
 (OR(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y) x) => (BTS(Q|L) x y)
 (XOR(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y) x) => (BTC(Q|L) x y)
-
-// Convert ORconst into BTS, if the code gets smaller, with boundary being
-// (ORL $40,AX is 3 bytes, ORL $80,AX is 6 bytes).
-((ORQ|XORQ)const [c] x) && isUint64PowerOfTwo(int64(c)) && uint64(c) >= 128
-	=> (BT(S|C)Qconst [int8(log32(c))] x)
-((ORL|XORL)const [c] x) && isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128
-	=> (BT(S|C)Lconst [int8(log32(c))] x)
-((ORQ|XORQ) (MOVQconst [c]) x) && isUint64PowerOfTwo(c) && uint64(c) >= 128
-	=> (BT(S|C)Qconst [int8(log64(c))] x)
-((ORL|XORL) (MOVLconst [c]) x) && isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128
-	=> (BT(S|C)Lconst [int8(log32(c))] x)
+// Note: only convert OR/XOR to BTS/BTC if the constant wouldn't fit in
+// the constant field of the OR/XOR instruction. See issue 61694.
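+//
+// For intuition (illustrative only, not part of the rule set): OR/XOR/AND
+// immediates on amd64 are at most 32 bits, sign-extended to 64, so for a
+// 64-bit operand
+//
+//	x |= 1 << 30   ->  ORQ $0x40000000, AX   (constant fits in imm32)
+//	x |= 1 << 31   ->  BTSQ $31, AX          (imm32 would sign-extend to 0xffffffff80000000)
+//	x |= 1 << 63   ->  BTSQ $63, AX          (constant can't be an imm32 at all)
+//
+// i.e. bits 0-30 stay with the plain ALU op and bits 31-63 need BTS/BTC,
+// which is exactly the 1<<31 cutoff below. (For 32-bit operands every
+// power of two fits in the immediate, so the L-sized BTx const forms go
+// away entirely.)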
+((OR|XOR)Q (MOVQconst [c]) x) && isUint64PowerOfTwo(c) && uint64(c) >= 1<<31 => (BT(S|C)Qconst [int8(log64(c))] x)
 
 // Recognize bit clearing: a &^= 1<<b
 (AND(Q|L) (NOT(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y)) x) => (BTR(Q|L) x y)
 (ANDN(Q|L) x (SHL(Q|L) (MOV(Q|L)const [1]) y)) => (BTR(Q|L) x y)
-(ANDQconst [c] x) && isUint64PowerOfTwo(int64(^c)) && uint64(^c) >= 128
-	=> (BTRQconst [int8(log32(^c))] x)
-(ANDLconst [c] x) && isUint32PowerOfTwo(int64(^c)) && uint64(^c) >= 128
-	=> (BTRLconst [int8(log32(^c))] x)
-(ANDQ (MOVQconst [c]) x) && isUint64PowerOfTwo(^c) && uint64(^c) >= 128
-	=> (BTRQconst [int8(log64(^c))] x)
-(ANDL (MOVLconst [c]) x) && isUint32PowerOfTwo(int64(^c)) && uint64(^c) >= 128
-	=> (BTRLconst [int8(log32(^c))] x)
+// Note: only convert AND to BTR if the constant wouldn't fit in
+// the constant field of the AND instruction. See issue 61694.
+(ANDQ (MOVQconst [c]) x) && isUint64PowerOfTwo(^c) && uint64(^c) >= 1<<31 => (BTRQconst [int8(log64(^c))] x)
 
 // Special-case bit patterns on first/last bit.
 // generic.rules changes ANDs of high-part/low-part masks into a couple of shifts,
@@ -695,9 +682,9 @@
 
 // Special case resetting first/last bit
 (SHL(L|Q)const [1] (SHR(L|Q)const [1] x))
-	=> (BTR(L|Q)const [0] x)
+	=> (AND(L|Q)const [-2] x)
 (SHRLconst [1] (SHLLconst [1] x))
-	=> (BTRLconst [31] x)
+	=> (ANDLconst [0x7fffffff] x)
 (SHRQconst [1] (SHLQconst [1] x))
 	=> (BTRQconst [63] x)
@@ -731,10 +718,10 @@
 	=> (SET(B|AE)store [off] {sym} ptr (BTLconst [31] x) mem)
 
 // Fold combinations of bit ops on same bit. An example is math.Copysign(c,-1)
-(BTS(Q|L)const [c] (BTR(Q|L)const [c] x)) => (BTS(Q|L)const [c] x)
-(BTS(Q|L)const [c] (BTC(Q|L)const [c] x)) => (BTS(Q|L)const [c] x)
-(BTR(Q|L)const [c] (BTS(Q|L)const [c] x)) => (BTR(Q|L)const [c] x)
-(BTR(Q|L)const [c] (BTC(Q|L)const [c] x)) => (BTR(Q|L)const [c] x)
+(BTSQconst [c] (BTRQconst [c] x)) => (BTSQconst [c] x)
+(BTSQconst [c] (BTCQconst [c] x)) => (BTSQconst [c] x)
+(BTRQconst [c] (BTSQconst [c] x)) => (BTRQconst [c] x)
+(BTRQconst [c] (BTCQconst [c] x)) => (BTRQconst [c] x)
 
 // Fold boolean negation into SETcc.
 (XORLconst [1] (SETNE x)) => (SETEQ x)
@@ -778,31 +765,6 @@
 (XOR(L|Q)const [c] (XOR(L|Q)const [d] x)) => (XOR(L|Q)const [c ^ d] x)
 (OR(L|Q)const [c] (OR(L|Q)const [d] x)) => (OR(L|Q)const [c | d] x)
 
-(BTRLconst [c] (ANDLconst [d] x)) => (ANDLconst [d &^ (1<<uint32(c))] x)
-(ANDLconst [c] (BTRLconst [d] x)) => (ANDLconst [c &^ (1<<uint32(d))] x)
-(BTRLconst [c] (BTRLconst [d] x)) => (ANDLconst [^(1<<uint32(c) | 1<<uint32(d))] x)
-(BTCLconst [c] (XORLconst [d] x)) => (XORLconst [d ^ 1<<uint32(c)] x)
-(XORLconst [c] (BTCLconst [d] x)) => (XORLconst [c ^ 1<<uint32(d)] x)
-(BTCLconst [c] (BTCLconst [d] x)) => (XORLconst [1<<uint32(c) | 1<<uint32(d)] x)
-(BTSLconst [c] (ORLconst [d] x)) => (ORLconst [d | 1<<uint32(c)] x)
-(ORLconst [c] (BTSLconst [d] x)) => (ORLconst [c | 1<<uint32(d)] x)
-(BTSLconst [c] (BTSLconst [d] x)) => (ORLconst [1<<uint32(c) | 1<<uint32(d)] x)
-(BTRQconst [c] (ANDQconst [d] x)) && is32Bit(int64(d) &^ (1<<uint32(c))) => (ANDQconst [d &^ (1<<uint32(c))] x)
-(ANDQconst [c] (BTRQconst [d] x)) && is32Bit(int64(c) &^ (1<<uint32(d))) => (ANDQconst [c &^ (1<<uint32(d))] x)
-(BTRQconst [c] (BTRQconst [d] x)) && is32Bit(^(1<<uint32(c) | 1<<uint32(d))) => (ANDQconst [^(1<<uint32(c) | 1<<uint32(d))] x)
-(BTCQconst [c] (XORQconst [d] x)) && is32Bit(int64(d) ^ 1<<uint32(c)) => (XORQconst [d ^ 1<<uint32(c)] x)
-(XORQconst [c] (BTCQconst [d] x)) && is32Bit(int64(c) ^ 1<<uint32(d)) => (XORQconst [c ^ 1<<uint32(d)] x)
-(BTCQconst [c] (BTCQconst [d] x)) && is32Bit(1<<uint32(c) ^ 1<<uint32(d)) => (XORQconst [1<<uint32(c) ^ 1<<uint32(d)] x)
-(BTSQconst [c] (ORQconst [d] x)) && is32Bit(int64(d) | 1<<uint32(c)) => (ORQconst [d | 1<<uint32(c)] x)
-(ORQconst [c] (BTSQconst [d] x)) && is32Bit(int64(c) | 1<<uint32(d)) => (ORQconst [c | 1<<uint32(d)] x)
-(BTSQconst [c] (BTSQconst [d] x)) && is32Bit(1<<uint32(c) | 1<<uint32(d)) => (ORQconst [1<<uint32(c) | 1<<uint32(d)] x)
 (MULLconst [c] (MULLconst [d] x)) && is32Bit(int64(c)*int64(d)) => (MULLconst [c * d] x)
 (MULQconst [c] (MULQconst [d] x)) && is32Bit(int64(c)*int64(d)) => (MULQconst [c * d] x)
@@ -1422,11 +1384,8 @@
 (NOTQ (MOVQconst [c])) => (MOVQconst [^c])
 (NOTL (MOVLconst [c])) => (MOVLconst [^c])
 (BTSQconst [c] (MOVQconst [d])) => (MOVQconst [d|(1<<uint32(c))])
-(BTSLconst [c] (MOVLconst [d])) => (MOVLconst [d|(1<<uint32(c))])
 (BTRQconst [c] (MOVQconst [d])) => (MOVQconst [d&^(1<<uint32(c))])
-(BTRLconst [c] (MOVLconst [d])) => (MOVLconst [d&^(1<<uint32(c))])
 (BTCQconst [c] (MOVQconst [d])) => (MOVQconst [d^(1<<uint32(c))])
-(BTCLconst [c] (MOVLconst [d])) => (MOVLconst [d^(1<<uint32(c))])
@@ ... @@
 (MOVQstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Qload x [off] {sym} ptr mem) mem) && y.Uses==1 && clobber(y) =>
 	((ADD|AND|OR|XOR)Qmodify [off] {sym} ptr x mem)
 (MOVQstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR)Q l:(MOVQload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y, l) =>
 	((ADD|SUB|AND|OR|XOR)Qmodify [off] {sym} ptr x mem)
+(MOVQstore {sym} [off] ptr x:(BT(S|R|C)Qconst [c] l:(MOVQload {sym} [off] ptr mem)) mem) && x.Uses == 1 && l.Uses == 1 && clobber(x, l) =>
+	(BT(S|R|C)Qconstmodify {sym} [makeValAndOff(int32(c),off)] ptr mem)
 
 // Merge ADDQconst and LEAQ into atomic loads.
 (MOV(Q|L|B)atomicload [off1] {sym} (ADDQconst [off2] ptr) mem) && is32Bit(int64(off1)+int64(off2)) =>
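The new BT(S|R|C)Qconstmodify rule above is what turns a load / bit-op / store
sequence into a single read-modify-write instruction. A rough sketch of the
intended effect (the "before" sequence is an assumption for illustration; the
authoritative expectations are the memops.go annotations at the end of this
patch):

	// Go source
	func setTop(p *uint64) {
		*p |= 1 << 63
	}

	// before this change (roughly): MOVQ (AX), CX; BTSQ $63, CX; MOVQ CX, (AX)
	// after this change:            BTSQ $63, (AX)
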
diff --git a/src/cmd/compile/internal/ssa/_gen/AMD64Ops.go b/src/cmd/compile/internal/ssa/_gen/AMD64Ops.go
index e9205d56c6..606171947b 100644
--- a/src/cmd/compile/internal/ssa/_gen/AMD64Ops.go
+++ b/src/cmd/compile/internal/ssa/_gen/AMD64Ops.go
@@ -399,12 +399,27 @@ func init() {
 		{name: "BTSQ", argLength: 2, reg: gp21, asm: "BTSQ", resultInArg0: true, clobberFlags: true}, // set bit arg1%64 in arg0
 		{name: "BTLconst", argLength: 1, reg: gp1flags, asm: "BTL", typ: "Flags", aux: "Int8"},       // test whether bit auxint in arg0 is set, 0 <= auxint < 32
 		{name: "BTQconst", argLength: 1, reg: gp1flags, asm: "BTQ", typ: "Flags", aux: "Int8"},       // test whether bit auxint in arg0 is set, 0 <= auxint < 64
-		{name: "BTCLconst", argLength: 1, reg: gp11, asm: "BTCL", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // complement bit auxint in arg0, 0 <= auxint < 32
-		{name: "BTCQconst", argLength: 1, reg: gp11, asm: "BTCQ", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // complement bit auxint in arg0, 0 <= auxint < 64
-		{name: "BTRLconst", argLength: 1, reg: gp11, asm: "BTRL", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // reset bit auxint in arg0, 0 <= auxint < 32
-		{name: "BTRQconst", argLength: 1, reg: gp11, asm: "BTRQ", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // reset bit auxint in arg0, 0 <= auxint < 64
-		{name: "BTSLconst", argLength: 1, reg: gp11, asm: "BTSL", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // set bit auxint in arg0, 0 <= auxint < 32
-		{name: "BTSQconst", argLength: 1, reg: gp11, asm: "BTSQ", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // set bit auxint in arg0, 0 <= auxint < 64
+		{name: "BTCQconst", argLength: 1, reg: gp11, asm: "BTCQ", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // complement bit auxint in arg0, 31 <= auxint < 64
+		{name: "BTRQconst", argLength: 1, reg: gp11, asm: "BTRQ", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // reset bit auxint in arg0, 31 <= auxint < 64
+		{name: "BTSQconst", argLength: 1, reg: gp11, asm: "BTSQ", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // set bit auxint in arg0, 31 <= auxint < 64
+
+		// BT[SRC]Qconstmodify
+		//
+		//  S: set bit
+		//  R: reset (clear) bit
+		//  C: complement bit
+		//
+		// Apply operation to bit ValAndOff(AuxInt).Val() in the 64 bits at
+		// memory address arg0+ValAndOff(AuxInt).Off()+aux
+		// Bit index must be in range (31-63).
+		// (We use OR/AND/XOR for thinner targets and lower bit indexes.)
+		// arg1=mem, returns mem
+		//
+		// Note that there aren't non-const versions of these instructions.
+		// Well, there are such instructions, but they are slow and weird so we don't use them.
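+		//
+		// Example (illustrative only): for p *[12]uint64, "p[3] |= 1 << 63"
+		// becomes BTSQconstmodify with AuxInt = makeValAndOff(63, 24), i.e.
+		// Val = bit index 63 and Off = byte offset 24, and is emitted as
+		// "BTSQ $63, 24(AX)" (per the codegen tests added in this patch).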
+ {name: "BTSQconstmodify", argLength: 2, reg: gpstoreconst, asm: "BTSQ", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, + {name: "BTRQconstmodify", argLength: 2, reg: gpstoreconst, asm: "BTRQ", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, + {name: "BTCQconstmodify", argLength: 2, reg: gpstoreconst, asm: "BTCQ", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // TESTx: compare (arg0 & arg1) to 0 {name: "TESTQ", argLength: 2, reg: gp2flags, commutative: true, asm: "TESTQ", typ: "Flags"}, diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index 64aea38afe..84dcd9a3cc 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -716,12 +716,12 @@ const ( OpAMD64BTSQ OpAMD64BTLconst OpAMD64BTQconst - OpAMD64BTCLconst OpAMD64BTCQconst - OpAMD64BTRLconst OpAMD64BTRQconst - OpAMD64BTSLconst OpAMD64BTSQconst + OpAMD64BTSQconstmodify + OpAMD64BTRQconstmodify + OpAMD64BTCQconstmodify OpAMD64TESTQ OpAMD64TESTL OpAMD64TESTW @@ -8778,22 +8778,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "BTCLconst", - auxType: auxInt8, - argLen: 1, - resultInArg0: true, - clobberFlags: true, - asm: x86.ABTCL, - reg: regInfo{ - inputs: []inputInfo{ - {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 - }, - outputs: []outputInfo{ - {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 - }, - }, - }, { name: "BTCQconst", auxType: auxInt8, @@ -8810,22 +8794,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "BTRLconst", - auxType: auxInt8, - argLen: 1, - resultInArg0: true, - clobberFlags: true, - asm: x86.ABTRL, - reg: regInfo{ - inputs: []inputInfo{ - {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 - }, - outputs: []outputInfo{ - {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 - }, - }, - }, { name: "BTRQconst", auxType: auxInt8, @@ -8842,22 +8810,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "BTSLconst", - auxType: auxInt8, - argLen: 1, - resultInArg0: true, - clobberFlags: true, - asm: x86.ABTSL, - reg: regInfo{ - inputs: []inputInfo{ - {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 - }, - outputs: []outputInfo{ - {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 - }, - }, - }, { name: "BTSQconst", auxType: auxInt8, @@ -8874,6 +8826,48 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "BTSQconstmodify", + auxType: auxSymValAndOff, + argLen: 2, + clobberFlags: true, + faultOnNilArg0: true, + symEffect: SymRead | SymWrite, + asm: x86.ABTSQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB + }, + }, + }, + { + name: "BTRQconstmodify", + auxType: auxSymValAndOff, + argLen: 2, + clobberFlags: true, + faultOnNilArg0: true, + symEffect: SymRead | SymWrite, + asm: x86.ABTRQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB + }, + }, + }, + { + name: "BTCQconstmodify", + auxType: auxSymValAndOff, + argLen: 2, + clobberFlags: true, + faultOnNilArg0: true, + symEffect: SymRead | SymWrite, + asm: x86.ABTCQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB + }, + }, + }, { name: "TESTQ", argLen: 2, diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go index 
diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go
index afe9ed257a..979d9be3a7 100644
--- a/src/cmd/compile/internal/ssa/rewriteAMD64.go
+++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go
@@ -73,20 +73,14 @@ func rewriteValueAMD64(v *Value) bool {
 		return rewriteValueAMD64_OpAMD64BSWAPL(v)
 	case OpAMD64BSWAPQ:
 		return rewriteValueAMD64_OpAMD64BSWAPQ(v)
-	case OpAMD64BTCLconst:
-		return rewriteValueAMD64_OpAMD64BTCLconst(v)
 	case OpAMD64BTCQconst:
 		return rewriteValueAMD64_OpAMD64BTCQconst(v)
 	case OpAMD64BTLconst:
 		return rewriteValueAMD64_OpAMD64BTLconst(v)
 	case OpAMD64BTQconst:
 		return rewriteValueAMD64_OpAMD64BTQconst(v)
-	case OpAMD64BTRLconst:
-		return rewriteValueAMD64_OpAMD64BTRLconst(v)
 	case OpAMD64BTRQconst:
 		return rewriteValueAMD64_OpAMD64BTRQconst(v)
-	case OpAMD64BTSLconst:
-		return rewriteValueAMD64_OpAMD64BTSLconst(v)
 	case OpAMD64BTSQconst:
 		return rewriteValueAMD64_OpAMD64BTSQconst(v)
 	case OpAMD64CMOVLCC:
@@ -2626,26 +2620,6 @@ func rewriteValueAMD64_OpAMD64ANDL(v *Value) bool {
 		}
 		break
 	}
-	// match: (ANDL (MOVLconst [c]) x)
-	// cond: isUint32PowerOfTwo(int64(^c)) && uint64(^c) >= 128
-	// result: (BTRLconst [int8(log32(^c))] x)
-	for {
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			if v_0.Op != OpAMD64MOVLconst {
-				continue
-			}
-			c := auxIntToInt32(v_0.AuxInt)
-			x := v_1
-			if !(isUint32PowerOfTwo(int64(^c)) && uint64(^c) >= 128) {
-				continue
-			}
-			v.reset(OpAMD64BTRLconst)
-			v.AuxInt = int8ToAuxInt(int8(log32(^c)))
-			v.AddArg(x)
-			return true
-		}
-		break
-	}
 	// match: (ANDL x (MOVLconst [c]))
 	// result: (ANDLconst [c] x)
 	for {
@@ -2754,20 +2728,6 @@ func rewriteValueAMD64_OpAMD64ANDL(v *Value) bool {
 }
 func rewriteValueAMD64_OpAMD64ANDLconst(v *Value) bool {
 	v_0 := v.Args[0]
-	// match: (ANDLconst [c] x)
-	// cond: isUint32PowerOfTwo(int64(^c)) && uint64(^c) >= 128
-	// result: (BTRLconst [int8(log32(^c))] x)
-	for {
-		c := auxIntToInt32(v.AuxInt)
-		x := v_0
-		if !(isUint32PowerOfTwo(int64(^c)) && uint64(^c) >= 128) {
-			break
-		}
-		v.reset(OpAMD64BTRLconst)
-		v.AuxInt = int8ToAuxInt(int8(log32(^c)))
-		v.AddArg(x)
-		return true
-	}
 	// match: (ANDLconst [c] (ANDLconst [d] x))
 	// result: (ANDLconst [c & d] x)
 	for {
@@ -2782,20 +2742,6 @@ func rewriteValueAMD64_OpAMD64ANDLconst(v *Value) bool {
 		v.AddArg(x)
 		return true
 	}
-	// match: (ANDLconst [c] (BTRLconst [d] x))
-	// result: (ANDLconst [c &^ (1<<uint32(d))] x)
-	for {
-		c := auxIntToInt32(v.AuxInt)
-		if v_0.Op != OpAMD64BTRLconst {
-			break
-		}
-		d := auxIntToInt8(v_0.AuxInt)
-		x := v_0.Args[0]
-		v.reset(OpAMD64ANDLconst)
-		v.AuxInt = int32ToAuxInt(c &^ (1 << uint32(d)))
-		v.AddArg(x)
-		return true
-	}
@@ ... @@ func rewriteValueAMD64_OpAMD64ANDQ(v *Value) bool {
 	// match: (ANDQ (MOVQconst [c]) x)
-	// cond: isUint64PowerOfTwo(^c) && uint64(^c) >= 128
+	// cond: isUint64PowerOfTwo(^c) && uint64(^c) >= 1<<31
 	// result: (BTRQconst [int8(log64(^c))] x)
 	for {
 		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
@@ -3108,7 +3054,7 @@ func rewriteValueAMD64_OpAMD64ANDQ(v *Value) bool {
 			}
 			c := auxIntToInt64(v_0.AuxInt)
 			x := v_1
-			if !(isUint64PowerOfTwo(^c) && uint64(^c) >= 128) {
+			if !(isUint64PowerOfTwo(^c) && uint64(^c) >= 1<<31) {
 				continue
 			}
@@ -3230,20 +3176,6 @@ func rewriteValueAMD64_OpAMD64ANDQ(v *Value) bool {
 }
 func rewriteValueAMD64_OpAMD64ANDQconst(v *Value) bool {
 	v_0 := v.Args[0]
-	// match: (ANDQconst [c] x)
-	// cond: isUint64PowerOfTwo(int64(^c)) && uint64(^c) >= 128
-	// result: (BTRQconst [int8(log32(^c))] x)
-	for {
-		c := auxIntToInt32(v.AuxInt)
-		x := v_0
-		if !(isUint64PowerOfTwo(int64(^c)) && uint64(^c) >= 128) {
-			break
-		}
-		v.reset(OpAMD64BTRQconst)
-		v.AuxInt = int8ToAuxInt(int8(log32(^c)))
-		v.AddArg(x)
-		return true
-	}
 	// match: (ANDQconst [c] (ANDQconst [d] x))
 	// result: (ANDQconst [c & d] x)
 	for {
@@ -3258,24 +3190,6 @@ func rewriteValueAMD64_OpAMD64ANDQconst(v *Value) bool {
 		v.AddArg(x)
 		return true
 	}
-	// match: (ANDQconst [c] (BTRQconst [d] x))
-	// cond: is32Bit(int64(c) &^ (1<<uint32(d)))
-	// result: (ANDQconst [c &^ (1<<uint32(d))] x)
-	for {
-		c := auxIntToInt32(v.AuxInt)
-		if v_0.Op != OpAMD64BTRQconst {
-			break
-		}
-		d := auxIntToInt8(v_0.AuxInt)
-		x := v_0.Args[0]
-		if !(is32Bit(int64(c) &^ (1 << uint32(d)))) {
-			break
-		}
-		v.reset(OpAMD64ANDQconst)
-		v.AuxInt = int32ToAuxInt(c &^ (1 << uint32(d)))
-		v.AddArg(x)
-		return true
-	}
@@ ... @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
-	// match: (ORL (MOVLconst [c]) x)
-	// cond: isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128
-	// result: (BTSLconst [int8(log32(c))] x)
-	for {
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			if v_0.Op != OpAMD64MOVLconst {
-				continue
-			}
-			c := auxIntToInt32(v_0.AuxInt)
-			x := v_1
-			if !(isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128) {
-				continue
-			}
-			v.reset(OpAMD64BTSLconst)
-			v.AuxInt = int8ToAuxInt(int8(log32(c)))
-			v.AddArg(x)
-			return true
-		}
-		break
-	}
 	// match: (ORL x (MOVLconst [c]))
 	// result: (ORLconst [c] x)
 	for {
@@ -14718,20 +14398,6 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
 }
 func rewriteValueAMD64_OpAMD64ORLconst(v *Value) bool {
 	v_0 := v.Args[0]
-	// match: (ORLconst [c] x)
-	// cond: isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128
-	// result: (BTSLconst [int8(log32(c))] x)
-	for {
-		c := auxIntToInt32(v.AuxInt)
-		x := v_0
-		if !(isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128) {
-			break
-		}
-		v.reset(OpAMD64BTSLconst)
-		v.AuxInt = int8ToAuxInt(int8(log32(c)))
-		v.AddArg(x)
-		return true
-	}
 	// match: (ORLconst [c] (ORLconst [d] x))
 	// result: (ORLconst [c | d] x)
 	for {
@@ -14746,20 +14412,6 @@ func rewriteValueAMD64_OpAMD64ORLconst(v *Value) bool {
 		v.AddArg(x)
 		return true
 	}
-	// match: (ORLconst [c] (BTSLconst [d] x))
-	// result: (ORLconst [c | 1<<uint32(d)] x)
-	for {
-		c := auxIntToInt32(v.AuxInt)
-		if v_0.Op != OpAMD64BTSLconst {
-			break
-		}
-		d := auxIntToInt8(v_0.AuxInt)
-		x := v_0.Args[0]
-		v.reset(OpAMD64ORLconst)
-		v.AuxInt = int32ToAuxInt(c | 1<<uint32(d))
-		v.AddArg(x)
-		return true
-	}
@@ ... @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
 	// match: (ORQ (MOVQconst [c]) x)
-	// cond: isUint64PowerOfTwo(c) && uint64(c) >= 128
+	// cond: isUint64PowerOfTwo(c) && uint64(c) >= 1<<31
 	// result: (BTSQconst [int8(log64(c))] x)
 	for {
 		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
@@ -15002,7 +14654,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
 			}
 			c := auxIntToInt64(v_0.AuxInt)
 			x := v_1
-			if !(isUint64PowerOfTwo(c) && uint64(c) >= 128) {
+			if !(isUint64PowerOfTwo(c) && uint64(c) >= 1<<31) {
 				continue
 			}
@@ -15201,20 +14853,6 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
 }
 func rewriteValueAMD64_OpAMD64ORQconst(v *Value) bool {
 	v_0 := v.Args[0]
-	// match: (ORQconst [c] x)
-	// cond: isUint64PowerOfTwo(int64(c)) && uint64(c) >= 128
-	// result: (BTSQconst [int8(log32(c))] x)
-	for {
-		c := auxIntToInt32(v.AuxInt)
-		x := v_0
-		if !(isUint64PowerOfTwo(int64(c)) && uint64(c) >= 128) {
-			break
-		}
-		v.reset(OpAMD64BTSQconst)
-		v.AuxInt = int8ToAuxInt(int8(log32(c)))
-		v.AddArg(x)
-		return true
-	}
 	// match: (ORQconst [c] (ORQconst [d] x))
 	// result: (ORQconst [c | d] x)
 	for {
@@ -15229,24 +14867,6 @@ func rewriteValueAMD64_OpAMD64ORQconst(v *Value) bool {
 		v.AddArg(x)
 		return true
 	}
-	// match: (ORQconst [c] (BTSQconst [d] x))
-	// cond: is32Bit(int64(c) | 1<<uint32(d))
-	// result: (ORQconst [c | 1<<uint32(d)] x)
-	for {
-		c := auxIntToInt32(v.AuxInt)
-		if v_0.Op != OpAMD64BTSQconst {
-			break
-		}
-		d := auxIntToInt8(v_0.AuxInt)
-		x := v_0.Args[0]
-		if !(is32Bit(int64(c) | 1<<uint32(d))) {
-			break
-		}
-		v.reset(OpAMD64ORQconst)
-		v.AuxInt = int32ToAuxInt(c | 1<<uint32(d))
-		v.AddArg(x)
-		return true
-	}
@@ ... @@ func rewriteValueAMD64_OpAMD64XORL(v *Value) bool {
-	// match: (XORL (MOVLconst [c]) x)
-	// cond: isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128
-	// result: (BTCLconst [int8(log32(c))] x)
-	for {
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			if v_0.Op != OpAMD64MOVLconst {
-				continue
-			}
-			c := auxIntToInt32(v_0.AuxInt)
-			x := v_1
-			if !(isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128) {
-				continue
-			}
-			v.reset(OpAMD64BTCLconst)
-			v.AuxInt = int8ToAuxInt(int8(log32(c)))
-			v.AddArg(x)
-			return true
-		}
-		break
-	}
 	// match: (XORL x (MOVLconst [c]))
 	// result: (XORLconst [c] x)
 	for {
@@ -23541,20 +23141,6 @@ func rewriteValueAMD64_OpAMD64XORL(v *Value) bool {
 }
 func rewriteValueAMD64_OpAMD64XORLconst(v *Value) bool {
 	v_0 := v.Args[0]
-	// match: (XORLconst [c] x)
-	// cond: isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128
-	// result: (BTCLconst [int8(log32(c))] x)
-	for {
-		c := auxIntToInt32(v.AuxInt)
-		x := v_0
-		if !(isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128) {
-			break
-		}
-		v.reset(OpAMD64BTCLconst)
-		v.AuxInt = int8ToAuxInt(int8(log32(c)))
-		v.AddArg(x)
-		return true
-	}
 	// match: (XORLconst [1] (SETNE x))
 	// result: (SETEQ x)
 	for {
@@ -23679,20 +23265,6 @@ func rewriteValueAMD64_OpAMD64XORLconst(v *Value) bool {
 		v.AddArg(x)
 		return true
 	}
-	// match: (XORLconst [c] (BTCLconst [d] x))
-	// result: (XORLconst [c ^ 1<<uint32(d)] x)
-	for {
-		c := auxIntToInt32(v.AuxInt)
-		if v_0.Op != OpAMD64BTCLconst {
-			break
-		}
-		d := auxIntToInt8(v_0.AuxInt)
-		x := v_0.Args[0]
-		v.reset(OpAMD64XORLconst)
-		v.AuxInt = int32ToAuxInt(c ^ 1<<uint32(d))
-		v.AddArg(x)
-		return true
-	}
@@ ... @@ func rewriteValueAMD64_OpAMD64XORQ(v *Value) bool {
 	// match: (XORQ (MOVQconst [c]) x)
-	// cond: isUint64PowerOfTwo(c) && uint64(c) >= 128
+	// cond: isUint64PowerOfTwo(c) && uint64(c) >= 1<<31
 	// result: (BTCQconst [int8(log64(c))] x)
 	for {
 		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
@@ -23923,7 +23495,7 @@ func rewriteValueAMD64_OpAMD64XORQ(v *Value) bool {
 			}
 			c := auxIntToInt64(v_0.AuxInt)
 			x := v_1
-			if !(isUint64PowerOfTwo(c) && uint64(c) >= 128) {
+			if !(isUint64PowerOfTwo(c) && uint64(c) >= 1<<31) {
 				continue
 			}
@@ -24008,20 +23580,6 @@ func rewriteValueAMD64_OpAMD64XORQ(v *Value) bool {
 }
 func rewriteValueAMD64_OpAMD64XORQconst(v *Value) bool {
 	v_0 := v.Args[0]
-	// match: (XORQconst [c] x)
-	// cond: isUint64PowerOfTwo(int64(c)) && uint64(c) >= 128
-	// result: (BTCQconst [int8(log32(c))] x)
-	for {
-		c := auxIntToInt32(v.AuxInt)
-		x := v_0
-		if !(isUint64PowerOfTwo(int64(c)) && uint64(c) >= 128) {
-			break
-		}
-		v.reset(OpAMD64BTCQconst)
-		v.AuxInt = int8ToAuxInt(int8(log32(c)))
-		v.AddArg(x)
-		return true
-	}
 	// match: (XORQconst [c] (XORQconst [d] x))
 	// result: (XORQconst [c ^ d] x)
 	for {
@@ -24036,24 +23594,6 @@ func rewriteValueAMD64_OpAMD64XORQconst(v *Value) bool {
 		v.AddArg(x)
 		return true
 	}
-	// match: (XORQconst [c] (BTCQconst [d] x))
-	// cond: is32Bit(int64(c) ^ 1<<uint32(d))
-	// result: (XORQconst [c ^ 1<<uint32(d)] x)
-	for {
-		c := auxIntToInt32(v.AuxInt)
-		if v_0.Op != OpAMD64BTCQconst {
-			break
-		}
-		d := auxIntToInt8(v_0.AuxInt)
-		x := v_0.Args[0]
-		if !(is32Bit(int64(c) ^ 1<<uint32(d))) {
-			break
-		}
-		v.reset(OpAMD64XORQconst)
-		v.AuxInt = int32ToAuxInt(c ^ 1<<uint32(d))
-		v.AddArg(x)
-		return true
-	}
@@ ... @@ func rewriteValueAMD64_OpCtz16(v *Value) bool {
 	b := v.Block
 	typ := &b.Func.Config.Types
 	// match: (Ctz16 x)
-	// result: (BSFL (BTSLconst [16] x))
+	// result: (BSFL (ORLconst [1<<16] x))
 	for {
 		x := v_0
 		v.reset(OpAMD64BSFL)
-		v0 := b.NewValue0(v.Pos, OpAMD64BTSLconst, typ.UInt32)
-		v0.AuxInt = int8ToAuxInt(16)
+		v0 := b.NewValue0(v.Pos, OpAMD64ORLconst, typ.UInt32)
+		v0.AuxInt = int32ToAuxInt(1 << 16)
 		v0.AddArg(x)
 		v.AddArg(v0)
 		return true
@@ -25848,12 +25388,12 @@ func rewriteValueAMD64_OpCtz8(v *Value) bool {
 	b := v.Block
 	typ := &b.Func.Config.Types
 	// match: (Ctz8 x)
-	// result: (BSFL (BTSLconst [ 8] x))
+	// result: (BSFL (ORLconst [1<<8 ] x))
 	for {
 		x := v_0
 		v.reset(OpAMD64BSFL)
-		v0 := b.NewValue0(v.Pos, OpAMD64BTSLconst, typ.UInt32)
-		v0.AuxInt = int8ToAuxInt(8)
+		v0 := b.NewValue0(v.Pos, OpAMD64ORLconst, typ.UInt32)
+		v0.AuxInt = int32ToAuxInt(1 << 8)
 		v0.AddArg(x)
 		v.AddArg(v0)
 		return true
diff --git a/test/codegen/bits.go b/test/codegen/bits.go
index 018f5b909e..88d5ebe9cf 100644
--- a/test/codegen/bits.go
+++ b/test/codegen/bits.go
@@ -220,10 +220,10 @@ func biton32(a, b uint32) (n uint32) {
 	// amd64:"BTSL"
 	n += b | (1 << (a & 31))
 
-	// amd64:"BTSL\t[$]31"
+	// amd64:"ORL\t[$]-2147483648"
 	n += a | (1 << 31)
 
-	// amd64:"BTSL\t[$]28"
+	// amd64:"ORL\t[$]268435456"
 	n += a | (1 << 28)
 
 	// amd64:"ORL\t[$]1"
@@ -236,10 +236,10 @@ func bitoff32(a, b uint32) (n uint32) {
 	// amd64:"BTRL"
 	n += b &^ (1 << (a & 31))
 
-	// amd64:"BTRL\t[$]31"
+	// amd64:"ANDL\t[$]2147483647"
 	n += a &^ (1 << 31)
 
-	// amd64:"BTRL\t[$]28"
+	// amd64:"ANDL\t[$]-268435457"
 	n += a &^ (1 << 28)
 
 	// amd64:"ANDL\t[$]-2"
@@ -252,10 +252,10 @@ func bitcompl32(a, b uint32) (n uint32) {
 	// amd64:"BTCL"
 	n += b ^ (1 << (a & 31))
 
-	// amd64:"BTCL\t[$]31"
+	// amd64:"XORL\t[$]-2147483648"
 	n += a ^ (1 << 31)
 
-	// amd64:"BTCL\t[$]28"
+	// amd64:"XORL\t[$]268435456"
 	n += a ^ (1 << 28)
 
 	// amd64:"XORL\t[$]1"
diff --git a/test/codegen/mathbits.go b/test/codegen/mathbits.go
index 797aa23b67..d80bfaeec0 100644
--- a/test/codegen/mathbits.go
+++ b/test/codegen/mathbits.go
@@ -335,7 +335,7 @@ func TrailingZeros32(n uint32) int {
 }
 
 func TrailingZeros16(n uint16) int {
-	// amd64:"BSFL","BTSL\\t\\$16"
+	// amd64:"BSFL","ORL\\t\\$65536"
 	// 386:"BSFL\t"
 	// arm:"ORR\t\\$65536","CLZ",-"MOVHU\tR"
 	// arm64:"ORR\t\\$65536","RBITW","CLZW",-"MOVHU\tR",-"RBIT\t",-"CLZ\t"
@@ -347,7 +347,7 @@
 }
 
 func TrailingZeros8(n uint8) int {
-	// amd64:"BSFL","BTSL\\t\\$8"
+	// amd64:"BSFL","ORL\\t\\$256"
 	// 386:"BSFL"
 	// arm:"ORR\t\\$256","CLZ",-"MOVBU\tR"
 	// arm64:"ORR\t\\$256","RBITW","CLZW",-"MOVBU\tR",-"RBIT\t",-"CLZ\t"
diff --git a/test/codegen/memops.go b/test/codegen/memops.go
index f6cf9450a1..e5e89c2acc 100644
--- a/test/codegen/memops.go
+++ b/test/codegen/memops.go
@@ -372,3 +372,32 @@ func storeTest(a []bool, v int, i int) {
 	// amd64: `BTL\t\$1,`,`SETCS\t3\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
 	a[3+i] = v&2 != 0
 }
+
+func bitOps(p *[12]uint64) {
+	// amd64: `ORQ\t\$8, \(AX\)`
+	p[0] |= 8
+	// amd64: `ORQ\t\$1073741824, 8\(AX\)`
+	p[1] |= 1 << 30
+	// amd64: `BTSQ\t\$31, 16\(AX\)`
+	p[2] |= 1 << 31
+	// amd64: `BTSQ\t\$63, 24\(AX\)`
+	p[3] |= 1 << 63
+
+	// amd64: `ANDQ\t\$-9, 32\(AX\)`
+	p[4] &^= 8
+	// amd64: `ANDQ\t\$-1073741825, 40\(AX\)`
+	p[5] &^= 1 << 30
+	// amd64: `BTRQ\t\$31, 48\(AX\)`
+	p[6] &^= 1 << 31
+	// amd64: `BTRQ\t\$63, 56\(AX\)`
+	p[7] &^= 1 << 63
+
+	// amd64: `XORQ\t\$8, 64\(AX\)`
+	p[8] ^= 8
+	// amd64: `XORQ\t\$1073741824, 72\(AX\)`
+	p[9] ^= 1 << 30
+	// amd64: `BTCQ\t\$31, 80\(AX\)`
+	p[10] ^= 1 << 31
+	// amd64: `BTCQ\t\$63, 88\(AX\)`
+	p[11] ^= 1 << 63
+}
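
The Ctz16/Ctz8 rewrite is behavior-preserving: OR-ing in bit 16 (or 8) makes
BSF's input provably nonzero without disturbing any lower bit, so a zero input
yields exactly 16 (or 8). A quick exhaustive check of that property
(standalone sketch, not part of the patch):

	package main

	import (
		"fmt"
		"math/bits"
	)

	func main() {
		// For every 16-bit x, the trailing-zero count of (x | 1<<16),
		// computed in 32 bits, matches bits.TrailingZeros16(x),
		// including x == 0 (both give 16).
		for i := 0; i <= 0xffff; i++ {
			x := uint16(i)
			if bits.TrailingZeros32(uint32(x)|1<<16) != bits.TrailingZeros16(x) {
				fmt.Println("mismatch at", x)
				return
			}
		}
		fmt.Println("ok")
	}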