cmd/compile: AMD64v3 remove unnecessary TEST comparison in isPowerOfTwo

With GOAMD64=V3 the canonical isPowerOfTwo function:
  func isPowerOfTwo(x uintptr) bool {
    return x&(x-1) == 0
  }

Used to compile to:
  temp := BLSR(x) // x&(x-1)
  flags = TEST(temp, temp)
  return flags.zf

However the blsr instruction already sets ZF according to the result.
So we can remove the TEST instruction if we are just checking ZF.
Such as in multiple pieces of code around memory allocations.

This makes the code smaller and faster.

Change-Id: Ia12d5a73aa3cb49188c0b647b1eff7b56c5a7b58
Reviewed-on: https://go-review.googlesource.com/c/go/+/448255
Run-TryBot: Jakub Ciolek <jakub@ciolek.dev>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Cherry Mui <cherryyz@google.com>
This commit is contained in:
Jorropo 2022-11-06 06:37:13 +01:00 committed by Keith Randall
parent fc814056aa
commit 5c67ebbb31
6 changed files with 583 additions and 29 deletions

View File

@ -274,7 +274,12 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
p.From.Type = obj.TYPE_REG p.From.Type = obj.TYPE_REG
p.From.Reg = v.Args[0].Reg() p.From.Reg = v.Args[0].Reg()
p.To.Type = obj.TYPE_REG p.To.Type = obj.TYPE_REG
switch v.Op {
case ssa.OpAMD64BLSRQ, ssa.OpAMD64BLSRL:
p.To.Reg = v.Reg0()
default:
p.To.Reg = v.Reg() p.To.Reg = v.Reg()
}
case ssa.OpAMD64ANDNQ, ssa.OpAMD64ANDNL: case ssa.OpAMD64ANDNQ, ssa.OpAMD64ANDNL:
p := s.Prog(v.Op.Asm()) p := s.Prog(v.Op.Asm())

View File

@ -2176,7 +2176,16 @@
(AND(Q|L) x (NOT(Q|L) y)) && buildcfg.GOAMD64 >= 3 => (ANDN(Q|L) x y) (AND(Q|L) x (NOT(Q|L) y)) && buildcfg.GOAMD64 >= 3 => (ANDN(Q|L) x y)
(AND(Q|L) x (NEG(Q|L) x)) && buildcfg.GOAMD64 >= 3 => (BLSI(Q|L) x) (AND(Q|L) x (NEG(Q|L) x)) && buildcfg.GOAMD64 >= 3 => (BLSI(Q|L) x)
(XOR(Q|L) x (ADD(Q|L)const [-1] x)) && buildcfg.GOAMD64 >= 3 => (BLSMSK(Q|L) x) (XOR(Q|L) x (ADD(Q|L)const [-1] x)) && buildcfg.GOAMD64 >= 3 => (BLSMSK(Q|L) x)
(AND(Q|L) x (ADD(Q|L)const [-1] x)) && buildcfg.GOAMD64 >= 3 => (BLSR(Q|L) x) (AND(Q|L) <t> x (ADD(Q|L)const [-1] x)) && buildcfg.GOAMD64 >= 3 => (Select0 <t> (BLSR(Q|L) x))
// eliminate TEST instruction in classical "isPowerOfTwo" check
(SETEQ (TEST(Q|L) s:(Select0 blsr:(BLSR(Q|L) _)) s)) => (SETEQ (Select1 <types.TypeFlags> blsr))
(CMOVQEQ x y (TEST(Q|L) s:(Select0 blsr:(BLSR(Q|L) _)) s)) => (CMOVQEQ x y (Select1 <types.TypeFlags> blsr))
(CMOVLEQ x y (TEST(Q|L) s:(Select0 blsr:(BLSR(Q|L) _)) s)) => (CMOVLEQ x y (Select1 <types.TypeFlags> blsr))
(EQ (TEST(Q|L) s:(Select0 blsr:(BLSR(Q|L) _)) s) yes no) => (EQ (Select1 <types.TypeFlags> blsr) yes no)
(SETNE (TEST(Q|L) s:(Select0 blsr:(BLSR(Q|L) _)) s)) => (SETNE (Select1 <types.TypeFlags> blsr))
(CMOVQNE x y (TEST(Q|L) s:(Select0 blsr:(BLSR(Q|L) _)) s)) => (CMOVQNE x y (Select1 <types.TypeFlags> blsr))
(CMOVLNE x y (TEST(Q|L) s:(Select0 blsr:(BLSR(Q|L) _)) s)) => (CMOVLNE x y (Select1 <types.TypeFlags> blsr))
(NE (TEST(Q|L) s:(Select0 blsr:(BLSR(Q|L) _)) s) yes no) => (NE (Select1 <types.TypeFlags> blsr) yes no)
(BSWAP(Q|L) (BSWAP(Q|L) p)) => p (BSWAP(Q|L) (BSWAP(Q|L) p)) => p

View File

@ -1024,8 +1024,8 @@ func init() {
{name: "BLSIL", argLength: 1, reg: gp11, asm: "BLSIL", clobberFlags: true}, // arg0 & -arg0 {name: "BLSIL", argLength: 1, reg: gp11, asm: "BLSIL", clobberFlags: true}, // arg0 & -arg0
{name: "BLSMSKQ", argLength: 1, reg: gp11, asm: "BLSMSKQ", clobberFlags: true}, // arg0 ^ (arg0 - 1) {name: "BLSMSKQ", argLength: 1, reg: gp11, asm: "BLSMSKQ", clobberFlags: true}, // arg0 ^ (arg0 - 1)
{name: "BLSMSKL", argLength: 1, reg: gp11, asm: "BLSMSKL", clobberFlags: true}, // arg0 ^ (arg0 - 1) {name: "BLSMSKL", argLength: 1, reg: gp11, asm: "BLSMSKL", clobberFlags: true}, // arg0 ^ (arg0 - 1)
{name: "BLSRQ", argLength: 1, reg: gp11, asm: "BLSRQ", clobberFlags: true}, // arg0 & (arg0 - 1) {name: "BLSRQ", argLength: 1, reg: gp11flags, asm: "BLSRQ", typ: "(UInt64,Flags)"}, // arg0 & (arg0 - 1)
{name: "BLSRL", argLength: 1, reg: gp11, asm: "BLSRL", clobberFlags: true}, // arg0 & (arg0 - 1) {name: "BLSRL", argLength: 1, reg: gp11flags, asm: "BLSRL", typ: "(UInt32,Flags)"}, // arg0 & (arg0 - 1)
// count the number of trailing zero bits, prefer TZCNTQ over BSFQ, as TZCNTQ(0)==64 // count the number of trailing zero bits, prefer TZCNTQ over BSFQ, as TZCNTQ(0)==64
// and BSFQ(0) is undefined. Same for TZCNTL(0)==32 // and BSFQ(0) is undefined. Same for TZCNTL(0)==32
{name: "TZCNTQ", argLength: 1, reg: gp11, asm: "TZCNTQ", clobberFlags: true}, {name: "TZCNTQ", argLength: 1, reg: gp11, asm: "TZCNTQ", clobberFlags: true},

View File

@ -13982,13 +13982,13 @@ var opcodeTable = [...]opInfo{
{ {
name: "BLSRQ", name: "BLSRQ",
argLen: 1, argLen: 1,
clobberFlags: true,
asm: x86.ABLSRQ, asm: x86.ABLSRQ,
reg: regInfo{ reg: regInfo{
inputs: []inputInfo{ inputs: []inputInfo{
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
}, },
outputs: []outputInfo{ outputs: []outputInfo{
{1, 0},
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
}, },
}, },
@ -13996,13 +13996,13 @@ var opcodeTable = [...]opInfo{
{ {
name: "BLSRL", name: "BLSRL",
argLen: 1, argLen: 1,
clobberFlags: true,
asm: x86.ABLSRL, asm: x86.ABLSRL,
reg: regInfo{ reg: regInfo{
inputs: []inputInfo{ inputs: []inputInfo{
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
}, },
outputs: []outputInfo{ outputs: []outputInfo{
{1, 0},
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
}, },
}, },

View File

@ -2590,6 +2590,8 @@ func rewriteValueAMD64_OpAMD64ADDSSload(v *Value) bool {
func rewriteValueAMD64_OpAMD64ANDL(v *Value) bool { func rewriteValueAMD64_OpAMD64ANDL(v *Value) bool {
v_1 := v.Args[1] v_1 := v.Args[1]
v_0 := v.Args[0] v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (ANDL (NOTL (SHLL (MOVLconst [1]) y)) x) // match: (ANDL (NOTL (SHLL (MOVLconst [1]) y)) x)
// result: (BTRL x y) // result: (BTRL x y)
for { for {
@ -2718,17 +2720,21 @@ func rewriteValueAMD64_OpAMD64ANDL(v *Value) bool {
} }
break break
} }
// match: (ANDL x (ADDLconst [-1] x)) // match: (ANDL <t> x (ADDLconst [-1] x))
// cond: buildcfg.GOAMD64 >= 3 // cond: buildcfg.GOAMD64 >= 3
// result: (BLSRL x) // result: (Select0 <t> (BLSRL x))
for { for {
t := v.Type
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
x := v_0 x := v_0
if v_1.Op != OpAMD64ADDLconst || auxIntToInt32(v_1.AuxInt) != -1 || x != v_1.Args[0] || !(buildcfg.GOAMD64 >= 3) { if v_1.Op != OpAMD64ADDLconst || auxIntToInt32(v_1.AuxInt) != -1 || x != v_1.Args[0] || !(buildcfg.GOAMD64 >= 3) {
continue continue
} }
v.reset(OpAMD64BLSRL) v.reset(OpSelect0)
v.AddArg(x) v.Type = t
v0 := b.NewValue0(v.Pos, OpAMD64BLSRL, types.NewTuple(typ.UInt32, types.TypeFlags))
v0.AddArg(x)
v.AddArg(v0)
return true return true
} }
break break
@ -3056,6 +3062,8 @@ func rewriteValueAMD64_OpAMD64ANDNQ(v *Value) bool {
func rewriteValueAMD64_OpAMD64ANDQ(v *Value) bool { func rewriteValueAMD64_OpAMD64ANDQ(v *Value) bool {
v_1 := v.Args[1] v_1 := v.Args[1]
v_0 := v.Args[0] v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (ANDQ (NOTQ (SHLQ (MOVQconst [1]) y)) x) // match: (ANDQ (NOTQ (SHLQ (MOVQconst [1]) y)) x)
// result: (BTRQ x y) // result: (BTRQ x y)
for { for {
@ -3188,17 +3196,21 @@ func rewriteValueAMD64_OpAMD64ANDQ(v *Value) bool {
} }
break break
} }
// match: (ANDQ x (ADDQconst [-1] x)) // match: (ANDQ <t> x (ADDQconst [-1] x))
// cond: buildcfg.GOAMD64 >= 3 // cond: buildcfg.GOAMD64 >= 3
// result: (BLSRQ x) // result: (Select0 <t> (BLSRQ x))
for { for {
t := v.Type
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
x := v_0 x := v_0
if v_1.Op != OpAMD64ADDQconst || auxIntToInt32(v_1.AuxInt) != -1 || x != v_1.Args[0] || !(buildcfg.GOAMD64 >= 3) { if v_1.Op != OpAMD64ADDQconst || auxIntToInt32(v_1.AuxInt) != -1 || x != v_1.Args[0] || !(buildcfg.GOAMD64 >= 3) {
continue continue
} }
v.reset(OpAMD64BLSRQ) v.reset(OpSelect0)
v.AddArg(x) v.Type = t
v0 := b.NewValue0(v.Pos, OpAMD64BLSRQ, types.NewTuple(typ.UInt64, types.TypeFlags))
v0.AddArg(x)
v.AddArg(v0)
return true return true
} }
break break
@ -4346,6 +4358,7 @@ func rewriteValueAMD64_OpAMD64CMOVLEQ(v *Value) bool {
v_2 := v.Args[2] v_2 := v.Args[2]
v_1 := v.Args[1] v_1 := v.Args[1]
v_0 := v.Args[0] v_0 := v.Args[0]
b := v.Block
// match: (CMOVLEQ x y (InvertFlags cond)) // match: (CMOVLEQ x y (InvertFlags cond))
// result: (CMOVLEQ x y cond) // result: (CMOVLEQ x y cond)
for { for {
@ -4409,6 +4422,62 @@ func rewriteValueAMD64_OpAMD64CMOVLEQ(v *Value) bool {
v.copyOf(y) v.copyOf(y)
return true return true
} }
// match: (CMOVLEQ x y (TESTQ s:(Select0 blsr:(BLSRQ _)) s))
// result: (CMOVLEQ x y (Select1 <types.TypeFlags> blsr))
for {
x := v_0
y := v_1
if v_2.Op != OpAMD64TESTQ {
break
}
_ = v_2.Args[1]
v_2_0 := v_2.Args[0]
v_2_1 := v_2.Args[1]
for _i0 := 0; _i0 <= 1; _i0, v_2_0, v_2_1 = _i0+1, v_2_1, v_2_0 {
s := v_2_0
if s.Op != OpSelect0 {
continue
}
blsr := s.Args[0]
if blsr.Op != OpAMD64BLSRQ || s != v_2_1 {
continue
}
v.reset(OpAMD64CMOVLEQ)
v0 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
v0.AddArg(blsr)
v.AddArg3(x, y, v0)
return true
}
break
}
// match: (CMOVLEQ x y (TESTL s:(Select0 blsr:(BLSRL _)) s))
// result: (CMOVLEQ x y (Select1 <types.TypeFlags> blsr))
for {
x := v_0
y := v_1
if v_2.Op != OpAMD64TESTL {
break
}
_ = v_2.Args[1]
v_2_0 := v_2.Args[0]
v_2_1 := v_2.Args[1]
for _i0 := 0; _i0 <= 1; _i0, v_2_0, v_2_1 = _i0+1, v_2_1, v_2_0 {
s := v_2_0
if s.Op != OpSelect0 {
continue
}
blsr := s.Args[0]
if blsr.Op != OpAMD64BLSRL || s != v_2_1 {
continue
}
v.reset(OpAMD64CMOVLEQ)
v0 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
v0.AddArg(blsr)
v.AddArg3(x, y, v0)
return true
}
break
}
return false return false
} }
func rewriteValueAMD64_OpAMD64CMOVLGE(v *Value) bool { func rewriteValueAMD64_OpAMD64CMOVLGE(v *Value) bool {
@ -4829,6 +4898,7 @@ func rewriteValueAMD64_OpAMD64CMOVLNE(v *Value) bool {
v_2 := v.Args[2] v_2 := v.Args[2]
v_1 := v.Args[1] v_1 := v.Args[1]
v_0 := v.Args[0] v_0 := v.Args[0]
b := v.Block
// match: (CMOVLNE x y (InvertFlags cond)) // match: (CMOVLNE x y (InvertFlags cond))
// result: (CMOVLNE x y cond) // result: (CMOVLNE x y cond)
for { for {
@ -4892,6 +4962,62 @@ func rewriteValueAMD64_OpAMD64CMOVLNE(v *Value) bool {
v.copyOf(x) v.copyOf(x)
return true return true
} }
// match: (CMOVLNE x y (TESTQ s:(Select0 blsr:(BLSRQ _)) s))
// result: (CMOVLNE x y (Select1 <types.TypeFlags> blsr))
for {
x := v_0
y := v_1
if v_2.Op != OpAMD64TESTQ {
break
}
_ = v_2.Args[1]
v_2_0 := v_2.Args[0]
v_2_1 := v_2.Args[1]
for _i0 := 0; _i0 <= 1; _i0, v_2_0, v_2_1 = _i0+1, v_2_1, v_2_0 {
s := v_2_0
if s.Op != OpSelect0 {
continue
}
blsr := s.Args[0]
if blsr.Op != OpAMD64BLSRQ || s != v_2_1 {
continue
}
v.reset(OpAMD64CMOVLNE)
v0 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
v0.AddArg(blsr)
v.AddArg3(x, y, v0)
return true
}
break
}
// match: (CMOVLNE x y (TESTL s:(Select0 blsr:(BLSRL _)) s))
// result: (CMOVLNE x y (Select1 <types.TypeFlags> blsr))
for {
x := v_0
y := v_1
if v_2.Op != OpAMD64TESTL {
break
}
_ = v_2.Args[1]
v_2_0 := v_2.Args[0]
v_2_1 := v_2.Args[1]
for _i0 := 0; _i0 <= 1; _i0, v_2_0, v_2_1 = _i0+1, v_2_1, v_2_0 {
s := v_2_0
if s.Op != OpSelect0 {
continue
}
blsr := s.Args[0]
if blsr.Op != OpAMD64BLSRL || s != v_2_1 {
continue
}
v.reset(OpAMD64CMOVLNE)
v0 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
v0.AddArg(blsr)
v.AddArg3(x, y, v0)
return true
}
break
}
return false return false
} }
func rewriteValueAMD64_OpAMD64CMOVQCC(v *Value) bool { func rewriteValueAMD64_OpAMD64CMOVQCC(v *Value) bool {
@ -5036,6 +5162,7 @@ func rewriteValueAMD64_OpAMD64CMOVQEQ(v *Value) bool {
v_2 := v.Args[2] v_2 := v.Args[2]
v_1 := v.Args[1] v_1 := v.Args[1]
v_0 := v.Args[0] v_0 := v.Args[0]
b := v.Block
// match: (CMOVQEQ x y (InvertFlags cond)) // match: (CMOVQEQ x y (InvertFlags cond))
// result: (CMOVQEQ x y cond) // result: (CMOVQEQ x y cond)
for { for {
@ -5145,6 +5272,62 @@ func rewriteValueAMD64_OpAMD64CMOVQEQ(v *Value) bool {
v.copyOf(x) v.copyOf(x)
return true return true
} }
// match: (CMOVQEQ x y (TESTQ s:(Select0 blsr:(BLSRQ _)) s))
// result: (CMOVQEQ x y (Select1 <types.TypeFlags> blsr))
for {
x := v_0
y := v_1
if v_2.Op != OpAMD64TESTQ {
break
}
_ = v_2.Args[1]
v_2_0 := v_2.Args[0]
v_2_1 := v_2.Args[1]
for _i0 := 0; _i0 <= 1; _i0, v_2_0, v_2_1 = _i0+1, v_2_1, v_2_0 {
s := v_2_0
if s.Op != OpSelect0 {
continue
}
blsr := s.Args[0]
if blsr.Op != OpAMD64BLSRQ || s != v_2_1 {
continue
}
v.reset(OpAMD64CMOVQEQ)
v0 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
v0.AddArg(blsr)
v.AddArg3(x, y, v0)
return true
}
break
}
// match: (CMOVQEQ x y (TESTL s:(Select0 blsr:(BLSRL _)) s))
// result: (CMOVQEQ x y (Select1 <types.TypeFlags> blsr))
for {
x := v_0
y := v_1
if v_2.Op != OpAMD64TESTL {
break
}
_ = v_2.Args[1]
v_2_0 := v_2.Args[0]
v_2_1 := v_2.Args[1]
for _i0 := 0; _i0 <= 1; _i0, v_2_0, v_2_1 = _i0+1, v_2_1, v_2_0 {
s := v_2_0
if s.Op != OpSelect0 {
continue
}
blsr := s.Args[0]
if blsr.Op != OpAMD64BLSRL || s != v_2_1 {
continue
}
v.reset(OpAMD64CMOVQEQ)
v0 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
v0.AddArg(blsr)
v.AddArg3(x, y, v0)
return true
}
break
}
return false return false
} }
func rewriteValueAMD64_OpAMD64CMOVQGE(v *Value) bool { func rewriteValueAMD64_OpAMD64CMOVQGE(v *Value) bool {
@ -5565,6 +5748,7 @@ func rewriteValueAMD64_OpAMD64CMOVQNE(v *Value) bool {
v_2 := v.Args[2] v_2 := v.Args[2]
v_1 := v.Args[1] v_1 := v.Args[1]
v_0 := v.Args[0] v_0 := v.Args[0]
b := v.Block
// match: (CMOVQNE x y (InvertFlags cond)) // match: (CMOVQNE x y (InvertFlags cond))
// result: (CMOVQNE x y cond) // result: (CMOVQNE x y cond)
for { for {
@ -5628,6 +5812,62 @@ func rewriteValueAMD64_OpAMD64CMOVQNE(v *Value) bool {
v.copyOf(x) v.copyOf(x)
return true return true
} }
// match: (CMOVQNE x y (TESTQ s:(Select0 blsr:(BLSRQ _)) s))
// result: (CMOVQNE x y (Select1 <types.TypeFlags> blsr))
for {
x := v_0
y := v_1
if v_2.Op != OpAMD64TESTQ {
break
}
_ = v_2.Args[1]
v_2_0 := v_2.Args[0]
v_2_1 := v_2.Args[1]
for _i0 := 0; _i0 <= 1; _i0, v_2_0, v_2_1 = _i0+1, v_2_1, v_2_0 {
s := v_2_0
if s.Op != OpSelect0 {
continue
}
blsr := s.Args[0]
if blsr.Op != OpAMD64BLSRQ || s != v_2_1 {
continue
}
v.reset(OpAMD64CMOVQNE)
v0 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
v0.AddArg(blsr)
v.AddArg3(x, y, v0)
return true
}
break
}
// match: (CMOVQNE x y (TESTL s:(Select0 blsr:(BLSRL _)) s))
// result: (CMOVQNE x y (Select1 <types.TypeFlags> blsr))
for {
x := v_0
y := v_1
if v_2.Op != OpAMD64TESTL {
break
}
_ = v_2.Args[1]
v_2_0 := v_2.Args[0]
v_2_1 := v_2.Args[1]
for _i0 := 0; _i0 <= 1; _i0, v_2_0, v_2_1 = _i0+1, v_2_1, v_2_0 {
s := v_2_0
if s.Op != OpSelect0 {
continue
}
blsr := s.Args[0]
if blsr.Op != OpAMD64BLSRL || s != v_2_1 {
continue
}
v.reset(OpAMD64CMOVQNE)
v0 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
v0.AddArg(blsr)
v.AddArg3(x, y, v0)
return true
}
break
}
return false return false
} }
func rewriteValueAMD64_OpAMD64CMOVWCC(v *Value) bool { func rewriteValueAMD64_OpAMD64CMOVWCC(v *Value) bool {
@ -21056,6 +21296,58 @@ func rewriteValueAMD64_OpAMD64SETEQ(v *Value) bool {
v.AuxInt = int32ToAuxInt(0) v.AuxInt = int32ToAuxInt(0)
return true return true
} }
// match: (SETEQ (TESTQ s:(Select0 blsr:(BLSRQ _)) s))
// result: (SETEQ (Select1 <types.TypeFlags> blsr))
for {
if v_0.Op != OpAMD64TESTQ {
break
}
_ = v_0.Args[1]
v_0_0 := v_0.Args[0]
v_0_1 := v_0.Args[1]
for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
s := v_0_0
if s.Op != OpSelect0 {
continue
}
blsr := s.Args[0]
if blsr.Op != OpAMD64BLSRQ || s != v_0_1 {
continue
}
v.reset(OpAMD64SETEQ)
v0 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
v0.AddArg(blsr)
v.AddArg(v0)
return true
}
break
}
// match: (SETEQ (TESTL s:(Select0 blsr:(BLSRL _)) s))
// result: (SETEQ (Select1 <types.TypeFlags> blsr))
for {
if v_0.Op != OpAMD64TESTL {
break
}
_ = v_0.Args[1]
v_0_0 := v_0.Args[0]
v_0_1 := v_0.Args[1]
for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
s := v_0_0
if s.Op != OpSelect0 {
continue
}
blsr := s.Args[0]
if blsr.Op != OpAMD64BLSRL || s != v_0_1 {
continue
}
v.reset(OpAMD64SETEQ)
v0 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
v0.AddArg(blsr)
v.AddArg(v0)
return true
}
break
}
return false return false
} }
func rewriteValueAMD64_OpAMD64SETEQstore(v *Value) bool { func rewriteValueAMD64_OpAMD64SETEQstore(v *Value) bool {
@ -22972,6 +23264,58 @@ func rewriteValueAMD64_OpAMD64SETNE(v *Value) bool {
v.AuxInt = int32ToAuxInt(1) v.AuxInt = int32ToAuxInt(1)
return true return true
} }
// match: (SETNE (TESTQ s:(Select0 blsr:(BLSRQ _)) s))
// result: (SETNE (Select1 <types.TypeFlags> blsr))
for {
if v_0.Op != OpAMD64TESTQ {
break
}
_ = v_0.Args[1]
v_0_0 := v_0.Args[0]
v_0_1 := v_0.Args[1]
for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
s := v_0_0
if s.Op != OpSelect0 {
continue
}
blsr := s.Args[0]
if blsr.Op != OpAMD64BLSRQ || s != v_0_1 {
continue
}
v.reset(OpAMD64SETNE)
v0 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
v0.AddArg(blsr)
v.AddArg(v0)
return true
}
break
}
// match: (SETNE (TESTL s:(Select0 blsr:(BLSRL _)) s))
// result: (SETNE (Select1 <types.TypeFlags> blsr))
for {
if v_0.Op != OpAMD64TESTL {
break
}
_ = v_0.Args[1]
v_0_0 := v_0.Args[0]
v_0_1 := v_0.Args[1]
for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
s := v_0_0
if s.Op != OpSelect0 {
continue
}
blsr := s.Args[0]
if blsr.Op != OpAMD64BLSRL || s != v_0_1 {
continue
}
v.reset(OpAMD64SETNE)
v0 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
v0.AddArg(blsr)
v.AddArg(v0)
return true
}
break
}
return false return false
} }
func rewriteValueAMD64_OpAMD64SETNEstore(v *Value) bool { func rewriteValueAMD64_OpAMD64SETNEstore(v *Value) bool {
@ -33533,6 +33877,52 @@ func rewriteBlockAMD64(b *Block) bool {
b.swapSuccessors() b.swapSuccessors()
return true return true
} }
// match: (EQ (TESTQ s:(Select0 blsr:(BLSRQ _)) s) yes no)
// result: (EQ (Select1 <types.TypeFlags> blsr) yes no)
for b.Controls[0].Op == OpAMD64TESTQ {
v_0 := b.Controls[0]
_ = v_0.Args[1]
v_0_0 := v_0.Args[0]
v_0_1 := v_0.Args[1]
for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
s := v_0_0
if s.Op != OpSelect0 {
continue
}
blsr := s.Args[0]
if blsr.Op != OpAMD64BLSRQ || s != v_0_1 {
continue
}
v0 := b.NewValue0(v_0.Pos, OpSelect1, types.TypeFlags)
v0.AddArg(blsr)
b.resetWithControl(BlockAMD64EQ, v0)
return true
}
break
}
// match: (EQ (TESTL s:(Select0 blsr:(BLSRL _)) s) yes no)
// result: (EQ (Select1 <types.TypeFlags> blsr) yes no)
for b.Controls[0].Op == OpAMD64TESTL {
v_0 := b.Controls[0]
_ = v_0.Args[1]
v_0_0 := v_0.Args[0]
v_0_1 := v_0.Args[1]
for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
s := v_0_0
if s.Op != OpSelect0 {
continue
}
blsr := s.Args[0]
if blsr.Op != OpAMD64BLSRL || s != v_0_1 {
continue
}
v0 := b.NewValue0(v_0.Pos, OpSelect1, types.TypeFlags)
v0.AddArg(blsr)
b.resetWithControl(BlockAMD64EQ, v0)
return true
}
break
}
case BlockAMD64GE: case BlockAMD64GE:
// match: (GE (InvertFlags cmp) yes no) // match: (GE (InvertFlags cmp) yes no)
// result: (LE cmp yes no) // result: (LE cmp yes no)
@ -34414,6 +34804,52 @@ func rewriteBlockAMD64(b *Block) bool {
b.Reset(BlockFirst) b.Reset(BlockFirst)
return true return true
} }
// match: (NE (TESTQ s:(Select0 blsr:(BLSRQ _)) s) yes no)
// result: (NE (Select1 <types.TypeFlags> blsr) yes no)
for b.Controls[0].Op == OpAMD64TESTQ {
v_0 := b.Controls[0]
_ = v_0.Args[1]
v_0_0 := v_0.Args[0]
v_0_1 := v_0.Args[1]
for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
s := v_0_0
if s.Op != OpSelect0 {
continue
}
blsr := s.Args[0]
if blsr.Op != OpAMD64BLSRQ || s != v_0_1 {
continue
}
v0 := b.NewValue0(v_0.Pos, OpSelect1, types.TypeFlags)
v0.AddArg(blsr)
b.resetWithControl(BlockAMD64NE, v0)
return true
}
break
}
// match: (NE (TESTL s:(Select0 blsr:(BLSRL _)) s) yes no)
// result: (NE (Select1 <types.TypeFlags> blsr) yes no)
for b.Controls[0].Op == OpAMD64TESTL {
v_0 := b.Controls[0]
_ = v_0.Args[1]
v_0_0 := v_0.Args[0]
v_0_1 := v_0.Args[1]
for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
s := v_0_0
if s.Op != OpSelect0 {
continue
}
blsr := s.Args[0]
if blsr.Op != OpAMD64BLSRL || s != v_0_1 {
continue
}
v0 := b.NewValue0(v_0.Pos, OpSelect1, types.TypeFlags)
v0.AddArg(blsr)
b.resetWithControl(BlockAMD64NE, v0)
return true
}
break
}
case BlockAMD64UGE: case BlockAMD64UGE:
// match: (UGE (TESTQ x x) yes no) // match: (UGE (TESTQ x x) yes no)
// result: (First yes no) // result: (First yes no)

View File

@ -46,6 +46,110 @@ func blsr32(x int32) int32 {
return x & (x - 1) return x & (x - 1)
} }
func isPowerOfTwo64(x int64) bool {
// amd64/v3:"BLSRQ",-"TESTQ",-"CALL"
return blsr64(x) == 0
}
func isPowerOfTwo32(x int32) bool {
// amd64/v3:"BLSRL",-"TESTL",-"CALL"
return blsr32(x) == 0
}
func isPowerOfTwoSelect64(x, a, b int64) int64 {
var r int64
// amd64/v3:"BLSRQ",-"TESTQ",-"CALL"
if isPowerOfTwo64(x) {
r = a
} else {
r = b
}
// amd64/v3:"CMOVQEQ",-"TESTQ",-"CALL"
return r * 2 // force return blocks joining
}
func isPowerOfTwoSelect32(x, a, b int32) int32 {
var r int32
// amd64/v3:"BLSRL",-"TESTL",-"CALL"
if isPowerOfTwo32(x) {
r = a
} else {
r = b
}
// amd64/v3:"CMOVLEQ",-"TESTL",-"CALL"
return r * 2 // force return blocks joining
}
func isPowerOfTwoBranch64(x int64, a func(bool), b func(string)) {
// amd64/v3:"BLSRQ",-"TESTQ",-"CALL"
if isPowerOfTwo64(x) {
a(true)
} else {
b("false")
}
}
func isPowerOfTwoBranch32(x int32, a func(bool), b func(string)) {
// amd64/v3:"BLSRL",-"TESTL",-"CALL"
if isPowerOfTwo32(x) {
a(true)
} else {
b("false")
}
}
func isNotPowerOfTwo64(x int64) bool {
// amd64/v3:"BLSRQ",-"TESTQ",-"CALL"
return blsr64(x) != 0
}
func isNotPowerOfTwo32(x int32) bool {
// amd64/v3:"BLSRL",-"TESTL",-"CALL"
return blsr32(x) != 0
}
func isNotPowerOfTwoSelect64(x, a, b int64) int64 {
var r int64
// amd64/v3:"BLSRQ",-"TESTQ",-"CALL"
if isNotPowerOfTwo64(x) {
r = a
} else {
r = b
}
// amd64/v3:"CMOVQNE",-"TESTQ",-"CALL"
return r * 2 // force return blocks joining
}
func isNotPowerOfTwoSelect32(x, a, b int32) int32 {
var r int32
// amd64/v3:"BLSRL",-"TESTL",-"CALL"
if isNotPowerOfTwo32(x) {
r = a
} else {
r = b
}
// amd64/v3:"CMOVLNE",-"TESTL",-"CALL"
return r * 2 // force return blocks joining
}
func isNotPowerOfTwoBranch64(x int64, a func(bool), b func(string)) {
// amd64/v3:"BLSRQ",-"TESTQ",-"CALL"
if isNotPowerOfTwo64(x) {
a(true)
} else {
b("false")
}
}
func isNotPowerOfTwoBranch32(x int32, a func(bool), b func(string)) {
// amd64/v3:"BLSRL",-"TESTL",-"CALL"
if isNotPowerOfTwo32(x) {
a(true)
} else {
b("false")
}
}
func sarx64(x, y int64) int64 { func sarx64(x, y int64) int64 {
// amd64/v3:"SARXQ" // amd64/v3:"SARXQ"
return x >> y return x >> y