diff --git a/src/cmd/compile/internal/ssa/_gen/AMD64.rules b/src/cmd/compile/internal/ssa/_gen/AMD64.rules index dd3b9a68d7..c0fb252222 100644 --- a/src/cmd/compile/internal/ssa/_gen/AMD64.rules +++ b/src/cmd/compile/internal/ssa/_gen/AMD64.rules @@ -882,44 +882,10 @@ // (ANDQconst [0xFFFFFFFF] x) => (MOVLQZX x) // strength reduction -// Assumes that the following costs from https://gmplib.org/~tege/x86-timing.pdf: -// 1 - addq, shlq, leaq, negq, subq -// 3 - imulq -// This limits the rewrites to two instructions. -// Note that negq always operates in-place, -// which can require a register-register move -// to preserve the original value, -// so it must be used with care. -(MUL(Q|L)const [-9] x) => (NEG(Q|L) (LEA(Q|L)8 x x)) -(MUL(Q|L)const [-5] x) => (NEG(Q|L) (LEA(Q|L)4 x x)) -(MUL(Q|L)const [-3] x) => (NEG(Q|L) (LEA(Q|L)2 x x)) -(MUL(Q|L)const [-1] x) => (NEG(Q|L) x) (MUL(Q|L)const [ 0] _) => (MOV(Q|L)const [0]) (MUL(Q|L)const [ 1] x) => x -(MUL(Q|L)const [ 3] x) => (LEA(Q|L)2 x x) -(MUL(Q|L)const [ 5] x) => (LEA(Q|L)4 x x) -(MUL(Q|L)const [ 7] x) => (LEA(Q|L)2 x (LEA(Q|L)2 x x)) -(MUL(Q|L)const [ 9] x) => (LEA(Q|L)8 x x) -(MUL(Q|L)const [11] x) => (LEA(Q|L)2 x (LEA(Q|L)4 x x)) -(MUL(Q|L)const [13] x) => (LEA(Q|L)4 x (LEA(Q|L)2 x x)) -(MUL(Q|L)const [19] x) => (LEA(Q|L)2 x (LEA(Q|L)8 x x)) -(MUL(Q|L)const [21] x) => (LEA(Q|L)4 x (LEA(Q|L)4 x x)) -(MUL(Q|L)const [25] x) => (LEA(Q|L)8 x (LEA(Q|L)2 x x)) -(MUL(Q|L)const [27] x) => (LEA(Q|L)8 (LEA(Q|L)2 x x) (LEA(Q|L)2 x x)) -(MUL(Q|L)const [37] x) => (LEA(Q|L)4 x (LEA(Q|L)8 x x)) -(MUL(Q|L)const [41] x) => (LEA(Q|L)8 x (LEA(Q|L)4 x x)) -(MUL(Q|L)const [45] x) => (LEA(Q|L)8 (LEA(Q|L)4 x x) (LEA(Q|L)4 x x)) -(MUL(Q|L)const [73] x) => (LEA(Q|L)8 x (LEA(Q|L)8 x x)) -(MUL(Q|L)const [81] x) => (LEA(Q|L)8 (LEA(Q|L)8 x x) (LEA(Q|L)8 x x)) - -(MUL(Q|L)const [c] x) && isPowerOfTwo(int64(c)+1) && c >= 15 => (SUB(Q|L) (SHL(Q|L)const [int8(log64(int64(c)+1))] x) x) -(MUL(Q|L)const [c] x) && isPowerOfTwo(c-1) && c >= 17 => (LEA(Q|L)1 (SHL(Q|L)const [int8(log32(c-1))] x) x) -(MUL(Q|L)const [c] x) && isPowerOfTwo(c-2) && c >= 34 => (LEA(Q|L)2 (SHL(Q|L)const [int8(log32(c-2))] x) x) -(MUL(Q|L)const [c] x) && isPowerOfTwo(c-4) && c >= 68 => (LEA(Q|L)4 (SHL(Q|L)const [int8(log32(c-4))] x) x) -(MUL(Q|L)const [c] x) && isPowerOfTwo(c-8) && c >= 136 => (LEA(Q|L)8 (SHL(Q|L)const [int8(log32(c-8))] x) x) -(MUL(Q|L)const [c] x) && c%3 == 0 && isPowerOfTwo(c/3) => (SHL(Q|L)const [int8(log32(c/3))] (LEA(Q|L)2 x x)) -(MUL(Q|L)const [c] x) && c%5 == 0 && isPowerOfTwo(c/5) => (SHL(Q|L)const [int8(log32(c/5))] (LEA(Q|L)4 x x)) -(MUL(Q|L)const [c] x) && c%9 == 0 && isPowerOfTwo(c/9) => (SHL(Q|L)const [int8(log32(c/9))] (LEA(Q|L)8 x x)) +(MULQconst [c] x) && canMulStrengthReduce(config, int64(c)) => {mulStrengthReduce(v, x, int64(c))} +(MULLconst [c] x) && v.Type.Size() <= 4 && canMulStrengthReduce32(config, c) => {mulStrengthReduce32(v, x, c)} // Prefer addition when shifting left by one (SHL(Q|L)const [1] x) => (ADD(Q|L) x x) @@ -955,12 +921,12 @@ (LEA(Q|L)8 [c] {s} x (ADD(Q|L)const [d] y)) && is32Bit(int64(c)+8*int64(d)) && y.Op != OpSB => (LEA(Q|L)8 [c+8*d] {s} x y) // fold shifts into LEAQx/LEALx -(LEA(Q|L)1 [c] {s} x (ADD(Q|L) y y)) => (LEA(Q|L)2 [c] {s} x y) +(LEA(Q|L)1 [c] {s} x z:(ADD(Q|L) y y)) && x != z => (LEA(Q|L)2 [c] {s} x y) (LEA(Q|L)1 [c] {s} x (SHL(Q|L)const [2] y)) => (LEA(Q|L)4 [c] {s} x y) (LEA(Q|L)1 [c] {s} x (SHL(Q|L)const [3] y)) => (LEA(Q|L)8 [c] {s} x y) -(LEA(Q|L)2 [c] {s} x (ADD(Q|L) y y)) => (LEA(Q|L)4 [c] {s} x y) +(LEA(Q|L)2 [c] {s} x 
z:(ADD(Q|L) y y)) && x != z => (LEA(Q|L)4 [c] {s} x y) (LEA(Q|L)2 [c] {s} x (SHL(Q|L)const [2] y)) => (LEA(Q|L)8 [c] {s} x y) -(LEA(Q|L)4 [c] {s} x (ADD(Q|L) y y)) => (LEA(Q|L)8 [c] {s} x y) +(LEA(Q|L)4 [c] {s} x z:(ADD(Q|L) y y)) && x != z => (LEA(Q|L)8 [c] {s} x y) // (x + x) << 1 -> x << 2 (LEA(Q|L)2 [0] {s} (ADD(Q|L) x x) x) && s == nil => (SHL(Q|L)const [2] x) diff --git a/src/cmd/compile/internal/ssa/_gen/ARM64.rules b/src/cmd/compile/internal/ssa/_gen/ARM64.rules index 7040046711..e906f7b35a 100644 --- a/src/cmd/compile/internal/ssa/_gen/ARM64.rules +++ b/src/cmd/compile/internal/ssa/_gen/ARM64.rules @@ -1051,27 +1051,14 @@ (SBCSflags x y (Select1 (NEGSflags (MOVDconst [0])))) => (SUBSflags x y) // mul by constant -(MUL x (MOVDconst [-1])) => (NEG x) (MUL _ (MOVDconst [0])) => (MOVDconst [0]) (MUL x (MOVDconst [1])) => x -(MUL x (MOVDconst [c])) && isPowerOfTwo(c) => (SLLconst [log64(c)] x) -(MUL x (MOVDconst [c])) && isPowerOfTwo(c-1) && c >= 3 => (ADDshiftLL x x [log64(c-1)]) -(MUL x (MOVDconst [c])) && isPowerOfTwo(c+1) && c >= 7 => (ADDshiftLL (NEG x) x [log64(c+1)]) -(MUL x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo(c/3) => (SLLconst [log64(c/3)] (ADDshiftLL x x [1])) -(MUL x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo(c/5) => (SLLconst [log64(c/5)] (ADDshiftLL x x [2])) -(MUL x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) => (SLLconst [log64(c/7)] (ADDshiftLL (NEG x) x [3])) -(MUL x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) => (SLLconst [log64(c/9)] (ADDshiftLL x x [3])) -(MULW x (MOVDconst [c])) && int32(c)==-1 => (MOVWUreg (NEG x)) (MULW _ (MOVDconst [c])) && int32(c)==0 => (MOVDconst [0]) (MULW x (MOVDconst [c])) && int32(c)==1 => (MOVWUreg x) -(MULW x (MOVDconst [c])) && isPowerOfTwo(c) => (MOVWUreg (SLLconst [log64(c)] x)) -(MULW x (MOVDconst [c])) && isPowerOfTwo(c-1) && int32(c) >= 3 => (MOVWUreg (ADDshiftLL x x [log64(c-1)])) -(MULW x (MOVDconst [c])) && isPowerOfTwo(c+1) && int32(c) >= 7 => (MOVWUreg (ADDshiftLL (NEG x) x [log64(c+1)])) -(MULW x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c) => (MOVWUreg (SLLconst [log64(c/3)] (ADDshiftLL x x [1]))) -(MULW x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c) => (MOVWUreg (SLLconst [log64(c/5)] (ADDshiftLL x x [2]))) -(MULW x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) => (MOVWUreg (SLLconst [log64(c/7)] (ADDshiftLL (NEG x) x [3]))) -(MULW x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) => (MOVWUreg (SLLconst [log64(c/9)] (ADDshiftLL x x [3]))) + +(MUL x (MOVDconst [c])) && canMulStrengthReduce(config, c) => {mulStrengthReduce(v, x, c)} +(MULW x (MOVDconst [c])) && v.Type.Size() <= 4 && canMulStrengthReduce32(config, int32(c)) => {mulStrengthReduce32(v, x, int32(c))} // mneg by constant (MNEG x (MOVDconst [-1])) => x diff --git a/src/cmd/compile/internal/ssa/_gen/ARM64latelower.rules b/src/cmd/compile/internal/ssa/_gen/ARM64latelower.rules index 613a23f6e4..8c43b960b9 100644 --- a/src/cmd/compile/internal/ssa/_gen/ARM64latelower.rules +++ b/src/cmd/compile/internal/ssa/_gen/ARM64latelower.rules @@ -96,3 +96,8 @@ // use zero register (MOVDconst [0]) => (ZERO) + +// Prefer addition when shifting left by one. +// They have the same latency, but ADD can often be done +// by more functional units in the processor. 
+(SLLconst [1] x) => (ADD x x) diff --git a/src/cmd/compile/internal/ssa/_gen/rulegen.go b/src/cmd/compile/internal/ssa/_gen/rulegen.go index 5f5967a142..c2891da6c8 100644 --- a/src/cmd/compile/internal/ssa/_gen/rulegen.go +++ b/src/cmd/compile/internal/ssa/_gen/rulegen.go @@ -50,6 +50,7 @@ import ( // special rules: trailing ellipsis "..." (in the outermost sexpr?) must match on both sides of a rule. // trailing three underscore "___" in the outermost match sexpr indicate the presence of // extra ignored args that need not appear in the replacement +// if the right-hand side is in {}, then it is code used to generate the result. // extra conditions is just a chunk of Go that evaluates to a boolean. It may use // variables declared in the matching tsexpr. The variable "v" is predefined to be @@ -1182,6 +1183,11 @@ func genResult(rr *RuleRewrite, arch arch, result, pos string) { rr.add(stmtf("b = %s", s[0])) result = s[1] } + if result[0] == '{' { + // Arbitrary code used to make the result + rr.add(stmtf("v.copyOf(%s)", result[1:len(result)-1])) + return + } cse := make(map[string]string) genResult0(rr, arch, result, true, move, pos, cse) } diff --git a/src/cmd/compile/internal/ssa/config.go b/src/cmd/compile/internal/ssa/config.go index a3131efa41..d4cd32a0d7 100644 --- a/src/cmd/compile/internal/ssa/config.go +++ b/src/cmd/compile/internal/ssa/config.go @@ -50,6 +50,14 @@ type Config struct { haveBswap64 bool // architecture implements Bswap64 haveBswap32 bool // architecture implements Bswap32 haveBswap16 bool // architecture implements Bswap16 + + // mulRecipes[x] = function to build v * x from v. + mulRecipes map[int64]mulRecipe +} + +type mulRecipe struct { + cost int + build func(*Value, *Value) *Value // build(m, v) returns v * x built at m. } type ( @@ -364,6 +372,8 @@ func NewConfig(arch string, types Types, ctxt *obj.Link, optimize, softfloat boo opcodeTable[Op386LoweredWB].reg.clobbers |= 1 << 3 // BX } + c.buildRecipes(arch) + return c } @@ -382,3 +392,253 @@ func (c *Config) haveByteSwap(size int64) bool { return false } } + +func (c *Config) buildRecipes(arch string) { + // Information for strength-reducing multiplies. + type linearCombo struct { + // we can compute a*x+b*y in one instruction + a, b int64 + // cost, in arbitrary units (tenths of cycles, usually) + cost int + // builds SSA value for a*x+b*y. Use the position + // information from m. + build func(m, x, y *Value) *Value + } + + // List all the linear combination instructions we have. + var linearCombos []linearCombo + r := func(a, b int64, cost int, build func(m, x, y *Value) *Value) { + linearCombos = append(linearCombos, linearCombo{a: a, b: b, cost: cost, build: build}) + } + var mulCost int + switch arch { + case "amd64": + // Assumes that the following costs from https://gmplib.org/~tege/x86-timing.pdf: + // 1 - addq, shlq, leaq, negq, subq + // 3 - imulq + // These costs limit the rewrites to two instructions. + // Operations which have to happen in place (and thus + // may require a reg-reg move) score slightly higher. 
+ mulCost = 30 + // add + r(1, 1, 10, + func(m, x, y *Value) *Value { + v := m.Block.NewValue2(m.Pos, OpAMD64ADDQ, m.Type, x, y) + if m.Type.Size() == 4 { + v.Op = OpAMD64ADDL + } + return v + }) + // neg + r(-1, 0, 11, + func(m, x, y *Value) *Value { + v := m.Block.NewValue1(m.Pos, OpAMD64NEGQ, m.Type, x) + if m.Type.Size() == 4 { + v.Op = OpAMD64NEGL + } + return v + }) + // sub + r(1, -1, 11, + func(m, x, y *Value) *Value { + v := m.Block.NewValue2(m.Pos, OpAMD64SUBQ, m.Type, x, y) + if m.Type.Size() == 4 { + v.Op = OpAMD64SUBL + } + return v + }) + // lea + r(1, 2, 10, + func(m, x, y *Value) *Value { + v := m.Block.NewValue2(m.Pos, OpAMD64LEAQ2, m.Type, x, y) + if m.Type.Size() == 4 { + v.Op = OpAMD64LEAL2 + } + return v + }) + r(1, 4, 10, + func(m, x, y *Value) *Value { + v := m.Block.NewValue2(m.Pos, OpAMD64LEAQ4, m.Type, x, y) + if m.Type.Size() == 4 { + v.Op = OpAMD64LEAL4 + } + return v + }) + r(1, 8, 10, + func(m, x, y *Value) *Value { + v := m.Block.NewValue2(m.Pos, OpAMD64LEAQ8, m.Type, x, y) + if m.Type.Size() == 4 { + v.Op = OpAMD64LEAL8 + } + return v + }) + // regular shifts + for i := 2; i < 64; i++ { + r(1< 4 { + c++ + } + r(1, 1< 4 { + c++ + } + r(-1< 4 { + c++ + } + r(1, -1< x x)) - for { - if auxIntToInt32(v.AuxInt) != -9 { - break - } - x := v_0 - v.reset(OpAMD64NEGL) - v0 := b.NewValue0(v.Pos, OpAMD64LEAL8, v.Type) - v0.AddArg2(x, x) - v.AddArg(v0) - return true - } - // match: (MULLconst [-5] x) - // result: (NEGL (LEAL4 x x)) - for { - if auxIntToInt32(v.AuxInt) != -5 { - break - } - x := v_0 - v.reset(OpAMD64NEGL) - v0 := b.NewValue0(v.Pos, OpAMD64LEAL4, v.Type) - v0.AddArg2(x, x) - v.AddArg(v0) - return true - } - // match: (MULLconst [-3] x) - // result: (NEGL (LEAL2 x x)) - for { - if auxIntToInt32(v.AuxInt) != -3 { - break - } - x := v_0 - v.reset(OpAMD64NEGL) - v0 := b.NewValue0(v.Pos, OpAMD64LEAL2, v.Type) - v0.AddArg2(x, x) - v.AddArg(v0) - return true - } - // match: (MULLconst [-1] x) - // result: (NEGL x) - for { - if auxIntToInt32(v.AuxInt) != -1 { - break - } - x := v_0 - v.reset(OpAMD64NEGL) - v.AddArg(x) - return true - } // match: (MULLconst [ 0] _) // result: (MOVLconst [0]) for { @@ -13580,321 +13543,16 @@ func rewriteValueAMD64_OpAMD64MULLconst(v *Value) bool { v.copyOf(x) return true } - // match: (MULLconst [ 3] x) - // result: (LEAL2 x x) - for { - if auxIntToInt32(v.AuxInt) != 3 { - break - } - x := v_0 - v.reset(OpAMD64LEAL2) - v.AddArg2(x, x) - return true - } - // match: (MULLconst [ 5] x) - // result: (LEAL4 x x) - for { - if auxIntToInt32(v.AuxInt) != 5 { - break - } - x := v_0 - v.reset(OpAMD64LEAL4) - v.AddArg2(x, x) - return true - } - // match: (MULLconst [ 7] x) - // result: (LEAL2 x (LEAL2 x x)) - for { - if auxIntToInt32(v.AuxInt) != 7 { - break - } - x := v_0 - v.reset(OpAMD64LEAL2) - v0 := b.NewValue0(v.Pos, OpAMD64LEAL2, v.Type) - v0.AddArg2(x, x) - v.AddArg2(x, v0) - return true - } - // match: (MULLconst [ 9] x) - // result: (LEAL8 x x) - for { - if auxIntToInt32(v.AuxInt) != 9 { - break - } - x := v_0 - v.reset(OpAMD64LEAL8) - v.AddArg2(x, x) - return true - } - // match: (MULLconst [11] x) - // result: (LEAL2 x (LEAL4 x x)) - for { - if auxIntToInt32(v.AuxInt) != 11 { - break - } - x := v_0 - v.reset(OpAMD64LEAL2) - v0 := b.NewValue0(v.Pos, OpAMD64LEAL4, v.Type) - v0.AddArg2(x, x) - v.AddArg2(x, v0) - return true - } - // match: (MULLconst [13] x) - // result: (LEAL4 x (LEAL2 x x)) - for { - if auxIntToInt32(v.AuxInt) != 13 { - break - } - x := v_0 - v.reset(OpAMD64LEAL4) - v0 := b.NewValue0(v.Pos, OpAMD64LEAL2, v.Type) - 
v0.AddArg2(x, x) - v.AddArg2(x, v0) - return true - } - // match: (MULLconst [19] x) - // result: (LEAL2 x (LEAL8 x x)) - for { - if auxIntToInt32(v.AuxInt) != 19 { - break - } - x := v_0 - v.reset(OpAMD64LEAL2) - v0 := b.NewValue0(v.Pos, OpAMD64LEAL8, v.Type) - v0.AddArg2(x, x) - v.AddArg2(x, v0) - return true - } - // match: (MULLconst [21] x) - // result: (LEAL4 x (LEAL4 x x)) - for { - if auxIntToInt32(v.AuxInt) != 21 { - break - } - x := v_0 - v.reset(OpAMD64LEAL4) - v0 := b.NewValue0(v.Pos, OpAMD64LEAL4, v.Type) - v0.AddArg2(x, x) - v.AddArg2(x, v0) - return true - } - // match: (MULLconst [25] x) - // result: (LEAL8 x (LEAL2 x x)) - for { - if auxIntToInt32(v.AuxInt) != 25 { - break - } - x := v_0 - v.reset(OpAMD64LEAL8) - v0 := b.NewValue0(v.Pos, OpAMD64LEAL2, v.Type) - v0.AddArg2(x, x) - v.AddArg2(x, v0) - return true - } - // match: (MULLconst [27] x) - // result: (LEAL8 (LEAL2 x x) (LEAL2 x x)) - for { - if auxIntToInt32(v.AuxInt) != 27 { - break - } - x := v_0 - v.reset(OpAMD64LEAL8) - v0 := b.NewValue0(v.Pos, OpAMD64LEAL2, v.Type) - v0.AddArg2(x, x) - v.AddArg2(v0, v0) - return true - } - // match: (MULLconst [37] x) - // result: (LEAL4 x (LEAL8 x x)) - for { - if auxIntToInt32(v.AuxInt) != 37 { - break - } - x := v_0 - v.reset(OpAMD64LEAL4) - v0 := b.NewValue0(v.Pos, OpAMD64LEAL8, v.Type) - v0.AddArg2(x, x) - v.AddArg2(x, v0) - return true - } - // match: (MULLconst [41] x) - // result: (LEAL8 x (LEAL4 x x)) - for { - if auxIntToInt32(v.AuxInt) != 41 { - break - } - x := v_0 - v.reset(OpAMD64LEAL8) - v0 := b.NewValue0(v.Pos, OpAMD64LEAL4, v.Type) - v0.AddArg2(x, x) - v.AddArg2(x, v0) - return true - } - // match: (MULLconst [45] x) - // result: (LEAL8 (LEAL4 x x) (LEAL4 x x)) - for { - if auxIntToInt32(v.AuxInt) != 45 { - break - } - x := v_0 - v.reset(OpAMD64LEAL8) - v0 := b.NewValue0(v.Pos, OpAMD64LEAL4, v.Type) - v0.AddArg2(x, x) - v.AddArg2(v0, v0) - return true - } - // match: (MULLconst [73] x) - // result: (LEAL8 x (LEAL8 x x)) - for { - if auxIntToInt32(v.AuxInt) != 73 { - break - } - x := v_0 - v.reset(OpAMD64LEAL8) - v0 := b.NewValue0(v.Pos, OpAMD64LEAL8, v.Type) - v0.AddArg2(x, x) - v.AddArg2(x, v0) - return true - } - // match: (MULLconst [81] x) - // result: (LEAL8 (LEAL8 x x) (LEAL8 x x)) - for { - if auxIntToInt32(v.AuxInt) != 81 { - break - } - x := v_0 - v.reset(OpAMD64LEAL8) - v0 := b.NewValue0(v.Pos, OpAMD64LEAL8, v.Type) - v0.AddArg2(x, x) - v.AddArg2(v0, v0) - return true - } // match: (MULLconst [c] x) - // cond: isPowerOfTwo(int64(c)+1) && c >= 15 - // result: (SUBL (SHLLconst [int8(log64(int64(c)+1))] x) x) + // cond: v.Type.Size() <= 4 && canMulStrengthReduce32(config, c) + // result: {mulStrengthReduce32(v, x, c)} for { c := auxIntToInt32(v.AuxInt) x := v_0 - if !(isPowerOfTwo(int64(c)+1) && c >= 15) { + if !(v.Type.Size() <= 4 && canMulStrengthReduce32(config, c)) { break } - v.reset(OpAMD64SUBL) - v0 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type) - v0.AuxInt = int8ToAuxInt(int8(log64(int64(c) + 1))) - v0.AddArg(x) - v.AddArg2(v0, x) - return true - } - // match: (MULLconst [c] x) - // cond: isPowerOfTwo(c-1) && c >= 17 - // result: (LEAL1 (SHLLconst [int8(log32(c-1))] x) x) - for { - c := auxIntToInt32(v.AuxInt) - x := v_0 - if !(isPowerOfTwo(c-1) && c >= 17) { - break - } - v.reset(OpAMD64LEAL1) - v0 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type) - v0.AuxInt = int8ToAuxInt(int8(log32(c - 1))) - v0.AddArg(x) - v.AddArg2(v0, x) - return true - } - // match: (MULLconst [c] x) - // cond: isPowerOfTwo(c-2) && c >= 34 - // result: (LEAL2 (SHLLconst 
[int8(log32(c-2))] x) x) - for { - c := auxIntToInt32(v.AuxInt) - x := v_0 - if !(isPowerOfTwo(c-2) && c >= 34) { - break - } - v.reset(OpAMD64LEAL2) - v0 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type) - v0.AuxInt = int8ToAuxInt(int8(log32(c - 2))) - v0.AddArg(x) - v.AddArg2(v0, x) - return true - } - // match: (MULLconst [c] x) - // cond: isPowerOfTwo(c-4) && c >= 68 - // result: (LEAL4 (SHLLconst [int8(log32(c-4))] x) x) - for { - c := auxIntToInt32(v.AuxInt) - x := v_0 - if !(isPowerOfTwo(c-4) && c >= 68) { - break - } - v.reset(OpAMD64LEAL4) - v0 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type) - v0.AuxInt = int8ToAuxInt(int8(log32(c - 4))) - v0.AddArg(x) - v.AddArg2(v0, x) - return true - } - // match: (MULLconst [c] x) - // cond: isPowerOfTwo(c-8) && c >= 136 - // result: (LEAL8 (SHLLconst [int8(log32(c-8))] x) x) - for { - c := auxIntToInt32(v.AuxInt) - x := v_0 - if !(isPowerOfTwo(c-8) && c >= 136) { - break - } - v.reset(OpAMD64LEAL8) - v0 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type) - v0.AuxInt = int8ToAuxInt(int8(log32(c - 8))) - v0.AddArg(x) - v.AddArg2(v0, x) - return true - } - // match: (MULLconst [c] x) - // cond: c%3 == 0 && isPowerOfTwo(c/3) - // result: (SHLLconst [int8(log32(c/3))] (LEAL2 x x)) - for { - c := auxIntToInt32(v.AuxInt) - x := v_0 - if !(c%3 == 0 && isPowerOfTwo(c/3)) { - break - } - v.reset(OpAMD64SHLLconst) - v.AuxInt = int8ToAuxInt(int8(log32(c / 3))) - v0 := b.NewValue0(v.Pos, OpAMD64LEAL2, v.Type) - v0.AddArg2(x, x) - v.AddArg(v0) - return true - } - // match: (MULLconst [c] x) - // cond: c%5 == 0 && isPowerOfTwo(c/5) - // result: (SHLLconst [int8(log32(c/5))] (LEAL4 x x)) - for { - c := auxIntToInt32(v.AuxInt) - x := v_0 - if !(c%5 == 0 && isPowerOfTwo(c/5)) { - break - } - v.reset(OpAMD64SHLLconst) - v.AuxInt = int8ToAuxInt(int8(log32(c / 5))) - v0 := b.NewValue0(v.Pos, OpAMD64LEAL4, v.Type) - v0.AddArg2(x, x) - v.AddArg(v0) - return true - } - // match: (MULLconst [c] x) - // cond: c%9 == 0 && isPowerOfTwo(c/9) - // result: (SHLLconst [int8(log32(c/9))] (LEAL8 x x)) - for { - c := auxIntToInt32(v.AuxInt) - x := v_0 - if !(c%9 == 0 && isPowerOfTwo(c/9)) { - break - } - v.reset(OpAMD64SHLLconst) - v.AuxInt = int8ToAuxInt(int8(log32(c / 9))) - v0 := b.NewValue0(v.Pos, OpAMD64LEAL8, v.Type) - v0.AddArg2(x, x) - v.AddArg(v0) + v.copyOf(mulStrengthReduce32(v, x, c)) return true } // match: (MULLconst [c] (MOVLconst [d])) @@ -13939,6 +13597,7 @@ func rewriteValueAMD64_OpAMD64MULQ(v *Value) bool { func rewriteValueAMD64_OpAMD64MULQconst(v *Value) bool { v_0 := v.Args[0] b := v.Block + config := b.Func.Config // match: (MULQconst [c] (MULQconst [d] x)) // cond: is32Bit(int64(c)*int64(d)) // result: (MULQconst [c * d] x) @@ -13957,56 +13616,6 @@ func rewriteValueAMD64_OpAMD64MULQconst(v *Value) bool { v.AddArg(x) return true } - // match: (MULQconst [-9] x) - // result: (NEGQ (LEAQ8 x x)) - for { - if auxIntToInt32(v.AuxInt) != -9 { - break - } - x := v_0 - v.reset(OpAMD64NEGQ) - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, v.Type) - v0.AddArg2(x, x) - v.AddArg(v0) - return true - } - // match: (MULQconst [-5] x) - // result: (NEGQ (LEAQ4 x x)) - for { - if auxIntToInt32(v.AuxInt) != -5 { - break - } - x := v_0 - v.reset(OpAMD64NEGQ) - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, v.Type) - v0.AddArg2(x, x) - v.AddArg(v0) - return true - } - // match: (MULQconst [-3] x) - // result: (NEGQ (LEAQ2 x x)) - for { - if auxIntToInt32(v.AuxInt) != -3 { - break - } - x := v_0 - v.reset(OpAMD64NEGQ) - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, v.Type) - v0.AddArg2(x, x) - 
v.AddArg(v0) - return true - } - // match: (MULQconst [-1] x) - // result: (NEGQ x) - for { - if auxIntToInt32(v.AuxInt) != -1 { - break - } - x := v_0 - v.reset(OpAMD64NEGQ) - v.AddArg(x) - return true - } // match: (MULQconst [ 0] _) // result: (MOVQconst [0]) for { @@ -14027,321 +13636,16 @@ func rewriteValueAMD64_OpAMD64MULQconst(v *Value) bool { v.copyOf(x) return true } - // match: (MULQconst [ 3] x) - // result: (LEAQ2 x x) - for { - if auxIntToInt32(v.AuxInt) != 3 { - break - } - x := v_0 - v.reset(OpAMD64LEAQ2) - v.AddArg2(x, x) - return true - } - // match: (MULQconst [ 5] x) - // result: (LEAQ4 x x) - for { - if auxIntToInt32(v.AuxInt) != 5 { - break - } - x := v_0 - v.reset(OpAMD64LEAQ4) - v.AddArg2(x, x) - return true - } - // match: (MULQconst [ 7] x) - // result: (LEAQ2 x (LEAQ2 x x)) - for { - if auxIntToInt32(v.AuxInt) != 7 { - break - } - x := v_0 - v.reset(OpAMD64LEAQ2) - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, v.Type) - v0.AddArg2(x, x) - v.AddArg2(x, v0) - return true - } - // match: (MULQconst [ 9] x) - // result: (LEAQ8 x x) - for { - if auxIntToInt32(v.AuxInt) != 9 { - break - } - x := v_0 - v.reset(OpAMD64LEAQ8) - v.AddArg2(x, x) - return true - } - // match: (MULQconst [11] x) - // result: (LEAQ2 x (LEAQ4 x x)) - for { - if auxIntToInt32(v.AuxInt) != 11 { - break - } - x := v_0 - v.reset(OpAMD64LEAQ2) - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, v.Type) - v0.AddArg2(x, x) - v.AddArg2(x, v0) - return true - } - // match: (MULQconst [13] x) - // result: (LEAQ4 x (LEAQ2 x x)) - for { - if auxIntToInt32(v.AuxInt) != 13 { - break - } - x := v_0 - v.reset(OpAMD64LEAQ4) - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, v.Type) - v0.AddArg2(x, x) - v.AddArg2(x, v0) - return true - } - // match: (MULQconst [19] x) - // result: (LEAQ2 x (LEAQ8 x x)) - for { - if auxIntToInt32(v.AuxInt) != 19 { - break - } - x := v_0 - v.reset(OpAMD64LEAQ2) - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, v.Type) - v0.AddArg2(x, x) - v.AddArg2(x, v0) - return true - } - // match: (MULQconst [21] x) - // result: (LEAQ4 x (LEAQ4 x x)) - for { - if auxIntToInt32(v.AuxInt) != 21 { - break - } - x := v_0 - v.reset(OpAMD64LEAQ4) - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, v.Type) - v0.AddArg2(x, x) - v.AddArg2(x, v0) - return true - } - // match: (MULQconst [25] x) - // result: (LEAQ8 x (LEAQ2 x x)) - for { - if auxIntToInt32(v.AuxInt) != 25 { - break - } - x := v_0 - v.reset(OpAMD64LEAQ8) - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, v.Type) - v0.AddArg2(x, x) - v.AddArg2(x, v0) - return true - } - // match: (MULQconst [27] x) - // result: (LEAQ8 (LEAQ2 x x) (LEAQ2 x x)) - for { - if auxIntToInt32(v.AuxInt) != 27 { - break - } - x := v_0 - v.reset(OpAMD64LEAQ8) - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, v.Type) - v0.AddArg2(x, x) - v.AddArg2(v0, v0) - return true - } - // match: (MULQconst [37] x) - // result: (LEAQ4 x (LEAQ8 x x)) - for { - if auxIntToInt32(v.AuxInt) != 37 { - break - } - x := v_0 - v.reset(OpAMD64LEAQ4) - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, v.Type) - v0.AddArg2(x, x) - v.AddArg2(x, v0) - return true - } - // match: (MULQconst [41] x) - // result: (LEAQ8 x (LEAQ4 x x)) - for { - if auxIntToInt32(v.AuxInt) != 41 { - break - } - x := v_0 - v.reset(OpAMD64LEAQ8) - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, v.Type) - v0.AddArg2(x, x) - v.AddArg2(x, v0) - return true - } - // match: (MULQconst [45] x) - // result: (LEAQ8 (LEAQ4 x x) (LEAQ4 x x)) - for { - if auxIntToInt32(v.AuxInt) != 45 { - break - } - x := v_0 - v.reset(OpAMD64LEAQ8) - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, v.Type) - v0.AddArg2(x, x) - 
v.AddArg2(v0, v0) - return true - } - // match: (MULQconst [73] x) - // result: (LEAQ8 x (LEAQ8 x x)) - for { - if auxIntToInt32(v.AuxInt) != 73 { - break - } - x := v_0 - v.reset(OpAMD64LEAQ8) - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, v.Type) - v0.AddArg2(x, x) - v.AddArg2(x, v0) - return true - } - // match: (MULQconst [81] x) - // result: (LEAQ8 (LEAQ8 x x) (LEAQ8 x x)) - for { - if auxIntToInt32(v.AuxInt) != 81 { - break - } - x := v_0 - v.reset(OpAMD64LEAQ8) - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, v.Type) - v0.AddArg2(x, x) - v.AddArg2(v0, v0) - return true - } // match: (MULQconst [c] x) - // cond: isPowerOfTwo(int64(c)+1) && c >= 15 - // result: (SUBQ (SHLQconst [int8(log64(int64(c)+1))] x) x) + // cond: canMulStrengthReduce(config, int64(c)) + // result: {mulStrengthReduce(v, x, int64(c))} for { c := auxIntToInt32(v.AuxInt) x := v_0 - if !(isPowerOfTwo(int64(c)+1) && c >= 15) { + if !(canMulStrengthReduce(config, int64(c))) { break } - v.reset(OpAMD64SUBQ) - v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type) - v0.AuxInt = int8ToAuxInt(int8(log64(int64(c) + 1))) - v0.AddArg(x) - v.AddArg2(v0, x) - return true - } - // match: (MULQconst [c] x) - // cond: isPowerOfTwo(c-1) && c >= 17 - // result: (LEAQ1 (SHLQconst [int8(log32(c-1))] x) x) - for { - c := auxIntToInt32(v.AuxInt) - x := v_0 - if !(isPowerOfTwo(c-1) && c >= 17) { - break - } - v.reset(OpAMD64LEAQ1) - v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type) - v0.AuxInt = int8ToAuxInt(int8(log32(c - 1))) - v0.AddArg(x) - v.AddArg2(v0, x) - return true - } - // match: (MULQconst [c] x) - // cond: isPowerOfTwo(c-2) && c >= 34 - // result: (LEAQ2 (SHLQconst [int8(log32(c-2))] x) x) - for { - c := auxIntToInt32(v.AuxInt) - x := v_0 - if !(isPowerOfTwo(c-2) && c >= 34) { - break - } - v.reset(OpAMD64LEAQ2) - v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type) - v0.AuxInt = int8ToAuxInt(int8(log32(c - 2))) - v0.AddArg(x) - v.AddArg2(v0, x) - return true - } - // match: (MULQconst [c] x) - // cond: isPowerOfTwo(c-4) && c >= 68 - // result: (LEAQ4 (SHLQconst [int8(log32(c-4))] x) x) - for { - c := auxIntToInt32(v.AuxInt) - x := v_0 - if !(isPowerOfTwo(c-4) && c >= 68) { - break - } - v.reset(OpAMD64LEAQ4) - v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type) - v0.AuxInt = int8ToAuxInt(int8(log32(c - 4))) - v0.AddArg(x) - v.AddArg2(v0, x) - return true - } - // match: (MULQconst [c] x) - // cond: isPowerOfTwo(c-8) && c >= 136 - // result: (LEAQ8 (SHLQconst [int8(log32(c-8))] x) x) - for { - c := auxIntToInt32(v.AuxInt) - x := v_0 - if !(isPowerOfTwo(c-8) && c >= 136) { - break - } - v.reset(OpAMD64LEAQ8) - v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type) - v0.AuxInt = int8ToAuxInt(int8(log32(c - 8))) - v0.AddArg(x) - v.AddArg2(v0, x) - return true - } - // match: (MULQconst [c] x) - // cond: c%3 == 0 && isPowerOfTwo(c/3) - // result: (SHLQconst [int8(log32(c/3))] (LEAQ2 x x)) - for { - c := auxIntToInt32(v.AuxInt) - x := v_0 - if !(c%3 == 0 && isPowerOfTwo(c/3)) { - break - } - v.reset(OpAMD64SHLQconst) - v.AuxInt = int8ToAuxInt(int8(log32(c / 3))) - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, v.Type) - v0.AddArg2(x, x) - v.AddArg(v0) - return true - } - // match: (MULQconst [c] x) - // cond: c%5 == 0 && isPowerOfTwo(c/5) - // result: (SHLQconst [int8(log32(c/5))] (LEAQ4 x x)) - for { - c := auxIntToInt32(v.AuxInt) - x := v_0 - if !(c%5 == 0 && isPowerOfTwo(c/5)) { - break - } - v.reset(OpAMD64SHLQconst) - v.AuxInt = int8ToAuxInt(int8(log32(c / 5))) - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, v.Type) - v0.AddArg2(x, x) - v.AddArg(v0) - return 
true - } - // match: (MULQconst [c] x) - // cond: c%9 == 0 && isPowerOfTwo(c/9) - // result: (SHLQconst [int8(log32(c/9))] (LEAQ8 x x)) - for { - c := auxIntToInt32(v.AuxInt) - x := v_0 - if !(c%9 == 0 && isPowerOfTwo(c/9)) { - break - } - v.reset(OpAMD64SHLQconst) - v.AuxInt = int8ToAuxInt(int8(log32(c / 9))) - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, v.Type) - v0.AddArg2(x, x) - v.AddArg(v0) + v.copyOf(mulStrengthReduce(v, x, int64(c))) return true } // match: (MULQconst [c] (MOVQconst [d])) diff --git a/src/cmd/compile/internal/ssa/rewriteARM64.go b/src/cmd/compile/internal/ssa/rewriteARM64.go index 7f2feabbf7..0c107262fd 100644 --- a/src/cmd/compile/internal/ssa/rewriteARM64.go +++ b/src/cmd/compile/internal/ssa/rewriteARM64.go @@ -12331,6 +12331,7 @@ func rewriteValueARM64_OpARM64MUL(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block + config := b.Func.Config // match: (MUL (NEG x) y) // result: (MNEG x y) for { @@ -12346,20 +12347,6 @@ func rewriteValueARM64_OpARM64MUL(v *Value) bool { } break } - // match: (MUL x (MOVDconst [-1])) - // result: (NEG x) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - if v_1.Op != OpARM64MOVDconst || auxIntToInt64(v_1.AuxInt) != -1 { - continue - } - v.reset(OpARM64NEG) - v.AddArg(x) - return true - } - break - } // match: (MUL _ (MOVDconst [0])) // result: (MOVDconst [0]) for { @@ -12387,8 +12374,8 @@ func rewriteValueARM64_OpARM64MUL(v *Value) bool { break } // match: (MUL x (MOVDconst [c])) - // cond: isPowerOfTwo(c) - // result: (SLLconst [log64(c)] x) + // cond: canMulStrengthReduce(config, c) + // result: {mulStrengthReduce(v, x, c)} for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { x := v_0 @@ -12396,148 +12383,10 @@ func rewriteValueARM64_OpARM64MUL(v *Value) bool { continue } c := auxIntToInt64(v_1.AuxInt) - if !(isPowerOfTwo(c)) { + if !(canMulStrengthReduce(config, c)) { continue } - v.reset(OpARM64SLLconst) - v.AuxInt = int64ToAuxInt(log64(c)) - v.AddArg(x) - return true - } - break - } - // match: (MUL x (MOVDconst [c])) - // cond: isPowerOfTwo(c-1) && c >= 3 - // result: (ADDshiftLL x x [log64(c-1)]) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - if v_1.Op != OpARM64MOVDconst { - continue - } - c := auxIntToInt64(v_1.AuxInt) - if !(isPowerOfTwo(c-1) && c >= 3) { - continue - } - v.reset(OpARM64ADDshiftLL) - v.AuxInt = int64ToAuxInt(log64(c - 1)) - v.AddArg2(x, x) - return true - } - break - } - // match: (MUL x (MOVDconst [c])) - // cond: isPowerOfTwo(c+1) && c >= 7 - // result: (ADDshiftLL (NEG x) x [log64(c+1)]) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - if v_1.Op != OpARM64MOVDconst { - continue - } - c := auxIntToInt64(v_1.AuxInt) - if !(isPowerOfTwo(c+1) && c >= 7) { - continue - } - v.reset(OpARM64ADDshiftLL) - v.AuxInt = int64ToAuxInt(log64(c + 1)) - v0 := b.NewValue0(v.Pos, OpARM64NEG, x.Type) - v0.AddArg(x) - v.AddArg2(v0, x) - return true - } - break - } - // match: (MUL x (MOVDconst [c])) - // cond: c%3 == 0 && isPowerOfTwo(c/3) - // result: (SLLconst [log64(c/3)] (ADDshiftLL x x [1])) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - if v_1.Op != OpARM64MOVDconst { - continue - } - c := auxIntToInt64(v_1.AuxInt) - if !(c%3 == 0 && isPowerOfTwo(c/3)) { - continue - } - v.reset(OpARM64SLLconst) - v.AuxInt = int64ToAuxInt(log64(c / 3)) - v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) - v0.AuxInt = int64ToAuxInt(1) - v0.AddArg2(x, x) - v.AddArg(v0) - return true - } - 
break - } - // match: (MUL x (MOVDconst [c])) - // cond: c%5 == 0 && isPowerOfTwo(c/5) - // result: (SLLconst [log64(c/5)] (ADDshiftLL x x [2])) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - if v_1.Op != OpARM64MOVDconst { - continue - } - c := auxIntToInt64(v_1.AuxInt) - if !(c%5 == 0 && isPowerOfTwo(c/5)) { - continue - } - v.reset(OpARM64SLLconst) - v.AuxInt = int64ToAuxInt(log64(c / 5)) - v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) - v0.AuxInt = int64ToAuxInt(2) - v0.AddArg2(x, x) - v.AddArg(v0) - return true - } - break - } - // match: (MUL x (MOVDconst [c])) - // cond: c%7 == 0 && isPowerOfTwo(c/7) - // result: (SLLconst [log64(c/7)] (ADDshiftLL (NEG x) x [3])) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - if v_1.Op != OpARM64MOVDconst { - continue - } - c := auxIntToInt64(v_1.AuxInt) - if !(c%7 == 0 && isPowerOfTwo(c/7)) { - continue - } - v.reset(OpARM64SLLconst) - v.AuxInt = int64ToAuxInt(log64(c / 7)) - v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) - v0.AuxInt = int64ToAuxInt(3) - v1 := b.NewValue0(v.Pos, OpARM64NEG, x.Type) - v1.AddArg(x) - v0.AddArg2(v1, x) - v.AddArg(v0) - return true - } - break - } - // match: (MUL x (MOVDconst [c])) - // cond: c%9 == 0 && isPowerOfTwo(c/9) - // result: (SLLconst [log64(c/9)] (ADDshiftLL x x [3])) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - if v_1.Op != OpARM64MOVDconst { - continue - } - c := auxIntToInt64(v_1.AuxInt) - if !(c%9 == 0 && isPowerOfTwo(c/9)) { - continue - } - v.reset(OpARM64SLLconst) - v.AuxInt = int64ToAuxInt(log64(c / 9)) - v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) - v0.AuxInt = int64ToAuxInt(3) - v0.AddArg2(x, x) - v.AddArg(v0) + v.copyOf(mulStrengthReduce(v, x, c)) return true } break @@ -12566,6 +12415,7 @@ func rewriteValueARM64_OpARM64MULW(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block + config := b.Func.Config // match: (MULW (NEG x) y) // result: (MNEGW x y) for { @@ -12581,27 +12431,6 @@ func rewriteValueARM64_OpARM64MULW(v *Value) bool { } break } - // match: (MULW x (MOVDconst [c])) - // cond: int32(c)==-1 - // result: (MOVWUreg (NEG x)) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - if v_1.Op != OpARM64MOVDconst { - continue - } - c := auxIntToInt64(v_1.AuxInt) - if !(int32(c) == -1) { - continue - } - v.reset(OpARM64MOVWUreg) - v0 := b.NewValue0(v.Pos, OpARM64NEG, x.Type) - v0.AddArg(x) - v.AddArg(v0) - return true - } - break - } // match: (MULW _ (MOVDconst [c])) // cond: int32(c)==0 // result: (MOVDconst [0]) @@ -12640,8 +12469,8 @@ func rewriteValueARM64_OpARM64MULW(v *Value) bool { break } // match: (MULW x (MOVDconst [c])) - // cond: isPowerOfTwo(c) - // result: (MOVWUreg (SLLconst [log64(c)] x)) + // cond: v.Type.Size() <= 4 && canMulStrengthReduce32(config, int32(c)) + // result: {mulStrengthReduce32(v, x, int32(c))} for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { x := v_0 @@ -12649,162 +12478,10 @@ func rewriteValueARM64_OpARM64MULW(v *Value) bool { continue } c := auxIntToInt64(v_1.AuxInt) - if !(isPowerOfTwo(c)) { + if !(v.Type.Size() <= 4 && canMulStrengthReduce32(config, int32(c))) { continue } - v.reset(OpARM64MOVWUreg) - v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type) - v0.AuxInt = int64ToAuxInt(log64(c)) - v0.AddArg(x) - v.AddArg(v0) - return true - } - break - } - // match: (MULW x (MOVDconst [c])) - // cond: isPowerOfTwo(c-1) && int32(c) >= 3 - // result: (MOVWUreg (ADDshiftLL x x [log64(c-1)])) - 
for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - if v_1.Op != OpARM64MOVDconst { - continue - } - c := auxIntToInt64(v_1.AuxInt) - if !(isPowerOfTwo(c-1) && int32(c) >= 3) { - continue - } - v.reset(OpARM64MOVWUreg) - v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) - v0.AuxInt = int64ToAuxInt(log64(c - 1)) - v0.AddArg2(x, x) - v.AddArg(v0) - return true - } - break - } - // match: (MULW x (MOVDconst [c])) - // cond: isPowerOfTwo(c+1) && int32(c) >= 7 - // result: (MOVWUreg (ADDshiftLL (NEG x) x [log64(c+1)])) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - if v_1.Op != OpARM64MOVDconst { - continue - } - c := auxIntToInt64(v_1.AuxInt) - if !(isPowerOfTwo(c+1) && int32(c) >= 7) { - continue - } - v.reset(OpARM64MOVWUreg) - v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) - v0.AuxInt = int64ToAuxInt(log64(c + 1)) - v1 := b.NewValue0(v.Pos, OpARM64NEG, x.Type) - v1.AddArg(x) - v0.AddArg2(v1, x) - v.AddArg(v0) - return true - } - break - } - // match: (MULW x (MOVDconst [c])) - // cond: c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c) - // result: (MOVWUreg (SLLconst [log64(c/3)] (ADDshiftLL x x [1]))) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - if v_1.Op != OpARM64MOVDconst { - continue - } - c := auxIntToInt64(v_1.AuxInt) - if !(c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)) { - continue - } - v.reset(OpARM64MOVWUreg) - v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type) - v0.AuxInt = int64ToAuxInt(log64(c / 3)) - v1 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) - v1.AuxInt = int64ToAuxInt(1) - v1.AddArg2(x, x) - v0.AddArg(v1) - v.AddArg(v0) - return true - } - break - } - // match: (MULW x (MOVDconst [c])) - // cond: c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c) - // result: (MOVWUreg (SLLconst [log64(c/5)] (ADDshiftLL x x [2]))) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - if v_1.Op != OpARM64MOVDconst { - continue - } - c := auxIntToInt64(v_1.AuxInt) - if !(c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)) { - continue - } - v.reset(OpARM64MOVWUreg) - v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type) - v0.AuxInt = int64ToAuxInt(log64(c / 5)) - v1 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) - v1.AuxInt = int64ToAuxInt(2) - v1.AddArg2(x, x) - v0.AddArg(v1) - v.AddArg(v0) - return true - } - break - } - // match: (MULW x (MOVDconst [c])) - // cond: c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) - // result: (MOVWUreg (SLLconst [log64(c/7)] (ADDshiftLL (NEG x) x [3]))) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - if v_1.Op != OpARM64MOVDconst { - continue - } - c := auxIntToInt64(v_1.AuxInt) - if !(c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)) { - continue - } - v.reset(OpARM64MOVWUreg) - v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type) - v0.AuxInt = int64ToAuxInt(log64(c / 7)) - v1 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) - v1.AuxInt = int64ToAuxInt(3) - v2 := b.NewValue0(v.Pos, OpARM64NEG, x.Type) - v2.AddArg(x) - v1.AddArg2(v2, x) - v0.AddArg(v1) - v.AddArg(v0) - return true - } - break - } - // match: (MULW x (MOVDconst [c])) - // cond: c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) - // result: (MOVWUreg (SLLconst [log64(c/9)] (ADDshiftLL x x [3]))) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - if v_1.Op != OpARM64MOVDconst { - continue - } - c := auxIntToInt64(v_1.AuxInt) - if !(c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)) { - continue - } - v.reset(OpARM64MOVWUreg) - v0 := 
b.NewValue0(v.Pos, OpARM64SLLconst, x.Type) - v0.AuxInt = int64ToAuxInt(log64(c / 9)) - v1 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) - v1.AuxInt = int64ToAuxInt(3) - v1.AddArg2(x, x) - v0.AddArg(v1) - v.AddArg(v0) + v.copyOf(mulStrengthReduce32(v, x, int32(c))) return true } break diff --git a/src/cmd/compile/internal/ssa/rewriteARM64latelower.go b/src/cmd/compile/internal/ssa/rewriteARM64latelower.go index 3defeba4c5..0fa5e26e93 100644 --- a/src/cmd/compile/internal/ssa/rewriteARM64latelower.go +++ b/src/cmd/compile/internal/ssa/rewriteARM64latelower.go @@ -38,6 +38,8 @@ func rewriteValueARM64latelower(v *Value) bool { return rewriteValueARM64latelower_OpARM64MOVWreg(v) case OpARM64ORconst: return rewriteValueARM64latelower_OpARM64ORconst(v) + case OpARM64SLLconst: + return rewriteValueARM64latelower_OpARM64SLLconst(v) case OpARM64SUBconst: return rewriteValueARM64latelower_OpARM64SUBconst(v) case OpARM64TSTWconst: @@ -996,6 +998,21 @@ func rewriteValueARM64latelower_OpARM64ORconst(v *Value) bool { } return false } +func rewriteValueARM64latelower_OpARM64SLLconst(v *Value) bool { + v_0 := v.Args[0] + // match: (SLLconst [1] x) + // result: (ADD x x) + for { + if auxIntToInt64(v.AuxInt) != 1 { + break + } + x := v_0 + v.reset(OpARM64ADD) + v.AddArg2(x, x) + return true + } + return false +} func rewriteValueARM64latelower_OpARM64SUBconst(v *Value) bool { v_0 := v.Args[0] b := v.Block diff --git a/src/cmd/compile/internal/test/mulconst_test.go b/src/cmd/compile/internal/test/mulconst_test.go index 314cab32de..c4aed84432 100644 --- a/src/cmd/compile/internal/test/mulconst_test.go +++ b/src/cmd/compile/internal/test/mulconst_test.go @@ -4,7 +4,96 @@ package test -import "testing" +import ( + "bytes" + "fmt" + "internal/testenv" + "os" + "path/filepath" + "strings" + "testing" +) + +func TestConstantMultiplies(t *testing.T) { + testenv.MustHaveGoRun(t) + + signs := []string{"", "u"} + widths := []int{8, 16, 32, 64} + + // Make test code. 
+ var code bytes.Buffer + fmt.Fprintf(&code, "package main\n") + for _, b := range widths { + for _, s := range signs { + fmt.Fprintf(&code, "type test_%s%d struct {\n", s, b) + fmt.Fprintf(&code, " m %sint%d\n", s, b) + fmt.Fprintf(&code, " f func(%sint%d)%sint%d\n", s, b, s, b) + fmt.Fprintf(&code, "}\n") + fmt.Fprintf(&code, "var test_%s%ds []test_%s%d\n", s, b, s, b) + } + } + for _, b := range widths { + for _, s := range signs { + lo := -256 + hi := 256 + if b == 8 { + lo = -128 + hi = 127 + } + if s == "u" { + lo = 0 + } + for i := lo; i <= hi; i++ { + name := fmt.Sprintf("f_%s%d_%d", s, b, i) + name = strings.ReplaceAll(name, "-", "n") + fmt.Fprintf(&code, "func %s(x %sint%d) %sint%d {\n", name, s, b, s, b) + fmt.Fprintf(&code, " return x*%d\n", i) + fmt.Fprintf(&code, "}\n") + fmt.Fprintf(&code, "func init() {\n") + fmt.Fprintf(&code, " test_%s%ds = append(test_%s%ds, test_%s%d{%d, %s})\n", s, b, s, b, s, b, i, name) + fmt.Fprintf(&code, "}\n") + } + } + } + fmt.Fprintf(&code, "func main() {\n") + for _, b := range widths { + for _, s := range signs { + lo := -256 + hi := 256 + if s == "u" { + lo = 0 + } + fmt.Fprintf(&code, " for _, tst := range test_%s%ds {\n", s, b) + fmt.Fprintf(&code, " for x := %d; x <= %d; x++ {\n", lo, hi) + fmt.Fprintf(&code, " y := %sint%d(x)\n", s, b) + fmt.Fprintf(&code, " if tst.f(y) != y*tst.m {\n") + fmt.Fprintf(&code, " panic(tst.m)\n") + fmt.Fprintf(&code, " }\n") + fmt.Fprintf(&code, " }\n") + fmt.Fprintf(&code, " }\n") + } + } + fmt.Fprintf(&code, "}\n") + + fmt.Printf("CODE:\n%s\n", string(code.Bytes())) + + // Make test file + tmpdir := t.TempDir() + src := filepath.Join(tmpdir, "x.go") + err := os.WriteFile(src, code.Bytes(), 0644) + if err != nil { + t.Fatalf("write file failed: %v", err) + } + + cmd := testenv.Command(t, testenv.GoToolPath(t), "run", src) + out, err := cmd.CombinedOutput() + if err != nil { + t.Fatalf("go run failed: %v\n%s", err, out) + } + if len(out) > 0 { + t.Fatalf("got output when expecting none: %s\n", string(out)) + } +} // Benchmark multiplication of an integer by various constants. // diff --git a/test/codegen/arithmetic.go b/test/codegen/arithmetic.go index 530891293e..7bac85eb6c 100644 --- a/test/codegen/arithmetic.go +++ b/test/codegen/arithmetic.go @@ -649,7 +649,7 @@ func constantFold2(i0, j0, i1, j1 int) (int, int) { } func constantFold3(i, j int) int { - // arm64: "MOVD\t[$]30","MUL",-"ADD",-"LSL" + // arm64: "LSL\t[$]5,","SUB\tR[0-9]+<<1,",-"ADD" // ppc64x:"MULLD\t[$]30","MULLD" r := (5 * i) * (6 * j) return r diff --git a/test/codegen/multiply.go b/test/codegen/multiply.go new file mode 100644 index 0000000000..e7c1ccea1a --- /dev/null +++ b/test/codegen/multiply.go @@ -0,0 +1,312 @@ +// asmcheck + +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package codegen + +// This file contains codegen tests related to strength +// reduction of integer multiply. 
+ +func m0(x int64) int64 { + // amd64: "XORL" + // arm64: "MOVD\tZR" + return x * 0 +} +func m2(x int64) int64 { + // amd64: "ADDQ" + // arm64: "ADD" + return x * 2 +} +func m3(x int64) int64 { + // amd64: "LEAQ\t.*[*]2" + // arm64: "ADD\tR[0-9]+<<1," + return x * 3 +} +func m4(x int64) int64 { + // amd64: "SHLQ\t[$]2," + // arm64: "LSL\t[$]2," + return x * 4 +} +func m5(x int64) int64 { + // amd64: "LEAQ\t.*[*]4" + // arm64: "ADD\tR[0-9]+<<2," + return x * 5 +} +func m6(x int64) int64 { + // amd64: "LEAQ\t.*[*]1", "LEAQ\t.*[*]2" + // arm64: "ADD\tR[0-9]+,", "ADD\tR[0-9]+<<1," + return x * 6 +} +func m7(x int64) int64 { + // amd64: "LEAQ\t.*[*]2" + // arm64: "LSL\t[$]3,", "SUB\tR[0-9]+," + return x * 7 +} +func m8(x int64) int64 { + // amd64: "SHLQ\t[$]3," + // arm64: "LSL\t[$]3," + return x * 8 +} +func m9(x int64) int64 { + // amd64: "LEAQ\t.*[*]8" + // arm64: "ADD\tR[0-9]+<<3," + return x * 9 +} +func m10(x int64) int64 { + // amd64: "LEAQ\t.*[*]1", "LEAQ\t.*[*]4" + // arm64: "ADD\tR[0-9]+,", "ADD\tR[0-9]+<<2," + return x * 10 +} +func m11(x int64) int64 { + // amd64: "LEAQ\t.*[*]4", "LEAQ\t.*[*]2" + // arm64: "MOVD\t[$]11,", "MUL" + return x * 11 +} +func m12(x int64) int64 { + // amd64: "LEAQ\t.*[*]2", "SHLQ\t[$]2," + // arm64: "LSL\t[$]2,", "ADD\tR[0-9]+<<1," + return x * 12 +} +func m13(x int64) int64 { + // amd64: "LEAQ\t.*[*]2", "LEAQ\t.*[*]4" + // arm64: "MOVD\t[$]13,", "MUL" + return x * 13 +} +func m14(x int64) int64 { + // amd64: "IMUL3Q\t[$]14," + // arm64: "LSL\t[$]4,", "SUB\tR[0-9]+<<1," + return x * 14 +} +func m15(x int64) int64 { + // amd64: "LEAQ\t.*[*]2", "LEAQ\t.*[*]4" + // arm64: "LSL\t[$]4,", "SUB\tR[0-9]+," + return x * 15 +} +func m16(x int64) int64 { + // amd64: "SHLQ\t[$]4," + // arm64: "LSL\t[$]4," + return x * 16 +} +func m17(x int64) int64 { + // amd64: "LEAQ\t.*[*]1", "LEAQ\t.*[*]8" + // arm64: "ADD\tR[0-9]+<<4," + return x * 17 +} +func m18(x int64) int64 { + // amd64: "LEAQ\t.*[*]1", "LEAQ\t.*[*]8" + // arm64: "ADD\tR[0-9]+,", "ADD\tR[0-9]+<<3," + return x * 18 +} +func m19(x int64) int64 { + // amd64: "LEAQ\t.*[*]8", "LEAQ\t.*[*]2" + // arm64: "MOVD\t[$]19,", "MUL" + return x * 19 +} +func m20(x int64) int64 { + // amd64: "LEAQ\t.*[*]4", "SHLQ\t[$]2," + // arm64: "LSL\t[$]2,", "ADD\tR[0-9]+<<2," + return x * 20 +} +func m21(x int64) int64 { + // amd64: "LEAQ\t.*[*]4", "LEAQ\t.*[*]4" + // arm64: "MOVD\t[$]21,", "MUL" + return x * 21 +} +func m22(x int64) int64 { + // amd64: "IMUL3Q\t[$]22," + // arm64: "MOVD\t[$]22,", "MUL" + return x * 22 +} +func m23(x int64) int64 { + // amd64: "IMUL3Q\t[$]23," + // arm64: "MOVD\t[$]23,", "MUL" + return x * 23 +} +func m24(x int64) int64 { + // amd64: "LEAQ\t.*[*]2", "SHLQ\t[$]3," + // arm64: "LSL\t[$]3,", "ADD\tR[0-9]+<<1," + return x * 24 +} +func m25(x int64) int64 { + // amd64: "LEAQ\t.*[*]4", "LEAQ\t.*[*]4" + // arm64: "MOVD\t[$]25,", "MUL" + return x * 25 +} +func m26(x int64) int64 { + // amd64: "IMUL3Q\t[$]26," + // arm64: "MOVD\t[$]26,", "MUL" + return x * 26 +} +func m27(x int64) int64 { + // amd64: "LEAQ\t.*[*]2", "LEAQ\t.*[*]8" + // arm64: "MOVD\t[$]27,", "MUL" + return x * 27 +} +func m28(x int64) int64 { + // amd64: "IMUL3Q\t[$]28," + // arm64: "LSL\t[$]5,", "SUB\tR[0-9]+<<2," + return x * 28 +} +func m29(x int64) int64 { + // amd64: "IMUL3Q\t[$]29," + // arm64: "MOVD\t[$]29,", "MUL" + return x * 29 +} +func m30(x int64) int64 { + // amd64: "IMUL3Q\t[$]30," + // arm64: "LSL\t[$]5,", "SUB\tR[0-9]+<<1," + return x * 30 +} +func m31(x int64) int64 { + // amd64: "SHLQ\t[$]5,", "SUBQ" + // arm64: "LSL\t[$]5,", 
"SUB\tR[0-9]+," + return x * 31 +} +func m32(x int64) int64 { + // amd64: "SHLQ\t[$]5," + // arm64: "LSL\t[$]5," + return x * 32 +} +func m33(x int64) int64 { + // amd64: "SHLQ\t[$]2,", "LEAQ\t.*[*]8" + // arm64: "ADD\tR[0-9]+<<5," + return x * 33 +} +func m34(x int64) int64 { + // amd64: "SHLQ\t[$]5,", "LEAQ\t.*[*]2" + // arm64: "ADD\tR[0-9]+,", "ADD\tR[0-9]+<<4," + return x * 34 +} +func m35(x int64) int64 { + // amd64: "IMUL3Q\t[$]35," + // arm64: "MOVD\t[$]35,", "MUL" + return x * 35 +} +func m36(x int64) int64 { + // amd64: "LEAQ\t.*[*]8", "SHLQ\t[$]2," + // arm64: "LSL\t[$]2,", "ADD\tR[0-9]+<<3," + return x * 36 +} +func m37(x int64) int64 { + // amd64: "LEAQ\t.*[*]8", "LEAQ\t.*[*]4" + // arm64: "MOVD\t[$]37,", "MUL" + return x * 37 +} +func m38(x int64) int64 { + // amd64: "IMUL3Q\t[$]38," + // arm64: "MOVD\t[$]38,", "MUL" + return x * 38 +} +func m39(x int64) int64 { + // amd64: "IMUL3Q\t[$]39," + // arm64: "MOVD\t[$]39,", "MUL" + return x * 39 +} +func m40(x int64) int64 { + // amd64: "LEAQ\t.*[*]4", "SHLQ\t[$]3," + // arm64: "LSL\t[$]3,", "ADD\tR[0-9]+<<2," + return x * 40 +} + +func mn1(x int64) int64 { + // amd64: "NEGQ\t" + // arm64: "NEG\tR[0-9]+," + return x * -1 +} +func mn2(x int64) int64 { + // amd64: "NEGQ", "ADDQ" + // arm64: "NEG\tR[0-9]+<<1," + return x * -2 +} +func mn3(x int64) int64 { + // amd64: "NEGQ", "LEAQ\t.*[*]2" + // arm64: "SUB\tR[0-9]+<<2," + return x * -3 +} +func mn4(x int64) int64 { + // amd64: "NEGQ", "SHLQ\t[$]2," + // arm64: "NEG\tR[0-9]+<<2," + return x * -4 +} +func mn5(x int64) int64 { + // amd64: "NEGQ", "LEAQ\t.*[*]4" + // arm64: "NEG\tR[0-9]+,", "ADD\tR[0-9]+<<2," + return x * -5 +} +func mn6(x int64) int64 { + // amd64: "IMUL3Q\t[$]-6," + // arm64: "ADD\tR[0-9]+,", "SUB\tR[0-9]+<<2," + return x * -6 +} +func mn7(x int64) int64 { + // amd64: "NEGQ", "LEAQ\t.*[*]8" + // arm64: "SUB\tR[0-9]+<<3," + return x * -7 +} +func mn8(x int64) int64 { + // amd64: "NEGQ", "SHLQ\t[$]3," + // arm64: "NEG\tR[0-9]+<<3," + return x * -8 +} +func mn9(x int64) int64 { + // amd64: "NEGQ", "LEAQ\t.*[*]8" + // arm64: "NEG\tR[0-9]+,", "ADD\tR[0-9]+<<3," + return x * -9 +} +func mn10(x int64) int64 { + // amd64: "IMUL3Q\t[$]-10," + // arm64: "MOVD\t[$]-10,", "MUL" + return x * -10 +} +func mn11(x int64) int64 { + // amd64: "IMUL3Q\t[$]-11," + // arm64: "MOVD\t[$]-11,", "MUL" + return x * -11 +} +func mn12(x int64) int64 { + // amd64: "IMUL3Q\t[$]-12," + // arm64: "LSL\t[$]2,", "SUB\tR[0-9]+<<2," + return x * -12 +} +func mn13(x int64) int64 { + // amd64: "IMUL3Q\t[$]-13," + // arm64: "MOVD\t[$]-13,", "MUL" + return x * -13 +} +func mn14(x int64) int64 { + // amd64: "IMUL3Q\t[$]-14," + // arm64: "ADD\tR[0-9]+,", "SUB\tR[0-9]+<<3," + return x * -14 +} +func mn15(x int64) int64 { + // amd64: "SHLQ\t[$]4,", "SUBQ" + // arm64: "SUB\tR[0-9]+<<4," + return x * -15 +} +func mn16(x int64) int64 { + // amd64: "NEGQ", "SHLQ\t[$]4," + // arm64: "NEG\tR[0-9]+<<4," + return x * -16 +} +func mn17(x int64) int64 { + // amd64: "IMUL3Q\t[$]-17," + // arm64: "NEG\tR[0-9]+,", "ADD\tR[0-9]+<<4," + return x * -17 +} +func mn18(x int64) int64 { + // amd64: "IMUL3Q\t[$]-18," + // arm64: "MOVD\t[$]-18,", "MUL" + return x * -18 +} +func mn19(x int64) int64 { + // amd64: "IMUL3Q\t[$]-19," + // arm64: "MOVD\t[$]-19,", "MUL" + return x * -19 +} +func mn20(x int64) int64 { + // amd64: "IMUL3Q\t[$]-20," + // arm64: "MOVD\t[$]-20,", "MUL" + return x * -20 +}