cmd/compile: rework PPC64 Mul64uhilo lowering rules

Remove OpPPC64LoweredMuluhilo as this operation can be done
more efficiently with MULHDU and MULLD directly. This has the
benefit of not needing to use tuple select operations, and giving
the scheduler more freedom to place these operations.

The primary reason to avoid using tuples here is to to avoid
suboptimal scheduling when carry ops (e.x ADDC/ADDE) are used in
the same block as 64->128b multiples. CL 432275 modifies the
scheduling priorities which may cause non-flag/non-carry generating
tuple ops to interfere with carry opcodes. Thus resulting in excess
saving and restoring of the XER register.

This allows CL 432275 to adjust the scheduling priorities without
having to workaround odd tuple scheduling behavior.

Change-Id: Id04ef009ec4b86416e5436f2b44ae1474e73720e
Reviewed-on: https://go-review.googlesource.com/c/go/+/434855
Run-TryBot: Paul Murphy <murp@ibm.com>
Reviewed-by: Lynn Boger <laboger@linux.vnet.ibm.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
This commit is contained in:
Paul E. Murphy 2022-09-26 15:26:05 -05:00 committed by Paul Murphy
parent caa8e1f977
commit 871a3a409a
5 changed files with 26 additions and 40 deletions

View File

@ -125,24 +125,6 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
p.To.Reg = y
}
case ssa.OpPPC64LoweredMuluhilo:
// MULHDU Rarg1, Rarg0, Reg0
// MULLD Rarg1, Rarg0, Reg1
r0 := v.Args[0].Reg()
r1 := v.Args[1].Reg()
p := s.Prog(ppc64.AMULHDU)
p.From.Type = obj.TYPE_REG
p.From.Reg = r1
p.Reg = r0
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg0()
p1 := s.Prog(ppc64.AMULLD)
p1.From.Type = obj.TYPE_REG
p1.From.Reg = r1
p1.Reg = r0
p1.To.Type = obj.TYPE_REG
p1.To.Reg = v.Reg1()
case ssa.OpPPC64LoweredAtomicAnd8,
ssa.OpPPC64LoweredAtomicAnd32,
ssa.OpPPC64LoweredAtomicOr8,

View File

@ -32,7 +32,8 @@
(Mul64 ...) => (MULLD ...)
(Mul(32|16|8) ...) => (MULLW ...)
(Mul64uhilo ...) => (LoweredMuluhilo ...)
(Select0 (Mul64uhilo x y)) => (MULHDU x y)
(Select1 (Mul64uhilo x y)) => (MULLD x y)
(Div64 [false] x y) => (DIVD x y)
(Div64u ...) => (DIVDU ...)

View File

@ -148,7 +148,6 @@ func init() {
gp21xer = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp, xer}, clobbers: xer}
gp2xer1xer = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb, xer}, outputs: []regMask{gp, xer}, clobbers: xer}
gp31 = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp}}
gp22 = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp, gp}}
gp1cr = regInfo{inputs: []regMask{gp | sp | sb}}
gp2cr = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}}
crgp = regInfo{inputs: nil, outputs: []regMask{gp}}
@ -199,7 +198,6 @@ func init() {
{name: "MULHW", argLength: 2, reg: gp21, asm: "MULHW", commutative: true}, // (arg0 * arg1) >> 32, signed
{name: "MULHDU", argLength: 2, reg: gp21, asm: "MULHDU", commutative: true}, // (arg0 * arg1) >> 64, unsigned
{name: "MULHWU", argLength: 2, reg: gp21, asm: "MULHWU", commutative: true}, // (arg0 * arg1) >> 32, unsigned
{name: "LoweredMuluhilo", argLength: 2, reg: gp22, resultNotInArgs: true}, // arg0 * arg1, returns (hi, lo)
{name: "FMUL", argLength: 2, reg: fp21, asm: "FMUL", commutative: true}, // arg0*arg1
{name: "FMULS", argLength: 2, reg: fp21, asm: "FMULS", commutative: true}, // arg0*arg1

View File

@ -2087,7 +2087,6 @@ const (
OpPPC64MULHW
OpPPC64MULHDU
OpPPC64MULHWU
OpPPC64LoweredMuluhilo
OpPPC64FMUL
OpPPC64FMULS
OpPPC64FMADD
@ -27944,21 +27943,6 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "LoweredMuluhilo",
argLen: 2,
resultNotInArgs: true,
reg: regInfo{
inputs: []inputInfo{
{0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
{1, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
},
outputs: []outputInfo{
{0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
{1, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
},
},
},
{
name: "FMUL",
argLen: 2,

View File

@ -368,9 +368,6 @@ func rewriteValuePPC64(v *Value) bool {
case OpMul64F:
v.Op = OpPPC64FMUL
return true
case OpMul64uhilo:
v.Op = OpPPC64LoweredMuluhilo
return true
case OpMul8:
v.Op = OpPPC64MULLW
return true
@ -16225,6 +16222,18 @@ func rewriteValuePPC64_OpSelect0(v *Value) bool {
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Select0 (Mul64uhilo x y))
// result: (MULHDU x y)
for {
if v_0.Op != OpMul64uhilo {
break
}
y := v_0.Args[1]
x := v_0.Args[0]
v.reset(OpPPC64MULHDU)
v.AddArg2(x, y)
return true
}
// match: (Select0 (Add64carry x y c))
// result: (Select0 <typ.UInt64> (ADDE x y (Select1 <typ.UInt64> (ADDCconst c [-1]))))
for {
@ -16573,6 +16582,18 @@ func rewriteValuePPC64_OpSelect1(v *Value) bool {
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Select1 (Mul64uhilo x y))
// result: (MULLD x y)
for {
if v_0.Op != OpMul64uhilo {
break
}
y := v_0.Args[1]
x := v_0.Args[0]
v.reset(OpPPC64MULLD)
v.AddArg2(x, y)
return true
}
// match: (Select1 (Add64carry x y c))
// result: (ADDZEzero (Select1 <typ.UInt64> (ADDE x y (Select1 <typ.UInt64> (ADDCconst c [-1])))))
for {