mirror of
https://github.com/golang/go.git
synced 2025-05-18 22:04:38 +00:00
cmd/compile: add s390x intrinsics for Ceil, Floor, Round and Trunc
Ceil, Floor and Trunc are pre-existing intrinsics. Round is a new function and has been added as an intrinsic in this CL. All of the functions can be implemented as a single 'LOAD FP INTEGER' instruction, FIDBR, on s390x. name old time/op new time/op delta Ceil 2.34ns ± 0% 0.85ns ± 0% -63.74% (p=0.000 n=5+4) Floor 2.33ns ± 0% 0.85ns ± 1% -63.35% (p=0.008 n=5+5) Round 4.23ns ± 0% 0.85ns ± 0% -79.89% (p=0.000 n=5+4) Trunc 2.35ns ± 0% 0.85ns ± 0% -63.83% (p=0.029 n=4+4) Change-Id: Idee7ba24a2899d12bf9afee4eedd6b4aaad3c510 Reviewed-on: https://go-review.googlesource.com/63890 Run-TryBot: Michael Munday <mike.munday@ibm.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
This commit is contained in:
parent
8802b188c6
commit
7582494e06
@ -1457,6 +1457,39 @@ var linuxS390XTests = []*asmTest{
|
||||
`,
|
||||
pos: []string{"\tFLOGR\t"},
|
||||
},
|
||||
// Intrinsic tests for math.
|
||||
{
|
||||
fn: `
|
||||
func ceil(x float64) float64 {
|
||||
return math.Ceil(x)
|
||||
}
|
||||
`,
|
||||
pos: []string{"\tFIDBR\t[$]6"},
|
||||
},
|
||||
{
|
||||
fn: `
|
||||
func floor(x float64) float64 {
|
||||
return math.Floor(x)
|
||||
}
|
||||
`,
|
||||
pos: []string{"\tFIDBR\t[$]7"},
|
||||
},
|
||||
{
|
||||
fn: `
|
||||
func round(x float64) float64 {
|
||||
return math.Round(x)
|
||||
}
|
||||
`,
|
||||
pos: []string{"\tFIDBR\t[$]1"},
|
||||
},
|
||||
{
|
||||
fn: `
|
||||
func trunc(x float64) float64 {
|
||||
return math.Trunc(x)
|
||||
}
|
||||
`,
|
||||
pos: []string{"\tFIDBR\t[$]5"},
|
||||
},
|
||||
{
|
||||
// check that stack store is optimized away
|
||||
fn: `
|
||||
|
@ -2734,17 +2734,22 @@ func init() {
|
||||
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
||||
return s.newValue1(ssa.OpTrunc, types.Types[TFLOAT64], args[0])
|
||||
},
|
||||
sys.PPC64)
|
||||
sys.PPC64, sys.S390X)
|
||||
addF("math", "Ceil",
|
||||
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
||||
return s.newValue1(ssa.OpCeil, types.Types[TFLOAT64], args[0])
|
||||
},
|
||||
sys.PPC64)
|
||||
sys.PPC64, sys.S390X)
|
||||
addF("math", "Floor",
|
||||
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
||||
return s.newValue1(ssa.OpFloor, types.Types[TFLOAT64], args[0])
|
||||
},
|
||||
sys.PPC64)
|
||||
sys.PPC64, sys.S390X)
|
||||
addF("math", "Round",
|
||||
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
||||
return s.newValue1(ssa.OpRound, types.Types[TFLOAT64], args[0])
|
||||
},
|
||||
sys.S390X)
|
||||
|
||||
/******** math/bits ********/
|
||||
addF("math/bits", "TrailingZeros64",
|
||||
|
@ -207,6 +207,13 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
|
||||
p.Reg = r2
|
||||
p.To.Type = obj.TYPE_REG
|
||||
p.To.Reg = r
|
||||
case ssa.OpS390XFIDBR:
|
||||
switch v.AuxInt {
|
||||
case 0, 1, 3, 4, 5, 6, 7:
|
||||
opregregimm(s, v.Op.Asm(), v.Reg(), v.Args[0].Reg(), v.AuxInt)
|
||||
default:
|
||||
v.Fatalf("invalid FIDBR mask: %v", v.AuxInt)
|
||||
}
|
||||
case ssa.OpS390XDIVD, ssa.OpS390XDIVW,
|
||||
ssa.OpS390XDIVDU, ssa.OpS390XDIVWU,
|
||||
ssa.OpS390XMODD, ssa.OpS390XMODW,
|
||||
|
@ -107,7 +107,12 @@
|
||||
(Bswap64 x) -> (MOVDBR x)
|
||||
(Bswap32 x) -> (MOVWBR x)
|
||||
|
||||
// math package intrinsics
|
||||
(Sqrt x) -> (FSQRT x)
|
||||
(Floor x) -> (FIDBR [7] x)
|
||||
(Ceil x) -> (FIDBR [6] x)
|
||||
(Trunc x) -> (FIDBR [5] x)
|
||||
(Round x) -> (FIDBR [1] x)
|
||||
|
||||
// Atomic loads.
|
||||
(AtomicLoad32 ptr mem) -> (MOVWZatomicload ptr mem)
|
||||
|
@ -206,6 +206,17 @@ func init() {
|
||||
{name: "FMSUBS", argLength: 3, reg: fp31, asm: "FMSUBS", resultInArg0: true}, // fp32 arg1 * arg2 - arg0
|
||||
{name: "FMSUB", argLength: 3, reg: fp31, asm: "FMSUB", resultInArg0: true}, // fp64 arg1 * arg2 - arg0
|
||||
|
||||
// Round to integer, float64 only.
|
||||
//
|
||||
// aux | rounding mode
|
||||
// ----+-----------------------------------
|
||||
// 1 | round to nearest, ties away from 0
|
||||
// 4 | round to nearest, ties to even
|
||||
// 5 | round toward 0
|
||||
// 6 | round toward +∞
|
||||
// 7 | round toward -∞
|
||||
{name: "FIDBR", argLength: 1, reg: fp11, asm: "FIDBR", aux: "Int8"},
|
||||
|
||||
{name: "FMOVSload", argLength: 2, reg: fpload, asm: "FMOVS", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"}, // fp32 load
|
||||
{name: "FMOVDload", argLength: 2, reg: fpload, asm: "FMOVD", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"}, // fp64 load
|
||||
{name: "FMOVSconst", reg: fp01, asm: "FMOVS", aux: "Float32", rematerializeable: true}, // fp32 constant
|
||||
|
@ -255,10 +255,23 @@ var genericOps = []opData{
|
||||
{name: "PopCount32", argLength: 1}, // Count bits in arg[0]
|
||||
{name: "PopCount64", argLength: 1}, // Count bits in arg[0]
|
||||
|
||||
{name: "Sqrt", argLength: 1}, // sqrt(arg0), float64 only
|
||||
{name: "Floor", argLength: 1}, // floor(arg0), float64 only
|
||||
{name: "Ceil", argLength: 1}, // ceil(arg0), float64 only
|
||||
{name: "Trunc", argLength: 1}, // trunc(arg0), float64 only
|
||||
// Square root, float64 only.
|
||||
// Special cases:
|
||||
// +∞ → +∞
|
||||
// ±0 → ±0 (sign preserved)
|
||||
// x<0 → NaN
|
||||
// NaN → NaN
|
||||
{name: "Sqrt", argLength: 1}, // √arg0
|
||||
|
||||
// Round to integer, float64 only.
|
||||
// Special cases:
|
||||
// ±∞ → ±∞ (sign preserved)
|
||||
// ±0 → ±0 (sign preserved)
|
||||
// NaN → NaN
|
||||
{name: "Floor", argLength: 1}, // round arg0 toward -∞
|
||||
{name: "Ceil", argLength: 1}, // round arg0 toward +∞
|
||||
{name: "Trunc", argLength: 1}, // round arg0 toward 0
|
||||
{name: "Round", argLength: 1}, // round arg0 to nearest, ties away from 0
|
||||
|
||||
// Data movement, max argument length for Phi is indefinite so just pick
|
||||
// a really large number
|
||||
|
@ -1448,6 +1448,7 @@ const (
|
||||
OpS390XFMADD
|
||||
OpS390XFMSUBS
|
||||
OpS390XFMSUB
|
||||
OpS390XFIDBR
|
||||
OpS390XFMOVSload
|
||||
OpS390XFMOVDload
|
||||
OpS390XFMOVSconst
|
||||
@ -1836,6 +1837,7 @@ const (
|
||||
OpFloor
|
||||
OpCeil
|
||||
OpTrunc
|
||||
OpRound
|
||||
OpPhi
|
||||
OpCopy
|
||||
OpConvert
|
||||
@ -18602,6 +18604,20 @@ var opcodeTable = [...]opInfo{
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "FIDBR",
|
||||
auxType: auxInt8,
|
||||
argLen: 1,
|
||||
asm: s390x.AFIDBR,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "FMOVSload",
|
||||
auxType: auxSymOff,
|
||||
@ -22437,6 +22453,11 @@ var opcodeTable = [...]opInfo{
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Round",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Phi",
|
||||
argLen: -1,
|
||||
|
@ -73,6 +73,8 @@ func rewriteValueS390X(v *Value) bool {
|
||||
return rewriteValueS390X_OpBswap32_0(v)
|
||||
case OpBswap64:
|
||||
return rewriteValueS390X_OpBswap64_0(v)
|
||||
case OpCeil:
|
||||
return rewriteValueS390X_OpCeil_0(v)
|
||||
case OpClosureCall:
|
||||
return rewriteValueS390X_OpClosureCall_0(v)
|
||||
case OpCom16:
|
||||
@ -161,6 +163,8 @@ func rewriteValueS390X(v *Value) bool {
|
||||
return rewriteValueS390X_OpEqB_0(v)
|
||||
case OpEqPtr:
|
||||
return rewriteValueS390X_OpEqPtr_0(v)
|
||||
case OpFloor:
|
||||
return rewriteValueS390X_OpFloor_0(v)
|
||||
case OpGeq16:
|
||||
return rewriteValueS390X_OpGeq16_0(v)
|
||||
case OpGeq16U:
|
||||
@ -371,6 +375,8 @@ func rewriteValueS390X(v *Value) bool {
|
||||
return rewriteValueS390X_OpOr8_0(v)
|
||||
case OpOrB:
|
||||
return rewriteValueS390X_OpOrB_0(v)
|
||||
case OpRound:
|
||||
return rewriteValueS390X_OpRound_0(v)
|
||||
case OpRound32F:
|
||||
return rewriteValueS390X_OpRound32F_0(v)
|
||||
case OpRound64F:
|
||||
@ -685,6 +691,8 @@ func rewriteValueS390X(v *Value) bool {
|
||||
return rewriteValueS390X_OpSub8_0(v)
|
||||
case OpSubPtr:
|
||||
return rewriteValueS390X_OpSubPtr_0(v)
|
||||
case OpTrunc:
|
||||
return rewriteValueS390X_OpTrunc_0(v)
|
||||
case OpTrunc16to8:
|
||||
return rewriteValueS390X_OpTrunc16to8_0(v)
|
||||
case OpTrunc32to16:
|
||||
@ -1172,6 +1180,18 @@ func rewriteValueS390X_OpBswap64_0(v *Value) bool {
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueS390X_OpCeil_0(v *Value) bool {
|
||||
// match: (Ceil x)
|
||||
// cond:
|
||||
// result: (FIDBR [6] x)
|
||||
for {
|
||||
x := v.Args[0]
|
||||
v.reset(OpS390XFIDBR)
|
||||
v.AuxInt = 6
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueS390X_OpClosureCall_0(v *Value) bool {
|
||||
// match: (ClosureCall [argwid] entry closure mem)
|
||||
// cond:
|
||||
@ -1911,6 +1931,18 @@ func rewriteValueS390X_OpEqPtr_0(v *Value) bool {
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueS390X_OpFloor_0(v *Value) bool {
|
||||
// match: (Floor x)
|
||||
// cond:
|
||||
// result: (FIDBR [7] x)
|
||||
for {
|
||||
x := v.Args[0]
|
||||
v.reset(OpS390XFIDBR)
|
||||
v.AuxInt = 7
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueS390X_OpGeq16_0(v *Value) bool {
|
||||
b := v.Block
|
||||
_ = b
|
||||
@ -4913,6 +4945,18 @@ func rewriteValueS390X_OpOrB_0(v *Value) bool {
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueS390X_OpRound_0(v *Value) bool {
|
||||
// match: (Round x)
|
||||
// cond:
|
||||
// result: (FIDBR [1] x)
|
||||
for {
|
||||
x := v.Args[0]
|
||||
v.reset(OpS390XFIDBR)
|
||||
v.AuxInt = 1
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueS390X_OpRound32F_0(v *Value) bool {
|
||||
// match: (Round32F x)
|
||||
// cond:
|
||||
@ -36200,6 +36244,18 @@ func rewriteValueS390X_OpSubPtr_0(v *Value) bool {
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueS390X_OpTrunc_0(v *Value) bool {
|
||||
// match: (Trunc x)
|
||||
// cond:
|
||||
// result: (FIDBR [5] x)
|
||||
for {
|
||||
x := v.Args[0]
|
||||
v.reset(OpS390XFIDBR)
|
||||
v.AuxInt = 5
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueS390X_OpTrunc16to8_0(v *Value) bool {
|
||||
// match: (Trunc16to8 x)
|
||||
// cond:
|
||||
|
Loading…
x
Reference in New Issue
Block a user