mirror of
https://github.com/golang/go.git
synced 2025-05-30 11:51:34 +00:00
cmd/compile: make math.Ceil/Floor/Round/Trunc intrinsics on arm64
name   old time/op  new time/op  delta
Ceil   550ns ± 0%   486ns ± 7%   -11.64%  (p=0.000 n=13+18)
Floor  495ns ±19%   512ns ±12%   ~        (p=0.164 n=20+20)
Round  550ns ± 0%   487ns ± 8%   -11.49%  (p=0.000 n=12+19)
Trunc  563ns ± 7%   488ns ±13%   -13.44%  (p=0.000 n=15+2)

Change-Id: I53f234b160b3c026a277506e2cf977d150379464
Reviewed-on: https://go-review.googlesource.com/88295
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
This commit is contained in:
parent
ba99433d33
commit
07f0f09563
@ -559,7 +559,11 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
|
|||||||
ssa.OpARM64RBIT,
|
ssa.OpARM64RBIT,
|
||||||
ssa.OpARM64RBITW,
|
ssa.OpARM64RBITW,
|
||||||
ssa.OpARM64CLZ,
|
ssa.OpARM64CLZ,
|
||||||
ssa.OpARM64CLZW:
|
ssa.OpARM64CLZW,
|
||||||
|
ssa.OpARM64FRINTAD,
|
||||||
|
ssa.OpARM64FRINTMD,
|
||||||
|
ssa.OpARM64FRINTPD,
|
||||||
|
ssa.OpARM64FRINTZD:
|
||||||
p := s.Prog(v.Op.Asm())
|
p := s.Prog(v.Op.Asm())
|
||||||
p.From.Type = obj.TYPE_REG
|
p.From.Type = obj.TYPE_REG
|
||||||
p.From.Reg = v.Args[0].Reg()
|
p.From.Reg = v.Args[0].Reg()
|
||||||
|
@ -248,7 +248,7 @@ var allAsmTests = []*asmTests{
|
|||||||
{
|
{
|
||||||
arch: "arm64",
|
arch: "arm64",
|
||||||
os: "linux",
|
os: "linux",
|
||||||
imports: []string{"encoding/binary", "math/bits"},
|
imports: []string{"encoding/binary", "math", "math/bits"},
|
||||||
tests: linuxARM64Tests,
|
tests: linuxARM64Tests,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -2849,6 +2849,47 @@ var linuxARM64Tests = []*asmTest{
|
|||||||
pos: []string{"\tMOVHU\t\\(R[0-9]+\\)"},
|
pos: []string{"\tMOVHU\t\\(R[0-9]+\\)"},
|
||||||
neg: []string{"ORR\tR[0-9]+<<8\t"},
|
neg: []string{"ORR\tR[0-9]+<<8\t"},
|
||||||
},
|
},
|
||||||
|
// Intrinsic tests for math.
|
||||||
|
{
|
||||||
|
fn: `
|
||||||
|
func sqrt(x float64) float64 {
|
||||||
|
return math.Sqrt(x)
|
||||||
|
}
|
||||||
|
`,
|
||||||
|
pos: []string{"FSQRTD"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
fn: `
|
||||||
|
func ceil(x float64) float64 {
|
||||||
|
return math.Ceil(x)
|
||||||
|
}
|
||||||
|
`,
|
||||||
|
pos: []string{"FRINTPD"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
fn: `
|
||||||
|
func floor(x float64) float64 {
|
||||||
|
return math.Floor(x)
|
||||||
|
}
|
||||||
|
`,
|
||||||
|
pos: []string{"FRINTMD"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
fn: `
|
||||||
|
func round(x float64) float64 {
|
||||||
|
return math.Round(x)
|
||||||
|
}
|
||||||
|
`,
|
||||||
|
pos: []string{"FRINTAD"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
fn: `
|
||||||
|
func trunc(x float64) float64 {
|
||||||
|
return math.Trunc(x)
|
||||||
|
}
|
||||||
|
`,
|
||||||
|
pos: []string{"FRINTZD"},
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
var linuxMIPSTests = []*asmTest{
|
var linuxMIPSTests = []*asmTest{
|
||||||
|
@ -2918,22 +2918,22 @@ func init() {
|
|||||||
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
||||||
return s.newValue1(ssa.OpTrunc, types.Types[TFLOAT64], args[0])
|
return s.newValue1(ssa.OpTrunc, types.Types[TFLOAT64], args[0])
|
||||||
},
|
},
|
||||||
sys.PPC64, sys.S390X)
|
sys.ARM64, sys.PPC64, sys.S390X)
|
||||||
addF("math", "Ceil",
|
addF("math", "Ceil",
|
||||||
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
||||||
return s.newValue1(ssa.OpCeil, types.Types[TFLOAT64], args[0])
|
return s.newValue1(ssa.OpCeil, types.Types[TFLOAT64], args[0])
|
||||||
},
|
},
|
||||||
sys.PPC64, sys.S390X)
|
sys.ARM64, sys.PPC64, sys.S390X)
|
||||||
addF("math", "Floor",
|
addF("math", "Floor",
|
||||||
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
||||||
return s.newValue1(ssa.OpFloor, types.Types[TFLOAT64], args[0])
|
return s.newValue1(ssa.OpFloor, types.Types[TFLOAT64], args[0])
|
||||||
},
|
},
|
||||||
sys.PPC64, sys.S390X)
|
sys.ARM64, sys.PPC64, sys.S390X)
|
||||||
addF("math", "Round",
|
addF("math", "Round",
|
||||||
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
||||||
return s.newValue1(ssa.OpRound, types.Types[TFLOAT64], args[0])
|
return s.newValue1(ssa.OpRound, types.Types[TFLOAT64], args[0])
|
||||||
},
|
},
|
||||||
sys.S390X)
|
sys.ARM64, sys.S390X)
|
||||||
addF("math", "RoundToEven",
|
addF("math", "RoundToEven",
|
||||||
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
||||||
return s.newValue1(ssa.OpRoundToEven, types.Types[TFLOAT64], args[0])
|
return s.newValue1(ssa.OpRoundToEven, types.Types[TFLOAT64], args[0])
|
||||||
|
@ -81,7 +81,12 @@
|
|||||||
(Com16 x) -> (MVN x)
|
(Com16 x) -> (MVN x)
|
||||||
(Com8 x) -> (MVN x)
|
(Com8 x) -> (MVN x)
|
||||||
|
|
||||||
|
// math package intrinsics
|
||||||
(Sqrt x) -> (FSQRTD x)
|
(Sqrt x) -> (FSQRTD x)
|
||||||
|
(Ceil x) -> (FRINTPD x)
|
||||||
|
(Floor x) -> (FRINTMD x)
|
||||||
|
(Round x) -> (FRINTAD x)
|
||||||
|
(Trunc x) -> (FRINTZD x)
|
||||||
|
|
||||||
(Ctz64 <t> x) -> (CLZ (RBIT <t> x))
|
(Ctz64 <t> x) -> (CLZ (RBIT <t> x))
|
||||||
(Ctz32 <t> x) -> (CLZW (RBITW <t> x))
|
(Ctz32 <t> x) -> (CLZW (RBITW <t> x))
|
||||||
|
@ -323,6 +323,12 @@ func init() {
|
|||||||
{name: "FCVTSD", argLength: 1, reg: fp11, asm: "FCVTSD"}, // float32 -> float64
|
{name: "FCVTSD", argLength: 1, reg: fp11, asm: "FCVTSD"}, // float32 -> float64
|
||||||
{name: "FCVTDS", argLength: 1, reg: fp11, asm: "FCVTDS"}, // float64 -> float32
|
{name: "FCVTDS", argLength: 1, reg: fp11, asm: "FCVTDS"}, // float64 -> float32
|
||||||
|
|
||||||
|
// floating-point round to integral
|
||||||
|
{name: "FRINTAD", argLength: 1, reg: fp11, asm: "FRINTAD"},
|
||||||
|
{name: "FRINTMD", argLength: 1, reg: fp11, asm: "FRINTMD"},
|
||||||
|
{name: "FRINTPD", argLength: 1, reg: fp11, asm: "FRINTPD"},
|
||||||
|
{name: "FRINTZD", argLength: 1, reg: fp11, asm: "FRINTZD"},
|
||||||
|
|
||||||
// conditional instructions
|
// conditional instructions
|
||||||
{name: "CSELULT", argLength: 3, reg: gp2flags1, asm: "CSEL"}, // returns arg0 if flags indicates unsigned LT, arg1 otherwise, arg2=flags
|
{name: "CSELULT", argLength: 3, reg: gp2flags1, asm: "CSEL"}, // returns arg0 if flags indicates unsigned LT, arg1 otherwise, arg2=flags
|
||||||
{name: "CSELULT0", argLength: 2, reg: gp1flags1, asm: "CSEL"}, // returns arg0 if flags indicates unsigned LT, 0 otherwise, arg1=flags
|
{name: "CSELULT0", argLength: 2, reg: gp1flags1, asm: "CSEL"}, // returns arg0 if flags indicates unsigned LT, 0 otherwise, arg1=flags
|
||||||
|
@ -1093,6 +1093,10 @@ const (
|
|||||||
OpARM64FCVTZUD
|
OpARM64FCVTZUD
|
||||||
OpARM64FCVTSD
|
OpARM64FCVTSD
|
||||||
OpARM64FCVTDS
|
OpARM64FCVTDS
|
||||||
|
OpARM64FRINTAD
|
||||||
|
OpARM64FRINTMD
|
||||||
|
OpARM64FRINTPD
|
||||||
|
OpARM64FRINTZD
|
||||||
OpARM64CSELULT
|
OpARM64CSELULT
|
||||||
OpARM64CSELULT0
|
OpARM64CSELULT0
|
||||||
OpARM64CALLstatic
|
OpARM64CALLstatic
|
||||||
@ -13971,6 +13975,58 @@ var opcodeTable = [...]opInfo{
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "FRINTAD",
|
||||||
|
argLen: 1,
|
||||||
|
asm: arm64.AFRINTAD,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "FRINTMD",
|
||||||
|
argLen: 1,
|
||||||
|
asm: arm64.AFRINTMD,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "FRINTPD",
|
||||||
|
argLen: 1,
|
||||||
|
asm: arm64.AFRINTPD,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "FRINTZD",
|
||||||
|
argLen: 1,
|
||||||
|
asm: arm64.AFRINTZD,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "CSELULT",
|
name: "CSELULT",
|
||||||
argLen: 3,
|
argLen: 3,
|
||||||
|
@ -289,6 +289,8 @@ func rewriteValueARM64(v *Value) bool {
|
|||||||
return rewriteValueARM64_OpBswap32_0(v)
|
return rewriteValueARM64_OpBswap32_0(v)
|
||||||
case OpBswap64:
|
case OpBswap64:
|
||||||
return rewriteValueARM64_OpBswap64_0(v)
|
return rewriteValueARM64_OpBswap64_0(v)
|
||||||
|
case OpCeil:
|
||||||
|
return rewriteValueARM64_OpCeil_0(v)
|
||||||
case OpClosureCall:
|
case OpClosureCall:
|
||||||
return rewriteValueARM64_OpClosureCall_0(v)
|
return rewriteValueARM64_OpClosureCall_0(v)
|
||||||
case OpCom16:
|
case OpCom16:
|
||||||
@ -393,6 +395,8 @@ func rewriteValueARM64(v *Value) bool {
|
|||||||
return rewriteValueARM64_OpEqB_0(v)
|
return rewriteValueARM64_OpEqB_0(v)
|
||||||
case OpEqPtr:
|
case OpEqPtr:
|
||||||
return rewriteValueARM64_OpEqPtr_0(v)
|
return rewriteValueARM64_OpEqPtr_0(v)
|
||||||
|
case OpFloor:
|
||||||
|
return rewriteValueARM64_OpFloor_0(v)
|
||||||
case OpGeq16:
|
case OpGeq16:
|
||||||
return rewriteValueARM64_OpGeq16_0(v)
|
return rewriteValueARM64_OpGeq16_0(v)
|
||||||
case OpGeq16U:
|
case OpGeq16U:
|
||||||
@ -607,6 +611,8 @@ func rewriteValueARM64(v *Value) bool {
|
|||||||
return rewriteValueARM64_OpPopCount32_0(v)
|
return rewriteValueARM64_OpPopCount32_0(v)
|
||||||
case OpPopCount64:
|
case OpPopCount64:
|
||||||
return rewriteValueARM64_OpPopCount64_0(v)
|
return rewriteValueARM64_OpPopCount64_0(v)
|
||||||
|
case OpRound:
|
||||||
|
return rewriteValueARM64_OpRound_0(v)
|
||||||
case OpRound32F:
|
case OpRound32F:
|
||||||
return rewriteValueARM64_OpRound32F_0(v)
|
return rewriteValueARM64_OpRound32F_0(v)
|
||||||
case OpRound64F:
|
case OpRound64F:
|
||||||
@ -709,6 +715,8 @@ func rewriteValueARM64(v *Value) bool {
|
|||||||
return rewriteValueARM64_OpSub8_0(v)
|
return rewriteValueARM64_OpSub8_0(v)
|
||||||
case OpSubPtr:
|
case OpSubPtr:
|
||||||
return rewriteValueARM64_OpSubPtr_0(v)
|
return rewriteValueARM64_OpSubPtr_0(v)
|
||||||
|
case OpTrunc:
|
||||||
|
return rewriteValueARM64_OpTrunc_0(v)
|
||||||
case OpTrunc16to8:
|
case OpTrunc16to8:
|
||||||
return rewriteValueARM64_OpTrunc16to8_0(v)
|
return rewriteValueARM64_OpTrunc16to8_0(v)
|
||||||
case OpTrunc32to16:
|
case OpTrunc32to16:
|
||||||
@ -11318,6 +11326,17 @@ func rewriteValueARM64_OpBswap64_0(v *Value) bool {
|
|||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
func rewriteValueARM64_OpCeil_0(v *Value) bool {
|
||||||
|
// match: (Ceil x)
|
||||||
|
// cond:
|
||||||
|
// result: (FRINTPD x)
|
||||||
|
for {
|
||||||
|
x := v.Args[0]
|
||||||
|
v.reset(OpARM64FRINTPD)
|
||||||
|
v.AddArg(x)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
func rewriteValueARM64_OpClosureCall_0(v *Value) bool {
|
func rewriteValueARM64_OpClosureCall_0(v *Value) bool {
|
||||||
// match: (ClosureCall [argwid] entry closure mem)
|
// match: (ClosureCall [argwid] entry closure mem)
|
||||||
// cond:
|
// cond:
|
||||||
@ -12044,6 +12063,17 @@ func rewriteValueARM64_OpEqPtr_0(v *Value) bool {
|
|||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
func rewriteValueARM64_OpFloor_0(v *Value) bool {
|
||||||
|
// match: (Floor x)
|
||||||
|
// cond:
|
||||||
|
// result: (FRINTMD x)
|
||||||
|
for {
|
||||||
|
x := v.Args[0]
|
||||||
|
v.reset(OpARM64FRINTMD)
|
||||||
|
v.AddArg(x)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
func rewriteValueARM64_OpGeq16_0(v *Value) bool {
|
func rewriteValueARM64_OpGeq16_0(v *Value) bool {
|
||||||
b := v.Block
|
b := v.Block
|
||||||
_ = b
|
_ = b
|
||||||
@ -14717,6 +14747,17 @@ func rewriteValueARM64_OpPopCount64_0(v *Value) bool {
|
|||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
func rewriteValueARM64_OpRound_0(v *Value) bool {
|
||||||
|
// match: (Round x)
|
||||||
|
// cond:
|
||||||
|
// result: (FRINTAD x)
|
||||||
|
for {
|
||||||
|
x := v.Args[0]
|
||||||
|
v.reset(OpARM64FRINTAD)
|
||||||
|
v.AddArg(x)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
func rewriteValueARM64_OpRound32F_0(v *Value) bool {
|
func rewriteValueARM64_OpRound32F_0(v *Value) bool {
|
||||||
// match: (Round32F x)
|
// match: (Round32F x)
|
||||||
// cond:
|
// cond:
|
||||||
@ -16079,6 +16120,17 @@ func rewriteValueARM64_OpSubPtr_0(v *Value) bool {
|
|||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
func rewriteValueARM64_OpTrunc_0(v *Value) bool {
|
||||||
|
// match: (Trunc x)
|
||||||
|
// cond:
|
||||||
|
// result: (FRINTZD x)
|
||||||
|
for {
|
||||||
|
x := v.Args[0]
|
||||||
|
v.reset(OpARM64FRINTZD)
|
||||||
|
v.AddArg(x)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
func rewriteValueARM64_OpTrunc16to8_0(v *Value) bool {
|
func rewriteValueARM64_OpTrunc16to8_0(v *Value) bool {
|
||||||
// match: (Trunc16to8 x)
|
// match: (Trunc16to8 x)
|
||||||
// cond:
|
// cond:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user