cmd/compile: use zero register instead of specialized *zero instructions

This lets us get rid of lots of specialized opcodes for storing zero.
Instead, use regular store opcodes that just happen to use the zero
register as one of their inputs.

Change-Id: I2902a6f9b0831cb598df45189ca6bb57221bef72
Reviewed-on: https://go-review.googlesource.com/c/go/+/633075
Reviewed-by: Cherry Mui <cherryyz@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Keith Randall <khr@google.com>
Author: khr@golang.org, 2024-11-24 15:29:56 -08:00 (committed by Keith Randall)
Parent: 2d050e91a3
Commit: e373771490
12 changed files with 795 additions and 2348 deletions
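In effect, the per-width "storezero" opcodes and their rule sets below collapse into a single late-lowering rewrite. A minimal before/after sketch, using rules that appear in this diff:

	// Before: each store of constant zero selected a dedicated opcode.
	(MOVDstore [off] {sym} ptr (MOVDconst [0]) mem) => (MOVDstorezero [off] {sym} ptr mem)

	// After: constant zero itself lowers to the fixed zero register, and
	// ordinary stores simply take ZERO (r31) as their value input.
	(MOVDconst [0]) => (ZERO)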

src/cmd/compile/internal/arm64/ssa.go

@@ -147,13 +147,13 @@ func genIndexedOperand(op ssa.Op, base, idx int16) obj.Addr {
// Reg: base register, Index: (shifted) index register
mop := obj.Addr{Type: obj.TYPE_MEM, Reg: base}
switch op {
case ssa.OpARM64MOVDloadidx8, ssa.OpARM64MOVDstoreidx8, ssa.OpARM64MOVDstorezeroidx8,
case ssa.OpARM64MOVDloadidx8, ssa.OpARM64MOVDstoreidx8,
ssa.OpARM64FMOVDloadidx8, ssa.OpARM64FMOVDstoreidx8:
mop.Index = arm64.REG_LSL | 3<<5 | idx&31
case ssa.OpARM64MOVWloadidx4, ssa.OpARM64MOVWUloadidx4, ssa.OpARM64MOVWstoreidx4, ssa.OpARM64MOVWstorezeroidx4,
case ssa.OpARM64MOVWloadidx4, ssa.OpARM64MOVWUloadidx4, ssa.OpARM64MOVWstoreidx4,
ssa.OpARM64FMOVSloadidx4, ssa.OpARM64FMOVSstoreidx4:
mop.Index = arm64.REG_LSL | 2<<5 | idx&31
case ssa.OpARM64MOVHloadidx2, ssa.OpARM64MOVHUloadidx2, ssa.OpARM64MOVHstoreidx2, ssa.OpARM64MOVHstorezeroidx2:
case ssa.OpARM64MOVHloadidx2, ssa.OpARM64MOVHUloadidx2, ssa.OpARM64MOVHstoreidx2:
mop.Index = arm64.REG_LSL | 1<<5 | idx&31
default: // not shifted
mop.Index = idx
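A concrete reading of the encoding above (an illustration, not code from this diff): for a MOVDloadidx8 whose index lives in R5, idx&31 keeps only the low five bits of the obj register value (5 for R5), and 3<<5 places the shift amount (log2 of the 8-byte element size) in the bits above them, so

	mop.Index = arm64.REG_LSL | 3<<5 | 5 // "R5, LSL #3"

That is, the low five bits of Index name the register, the next bits hold the shift amount, and REG_LSL marks the extension kind.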
@@ -188,7 +188,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
p.From.Reg = x
p.To.Type = obj.TYPE_REG
p.To.Reg = y
case ssa.OpARM64MOVDnop:
case ssa.OpARM64MOVDnop, ssa.OpARM64ZERO:
// nothing to do
case ssa.OpLoadReg:
if v.Type.IsFlags() {
@@ -591,35 +591,6 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
p.To.Type = obj.TYPE_MEM
p.To.Reg = v.Args[0].Reg()
ssagen.AddAux(&p.To, v)
case ssa.OpARM64MOVBstorezero,
ssa.OpARM64MOVHstorezero,
ssa.OpARM64MOVWstorezero,
ssa.OpARM64MOVDstorezero:
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG
p.From.Reg = arm64.REGZERO
p.To.Type = obj.TYPE_MEM
p.To.Reg = v.Args[0].Reg()
ssagen.AddAux(&p.To, v)
case ssa.OpARM64MOVBstorezeroidx,
ssa.OpARM64MOVHstorezeroidx,
ssa.OpARM64MOVWstorezeroidx,
ssa.OpARM64MOVDstorezeroidx,
ssa.OpARM64MOVHstorezeroidx2,
ssa.OpARM64MOVWstorezeroidx4,
ssa.OpARM64MOVDstorezeroidx8:
p := s.Prog(v.Op.Asm())
p.To = genIndexedOperand(v.Op, v.Args[0].Reg(), v.Args[1].Reg())
p.From.Type = obj.TYPE_REG
p.From.Reg = arm64.REGZERO
case ssa.OpARM64MOVQstorezero:
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REGREG
p.From.Reg = arm64.REGZERO
p.From.Offset = int64(arm64.REGZERO)
p.To.Type = obj.TYPE_MEM
p.To.Reg = v.Args[0].Reg()
ssagen.AddAux(&p.To, v)
case ssa.OpARM64BFI,
ssa.OpARM64BFXIL:
p := s.Prog(v.Op.Asm())

src/cmd/compile/internal/ssa/_gen/ARM64.rules

@@ -485,9 +485,6 @@
(STP [16] dst (Select0 <typ.UInt64> (LDP [16] src mem)) (Select1 <typ.UInt64> (LDP [16] src mem))
(STP dst (Select0 <typ.UInt64> (LDP src mem)) (Select1 <typ.UInt64> (LDP src mem)) mem))))
(MOVDstorezero {s} [i] ptr x:(MOVDstorezero {s} [i+8] ptr mem)) && x.Uses == 1 && setPos(v, x.Pos) && clobber(x) => (MOVQstorezero {s} [i] ptr mem)
(MOVDstorezero {s} [i] ptr x:(MOVDstorezero {s} [i-8] ptr mem)) && x.Uses == 1 && setPos(v, x.Pos) && clobber(x) => (MOVQstorezero {s} [i-8] ptr mem)
// strip off fractional word move
(Move [s] dst src mem) && s%16 != 0 && s%16 <= 8 && s > 16 =>
(Move [8]
@@ -817,21 +814,6 @@
(FMOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2)
&& (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) =>
(FMOVDstore [off1+int32(off2)] {sym} ptr val mem)
(MOVBstorezero [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2)
&& (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) =>
(MOVBstorezero [off1+int32(off2)] {sym} ptr mem)
(MOVHstorezero [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2)
&& (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) =>
(MOVHstorezero [off1+int32(off2)] {sym} ptr mem)
(MOVWstorezero [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2)
&& (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) =>
(MOVWstorezero [off1+int32(off2)] {sym} ptr mem)
(MOVDstorezero [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2)
&& (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) =>
(MOVDstorezero [off1+int32(off2)] {sym} ptr mem)
(MOVQstorezero [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2)
&& (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) =>
(MOVQstorezero [off1+int32(off2)] {sym} ptr mem)
// register indexed store
(MOVDstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil => (MOVDstoreidx ptr idx val mem)
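The zero-store offset-folding rules removed above become redundant once a zero store is just an ordinary store: the analogous rules for the regular opcodes already fold offsets, with the value argument now free to be the zero register. For instance, the MOVDstore counterpart of the FMOVDstore rule kept above (quoted from the full rules file; it is not part of this excerpt) reads:

	(MOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2)
		&& (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) =>
		(MOVDstore [off1+int32(off2)] {sym} ptr val mem)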
@@ -947,70 +929,6 @@
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
&& (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) =>
(FMOVDstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
(MOVBstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
&& (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) =>
(MOVBstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
(MOVHstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
&& (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) =>
(MOVHstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
(MOVWstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
&& (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) =>
(MOVWstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
(MOVDstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
&& (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) =>
(MOVDstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
(MOVQstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
&& (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) =>
(MOVQstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
// store zero
(MOVBstore [off] {sym} ptr (MOVDconst [0]) mem) => (MOVBstorezero [off] {sym} ptr mem)
(MOVHstore [off] {sym} ptr (MOVDconst [0]) mem) => (MOVHstorezero [off] {sym} ptr mem)
(MOVWstore [off] {sym} ptr (MOVDconst [0]) mem) => (MOVWstorezero [off] {sym} ptr mem)
(MOVDstore [off] {sym} ptr (MOVDconst [0]) mem) => (MOVDstorezero [off] {sym} ptr mem)
(STP [off] {sym} ptr (MOVDconst [0]) (MOVDconst [0]) mem) => (MOVQstorezero [off] {sym} ptr mem)
// register indexed store zero
(MOVDstorezero [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil => (MOVDstorezeroidx ptr idx mem)
(MOVWstorezero [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil => (MOVWstorezeroidx ptr idx mem)
(MOVHstorezero [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil => (MOVHstorezeroidx ptr idx mem)
(MOVBstorezero [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil => (MOVBstorezeroidx ptr idx mem)
(MOVDstoreidx ptr idx (MOVDconst [0]) mem) => (MOVDstorezeroidx ptr idx mem)
(MOVWstoreidx ptr idx (MOVDconst [0]) mem) => (MOVWstorezeroidx ptr idx mem)
(MOVHstoreidx ptr idx (MOVDconst [0]) mem) => (MOVHstorezeroidx ptr idx mem)
(MOVBstoreidx ptr idx (MOVDconst [0]) mem) => (MOVBstorezeroidx ptr idx mem)
(MOVDstorezeroidx ptr (MOVDconst [c]) mem) && is32Bit(c) => (MOVDstorezero [int32(c)] ptr mem)
(MOVDstorezeroidx (MOVDconst [c]) idx mem) && is32Bit(c) => (MOVDstorezero [int32(c)] idx mem)
(MOVWstorezeroidx ptr (MOVDconst [c]) mem) && is32Bit(c) => (MOVWstorezero [int32(c)] ptr mem)
(MOVWstorezeroidx (MOVDconst [c]) idx mem) && is32Bit(c) => (MOVWstorezero [int32(c)] idx mem)
(MOVHstorezeroidx ptr (MOVDconst [c]) mem) && is32Bit(c) => (MOVHstorezero [int32(c)] ptr mem)
(MOVHstorezeroidx (MOVDconst [c]) idx mem) && is32Bit(c) => (MOVHstorezero [int32(c)] idx mem)
(MOVBstorezeroidx ptr (MOVDconst [c]) mem) && is32Bit(c) => (MOVBstorezero [int32(c)] ptr mem)
(MOVBstorezeroidx (MOVDconst [c]) idx mem) && is32Bit(c) => (MOVBstorezero [int32(c)] idx mem)
// shifted register indexed store zero
(MOVDstorezero [off] {sym} (ADDshiftLL [3] ptr idx) mem) && off == 0 && sym == nil => (MOVDstorezeroidx8 ptr idx mem)
(MOVWstorezero [off] {sym} (ADDshiftLL [2] ptr idx) mem) && off == 0 && sym == nil => (MOVWstorezeroidx4 ptr idx mem)
(MOVHstorezero [off] {sym} (ADDshiftLL [1] ptr idx) mem) && off == 0 && sym == nil => (MOVHstorezeroidx2 ptr idx mem)
(MOVDstorezeroidx ptr (SLLconst [3] idx) mem) => (MOVDstorezeroidx8 ptr idx mem)
(MOVWstorezeroidx ptr (SLLconst [2] idx) mem) => (MOVWstorezeroidx4 ptr idx mem)
(MOVHstorezeroidx ptr (SLLconst [1] idx) mem) => (MOVHstorezeroidx2 ptr idx mem)
(MOVHstorezeroidx ptr (ADD idx idx) mem) => (MOVHstorezeroidx2 ptr idx mem)
(MOVDstorezeroidx (SLLconst [3] idx) ptr mem) => (MOVDstorezeroidx8 ptr idx mem)
(MOVWstorezeroidx (SLLconst [2] idx) ptr mem) => (MOVWstorezeroidx4 ptr idx mem)
(MOVHstorezeroidx (SLLconst [1] idx) ptr mem) => (MOVHstorezeroidx2 ptr idx mem)
(MOVHstorezeroidx (ADD idx idx) ptr mem) => (MOVHstorezeroidx2 ptr idx mem)
(MOVDstoreidx8 ptr idx (MOVDconst [0]) mem) => (MOVDstorezeroidx8 ptr idx mem)
(MOVWstoreidx4 ptr idx (MOVDconst [0]) mem) => (MOVWstorezeroidx4 ptr idx mem)
(MOVHstoreidx2 ptr idx (MOVDconst [0]) mem) => (MOVHstorezeroidx2 ptr idx mem)
(MOVDstorezeroidx8 ptr (MOVDconst [c]) mem) && is32Bit(c<<3) => (MOVDstorezero [int32(c<<3)] ptr mem)
(MOVWstorezeroidx4 ptr (MOVDconst [c]) mem) && is32Bit(c<<2) => (MOVWstorezero [int32(c<<2)] ptr mem)
(MOVHstorezeroidx2 ptr (MOVDconst [c]) mem) && is32Bit(c<<1) => (MOVHstorezero [int32(c<<1)] ptr mem)
// replace load from same location as preceding store with zero/sign extension (or copy in case of full width)
// these seem to have bad interaction with other rules, resulting in slower code
@@ -1025,35 +943,6 @@
//(FMOVDload [off] {sym} ptr (FMOVDstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> x
//(LDP [off] {sym} ptr (STP [off2] {sym2} ptr2 x y _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> x y
(MOVBload [off] {sym} ptr (MOVBstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVDconst [0])
(MOVBUload [off] {sym} ptr (MOVBstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVDconst [0])
(MOVHload [off] {sym} ptr (MOVHstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVDconst [0])
(MOVHUload [off] {sym} ptr (MOVHstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVDconst [0])
(MOVWload [off] {sym} ptr (MOVWstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVDconst [0])
(MOVWUload [off] {sym} ptr (MOVWstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVDconst [0])
(MOVDload [off] {sym} ptr (MOVDstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVDconst [0])
(MOVBloadidx ptr idx (MOVBstorezeroidx ptr2 idx2 _))
&& (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) => (MOVDconst [0])
(MOVBUloadidx ptr idx (MOVBstorezeroidx ptr2 idx2 _))
&& (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) => (MOVDconst [0])
(MOVHloadidx ptr idx (MOVHstorezeroidx ptr2 idx2 _))
&& (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) => (MOVDconst [0])
(MOVHUloadidx ptr idx (MOVHstorezeroidx ptr2 idx2 _))
&& (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) => (MOVDconst [0])
(MOVWloadidx ptr idx (MOVWstorezeroidx ptr2 idx2 _))
&& (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) => (MOVDconst [0])
(MOVWUloadidx ptr idx (MOVWstorezeroidx ptr2 idx2 _))
&& (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) => (MOVDconst [0])
(MOVDloadidx ptr idx (MOVDstorezeroidx ptr2 idx2 _))
&& (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) => (MOVDconst [0])
(MOVHloadidx2 ptr idx (MOVHstorezeroidx2 ptr2 idx2 _)) && isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) => (MOVDconst [0])
(MOVHUloadidx2 ptr idx (MOVHstorezeroidx2 ptr2 idx2 _)) && isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) => (MOVDconst [0])
(MOVWloadidx4 ptr idx (MOVWstorezeroidx4 ptr2 idx2 _)) && isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) => (MOVDconst [0])
(MOVWUloadidx4 ptr idx (MOVWstorezeroidx4 ptr2 idx2 _)) && isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) => (MOVDconst [0])
(MOVDloadidx8 ptr idx (MOVDstorezeroidx8 ptr2 idx2 _)) && isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) => (MOVDconst [0])
// don't extend before store
(MOVBstore [off] {sym} ptr (MOVBreg x) mem) => (MOVBstore [off] {sym} ptr x mem)
(MOVBstore [off] {sym} ptr (MOVBUreg x) mem) => (MOVBstore [off] {sym} ptr x mem)

src/cmd/compile/internal/ssa/_gen/ARM64Ops.go

@@ -47,7 +47,7 @@ var regNamesARM64 = []string{
"R15",
"R16",
"R17",
"R18", // platform register, not used
// R18 = platform register, not used
"R19",
"R20",
"R21",
@@ -60,7 +60,20 @@ var regNamesARM64 = []string{
"g", // aka R28
"R29", // frame pointer, not used
"R30", // aka REGLINK
"SP", // aka R31
"ZERO", // zero register (aka R31)
"SP", // stack pointer (aka R31)
// Note: both ZERO and SP are register number 31!
// What r31 means in a particular instruction depends on
// the instruction. Generally, for arguments of instructions
// which are addresses to load or store from, r31 means SP.
// In other instructions, r31 means ZERO. But there are
// exceptions.
// See https://stackoverflow.com/questions/61532867
// This does not have much of an effect here, as the
// cmd/internal/obj/arm64 interface treats them as two
// different registers and picks the right instruction
// that encodes what r31 means. But see issue 71651.
"F0",
"F1",
@@ -125,16 +138,17 @@ func init() {
// Common individual register masks
var (
gp = buildReg("R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30")
gpg = gp | buildReg("g")
gpsp = gp | buildReg("SP")
gpspg = gpg | buildReg("SP")
gpspsbg = gpspg | buildReg("SB")
gpg = gp | buildReg("g") | buildReg("ZERO")
gpsp = gp | buildReg("SP") | buildReg("ZERO")
gpspg = gpg | buildReg("SP") | buildReg("ZERO")
gpspsbg = gpspg | buildReg("SB") | buildReg("ZERO")
fp = buildReg("F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31")
callerSave = gp | fp | buildReg("g") // runtime.setg (and anything calling it) may clobber g
r0 = buildReg("R0")
r1 = buildReg("R1")
r2 = buildReg("R2")
r3 = buildReg("R3")
rz = buildReg("ZERO") // TODO: when 71651 is fixed, we might be able to remove uses of this
)
// Common regInfo
var (
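The new rz mask is combined with Go's AND NOT operator (&^) below to carve ZERO out of every mask describing an address operand, because an r31 base register would be read as SP rather than zero. A minimal standalone sketch of the mask arithmetic (the bit position is illustrative):

	package main

	import "fmt"

	type regMask uint64

	// Pretend ZERO is register number 31, i.e. bit 31 of the mask.
	const rzBit regMask = 1 << 31

	func main() {
		gpspsbg := regMask(0xffffffff) // a mask that includes the ZERO bit
		addrMask := gpspsbg &^ rzBit   // &^ clears the bit: ZERO is never a valid base address
		fmt.Printf("%#x\n", addrMask)  // prints 0x7fffffff
	}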
@@ -151,15 +165,14 @@ func init() {
gp2flags = regInfo{inputs: []regMask{gpg, gpg}}
gp2flags1 = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp}}
gp2flags1flags = regInfo{inputs: []regMask{gp, gp, 0}, outputs: []regMask{gp, 0}}
gp2load = regInfo{inputs: []regMask{gpspsbg, gpg}, outputs: []regMask{gp}}
gp2load = regInfo{inputs: []regMask{gpspsbg &^ rz, gpg}, outputs: []regMask{gp}}
gp31 = regInfo{inputs: []regMask{gpg, gpg, gpg}, outputs: []regMask{gp}}
gpload = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{gp}}
gpload2 = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{gpg, gpg}}
gpstore = regInfo{inputs: []regMask{gpspsbg, gpg}}
gpstore0 = regInfo{inputs: []regMask{gpspsbg}}
gpstore2 = regInfo{inputs: []regMask{gpspsbg, gpg, gpg}}
gpxchg = regInfo{inputs: []regMask{gpspsbg, gpg}, outputs: []regMask{gp}}
gpcas = regInfo{inputs: []regMask{gpspsbg, gpg, gpg}, outputs: []regMask{gp}}
gpload = regInfo{inputs: []regMask{gpspsbg &^ rz}, outputs: []regMask{gp}}
gpload2 = regInfo{inputs: []regMask{gpspsbg &^ rz}, outputs: []regMask{gpg, gpg}}
gpstore = regInfo{inputs: []regMask{gpspsbg &^ rz, gpg}}
gpstore2 = regInfo{inputs: []regMask{gpspsbg &^ rz, gpg, gpg}}
gpxchg = regInfo{inputs: []regMask{gpspsbg &^ rz, gpg}, outputs: []regMask{gp}}
gpcas = regInfo{inputs: []regMask{gpspsbg &^ rz, gpg, gpg}, outputs: []regMask{gp}}
fp01 = regInfo{inputs: nil, outputs: []regMask{fp}}
fp11 = regInfo{inputs: []regMask{fp}, outputs: []regMask{fp}}
fpgp = regInfo{inputs: []regMask{fp}, outputs: []regMask{gp}}
@@ -168,12 +181,12 @@ func init() {
fp31 = regInfo{inputs: []regMask{fp, fp, fp}, outputs: []regMask{fp}}
fp2flags = regInfo{inputs: []regMask{fp, fp}}
fp1flags = regInfo{inputs: []regMask{fp}}
fpload = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{fp}}
fpload2 = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{fp, fp}}
fp2load = regInfo{inputs: []regMask{gpspsbg, gpg}, outputs: []regMask{fp}}
fpstore = regInfo{inputs: []regMask{gpspsbg, fp}}
fpstoreidx = regInfo{inputs: []regMask{gpspsbg, gpg, fp}}
fpstore2 = regInfo{inputs: []regMask{gpspsbg, fp, fp}}
fpload = regInfo{inputs: []regMask{gpspsbg &^ rz}, outputs: []regMask{fp}}
fpload2 = regInfo{inputs: []regMask{gpspsbg &^ rz}, outputs: []regMask{fp, fp}}
fp2load = regInfo{inputs: []regMask{gpspsbg &^ rz, gpg}, outputs: []regMask{fp}}
fpstore = regInfo{inputs: []regMask{gpspsbg &^ rz, fp}}
fpstoreidx = regInfo{inputs: []regMask{gpspsbg &^ rz, gpg, fp}}
fpstore2 = regInfo{inputs: []regMask{gpspsbg &^ rz, fp, fp}}
readflags = regInfo{inputs: nil, outputs: []regMask{gp}}
prefreg = regInfo{inputs: []regMask{gpspsbg}}
)
@@ -445,23 +458,6 @@ func init() {
{name: "FMOVSstoreidx4", argLength: 4, reg: fpstoreidx, asm: "FMOVS", typ: "Mem"}, // store 32-bit float of arg2 to arg0 + arg1*4, arg3=mem.
{name: "FMOVDstoreidx8", argLength: 4, reg: fpstoreidx, asm: "FMOVD", typ: "Mem"}, // store 64-bit float of arg2 to arg0 + arg1*8, arg3=mem.
{name: "MOVBstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVB", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 1 byte of zero to arg0 + auxInt + aux. arg1=mem.
{name: "MOVHstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVH", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 2 bytes of zero to arg0 + auxInt + aux. arg1=mem.
{name: "MOVWstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVW", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 4 bytes of zero to arg0 + auxInt + aux. arg1=mem.
{name: "MOVDstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVD", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes of zero to arg0 + auxInt + aux. arg1=mem.
{name: "MOVQstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "STP", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 16 bytes of zero to arg0 + auxInt + aux. arg1=mem.
// register indexed store zero
{name: "MOVBstorezeroidx", argLength: 3, reg: gpstore, asm: "MOVB", typ: "Mem"}, // store 1 byte of zero to arg0 + arg1, arg2 = mem.
{name: "MOVHstorezeroidx", argLength: 3, reg: gpstore, asm: "MOVH", typ: "Mem"}, // store 2 bytes of zero to arg0 + arg1, arg2 = mem.
{name: "MOVWstorezeroidx", argLength: 3, reg: gpstore, asm: "MOVW", typ: "Mem"}, // store 4 bytes of zero to arg0 + arg1, arg2 = mem.
{name: "MOVDstorezeroidx", argLength: 3, reg: gpstore, asm: "MOVD", typ: "Mem"}, // store 8 bytes of zero to arg0 + arg1, arg2 = mem.
// shifted register indexed store zero
{name: "MOVHstorezeroidx2", argLength: 3, reg: gpstore, asm: "MOVH", typ: "Mem"}, // store 2 bytes of zero to arg0 + arg1*2, arg2 = mem.
{name: "MOVWstorezeroidx4", argLength: 3, reg: gpstore, asm: "MOVW", typ: "Mem"}, // store 4 bytes of zero to arg0 + arg1*4, arg2 = mem.
{name: "MOVDstorezeroidx8", argLength: 3, reg: gpstore, asm: "MOVD", typ: "Mem"}, // store 8 bytes of zero to arg0 + arg1*8, arg2 = mem.
{name: "FMOVDgpfp", argLength: 1, reg: gpfp, asm: "FMOVD"}, // move int64 to float64 (no conversion)
{name: "FMOVDfpgp", argLength: 1, reg: fpgp, asm: "FMOVD"}, // move float64 to int64 (no conversion)
{name: "FMOVSgpfp", argLength: 1, reg: gpfp, asm: "FMOVS"}, // move 32bits from int to float reg (no conversion)
@@ -520,7 +516,7 @@ func init() {
{name: "CALLinter", argLength: -1, reg: regInfo{inputs: []regMask{gp}, clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true}, // call fn by pointer. arg0=codeptr, last arg=mem, auxint=argsize, returns mem
// pseudo-ops
{name: "LoweredNilCheck", argLength: 2, reg: regInfo{inputs: []regMask{gpg}}, nilCheck: true, faultOnNilArg0: true}, // panic if arg0 is nil. arg1=mem.
{name: "LoweredNilCheck", argLength: 2, reg: regInfo{inputs: []regMask{gpg &^ rz}}, nilCheck: true, faultOnNilArg0: true}, // panic if arg0 is nil. arg1=mem.
{name: "Equal", argLength: 1, reg: readflags}, // bool, true flags encode x==y false otherwise.
{name: "NotEqual", argLength: 1, reg: readflags}, // bool, true flags encode x!=y false otherwise.
@@ -777,6 +773,7 @@ func init() {
// Publication barrier
{name: "DMB", argLength: 1, aux: "Int64", asm: "DMB", hasSideEffects: true}, // Do data barrier. arg0=memory, aux=option.
{name: "ZERO", zeroWidth: true, fixedReg: true}, // reads-as-zero register
}
blocks := []blockData{

src/cmd/compile/internal/ssa/_gen/ARM64latelower.rules

@@ -93,3 +93,6 @@
// TODO: we should be able to get rid of MOVDnop all together.
// But for now, this is enough to get rid of lots of them.
(MOVDnop (MOVDconst [c])) => (MOVDconst [c])
// use zero register
(MOVDconst [0]) => (ZERO)
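As a rough end-to-end illustration (the function and the register choices are hypothetical, not part of this diff):

	package p

	func clear(p *int64) {
		*p = 0 // formerly selected MOVDstorezero; now a plain store of ZERO
	}

Before this change the assignment selected MOVDstorezero; now the constant lowers to ZERO here at late-lower time and a regular MOVDstore reads it, so the emitted instruction is still effectively MOVD ZR, (R0).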

src/cmd/compile/internal/ssa/_gen/genericOps.go

@@ -363,8 +363,8 @@ var genericOps = []opData{
{name: "Addr", argLength: 1, aux: "Sym", symEffect: "Addr"}, // Address of a variable. Arg0=SB. Aux identifies the variable.
{name: "LocalAddr", argLength: 2, aux: "Sym", symEffect: "Addr"}, // Address of a variable. Arg0=SP. Arg1=mem. Aux identifies the variable.
{name: "SP", zeroWidth: true}, // stack pointer
{name: "SB", typ: "Uintptr", zeroWidth: true}, // static base pointer (a.k.a. globals pointer)
{name: "SP", zeroWidth: true, fixedReg: true}, // stack pointer
{name: "SB", typ: "Uintptr", zeroWidth: true, fixedReg: true}, // static base pointer (a.k.a. globals pointer)
{name: "Invalid"}, // unused value
{name: "SPanchored", typ: "Uintptr", argLength: 2, zeroWidth: true}, // arg0 = SP, arg1 = mem. Result is identical to arg0, but cannot be scheduled before memory state arg1.

src/cmd/compile/internal/ssa/_gen/main.go

@@ -69,6 +69,7 @@ type opData struct {
hasSideEffects bool // for "reasons", not to be eliminated. E.g., atomic store, #19182.
zeroWidth bool // op never translates into any machine code. example: copy, which may sometimes translate to machine code, is not zero-width.
unsafePoint bool // this op is an unsafe point, i.e. not safe for async preemption
fixedReg bool // this op will be assigned a fixed register
symEffect string // effect this op has on symbol in aux
scale uint8 // amd64/386 indexed load scale
}
@@ -338,6 +339,9 @@ func genOp() {
if v.zeroWidth {
fmt.Fprintln(w, "zeroWidth: true,")
}
if v.fixedReg {
fmt.Fprintln(w, "fixedReg: true,")
}
if v.unsafePoint {
fmt.Fprintln(w, "unsafePoint: true,")
}
@@ -439,6 +443,8 @@ func genOp() {
objname = pkg + ".REGSP"
case "g":
objname = pkg + ".REGG"
case "ZERO":
objname = pkg + ".REGZERO"
default:
objname = pkg + ".REG_" + r
}

src/cmd/compile/internal/ssa/op.go

@@ -43,6 +43,7 @@ type opInfo struct {
hasSideEffects bool // for "reasons", not to be eliminated. E.g., atomic store, #19182.
zeroWidth bool // op never translates into any machine code. example: copy, which may sometimes translate to machine code, is not zero-width.
unsafePoint bool // this op is an unsafe point, i.e. not safe for async preemption
fixedReg bool // this op will be assigned a fixed register
symEffect SymEffect // effect this op has on symbol in aux
scale uint8 // amd64/386 indexed load scale
}

src/cmd/compile/internal/ssa/opGen.go (generated): diff suppressed because it is too large

src/cmd/compile/internal/ssa/pair.go

@@ -53,7 +53,6 @@ var pairableStores = map[Op]pairableStoreInfo{
OpARM64MOVWstore: {4, OpARM64STPW},
OpARM64FMOVDstore: {8, OpARM64FSTPD},
OpARM64FMOVSstore: {4, OpARM64FSTPS},
// TODO: storezero variants.
}
// offsetOk returns true if a pair instruction should be used
@@ -321,13 +320,13 @@ func pairStores(f *Func) {
// we'll be moving.
var width int64
switch w.Op {
case OpARM64MOVDstore, OpARM64MOVDstorezero, OpARM64FMOVDstore:
case OpARM64MOVDstore, OpARM64FMOVDstore:
width = 8
case OpARM64MOVWstore, OpARM64MOVWstorezero, OpARM64FMOVSstore:
case OpARM64MOVWstore, OpARM64FMOVSstore:
width = 4
case OpARM64MOVHstore, OpARM64MOVHstorezero:
case OpARM64MOVHstore:
width = 2
case OpARM64MOVBstore, OpARM64MOVBstorezero:
case OpARM64MOVBstore:
width = 1
case OpCopy:
continue // this was a store we merged earlier

src/cmd/compile/internal/ssa/regalloc.go

@@ -181,6 +181,10 @@ func (m regMask) String() string {
return s
}
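// contains reports whether register r is in the mask m.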
func (m regMask) contains(r register) bool {
return m>>r&1 != 0
}
func (s *regAllocState) RegMaskString(m regMask) string {
str := ""
for r := register(0); m != 0; r++ {
@@ -247,6 +251,7 @@ type regAllocState struct {
SPReg register
SBReg register
GReg register
ZeroIntReg register
allocatable regMask
// live values at the end of each block. live[b.ID] is a list of value IDs
@@ -269,7 +274,8 @@ type regAllocState struct {
// preregalloc Value it was derived from.
orig []*Value
// current state of each register
// current state of each register.
// Includes only registers in allocatable.
regs []regState
// registers that contain values which can't be kicked out
@@ -348,6 +354,9 @@ type startReg struct {
// freeReg frees up register r. Any current user of r is kicked out.
func (s *regAllocState) freeReg(r register) {
if !s.allocatable.contains(r) && !s.isGReg(r) {
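// Non-allocatable registers other than g (such as the fixed ZERO
// register) have no entry in s.regs to clear.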
return
}
v := s.regs[r].v
if v == nil {
s.f.Fatalf("tried to free an already free register %d\n", r)
@@ -404,15 +413,19 @@ func (s *regAllocState) assignReg(r register, v *Value, c *Value) {
if s.f.pass.debug > regDebug {
fmt.Printf("assignReg %s %s/%s\n", &s.registers[r], v, c)
}
// Allocate v to r.
s.values[v.ID].regs |= regMask(1) << r
s.f.setHome(c, &s.registers[r])
// Allocate r to v.
if !s.allocatable.contains(r) && !s.isGReg(r) {
return
}
if s.regs[r].v != nil {
s.f.Fatalf("tried to assign register %d to %s/%s but it is already used by %s", r, v, c, s.regs[r].v)
}
// Update state.
s.regs[r] = regState{v, c}
s.values[v.ID].regs |= regMask(1) << r
s.used |= regMask(1) << r
s.f.setHome(c, &s.registers[r])
}
// allocReg chooses a register from the set of registers in mask.
@ -549,6 +562,9 @@ func (s *regAllocState) allocValToReg(v *Value, mask regMask, nospill bool, pos
// Check if v is already in a requested register.
if mask&vi.regs != 0 {
r := pickReg(mask & vi.regs)
if !s.allocatable.contains(r) {
return v // v is in a fixed register
}
if s.regs[r].v != v || s.regs[r].c == nil {
panic("bad register state")
}
@@ -572,11 +588,17 @@ func (s *regAllocState) allocValToReg(v *Value, mask regMask, nospill bool, pos
if vi.regs != 0 {
// Copy from a register that v is already in.
r2 := pickReg(vi.regs)
var current *Value
if !s.allocatable.contains(r2) {
current = v // v is in a fixed register
} else {
if s.regs[r2].v != v {
panic("bad register state")
}
current = s.regs[r2].c
}
s.usedSinceBlockStart |= regMask(1) << r2
c = s.curBlock.NewValue1(pos, OpCopy, v.Type, s.regs[r2].c)
c = s.curBlock.NewValue1(pos, OpCopy, v.Type, current)
} else if v.rematerializeable() {
// Rematerialize instead of loading from the spill location.
c = v.copyIntoWithXPos(s.curBlock, pos)
@@ -637,6 +659,7 @@ func (s *regAllocState) init(f *Func) {
s.SPReg = noRegister
s.SBReg = noRegister
s.GReg = noRegister
s.ZeroIntReg = noRegister
for r := register(0); r < s.numRegs; r++ {
switch s.registers[r].String() {
case "SP":
@ -645,6 +668,8 @@ func (s *regAllocState) init(f *Func) {
s.SBReg = r
case "g":
s.GReg = r
case "ZERO": // TODO: arch-specific?
s.ZeroIntReg = r
}
}
// Make sure we found all required registers.
@@ -666,6 +691,9 @@ func (s *regAllocState) init(f *Func) {
if s.f.Config.hasGReg {
s.allocatable &^= 1 << s.GReg
}
if s.ZeroIntReg != noRegister {
s.allocatable &^= 1 << s.ZeroIntReg
}
if buildcfg.FramePointerEnabled && s.f.Config.FPReg >= 0 {
s.allocatable &^= 1 << uint(s.f.Config.FPReg)
}
@@ -818,7 +846,7 @@ func (s *regAllocState) advanceUses(v *Value) {
ai := &s.values[a.ID]
r := ai.uses
ai.uses = r.next
if r.next == nil || (a.Op != OpSP && a.Op != OpSB && r.next.dist > s.nextCall[s.curIdx]) {
if r.next == nil || (!opcodeTable[a.Op].fixedReg && r.next.dist > s.nextCall[s.curIdx]) {
// Value is dead (or is not used again until after a call), free all registers that hold it.
s.freeRegs(ai.regs)
}
@@ -836,7 +864,7 @@ func (s *regAllocState) dropIfUnused(v *Value) {
}
vi := &s.values[v.ID]
r := vi.uses
if r == nil || (v.Op != OpSP && v.Op != OpSB && r.dist > s.nextCall[s.curIdx]) {
if r == nil || (!opcodeTable[v.Op].fixedReg && r.dist > s.nextCall[s.curIdx]) {
s.freeRegs(vi.regs)
}
}
@@ -1366,18 +1394,21 @@ func (s *regAllocState) regalloc(f *Func) {
if v.Op == OpPhi {
f.Fatalf("phi %s not at start of block", v)
}
if v.Op == OpSP {
if opcodeTable[v.Op].fixedReg {
switch v.Op {
case OpSP:
s.assignReg(s.SPReg, v, v)
b.Values = append(b.Values, v)
s.advanceUses(v)
s.sp = v.ID
continue
}
if v.Op == OpSB {
case OpSB:
s.assignReg(s.SBReg, v, v)
s.sb = v.ID
case OpARM64ZERO:
s.assignReg(s.ZeroIntReg, v, v)
default:
f.Fatalf("unknown fixed-register op %s", v)
}
b.Values = append(b.Values, v)
s.advanceUses(v)
s.sb = v.ID
continue
}
if v.Op == OpSelect0 || v.Op == OpSelect1 || v.Op == OpSelectN {
@@ -2414,6 +2445,10 @@ func (e *edgeState) processDest(loc Location, vid ID, splice **Value, pos src.XP
fmt.Printf("moving v%d to %s\n", vid, loc)
fmt.Printf("sources of v%d:", vid)
}
if opcodeTable[v.Op].fixedReg {
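// A fixed-register value never moves: the value itself serves as the
// copy, and its home is its permanent register, so skip the cache scan.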
c = v
src = e.s.f.getHome(v.ID)
} else {
for _, w := range e.cache[vid] {
h := e.s.f.getHome(w.ID)
if e.s.f.pass.debug > regDebug {
@ -2425,6 +2460,7 @@ func (e *edgeState) processDest(loc Location, vid ID, splice **Value, pos src.XP
src = h
}
}
}
if e.s.f.pass.debug > regDebug {
if src != nil {
fmt.Printf(" [use %s]\n", src)
@@ -2649,8 +2685,9 @@ func (v *Value) rematerializeable() bool {
return false
}
for _, a := range v.Args {
// SP and SB (generated by OpSP and OpSB) are always available.
if a.Op != OpSP && a.Op != OpSB {
// Fixed-register allocations (SP, SB, etc.) are always available.
// Any other argument of an opcode makes it not rematerializeable.
if !opcodeTable[a.Op].fixedReg {
return false
}
}

src/cmd/compile/internal/ssa/rewriteARM64.go (generated): diff suppressed because it is too large

src/cmd/compile/internal/ssa/rewriteARM64latelower.go

@@ -22,6 +22,8 @@ func rewriteValueARM64latelower(v *Value) bool {
return rewriteValueARM64latelower_OpARM64MOVBUreg(v)
case OpARM64MOVBreg:
return rewriteValueARM64latelower_OpARM64MOVBreg(v)
case OpARM64MOVDconst:
return rewriteValueARM64latelower_OpARM64MOVDconst(v)
case OpARM64MOVDnop:
return rewriteValueARM64latelower_OpARM64MOVDnop(v)
case OpARM64MOVDreg:
@@ -408,6 +410,18 @@ func rewriteValueARM64latelower_OpARM64MOVBreg(v *Value) bool {
}
return false
}
func rewriteValueARM64latelower_OpARM64MOVDconst(v *Value) bool {
// match: (MOVDconst [0])
// result: (ZERO)
for {
if auxIntToInt64(v.AuxInt) != 0 {
break
}
v.reset(OpARM64ZERO)
return true
}
return false
}
func rewriteValueARM64latelower_OpARM64MOVDnop(v *Value) bool {
v_0 := v.Args[0]
// match: (MOVDnop (MOVDconst [c]))