cmd/compile: implement compiler for riscv64

Based on riscv-go port.

Updates #27532

Change-Id: Ia329daa243db63ff334053b8807ea96b97ce3acf
Reviewed-on: https://go-review.googlesource.com/c/go/+/204631
Run-TryBot: Joel Sing <joel@sing.id.au>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
Author: Joel Sing
Date: 2019-11-04 04:40:47 +11:00
parent 91d75f4e4c
commit 98d2717499
17 changed files with 8511 additions and 12 deletions


@@ -175,7 +175,7 @@ func TestIntendedInlining(t *testing.T) {
}
switch runtime.GOARCH {
-case "386", "wasm", "arm":
+case "386", "wasm", "arm", "riscv64":
default:
// TODO(mvdan): As explained in /test/inline_sync.go, some
// architectures don't have atomic intrinsics, so these go over


@@ -705,6 +705,12 @@ func (lv *Liveness) markUnsafePoints() {
v = v.Args[0]
continue
}
case ssa.OpRISCV64SUB:
// RISCV64 lowers Neq32 to include a SUB with multiple arguments.
// TODO(jsing): it would be preferable not to use Neq32 for
// writeBarrier.enabled checks on this platform.
v = v.Args[0]
continue
case ssa.Op386MOVLload, ssa.OpARM64MOVWUload, ssa.OpPPC64MOVWZload, ssa.OpWasmI64Load32U:
// Args[0] is the address of the write
// barrier control. Ignore Args[1],


@@ -6533,7 +6533,7 @@ func (s *SSAGenState) Call(v *ssa.Value) *obj.Prog {
} else {
// TODO(mdempsky): Can these differences be eliminated?
switch thearch.LinkArch.Family {
-case sys.AMD64, sys.I386, sys.PPC64, sys.S390X, sys.Wasm:
+case sys.AMD64, sys.I386, sys.PPC64, sys.RISCV64, sys.S390X, sys.Wasm:
p.To.Type = obj.TYPE_REG
case sys.ARM, sys.ARM64, sys.MIPS, sys.MIPS64:
p.To.Type = obj.TYPE_MEM


@@ -0,0 +1,25 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package riscv64
import (
"cmd/compile/internal/gc"
"cmd/internal/obj/riscv"
)
func Init(arch *gc.Arch) {
arch.LinkArch = &riscv.LinkRISCV64
arch.REGSP = riscv.REG_SP
arch.MAXWIDTH = 1 << 50
arch.Ginsnop = ginsnop
arch.Ginsnopdefer = ginsnop
arch.ZeroRange = zeroRange
arch.SSAMarkMoves = ssaMarkMoves
arch.SSAGenValue = ssaGenValue
arch.SSAGenBlock = ssaGenBlock
}


@@ -0,0 +1,48 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package riscv64
import (
"cmd/compile/internal/gc"
"cmd/internal/obj"
"cmd/internal/obj/riscv"
)
func zeroRange(pp *gc.Progs, p *obj.Prog, off, cnt int64, _ *uint32) *obj.Prog {
if cnt == 0 {
return p
}
// Adjust the frame to account for LR.
off += gc.Ctxt.FixedFrameSize()
if cnt < int64(4*gc.Widthptr) {
for i := int64(0); i < cnt; i += int64(gc.Widthptr) {
p = pp.Appendpp(p, riscv.AMOV, obj.TYPE_REG, riscv.REG_ZERO, 0, obj.TYPE_MEM, riscv.REG_SP, off+i)
}
return p
}
// TODO(jsing): Add a duff zero implementation for medium sized ranges.
// Loop, zeroing pointer width bytes at a time.
// ADD $(off), SP, T0
// ADD $(cnt), T0, T1
// loop:
// MOV ZERO, (T0)
// ADD $Widthptr, T0
// BNE T0, T1, loop
p = pp.Appendpp(p, riscv.AADD, obj.TYPE_CONST, 0, off, obj.TYPE_REG, riscv.REG_T0, 0)
p.Reg = riscv.REG_SP
p = pp.Appendpp(p, riscv.AADD, obj.TYPE_CONST, 0, cnt, obj.TYPE_REG, riscv.REG_T1, 0)
p.Reg = riscv.REG_T0
p = pp.Appendpp(p, riscv.AMOV, obj.TYPE_REG, riscv.REG_ZERO, 0, obj.TYPE_MEM, riscv.REG_T0, 0)
loop := p
p = pp.Appendpp(p, riscv.AADD, obj.TYPE_CONST, 0, int64(gc.Widthptr), obj.TYPE_REG, riscv.REG_T0, 0)
p = pp.Appendpp(p, riscv.ABNE, obj.TYPE_REG, riscv.REG_T0, 0, obj.TYPE_BRANCH, 0, 0)
p.Reg = riscv.REG_T1
gc.Patch(p, loop)
return p
}
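
For illustration only (not part of the change), here is a minimal standalone Go sketch of the loop's semantics: T0 starts at the first byte to clear, T1 is the end address (exclusive), and the BNE keeps looping until T0 reaches T1. The helper names are invented for the example.

package main

import "fmt"

// zeroLoop mirrors the pseudo-assembly above: store a zero word at T0,
// advance T0 by the pointer width, and branch back while T0 != T1.
// It assumes cnt is a multiple of widthptr, as in the frame-zeroing case.
func zeroLoop(mem []byte, off, cnt, widthptr int) {
    t0, t1 := off, off+cnt
    for {
        for i := 0; i < widthptr; i++ { // MOV ZERO, (T0)
            mem[t0+i] = 0
        }
        t0 += widthptr // ADD $Widthptr, T0
        if t0 == t1 { // BNE T0, T1, loop
            break
        }
    }
}

func main() {
    mem := []byte{1, 1, 1, 1, 1, 1, 1, 1, 9}
    zeroLoop(mem, 0, 8, 4)
    fmt.Println(mem) // [0 0 0 0 0 0 0 0 9]
}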


@@ -0,0 +1,20 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package riscv64
import (
"cmd/compile/internal/gc"
"cmd/internal/obj"
"cmd/internal/obj/riscv"
)
func ginsnop(pp *gc.Progs) *obj.Prog {
// Hardware nop is ADD $0, ZERO
p := pp.Prog(riscv.AADD)
p.From.Type = obj.TYPE_CONST
p.Reg = riscv.REG_ZERO
p.To = obj.Addr{Type: obj.TYPE_REG, Reg: riscv.REG_ZERO}
return p
}


@@ -0,0 +1,496 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package riscv64
import (
"cmd/compile/internal/gc"
"cmd/compile/internal/ssa"
"cmd/compile/internal/types"
"cmd/internal/obj"
"cmd/internal/obj/riscv"
)
// ssaRegToReg maps ssa register numbers to obj register numbers.
var ssaRegToReg = []int16{
riscv.REG_X0,
// X1 (LR): unused
riscv.REG_X2,
riscv.REG_X3,
riscv.REG_X4,
riscv.REG_X5,
riscv.REG_X6,
riscv.REG_X7,
riscv.REG_X8,
riscv.REG_X9,
riscv.REG_X10,
riscv.REG_X11,
riscv.REG_X12,
riscv.REG_X13,
riscv.REG_X14,
riscv.REG_X15,
riscv.REG_X16,
riscv.REG_X17,
riscv.REG_X18,
riscv.REG_X19,
riscv.REG_X20,
riscv.REG_X21,
riscv.REG_X22,
riscv.REG_X23,
riscv.REG_X24,
riscv.REG_X25,
riscv.REG_X26,
riscv.REG_X27,
riscv.REG_X28,
riscv.REG_X29,
riscv.REG_X30,
riscv.REG_X31,
riscv.REG_F0,
riscv.REG_F1,
riscv.REG_F2,
riscv.REG_F3,
riscv.REG_F4,
riscv.REG_F5,
riscv.REG_F6,
riscv.REG_F7,
riscv.REG_F8,
riscv.REG_F9,
riscv.REG_F10,
riscv.REG_F11,
riscv.REG_F12,
riscv.REG_F13,
riscv.REG_F14,
riscv.REG_F15,
riscv.REG_F16,
riscv.REG_F17,
riscv.REG_F18,
riscv.REG_F19,
riscv.REG_F20,
riscv.REG_F21,
riscv.REG_F22,
riscv.REG_F23,
riscv.REG_F24,
riscv.REG_F25,
riscv.REG_F26,
riscv.REG_F27,
riscv.REG_F28,
riscv.REG_F29,
riscv.REG_F30,
riscv.REG_F31,
0, // SB isn't a real register. We fill an Addr.Reg field with 0 in this case.
}
func loadByType(t *types.Type) obj.As {
width := t.Size()
if t.IsFloat() {
switch width {
case 4:
return riscv.AMOVF
case 8:
return riscv.AMOVD
default:
gc.Fatalf("unknown float width for load %d in type %v", width, t)
return 0
}
}
switch width {
case 1:
if t.IsSigned() {
return riscv.AMOVB
} else {
return riscv.AMOVBU
}
case 2:
if t.IsSigned() {
return riscv.AMOVH
} else {
return riscv.AMOVHU
}
case 4:
if t.IsSigned() {
return riscv.AMOVW
} else {
return riscv.AMOVWU
}
case 8:
return riscv.AMOV
default:
gc.Fatalf("unknown width for load %d in type %v", width, t)
return 0
}
}
// storeByType returns the store instruction of the given type.
func storeByType(t *types.Type) obj.As {
width := t.Size()
if t.IsFloat() {
switch width {
case 4:
return riscv.AMOVF
case 8:
return riscv.AMOVD
default:
gc.Fatalf("unknown float width for store %d in type %v", width, t)
return 0
}
}
switch width {
case 1:
return riscv.AMOVB
case 2:
return riscv.AMOVH
case 4:
return riscv.AMOVW
case 8:
return riscv.AMOV
default:
gc.Fatalf("unknown width for store %d in type %v", width, t)
return 0
}
}
// largestMove returns the largest move instruction possible and its size,
// given the alignment of the total size of the move.
//
// e.g., a 16-byte move may use MOV, but an 11-byte move must use MOVB.
//
// Note that the moves may not be on naturally aligned addresses depending on
// the source and destination.
//
// This matches the calculation in ssa.moveSize.
func largestMove(alignment int64) (obj.As, int64) {
switch {
case alignment%8 == 0:
return riscv.AMOV, 8
case alignment%4 == 0:
return riscv.AMOVW, 4
case alignment%2 == 0:
return riscv.AMOVH, 2
default:
return riscv.AMOVB, 1
}
}
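
As a quick, hedged illustration of the selection above (instruction names as plain strings rather than obj.As values):

package main

import "fmt"

// largestMoveSize mirrors the alignment-based selection above.
func largestMoveSize(alignment int64) (string, int64) {
    switch {
    case alignment%8 == 0:
        return "MOV", 8
    case alignment%4 == 0:
        return "MOVW", 4
    case alignment%2 == 0:
        return "MOVH", 2
    default:
        return "MOVB", 1
    }
}

func main() {
    for _, a := range []int64{8, 4, 2, 1, 16, 6} {
        op, sz := largestMoveSize(a)
        fmt.Printf("alignment %2d -> %s (%d bytes per move)\n", a, op, sz)
    }
}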
// markMoves marks any MOVXconst ops that need to avoid clobbering flags.
// RISC-V has no flags, so this is a no-op.
func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) {}
func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
s.SetPos(v.Pos)
switch v.Op {
case ssa.OpInitMem:
// memory arg needs no code
case ssa.OpArg:
// input args need no code
case ssa.OpPhi:
gc.CheckLoweredPhi(v)
case ssa.OpCopy, ssa.OpRISCV64MOVconvert:
if v.Type.IsMemory() {
return
}
rs := v.Args[0].Reg()
rd := v.Reg()
if rs == rd {
return
}
as := riscv.AMOV
if v.Type.IsFloat() {
as = riscv.AMOVD
}
p := s.Prog(as)
p.From.Type = obj.TYPE_REG
p.From.Reg = rs
p.To.Type = obj.TYPE_REG
p.To.Reg = rd
case ssa.OpLoadReg:
if v.Type.IsFlags() {
v.Fatalf("load flags not implemented: %v", v.LongString())
return
}
p := s.Prog(loadByType(v.Type))
gc.AddrAuto(&p.From, v.Args[0])
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()
case ssa.OpStoreReg:
if v.Type.IsFlags() {
v.Fatalf("store flags not implemented: %v", v.LongString())
return
}
p := s.Prog(storeByType(v.Type))
p.From.Type = obj.TYPE_REG
p.From.Reg = v.Args[0].Reg()
gc.AddrAuto(&p.To, v)
case ssa.OpSP, ssa.OpSB, ssa.OpGetG:
// nothing to do
case ssa.OpRISCV64ADD, ssa.OpRISCV64SUB, ssa.OpRISCV64XOR, ssa.OpRISCV64OR, ssa.OpRISCV64AND,
ssa.OpRISCV64SLL, ssa.OpRISCV64SRA, ssa.OpRISCV64SRL,
ssa.OpRISCV64SLT, ssa.OpRISCV64SLTU, ssa.OpRISCV64MUL, ssa.OpRISCV64MULW, ssa.OpRISCV64MULH,
ssa.OpRISCV64MULHU, ssa.OpRISCV64DIV, ssa.OpRISCV64DIVU, ssa.OpRISCV64DIVW,
ssa.OpRISCV64DIVUW, ssa.OpRISCV64REM, ssa.OpRISCV64REMU, ssa.OpRISCV64REMW,
ssa.OpRISCV64REMUW,
ssa.OpRISCV64FADDS, ssa.OpRISCV64FSUBS, ssa.OpRISCV64FMULS, ssa.OpRISCV64FDIVS,
ssa.OpRISCV64FEQS, ssa.OpRISCV64FNES, ssa.OpRISCV64FLTS, ssa.OpRISCV64FLES,
ssa.OpRISCV64FADDD, ssa.OpRISCV64FSUBD, ssa.OpRISCV64FMULD, ssa.OpRISCV64FDIVD,
ssa.OpRISCV64FEQD, ssa.OpRISCV64FNED, ssa.OpRISCV64FLTD, ssa.OpRISCV64FLED:
r := v.Reg()
r1 := v.Args[0].Reg()
r2 := v.Args[1].Reg()
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG
p.From.Reg = r2
p.Reg = r1
p.To.Type = obj.TYPE_REG
p.To.Reg = r
case ssa.OpRISCV64FSQRTS, ssa.OpRISCV64FNEGS, ssa.OpRISCV64FSQRTD, ssa.OpRISCV64FNEGD,
ssa.OpRISCV64FMVSX, ssa.OpRISCV64FMVDX,
ssa.OpRISCV64FCVTSW, ssa.OpRISCV64FCVTSL, ssa.OpRISCV64FCVTWS, ssa.OpRISCV64FCVTLS,
ssa.OpRISCV64FCVTDW, ssa.OpRISCV64FCVTDL, ssa.OpRISCV64FCVTWD, ssa.OpRISCV64FCVTLD, ssa.OpRISCV64FCVTDS, ssa.OpRISCV64FCVTSD:
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG
p.From.Reg = v.Args[0].Reg()
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()
case ssa.OpRISCV64ADDI, ssa.OpRISCV64XORI, ssa.OpRISCV64ORI, ssa.OpRISCV64ANDI,
ssa.OpRISCV64SLLI, ssa.OpRISCV64SRAI, ssa.OpRISCV64SRLI, ssa.OpRISCV64SLTI,
ssa.OpRISCV64SLTIU:
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_CONST
p.From.Offset = v.AuxInt
p.Reg = v.Args[0].Reg()
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()
case ssa.OpRISCV64MOVBconst, ssa.OpRISCV64MOVHconst, ssa.OpRISCV64MOVWconst, ssa.OpRISCV64MOVDconst:
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_CONST
p.From.Offset = v.AuxInt
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()
case ssa.OpRISCV64MOVaddr:
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_ADDR
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()
var wantreg string
// MOVW $sym+off(base), R
switch v.Aux.(type) {
default:
v.Fatalf("aux is of unknown type %T", v.Aux)
case *obj.LSym:
wantreg = "SB"
gc.AddAux(&p.From, v)
case *gc.Node:
wantreg = "SP"
gc.AddAux(&p.From, v)
case nil:
// No sym, just MOVW $off(SP), R
wantreg = "SP"
p.From.Reg = riscv.REG_SP
p.From.Offset = v.AuxInt
}
if reg := v.Args[0].RegName(); reg != wantreg {
v.Fatalf("bad reg %s for symbol type %T, want %s", reg, v.Aux, wantreg)
}
case ssa.OpRISCV64MOVBload, ssa.OpRISCV64MOVHload, ssa.OpRISCV64MOVWload, ssa.OpRISCV64MOVDload,
ssa.OpRISCV64MOVBUload, ssa.OpRISCV64MOVHUload, ssa.OpRISCV64MOVWUload,
ssa.OpRISCV64FMOVWload, ssa.OpRISCV64FMOVDload:
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_MEM
p.From.Reg = v.Args[0].Reg()
gc.AddAux(&p.From, v)
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()
case ssa.OpRISCV64MOVBstore, ssa.OpRISCV64MOVHstore, ssa.OpRISCV64MOVWstore, ssa.OpRISCV64MOVDstore,
ssa.OpRISCV64FMOVWstore, ssa.OpRISCV64FMOVDstore:
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG
p.From.Reg = v.Args[1].Reg()
p.To.Type = obj.TYPE_MEM
p.To.Reg = v.Args[0].Reg()
gc.AddAux(&p.To, v)
case ssa.OpRISCV64SEQZ, ssa.OpRISCV64SNEZ:
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG
p.From.Reg = v.Args[0].Reg()
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()
case ssa.OpRISCV64CALLstatic, ssa.OpRISCV64CALLclosure, ssa.OpRISCV64CALLinter:
s.Call(v)
case ssa.OpRISCV64LoweredWB:
p := s.Prog(obj.ACALL)
p.To.Type = obj.TYPE_MEM
p.To.Name = obj.NAME_EXTERN
p.To.Sym = v.Aux.(*obj.LSym)
case ssa.OpRISCV64LoweredPanicBoundsA, ssa.OpRISCV64LoweredPanicBoundsB, ssa.OpRISCV64LoweredPanicBoundsC:
p := s.Prog(obj.ACALL)
p.To.Type = obj.TYPE_MEM
p.To.Name = obj.NAME_EXTERN
p.To.Sym = gc.BoundsCheckFunc[v.AuxInt]
s.UseArgs(16) // space used in callee args area by assembly stubs
case ssa.OpRISCV64LoweredZero:
mov, sz := largestMove(v.AuxInt)
// mov ZERO, (Rarg0)
// ADD $sz, Rarg0
// BGEU Rarg1, Rarg0, -2(PC)
p := s.Prog(mov)
p.From.Type = obj.TYPE_REG
p.From.Reg = riscv.REG_ZERO
p.To.Type = obj.TYPE_MEM
p.To.Reg = v.Args[0].Reg()
p2 := s.Prog(riscv.AADD)
p2.From.Type = obj.TYPE_CONST
p2.From.Offset = sz
p2.To.Type = obj.TYPE_REG
p2.To.Reg = v.Args[0].Reg()
p3 := s.Prog(riscv.ABGEU)
p3.To.Type = obj.TYPE_BRANCH
p3.Reg = v.Args[0].Reg()
p3.From.Type = obj.TYPE_REG
p3.From.Reg = v.Args[1].Reg()
gc.Patch(p3, p)
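
For reference, a standalone sketch of the loop's termination, not part of the change: per the op definition, arg1 is the (inclusive) address of the last element, so the BGEU branches back while last >= p. Names are invented for the example.

package main

import "fmt"

// loweredZero mirrors the three-instruction loop above.
func loweredZero(mem []byte, p, last, sz int) {
    for {
        for i := 0; i < sz; i++ { // mov ZERO, (Rarg0)
            mem[p+i] = 0
        }
        p += sz // ADD $sz, Rarg0
        if !(last >= p) { // BGEU Rarg1, Rarg0, loop
            break
        }
    }
}

func main() {
    mem := []byte{1, 2, 3, 4, 5, 6, 7, 8}
    loweredZero(mem, 0, 6, 2) // zero 8 bytes with 2-byte moves; last = 0 + 8 - 2
    fmt.Println(mem)          // [0 0 0 0 0 0 0 0]
}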
case ssa.OpRISCV64LoweredMove:
mov, sz := largestMove(v.AuxInt)
// mov (Rarg1), T2
// mov T2, (Rarg0)
// ADD $sz, Rarg0
// ADD $sz, Rarg1
// BGEU Rarg2, Rarg0, -4(PC)
p := s.Prog(mov)
p.From.Type = obj.TYPE_MEM
p.From.Reg = v.Args[1].Reg()
p.To.Type = obj.TYPE_REG
p.To.Reg = riscv.REG_T2
p2 := s.Prog(mov)
p2.From.Type = obj.TYPE_REG
p2.From.Reg = riscv.REG_T2
p2.To.Type = obj.TYPE_MEM
p2.To.Reg = v.Args[0].Reg()
p3 := s.Prog(riscv.AADD)
p3.From.Type = obj.TYPE_CONST
p3.From.Offset = sz
p3.To.Type = obj.TYPE_REG
p3.To.Reg = v.Args[0].Reg()
p4 := s.Prog(riscv.AADD)
p4.From.Type = obj.TYPE_CONST
p4.From.Offset = sz
p4.To.Type = obj.TYPE_REG
p4.To.Reg = v.Args[1].Reg()
p5 := s.Prog(riscv.ABGEU)
p5.To.Type = obj.TYPE_BRANCH
p5.Reg = v.Args[1].Reg()
p5.From.Type = obj.TYPE_REG
p5.From.Reg = v.Args[2].Reg()
gc.Patch(p5, p)
case ssa.OpRISCV64LoweredNilCheck:
// Issue a load which will fault if arg is nil.
// TODO: optimizations. See arm and amd64 LoweredNilCheck.
p := s.Prog(riscv.AMOVB)
p.From.Type = obj.TYPE_MEM
p.From.Reg = v.Args[0].Reg()
gc.AddAux(&p.From, v)
p.To.Type = obj.TYPE_REG
p.To.Reg = riscv.REG_ZERO
if gc.Debug_checknil != 0 && v.Pos.Line() > 1 { // v.Pos == 1 in generated wrappers
gc.Warnl(v.Pos, "generated nil check")
}
case ssa.OpRISCV64LoweredGetClosurePtr:
// Closure pointer is S4 (riscv.REG_CTXT).
gc.CheckLoweredGetClosurePtr(v)
case ssa.OpRISCV64LoweredGetCallerSP:
// caller's SP is FixedFrameSize below the address of the first arg
p := s.Prog(riscv.AMOV)
p.From.Type = obj.TYPE_ADDR
p.From.Offset = -gc.Ctxt.FixedFrameSize()
p.From.Name = obj.NAME_PARAM
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()
case ssa.OpRISCV64LoweredGetCallerPC:
p := s.Prog(obj.AGETCALLERPC)
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()
default:
v.Fatalf("Unhandled op %v", v.Op)
}
}
func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
s.SetPos(b.Pos)
switch b.Kind {
case ssa.BlockDefer:
// defer returns in A0:
// 0 if we should continue executing
// 1 if we should jump to deferreturn call
p := s.Prog(riscv.ABNE)
p.To.Type = obj.TYPE_BRANCH
p.From.Type = obj.TYPE_REG
p.From.Reg = riscv.REG_ZERO
p.Reg = riscv.REG_A0
s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
if b.Succs[0].Block() != next {
p := s.Prog(obj.AJMP)
p.To.Type = obj.TYPE_BRANCH
s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
}
case ssa.BlockPlain:
if b.Succs[0].Block() != next {
p := s.Prog(obj.AJMP)
p.To.Type = obj.TYPE_BRANCH
s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
}
case ssa.BlockExit:
case ssa.BlockRet:
s.Prog(obj.ARET)
case ssa.BlockRetJmp:
p := s.Prog(obj.AJMP)
p.To.Type = obj.TYPE_MEM
p.To.Name = obj.NAME_EXTERN
p.To.Sym = b.Aux.(*obj.LSym)
case ssa.BlockRISCV64BNE:
var p *obj.Prog
switch next {
case b.Succs[0].Block():
p = s.Br(riscv.ABNE, b.Succs[1].Block())
p.As = riscv.InvertBranch(p.As)
case b.Succs[1].Block():
p = s.Br(riscv.ABNE, b.Succs[0].Block())
default:
if b.Likely != ssa.BranchUnlikely {
p = s.Br(riscv.ABNE, b.Succs[0].Block())
s.Br(obj.AJMP, b.Succs[1].Block())
} else {
p = s.Br(riscv.ABNE, b.Succs[1].Block())
p.As = riscv.InvertBranch(p.As)
s.Br(obj.AJMP, b.Succs[0].Block())
}
}
p.Reg = b.Controls[0].Reg()
p.From.Type = obj.TYPE_REG
p.From.Reg = riscv.REG_ZERO
default:
b.Fatalf("Unhandled block: %s", b.LongString())
}
}


@@ -305,6 +305,16 @@ func NewConfig(arch string, types Types, ctxt *obj.Link, optimize bool) *Config
c.LinkReg = linkRegMIPS
c.hasGReg = true
c.noDuffDevice = true
case "riscv64":
c.PtrSize = 8
c.RegSize = 8
c.lowerBlock = rewriteBlockRISCV64
c.lowerValue = rewriteValueRISCV64
c.registers = registersRISCV64[:]
c.gpRegMask = gpRegMaskRISCV64
c.fpRegMask = fpRegMaskRISCV64
c.FPReg = framepointerRegRISCV64
c.hasGReg = true
case "wasm":
c.PtrSize = 8
c.RegSize = 8


@@ -0,0 +1,478 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Optimizations TODO:
// * Somehow track when values are already zero/signed-extended, avoid re-extending.
// * Use SLTI and SLTIU for comparisons to constants, instead of SLT/SLTU with constants in registers
// * Find a more efficient way to do zero/sign extension than left+right shift.
// There are many other options (store then load-extend, LUI+ANDI for zero extend, special case 32->64, ...),
// but left+right shift is simple and uniform, and we don't have real hardware to do perf testing on anyway.
// * Use the zero register instead of moving 0 into a register.
// * Add rules to avoid generating a temp bool value for (If (SLT[U] ...) ...).
// * Optimize left and right shift by simplifying SLTIU, Neg, and ADD for constants.
// * Arrange for non-trivial Zero and Move lowerings to use aligned loads and stores.
// * Eliminate zero immediate shifts, adds, etc.
// * Use a Duff's device for some moves and zeros.
// * Avoid using Neq32 for writeBarrier.enabled checks.
// Lowering arithmetic
(Add64 x y) -> (ADD x y)
(AddPtr x y) -> (ADD x y)
(Add32 x y) -> (ADD x y)
(Add16 x y) -> (ADD x y)
(Add8 x y) -> (ADD x y)
(Add32F x y) -> (FADDS x y)
(Add64F x y) -> (FADDD x y)
(Sub64 x y) -> (SUB x y)
(SubPtr x y) -> (SUB x y)
(Sub32 x y) -> (SUB x y)
(Sub16 x y) -> (SUB x y)
(Sub8 x y) -> (SUB x y)
(Sub32F x y) -> (FSUBS x y)
(Sub64F x y) -> (FSUBD x y)
(Mul64 x y) -> (MUL x y)
(Mul32 x y) -> (MULW x y)
(Mul16 x y) -> (MULW (SignExt16to32 x) (SignExt16to32 y))
(Mul8 x y) -> (MULW (SignExt8to32 x) (SignExt8to32 y))
(Mul32F x y) -> (FMULS x y)
(Mul64F x y) -> (FMULD x y)
(Div32F x y) -> (FDIVS x y)
(Div64F x y) -> (FDIVD x y)
(Div64 x y) -> (DIV x y)
(Div64u x y) -> (DIVU x y)
(Div32 x y) -> (DIVW x y)
(Div32u x y) -> (DIVUW x y)
(Div16 x y) -> (DIVW (SignExt16to32 x) (SignExt16to32 y))
(Div16u x y) -> (DIVUW (ZeroExt16to32 x) (ZeroExt16to32 y))
(Div8 x y) -> (DIVW (SignExt8to32 x) (SignExt8to32 y))
(Div8u x y) -> (DIVUW (ZeroExt8to32 x) (ZeroExt8to32 y))
(Hmul64 x y) -> (MULH x y)
(Hmul64u x y) -> (MULHU x y)
(Hmul32 x y) -> (SRAI [32] (MUL (SignExt32to64 x) (SignExt32to64 y)))
(Hmul32u x y) -> (SRLI [32] (MUL (ZeroExt32to64 x) (ZeroExt32to64 y)))
// (x + y) / 2 -> (x / 2) + (y / 2) + (x & y & 1)
(Avg64u <t> x y) -> (ADD (ADD <t> (SRLI <t> [1] x) (SRLI <t> [1] y)) (ANDI <t> [1] (AND <t> x y)))
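
This relies on the identity (x+y)/2 == x/2 + y/2 + (x&y&1), which keeps the intermediate sum from overflowing 64 bits. A small self-check in Go, for illustration only:

package main

import "fmt"

// avg mirrors the Avg64u lowering above.
func avg(x, y uint64) uint64 {
    return x/2 + y/2 + (x & y & 1)
}

func main() {
    fmt.Println(avg(3, 4))                   // 3, same as (3+4)/2 truncated
    fmt.Println(avg(7, 9))                   // 8, the x&y&1 term restores the lost half
    fmt.Println(avg(^uint64(0), ^uint64(0))) // 18446744073709551615, no overflow
}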
(Mod64 x y) -> (REM x y)
(Mod64u x y) -> (REMU x y)
(Mod32 x y) -> (REMW x y)
(Mod32u x y) -> (REMUW x y)
(Mod16 x y) -> (REMW (SignExt16to32 x) (SignExt16to32 y))
(Mod16u x y) -> (REMUW (ZeroExt16to32 x) (ZeroExt16to32 y))
(Mod8 x y) -> (REMW (SignExt8to32 x) (SignExt8to32 y))
(Mod8u x y) -> (REMUW (ZeroExt8to32 x) (ZeroExt8to32 y))
(And64 x y) -> (AND x y)
(And32 x y) -> (AND x y)
(And16 x y) -> (AND x y)
(And8 x y) -> (AND x y)
(Or64 x y) -> (OR x y)
(Or32 x y) -> (OR x y)
(Or16 x y) -> (OR x y)
(Or8 x y) -> (OR x y)
(Xor64 x y) -> (XOR x y)
(Xor32 x y) -> (XOR x y)
(Xor16 x y) -> (XOR x y)
(Xor8 x y) -> (XOR x y)
(Neg64 x) -> (SUB (MOVDconst) x)
(Neg32 x) -> (SUB (MOVWconst) x)
(Neg16 x) -> (SUB (MOVHconst) x)
(Neg8 x) -> (SUB (MOVBconst) x)
(Neg32F x) -> (FNEGS x)
(Neg64F x) -> (FNEGD x)
(Com64 x) -> (XORI [int64(-1)] x)
(Com32 x) -> (XORI [int64(-1)] x)
(Com16 x) -> (XORI [int64(-1)] x)
(Com8 x) -> (XORI [int64(-1)] x)
(Sqrt x) -> (FSQRTD x)
// Zero and sign extension
// Shift left until the bits we want are at the top of the register.
// Then logical/arithmetic shift right for zero/sign extend.
// We always extend to 64 bits; there's no reason not to,
// and optimization rules can then collapse some extensions.
(SignExt8to16 <t> x) -> (SRAI [56] (SLLI <t> [56] x))
(SignExt8to32 <t> x) -> (SRAI [56] (SLLI <t> [56] x))
(SignExt8to64 <t> x) -> (SRAI [56] (SLLI <t> [56] x))
(SignExt16to32 <t> x) -> (SRAI [48] (SLLI <t> [48] x))
(SignExt16to64 <t> x) -> (SRAI [48] (SLLI <t> [48] x))
(SignExt32to64 <t> x) -> (SRAI [32] (SLLI <t> [32] x))
(ZeroExt8to16 <t> x) -> (SRLI [56] (SLLI <t> [56] x))
(ZeroExt8to32 <t> x) -> (SRLI [56] (SLLI <t> [56] x))
(ZeroExt8to64 <t> x) -> (SRLI [56] (SLLI <t> [56] x))
(ZeroExt16to32 <t> x) -> (SRLI [48] (SLLI <t> [48] x))
(ZeroExt16to64 <t> x) -> (SRLI [48] (SLLI <t> [48] x))
(ZeroExt32to64 <t> x) -> (SRLI [32] (SLLI <t> [32] x))
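
The shift-pair trick can be checked in plain Go; this sketch covers the 8-to-64 case (helper names invented for the example):

package main

import "fmt"

// signExt8to64 and zeroExt8to64 mirror the SLLI/SRAI and SLLI/SRLI pairs above:
// move the byte to the top of the register, then shift back arithmetically
// (sign extend) or logically (zero extend).
func signExt8to64(x uint64) int64  { return int64(x<<56) >> 56 }
func zeroExt8to64(x uint64) uint64 { return (x << 56) >> 56 }

func main() {
    fmt.Println(signExt8to64(0xff)) // -1
    fmt.Println(zeroExt8to64(0xff)) // 255
    fmt.Println(signExt8to64(0x7f)) // 127
}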
(Cvt32to32F x) -> (FCVTSW x)
(Cvt32to64F x) -> (FCVTDW x)
(Cvt64to32F x) -> (FCVTSL x)
(Cvt64to64F x) -> (FCVTDL x)
(Cvt32Fto32 x) -> (FCVTWS x)
(Cvt32Fto64 x) -> (FCVTLS x)
(Cvt64Fto32 x) -> (FCVTWD x)
(Cvt64Fto64 x) -> (FCVTLD x)
(Cvt32Fto64F x) -> (FCVTDS x)
(Cvt64Fto32F x) -> (FCVTSD x)
(Round32F x) -> x
(Round64F x) -> x
// From genericOps.go:
// "0 if arg0 == 0, -1 if arg0 > 0, undef if arg0<0"
//
// Like other arches, we compute ~((x-1) >> 63), with arithmetic right shift.
// For positive x, bit 63 of x-1 is always 0, so the result is -1.
// For zero x, bit 63 of x-1 is 1, so the result is 0.
//
// TODO(prattmic): Use XORconst etc instead of XOR (MOVDconst).
(Slicemask <t> x) -> (XOR (MOVDconst [-1]) (SRA <t> (SUB <t> x (MOVDconst [1])) (MOVDconst [63])))
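
A quick self-check of the ~((x-1) >> 63) computation, for illustration only:

package main

import "fmt"

// slicemask mirrors the lowering above: an arithmetic shift of x-1 spreads its
// sign bit across the word, and the complement gives -1 for x > 0 and 0 for x == 0.
func slicemask(x int64) int64 {
    return ^((x - 1) >> 63)
}

func main() {
    fmt.Println(slicemask(0))  // 0
    fmt.Println(slicemask(1))  // -1
    fmt.Println(slicemask(42)) // -1
}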
// Truncations
// We ignore the unused high parts of registers, so truncates are just copies.
(Trunc16to8 x) -> x
(Trunc32to8 x) -> x
(Trunc32to16 x) -> x
(Trunc64to8 x) -> x
(Trunc64to16 x) -> x
(Trunc64to32 x) -> x
// Shifts
// SLL only considers the bottom 6 bits of y. If y > 64, the result should
// always be 0.
//
// Breaking down the operation:
//
// (SLL x y) generates x << (y & 63).
//
// If y < 64, this is the value we want. Otherwise, we want zero.
//
// So, we AND with -1 * uint64(y < 64), which is 0xfffff... if y < 64 and 0 otherwise.
(Lsh8x8 <t> x y) -> (AND (SLL <t> x y) (Neg8 <t> (SLTIU <t> [64] (ZeroExt8to64 y))))
(Lsh8x16 <t> x y) -> (AND (SLL <t> x y) (Neg8 <t> (SLTIU <t> [64] (ZeroExt16to64 y))))
(Lsh8x32 <t> x y) -> (AND (SLL <t> x y) (Neg8 <t> (SLTIU <t> [64] (ZeroExt32to64 y))))
(Lsh8x64 <t> x y) -> (AND (SLL <t> x y) (Neg8 <t> (SLTIU <t> [64] y)))
(Lsh16x8 <t> x y) -> (AND (SLL <t> x y) (Neg16 <t> (SLTIU <t> [64] (ZeroExt8to64 y))))
(Lsh16x16 <t> x y) -> (AND (SLL <t> x y) (Neg16 <t> (SLTIU <t> [64] (ZeroExt16to64 y))))
(Lsh16x32 <t> x y) -> (AND (SLL <t> x y) (Neg16 <t> (SLTIU <t> [64] (ZeroExt32to64 y))))
(Lsh16x64 <t> x y) -> (AND (SLL <t> x y) (Neg16 <t> (SLTIU <t> [64] y)))
(Lsh32x8 <t> x y) -> (AND (SLL <t> x y) (Neg32 <t> (SLTIU <t> [64] (ZeroExt8to64 y))))
(Lsh32x16 <t> x y) -> (AND (SLL <t> x y) (Neg32 <t> (SLTIU <t> [64] (ZeroExt16to64 y))))
(Lsh32x32 <t> x y) -> (AND (SLL <t> x y) (Neg32 <t> (SLTIU <t> [64] (ZeroExt32to64 y))))
(Lsh32x64 <t> x y) -> (AND (SLL <t> x y) (Neg32 <t> (SLTIU <t> [64] y)))
(Lsh64x8 <t> x y) -> (AND (SLL <t> x y) (Neg64 <t> (SLTIU <t> [64] (ZeroExt8to64 y))))
(Lsh64x16 <t> x y) -> (AND (SLL <t> x y) (Neg64 <t> (SLTIU <t> [64] (ZeroExt16to64 y))))
(Lsh64x32 <t> x y) -> (AND (SLL <t> x y) (Neg64 <t> (SLTIU <t> [64] (ZeroExt32to64 y))))
(Lsh64x64 <t> x y) -> (AND (SLL <t> x y) (Neg64 <t> (SLTIU <t> [64] y)))
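
To see why the AND mask yields Go's shift semantics for oversized counts, here is a hedged standalone sketch of the 64-bit case (function name invented):

package main

import "fmt"

// shiftLeft mirrors the lowering above: SLL computes x << (y & 63), and the
// result is ANDed with -1 * uint64(y < 64), i.e. all ones when y < 64 and zero otherwise.
func shiftLeft(x, y uint64) uint64 {
    raw := x << (y & 63) // SLL
    var lt uint64
    if y < 64 { // SLTIU $64, y
        lt = 1
    }
    return raw & -lt // Neg then AND
}

func main() {
    fmt.Println(shiftLeft(1, 3) == 1<<3)   // true
    fmt.Println(shiftLeft(1, 63) == 1<<63) // true
    fmt.Println(shiftLeft(1, 64) == 0)     // true: oversized counts give 0
    fmt.Println(shiftLeft(1, 200) == 0)    // true
}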
// SRL only considers the bottom 6 bits of y. If y > 64, the result should
// always be 0. See Lsh above for a detailed description.
(Rsh8Ux8 <t> x y) -> (AND (SRL <t> (ZeroExt8to64 x) y) (Neg8 <t> (SLTIU <t> [64] (ZeroExt8to64 y))))
(Rsh8Ux16 <t> x y) -> (AND (SRL <t> (ZeroExt8to64 x) y) (Neg8 <t> (SLTIU <t> [64] (ZeroExt16to64 y))))
(Rsh8Ux32 <t> x y) -> (AND (SRL <t> (ZeroExt8to64 x) y) (Neg8 <t> (SLTIU <t> [64] (ZeroExt32to64 y))))
(Rsh8Ux64 <t> x y) -> (AND (SRL <t> (ZeroExt8to64 x) y) (Neg8 <t> (SLTIU <t> [64] y)))
(Rsh16Ux8 <t> x y) -> (AND (SRL <t> (ZeroExt16to64 x) y) (Neg16 <t> (SLTIU <t> [64] (ZeroExt8to64 y))))
(Rsh16Ux16 <t> x y) -> (AND (SRL <t> (ZeroExt16to64 x) y) (Neg16 <t> (SLTIU <t> [64] (ZeroExt16to64 y))))
(Rsh16Ux32 <t> x y) -> (AND (SRL <t> (ZeroExt16to64 x) y) (Neg16 <t> (SLTIU <t> [64] (ZeroExt32to64 y))))
(Rsh16Ux64 <t> x y) -> (AND (SRL <t> (ZeroExt16to64 x) y) (Neg16 <t> (SLTIU <t> [64] y)))
(Rsh32Ux8 <t> x y) -> (AND (SRL <t> (ZeroExt32to64 x) y) (Neg32 <t> (SLTIU <t> [64] (ZeroExt8to64 y))))
(Rsh32Ux16 <t> x y) -> (AND (SRL <t> (ZeroExt32to64 x) y) (Neg32 <t> (SLTIU <t> [64] (ZeroExt16to64 y))))
(Rsh32Ux32 <t> x y) -> (AND (SRL <t> (ZeroExt32to64 x) y) (Neg32 <t> (SLTIU <t> [64] (ZeroExt32to64 y))))
(Rsh32Ux64 <t> x y) -> (AND (SRL <t> (ZeroExt32to64 x) y) (Neg32 <t> (SLTIU <t> [64] y)))
(Rsh64Ux8 <t> x y) -> (AND (SRL <t> x y) (Neg64 <t> (SLTIU <t> [64] (ZeroExt8to64 y))))
(Rsh64Ux16 <t> x y) -> (AND (SRL <t> x y) (Neg64 <t> (SLTIU <t> [64] (ZeroExt16to64 y))))
(Rsh64Ux32 <t> x y) -> (AND (SRL <t> x y) (Neg64 <t> (SLTIU <t> [64] (ZeroExt32to64 y))))
(Rsh64Ux64 <t> x y) -> (AND (SRL <t> x y) (Neg64 <t> (SLTIU <t> [64] y)))
// SRA only considers the bottom 6 bits of y. If y > 64, the result should
// be either 0 or -1 based on the sign bit.
//
// We implement this by performing the max shift (-1) if y >= 64.
//
// We OR (uint64(y < 64) - 1) into y before passing it to SRA. This leaves
// us with -1 (0xffff...) if y >= 64.
//
// We don't need to sign-extend the OR result, as it will be at minimum 8 bits,
// more than the 6 bits SRA cares about.
(Rsh8x8 <t> x y) -> (SRA <t> (SignExt8to64 x) (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt8to64 y)))))
(Rsh8x16 <t> x y) -> (SRA <t> (SignExt8to64 x) (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt16to64 y)))))
(Rsh8x32 <t> x y) -> (SRA <t> (SignExt8to64 x) (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt32to64 y)))))
(Rsh8x64 <t> x y) -> (SRA <t> (SignExt8to64 x) (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] y))))
(Rsh16x8 <t> x y) -> (SRA <t> (SignExt16to64 x) (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt8to64 y)))))
(Rsh16x16 <t> x y) -> (SRA <t> (SignExt16to64 x) (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt16to64 y)))))
(Rsh16x32 <t> x y) -> (SRA <t> (SignExt16to64 x) (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt32to64 y)))))
(Rsh16x64 <t> x y) -> (SRA <t> (SignExt16to64 x) (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] y))))
(Rsh32x8 <t> x y) -> (SRA <t> (SignExt32to64 x) (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt8to64 y)))))
(Rsh32x16 <t> x y) -> (SRA <t> (SignExt32to64 x) (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt16to64 y)))))
(Rsh32x32 <t> x y) -> (SRA <t> (SignExt32to64 x) (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt32to64 y)))))
(Rsh32x64 <t> x y) -> (SRA <t> (SignExt32to64 x) (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] y))))
(Rsh64x8 <t> x y) -> (SRA <t> x (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt8to64 y)))))
(Rsh64x16 <t> x y) -> (SRA <t> x (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt16to64 y)))))
(Rsh64x32 <t> x y) -> (SRA <t> x (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt32to64 y)))))
(Rsh64x64 <t> x y) -> (SRA <t> x (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] y))))
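
The OR trick for arithmetic shifts can be checked the same way; a sketch of the 64-bit signed case (function name invented):

package main

import "fmt"

// shiftRightSigned mirrors the lowering above: OR (uint64(y < 64) - 1) into y,
// so any count of 64 or more becomes all ones and SRA sees the maximal shift of 63.
func shiftRightSigned(x int64, y uint64) int64 {
    var lt uint64
    if y < 64 { // SLTIU $64, y
        lt = 1
    }
    amount := y | (lt - 1)    // y if y < 64, otherwise 0xffff...
    return x >> (amount & 63) // SRA uses the low 6 bits
}

func main() {
    fmt.Println(shiftRightSigned(-8, 1))   // -4
    fmt.Println(shiftRightSigned(-8, 100)) // -1: the sign bit is preserved
    fmt.Println(shiftRightSigned(8, 100))  // 0
}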
// rotates
(RotateLeft8 <t> x (MOVBconst [c])) -> (Or8 (Lsh8x64 <t> x (MOVBconst [c&7])) (Rsh8Ux64 <t> x (MOVBconst [-c&7])))
(RotateLeft16 <t> x (MOVHconst [c])) -> (Or16 (Lsh16x64 <t> x (MOVHconst [c&15])) (Rsh16Ux64 <t> x (MOVHconst [-c&15])))
(RotateLeft32 <t> x (MOVWconst [c])) -> (Or32 (Lsh32x64 <t> x (MOVWconst [c&31])) (Rsh32Ux64 <t> x (MOVWconst [-c&31])))
(RotateLeft64 <t> x (MOVDconst [c])) -> (Or64 (Lsh64x64 <t> x (MOVDconst [c&63])) (Rsh64Ux64 <t> x (MOVDconst [-c&63])))
(Less64 x y) -> (SLT x y)
(Less32 x y) -> (SLT (SignExt32to64 x) (SignExt32to64 y))
(Less16 x y) -> (SLT (SignExt16to64 x) (SignExt16to64 y))
(Less8 x y) -> (SLT (SignExt8to64 x) (SignExt8to64 y))
(Less64U x y) -> (SLTU x y)
(Less32U x y) -> (SLTU (ZeroExt32to64 x) (ZeroExt32to64 y))
(Less16U x y) -> (SLTU (ZeroExt16to64 x) (ZeroExt16to64 y))
(Less8U x y) -> (SLTU (ZeroExt8to64 x) (ZeroExt8to64 y))
(Less64F x y) -> (FLTD x y)
(Less32F x y) -> (FLTS x y)
// Convert x <= y to !(y > x).
(Leq64 x y) -> (Not (Less64 y x))
(Leq32 x y) -> (Not (Less32 y x))
(Leq16 x y) -> (Not (Less16 y x))
(Leq8 x y) -> (Not (Less8 y x))
(Leq64U x y) -> (Not (Less64U y x))
(Leq32U x y) -> (Not (Less32U y x))
(Leq16U x y) -> (Not (Less16U y x))
(Leq8U x y) -> (Not (Less8U y x))
(Leq64F x y) -> (FLED x y)
(Leq32F x y) -> (FLES x y)
// Convert x > y to y < x.
(Greater64 x y) -> (Less64 y x)
(Greater32 x y) -> (Less32 y x)
(Greater16 x y) -> (Less16 y x)
(Greater8 x y) -> (Less8 y x)
(Greater64U x y) -> (Less64U y x)
(Greater32U x y) -> (Less32U y x)
(Greater16U x y) -> (Less16U y x)
(Greater8U x y) -> (Less8U y x)
(Greater64F x y) -> (FLTD y x)
(Greater32F x y) -> (FLTS y x)
// Convert x >= y to !(x < y)
(Geq64 x y) -> (Not (Less64 x y))
(Geq32 x y) -> (Not (Less32 x y))
(Geq16 x y) -> (Not (Less16 x y))
(Geq8 x y) -> (Not (Less8 x y))
(Geq64U x y) -> (Not (Less64U x y))
(Geq32U x y) -> (Not (Less32U x y))
(Geq16U x y) -> (Not (Less16U x y))
(Geq8U x y) -> (Not (Less8U x y))
(Geq64F x y) -> (FLED y x)
(Geq32F x y) -> (FLES y x)
(EqPtr x y) -> (SEQZ (SUB <x.Type> x y))
(Eq64 x y) -> (SEQZ (SUB <x.Type> x y))
(Eq32 x y) -> (SEQZ (ZeroExt32to64 (SUB <x.Type> x y)))
(Eq16 x y) -> (SEQZ (ZeroExt16to64 (SUB <x.Type> x y)))
(Eq8 x y) -> (SEQZ (ZeroExt8to64 (SUB <x.Type> x y)))
(Eq64F x y) -> (FEQD x y)
(Eq32F x y) -> (FEQS x y)
(NeqPtr x y) -> (SNEZ (SUB <x.Type> x y))
(Neq64 x y) -> (SNEZ (SUB <x.Type> x y))
(Neq32 x y) -> (SNEZ (ZeroExt32to64 (SUB <x.Type> x y)))
(Neq16 x y) -> (SNEZ (ZeroExt16to64 (SUB <x.Type> x y)))
(Neq8 x y) -> (SNEZ (ZeroExt8to64 (SUB <x.Type> x y)))
(Neq64F x y) -> (FNED x y)
(Neq32F x y) -> (FNES x y)
// Loads
(Load <t> ptr mem) && t.IsBoolean() -> (MOVBUload ptr mem)
(Load <t> ptr mem) && ( is8BitInt(t) && isSigned(t)) -> (MOVBload ptr mem)
(Load <t> ptr mem) && ( is8BitInt(t) && !isSigned(t)) -> (MOVBUload ptr mem)
(Load <t> ptr mem) && (is16BitInt(t) && isSigned(t)) -> (MOVHload ptr mem)
(Load <t> ptr mem) && (is16BitInt(t) && !isSigned(t)) -> (MOVHUload ptr mem)
(Load <t> ptr mem) && (is32BitInt(t) && isSigned(t)) -> (MOVWload ptr mem)
(Load <t> ptr mem) && (is32BitInt(t) && !isSigned(t)) -> (MOVWUload ptr mem)
(Load <t> ptr mem) && (is64BitInt(t) || isPtr(t)) -> (MOVDload ptr mem)
(Load <t> ptr mem) && is32BitFloat(t) -> (FMOVWload ptr mem)
(Load <t> ptr mem) && is64BitFloat(t) -> (FMOVDload ptr mem)
// Stores
(Store {t} ptr val mem) && t.(*types.Type).Size() == 1 -> (MOVBstore ptr val mem)
(Store {t} ptr val mem) && t.(*types.Type).Size() == 2 -> (MOVHstore ptr val mem)
(Store {t} ptr val mem) && t.(*types.Type).Size() == 4 && !is32BitFloat(val.Type) -> (MOVWstore ptr val mem)
(Store {t} ptr val mem) && t.(*types.Type).Size() == 8 && !is64BitFloat(val.Type) -> (MOVDstore ptr val mem)
(Store {t} ptr val mem) && t.(*types.Type).Size() == 4 && is32BitFloat(val.Type) -> (FMOVWstore ptr val mem)
(Store {t} ptr val mem) && t.(*types.Type).Size() == 8 && is64BitFloat(val.Type) -> (FMOVDstore ptr val mem)
// We need to fold MOVaddr into the LD/MOVDstore ops so that the live variable analysis
// knows what variables are being read/written by the ops.
(MOVBUload [off1] {sym1} (MOVaddr [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
(MOVBUload [off1+off2] {mergeSym(sym1,sym2)} base mem)
(MOVBload [off1] {sym1} (MOVaddr [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
(MOVBload [off1+off2] {mergeSym(sym1,sym2)} base mem)
(MOVHUload [off1] {sym1} (MOVaddr [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
(MOVHUload [off1+off2] {mergeSym(sym1,sym2)} base mem)
(MOVHload [off1] {sym1} (MOVaddr [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
(MOVHload [off1+off2] {mergeSym(sym1,sym2)} base mem)
(MOVWUload [off1] {sym1} (MOVaddr [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
(MOVWUload [off1+off2] {mergeSym(sym1,sym2)} base mem)
(MOVWload [off1] {sym1} (MOVaddr [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
(MOVWload [off1+off2] {mergeSym(sym1,sym2)} base mem)
(MOVDload [off1] {sym1} (MOVaddr [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
(MOVDload [off1+off2] {mergeSym(sym1,sym2)} base mem)
(MOVBstore [off1] {sym1} (MOVaddr [off2] {sym2} base) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
(MOVBstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
(MOVHstore [off1] {sym1} (MOVaddr [off2] {sym2} base) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
(MOVHstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
(MOVWstore [off1] {sym1} (MOVaddr [off2] {sym2} base) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
(MOVWstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
(MOVDstore [off1] {sym1} (MOVaddr [off2] {sym2} base) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
(MOVDstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
(MOVBUload [off1] {sym} (ADDI [off2] base) mem) && is32Bit(off1+off2) ->
(MOVBUload [off1+off2] {sym} base mem)
(MOVBload [off1] {sym} (ADDI [off2] base) mem) && is32Bit(off1+off2) ->
(MOVBload [off1+off2] {sym} base mem)
(MOVHUload [off1] {sym} (ADDI [off2] base) mem) && is32Bit(off1+off2) ->
(MOVHUload [off1+off2] {sym} base mem)
(MOVHload [off1] {sym} (ADDI [off2] base) mem) && is32Bit(off1+off2) ->
(MOVHload [off1+off2] {sym} base mem)
(MOVWUload [off1] {sym} (ADDI [off2] base) mem) && is32Bit(off1+off2) ->
(MOVWUload [off1+off2] {sym} base mem)
(MOVWload [off1] {sym} (ADDI [off2] base) mem) && is32Bit(off1+off2) ->
(MOVWload [off1+off2] {sym} base mem)
(MOVDload [off1] {sym} (ADDI [off2] base) mem) && is32Bit(off1+off2) ->
(MOVDload [off1+off2] {sym} base mem)
(MOVBstore [off1] {sym} (ADDI [off2] base) val mem) && is32Bit(off1+off2) ->
(MOVBstore [off1+off2] {sym} base val mem)
(MOVHstore [off1] {sym} (ADDI [off2] base) val mem) && is32Bit(off1+off2) ->
(MOVHstore [off1+off2] {sym} base val mem)
(MOVWstore [off1] {sym} (ADDI [off2] base) val mem) && is32Bit(off1+off2) ->
(MOVWstore [off1+off2] {sym} base val mem)
(MOVDstore [off1] {sym} (ADDI [off2] base) val mem) && is32Bit(off1+off2) ->
(MOVDstore [off1+off2] {sym} base val mem)
// Similarly, fold ADDI into MOVaddr to avoid confusing live variable analysis
// with OffPtr -> ADDI.
(ADDI [c] (MOVaddr [d] {s} x)) && is32Bit(c+d) -> (MOVaddr [c+d] {s} x)
// Zeroing
// TODO: more optimized zeroing, including attempting to use aligned accesses.
(Zero [0] _ mem) -> mem
(Zero [1] ptr mem) -> (MOVBstore ptr (MOVBconst) mem)
(Zero [2] ptr mem) -> (MOVHstore ptr (MOVHconst) mem)
(Zero [4] ptr mem) -> (MOVWstore ptr (MOVWconst) mem)
(Zero [8] ptr mem) -> (MOVDstore ptr (MOVDconst) mem)
// Generic zeroing uses a loop
(Zero [s] {t} ptr mem) ->
(LoweredZero [t.(*types.Type).Alignment()]
ptr
(ADD <ptr.Type> ptr (MOVDconst [s-moveSize(t.(*types.Type).Alignment(), config)]))
mem)
(Convert x mem) -> (MOVconvert x mem)
// Checks
(IsNonNil p) -> (NeqPtr (MOVDconst) p)
(IsInBounds idx len) -> (Less64U idx len)
(IsSliceInBounds idx len) -> (Leq64U idx len)
// Trivial lowering
(NilCheck ptr mem) -> (LoweredNilCheck ptr mem)
(GetClosurePtr) -> (LoweredGetClosurePtr)
(GetCallerSP) -> (LoweredGetCallerSP)
(GetCallerPC) -> (LoweredGetCallerPC)
// Write barrier.
(WB {fn} destptr srcptr mem) -> (LoweredWB {fn} destptr srcptr mem)
(PanicBounds [kind] x y mem) && boundsABI(kind) == 0 -> (LoweredPanicBoundsA [kind] x y mem)
(PanicBounds [kind] x y mem) && boundsABI(kind) == 1 -> (LoweredPanicBoundsB [kind] x y mem)
(PanicBounds [kind] x y mem) && boundsABI(kind) == 2 -> (LoweredPanicBoundsC [kind] x y mem)
// Moves
// TODO: more optimized moves, including attempting to use aligned accesses.
(Move [0] _ _ mem) -> mem
(Move [1] dst src mem) -> (MOVBstore dst (MOVBload src mem) mem)
(Move [2] dst src mem) -> (MOVHstore dst (MOVHload src mem) mem)
(Move [4] dst src mem) -> (MOVWstore dst (MOVWload src mem) mem)
(Move [8] dst src mem) -> (MOVDstore dst (MOVDload src mem) mem)
// Generic move uses a loop
(Move [s] {t} dst src mem) ->
(LoweredMove [t.(*types.Type).Alignment()]
dst
src
(ADDI <src.Type> [s-moveSize(t.(*types.Type).Alignment(), config)] src)
mem)
// Boolean ops; 0=false, 1=true
(AndB x y) -> (AND x y)
(OrB x y) -> (OR x y)
(EqB x y) -> (XORI [1] (XOR <typ.Bool> x y))
(NeqB x y) -> (XOR x y)
(Not x) -> (XORI [1] x)
// Lowering pointer arithmetic
// TODO: Special handling for SP offsets, like ARM
(OffPtr [off] ptr:(SP)) -> (MOVaddr [off] ptr)
(OffPtr [off] ptr) && is32Bit(off) -> (ADDI [off] ptr)
(OffPtr [off] ptr) -> (ADD (MOVDconst [off]) ptr)
(Const8 [val]) -> (MOVBconst [val])
(Const16 [val]) -> (MOVHconst [val])
(Const32 [val]) -> (MOVWconst [val])
(Const64 [val]) -> (MOVDconst [val])
(Const32F [val]) -> (FMVSX (MOVWconst [int64(int32(math.Float32bits(float32(math.Float64frombits(uint64(val))))))]))
(Const64F [val]) -> (FMVDX (MOVDconst [val]))
(ConstNil) -> (MOVDconst [0])
(ConstBool [b]) -> (MOVBconst [b])
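
The Const32F rule above packs a float32 bit pattern into a MOVWconst before moving it to an FP register with FMVSX. A standalone sketch of that AuxInt round trip, assuming (as the rule's expression indicates) that the AuxInt carries the value encoded as float64 bits:

package main

import (
    "fmt"
    "math"
)

// const32F mirrors the AuxInt expression in the Const32F rule: decode the
// float64-encoded AuxInt, narrow it to float32, and reinterpret the result
// as a 32-bit integer pattern for MOVWconst.
func const32F(auxInt int64) int32 {
    f := float32(math.Float64frombits(uint64(auxInt)))
    return int32(math.Float32bits(f))
}

func main() {
    aux := int64(math.Float64bits(1.5))
    fmt.Printf("%#x\n", const32F(aux)) // 0x3fc00000, the float32 bits of 1.5
}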
// Convert 64 bit immediate to two 32 bit immediates, combine with add and shift.
// The lower 32 bit immediate will be treated as signed,
// so if it is negative, adjust for the borrow by incrementing the top half.
// We don't have to worry about overflow from the increment,
// because if the top half is all 1s, and int32(c) is negative,
// then the overall constant fits in an int32.
(MOVDconst <t> [c]) && !is32Bit(c) && int32(c) < 0 -> (ADD (SLLI <t> [32] (MOVDconst [c>>32+1])) (MOVDconst [int64(int32(c))]))
(MOVDconst <t> [c]) && !is32Bit(c) && int32(c) >= 0 -> (ADD (SLLI <t> [32] (MOVDconst [c>>32+0])) (MOVDconst [int64(int32(c))]))
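
A small self-check of the split-and-recombine identity described above, for illustration only:

package main

import "fmt"

// splitConst mirrors the MOVDconst lowering above: a shifted upper half plus a
// sign-extended lower 32 bits, with the upper half bumped by one when the low
// half is negative to cancel the borrow.
func splitConst(c int64) int64 {
    lo := int64(int32(c)) // low 32 bits, sign extended
    hi := c >> 32
    if lo < 0 {
        hi++ // adjust for the borrow
    }
    return hi<<32 + lo
}

func main() {
    for _, c := range []int64{0x123456789abcdef0, -0x123456789abcdef0, 1 << 40, -1} {
        fmt.Println(splitConst(c) == c) // all true
    }
}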
// Fold ADD+MOVDconst into ADDI where possible.
(ADD (MOVDconst [off]) ptr) && is32Bit(off) -> (ADDI [off] ptr)
(Addr {sym} base) -> (MOVaddr {sym} base)
(LocalAddr {sym} base _) -> (MOVaddr {sym} base)
// Conditional branches
//
// cond is 1 if true. BNE compares against 0.
//
// TODO(prattmic): RISCV branch instructions take two operands to compare,
// so we could generate more efficient code by computing the condition in the
// branch itself. This should be revisited now that the compiler has support
// for two control values (https://golang.org/cl/196557).
(If cond yes no) -> (BNE cond yes no)
// Calls
(StaticCall [argwid] {target} mem) -> (CALLstatic [argwid] {target} mem)
(ClosureCall [argwid] entry closure mem) -> (CALLclosure [argwid] entry closure mem)
(InterCall [argwid] entry mem) -> (CALLinter [argwid] entry mem)
// remove redundant *const ops
(ADDI [0] x) -> x


@@ -0,0 +1,312 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
import "cmd/internal/obj/riscv"
// Suffixes encode the bit width of various instructions:
//
// D (double word) = 64 bit int
// W (word) = 32 bit int
// H (half word) = 16 bit int
// B (byte) = 8 bit int
// S (single) = 32 bit float
// D (double) = 64 bit float
// L = 64 bit int, used when the opcode starts with F
func init() {
var regNamesRISCV64 []string
var gpMask, fpMask, gpspMask, gpspsbMask regMask
regNamed := make(map[string]regMask)
// Build the list of register names, creating an appropriately indexed
// regMask for the gp and fp registers as we go.
//
// If name is specified, use it rather than the riscv reg number.
addreg := func(r int, name string) regMask {
mask := regMask(1) << uint(len(regNamesRISCV64))
if name == "" {
name = riscv.RegName(r)
}
regNamesRISCV64 = append(regNamesRISCV64, name)
regNamed[name] = mask
return mask
}
// General purpose registers.
for r := riscv.REG_X0; r <= riscv.REG_X31; r++ {
if r == riscv.REG_LR {
// LR is not used by regalloc, so we skip it to leave
// room for pseudo-register SB.
continue
}
mask := addreg(r, "")
// Add general purpose registers to gpMask.
switch r {
// ZERO, g, and TMP are not in any gp mask.
case riscv.REG_ZERO, riscv.REG_G, riscv.REG_TMP:
case riscv.REG_SP:
gpspMask |= mask
gpspsbMask |= mask
default:
gpMask |= mask
gpspMask |= mask
gpspsbMask |= mask
}
}
// Floating point registers.
for r := riscv.REG_F0; r <= riscv.REG_F31; r++ {
mask := addreg(r, "")
fpMask |= mask
}
// Pseudo-register: SB
mask := addreg(-1, "SB")
gpspsbMask |= mask
if len(regNamesRISCV64) > 64 {
// regMask is only 64 bits.
panic("Too many RISCV64 registers")
}
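
For readers unfamiliar with regMask, here is a minimal sketch of the bit-per-register bookkeeping that addreg performs above (simplified to plain strings, not part of the generator):

package main

import "fmt"

// A register mask holds one bit per register, indexed by the register's
// position in the name list, which is why the masks are built as the
// names are appended.
func main() {
    var names []string
    var gpMask uint64
    addreg := func(name string) uint64 {
        mask := uint64(1) << uint(len(names))
        names = append(names, name)
        return mask
    }
    gpMask |= addreg("X5")
    gpMask |= addreg("X6")
    fmt.Printf("%d registers, gpMask = %b\n", len(names), gpMask) // 2 registers, gpMask = 11
}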
regCtxt := regNamed["X20"]
callerSave := gpMask | fpMask | regNamed["g"]
var (
gpstore = regInfo{inputs: []regMask{gpspsbMask, gpspMask, 0}} // SB in first input so we can load from a global, but not in second to avoid using SB as a temporary register
gp01 = regInfo{outputs: []regMask{gpMask}}
gp11 = regInfo{inputs: []regMask{gpMask}, outputs: []regMask{gpMask}}
gp21 = regInfo{inputs: []regMask{gpMask, gpMask}, outputs: []regMask{gpMask}}
gpload = regInfo{inputs: []regMask{gpspsbMask, 0}, outputs: []regMask{gpMask}}
gp11sb = regInfo{inputs: []regMask{gpspsbMask}, outputs: []regMask{gpMask}}
fp11 = regInfo{inputs: []regMask{fpMask}, outputs: []regMask{fpMask}}
fp21 = regInfo{inputs: []regMask{fpMask, fpMask}, outputs: []regMask{fpMask}}
gpfp = regInfo{inputs: []regMask{gpMask}, outputs: []regMask{fpMask}}
fpgp = regInfo{inputs: []regMask{fpMask}, outputs: []regMask{gpMask}}
fpstore = regInfo{inputs: []regMask{gpspsbMask, fpMask, 0}}
fpload = regInfo{inputs: []regMask{gpspsbMask, 0}, outputs: []regMask{fpMask}}
fp2gp = regInfo{inputs: []regMask{fpMask, fpMask}, outputs: []regMask{gpMask}}
call = regInfo{clobbers: callerSave}
callClosure = regInfo{inputs: []regMask{gpspMask, regCtxt, 0}, clobbers: callerSave}
callInter = regInfo{inputs: []regMask{gpMask}, clobbers: callerSave}
)
RISCV64ops := []opData{
{name: "ADD", argLength: 2, reg: gp21, asm: "ADD", commutative: true}, // arg0 + arg1
{name: "ADDI", argLength: 1, reg: gp11sb, asm: "ADDI", aux: "Int64"}, // arg0 + auxint
{name: "SUB", argLength: 2, reg: gp21, asm: "SUB"}, // arg0 - arg1
// M extension. H means high (i.e., it returns the top bits of
// the result). U means unsigned. W means word (i.e., 32-bit).
{name: "MUL", argLength: 2, reg: gp21, asm: "MUL", commutative: true, typ: "Int64"}, // arg0 * arg1
{name: "MULW", argLength: 2, reg: gp21, asm: "MULW", commutative: true, typ: "Int32"},
{name: "MULH", argLength: 2, reg: gp21, asm: "MULH", commutative: true, typ: "Int64"},
{name: "MULHU", argLength: 2, reg: gp21, asm: "MULHU", commutative: true, typ: "UInt64"},
{name: "DIV", argLength: 2, reg: gp21, asm: "DIV", typ: "Int64"}, // arg0 / arg1
{name: "DIVU", argLength: 2, reg: gp21, asm: "DIVU", typ: "UInt64"},
{name: "DIVW", argLength: 2, reg: gp21, asm: "DIVW", typ: "Int32"},
{name: "DIVUW", argLength: 2, reg: gp21, asm: "DIVUW", typ: "UInt32"},
{name: "REM", argLength: 2, reg: gp21, asm: "REM", typ: "Int64"}, // arg0 % arg1
{name: "REMU", argLength: 2, reg: gp21, asm: "REMU", typ: "UInt64"},
{name: "REMW", argLength: 2, reg: gp21, asm: "REMW", typ: "Int32"},
{name: "REMUW", argLength: 2, reg: gp21, asm: "REMUW", typ: "UInt32"},
{name: "MOVaddr", argLength: 1, reg: gp11sb, asm: "MOV", aux: "SymOff", rematerializeable: true, symEffect: "RdWr"}, // arg0 + auxint + offset encoded in aux
// auxint+aux == add auxint and the offset of the symbol in aux (if any) to the effective address
{name: "MOVBconst", reg: gp01, asm: "MOV", typ: "UInt8", aux: "Int8", rematerializeable: true}, // 8 low bits of auxint
{name: "MOVHconst", reg: gp01, asm: "MOV", typ: "UInt16", aux: "Int16", rematerializeable: true}, // 16 low bits of auxint
{name: "MOVWconst", reg: gp01, asm: "MOV", typ: "UInt32", aux: "Int32", rematerializeable: true}, // 32 low bits of auxint
{name: "MOVDconst", reg: gp01, asm: "MOV", typ: "UInt64", aux: "Int64", rematerializeable: true}, // auxint
// Loads: load <size> bits from arg0+auxint+aux and extend to 64 bits; arg1=mem
{name: "MOVBload", argLength: 2, reg: gpload, asm: "MOVB", aux: "SymOff", typ: "Int8", faultOnNilArg0: true, symEffect: "Read"}, // 8 bits, sign extend
{name: "MOVHload", argLength: 2, reg: gpload, asm: "MOVH", aux: "SymOff", typ: "Int16", faultOnNilArg0: true, symEffect: "Read"}, // 16 bits, sign extend
{name: "MOVWload", argLength: 2, reg: gpload, asm: "MOVW", aux: "SymOff", typ: "Int32", faultOnNilArg0: true, symEffect: "Read"}, // 32 bits, sign extend
{name: "MOVDload", argLength: 2, reg: gpload, asm: "MOV", aux: "SymOff", typ: "Int64", faultOnNilArg0: true, symEffect: "Read"}, // 64 bits
{name: "MOVBUload", argLength: 2, reg: gpload, asm: "MOVBU", aux: "SymOff", typ: "UInt8", faultOnNilArg0: true, symEffect: "Read"}, // 8 bits, zero extend
{name: "MOVHUload", argLength: 2, reg: gpload, asm: "MOVHU", aux: "SymOff", typ: "UInt16", faultOnNilArg0: true, symEffect: "Read"}, // 16 bits, zero extend
{name: "MOVWUload", argLength: 2, reg: gpload, asm: "MOVWU", aux: "SymOff", typ: "UInt32", faultOnNilArg0: true, symEffect: "Read"}, // 32 bits, zero extend
// Stores: store <size> lowest bits in arg1 to arg0+auxint+aux; arg2=mem
{name: "MOVBstore", argLength: 3, reg: gpstore, asm: "MOVB", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // 8 bits
{name: "MOVHstore", argLength: 3, reg: gpstore, asm: "MOVH", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // 16 bits
{name: "MOVWstore", argLength: 3, reg: gpstore, asm: "MOVW", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // 32 bits
{name: "MOVDstore", argLength: 3, reg: gpstore, asm: "MOV", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // 64 bits
// Shift ops
{name: "SLL", argLength: 2, reg: gp21, asm: "SLL"}, // arg0 << aux1
{name: "SRA", argLength: 2, reg: gp21, asm: "SRA"}, // arg0 >> aux1, signed
{name: "SRL", argLength: 2, reg: gp21, asm: "SRL"}, // arg0 >> aux1, unsigned
{name: "SLLI", argLength: 1, reg: gp11, asm: "SLLI", aux: "Int64"}, // arg0 << auxint
{name: "SRAI", argLength: 1, reg: gp11, asm: "SRAI", aux: "Int64"}, // arg0 >> auxint, signed
{name: "SRLI", argLength: 1, reg: gp11, asm: "SRLI", aux: "Int64"}, // arg0 >> auxint, unsigned
// Bitwise ops
{name: "XOR", argLength: 2, reg: gp21, asm: "XOR", commutative: true}, // arg0 ^ arg1
{name: "XORI", argLength: 1, reg: gp11, asm: "XORI", aux: "Int64"}, // arg0 ^ auxint
{name: "OR", argLength: 2, reg: gp21, asm: "OR", commutative: true}, // arg0 | arg1
{name: "ORI", argLength: 1, reg: gp11, asm: "ORI", aux: "Int64"}, // arg0 | auxint
{name: "AND", argLength: 2, reg: gp21, asm: "AND", commutative: true}, // arg0 & arg1
{name: "ANDI", argLength: 1, reg: gp11, asm: "ANDI", aux: "Int64"}, // arg0 & auxint
// Generate boolean values
{name: "SEQZ", argLength: 1, reg: gp11, asm: "SEQZ"}, // arg0 == 0, result is 0 or 1
{name: "SNEZ", argLength: 1, reg: gp11, asm: "SNEZ"}, // arg0 != 0, result is 0 or 1
{name: "SLT", argLength: 2, reg: gp21, asm: "SLT"}, // arg0 < arg1, result is 0 or 1
{name: "SLTI", argLength: 1, reg: gp11, asm: "SLTI", aux: "Int64"}, // arg0 < auxint, result is 0 or 1
{name: "SLTU", argLength: 2, reg: gp21, asm: "SLTU"}, // arg0 < arg1, unsigned, result is 0 or 1
{name: "SLTIU", argLength: 1, reg: gp11, asm: "SLTIU", aux: "Int64"}, // arg0 < auxint, unsigned, result is 0 or 1
// MOVconvert converts between pointers and integers.
// We have a special op for this so as to not confuse GC
// (particularly stack maps). It takes a memory arg so it
// gets correctly ordered with respect to GC safepoints.
{name: "MOVconvert", argLength: 2, reg: gp11, asm: "MOV"}, // arg0, but converted to int/ptr as appropriate; arg1=mem
// Calls
{name: "CALLstatic", argLength: 1, reg: call, aux: "SymOff", call: true, symEffect: "None"}, // call static function aux.(*gc.Sym). arg0=mem, auxint=argsize, returns mem
{name: "CALLclosure", argLength: 3, reg: callClosure, aux: "Int64", call: true}, // call function via closure. arg0=codeptr, arg1=closure, arg2=mem, auxint=argsize, returns mem
{name: "CALLinter", argLength: 2, reg: callInter, aux: "Int64", call: true}, // call fn by pointer. arg0=codeptr, arg1=mem, auxint=argsize, returns mem
// Generic moves and zeros
// general unaligned zeroing
// arg0 = address of memory to zero (in X5, changed as side effect)
// arg1 = address of the last element to zero (inclusive)
// arg2 = mem
// auxint = element size
// returns mem
// mov ZERO, (X5)
// ADD $sz, X5
// BGEU Rarg1, X5, -2(PC)
{
name: "LoweredZero",
aux: "Int64",
argLength: 3,
reg: regInfo{
inputs: []regMask{regNamed["X5"], gpMask},
clobbers: regNamed["X5"],
},
typ: "Mem",
faultOnNilArg0: true,
},
// general unaligned move
// arg0 = address of dst memory (in X5, changed as side effect)
// arg1 = address of src memory (in X6, changed as side effect)
// arg2 = address of the last element of src (can't be X7 as we clobber it before using arg2)
// arg3 = mem
// auxint = alignment
// clobbers X7 as a tmp register.
// returns mem
// mov (X6), X7
// mov X7, (X5)
// ADD $sz, X5
// ADD $sz, X6
// BGEU Rarg2, X5, -4(PC)
{
name: "LoweredMove",
aux: "Int64",
argLength: 4,
reg: regInfo{
inputs: []regMask{regNamed["X5"], regNamed["X6"], gpMask &^ regNamed["X7"]},
clobbers: regNamed["X5"] | regNamed["X6"] | regNamed["X7"],
},
typ: "Mem",
faultOnNilArg0: true,
faultOnNilArg1: true,
},
// Lowering pass-throughs
{name: "LoweredNilCheck", argLength: 2, faultOnNilArg0: true, nilCheck: true, reg: regInfo{inputs: []regMask{gpspMask}}}, // arg0=ptr,arg1=mem, returns void. Faults if ptr is nil.
{name: "LoweredGetClosurePtr", reg: regInfo{outputs: []regMask{regCtxt}}}, // scheduler ensures only at beginning of entry block
// LoweredGetCallerSP returns the SP of the caller of the current function.
{name: "LoweredGetCallerSP", reg: gp01, rematerializeable: true},
// LoweredGetCallerPC evaluates to the PC to which its "caller" will return.
// I.e., if f calls g "calls" getcallerpc,
// the result should be the PC within f that g will return to.
// See runtime/stubs.go for a more detailed discussion.
{name: "LoweredGetCallerPC", reg: gp01, rematerializeable: true},
// LoweredWB invokes runtime.gcWriteBarrier. arg0=destptr, arg1=srcptr, arg2=mem, aux=runtime.gcWriteBarrier
// It saves all GP registers if necessary,
// but clobbers RA (LR) because it's a call
// and T6 (REG_TMP).
{name: "LoweredWB", argLength: 3, reg: regInfo{inputs: []regMask{regNamed["X5"], regNamed["X6"]}, clobbers: (callerSave &^ (gpMask | regNamed["g"])) | regNamed["X1"]}, clobberFlags: true, aux: "Sym", symEffect: "None"},
// There are three of these functions so that they can have three different register inputs.
// When we check 0 <= c <= cap (A), then 0 <= b <= c (B), then 0 <= a <= b (C), we want the
// default registers to match so we don't need to copy registers around unnecessarily.
{name: "LoweredPanicBoundsA", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{regNamed["X7"], regNamed["X28"]}}, typ: "Mem"}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go).
{name: "LoweredPanicBoundsB", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{regNamed["X6"], regNamed["X7"]}}, typ: "Mem"}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go).
{name: "LoweredPanicBoundsC", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{regNamed["X5"], regNamed["X6"]}}, typ: "Mem"}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go).
// F extension.
{name: "FADDS", argLength: 2, reg: fp21, asm: "FADDS", commutative: true, typ: "Float32"}, // arg0 + arg1
{name: "FSUBS", argLength: 2, reg: fp21, asm: "FSUBS", commutative: false, typ: "Float32"}, // arg0 - arg1
{name: "FMULS", argLength: 2, reg: fp21, asm: "FMULS", commutative: true, typ: "Float32"}, // arg0 * arg1
{name: "FDIVS", argLength: 2, reg: fp21, asm: "FDIVS", commutative: false, typ: "Float32"}, // arg0 / arg1
{name: "FSQRTS", argLength: 1, reg: fp11, asm: "FSQRTS", typ: "Float32"}, // sqrt(arg0)
{name: "FNEGS", argLength: 1, reg: fp11, asm: "FNEGS", typ: "Float32"}, // -arg0
{name: "FMVSX", argLength: 1, reg: gpfp, asm: "FMVSX", typ: "Float32"}, // reinterpret arg0 as float
{name: "FCVTSW", argLength: 1, reg: gpfp, asm: "FCVTSW", typ: "Float32"}, // float32(low 32 bits of arg0)
{name: "FCVTSL", argLength: 1, reg: gpfp, asm: "FCVTSL", typ: "Float32"}, // float32(arg0)
{name: "FCVTWS", argLength: 1, reg: fpgp, asm: "FCVTWS", typ: "Int32"}, // int32(arg0)
{name: "FCVTLS", argLength: 1, reg: fpgp, asm: "FCVTLS", typ: "Int64"}, // int64(arg0)
{name: "FMOVWload", argLength: 2, reg: fpload, asm: "MOVF", aux: "SymOff", typ: "Float32", faultOnNilArg0: true, symEffect: "Read"}, // load float32 from arg0+auxint+aux
{name: "FMOVWstore", argLength: 3, reg: fpstore, asm: "MOVF", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store float32 to arg0+auxint+aux
{name: "FEQS", argLength: 2, reg: fp2gp, asm: "FEQS", commutative: true}, // arg0 == arg1
{name: "FNES", argLength: 2, reg: fp2gp, asm: "FNES", commutative: true}, // arg0 != arg1
{name: "FLTS", argLength: 2, reg: fp2gp, asm: "FLTS"}, // arg0 < arg1
{name: "FLES", argLength: 2, reg: fp2gp, asm: "FLES"}, // arg0 <= arg1
// D extension.
{name: "FADDD", argLength: 2, reg: fp21, asm: "FADDD", commutative: true, typ: "Float64"}, // arg0 + arg1
{name: "FSUBD", argLength: 2, reg: fp21, asm: "FSUBD", commutative: false, typ: "Float64"}, // arg0 - arg1
{name: "FMULD", argLength: 2, reg: fp21, asm: "FMULD", commutative: true, typ: "Float64"}, // arg0 * arg1
{name: "FDIVD", argLength: 2, reg: fp21, asm: "FDIVD", commutative: false, typ: "Float64"}, // arg0 / arg1
{name: "FSQRTD", argLength: 1, reg: fp11, asm: "FSQRTD", typ: "Float64"}, // sqrt(arg0)
{name: "FNEGD", argLength: 1, reg: fp11, asm: "FNEGD", typ: "Float64"}, // -arg0
{name: "FMVDX", argLength: 1, reg: gpfp, asm: "FMVDX", typ: "Float64"}, // reinterpret arg0 as float
{name: "FCVTDW", argLength: 1, reg: gpfp, asm: "FCVTDW", typ: "Float64"}, // float64(low 32 bits of arg0)
{name: "FCVTDL", argLength: 1, reg: gpfp, asm: "FCVTDL", typ: "Float64"}, // float64(arg0)
{name: "FCVTWD", argLength: 1, reg: fpgp, asm: "FCVTWD", typ: "Int32"}, // int32(arg0)
{name: "FCVTLD", argLength: 1, reg: fpgp, asm: "FCVTLD", typ: "Int64"}, // int64(arg0)
{name: "FCVTDS", argLength: 1, reg: fp11, asm: "FCVTDS", typ: "Float64"}, // float64(arg0)
{name: "FCVTSD", argLength: 1, reg: fp11, asm: "FCVTSD", typ: "Float32"}, // float32(arg0)
{name: "FMOVDload", argLength: 2, reg: fpload, asm: "MOVD", aux: "SymOff", typ: "Float64", faultOnNilArg0: true, symEffect: "Read"}, // load float64 from arg0+auxint+aux
{name: "FMOVDstore", argLength: 3, reg: fpstore, asm: "MOVD", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store float6 to arg0+auxint+aux
{name: "FEQD", argLength: 2, reg: fp2gp, asm: "FEQD", commutative: true}, // arg0 == arg1
{name: "FNED", argLength: 2, reg: fp2gp, asm: "FNED", commutative: true}, // arg0 != arg1
{name: "FLTD", argLength: 2, reg: fp2gp, asm: "FLTD"}, // arg0 < arg1
{name: "FLED", argLength: 2, reg: fp2gp, asm: "FLED"}, // arg0 <= arg1
}
RISCV64blocks := []blockData{
{name: "BNE", controls: 1}, // Control != 0 (take a register)
}
archs = append(archs, arch{
name: "RISCV64",
pkg: "cmd/internal/obj/riscv",
genfile: "../../riscv64/ssa.go",
ops: RISCV64ops,
blocks: RISCV64blocks,
regnames: regNamesRISCV64,
gpregmask: gpMask,
fpregmask: fpMask,
framepointerreg: -1, // not used
})
}

File diff suppressed because it is too large.

File diff suppressed because it is too large.


@@ -66,7 +66,7 @@ func (op Op) isLoweredGetClosurePtr() bool {
switch op {
case OpAMD64LoweredGetClosurePtr, OpPPC64LoweredGetClosurePtr, OpARMLoweredGetClosurePtr, OpARM64LoweredGetClosurePtr,
Op386LoweredGetClosurePtr, OpMIPS64LoweredGetClosurePtr, OpS390XLoweredGetClosurePtr, OpMIPSLoweredGetClosurePtr,
-OpWasmLoweredGetClosurePtr:
+OpRISCV64LoweredGetClosurePtr, OpWasmLoweredGetClosurePtr:
return true
}
return false
@@ -115,7 +115,7 @@ func schedule(f *Func) {
v.Op == OpARMLoweredNilCheck || v.Op == OpARM64LoweredNilCheck ||
v.Op == Op386LoweredNilCheck || v.Op == OpMIPS64LoweredNilCheck ||
v.Op == OpS390XLoweredNilCheck || v.Op == OpMIPSLoweredNilCheck ||
-v.Op == OpWasmLoweredNilCheck:
+v.Op == OpRISCV64LoweredNilCheck || v.Op == OpWasmLoweredNilCheck:
// Nil checks must come before loads from the same address.
score[v.ID] = ScoreNilCheck
case v.Op == OpPhi:


@@ -12,6 +12,7 @@ import (
"cmd/compile/internal/mips"
"cmd/compile/internal/mips64"
"cmd/compile/internal/ppc64"
"cmd/compile/internal/riscv64"
"cmd/compile/internal/s390x"
"cmd/compile/internal/wasm"
"cmd/compile/internal/x86"
@@ -32,6 +33,7 @@ var archInits = map[string]func(*gc.Arch){
"mips64le": mips64.Init,
"ppc64": ppc64.Init,
"ppc64le": ppc64.Init,
"riscv64": riscv64.Init,
"s390x": s390x.Init,
"wasm": wasm.Init,
}


@@ -45,10 +45,11 @@ var bootstrapDirs = []string{
"cmd/compile/internal/mips",
"cmd/compile/internal/mips64",
"cmd/compile/internal/ppc64",
-"cmd/compile/internal/types",
+"cmd/compile/internal/riscv64",
"cmd/compile/internal/s390x",
"cmd/compile/internal/ssa",
"cmd/compile/internal/syntax",
+"cmd/compile/internal/types",
"cmd/compile/internal/x86",
"cmd/compile/internal/wasm",
"cmd/internal/bio",


@@ -11,11 +11,11 @@ import (
)
func init() {
-obj.RegisterRegister(obj.RBaseRISCV, REG_END, regName)
+obj.RegisterRegister(obj.RBaseRISCV, REG_END, RegName)
obj.RegisterOpcode(obj.ABaseRISCV, Anames)
}
-func regName(r int) string {
+func RegName(r int) string {
switch {
case r == 0:
return "NONE"


@@ -487,8 +487,8 @@ func rewriteMOV(ctxt *obj.Link, newprog obj.ProgAlloc, p *obj.Prog) {
}
}
-// invertBranch inverts the condition of a conditional branch.
-func invertBranch(i obj.As) obj.As {
+// InvertBranch inverts the condition of a conditional branch.
+func InvertBranch(i obj.As) obj.As {
switch i {
case ABEQ:
return ABNE
@@ -503,7 +503,7 @@ func invertBranch(i obj.As) obj.As {
case ABGEU:
return ABLTU
default:
-panic("invertBranch: not a branch")
+panic("InvertBranch: not a branch")
}
}
@@ -800,7 +800,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
jmp.To = obj.Addr{Type: obj.TYPE_BRANCH}
jmp.Pcond = p.Pcond
-p.As = invertBranch(p.As)
+p.As = InvertBranch(p.As)
p.Pcond = jmp.Link
// We may have made previous branches too long,
@@ -1005,7 +1005,7 @@ func wantImmU(p *obj.Prog, pos string, a obj.Addr, nbits uint) {
func wantReg(p *obj.Prog, pos string, descr string, r, min, max int16) {
if r < min || r > max {
-p.Ctxt.Diag("%v\texpected %s register in %s position but got non-%s register %s", p, descr, pos, descr, regName(int(r)))
+p.Ctxt.Diag("%v\texpected %s register in %s position but got non-%s register %s", p, descr, pos, descr, RegName(int(r)))
}
}