Mirror of https://github.com/golang/go.git, synced 2025-05-31 23:25:39 +00:00.
We already generate ADDL for byte operations; reflect this in the code. This also allows INC/DEC for ±1 operations, which are one byte shorter, and enables LEA for three-operand addition and subtraction.

Change-Id: Ibfdfee50667ca4cd3c28f72e3dece0c6d114d3ae
Reviewed-on: https://go-review.googlesource.com/21251
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Ilya Tocar <ilya.tocar@intel.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
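As an informal illustration of the selections this change describes (hypothetical registers, not compiler output): an in-place increment of an int64 is emitted as INCQ AX rather than ADDQ $1, AX, saving one byte; byte-sized adds use the 32-bit ADDL form; and a three-register add a = b + c can be emitted as LEAQ (BX)(CX*1), AX so that neither input is overwritten. The corresponding lowering lives in ssaGenValue below, in the OpAMD64ADD* and OpAMD64ADD*const cases.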
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package amd64

import (
	"fmt"
	"math"

	"cmd/compile/internal/gc"
	"cmd/compile/internal/ssa"
	"cmd/internal/obj"
	"cmd/internal/obj/x86"
)

// Smallest possible faulting page at address zero.
const minZeroPage = 4096

// ssaRegToReg maps ssa register numbers to obj register numbers.
var ssaRegToReg = []int16{
	x86.REG_AX,
	x86.REG_CX,
	x86.REG_DX,
	x86.REG_BX,
	x86.REG_SP,
	x86.REG_BP,
	x86.REG_SI,
	x86.REG_DI,
	x86.REG_R8,
	x86.REG_R9,
	x86.REG_R10,
	x86.REG_R11,
	x86.REG_R12,
	x86.REG_R13,
	x86.REG_R14,
	x86.REG_R15,
	x86.REG_X0,
	x86.REG_X1,
	x86.REG_X2,
	x86.REG_X3,
	x86.REG_X4,
	x86.REG_X5,
	x86.REG_X6,
	x86.REG_X7,
	x86.REG_X8,
	x86.REG_X9,
	x86.REG_X10,
	x86.REG_X11,
	x86.REG_X12,
	x86.REG_X13,
	x86.REG_X14,
	x86.REG_X15,
	0, // SB isn't a real register. We fill an Addr.Reg field with 0 in this case.
}

// markMoves marks any MOVXconst ops that need to avoid clobbering flags.
func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) {
	flive := b.FlagsLiveAtEnd
	if b.Control != nil && b.Control.Type.IsFlags() {
		flive = true
	}
	for i := len(b.Values) - 1; i >= 0; i-- {
		v := b.Values[i]
		if flive && (v.Op == ssa.OpAMD64MOVBconst || v.Op == ssa.OpAMD64MOVWconst || v.Op == ssa.OpAMD64MOVLconst || v.Op == ssa.OpAMD64MOVQconst) {
			// The "mark" is any non-nil Aux value.
			v.Aux = v
		}
		if v.Type.IsFlags() {
			flive = false
		}
		for _, a := range v.Args {
			if a.Type.IsFlags() {
				flive = true
			}
		}
	}
}
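
// The mark set by ssaMarkMoves is consumed in ssaGenValue below: a MOVxconst
// whose Aux is non-nil gets the x86.PRESERVEFLAGS bit, which keeps the
// assembler from rewriting MOV $0, reg into the shorter but flag-clobbering
// XOR reg, reg while the flags are still live.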

// loadByType returns the load instruction of the given type.
func loadByType(t ssa.Type) obj.As {
	// Avoid partial register write
	if !t.IsFloat() && t.Size() <= 2 {
		if t.Size() == 1 {
			return x86.AMOVBLZX
		} else {
			return x86.AMOVWLZX
		}
	}
	// Otherwise, there's no difference between load and store opcodes.
	return storeByType(t)
}

// storeByType returns the store instruction of the given type.
func storeByType(t ssa.Type) obj.As {
	width := t.Size()
	if t.IsFloat() {
		switch width {
		case 4:
			return x86.AMOVSS
		case 8:
			return x86.AMOVSD
		}
	} else {
		switch width {
		case 1:
			return x86.AMOVB
		case 2:
			return x86.AMOVW
		case 4:
			return x86.AMOVL
		case 8:
			return x86.AMOVQ
		}
	}
	panic("bad store type")
}

// moveByType returns the reg->reg move instruction of the given type.
func moveByType(t ssa.Type) obj.As {
	if t.IsFloat() {
		// Moving the whole sse2 register is faster
		// than moving just the correct low portion of it.
		// There is no xmm->xmm move with 1 byte opcode,
		// so use movups, which has 2 byte opcode.
		return x86.AMOVUPS
	} else {
		switch t.Size() {
		case 1:
			// Avoids partial register write
			return x86.AMOVL
		case 2:
			return x86.AMOVL
		case 4:
			return x86.AMOVL
		case 8:
			return x86.AMOVQ
		case 16:
			return x86.AMOVUPS // int128s are in SSE registers
		default:
			panic(fmt.Sprintf("bad int register width %d:%s", t.Size(), t))
		}
	}
	panic("bad register type")
}

// opregreg emits instructions for
// dest := dest(To) op src(From)
// and also returns the created obj.Prog so it
// may be further adjusted (offset, scale, etc).
func opregreg(op obj.As, dest, src int16) *obj.Prog {
	p := gc.Prog(op)
	p.From.Type = obj.TYPE_REG
	p.To.Type = obj.TYPE_REG
	p.To.Reg = dest
	p.From.Reg = src
	return p
}
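
// For example, opregreg(x86.AADDQ, r, y) emits ADDQ y, r and leaves the sum
// in r; callers that need more than a plain register-register form adjust the
// returned Prog (offset, scale, index) afterwards.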

func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
	s.SetLineno(v.Line)
	switch v.Op {
	case ssa.OpAMD64ADDQ, ssa.OpAMD64ADDL, ssa.OpAMD64ADDW, ssa.OpAMD64ADDB:
		r := gc.SSARegNum(v)
		r1 := gc.SSARegNum(v.Args[0])
		r2 := gc.SSARegNum(v.Args[1])
		switch {
		case r == r1:
			p := gc.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		case r == r2:
			p := gc.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r1
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		default:
			var asm obj.As
			if v.Op == ssa.OpAMD64ADDQ {
				asm = x86.ALEAQ
			} else {
				asm = x86.ALEAL
			}
			p := gc.Prog(asm)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = r1
			p.From.Scale = 1
			p.From.Index = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		}
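
		// In the default case above neither input register doubles as the
		// output, so the three-operand LEAQ/LEAL (r1)(r2*1), r form computes
		// the sum directly and avoids clobbering either source.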
	// 2-address opcode arithmetic, symmetric
	case ssa.OpAMD64ADDSS, ssa.OpAMD64ADDSD,
		ssa.OpAMD64ANDQ, ssa.OpAMD64ANDL, ssa.OpAMD64ANDW, ssa.OpAMD64ANDB,
		ssa.OpAMD64ORQ, ssa.OpAMD64ORL, ssa.OpAMD64ORW, ssa.OpAMD64ORB,
		ssa.OpAMD64XORQ, ssa.OpAMD64XORL, ssa.OpAMD64XORW, ssa.OpAMD64XORB,
		ssa.OpAMD64MULQ, ssa.OpAMD64MULL, ssa.OpAMD64MULW, ssa.OpAMD64MULB,
		ssa.OpAMD64MULSS, ssa.OpAMD64MULSD, ssa.OpAMD64PXOR:
		r := gc.SSARegNum(v)
		x := gc.SSARegNum(v.Args[0])
		y := gc.SSARegNum(v.Args[1])
		if x != r && y != r {
			opregreg(moveByType(v.Type), r, x)
			x = r
		}
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		if x == r {
			p.From.Reg = y
		} else {
			p.From.Reg = x
		}
	// 2-address opcode arithmetic, not symmetric
	case ssa.OpAMD64SUBQ, ssa.OpAMD64SUBL, ssa.OpAMD64SUBW, ssa.OpAMD64SUBB:
		r := gc.SSARegNum(v)
		x := gc.SSARegNum(v.Args[0])
		y := gc.SSARegNum(v.Args[1])
		var neg bool
		if y == r {
			// compute -(y-x) instead
			x, y = y, x
			neg = true
		}
		if x != r {
			opregreg(moveByType(v.Type), r, x)
		}
		opregreg(v.Op.Asm(), r, y)

		if neg {
			if v.Op == ssa.OpAMD64SUBQ {
				p := gc.Prog(x86.ANEGQ)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
			} else { // Avoids partial register write
				p := gc.Prog(x86.ANEGL)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
			}
		}
	case ssa.OpAMD64SUBSS, ssa.OpAMD64SUBSD, ssa.OpAMD64DIVSS, ssa.OpAMD64DIVSD:
		r := gc.SSARegNum(v)
		x := gc.SSARegNum(v.Args[0])
		y := gc.SSARegNum(v.Args[1])
		if y == r && x != r {
			// r/y := x op r/y, need to preserve x and rewrite to
			// r/y := r/y op x15
			x15 := int16(x86.REG_X15)
			// register move y to x15
			// register move x to y
			// rename y with x15
			opregreg(moveByType(v.Type), x15, y)
			opregreg(moveByType(v.Type), r, x)
			y = x15
		} else if x != r {
			opregreg(moveByType(v.Type), r, x)
		}
		opregreg(v.Op.Asm(), r, y)

	case ssa.OpAMD64DIVQ, ssa.OpAMD64DIVL, ssa.OpAMD64DIVW,
		ssa.OpAMD64DIVQU, ssa.OpAMD64DIVLU, ssa.OpAMD64DIVWU,
		ssa.OpAMD64MODQ, ssa.OpAMD64MODL, ssa.OpAMD64MODW,
		ssa.OpAMD64MODQU, ssa.OpAMD64MODLU, ssa.OpAMD64MODWU:

		// Arg[0] is already in AX as it's the only register we allow
		// and AX is the only output
		x := gc.SSARegNum(v.Args[1])

		// CPU faults upon signed overflow, which occurs when most
		// negative int is divided by -1.
		var j *obj.Prog
		if v.Op == ssa.OpAMD64DIVQ || v.Op == ssa.OpAMD64DIVL ||
			v.Op == ssa.OpAMD64DIVW || v.Op == ssa.OpAMD64MODQ ||
			v.Op == ssa.OpAMD64MODL || v.Op == ssa.OpAMD64MODW {

			var c *obj.Prog
			switch v.Op {
			case ssa.OpAMD64DIVQ, ssa.OpAMD64MODQ:
				c = gc.Prog(x86.ACMPQ)
				j = gc.Prog(x86.AJEQ)
				// go ahead and sign extend to save doing it later
				gc.Prog(x86.ACQO)

			case ssa.OpAMD64DIVL, ssa.OpAMD64MODL:
				c = gc.Prog(x86.ACMPL)
				j = gc.Prog(x86.AJEQ)
				gc.Prog(x86.ACDQ)

			case ssa.OpAMD64DIVW, ssa.OpAMD64MODW:
				c = gc.Prog(x86.ACMPW)
				j = gc.Prog(x86.AJEQ)
				gc.Prog(x86.ACWD)
			}
			c.From.Type = obj.TYPE_REG
			c.From.Reg = x
			c.To.Type = obj.TYPE_CONST
			c.To.Offset = -1

			j.To.Type = obj.TYPE_BRANCH

		}

		// for unsigned ints, we sign extend by setting DX = 0
		// signed ints were sign extended above
		if v.Op == ssa.OpAMD64DIVQU || v.Op == ssa.OpAMD64MODQU ||
			v.Op == ssa.OpAMD64DIVLU || v.Op == ssa.OpAMD64MODLU ||
			v.Op == ssa.OpAMD64DIVWU || v.Op == ssa.OpAMD64MODWU {
			c := gc.Prog(x86.AXORQ)
			c.From.Type = obj.TYPE_REG
			c.From.Reg = x86.REG_DX
			c.To.Type = obj.TYPE_REG
			c.To.Reg = x86.REG_DX
		}

		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x

		// signed division, rest of the check for -1 case
		if j != nil {
			j2 := gc.Prog(obj.AJMP)
			j2.To.Type = obj.TYPE_BRANCH

			var n *obj.Prog
			if v.Op == ssa.OpAMD64DIVQ || v.Op == ssa.OpAMD64DIVL ||
				v.Op == ssa.OpAMD64DIVW {
				// n * -1 = -n
				n = gc.Prog(x86.ANEGQ)
				n.To.Type = obj.TYPE_REG
				n.To.Reg = x86.REG_AX
			} else {
				// n % -1 == 0
				n = gc.Prog(x86.AXORQ)
				n.From.Type = obj.TYPE_REG
				n.From.Reg = x86.REG_DX
				n.To.Type = obj.TYPE_REG
				n.To.Reg = x86.REG_DX
			}

			j.To.Val = n
			j2.To.Val = s.Pc()
		}
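
		// Putting it together, the signed 64-bit case emits roughly:
		//	CMPQ  x, $-1
		//	JEQ   fix        // divisor is -1: skip the IDIV, which would fault on MinInt64 / -1
		//	CQO              // sign-extend AX into DX
		//	IDIVQ x
		//	JMP   done
		// fix:
		//	NEGQ  AX         // quotient of n / -1
		//	(or XORQ DX, DX  // remainder of n % -1 is 0)
		// done:
		// The labels are only illustrative; the branch targets are wired
		// directly above via j.To.Val and j2.To.Val.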

	case ssa.OpAMD64HMULQ, ssa.OpAMD64HMULL, ssa.OpAMD64HMULW, ssa.OpAMD64HMULB,
		ssa.OpAMD64HMULQU, ssa.OpAMD64HMULLU, ssa.OpAMD64HMULWU, ssa.OpAMD64HMULBU:
		// the frontend rewrites constant division by 8/16/32 bit integers into
		// HMUL by a constant
		// SSA rewrites generate the 64 bit versions

		// Arg[0] is already in AX as it's the only register we allow
		// and DX is the only output we care about (the high bits)
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[1])

		// IMULB puts the high portion in AH instead of DL,
		// so move it to DL for consistency
		if v.Type.Size() == 1 {
			m := gc.Prog(x86.AMOVB)
			m.From.Type = obj.TYPE_REG
			m.From.Reg = x86.REG_AH
			m.To.Type = obj.TYPE_REG
			m.To.Reg = x86.REG_DX
		}

	case ssa.OpAMD64AVGQU:
		// compute (x+y)/2 unsigned.
		// Do a 64-bit add, the overflow goes into the carry.
		// Shift right once and pull the carry back into the 63rd bit.
		r := gc.SSARegNum(v)
		x := gc.SSARegNum(v.Args[0])
		y := gc.SSARegNum(v.Args[1])
		if x != r && y != r {
			opregreg(moveByType(v.Type), r, x)
			x = r
		}
		p := gc.Prog(x86.AADDQ)
		p.From.Type = obj.TYPE_REG
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		if x == r {
			p.From.Reg = y
		} else {
			p.From.Reg = x
		}
		p = gc.Prog(x86.ARCRQ)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 1
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
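
		// For example, with x = y = 1<<63 the ADDQ wraps to 0 and sets the
		// carry; RCRQ $1 rotates that carry back into bit 63, giving 1<<63,
		// which is the exact unsigned average.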

	case ssa.OpAMD64SHLQ, ssa.OpAMD64SHLL, ssa.OpAMD64SHLW, ssa.OpAMD64SHLB,
		ssa.OpAMD64SHRQ, ssa.OpAMD64SHRL, ssa.OpAMD64SHRW, ssa.OpAMD64SHRB,
		ssa.OpAMD64SARQ, ssa.OpAMD64SARL, ssa.OpAMD64SARW, ssa.OpAMD64SARB:
		x := gc.SSARegNum(v.Args[0])
		r := gc.SSARegNum(v)
		if x != r {
			if r == x86.REG_CX {
				v.Fatalf("can't implement %s, target and shift both in CX", v.LongString())
			}
			p := gc.Prog(moveByType(v.Type))
			p.From.Type = obj.TYPE_REG
			p.From.Reg = x
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		}
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[1]) // should be CX
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.OpAMD64ADDQconst, ssa.OpAMD64ADDLconst, ssa.OpAMD64ADDWconst, ssa.OpAMD64ADDBconst:
		r := gc.SSARegNum(v)
		a := gc.SSARegNum(v.Args[0])
		if r == a {
			if v.AuxInt == 1 {
				var asm obj.As
				// Software optimization manual recommends add $1,reg.
				// But inc/dec is 1 byte smaller. ICC always uses inc.
				// Clang/GCC choose depending on flags, but prefer add.
				// Experiments show that inc/dec is both a little faster
				// and makes the binary a little smaller.
				if v.Op == ssa.OpAMD64ADDQconst {
					asm = x86.AINCQ
				} else {
					asm = x86.AINCL
				}
				p := gc.Prog(asm)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			} else if v.AuxInt == -1 {
				var asm obj.As
				if v.Op == ssa.OpAMD64ADDQconst {
					asm = x86.ADECQ
				} else {
					asm = x86.ADECL
				}
				p := gc.Prog(asm)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			} else {
				p := gc.Prog(v.Op.Asm())
				p.From.Type = obj.TYPE_CONST
				p.From.Offset = v.AuxInt
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			}
		}
		var asm obj.As
		if v.Op == ssa.OpAMD64ADDQconst {
			asm = x86.ALEAQ
		} else {
			asm = x86.ALEAL
		}
		p := gc.Prog(asm)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = a
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
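
		// To summarize the shapes chosen above: an in-place ±1 becomes
		// INC/DEC, any other in-place constant uses ADDQ/ADDL $c, and when
		// the destination differs from the source the three-operand
		// LEAQ/LEAL c(a), r form avoids a separate MOV.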

	case ssa.OpAMD64CMOVQEQconst, ssa.OpAMD64CMOVLEQconst, ssa.OpAMD64CMOVWEQconst,
		ssa.OpAMD64CMOVQNEconst, ssa.OpAMD64CMOVLNEconst, ssa.OpAMD64CMOVWNEconst:
		r := gc.SSARegNum(v)
		x := gc.SSARegNum(v.Args[0])
		// Arg0 is in/out, move in to out if not already same
		if r != x {
			p := gc.Prog(moveByType(v.Type))
			p.From.Type = obj.TYPE_REG
			p.From.Reg = x
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		}

		// Constant into AX, after arg0 movement in case arg0 is in AX
		p := gc.Prog(moveByType(v.Type))
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x86.REG_AX

		p = gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpAMD64MULQconst, ssa.OpAMD64MULLconst, ssa.OpAMD64MULWconst, ssa.OpAMD64MULBconst:
		r := gc.SSARegNum(v)
		x := gc.SSARegNum(v.Args[0])
		if r != x {
			p := gc.Prog(moveByType(v.Type))
			p.From.Type = obj.TYPE_REG
			p.From.Reg = x
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		}
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		// TODO: Teach doasm to compile the three-address multiply imul $c, r1, r2
		// instead of using the MOVQ above.
		//p.From3 = new(obj.Addr)
		//p.From3.Type = obj.TYPE_REG
		//p.From3.Reg = gc.SSARegNum(v.Args[0])
	case ssa.OpAMD64SUBQconst, ssa.OpAMD64SUBLconst, ssa.OpAMD64SUBWconst, ssa.OpAMD64SUBBconst:
		x := gc.SSARegNum(v.Args[0])
		r := gc.SSARegNum(v)
		// We have 3-op add (lea), so transforming a = b - const into
		// a = b + (-const) saves us 1 instruction. We can't fit
		// -(-1 << 31) into the 4-byte offset of lea.
		// We handle 2-address just fine below.
		if v.AuxInt == -1<<31 || x == r {
			if x != r {
				// This code compensates for the fact that the register allocator
				// doesn't understand 2-address instructions yet. TODO: fix that.
				p := gc.Prog(moveByType(v.Type))
				p.From.Type = obj.TYPE_REG
				p.From.Reg = x
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
			}
			p := gc.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = v.AuxInt
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		} else if x == r && v.AuxInt == -1 {
			var asm obj.As
			// x = x - (-1) is the same as x++
			// See OpAMD64ADDQconst comments about inc vs add $1,reg
			if v.Op == ssa.OpAMD64SUBQconst {
				asm = x86.AINCQ
			} else {
				asm = x86.AINCL
			}
			p := gc.Prog(asm)
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		} else if x == r && v.AuxInt == 1 {
			var asm obj.As
			if v.Op == ssa.OpAMD64SUBQconst {
				asm = x86.ADECQ
			} else {
				asm = x86.ADECL
			}
			p := gc.Prog(asm)
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		} else {
			var asm obj.As
			if v.Op == ssa.OpAMD64SUBQconst {
				asm = x86.ALEAQ
			} else {
				asm = x86.ALEAL
			}
			p := gc.Prog(asm)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = x
			p.From.Offset = -v.AuxInt
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		}
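
		// In short: subtracting a constant is normally turned into an LEA
		// with the negated offset, except when the constant is -1<<31 (its
		// negation does not fit in LEA's signed 32-bit displacement) or when
		// the source and destination registers already coincide.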

	case ssa.OpAMD64ANDQconst, ssa.OpAMD64ANDLconst, ssa.OpAMD64ANDWconst, ssa.OpAMD64ANDBconst,
		ssa.OpAMD64ORQconst, ssa.OpAMD64ORLconst, ssa.OpAMD64ORWconst, ssa.OpAMD64ORBconst,
		ssa.OpAMD64XORQconst, ssa.OpAMD64XORLconst, ssa.OpAMD64XORWconst, ssa.OpAMD64XORBconst,
		ssa.OpAMD64SHLQconst, ssa.OpAMD64SHLLconst, ssa.OpAMD64SHLWconst,
		ssa.OpAMD64SHLBconst, ssa.OpAMD64SHRQconst, ssa.OpAMD64SHRLconst, ssa.OpAMD64SHRWconst,
		ssa.OpAMD64SHRBconst, ssa.OpAMD64SARQconst, ssa.OpAMD64SARLconst, ssa.OpAMD64SARWconst,
		ssa.OpAMD64SARBconst, ssa.OpAMD64ROLQconst, ssa.OpAMD64ROLLconst, ssa.OpAMD64ROLWconst,
		ssa.OpAMD64ROLBconst:
		// This code compensates for the fact that the register allocator
		// doesn't understand 2-address instructions yet. TODO: fix that.
		x := gc.SSARegNum(v.Args[0])
		r := gc.SSARegNum(v)
		if x != r {
			p := gc.Prog(moveByType(v.Type))
			p.From.Type = obj.TYPE_REG
			p.From.Reg = x
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		}
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.OpAMD64SBBQcarrymask, ssa.OpAMD64SBBLcarrymask:
		r := gc.SSARegNum(v)
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.OpAMD64LEAQ1, ssa.OpAMD64LEAQ2, ssa.OpAMD64LEAQ4, ssa.OpAMD64LEAQ8:
		r := gc.SSARegNum(v.Args[0])
		i := gc.SSARegNum(v.Args[1])
		p := gc.Prog(x86.ALEAQ)
		switch v.Op {
		case ssa.OpAMD64LEAQ1:
			p.From.Scale = 1
			if i == x86.REG_SP {
				r, i = i, r
			}
		case ssa.OpAMD64LEAQ2:
			p.From.Scale = 2
		case ssa.OpAMD64LEAQ4:
			p.From.Scale = 4
		case ssa.OpAMD64LEAQ8:
			p.From.Scale = 8
		}
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r
		p.From.Index = i
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)
	case ssa.OpAMD64LEAQ:
		p := gc.Prog(x86.ALEAQ)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = gc.SSARegNum(v.Args[0])
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)
	case ssa.OpAMD64CMPQ, ssa.OpAMD64CMPL, ssa.OpAMD64CMPW, ssa.OpAMD64CMPB,
		ssa.OpAMD64TESTQ, ssa.OpAMD64TESTL, ssa.OpAMD64TESTW, ssa.OpAMD64TESTB:
		opregreg(v.Op.Asm(), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v.Args[0]))
	case ssa.OpAMD64UCOMISS, ssa.OpAMD64UCOMISD:
		// Go assembler has swapped operands for UCOMISx relative to CMP,
		// must account for that right here.
		opregreg(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]))
	case ssa.OpAMD64CMPQconst, ssa.OpAMD64CMPLconst, ssa.OpAMD64CMPWconst, ssa.OpAMD64CMPBconst:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[0])
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = v.AuxInt
	case ssa.OpAMD64TESTQconst, ssa.OpAMD64TESTLconst, ssa.OpAMD64TESTWconst, ssa.OpAMD64TESTBconst:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v.Args[0])
	case ssa.OpAMD64MOVBconst, ssa.OpAMD64MOVWconst, ssa.OpAMD64MOVLconst, ssa.OpAMD64MOVQconst:
		x := gc.SSARegNum(v)
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x
		// If flags are live at this instruction, suppress the
		// MOV $0,AX -> XOR AX,AX optimization.
		if v.Aux != nil {
			p.Mark |= x86.PRESERVEFLAGS
		}
	case ssa.OpAMD64MOVSSconst, ssa.OpAMD64MOVSDconst:
		x := gc.SSARegNum(v)
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_FCONST
		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x
	case ssa.OpAMD64MOVQload, ssa.OpAMD64MOVSSload, ssa.OpAMD64MOVSDload, ssa.OpAMD64MOVLload, ssa.OpAMD64MOVWload, ssa.OpAMD64MOVBload, ssa.OpAMD64MOVBQSXload, ssa.OpAMD64MOVWQSXload, ssa.OpAMD64MOVLQSXload, ssa.OpAMD64MOVOload:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = gc.SSARegNum(v.Args[0])
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)
	case ssa.OpAMD64MOVQloadidx8, ssa.OpAMD64MOVSDloadidx8:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = gc.SSARegNum(v.Args[0])
		gc.AddAux(&p.From, v)
		p.From.Scale = 8
		p.From.Index = gc.SSARegNum(v.Args[1])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)
	case ssa.OpAMD64MOVLloadidx4, ssa.OpAMD64MOVSSloadidx4:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = gc.SSARegNum(v.Args[0])
		gc.AddAux(&p.From, v)
		p.From.Scale = 4
		p.From.Index = gc.SSARegNum(v.Args[1])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)
	case ssa.OpAMD64MOVWloadidx2:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = gc.SSARegNum(v.Args[0])
		gc.AddAux(&p.From, v)
		p.From.Scale = 2
		p.From.Index = gc.SSARegNum(v.Args[1])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)
	case ssa.OpAMD64MOVBloadidx1, ssa.OpAMD64MOVWloadidx1, ssa.OpAMD64MOVLloadidx1, ssa.OpAMD64MOVQloadidx1, ssa.OpAMD64MOVSSloadidx1, ssa.OpAMD64MOVSDloadidx1:
		r := gc.SSARegNum(v.Args[0])
		i := gc.SSARegNum(v.Args[1])
		if i == x86.REG_SP {
			r, i = i, r
		}
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r
		p.From.Scale = 1
		p.From.Index = i
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)
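
		// The base/index swap above is needed because SP cannot be encoded
		// as an index register in an x86 SIB byte; with scale 1 the two
		// operands are interchangeable, so SP moves to the base slot.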
	case ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVLstore, ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore, ssa.OpAMD64MOVOstore:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[1])
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = gc.SSARegNum(v.Args[0])
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64MOVQstoreidx8, ssa.OpAMD64MOVSDstoreidx8:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[2])
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = gc.SSARegNum(v.Args[0])
		p.To.Scale = 8
		p.To.Index = gc.SSARegNum(v.Args[1])
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64MOVSSstoreidx4, ssa.OpAMD64MOVLstoreidx4:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[2])
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = gc.SSARegNum(v.Args[0])
		p.To.Scale = 4
		p.To.Index = gc.SSARegNum(v.Args[1])
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64MOVWstoreidx2:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[2])
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = gc.SSARegNum(v.Args[0])
		p.To.Scale = 2
		p.To.Index = gc.SSARegNum(v.Args[1])
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64MOVBstoreidx1, ssa.OpAMD64MOVWstoreidx1, ssa.OpAMD64MOVLstoreidx1, ssa.OpAMD64MOVQstoreidx1, ssa.OpAMD64MOVSSstoreidx1, ssa.OpAMD64MOVSDstoreidx1:
		r := gc.SSARegNum(v.Args[0])
		i := gc.SSARegNum(v.Args[1])
		if i == x86.REG_SP {
			r, i = i, r
		}
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[2])
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = r
		p.To.Scale = 1
		p.To.Index = i
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64MOVQstoreconst, ssa.OpAMD64MOVLstoreconst, ssa.OpAMD64MOVWstoreconst, ssa.OpAMD64MOVBstoreconst:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = gc.SSARegNum(v.Args[0])
		gc.AddAux2(&p.To, v, sc.Off())
	case ssa.OpAMD64MOVQstoreconstidx1, ssa.OpAMD64MOVQstoreconstidx8, ssa.OpAMD64MOVLstoreconstidx1, ssa.OpAMD64MOVLstoreconstidx4, ssa.OpAMD64MOVWstoreconstidx1, ssa.OpAMD64MOVWstoreconstidx2, ssa.OpAMD64MOVBstoreconstidx1:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val()
		r := gc.SSARegNum(v.Args[0])
		i := gc.SSARegNum(v.Args[1])
		switch v.Op {
		case ssa.OpAMD64MOVBstoreconstidx1, ssa.OpAMD64MOVWstoreconstidx1, ssa.OpAMD64MOVLstoreconstidx1, ssa.OpAMD64MOVQstoreconstidx1:
			p.To.Scale = 1
			if i == x86.REG_SP {
				r, i = i, r
			}
		case ssa.OpAMD64MOVWstoreconstidx2:
			p.To.Scale = 2
		case ssa.OpAMD64MOVLstoreconstidx4:
			p.To.Scale = 4
		case ssa.OpAMD64MOVQstoreconstidx8:
			p.To.Scale = 8
		}
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = r
		p.To.Index = i
		gc.AddAux2(&p.To, v, sc.Off())
	case ssa.OpAMD64MOVLQSX, ssa.OpAMD64MOVWQSX, ssa.OpAMD64MOVBQSX, ssa.OpAMD64MOVLQZX, ssa.OpAMD64MOVWQZX, ssa.OpAMD64MOVBQZX,
		ssa.OpAMD64CVTSL2SS, ssa.OpAMD64CVTSL2SD, ssa.OpAMD64CVTSQ2SS, ssa.OpAMD64CVTSQ2SD,
		ssa.OpAMD64CVTTSS2SL, ssa.OpAMD64CVTTSD2SL, ssa.OpAMD64CVTTSS2SQ, ssa.OpAMD64CVTTSD2SQ,
		ssa.OpAMD64CVTSS2SD, ssa.OpAMD64CVTSD2SS:
		opregreg(v.Op.Asm(), gc.SSARegNum(v), gc.SSARegNum(v.Args[0]))
	case ssa.OpAMD64DUFFZERO:
		p := gc.Prog(obj.ADUFFZERO)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = gc.Linksym(gc.Pkglookup("duffzero", gc.Runtimepkg))
		p.To.Offset = v.AuxInt
	case ssa.OpAMD64MOVOconst:
		if v.AuxInt != 0 {
			v.Unimplementedf("MOVOconst can only do constant=0")
		}
		r := gc.SSARegNum(v)
		opregreg(x86.AXORPS, r, r)
	case ssa.OpAMD64DUFFCOPY:
		p := gc.Prog(obj.ADUFFCOPY)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = gc.Linksym(gc.Pkglookup("duffcopy", gc.Runtimepkg))
		p.To.Offset = v.AuxInt
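
		// For both Duff ops the AuxInt is the byte offset of the entry point
		// within runtime.duffzero / runtime.duffcopy, so the call lands
		// partway into the unrolled loop and only the required amount is
		// zeroed or copied.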

	case ssa.OpCopy, ssa.OpAMD64MOVQconvert: // TODO: use MOVQreg for reg->reg copies instead of OpCopy?
		if v.Type.IsMemory() {
			return
		}
		x := gc.SSARegNum(v.Args[0])
		y := gc.SSARegNum(v)
		if x != y {
			opregreg(moveByType(v.Type), y, x)
		}
	case ssa.OpLoadReg:
		if v.Type.IsFlags() {
			v.Unimplementedf("load flags not implemented: %v", v.LongString())
			return
		}
		p := gc.Prog(loadByType(v.Type))
		n, off := gc.AutoVar(v.Args[0])
		p.From.Type = obj.TYPE_MEM
		p.From.Node = n
		p.From.Sym = gc.Linksym(n.Sym)
		p.From.Offset = off
		if n.Class == gc.PPARAM || n.Class == gc.PPARAMOUT {
			p.From.Name = obj.NAME_PARAM
			p.From.Offset += n.Xoffset
		} else {
			p.From.Name = obj.NAME_AUTO
		}
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)

	case ssa.OpStoreReg:
		if v.Type.IsFlags() {
			v.Unimplementedf("store flags not implemented: %v", v.LongString())
			return
		}
		p := gc.Prog(storeByType(v.Type))
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[0])
		n, off := gc.AutoVar(v)
		p.To.Type = obj.TYPE_MEM
		p.To.Node = n
		p.To.Sym = gc.Linksym(n.Sym)
		p.To.Offset = off
		if n.Class == gc.PPARAM || n.Class == gc.PPARAMOUT {
			p.To.Name = obj.NAME_PARAM
			p.To.Offset += n.Xoffset
		} else {
			p.To.Name = obj.NAME_AUTO
		}
	case ssa.OpPhi:
		// just check to make sure regalloc and stackalloc did it right
		if v.Type.IsMemory() {
			return
		}
		f := v.Block.Func
		loc := f.RegAlloc[v.ID]
		for _, a := range v.Args {
			if aloc := f.RegAlloc[a.ID]; aloc != loc { // TODO: .Equal() instead?
				v.Fatalf("phi arg at different location than phi: %v @ %v, but arg %v @ %v\n%s\n", v, loc, a, aloc, v.Block.Func)
			}
		}
	case ssa.OpInitMem:
		// memory arg needs no code
	case ssa.OpArg:
		// input args need no code
	case ssa.OpAMD64LoweredGetClosurePtr:
		// Output is hardwired to DX only,
		// and DX contains the closure pointer on
		// closure entry, and this "instruction"
		// is scheduled to the very beginning
		// of the entry block.
	case ssa.OpAMD64LoweredGetG:
		r := gc.SSARegNum(v)
		// See the comments in cmd/internal/obj/x86/obj6.go
		// near CanUse1InsnTLS for a detailed explanation of these instructions.
		if x86.CanUse1InsnTLS(gc.Ctxt) {
			// MOVQ (TLS), r
			p := gc.Prog(x86.AMOVQ)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = x86.REG_TLS
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		} else {
			// MOVQ TLS, r
			// MOVQ (r)(TLS*1), r
			p := gc.Prog(x86.AMOVQ)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = x86.REG_TLS
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
			q := gc.Prog(x86.AMOVQ)
			q.From.Type = obj.TYPE_MEM
			q.From.Reg = r
			q.From.Index = x86.REG_TLS
			q.From.Scale = 1
			q.To.Type = obj.TYPE_REG
			q.To.Reg = r
		}
	case ssa.OpAMD64CALLstatic:
		if v.Aux.(*gc.Sym) == gc.Deferreturn.Sym {
			// Deferred calls will appear to be returning to
			// the CALL deferreturn(SB) that we are about to emit.
			// However, the stack trace code will show the line
			// of the instruction byte before the return PC.
			// To avoid that being an unrelated instruction,
			// insert an actual hardware NOP that will have the right line number.
			// This is different from obj.ANOP, which is a virtual no-op
			// that doesn't make it into the instruction stream.
			ginsnop()
		}
		p := gc.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = gc.Linksym(v.Aux.(*gc.Sym))
		if gc.Maxarg < v.AuxInt {
			gc.Maxarg = v.AuxInt
		}
	case ssa.OpAMD64CALLclosure:
		p := gc.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v.Args[0])
		if gc.Maxarg < v.AuxInt {
			gc.Maxarg = v.AuxInt
		}
	case ssa.OpAMD64CALLdefer:
		p := gc.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = gc.Linksym(gc.Deferproc.Sym)
		if gc.Maxarg < v.AuxInt {
			gc.Maxarg = v.AuxInt
		}
	case ssa.OpAMD64CALLgo:
		p := gc.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = gc.Linksym(gc.Newproc.Sym)
		if gc.Maxarg < v.AuxInt {
			gc.Maxarg = v.AuxInt
		}
	case ssa.OpAMD64CALLinter:
		p := gc.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v.Args[0])
		if gc.Maxarg < v.AuxInt {
			gc.Maxarg = v.AuxInt
		}
	case ssa.OpAMD64NEGQ, ssa.OpAMD64NEGL, ssa.OpAMD64NEGW, ssa.OpAMD64NEGB,
		ssa.OpAMD64BSWAPQ, ssa.OpAMD64BSWAPL,
		ssa.OpAMD64NOTQ, ssa.OpAMD64NOTL, ssa.OpAMD64NOTW, ssa.OpAMD64NOTB:
		x := gc.SSARegNum(v.Args[0])
		r := gc.SSARegNum(v)
		if x != r {
			p := gc.Prog(moveByType(v.Type))
			p.From.Type = obj.TYPE_REG
			p.From.Reg = x
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		}
		p := gc.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.OpAMD64BSFQ, ssa.OpAMD64BSFL, ssa.OpAMD64BSFW,
		ssa.OpAMD64BSRQ, ssa.OpAMD64BSRL, ssa.OpAMD64BSRW,
		ssa.OpAMD64SQRTSD:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[0])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)
	case ssa.OpSP, ssa.OpSB:
		// nothing to do
	case ssa.OpAMD64SETEQ, ssa.OpAMD64SETNE,
		ssa.OpAMD64SETL, ssa.OpAMD64SETLE,
		ssa.OpAMD64SETG, ssa.OpAMD64SETGE,
		ssa.OpAMD64SETGF, ssa.OpAMD64SETGEF,
		ssa.OpAMD64SETB, ssa.OpAMD64SETBE,
		ssa.OpAMD64SETORD, ssa.OpAMD64SETNAN,
		ssa.OpAMD64SETA, ssa.OpAMD64SETAE:
		p := gc.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)

	case ssa.OpAMD64SETNEF:
		p := gc.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)
		q := gc.Prog(x86.ASETPS)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = x86.REG_AX
		// ORL avoids partial register write and is smaller than ORQ, used by old compiler
		opregreg(x86.AORL, gc.SSARegNum(v), x86.REG_AX)

	case ssa.OpAMD64SETEQF:
		p := gc.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)
		q := gc.Prog(x86.ASETPC)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = x86.REG_AX
		// ANDL avoids partial register write and is smaller than ANDQ, used by old compiler
		opregreg(x86.AANDL, gc.SSARegNum(v), x86.REG_AX)

	case ssa.OpAMD64InvertFlags:
		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
	case ssa.OpAMD64FlagEQ, ssa.OpAMD64FlagLT_ULT, ssa.OpAMD64FlagLT_UGT, ssa.OpAMD64FlagGT_ULT, ssa.OpAMD64FlagGT_UGT:
		v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
	case ssa.OpAMD64REPSTOSQ:
		gc.Prog(x86.AREP)
		gc.Prog(x86.ASTOSQ)
	case ssa.OpAMD64REPMOVSQ:
		gc.Prog(x86.AREP)
		gc.Prog(x86.AMOVSQ)
	case ssa.OpVarDef:
		gc.Gvardef(v.Aux.(*gc.Node))
	case ssa.OpVarKill:
		gc.Gvarkill(v.Aux.(*gc.Node))
	case ssa.OpVarLive:
		gc.Gvarlive(v.Aux.(*gc.Node))
	case ssa.OpAMD64LoweredNilCheck:
		// Optimization - if the subsequent block has a load or store
		// at the same address, we don't need to issue this instruction.
		mem := v.Args[1]
		for _, w := range v.Block.Succs[0].Values {
			if w.Op == ssa.OpPhi {
				if w.Type.IsMemory() {
					mem = w
				}
				continue
			}
			if len(w.Args) == 0 || !w.Args[len(w.Args)-1].Type.IsMemory() {
				// w doesn't use a store - can't be a memory op.
				continue
			}
			if w.Args[len(w.Args)-1] != mem {
				v.Fatalf("wrong store after nilcheck v=%s w=%s", v, w)
			}
			switch w.Op {
			case ssa.OpAMD64MOVQload, ssa.OpAMD64MOVLload, ssa.OpAMD64MOVWload, ssa.OpAMD64MOVBload,
				ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVLstore, ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore,
				ssa.OpAMD64MOVBQSXload, ssa.OpAMD64MOVWQSXload, ssa.OpAMD64MOVLQSXload,
				ssa.OpAMD64MOVSSload, ssa.OpAMD64MOVSDload, ssa.OpAMD64MOVOload,
				ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVOstore:
				if w.Args[0] == v.Args[0] && w.Aux == nil && w.AuxInt >= 0 && w.AuxInt < minZeroPage {
					if gc.Debug_checknil != 0 && int(v.Line) > 1 {
						gc.Warnl(v.Line, "removed nil check")
					}
					return
				}
			case ssa.OpAMD64MOVQstoreconst, ssa.OpAMD64MOVLstoreconst, ssa.OpAMD64MOVWstoreconst, ssa.OpAMD64MOVBstoreconst:
				off := ssa.ValAndOff(v.AuxInt).Off()
				if w.Args[0] == v.Args[0] && w.Aux == nil && off >= 0 && off < minZeroPage {
					if gc.Debug_checknil != 0 && int(v.Line) > 1 {
						gc.Warnl(v.Line, "removed nil check")
					}
					return
				}
			}
			if w.Type.IsMemory() {
				if w.Op == ssa.OpVarDef || w.Op == ssa.OpVarKill || w.Op == ssa.OpVarLive {
					// these ops are OK
					mem = w
					continue
				}
				// We can't delay the nil check past the next store.
				break
			}
		}
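
		// The loop above removes the check only when the successor block
		// provably dereferences the same pointer with no symbol aux and an
		// offset inside the first unmapped page (< minZeroPage); the scan
		// stops at the first store the fault cannot be reordered past.
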
		// Issue a load which will fault if the input is nil.
		// TODO: We currently use the 2-byte instruction TESTB AX, (reg).
		// Should we use the 3-byte TESTB $0, (reg) instead? It is larger
		// but it doesn't have false dependency on AX.
		// Or maybe allocate an output register and use MOVL (reg),reg2 ?
		// That trades clobbering flags for clobbering a register.
		p := gc.Prog(x86.ATESTB)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = gc.SSARegNum(v.Args[0])
		gc.AddAux(&p.To, v)
		if gc.Debug_checknil != 0 && v.Line > 1 { // v.Line==1 in generated wrappers
			gc.Warnl(v.Line, "generated nil check")
		}
	default:
		v.Unimplementedf("genValue not implemented: %s", v.LongString())
	}
}

var blockJump = [...]struct {
	asm, invasm obj.As
}{
	ssa.BlockAMD64EQ:  {x86.AJEQ, x86.AJNE},
	ssa.BlockAMD64NE:  {x86.AJNE, x86.AJEQ},
	ssa.BlockAMD64LT:  {x86.AJLT, x86.AJGE},
	ssa.BlockAMD64GE:  {x86.AJGE, x86.AJLT},
	ssa.BlockAMD64LE:  {x86.AJLE, x86.AJGT},
	ssa.BlockAMD64GT:  {x86.AJGT, x86.AJLE},
	ssa.BlockAMD64ULT: {x86.AJCS, x86.AJCC},
	ssa.BlockAMD64UGE: {x86.AJCC, x86.AJCS},
	ssa.BlockAMD64UGT: {x86.AJHI, x86.AJLS},
	ssa.BlockAMD64ULE: {x86.AJLS, x86.AJHI},
	ssa.BlockAMD64ORD: {x86.AJPC, x86.AJPS},
	ssa.BlockAMD64NAN: {x86.AJPS, x86.AJPC},
}

var eqfJumps = [2][2]gc.FloatingEQNEJump{
	{{x86.AJNE, 1}, {x86.AJPS, 1}}, // next == b.Succs[0]
	{{x86.AJNE, 1}, {x86.AJPC, 0}}, // next == b.Succs[1]
}
var nefJumps = [2][2]gc.FloatingEQNEJump{
	{{x86.AJNE, 0}, {x86.AJPC, 1}}, // next == b.Succs[0]
	{{x86.AJNE, 0}, {x86.AJPS, 0}}, // next == b.Succs[1]
}
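
// Floating-point equality needs two flags: x == y only when ZF is set and PF
// is clear (PF is set for an unordered compare, i.e. a NaN operand). The
// tables above therefore encode two-jump sequences: EQF reaches its equal
// successor only if neither JNE nor JPS fires, while NEF takes the not-equal
// successor when either condition holds.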

func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
	s.SetLineno(b.Line)

	switch b.Kind {
	case ssa.BlockPlain, ssa.BlockCall, ssa.BlockCheck:
		if b.Succs[0] != next {
			p := gc.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{p, b.Succs[0]})
		}
	case ssa.BlockDefer:
		// defer returns in rax:
		// 0 if we should continue executing
		// 1 if we should jump to deferreturn call
		p := gc.Prog(x86.ATESTL)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x86.REG_AX
		p = gc.Prog(x86.AJNE)
		p.To.Type = obj.TYPE_BRANCH
		s.Branches = append(s.Branches, gc.Branch{p, b.Succs[1]})
		if b.Succs[0] != next {
			p := gc.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{p, b.Succs[0]})
		}
	case ssa.BlockExit:
		gc.Prog(obj.AUNDEF) // tell plive.go that we never reach here
	case ssa.BlockRet:
		gc.Prog(obj.ARET)
	case ssa.BlockRetJmp:
		p := gc.Prog(obj.AJMP)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = gc.Linksym(b.Aux.(*gc.Sym))

	case ssa.BlockAMD64EQF:
		gc.SSAGenFPJump(s, b, next, &eqfJumps)

	case ssa.BlockAMD64NEF:
		gc.SSAGenFPJump(s, b, next, &nefJumps)

	case ssa.BlockAMD64EQ, ssa.BlockAMD64NE,
		ssa.BlockAMD64LT, ssa.BlockAMD64GE,
		ssa.BlockAMD64LE, ssa.BlockAMD64GT,
		ssa.BlockAMD64ULT, ssa.BlockAMD64UGT,
		ssa.BlockAMD64ULE, ssa.BlockAMD64UGE:
		jmp := blockJump[b.Kind]
		likely := b.Likely
		var p *obj.Prog
		switch next {
		case b.Succs[0]:
			p = gc.Prog(jmp.invasm)
			likely *= -1
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{p, b.Succs[1]})
		case b.Succs[1]:
			p = gc.Prog(jmp.asm)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{p, b.Succs[0]})
		default:
			p = gc.Prog(jmp.asm)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{p, b.Succs[0]})
			q := gc.Prog(obj.AJMP)
			q.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{q, b.Succs[1]})
		}

		// liblink reorders the instruction stream as it sees fit.
		// Pass along what we know so liblink can make use of it.
		// TODO: Once we've fully switched to SSA,
		// make liblink leave our output alone.
		switch likely {
		case ssa.BranchUnlikely:
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = 0
		case ssa.BranchLikely:
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = 1
		}

	default:
		b.Unimplementedf("branch not implemented: %s. Control: %s", b.LongString(), b.Control.LongString())
	}
}