diff --git a/src/cmd/asm/internal/asm/endtoend_test.go b/src/cmd/asm/internal/asm/endtoend_test.go
index 5c7a024435..0759b7d10f 100644
--- a/src/cmd/asm/internal/asm/endtoend_test.go
+++ b/src/cmd/asm/internal/asm/endtoend_test.go
@@ -390,7 +390,12 @@ func TestARM64Errors(t *testing.T) {
 }
 
 func TestAMD64EndToEnd(t *testing.T) {
-	testEndToEnd(t, "amd64", "amd64")
+	defer func(old string) { objabi.GOAMD64 = old }(objabi.GOAMD64)
+	for _, goamd64 := range []string{"normaljumps", "alignedjumps"} {
+		t.Logf("GOAMD64=%s", goamd64)
+		objabi.GOAMD64 = goamd64
+		testEndToEnd(t, "amd64", "amd64")
+	}
 }
 
 func Test386Encoder(t *testing.T) {
diff --git a/src/cmd/dist/build.go b/src/cmd/dist/build.go
index 9e2b4f33b8..d22ee1d361 100644
--- a/src/cmd/dist/build.go
+++ b/src/cmd/dist/build.go
@@ -31,6 +31,7 @@ var (
 	goos     string
 	goarm    string
 	go386    string
+	goamd64  string
 	gomips   string
 	gomips64 string
 	goppc64  string
@@ -151,6 +152,12 @@ func xinit() {
 	}
 	go386 = b
 
+	b = os.Getenv("GOAMD64")
+	if b == "" {
+		b = "alignedjumps"
+	}
+	goamd64 = b
+
 	b = os.Getenv("GOMIPS")
 	if b == "" {
 		b = "hardfloat"
@@ -223,6 +230,7 @@ func xinit() {
 
 	// For tools being invoked but also for os.ExpandEnv.
 	os.Setenv("GO386", go386)
+	os.Setenv("GOAMD64", goamd64)
 	os.Setenv("GOARCH", goarch)
 	os.Setenv("GOARM", goarm)
 	os.Setenv("GOHOSTARCH", gohostarch)
@@ -1163,6 +1171,9 @@ func cmdenv() {
 	if goarch == "386" {
 		xprintf(format, "GO386", go386)
 	}
+	if goarch == "amd64" {
+		xprintf(format, "GOAMD64", goamd64)
+	}
 	if goarch == "mips" || goarch == "mipsle" {
 		xprintf(format, "GOMIPS", gomips)
 	}
diff --git a/src/cmd/dist/buildruntime.go b/src/cmd/dist/buildruntime.go
index 2744951597..f11933c925 100644
--- a/src/cmd/dist/buildruntime.go
+++ b/src/cmd/dist/buildruntime.go
@@ -42,6 +42,7 @@ func mkzversion(dir, file string) {
 //
 //	const defaultGOROOT =
 //	const defaultGO386 =
+//	const defaultGOAMD64 =
 //	const defaultGOARM =
 //	const defaultGOMIPS =
 //	const defaultGOMIPS64 =
@@ -71,6 +72,7 @@ func mkzbootstrap(file string) {
 	fmt.Fprintf(&buf, "import \"runtime\"\n")
 	fmt.Fprintln(&buf)
 	fmt.Fprintf(&buf, "const defaultGO386 = `%s`\n", go386)
+	fmt.Fprintf(&buf, "const defaultGOAMD64 = `%s`\n", goamd64)
 	fmt.Fprintf(&buf, "const defaultGOARM = `%s`\n", goarm)
 	fmt.Fprintf(&buf, "const defaultGOMIPS = `%s`\n", gomips)
 	fmt.Fprintf(&buf, "const defaultGOMIPS64 = `%s`\n", gomips64)
diff --git a/src/cmd/go/alldocs.go b/src/cmd/go/alldocs.go
index 8fe48810e4..81b4687a6a 100644
--- a/src/cmd/go/alldocs.go
+++ b/src/cmd/go/alldocs.go
@@ -1754,6 +1754,9 @@
 // 	GO386
 // 		For GOARCH=386, the floating point instruction set.
 // 		Valid values are 387, sse2.
+// 	GOAMD64
+// 		For GOARCH=amd64, jumps can optionally be aligned such that they do not end on
+// 		or cross 32 byte boundaries. Valid values are alignedjumps (default), normaljumps.
 // 	GOMIPS
 // 		For GOARCH=mips{,le}, whether to use floating point instructions.
 // 		Valid values are hardfloat (default), softfloat.
diff --git a/src/cmd/go/internal/cfg/cfg.go b/src/cmd/go/internal/cfg/cfg.go
index 7f8f8e92be..21f55e852f 100644
--- a/src/cmd/go/internal/cfg/cfg.go
+++ b/src/cmd/go/internal/cfg/cfg.go
@@ -241,6 +241,7 @@ var (
 	// Used in envcmd.MkEnv and build ID computations.
 	GOARM    = envOr("GOARM", fmt.Sprint(objabi.GOARM))
 	GO386    = envOr("GO386", objabi.GO386)
+	GOAMD64  = envOr("GOAMD64", objabi.GOAMD64)
 	GOMIPS   = envOr("GOMIPS", objabi.GOMIPS)
 	GOMIPS64 = envOr("GOMIPS64", objabi.GOMIPS64)
 	GOPPC64  = envOr("GOPPC64", fmt.Sprintf("%s%d", "power", objabi.GOPPC64))
@@ -266,6 +267,8 @@ func GetArchEnv() (key, val string) {
 		return "GOARM", GOARM
 	case "386":
 		return "GO386", GO386
+	case "amd64":
+		return "GOAMD64", GOAMD64
 	case "mips", "mipsle":
 		return "GOMIPS", GOMIPS
 	case "mips64", "mips64le":
diff --git a/src/cmd/go/internal/help/helpdoc.go b/src/cmd/go/internal/help/helpdoc.go
index 693de8ff49..9583b3f327 100644
--- a/src/cmd/go/internal/help/helpdoc.go
+++ b/src/cmd/go/internal/help/helpdoc.go
@@ -582,6 +582,9 @@ Architecture-specific environment variables:
 	GO386
 		For GOARCH=386, the floating point instruction set.
 		Valid values are 387, sse2.
+	GOAMD64
+		For GOARCH=amd64, jumps can optionally be aligned such that they do not end on
+		or cross 32 byte boundaries. Valid values are alignedjumps (default), normaljumps.
 	GOMIPS
 		For GOARCH=mips{,le}, whether to use floating point instructions.
 		Valid values are hardfloat (default), softfloat.
diff --git a/src/cmd/internal/obj/x86/asm6.go b/src/cmd/internal/obj/x86/asm6.go
index 5597aab019..3eaed2ab54 100644
--- a/src/cmd/internal/obj/x86/asm6.go
+++ b/src/cmd/internal/obj/x86/asm6.go
@@ -1838,6 +1838,12 @@ func fillnop(p []byte, n int) {
 	}
 }
 
+func noppad(ctxt *obj.Link, s *obj.LSym, c int32, pad int32) int32 {
+	s.Grow(int64(c) + int64(pad))
+	fillnop(s.P[c:], int(pad))
+	return c + pad
+}
+
 func spadjop(ctxt *obj.Link, l, q obj.As) obj.As {
 	if ctxt.Arch.Family != sys.AMD64 || ctxt.Arch.PtrSize == 4 {
 		return l
@@ -1845,7 +1851,197 @@ func spadjop(ctxt *obj.Link, l, q obj.As) obj.As {
 	return q
 }
 
+// If the environment variable GOAMD64=alignedjumps, the assembler will ensure that
+// no standalone or macro-fused jump will straddle or end on a 32 byte boundary
+// by inserting NOPs before the jumps.
+func isJump(p *obj.Prog) bool {
+	return p.Pcond != nil || p.As == obj.AJMP || p.As == obj.ACALL ||
+		p.As == obj.ARET || p.As == obj.ADUFFCOPY || p.As == obj.ADUFFZERO
+}
+
+// lookForJCC returns the first real instruction following p, if that instruction is a conditional
+// jump. Otherwise, nil is returned.
+func lookForJCC(p *obj.Prog) *obj.Prog {
+	// Skip any PCDATA, FUNCDATA or NOP instructions
+	var q *obj.Prog
+	for q = p.Link; q != nil && (q.As == obj.APCDATA || q.As == obj.AFUNCDATA || q.As == obj.ANOP); q = q.Link {
+	}
+
+	if q == nil || q.Pcond == nil || p.As == obj.AJMP || p.As == obj.ACALL {
+		return nil
+	}
+
+	switch q.As {
+	case AJOS, AJOC, AJCS, AJCC, AJEQ, AJNE, AJLS, AJHI,
+		AJMI, AJPL, AJPS, AJPC, AJLT, AJGE, AJLE, AJGT:
+	default:
+		return nil
+	}
+
+	return q
+}
+
+// fusedJump determines whether p can be fused with a subsequent conditional jump instruction.
+// If it can, we return true followed by the total size of the fused jump. If it can't, we return false.
+// Macro fusion rules are derived from the Intel Optimization Manual (April 2019) section 3.4.2.2.
+func fusedJump(p *obj.Prog) (bool, uint8) {
+	var fusedSize uint8
+
+	// The first instruction in a macro-fused pair may be preceded by the LOCK prefix,
+	// or possibly an XACQUIRE/XRELEASE prefix followed by a LOCK prefix. If it is, we
+	// need to be careful to insert any padding before the locks rather than directly after them.
+
+	if p.As == AXRELEASE || p.As == AXACQUIRE {
+		fusedSize += p.Isize
+		for p = p.Link; p != nil && (p.As == obj.APCDATA || p.As == obj.AFUNCDATA); p = p.Link {
+		}
+		if p == nil {
+			return false, 0
+		}
+	}
+	if p.As == ALOCK {
+		fusedSize += p.Isize
+		for p = p.Link; p != nil && (p.As == obj.APCDATA || p.As == obj.AFUNCDATA); p = p.Link {
+		}
+		if p == nil {
+			return false, 0
+		}
+	}
+	cmp := p.As == ACMPB || p.As == ACMPL || p.As == ACMPQ || p.As == ACMPW
+
+	cmpAddSub := p.As == AADDB || p.As == AADDL || p.As == AADDW || p.As == AADDQ ||
+		p.As == ASUBB || p.As == ASUBL || p.As == ASUBW || p.As == ASUBQ || cmp
+
+	testAnd := p.As == ATESTB || p.As == ATESTL || p.As == ATESTQ || p.As == ATESTW ||
+		p.As == AANDB || p.As == AANDL || p.As == AANDQ || p.As == AANDW
+
+	incDec := p.As == AINCB || p.As == AINCL || p.As == AINCQ || p.As == AINCW ||
+		p.As == ADECB || p.As == ADECL || p.As == ADECQ || p.As == ADECW
+
+	if !cmpAddSub && !testAnd && !incDec {
+		return false, 0
+	}
+
+	if !incDec {
+		var argOne obj.AddrType
+		var argTwo obj.AddrType
+		if cmp {
+			argOne = p.From.Type
+			argTwo = p.To.Type
+		} else {
+			argOne = p.To.Type
+			argTwo = p.From.Type
+		}
+		if argOne == obj.TYPE_REG {
+			if argTwo != obj.TYPE_REG && argTwo != obj.TYPE_CONST && argTwo != obj.TYPE_MEM {
+				return false, 0
+			}
+		} else if argOne == obj.TYPE_MEM {
+			if argTwo != obj.TYPE_REG {
+				return false, 0
+			}
+		} else {
+			return false, 0
+		}
+	}
+
+	fusedSize += p.Isize
+	jmp := lookForJCC(p)
+	if jmp == nil {
+		return false, 0
+	}
+
+	fusedSize += jmp.Isize
+
+	if testAnd {
+		return true, fusedSize
+	}
+
+	if jmp.As == AJOC || jmp.As == AJOS || jmp.As == AJMI ||
+		jmp.As == AJPL || jmp.As == AJPS || jmp.As == AJPC {
+		return false, 0
+	}
+
+	if cmpAddSub {
+		return true, fusedSize
+	}
+
+	if jmp.As == AJCS || jmp.As == AJCC || jmp.As == AJHI || jmp.As == AJLS {
+		return false, 0
+	}
+
+	return true, fusedSize
+}
+
+type padJumpsCtx int32
+
+func makePjcCtx(ctxt *obj.Link) padJumpsCtx {
+	// Disable jump padding on 32 bit builds by setting
+	// padJumps to 0.
+	if ctxt.Arch.Family == sys.I386 {
+		return padJumpsCtx(0)
+	}
+
+	// Disable jump padding for hand written assembly code.
+	if ctxt.IsAsm {
+		return padJumpsCtx(0)
+	}
+
+	if objabi.GOAMD64 != "alignedjumps" {
+		return padJumpsCtx(0)
+
+	}
+
+	return padJumpsCtx(32)
+}
+
+// padJump detects whether the instruction being assembled is a standalone or a macro-fused
+// jump that needs to be padded. If it is, NOPs are inserted to ensure that the jump does
+// not cross or end on a 32 byte boundary.
+func (pjc padJumpsCtx) padJump(ctxt *obj.Link, s *obj.LSym, p *obj.Prog, c int32) int32 {
+	if pjc == 0 {
+		return c
+	}
+
+	var toPad int32
+	fj, fjSize := fusedJump(p)
+	mask := int32(pjc - 1)
+	if fj {
+		if (c&mask)+int32(fjSize) >= int32(pjc) {
+			toPad = int32(pjc) - (c & mask)
+		}
+	} else if isJump(p) {
+		if (c&mask)+int32(p.Isize) >= int32(pjc) {
+			toPad = int32(pjc) - (c & mask)
+		}
+	}
+	if toPad <= 0 {
+		return c
+	}
+
+	return noppad(ctxt, s, c, toPad)
+}
+
+// reAssemble is called if an instruction's size changes during assembly. If
+// it does and the instruction is a standalone or a macro-fused jump, we need to
+// reassemble.
+func (pjc padJumpsCtx) reAssemble(p *obj.Prog) bool {
+	if pjc == 0 {
+		return false
+	}
+
+	fj, _ := fusedJump(p)
+	return fj || isJump(p)
+}
+
+type nopPad struct {
+	p *obj.Prog // Instruction before the pad
+	n int32     // Size of the pad
+}
+
 func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
+	pjc := makePjcCtx(ctxt)
+
 	if s.P != nil {
 		return
 	}
@@ -1903,6 +2099,7 @@ func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
 	var n int
 	var c int32
 	errors := ctxt.Errors
+	var nops []nopPad // Padding for a particular assembly (reuse slice storage if multiple assemblies)
 	for {
 		// This loop continues while there are reasons to re-assemble
 		// whole block, like the presence of long forward jumps.
@@ -1913,9 +2110,13 @@ func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
 		s.R = s.R[:0]
 		s.P = s.P[:0]
 		c = 0
+		var pPrev *obj.Prog
+		nops = nops[:0]
 		for p := s.Func.Text; p != nil; p = p.Link {
+			c0 := c
+			c = pjc.padJump(ctxt, s, p, c)
 
-			if (p.Back&branchLoopHead != 0) && c&(loopAlign-1) != 0 {
+			if maxLoopPad > 0 && p.Back&branchLoopHead != 0 && c&(loopAlign-1) != 0 {
 				// pad with NOPs
 				v := -c & (loopAlign - 1)
 
@@ -1954,11 +2155,21 @@ func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
 			m := ab.Len()
 			if int(p.Isize) != m {
 				p.Isize = uint8(m)
+				if pjc.reAssemble(p) {
+					// We need to re-assemble here to check for jumps and fused jumps
+					// that span or end on 32 byte boundaries.
+					reAssemble = true
+				}
 			}
 
 			s.Grow(p.Pc + int64(m))
 			copy(s.P[p.Pc:], ab.Bytes())
+			// If there was padding, remember it.
+			if pPrev != nil && !ctxt.IsAsm && c > c0 {
+				nops = append(nops, nopPad{p: pPrev, n: c - c0})
+			}
 			c += int32(m)
+			pPrev = p
 		}
 
 		n++
@@ -1973,6 +2184,12 @@ func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
 			return
 		}
 	}
+	// splice padding nops into Progs
+	for _, n := range nops {
+		pp := n.p
+		np := &obj.Prog{Link: pp.Link, Ctxt: pp.Ctxt, As: obj.ANOP, Pos: pp.Pos.WithNotStmt(), Pc: pp.Pc + int64(pp.Isize), Isize: uint8(n.n)}
+		pp.Link = np
+	}
 
 	s.Size = int64(c)
 
diff --git a/src/cmd/internal/objabi/util.go b/src/cmd/internal/objabi/util.go
index 8d05a6b735..72dd5856f8 100644
--- a/src/cmd/internal/objabi/util.go
+++ b/src/cmd/internal/objabi/util.go
@@ -25,6 +25,7 @@ var (
 	GOARCH   = envOr("GOARCH", defaultGOARCH)
 	GOOS     = envOr("GOOS", defaultGOOS)
 	GO386    = envOr("GO386", defaultGO386)
+	GOAMD64  = goamd64()
 	GOARM    = goarm()
 	GOMIPS   = gomips()
 	GOMIPS64 = gomips64()
@@ -39,6 +40,15 @@ const (
 	MachoRelocOffset = 2048 // reserve enough space for ELF relocations
 )
 
+func goamd64() string {
+	switch v := envOr("GOAMD64", defaultGOAMD64); v {
+	case "normaljumps", "alignedjumps":
+		return v
+	}
+	log.Fatalf("Invalid GOAMD64 value. Must be normaljumps or alignedjumps.")
+	panic("unreachable")
+}
+
 func goarm() int {
 	switch v := envOr("GOARM", defaultGOARM); v {
 	case "5":
diff --git a/src/cmd/link/internal/amd64/obj.go b/src/cmd/link/internal/amd64/obj.go
index 645547cb3e..1fa8cbb4d0 100644
--- a/src/cmd/link/internal/amd64/obj.go
+++ b/src/cmd/link/internal/amd64/obj.go
@@ -39,8 +39,13 @@ import (
 func Init() (*sys.Arch, ld.Arch) {
 	arch := sys.ArchAMD64
 
+	fa := funcAlign
+	if objabi.GOAMD64 == "alignedjumps" {
+		fa = 32
+	}
+
 	theArch := ld.Arch{
-		Funcalign:  funcAlign,
+		Funcalign:  fa,
 		Maxalign:   maxAlign,
 		Minalign:   minAlign,
 		Dwarfregsp: dwarfRegSP,
diff --git a/src/internal/cfg/cfg.go b/src/internal/cfg/cfg.go
index bdbe9df3e7..e40b7b4d1a 100644
--- a/src/internal/cfg/cfg.go
+++ b/src/internal/cfg/cfg.go
@@ -33,6 +33,7 @@ const KnownEnv = `
 	GCCGO
 	GO111MODULE
 	GO386
+	GOAMD64
 	GOARCH
 	GOARM
 	GOBIN