math/big: add all architectures to mini-compiler

Step 2 of the mini-compiler: add all the remaining architectures.

Change-Id: I8c5283aa8baa497785a5c15f2248528fa9ae886e
Reviewed-on: https://go-review.googlesource.com/c/go/+/664936
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Alan Donovan <adonovan@google.com>
Auto-Submit: Russ Cox <rsc@golang.org>
This commit is contained in:
Russ Cox 2025-04-10 19:16:22 -04:00 committed by Gopher Robot
parent 8cc98a04ef
commit 2a88106617
9 changed files with 630 additions and 0 deletions

View File

@ -0,0 +1,58 @@
// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package asmgen
import "fmt"
var Arch386 = &Arch{
Name: "386",
WordBits: 32,
WordBytes: 4,
regs: []string{
"BX", "SI", "DI", "BP",
"CX", "DX", "AX", // last, to leave available for hinted allocation
},
op3: x86Op3,
hint: x86Hint,
memOK: true,
subCarryIsBorrow: true,
maxColumns: 1, // not enough registers for more
// Note: It would be nice to not set memIndex and then
// delete all the code in pipe.go that supports it.
// But a few routines, notably lshVU and mulAddVWW,
// benefit dramatically from the use of index registers.
// Perhaps some day we will decide 386 performance
// does not matter enough to keep this code.
memIndex: _386MemIndex,
mov: "MOVL",
adds: "ADDL",
adcs: "ADCL",
subs: "SUBL",
sbcs: "SBBL",
lsh: "SHLL",
lshd: "SHLL",
rsh: "SHRL",
rshd: "SHRL",
and: "ANDL",
or: "ORL",
xor: "XORL",
neg: "NEGL",
lea: "LEAL",
mulWideF: x86MulWide,
addWords: "LEAL (%[2]s)(%[1]s*4), %[3]s",
jmpZero: "TESTL %[1]s, %[1]s; JZ %[2]s",
jmpNonZero: "TESTL %[1]s, %[1]s; JNZ %[2]s",
loopBottom: "SUBL $1, %[1]s; JNZ %[2]s",
loopBottomNeg: "ADDL $1, %[1]s; JNZ %[2]s",
}
func _386MemIndex(a *Asm, off int, ix Reg, p RegPtr) Reg {
return Reg{fmt.Sprintf("%d(%s)(%s*%d)", off, p, ix, a.Arch.WordBytes)}
}

View File

@ -0,0 +1,146 @@
// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package asmgen
var ArchAMD64 = &Arch{
Name: "amd64",
WordBits: 64,
WordBytes: 8,
regs: []string{
"BX", "SI", "DI",
"R8", "R9", "R10", "R11", "R12", "R13", "R14", "R15",
"AX", "DX", "CX", // last to leave available for hinted allocation
},
op3: x86Op3,
hint: x86Hint,
memOK: true,
subCarryIsBorrow: true,
// Note: Not setting memIndex, because code generally runs faster
// if we avoid the use of scaled-index memory references,
// particularly in ADX instructions.
options: map[Option]func(*Asm, string){
OptionAltCarry: amd64JmpADX,
},
mov: "MOVQ",
adds: "ADDQ",
adcs: "ADCQ",
subs: "SUBQ",
sbcs: "SBBQ",
lsh: "SHLQ",
lshd: "SHLQ",
rsh: "SHRQ",
rshd: "SHRQ",
and: "ANDQ",
or: "ORQ",
xor: "XORQ",
neg: "NEGQ",
lea: "LEAQ",
addF: amd64Add,
mulWideF: x86MulWide,
addWords: "LEAQ (%[2]s)(%[1]s*8), %[3]s",
jmpZero: "TESTQ %[1]s, %[1]s; JZ %[2]s",
jmpNonZero: "TESTQ %[1]s, %[1]s; JNZ %[2]s",
loopBottom: "SUBQ $1, %[1]s; JNZ %[2]s",
loopBottomNeg: "ADDQ $1, %[1]s; JNZ %[2]s",
}
func amd64JmpADX(a *Asm, label string) {
a.Printf("\tCMPB ·hasADX(SB), $0; JNZ %s\n", label)
}
func amd64Add(a *Asm, src1, src2 Reg, dst Reg, carry Carry) bool {
if a.Enabled(OptionAltCarry) {
// If OptionAltCarry is enabled, the generator is emitting ADD instructions
// both with and without the AltCarry flag set; the AltCarry flag means to
// use ADOX. Otherwise we have to use ADCX.
// Using regular ADD/ADC would smash both carry flags,
// so we reject anything we can't handled with ADCX/ADOX.
if carry&UseCarry != 0 && carry&(SetCarry|SmashCarry) != 0 {
if carry&AltCarry != 0 {
a.op3("ADOXQ", src1, src2, dst)
} else {
a.op3("ADCXQ", src1, src2, dst)
}
return true
}
if carry&(SetCarry|UseCarry) == SetCarry && a.IsZero(src1) && src2 == dst {
// Clearing carry flag. Caller will add EOL comment.
a.Printf("\tTESTQ AX, AX\n")
return true
}
if carry != KeepCarry {
a.Fatalf("unsupported carry")
}
}
return false
}
// The x86-prefixed functions are shared with Arch386 in 386.go.
func x86Op3(name string) bool {
// As far as a.op3 is concerned, there are no 3-op instructions.
// (We print instructions like MULX ourselves.)
return false
}
func x86Hint(a *Asm, h Hint) string {
switch h {
case HintShiftCount:
return "CX"
case HintMulSrc:
if a.Enabled(OptionAltCarry) { // using MULX
return "DX"
}
return "AX"
case HintMulHi:
if a.Enabled(OptionAltCarry) { // using MULX
return ""
}
return "DX"
}
return ""
}
func x86Suffix(a *Asm) string {
// Note: Not using a.Arch == Arch386 to avoid init cycle.
if a.Arch.Name == "386" {
return "L"
}
return "Q"
}
func x86MulWide(a *Asm, src1, src2, dstlo, dsthi Reg) {
if a.Enabled(OptionAltCarry) {
// Using ADCX/ADOX; use MULX to avoid clearing carry flag.
if src1.name != "DX" {
if src2.name != "DX" {
a.Fatalf("mul src1 or src2 must be DX")
}
src2 = src1
}
a.Printf("\tMULXQ %s, %s, %s\n", src2, dstlo, dsthi)
return
}
if src1.name != "AX" {
if src2.name != "AX" {
a.Fatalf("mulwide src1 or src2 must be AX")
}
src2 = src1
}
if dstlo.name != "AX" {
a.Fatalf("mulwide dstlo must be AX")
}
if dsthi.name != "DX" {
a.Fatalf("mulwide dsthi must be DX")
}
a.Printf("\tMUL%s %s\n", x86Suffix(a), src2)
}

View File

@ -0,0 +1,111 @@
// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package asmgen
var ArchARM64 = &Arch{
Name: "arm64",
WordBits: 64,
WordBytes: 8,
CarrySafeLoop: true,
regs: []string{
// R18 is the platform register.
// R27 is the assembler/linker temporary (which we could potentially use but don't).
// R28 is g.
// R29 is FP.
// R30 is LR.
"R0", "R1", "R2", "R3", "R4", "R5", "R6", "R7", "R8", "R9",
"R10", "R11", "R12", "R13", "R14", "R15", "R16", "R17", "R19",
"R20", "R21", "R22", "R23", "R24", "R25", "R26",
},
reg0: "ZR",
mov: "MOVD",
add: "ADD",
adds: "ADDS",
adc: "ADC",
adcs: "ADCS",
sub: "SUB",
subs: "SUBS",
sbc: "SBC",
sbcs: "SBCS",
mul: "MUL",
mulhi: "UMULH",
lsh: "LSL",
rsh: "LSR",
and: "AND",
or: "ORR",
xor: "EOR",
addWords: "ADD %[1]s<<3, %[2]s, %[3]s",
jmpZero: "CBZ %s, %s",
jmpNonZero: "CBNZ %s, %s",
loadIncN: arm64LoadIncN,
loadDecN: arm64LoadDecN,
storeIncN: arm64StoreIncN,
storeDecN: arm64StoreDecN,
}
func arm64LoadIncN(a *Asm, p RegPtr, regs []Reg) {
if len(regs) == 1 {
a.Printf("\tMOVD.P %d(%s), %s\n", a.Arch.WordBytes, p, regs[0])
return
}
a.Printf("\tLDP.P %d(%s), (%s, %s)\n", len(regs)*a.Arch.WordBytes, p, regs[0], regs[1])
var i int
for i = 2; i+2 <= len(regs); i += 2 {
a.Printf("\tLDP %d(%s), (%s, %s)\n", (i-len(regs))*a.Arch.WordBytes, p, regs[i], regs[i+1])
}
if i < len(regs) {
a.Printf("\tMOVD %d(%s), %s\n", -1*a.Arch.WordBytes, p, regs[i])
}
}
func arm64LoadDecN(a *Asm, p RegPtr, regs []Reg) {
if len(regs) == 1 {
a.Printf("\tMOVD.W -%d(%s), %s\n", a.Arch.WordBytes, p, regs[0])
return
}
a.Printf("\tLDP.W %d(%s), (%s, %s)\n", -len(regs)*a.Arch.WordBytes, p, regs[len(regs)-1], regs[len(regs)-2])
var i int
for i = 2; i+2 <= len(regs); i += 2 {
a.Printf("\tLDP %d(%s), (%s, %s)\n", i*a.Arch.WordBytes, p, regs[len(regs)-1-i], regs[len(regs)-2-i])
}
if i < len(regs) {
a.Printf("\tMOVD %d(%s), %s\n", i*a.Arch.WordBytes, p, regs[0])
}
}
func arm64StoreIncN(a *Asm, p RegPtr, regs []Reg) {
if len(regs) == 1 {
a.Printf("\tMOVD.P %s, %d(%s)\n", regs[0], a.Arch.WordBytes, p)
return
}
a.Printf("\tSTP.P (%s, %s), %d(%s)\n", regs[0], regs[1], len(regs)*a.Arch.WordBytes, p)
var i int
for i = 2; i+2 <= len(regs); i += 2 {
a.Printf("\tSTP (%s, %s), %d(%s)\n", regs[i], regs[i+1], (i-len(regs))*a.Arch.WordBytes, p)
}
if i < len(regs) {
a.Printf("\tMOVD %s, %d(%s)\n", regs[i], -1*a.Arch.WordBytes, p)
}
}
func arm64StoreDecN(a *Asm, p RegPtr, regs []Reg) {
if len(regs) == 1 {
a.Printf("\tMOVD.W %s, -%d(%s)\n", regs[0], a.Arch.WordBytes, p)
return
}
a.Printf("\tSTP.W (%s, %s), %d(%s)\n", regs[len(regs)-1], regs[len(regs)-2], -len(regs)*a.Arch.WordBytes, p)
var i int
for i = 2; i+2 <= len(regs); i += 2 {
a.Printf("\tSTP (%s, %s), %d(%s)\n", regs[len(regs)-1-i], regs[len(regs)-2-i], i*a.Arch.WordBytes, p)
}
if i < len(regs) {
a.Printf("\tMOVD %s, %d(%s)\n", regs[0], i*a.Arch.WordBytes, p)
}
}

View File

@ -0,0 +1,52 @@
// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build ignore
// This program can be compiled with -S to produce a “cheat sheet”
// for filling out a new Arch: the compiler will show you how to implement
// the various operations.
//
// Usage (replace TARGET with your target architecture):
//
// GOOS=linux GOARCH=TARGET go build -gcflags='-p=cheat -S' cheat.go
package p
import "math/bits"
func mov(x, y uint) uint { return y }
func zero() uint { return 0 }
func add(x, y uint) uint { return x + y }
func adds(x, y, c uint) (uint, uint) { return bits.Add(x, y, 0) }
func adcs(x, y, c uint) (uint, uint) { return bits.Add(x, y, c) }
func sub(x, y uint) uint { return x + y }
func subs(x, y uint) (uint, uint) { return bits.Sub(x, y, 0) }
func sbcs(x, y, c uint) (uint, uint) { return bits.Sub(x, y, c) }
func mul(x, y uint) uint { return x * y }
func mulWide(x, y uint) (uint, uint) { return bits.Mul(x, y) }
func lsh(x, s uint) uint { return x << s }
func rsh(x, s uint) uint { return x >> s }
func and(x, y uint) uint { return x & y }
func or(x, y uint) uint { return x | y }
func xor(x, y uint) uint { return x ^ y }
func neg(x uint) uint { return -x }
func loop(x int) int {
s := 0
for i := 1; i < x; i++ {
s += i
if s == 98 {
return 99
}
if s == 99 {
return 100
}
if s == 0 {
return 101
}
s += 2
}
return s
}
func mem(x *[10]struct{ a, b uint }, i int) uint { return x[i].b }

View File

@ -0,0 +1,45 @@
// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package asmgen
var ArchLoong64 = &Arch{
Name: "loong64",
WordBits: 64,
WordBytes: 8,
CarrySafeLoop: true,
regs: []string{
// R0 is set to 0.
// R1 is LR.
// R2 is ???
// R3 is SP.
// R22 is g.
// R28 and R29 are our virtual carry flags.
// R30 is the linker/assembler temp, which we use too.
"R4", "R5", "R6", "R7", "R8", "R9",
"R10", "R11", "R12", "R13", "R14", "R15", "R16", "R17", "R18", "R19",
"R20", "R21", "R23", "R24", "R25", "R26", "R27",
"R31",
},
reg0: "R0",
regCarry: "R28",
regAltCarry: "R29",
regTmp: "R30",
mov: "MOVV",
add: "ADDVU",
sub: "SUBVU",
sltu: "SGTU",
mul: "MULV",
mulhi: "MULHVU",
lsh: "SLLV",
rsh: "SRLV",
and: "AND",
or: "OR",
xor: "XOR",
jmpZero: "BEQ %s, %s",
jmpNonZero: "BNE %s, %s",
}

View File

@ -15,9 +15,16 @@
package asmgen
var arches = []*Arch{
Arch386,
ArchAMD64,
ArchARM,
ArchARM64,
ArchLoong64,
ArchMIPS,
ArchMIPS64x,
ArchPPC64x,
ArchRISCV64,
ArchS390X,
}
// generate returns the file name and content of the generated assembly for the given architecture.

View File

@ -0,0 +1,64 @@
// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package asmgen
var ArchPPC64x = &Arch{
Name: "ppc64x",
Build: "ppc64 || ppc64le",
WordBits: 64,
WordBytes: 8,
CarrySafeLoop: true,
// Note: The old, hand-written ppc64x assembly used MOVDU
// to avoid explicit pointer updates in a few routines, but the new
// generated code runs just as fast, so we haven't bothered to try
// to add that back. (It's not trivial; you'd have to keep the pointers
// shifted one word in order to make the semantics work.)
//
// The old assembly also used some complex vector instructions
// to implement lshVU and rshVU, but the generated code that uses
// ordinary integer instructions is much faster than the vector code was,
// at least on the power10 gomote.
regs: []string{
// R0 is 0 by convention.
// R1 is SP.
// R2 is TOC.
// R30 is g.
// R31 is the assembler/linker temporary (which we use too).
"R3", "R4", "R5", "R6", "R7", "R8", "R9",
"R10", "R11", "R12" /*R13 is TLS*/, "R14", "R15", "R16", "R17", "R18", "R19",
"R20", "R21", "R22", "R23", "R24", "R25", "R26", "R27", "R28", "R29",
},
reg0: "R0",
regTmp: "R31",
// Note: Could write an addF and subF to use ADDZE and SUBZE,
// but we have R0 so it doesn't seem to matter much.
mov: "MOVD",
add: "ADD",
adds: "ADDC",
adcs: "ADDE",
sub: "SUB",
subs: "SUBC",
sbcs: "SUBE",
mul: "MULLD",
mulhi: "MULHDU",
lsh: "SLD",
rsh: "SRD",
and: "ANDCC", // regular AND does not accept immediates
or: "OR",
xor: "XOR",
jmpZero: "CMP %[1]s, $0; BEQ %[2]s",
jmpNonZero: "CMP %s, $0; BNE %s",
// Note: Using CTR means that we could free the count register
// during the loop body, but the portable logic doesn't know that,
// and we're not hurting for registers.
loopTop: "CMP %[1]s, $0; BEQ %[2]s; MOVD %[1]s, CTR",
loopBottom: "BDNZ %[2]s",
}

View File

@ -0,0 +1,47 @@
// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package asmgen
var ArchRISCV64 = &Arch{
Name: "riscv64",
WordBits: 64,
WordBytes: 8,
CarrySafeLoop: true,
regs: []string{
// X0 is zero.
// X1 is LR.
// X2 is SP.
// X3 is SB.
// X4 is TP.
// X27 is g.
// X28 and X29 are our virtual carry flags.
// X31 is the assembler/linker temporary (which we use too).
"X5", "X6", "X7", "X8", "X9",
"X10", "X11", "X12", "X13", "X14", "X15", "X16", "X17", "X18", "X19",
"X20", "X21", "X22", "X23", "X24", "X25", "X26",
"X30",
},
reg0: "X0",
regCarry: "X28",
regAltCarry: "X29",
regTmp: "X31",
mov: "MOV",
add: "ADD",
sub: "SUB",
mul: "MUL",
mulhi: "MULHU",
lsh: "SLL",
rsh: "SRL",
and: "AND",
or: "OR",
xor: "XOR",
sltu: "SLTU",
jmpZero: "BEQZ %s, %s",
jmpNonZero: "BNEZ %s, %s",
}

View File

@ -0,0 +1,100 @@
// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package asmgen
var ArchS390X = &Arch{
Name: "s390x",
WordBits: 64,
WordBytes: 8,
CarrySafeLoop: true,
regs: []string{
// R0 is 0 by convention in this code (see setup).
// R10 is the assembler/linker temporary.
// R11 is a second assembler/linker temporary, for wide multiply.
// We allow allocating R10 and R11 so that we can use them as
// direct multiplication targets while tracking whether they're in use.
// R13 is g.
// R14 is LR.
// R15 is SP.
"R1", "R2", "R3", "R4", "R5", "R6", "R7", "R8", "R9",
"R10", "R11", "R12",
},
reg0: "R0",
regTmp: "R10",
setup: s390xSetup,
maxColumns: 2,
op3: s390xOp3,
hint: s390xHint,
// Instruction reference: chapter 7 of
// https://www.ibm.com/docs/en/SSQ2R2_15.0.0/com.ibm.tpf.toolkit.hlasm.doc/dz9zr006.pdf
mov: "MOVD",
adds: "ADDC", // ADD is an alias for ADDC, sets carry
adcs: "ADDE",
subs: "SUBC", // SUB is an alias for SUBC, sets carry
sbcs: "SUBE",
mulWideF: s390MulWide,
lsh: "SLD",
rsh: "SRD",
and: "AND",
or: "OR",
xor: "XOR",
neg: "NEG",
lea: "LAY", // LAY because LA only accepts positive offsets
jmpZero: "CMPBEQ %s, $0, %s",
jmpNonZero: "CMPBNE %s, $0, %s",
}
func s390xSetup(f *Func) {
a := f.Asm
if f.Name == "addVV" || f.Name == "subVV" {
// S390x, unlike every other system, has vector instructions
// that can propagate carry bits during parallel adds (VACC).
// Instead of trying to generate that for this one system,
// jump to the hand-written code in arithvec_s390x.s.
a.Printf("\tMOVB ·hasVX(SB), R1\n")
a.Printf("\tCMPBEQ R1, $0, novec\n")
a.Printf("\tJMP ·%svec(SB)\n", f.Name)
a.Printf("novec:\n")
}
a.Printf("\tMOVD $0, R0\n")
}
func s390xOp3(name string) bool {
if name == "AND" { // AND with immediate only takes imm, reg; not imm, reg, reg.
return false
}
return true
}
func s390xHint(_ *Asm, h Hint) string {
switch h {
case HintMulSrc:
return "R11"
case HintMulHi:
return "R10"
}
return ""
}
func s390MulWide(a *Asm, src1, src2, dstlo, dsthi Reg) {
if src1.name != "R11" && src2.name != "R11" {
a.Fatalf("mulWide src1 or src2 must be R11")
}
if dstlo.name != "R11" {
a.Fatalf("mulWide dstlo must be R11")
}
if dsthi.name != "R10" {
a.Fatalf("mulWide dsthi must be R10")
}
src := src1
if src.name == "R11" {
src = src2
}
a.Printf("\tMLGR %s, R10\n", src)
}