runtime: use stp/ldp to save and restore all registers on arm64

Async preemption needs to save and restore almost all of the registers.
Currently this is done with ldr and str on arm64. We can use ldp and stp
instead, as they are more efficient.

Change-Id: Ida5a6f0a8d825a56af607ba2c2cd91fdc2e8f67f
Reviewed-on: https://go-review.googlesource.com/c/go/+/379715
Reviewed-by: Cherry Mui <cherryyz@google.com>
Trust: Eric Fang <eric.fang@arm.com>
Run-TryBot: Eric Fang <eric.fang@arm.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
This commit is contained in:
eric fang 2022-01-19 04:02:25 +00:00 committed by Eric Fang
parent 301fd8ac8b
commit 2e9facbdd4
2 changed files with 80 additions and 139 deletions

View File

@ -122,7 +122,7 @@ func header(arch string) {
fmt.Fprintf(out, "// Code generated by mkpreempt.go; DO NOT EDIT.\n\n") fmt.Fprintf(out, "// Code generated by mkpreempt.go; DO NOT EDIT.\n\n")
if beLe[arch] { if beLe[arch] {
base := arch[:len(arch)-1] base := arch[:len(arch)-1]
fmt.Fprintf(out, "//go:build %s || %sle\n", base, base) fmt.Fprintf(out, "//go:build %s || %sle\n\n", base, base)
} }
fmt.Fprintf(out, "#include \"go_asm.h\"\n") fmt.Fprintf(out, "#include \"go_asm.h\"\n")
fmt.Fprintf(out, "#include \"textflag.h\"\n\n") fmt.Fprintf(out, "#include \"textflag.h\"\n\n")
@ -147,8 +147,9 @@ type layout struct {
type regPos struct { type regPos struct {
pos int pos int
op string saveOp string
reg string restoreOp string
reg string
// If this register requires special save and restore, these // If this register requires special save and restore, these
// give those operations with a %d placeholder for the stack // give those operations with a %d placeholder for the stack
@ -157,7 +158,12 @@ type regPos struct {
} }
func (l *layout) add(op, reg string, size int) { func (l *layout) add(op, reg string, size int) {
l.regs = append(l.regs, regPos{op: op, reg: reg, pos: l.stack}) l.regs = append(l.regs, regPos{saveOp: op, restoreOp: op, reg: reg, pos: l.stack})
l.stack += size
}
func (l *layout) add2(sop, rop, reg string, size int) {
l.regs = append(l.regs, regPos{saveOp: sop, restoreOp: rop, reg: reg, pos: l.stack})
l.stack += size l.stack += size
} }
@ -171,7 +177,7 @@ func (l *layout) save() {
if reg.save != "" { if reg.save != "" {
p(reg.save, reg.pos) p(reg.save, reg.pos)
} else { } else {
p("%s %s, %d(%s)", reg.op, reg.reg, reg.pos, l.sp) p("%s %s, %d(%s)", reg.saveOp, reg.reg, reg.pos, l.sp)
} }
} }
} }
@ -182,7 +188,7 @@ func (l *layout) restore() {
if reg.restore != "" { if reg.restore != "" {
p(reg.restore, reg.pos) p(reg.restore, reg.pos)
} else { } else {
p("%s %d(%s), %s", reg.op, reg.pos, l.sp, reg.reg) p("%s %d(%s), %s", reg.restoreOp, reg.pos, l.sp, reg.reg)
} }
} }
} }
@ -324,12 +330,13 @@ func genARM64() {
// R27 (REGTMP), R28 (g), R29 (FP), R30 (LR), R31 (SP) are special // R27 (REGTMP), R28 (g), R29 (FP), R30 (LR), R31 (SP) are special
// and not saved here. // and not saved here.
var l = layout{sp: "RSP", stack: 8} // add slot to save PC of interrupted instruction var l = layout{sp: "RSP", stack: 8} // add slot to save PC of interrupted instruction
for i := 0; i <= 26; i++ { for i := 0; i < 26; i += 2 {
if i == 18 { if i == 18 {
i--
continue // R18 is not used, skip continue // R18 is not used, skip
} }
reg := fmt.Sprintf("R%d", i) reg := fmt.Sprintf("(R%d, R%d)", i, i+1)
l.add("MOVD", reg, 8) l.add2("STP", "LDP", reg, 16)
} }
// Add flag registers. // Add flag registers.
l.addSpecial( l.addSpecial(
@ -342,9 +349,9 @@ func genARM64() {
8) 8)
// TODO: FPCR? I don't think we'll change it, so no need to save. // TODO: FPCR? I don't think we'll change it, so no need to save.
// Add floating point registers F0-F31. // Add floating point registers F0-F31.
for i := 0; i <= 31; i++ { for i := 0; i < 31; i += 2 {
reg := fmt.Sprintf("F%d", i) reg := fmt.Sprintf("(F%d, F%d)", i, i+1)
l.add("FMOVD", reg, 8) l.add2("FSTPD", "FLDPD", reg, 16)
} }
if l.stack%16 != 0 { if l.stack%16 != 0 {
l.stack += 8 // SP needs 16-byte alignment l.stack += 8 // SP needs 16-byte alignment
@ -353,10 +360,8 @@ func genARM64() {
// allocate frame, save PC of interrupted instruction (in LR) // allocate frame, save PC of interrupted instruction (in LR)
p("MOVD R30, %d(RSP)", -l.stack) p("MOVD R30, %d(RSP)", -l.stack)
p("SUB $%d, RSP", l.stack) p("SUB $%d, RSP", l.stack)
p("#ifdef GOOS_linux")
p("MOVD R29, -8(RSP)") // save frame pointer (only used on Linux) p("MOVD R29, -8(RSP)") // save frame pointer (only used on Linux)
p("SUB $8, RSP, R29") // set up new frame pointer p("SUB $8, RSP, R29") // set up new frame pointer
p("#endif")
// On iOS, save the LR again after decrementing SP. We run the // On iOS, save the LR again after decrementing SP. We run the
// signal handler on the G stack (as it doesn't support sigaltstack), // signal handler on the G stack (as it doesn't support sigaltstack),
// so any writes below SP may be clobbered. // so any writes below SP may be clobbered.
@ -369,11 +374,9 @@ func genARM64() {
l.restore() l.restore()
p("MOVD %d(RSP), R30", l.stack) // sigctxt.pushCall has pushed LR (at interrupt) on stack, restore it p("MOVD %d(RSP), R30", l.stack) // sigctxt.pushCall has pushed LR (at interrupt) on stack, restore it
p("#ifdef GOOS_linux") p("MOVD -8(RSP), R29") // restore frame pointer
p("MOVD -8(RSP), R29") // restore frame pointer p("MOVD (RSP), R27") // load PC to REGTMP
p("#endif") p("ADD $%d, RSP", l.stack+16) // pop frame (including the space pushed by sigctxt.pushCall)
p("MOVD (RSP), R27") // load PC to REGTMP
p("ADD $%d, RSP", l.stack+16) // pop frame (including the space pushed by sigctxt.pushCall)
p("JMP (R27)") p("JMP (R27)")
} }

View File

@ -6,142 +6,80 @@
TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0
MOVD R30, -496(RSP) MOVD R30, -496(RSP)
SUB $496, RSP SUB $496, RSP
#ifdef GOOS_linux
MOVD R29, -8(RSP) MOVD R29, -8(RSP)
SUB $8, RSP, R29 SUB $8, RSP, R29
#endif
#ifdef GOOS_ios #ifdef GOOS_ios
MOVD R30, (RSP) MOVD R30, (RSP)
#endif #endif
MOVD R0, 8(RSP) STP (R0, R1), 8(RSP)
MOVD R1, 16(RSP) STP (R2, R3), 24(RSP)
MOVD R2, 24(RSP) STP (R4, R5), 40(RSP)
MOVD R3, 32(RSP) STP (R6, R7), 56(RSP)
MOVD R4, 40(RSP) STP (R8, R9), 72(RSP)
MOVD R5, 48(RSP) STP (R10, R11), 88(RSP)
MOVD R6, 56(RSP) STP (R12, R13), 104(RSP)
MOVD R7, 64(RSP) STP (R14, R15), 120(RSP)
MOVD R8, 72(RSP) STP (R16, R17), 136(RSP)
MOVD R9, 80(RSP) STP (R19, R20), 152(RSP)
MOVD R10, 88(RSP) STP (R21, R22), 168(RSP)
MOVD R11, 96(RSP) STP (R23, R24), 184(RSP)
MOVD R12, 104(RSP) STP (R25, R26), 200(RSP)
MOVD R13, 112(RSP)
MOVD R14, 120(RSP)
MOVD R15, 128(RSP)
MOVD R16, 136(RSP)
MOVD R17, 144(RSP)
MOVD R19, 152(RSP)
MOVD R20, 160(RSP)
MOVD R21, 168(RSP)
MOVD R22, 176(RSP)
MOVD R23, 184(RSP)
MOVD R24, 192(RSP)
MOVD R25, 200(RSP)
MOVD R26, 208(RSP)
MOVD NZCV, R0 MOVD NZCV, R0
MOVD R0, 216(RSP) MOVD R0, 216(RSP)
MOVD FPSR, R0 MOVD FPSR, R0
MOVD R0, 224(RSP) MOVD R0, 224(RSP)
FMOVD F0, 232(RSP) FSTPD (F0, F1), 232(RSP)
FMOVD F1, 240(RSP) FSTPD (F2, F3), 248(RSP)
FMOVD F2, 248(RSP) FSTPD (F4, F5), 264(RSP)
FMOVD F3, 256(RSP) FSTPD (F6, F7), 280(RSP)
FMOVD F4, 264(RSP) FSTPD (F8, F9), 296(RSP)
FMOVD F5, 272(RSP) FSTPD (F10, F11), 312(RSP)
FMOVD F6, 280(RSP) FSTPD (F12, F13), 328(RSP)
FMOVD F7, 288(RSP) FSTPD (F14, F15), 344(RSP)
FMOVD F8, 296(RSP) FSTPD (F16, F17), 360(RSP)
FMOVD F9, 304(RSP) FSTPD (F18, F19), 376(RSP)
FMOVD F10, 312(RSP) FSTPD (F20, F21), 392(RSP)
FMOVD F11, 320(RSP) FSTPD (F22, F23), 408(RSP)
FMOVD F12, 328(RSP) FSTPD (F24, F25), 424(RSP)
FMOVD F13, 336(RSP) FSTPD (F26, F27), 440(RSP)
FMOVD F14, 344(RSP) FSTPD (F28, F29), 456(RSP)
FMOVD F15, 352(RSP) FSTPD (F30, F31), 472(RSP)
FMOVD F16, 360(RSP)
FMOVD F17, 368(RSP)
FMOVD F18, 376(RSP)
FMOVD F19, 384(RSP)
FMOVD F20, 392(RSP)
FMOVD F21, 400(RSP)
FMOVD F22, 408(RSP)
FMOVD F23, 416(RSP)
FMOVD F24, 424(RSP)
FMOVD F25, 432(RSP)
FMOVD F26, 440(RSP)
FMOVD F27, 448(RSP)
FMOVD F28, 456(RSP)
FMOVD F29, 464(RSP)
FMOVD F30, 472(RSP)
FMOVD F31, 480(RSP)
CALL ·asyncPreempt2(SB) CALL ·asyncPreempt2(SB)
FMOVD 480(RSP), F31 FLDPD 472(RSP), (F30, F31)
FMOVD 472(RSP), F30 FLDPD 456(RSP), (F28, F29)
FMOVD 464(RSP), F29 FLDPD 440(RSP), (F26, F27)
FMOVD 456(RSP), F28 FLDPD 424(RSP), (F24, F25)
FMOVD 448(RSP), F27 FLDPD 408(RSP), (F22, F23)
FMOVD 440(RSP), F26 FLDPD 392(RSP), (F20, F21)
FMOVD 432(RSP), F25 FLDPD 376(RSP), (F18, F19)
FMOVD 424(RSP), F24 FLDPD 360(RSP), (F16, F17)
FMOVD 416(RSP), F23 FLDPD 344(RSP), (F14, F15)
FMOVD 408(RSP), F22 FLDPD 328(RSP), (F12, F13)
FMOVD 400(RSP), F21 FLDPD 312(RSP), (F10, F11)
FMOVD 392(RSP), F20 FLDPD 296(RSP), (F8, F9)
FMOVD 384(RSP), F19 FLDPD 280(RSP), (F6, F7)
FMOVD 376(RSP), F18 FLDPD 264(RSP), (F4, F5)
FMOVD 368(RSP), F17 FLDPD 248(RSP), (F2, F3)
FMOVD 360(RSP), F16 FLDPD 232(RSP), (F0, F1)
FMOVD 352(RSP), F15
FMOVD 344(RSP), F14
FMOVD 336(RSP), F13
FMOVD 328(RSP), F12
FMOVD 320(RSP), F11
FMOVD 312(RSP), F10
FMOVD 304(RSP), F9
FMOVD 296(RSP), F8
FMOVD 288(RSP), F7
FMOVD 280(RSP), F6
FMOVD 272(RSP), F5
FMOVD 264(RSP), F4
FMOVD 256(RSP), F3
FMOVD 248(RSP), F2
FMOVD 240(RSP), F1
FMOVD 232(RSP), F0
MOVD 224(RSP), R0 MOVD 224(RSP), R0
MOVD R0, FPSR MOVD R0, FPSR
MOVD 216(RSP), R0 MOVD 216(RSP), R0
MOVD R0, NZCV MOVD R0, NZCV
MOVD 208(RSP), R26 LDP 200(RSP), (R25, R26)
MOVD 200(RSP), R25 LDP 184(RSP), (R23, R24)
MOVD 192(RSP), R24 LDP 168(RSP), (R21, R22)
MOVD 184(RSP), R23 LDP 152(RSP), (R19, R20)
MOVD 176(RSP), R22 LDP 136(RSP), (R16, R17)
MOVD 168(RSP), R21 LDP 120(RSP), (R14, R15)
MOVD 160(RSP), R20 LDP 104(RSP), (R12, R13)
MOVD 152(RSP), R19 LDP 88(RSP), (R10, R11)
MOVD 144(RSP), R17 LDP 72(RSP), (R8, R9)
MOVD 136(RSP), R16 LDP 56(RSP), (R6, R7)
MOVD 128(RSP), R15 LDP 40(RSP), (R4, R5)
MOVD 120(RSP), R14 LDP 24(RSP), (R2, R3)
MOVD 112(RSP), R13 LDP 8(RSP), (R0, R1)
MOVD 104(RSP), R12
MOVD 96(RSP), R11
MOVD 88(RSP), R10
MOVD 80(RSP), R9
MOVD 72(RSP), R8
MOVD 64(RSP), R7
MOVD 56(RSP), R6
MOVD 48(RSP), R5
MOVD 40(RSP), R4
MOVD 32(RSP), R3
MOVD 24(RSP), R2
MOVD 16(RSP), R1
MOVD 8(RSP), R0
MOVD 496(RSP), R30 MOVD 496(RSP), R30
#ifdef GOOS_linux
MOVD -8(RSP), R29 MOVD -8(RSP), R29
#endif
MOVD (RSP), R27 MOVD (RSP), R27
ADD $512, RSP ADD $512, RSP
JMP (R27) JMP (R27)