diff --git a/src/runtime/mkpreempt.go b/src/runtime/mkpreempt.go index 17c9b75d69..37a8cf8a5d 100644 --- a/src/runtime/mkpreempt.go +++ b/src/runtime/mkpreempt.go @@ -122,7 +122,7 @@ func header(arch string) { fmt.Fprintf(out, "// Code generated by mkpreempt.go; DO NOT EDIT.\n\n") if beLe[arch] { base := arch[:len(arch)-1] - fmt.Fprintf(out, "//go:build %s || %sle\n", base, base) + fmt.Fprintf(out, "//go:build %s || %sle\n\n", base, base) } fmt.Fprintf(out, "#include \"go_asm.h\"\n") fmt.Fprintf(out, "#include \"textflag.h\"\n\n") @@ -147,8 +147,9 @@ type layout struct { type regPos struct { pos int - op string - reg string + saveOp string + restoreOp string + reg string // If this register requires special save and restore, these // give those operations with a %d placeholder for the stack @@ -157,7 +158,12 @@ type regPos struct { } func (l *layout) add(op, reg string, size int) { - l.regs = append(l.regs, regPos{op: op, reg: reg, pos: l.stack}) + l.regs = append(l.regs, regPos{saveOp: op, restoreOp: op, reg: reg, pos: l.stack}) + l.stack += size +} + +func (l *layout) add2(sop, rop, reg string, size int) { + l.regs = append(l.regs, regPos{saveOp: sop, restoreOp: rop, reg: reg, pos: l.stack}) l.stack += size } @@ -171,7 +177,7 @@ func (l *layout) save() { if reg.save != "" { p(reg.save, reg.pos) } else { - p("%s %s, %d(%s)", reg.op, reg.reg, reg.pos, l.sp) + p("%s %s, %d(%s)", reg.saveOp, reg.reg, reg.pos, l.sp) } } } @@ -182,7 +188,7 @@ func (l *layout) restore() { if reg.restore != "" { p(reg.restore, reg.pos) } else { - p("%s %d(%s), %s", reg.op, reg.pos, l.sp, reg.reg) + p("%s %d(%s), %s", reg.restoreOp, reg.pos, l.sp, reg.reg) } } } @@ -324,12 +330,13 @@ func genARM64() { // R27 (REGTMP), R28 (g), R29 (FP), R30 (LR), R31 (SP) are special // and not saved here. var l = layout{sp: "RSP", stack: 8} // add slot to save PC of interrupted instruction - for i := 0; i <= 26; i++ { + for i := 0; i < 26; i += 2 { if i == 18 { + i-- continue // R18 is not used, skip } - reg := fmt.Sprintf("R%d", i) - l.add("MOVD", reg, 8) + reg := fmt.Sprintf("(R%d, R%d)", i, i+1) + l.add2("STP", "LDP", reg, 16) } // Add flag registers. l.addSpecial( @@ -342,9 +349,9 @@ func genARM64() { 8) // TODO: FPCR? I don't think we'll change it, so no need to save. // Add floating point registers F0-F31. - for i := 0; i <= 31; i++ { - reg := fmt.Sprintf("F%d", i) - l.add("FMOVD", reg, 8) + for i := 0; i < 31; i += 2 { + reg := fmt.Sprintf("(F%d, F%d)", i, i+1) + l.add2("FSTPD", "FLDPD", reg, 16) } if l.stack%16 != 0 { l.stack += 8 // SP needs 16-byte alignment @@ -353,10 +360,8 @@ func genARM64() { // allocate frame, save PC of interrupted instruction (in LR) p("MOVD R30, %d(RSP)", -l.stack) p("SUB $%d, RSP", l.stack) - p("#ifdef GOOS_linux") p("MOVD R29, -8(RSP)") // save frame pointer (only used on Linux) p("SUB $8, RSP, R29") // set up new frame pointer - p("#endif") // On iOS, save the LR again after decrementing SP. We run the // signal handler on the G stack (as it doesn't support sigaltstack), // so any writes below SP may be clobbered. @@ -369,11 +374,9 @@ func genARM64() { l.restore() p("MOVD %d(RSP), R30", l.stack) // sigctxt.pushCall has pushed LR (at interrupt) on stack, restore it - p("#ifdef GOOS_linux") - p("MOVD -8(RSP), R29") // restore frame pointer - p("#endif") - p("MOVD (RSP), R27") // load PC to REGTMP - p("ADD $%d, RSP", l.stack+16) // pop frame (including the space pushed by sigctxt.pushCall) + p("MOVD -8(RSP), R29") // restore frame pointer + p("MOVD (RSP), R27") // load PC to REGTMP + p("ADD $%d, RSP", l.stack+16) // pop frame (including the space pushed by sigctxt.pushCall) p("JMP (R27)") } diff --git a/src/runtime/preempt_arm64.s b/src/runtime/preempt_arm64.s index 36ee13282c..c27d475dee 100644 --- a/src/runtime/preempt_arm64.s +++ b/src/runtime/preempt_arm64.s @@ -6,142 +6,80 @@ TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 MOVD R30, -496(RSP) SUB $496, RSP - #ifdef GOOS_linux MOVD R29, -8(RSP) SUB $8, RSP, R29 - #endif #ifdef GOOS_ios MOVD R30, (RSP) #endif - MOVD R0, 8(RSP) - MOVD R1, 16(RSP) - MOVD R2, 24(RSP) - MOVD R3, 32(RSP) - MOVD R4, 40(RSP) - MOVD R5, 48(RSP) - MOVD R6, 56(RSP) - MOVD R7, 64(RSP) - MOVD R8, 72(RSP) - MOVD R9, 80(RSP) - MOVD R10, 88(RSP) - MOVD R11, 96(RSP) - MOVD R12, 104(RSP) - MOVD R13, 112(RSP) - MOVD R14, 120(RSP) - MOVD R15, 128(RSP) - MOVD R16, 136(RSP) - MOVD R17, 144(RSP) - MOVD R19, 152(RSP) - MOVD R20, 160(RSP) - MOVD R21, 168(RSP) - MOVD R22, 176(RSP) - MOVD R23, 184(RSP) - MOVD R24, 192(RSP) - MOVD R25, 200(RSP) - MOVD R26, 208(RSP) + STP (R0, R1), 8(RSP) + STP (R2, R3), 24(RSP) + STP (R4, R5), 40(RSP) + STP (R6, R7), 56(RSP) + STP (R8, R9), 72(RSP) + STP (R10, R11), 88(RSP) + STP (R12, R13), 104(RSP) + STP (R14, R15), 120(RSP) + STP (R16, R17), 136(RSP) + STP (R19, R20), 152(RSP) + STP (R21, R22), 168(RSP) + STP (R23, R24), 184(RSP) + STP (R25, R26), 200(RSP) MOVD NZCV, R0 MOVD R0, 216(RSP) MOVD FPSR, R0 MOVD R0, 224(RSP) - FMOVD F0, 232(RSP) - FMOVD F1, 240(RSP) - FMOVD F2, 248(RSP) - FMOVD F3, 256(RSP) - FMOVD F4, 264(RSP) - FMOVD F5, 272(RSP) - FMOVD F6, 280(RSP) - FMOVD F7, 288(RSP) - FMOVD F8, 296(RSP) - FMOVD F9, 304(RSP) - FMOVD F10, 312(RSP) - FMOVD F11, 320(RSP) - FMOVD F12, 328(RSP) - FMOVD F13, 336(RSP) - FMOVD F14, 344(RSP) - FMOVD F15, 352(RSP) - FMOVD F16, 360(RSP) - FMOVD F17, 368(RSP) - FMOVD F18, 376(RSP) - FMOVD F19, 384(RSP) - FMOVD F20, 392(RSP) - FMOVD F21, 400(RSP) - FMOVD F22, 408(RSP) - FMOVD F23, 416(RSP) - FMOVD F24, 424(RSP) - FMOVD F25, 432(RSP) - FMOVD F26, 440(RSP) - FMOVD F27, 448(RSP) - FMOVD F28, 456(RSP) - FMOVD F29, 464(RSP) - FMOVD F30, 472(RSP) - FMOVD F31, 480(RSP) + FSTPD (F0, F1), 232(RSP) + FSTPD (F2, F3), 248(RSP) + FSTPD (F4, F5), 264(RSP) + FSTPD (F6, F7), 280(RSP) + FSTPD (F8, F9), 296(RSP) + FSTPD (F10, F11), 312(RSP) + FSTPD (F12, F13), 328(RSP) + FSTPD (F14, F15), 344(RSP) + FSTPD (F16, F17), 360(RSP) + FSTPD (F18, F19), 376(RSP) + FSTPD (F20, F21), 392(RSP) + FSTPD (F22, F23), 408(RSP) + FSTPD (F24, F25), 424(RSP) + FSTPD (F26, F27), 440(RSP) + FSTPD (F28, F29), 456(RSP) + FSTPD (F30, F31), 472(RSP) CALL ·asyncPreempt2(SB) - FMOVD 480(RSP), F31 - FMOVD 472(RSP), F30 - FMOVD 464(RSP), F29 - FMOVD 456(RSP), F28 - FMOVD 448(RSP), F27 - FMOVD 440(RSP), F26 - FMOVD 432(RSP), F25 - FMOVD 424(RSP), F24 - FMOVD 416(RSP), F23 - FMOVD 408(RSP), F22 - FMOVD 400(RSP), F21 - FMOVD 392(RSP), F20 - FMOVD 384(RSP), F19 - FMOVD 376(RSP), F18 - FMOVD 368(RSP), F17 - FMOVD 360(RSP), F16 - FMOVD 352(RSP), F15 - FMOVD 344(RSP), F14 - FMOVD 336(RSP), F13 - FMOVD 328(RSP), F12 - FMOVD 320(RSP), F11 - FMOVD 312(RSP), F10 - FMOVD 304(RSP), F9 - FMOVD 296(RSP), F8 - FMOVD 288(RSP), F7 - FMOVD 280(RSP), F6 - FMOVD 272(RSP), F5 - FMOVD 264(RSP), F4 - FMOVD 256(RSP), F3 - FMOVD 248(RSP), F2 - FMOVD 240(RSP), F1 - FMOVD 232(RSP), F0 + FLDPD 472(RSP), (F30, F31) + FLDPD 456(RSP), (F28, F29) + FLDPD 440(RSP), (F26, F27) + FLDPD 424(RSP), (F24, F25) + FLDPD 408(RSP), (F22, F23) + FLDPD 392(RSP), (F20, F21) + FLDPD 376(RSP), (F18, F19) + FLDPD 360(RSP), (F16, F17) + FLDPD 344(RSP), (F14, F15) + FLDPD 328(RSP), (F12, F13) + FLDPD 312(RSP), (F10, F11) + FLDPD 296(RSP), (F8, F9) + FLDPD 280(RSP), (F6, F7) + FLDPD 264(RSP), (F4, F5) + FLDPD 248(RSP), (F2, F3) + FLDPD 232(RSP), (F0, F1) MOVD 224(RSP), R0 MOVD R0, FPSR MOVD 216(RSP), R0 MOVD R0, NZCV - MOVD 208(RSP), R26 - MOVD 200(RSP), R25 - MOVD 192(RSP), R24 - MOVD 184(RSP), R23 - MOVD 176(RSP), R22 - MOVD 168(RSP), R21 - MOVD 160(RSP), R20 - MOVD 152(RSP), R19 - MOVD 144(RSP), R17 - MOVD 136(RSP), R16 - MOVD 128(RSP), R15 - MOVD 120(RSP), R14 - MOVD 112(RSP), R13 - MOVD 104(RSP), R12 - MOVD 96(RSP), R11 - MOVD 88(RSP), R10 - MOVD 80(RSP), R9 - MOVD 72(RSP), R8 - MOVD 64(RSP), R7 - MOVD 56(RSP), R6 - MOVD 48(RSP), R5 - MOVD 40(RSP), R4 - MOVD 32(RSP), R3 - MOVD 24(RSP), R2 - MOVD 16(RSP), R1 - MOVD 8(RSP), R0 + LDP 200(RSP), (R25, R26) + LDP 184(RSP), (R23, R24) + LDP 168(RSP), (R21, R22) + LDP 152(RSP), (R19, R20) + LDP 136(RSP), (R16, R17) + LDP 120(RSP), (R14, R15) + LDP 104(RSP), (R12, R13) + LDP 88(RSP), (R10, R11) + LDP 72(RSP), (R8, R9) + LDP 56(RSP), (R6, R7) + LDP 40(RSP), (R4, R5) + LDP 24(RSP), (R2, R3) + LDP 8(RSP), (R0, R1) MOVD 496(RSP), R30 - #ifdef GOOS_linux MOVD -8(RSP), R29 - #endif MOVD (RSP), R27 ADD $512, RSP JMP (R27)