go/test/codegen/stack.go
Alexander Musman 16a6b71f18 cmd/compile: improve store-to-load forwarding with compatible types
Improve the compiler's store-to-load forwarding optimization by relaxing the
type comparison condition. Instead of requiring exact type equality (CMPeq),
we now use copyCompatibleType which allows forwarding between compatible
types where safe.

Fix several size comparison bugs in the nested store patterns. Previously,
we were comparing the size of the outer store with the load type,
rather than comparing with the size of the actual store being forwarded
from.

Skip OpConvert in dead store elimination to help get rid of dead stores such
as zeroing slices. OpConvert, like OpInlMark, doesn't really use the memory.

This optimization is particularly beneficial for code that creates slices with
computed pointers, such as the runtime's heapBitsSlice function, where
intermediate calculations were previously causing the compiler to miss
store-to-load forwarding opportunities.

Local sweet run result on an x86_64 laptop:

                       │  Orig.res   │              Hopt.res              │
                       │   sec/op    │   sec/op     vs base               │
BiogoIgor-8               5.303 ± 1%    5.322 ± 1%       ~ (p=0.190 n=10)
BiogoKrishna-8            7.894 ± 1%    7.828 ± 2%       ~ (p=0.190 n=10)
BleveIndexBatch100-8      2.257 ± 1%    2.248 ± 2%       ~ (p=0.529 n=10)
EtcdPut-8                30.12m ± 1%   30.03m ± 1%       ~ (p=0.796 n=10)
EtcdSTM-8                127.1m ± 1%   126.2m ± 0%  -0.74% (p=0.023 n=10)
GoBuildKubelet-8          52.21 ± 0%    52.05 ± 1%       ~ (p=0.063 n=10)
GoBuildKubeletLink-8      4.342 ± 1%    4.305 ± 0%  -0.85% (p=0.000 n=10)
GoBuildIstioctl-8         43.33 ± 0%    43.24 ± 0%  -0.22% (p=0.015 n=10)
GoBuildIstioctlLink-8     4.604 ± 1%    4.598 ± 0%       ~ (p=0.063 n=10)
GoBuildFrontend-8         15.33 ± 0%    15.29 ± 0%       ~ (p=0.143 n=10)
GoBuildFrontendLink-8    740.0m ± 1%   737.7m ± 1%       ~ (p=0.912 n=10)
GopherLuaKNucleotide-8    9.590 ± 1%    9.656 ± 1%       ~ (p=0.165 n=10)
MarkdownRenderXHTML-8    96.97m ± 1%   97.26m ± 2%       ~ (p=0.105 n=10)
Tile38QueryLoad-8        335.9µ ± 1%   335.6µ ± 1%       ~ (p=0.481 n=10)
geomean                   1.336         1.333       -0.22%

Change-Id: I031552623e6d5a3b1b5be8325e6314706e45534f
Reviewed-on: https://go-review.googlesource.com/c/go/+/662075
Reviewed-by: Carlos Amedee <carlos@golang.org>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Auto-Submit: Carlos Amedee <carlos@golang.org>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
2025-04-04 08:25:47 -07:00

171 lines
3.9 KiB
Go

// asmcheck
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package codegen
import (
"runtime"
"unsafe"
)
// This file contains code generation tests related to the use of the
// stack.
// Check that stack stores are optimized away.
// 386:"TEXT\t.*, [$]0-"
// amd64:"TEXT\t.*, [$]0-"
// arm:"TEXT\t.*, [$]-4-"
// arm64:"TEXT\t.*, [$]0-"
// mips:"TEXT\t.*, [$]-4-"
// ppc64x:"TEXT\t.*, [$]0-"
// s390x:"TEXT\t.*, [$]0-"
// StackStore returns the value of a zero-initialized local int by loading
// it back through its own address. The frame-size directives above pin the
// expected stack frame on each listed architecture.
func StackStore() int {
	var x int
	// Taking the address of x and dereferencing it right away is the
	// pattern under test, so keep the expression in this form.
	return *(&x)
}
// T is a struct of seven int fields used by the struct-clearing tests in
// this file. KeepWanted carries the exported fields over and leaves the
// unexported ones at their zero value.
type T struct {
	// Exported fields, the ones that are kept.
	A int
	B int
	C int
	D int
	// Unexported fields, the ones that are reset.
	x int
	y int
	z int
}
// Check that large structs are cleared directly (issue #24416).
// 386:"TEXT\t.*, [$]0-"
// amd64:"TEXT\t.*, [$]0-"
// arm:"TEXT\t.*, [$]0-" (spills return address)
// arm64:"TEXT\t.*, [$]0-"
// mips:"TEXT\t.*, [$]-4-"
// ppc64x:"TEXT\t.*, [$]0-"
// s390x:"TEXT\t.*, [$]0-"
// ZeroLargeStruct overwrites *x with the zero value of T by way of a local
// temporary.
func ZeroLargeStruct(x *T) {
	// The temporary t is deliberate. The test is about whether *x gets
	// cleared directly, so do not fold this into a single assignment.
	t := T{}
	*x = t
}
// Check that structs are partially initialised directly (issue #24386).
// Notes:
// - 386 fails due to spilling a register
// amd64:"TEXT\t.*, [$]0-"
// arm:"TEXT\t.*, [$]0-" (spills return address)
// arm64:"TEXT\t.*, [$]0-"
// ppc64x:"TEXT\t.*, [$]0-"
// s390x:"TEXT\t.*, [$]0-"
// Note that 386 currently has to spill a register.
// KeepWanted rewrites *t in place, carrying over the exported fields
// A, B, C and D and leaving the unexported fields x, y and z at zero.
func KeepWanted(t *T) {
	// Fields left out of the composite literal take their zero value,
	// which is what resets x, y and z.
	*t = T{A: t.A, B: t.B, C: t.C, D: t.D}
}
// Check that small array operations avoid using the stack (issue #15925).
// Notes:
// - 386 fails due to spilling a register
// - arm & mips fail due to softfloat calls
// amd64:"TEXT\t.*, [$]0-"
// arm64:"TEXT\t.*, [$]0-"
// ppc64x:"TEXT\t.*, [$]0-"
// s390x:"TEXT\t.*, [$]0-"
// ArrayAdd64 returns the element-wise sum of the four-element float64
// arrays a and b.
func ArrayAdd64(a, b [4]float64) [4]float64 {
	// All four sums are spelled out in a single array literal.
	return [4]float64{a[0] + b[0], a[1] + b[1], a[2] + b[2], a[3] + b[3]}
}
// Check that small array initialization avoids using the stack.
// 386:"TEXT\t.*, [$]0-"
// amd64:"TEXT\t.*, [$]0-"
// arm:"TEXT\t.*, [$]0-" (spills return address)
// arm64:"TEXT\t.*, [$]0-"
// mips:"TEXT\t.*, [$]-4-"
// ppc64x:"TEXT\t.*, [$]0-"
// s390x:"TEXT\t.*, [$]0-"
// ArrayInit returns a four-element int array holding i at index 0 and j at
// index 2, with indices 1 and 3 set to zero.
func ArrayInit(i, j int) [4]int {
	return [4]int{i, 0, j, 0}
}
// Check that assembly output has matching offset and base register
// (issue #21064).
// check_asmout calls runtime.GC and then returns the second element of b.
// The directives attached to the return statement pin how the load of
// that element is printed in the amd64 and arm assembly listings.
func check_asmout(b [2]int) int {
	runtime.GC() // use some frame
	// amd64:`.*b\+24\(SP\)`
	// arm:`.*b\+4\(FP\)`
	return b[1]
}
// Check that simple functions get promoted to nosplit, even when
// they might panic in various ways. See issue 31219.
// amd64:"TEXT\t.*NOSPLIT.*"
// MightPanic evaluates an index expression, two slice expressions, a shift
// and a division, discarding every result. Each statement can panic at run
// time, as named in its trailing comment. The directive above expects
// amd64 to mark the function NOSPLIT regardless.
func MightPanic(a []int, i, j, k, s int) {
	_ = a[i]     // panicIndex
	_ = a[i:j]   // panicSlice
	_ = a[i:j:k] // also panicSlice
	_ = i << s   // panicShift
	_ = i / j    // panicDivide
}
// Put a defer in a loop, so second defer is not open-coded
// Defer registers three deferred no-op closures, two from inside a loop
// and one after it. Per the comment above, the loop is there so that the
// final defer is not open-coded. The amd64 directive on that final defer
// expects a call to runtime.deferprocStack.
func Defer() {
	for i := 0; i < 2; i++ {
		defer func() {}()
	}
	// amd64:`CALL\truntime\.deferprocStack`
	defer func() {}()
}
// Check that stack slots are shared among values of the same
// type, but not pointer-identical types. See issue 65783.
// spillSlotReuse stores zero through the array pointers returned by getp1
// and getp2, each indexed by the result of nopInt. Both arm64 directives
// name the same autotmp slot, which is how the test asserts reuse.
//
// This function is only meant to be compiled and inspected. getp1 and
// getp2 return nil, so running it would dereference a nil pointer.
func spillSlotReuse() {
	// The return values of getp1 and getp2 need to be
	// spilled around the calls to nopInt. Make sure that
	// spill slot gets reused.
	//arm64:`.*autotmp_2-8\(SP\)`
	getp1()[nopInt()] = 0
	//arm64:`.*autotmp_2-8\(SP\)`
	getp2()[nopInt()] = 0
}
// Check that no stack frame space is needed for simple slice initialization with underlying structure.
// mySlice holds a data pointer followed by a length and a capacity.
// sliceInit reinterprets a value of this type as a []uintptr, so the field
// order and types must not change.
type mySlice struct {
	array    unsafe.Pointer // start of the backing storage
	len, cap int            // length first, then capacity
}
// amd64:"TEXT\t.*, [$]0-"
// sliceInit fills in a mySlice by hand and returns it reinterpreted as a
// []uintptr. The data pointer is base+size-bitmapSize, and the length and
// capacity are both bitmapSize/ptrSize, using the values fixed below.
func sliceInit(base uintptr) []uintptr {
	const ptrSize = 8
	size := uintptr(4096)
	bitmapSize := size / ptrSize / 8      // 64
	elements := int(bitmapSize / ptrSize) // 8
	// NOTE(review): the separate declaration and assignment of sl look
	// deliberate (a zeroing store followed by a full overwrite); keep them
	// as two statements unless confirmed otherwise.
	var sl mySlice
	sl = mySlice{
		unsafe.Pointer(base + size - bitmapSize),
		elements,
		elements,
	}
	// amd64:-"POPQ",-"SP"
	return *(*[]uintptr)(unsafe.Pointer(&sl))
}
// nopInt always returns 0. It is kept out of line so that spillSlotReuse
// has a real call to spill around.
//
//go:noinline
func nopInt() int {
	const zero = 0
	return zero
}
// getp1 returns a nil pointer to a four-element int array. It is kept out
// of line so that its result is a genuine call result in spillSlotReuse.
//
//go:noinline
func getp1() *[4]int {
	var none *[4]int
	return none
}
// getp2 returns a nil pointer to a four-element int array. It has the same
// signature as getp1 and is likewise kept out of line for spillSlotReuse.
//
//go:noinline
func getp2() *[4]int {
	var none *[4]int
	return none
}