cmd/compile: stack allocate variable-sized makeslice

Instead of always allocating variable-sized "make" calls on the heap,
allocate a small, constant-sized array on the stack and use that array
as the backing store if it is big enough.

Requires that the result of the "make" doesn't escape.

  if cap <= K {
      var arr [K]E
      slice = arr[:len:cap]
  } else {
      slice = makeslice(E, len, cap)
  }
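
As a concrete illustration (hypothetical user code, not part of this
CL): with this change, fill's buf can use a stack backing store
whenever n <= 32 at run time, since buf does not escape, []byte is
pointer-free, and n is not a compile-time constant.

  package main

  //go:noinline
  func use(s []byte) {} // does not keep s

  func fill(n int) byte {
      buf := make([]byte, n) // eligible: non-escaping, pointer-free, variable size
      for i := range buf {
          buf[i] = byte(i)
      }
      use(buf)
      return buf[0]
  }

  func main() {
      println(fill(16)) // n <= 32: backing store can live on the stack
      println(fill(64)) // n > 32: falls back to makeslice
  }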

Pretty conservative for now: K = 32/sizeof(E). The slice header is
already 24 bytes, so wasting 32 bytes of stack when the requested size
turns out to be too big isn't that bad. A larger K would waste more
stack space but might avoid more allocations.
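
To make the cutoff concrete, a few values of K under this rule:

  []byte    K = 32/1 = 32 elements
  []int32   K = 32/4 =  8 elements
  []int64   K = 32/8 =  4 elements

An element type larger than 32 bytes gives K = 0, and the
optimization is skipped entirely.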

This CL also requires the element type be pointer-free.  Maybe we
could relax that at some point, but it is hard. If the element type
has pointers we can get heap->stack pointers (in the case where the
requested size is too big and the slice is heap allocated).
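
A sketch of the problem (hypothetical code, not from this CL): if
[]*int were eligible, escape analysis would mark both ps and x as
non-escaping, so x would stay on the stack; but in the fallback branch
the backing array lives on the heap, and the assignment below would
write a stack address into it.

  package main

  func store(n int) int {
      x := 1
      ps := make([]*int, n) // []*int has pointers, so this is NOT optimized
      if n > 0 {
          ps[0] = &x // would be a heap->stack pointer in the fallback branch
      }
      return x
  }

  func main() {
      println(store(3))
  }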

Note that this only handles the case of makeslice called directly from
compiler-generated code. It does not handle slices built in the
runtime on behalf of the program (e.g. in growslice). Some of those
are currently handled by passing in a tmpBuf (e.g. concatstrings),
but we could probably do more.
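
For example (an illustration, not part of this CL): the appends below
still allocate, because the growing is done by growslice inside the
runtime, where this compile-time transformation never applies.

  package main

  func build(n int) int {
      var s []byte
      for i := 0; i < n; i++ {
          s = append(s, byte(i)) // backing store comes from growslice
      }
      return len(s)
  }

  func main() {
      println(build(10)) // allocates even though s does not escape
  }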

Change-Id: I8378efad527cd00d25948a80b82a68d88fbd93a1
Reviewed-on: https://go-review.googlesource.com/c/go/+/653856
Reviewed-by: Robert Griesemer <gri@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Author: Keith Randall
Date:   2025-02-28 17:01:36 -08:00
commit 7a427143b6 (parent 16a6b71f18)

5 changed files with 164 additions and 37 deletions

@@ -227,10 +227,36 @@ func HeapAllocReason(n ir.Node) string {
 		if *r == nil {
 			r = &n.(*ir.MakeExpr).Len
 		}
+		elem := n.Type().Elem()
+		if elem.Size() == 0 {
+			// TODO: stack allocate these? See #65685.
+			return "zero-sized element"
+		}
 		if !ir.IsSmallIntConst(*r) {
+			if !elem.HasPointers() {
+				// For non-constant sizes, we do a hybrid approach:
+				//
+				//   if cap <= K {
+				//       var backing [K]E
+				//       s = backing[:len:cap]
+				//   } else {
+				//       s = makeslice(E, len, cap)
+				//   }
+				//
+				// It costs a constant amount of stack space, but may
+				// avoid a heap allocation.
+				// Note that this only works for pointer-free element types,
+				// because we forbid heap->stack pointers.
+				// (TODO: To get around this limitation, maybe we could treat
+				// these "heap" objects as still in the stack, possibly as
+				// stack objects. We should be able to find them and walk them
+				// on a stack backtrace. Not sure if that would work.)
+				// Implementation is in ../walk/builtin.go:walkMakeSlice.
+				return ""
+			}
 			return "non-constant size"
 		}
-		if t := n.Type(); t.Elem().Size() != 0 && ir.Int64Val(*r) > ir.MaxImplicitStackVarSize/t.Elem().Size() {
+		if ir.Int64Val(*r) > ir.MaxImplicitStackVarSize/elem.Size() {
 			return "too large for stack"
 		}
 	}

@@ -0,0 +1,50 @@
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package test
+
+import (
+	"internal/testenv"
+	"testing"
+	"unsafe"
+)
+
+// Stack allocation size for variable-sized allocations.
+// Matches constant of the same name in ../walk/builtin.go:walkMakeSlice.
+const maxStackSize = 32
+
+//go:noinline
+func genericUse[T any](s []T) {
+	// Doesn't escape s.
+}
+
+func TestStackAllocation(t *testing.T) {
+	testenv.SkipIfOptimizationOff(t)
+	type testCase struct {
+		f        func(int)
+		elemSize uintptr
+	}
+	for _, tc := range []testCase{
+		{
+			f: func(n int) {
+				genericUse(make([]int, n))
+			},
+			elemSize: unsafe.Sizeof(int(0)),
+		},
+	} {
+		max := maxStackSize / int(tc.elemSize)
+		if n := testing.AllocsPerRun(10, func() {
+			tc.f(max)
+		}); n != 0 {
+			t.Fatalf("unexpected allocation: %f", n)
+		}
+		if n := testing.AllocsPerRun(10, func() {
+			tc.f(max + 1)
+		}); n != 1 {
+			t.Fatalf("unexpected allocation: %f", n)
+		}
+	}
+}

@@ -524,51 +524,100 @@ func walkMakeOldMap(n *ir.MakeExpr, init *ir.Nodes) ir.Node {
 
 // walkMakeSlice walks an OMAKESLICE node.
 func walkMakeSlice(n *ir.MakeExpr, init *ir.Nodes) ir.Node {
-	l := n.Len
-	r := n.Cap
-	if r == nil {
-		r = safeExpr(l, init)
-		l = r
+	len := n.Len
+	cap := n.Cap
+	len = safeExpr(len, init)
+	if cap != nil {
+		cap = safeExpr(cap, init)
+	} else {
+		cap = len
 	}
 	t := n.Type()
 	if t.Elem().NotInHeap() {
 		base.Errorf("%v can't be allocated in Go; it is incomplete (or unallocatable)", t.Elem())
 	}
+
+	tryStack := false
 	if n.Esc() == ir.EscNone {
 		if why := escape.HeapAllocReason(n); why != "" {
 			base.Fatalf("%v has EscNone, but %v", n, why)
 		}
-		// var arr [r]T
-		// n = arr[:l]
-		i := typecheck.IndexConst(r)
-
-		// cap is constrained to [0,2^31) or [0,2^63) depending on whether
-		// we're in 32-bit or 64-bit systems. So it's safe to do:
-		//
-		// if uint64(len) > cap {
-		//     if len < 0 { panicmakeslicelen() }
-		//     panicmakeslicecap()
-		// }
-		nif := ir.NewIfStmt(base.Pos, ir.NewBinaryExpr(base.Pos, ir.OGT, typecheck.Conv(l, types.Types[types.TUINT64]), ir.NewInt(base.Pos, i)), nil, nil)
-		niflen := ir.NewIfStmt(base.Pos, ir.NewBinaryExpr(base.Pos, ir.OLT, l, ir.NewInt(base.Pos, 0)), nil, nil)
-		niflen.Body = []ir.Node{mkcall("panicmakeslicelen", nil, init)}
-		nif.Body.Append(niflen, mkcall("panicmakeslicecap", nil, init))
-		init.Append(typecheck.Stmt(nif))
-
-		t = types.NewArray(t.Elem(), i) // [r]T
-		var_ := typecheck.TempAt(base.Pos, ir.CurFunc, t)
-		appendWalkStmt(init, ir.NewAssignStmt(base.Pos, var_, nil)) // zero temp
-		r := ir.NewSliceExpr(base.Pos, ir.OSLICE, var_, nil, l, nil) // arr[:l]
-		// The conv is necessary in case n.Type is named.
-		return walkExpr(typecheck.Expr(typecheck.Conv(r, n.Type())), init)
+		if ir.IsSmallIntConst(cap) {
+			// Constant backing array - allocate it and slice it.
+			cap := typecheck.IndexConst(cap)
+
+			// Note that len might not be constant. If it isn't, check for panics.
+			// cap is constrained to [0,2^31) or [0,2^63) depending on whether
+			// we're in 32-bit or 64-bit systems. So it's safe to do:
+			//
+			// if uint64(len) > cap {
+			//     if len < 0 { panicmakeslicelen() }
+			//     panicmakeslicecap()
+			// }
+			nif := ir.NewIfStmt(base.Pos, ir.NewBinaryExpr(base.Pos, ir.OGT, typecheck.Conv(len, types.Types[types.TUINT64]), ir.NewInt(base.Pos, cap)), nil, nil)
+			niflen := ir.NewIfStmt(base.Pos, ir.NewBinaryExpr(base.Pos, ir.OLT, len, ir.NewInt(base.Pos, 0)), nil, nil)
+			niflen.Body = []ir.Node{mkcall("panicmakeslicelen", nil, init)}
+			nif.Body.Append(niflen, mkcall("panicmakeslicecap", nil, init))
+			init.Append(typecheck.Stmt(nif))
+
+			// var arr [cap]E
+			// s = arr[:len]
+			t := types.NewArray(t.Elem(), cap) // [cap]E
+			arr := typecheck.TempAt(base.Pos, ir.CurFunc, t)
+			appendWalkStmt(init, ir.NewAssignStmt(base.Pos, arr, nil))    // zero temp
+			s := ir.NewSliceExpr(base.Pos, ir.OSLICE, arr, nil, len, nil) // arr[:len]
+			// The conv is necessary in case n.Type is named.
+			return walkExpr(typecheck.Expr(typecheck.Conv(s, n.Type())), init)
+		}
+		if t.Elem().HasPointers() {
+			// TODO: remove this limitation (see ../escape/utils.go:HeapAllocReason).
+			base.Fatalf("%v can't have pointers", t.Elem())
+		}
+		tryStack = true
 	}
 
-	// n escapes; set up a call to makeslice.
+	// The final result is assigned to this variable.
+	slice := typecheck.TempAt(base.Pos, ir.CurFunc, n.Type()) // []E result (possibly named)
+
+	if tryStack {
+		// K := maxStackSize/sizeof(E)
+		// if cap <= K {
+		//     var arr [K]E
+		//     slice = arr[:len:cap]
+		// } else {
+		//     slice = makeslice(elemType, len, cap)
+		// }
+		const maxStackSize = 32
+		K := maxStackSize / t.Elem().Size() // rounds down
+		if K > 0 { // skip if elem size is too big.
+			nif := ir.NewIfStmt(base.Pos, ir.NewBinaryExpr(base.Pos, ir.OLE, typecheck.Conv(cap, types.Types[types.TUINT64]), ir.NewInt(base.Pos, K)), nil, nil)
+
+			// cap is in bounds after the K check, but len might not be.
+			// (Note that the slicing below would generate a panic for
+			// the same bad cases, but we want makeslice panics, not
+			// regular slicing panics.)
+			lenCap := ir.NewIfStmt(base.Pos, ir.NewBinaryExpr(base.Pos, ir.OGT, typecheck.Conv(len, types.Types[types.TUINT64]), typecheck.Conv(cap, types.Types[types.TUINT64])), nil, nil)
+			lenZero := ir.NewIfStmt(base.Pos, ir.NewBinaryExpr(base.Pos, ir.OLT, len, ir.NewInt(base.Pos, 0)), nil, nil)
+			lenZero.Body.Append(mkcall("panicmakeslicelen", nil, &lenZero.Body))
+			lenCap.Body.Append(lenZero)
+			lenCap.Body.Append(mkcall("panicmakeslicecap", nil, &lenCap.Body))
+			nif.Body.Append(lenCap)
+
+			t := types.NewArray(t.Elem(), K)                              // [K]E
+			arr := typecheck.TempAt(base.Pos, ir.CurFunc, t)              // var arr [K]E
+			nif.Body.Append(ir.NewAssignStmt(base.Pos, arr, nil))         // arr = {} (zero it)
+			s := ir.NewSliceExpr(base.Pos, ir.OSLICE, arr, nil, len, cap) // arr[:len:cap]
+			nif.Body.Append(ir.NewAssignStmt(base.Pos, slice, s))         // slice = arr[:len:cap]
+			appendWalkStmt(init, typecheck.Stmt(nif))
+
+			// Put makeslice call below in the else branch.
+			init = &nif.Else
+		}
+	}
+
+	// Set up a call to makeslice.
 	// When len and cap can fit into int, use makeslice instead of
 	// makeslice64, which is faster and shorter on 32 bit platforms.
-	len, cap := l, r
 	fnname := "makeslice64"
 	argtype := types.Types[types.TINT64]
@@ -585,8 +634,10 @@ func walkMakeSlice(n *ir.MakeExpr, init *ir.Nodes) ir.Node {
 	ptr.MarkNonNil()
 	len = typecheck.Conv(len, types.Types[types.TINT])
 	cap = typecheck.Conv(cap, types.Types[types.TINT])
-	sh := ir.NewSliceHeaderExpr(base.Pos, t, ptr, len, cap)
-	return walkExpr(typecheck.Expr(sh), init)
+	s := ir.NewSliceHeaderExpr(base.Pos, t, ptr, len, cap)
+	appendWalkStmt(init, ir.NewAssignStmt(base.Pos, slice, s))
+	return slice
 }
 
 // walkMakeSliceCopy walks an OMAKESLICECOPY node.

@@ -131,7 +131,7 @@ func TestGenericsHashKeyInPprofBuilder(t *testing.T) {
 	for _, sz := range []int{128, 256} {
 		genericAllocFunc[uint32](sz / 4)
 	}
-	for _, sz := range []int{32, 64} {
+	for _, sz := range []int{64, 128} {
 		genericAllocFunc[uint64](sz / 8)
 	}
@@ -149,8 +149,8 @@ func TestGenericsHashKeyInPprofBuilder(t *testing.T) {
 	expected := []string{
 		"testing.tRunner;runtime/pprof.TestGenericsHashKeyInPprofBuilder;runtime/pprof.genericAllocFunc[go.shape.uint32] [1 128 0 0]",
 		"testing.tRunner;runtime/pprof.TestGenericsHashKeyInPprofBuilder;runtime/pprof.genericAllocFunc[go.shape.uint32] [1 256 0 0]",
-		"testing.tRunner;runtime/pprof.TestGenericsHashKeyInPprofBuilder;runtime/pprof.genericAllocFunc[go.shape.uint64] [1 32 0 0]",
 		"testing.tRunner;runtime/pprof.TestGenericsHashKeyInPprofBuilder;runtime/pprof.genericAllocFunc[go.shape.uint64] [1 64 0 0]",
+		"testing.tRunner;runtime/pprof.TestGenericsHashKeyInPprofBuilder;runtime/pprof.genericAllocFunc[go.shape.uint64] [1 128 0 0]",
 	}
 	for _, l := range expected {

@@ -65,7 +65,7 @@ func testSlices() {
 	}
 	{
-		_ = make([]byte, globalVarSize)                  // ERROR "make\(\[\]byte, globalVarSize\) escapes to heap"
+		_ = make([]byte, globalVarSize)                  // ERROR "make\(\[\]byte, globalVarSize\) does not escape"
 		_ = make([]byte, globalVarSize, globalConstSize) // ERROR "make\(\[\]byte, globalVarSize, 128\) does not escape"
 	}
 }
} }