From 7a427143b6ff296125359084a8959bf0c9d23e78 Mon Sep 17 00:00:00 2001 From: Keith Randall Date: Fri, 28 Feb 2025 17:01:36 -0800 Subject: [PATCH] cmd/compile: stack allocate variable-sized makeslice Instead of always allocating variable-sized "make" calls on the heap, allocate a small, constant-sized array on the stack and use that array as the backing store if it is big enough. Requires the result of the "make" doesn't escape. if cap <= K { var arr [K]E slice = arr[:len:cap] } else { slice = makeslice(E, len, cap) } Pretty conservatively for now, K = 32/sizeof(E). The slice header is already 24 bytes, so wasting 32 bytes of stack if the requested size is too big isn't that bad. Larger would waste more stack space but maybe avoid more allocations. This CL also requires the element type be pointer-free. Maybe we could relax that at some point, but it is hard. If the element type has pointers we can get heap->stack pointers (in the case where the requested size is too big and the slice is heap allocated). Note that this only handles the case of makeslice called directly from compiler-generated code. It does not handle slices built in the runtime on behalf of the program (e.g. in growslice). Some of those are currently handled by passing in a tmpBuf (e.g. concatstrings), but we could probably do more. Change-Id: I8378efad527cd00d25948a80b82a68d88fbd93a1 Reviewed-on: https://go-review.googlesource.com/c/go/+/653856 Reviewed-by: Robert Griesemer LUCI-TryBot-Result: Go LUCI Reviewed-by: Cherry Mui --- src/cmd/compile/internal/escape/utils.go | 28 ++++- src/cmd/compile/internal/test/stack_test.go | 50 +++++++++ src/cmd/compile/internal/walk/builtin.go | 117 ++++++++++++++------ src/runtime/pprof/protomem_test.go | 4 +- test/escape_make_non_const.go | 2 +- 5 files changed, 164 insertions(+), 37 deletions(-) create mode 100644 src/cmd/compile/internal/test/stack_test.go diff --git a/src/cmd/compile/internal/escape/utils.go b/src/cmd/compile/internal/escape/utils.go index d9cb9bdf8e..815bfd8896 100644 --- a/src/cmd/compile/internal/escape/utils.go +++ b/src/cmd/compile/internal/escape/utils.go @@ -227,10 +227,36 @@ func HeapAllocReason(n ir.Node) string { } } + elem := n.Type().Elem() + if elem.Size() == 0 { + // TODO: stack allocate these? See #65685. + return "zero-sized element" + } if !ir.IsSmallIntConst(*r) { + if !elem.HasPointers() { + // For non-constant sizes, we do a hybrid approach: + // + // if cap <= K { + // var backing [K]E + // s = backing[:len:cap] + // } else { + // s = makeslice(E, len, cap) + // } + // + // It costs a constant amount of stack space, but may + // avoid a heap allocation. + // Note that this only works for pointer-free element types, + // because we forbid heap->stack pointers. + // (TODO: To get around this limitation, maybe we could treat + // these "heap" objects as still in the stack, possibly as + // stack objects. We should be able to find them and walk them + // on a stack backtrace. Not sure if that would work.) + // Implementation is in ../walk/builtin.go:walkMakeSlice. + return "" + } return "non-constant size" } - if t := n.Type(); t.Elem().Size() != 0 && ir.Int64Val(*r) > ir.MaxImplicitStackVarSize/t.Elem().Size() { + if ir.Int64Val(*r) > ir.MaxImplicitStackVarSize/elem.Size() { return "too large for stack" } } diff --git a/src/cmd/compile/internal/test/stack_test.go b/src/cmd/compile/internal/test/stack_test.go new file mode 100644 index 0000000000..d4caa9155d --- /dev/null +++ b/src/cmd/compile/internal/test/stack_test.go @@ -0,0 +1,50 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package test + +import ( + "internal/testenv" + "testing" + "unsafe" +) + +// Stack allocation size for variable-sized allocations. +// Matches constant of the same name in ../walk/builtin.go:walkMakeSlice. +const maxStackSize = 32 + +//go:noinline +func genericUse[T any](s []T) { + // Doesn't escape s. +} + +func TestStackAllocation(t *testing.T) { + testenv.SkipIfOptimizationOff(t) + + type testCase struct { + f func(int) + elemSize uintptr + } + + for _, tc := range []testCase{ + { + f: func(n int) { + genericUse(make([]int, n)) + }, + elemSize: unsafe.Sizeof(int(0)), + }, + } { + max := maxStackSize / int(tc.elemSize) + if n := testing.AllocsPerRun(10, func() { + tc.f(max) + }); n != 0 { + t.Fatalf("unexpected allocation: %f", n) + } + if n := testing.AllocsPerRun(10, func() { + tc.f(max + 1) + }); n != 1 { + t.Fatalf("unexpected allocation: %f", n) + } + } +} diff --git a/src/cmd/compile/internal/walk/builtin.go b/src/cmd/compile/internal/walk/builtin.go index be32e77ded..0d9e2a4392 100644 --- a/src/cmd/compile/internal/walk/builtin.go +++ b/src/cmd/compile/internal/walk/builtin.go @@ -524,51 +524,100 @@ func walkMakeOldMap(n *ir.MakeExpr, init *ir.Nodes) ir.Node { // walkMakeSlice walks an OMAKESLICE node. func walkMakeSlice(n *ir.MakeExpr, init *ir.Nodes) ir.Node { - l := n.Len - r := n.Cap - if r == nil { - r = safeExpr(l, init) - l = r + len := n.Len + cap := n.Cap + len = safeExpr(len, init) + if cap != nil { + cap = safeExpr(cap, init) + } else { + cap = len } t := n.Type() if t.Elem().NotInHeap() { base.Errorf("%v can't be allocated in Go; it is incomplete (or unallocatable)", t.Elem()) } + + tryStack := false if n.Esc() == ir.EscNone { if why := escape.HeapAllocReason(n); why != "" { base.Fatalf("%v has EscNone, but %v", n, why) } - // var arr [r]T - // n = arr[:l] - i := typecheck.IndexConst(r) + if ir.IsSmallIntConst(cap) { + // Constant backing array - allocate it and slice it. + cap := typecheck.IndexConst(cap) + // Note that len might not be constant. If it isn't, check for panics. + // cap is constrained to [0,2^31) or [0,2^63) depending on whether + // we're in 32-bit or 64-bit systems. So it's safe to do: + // + // if uint64(len) > cap { + // if len < 0 { panicmakeslicelen() } + // panicmakeslicecap() + // } + nif := ir.NewIfStmt(base.Pos, ir.NewBinaryExpr(base.Pos, ir.OGT, typecheck.Conv(len, types.Types[types.TUINT64]), ir.NewInt(base.Pos, cap)), nil, nil) + niflen := ir.NewIfStmt(base.Pos, ir.NewBinaryExpr(base.Pos, ir.OLT, len, ir.NewInt(base.Pos, 0)), nil, nil) + niflen.Body = []ir.Node{mkcall("panicmakeslicelen", nil, init)} + nif.Body.Append(niflen, mkcall("panicmakeslicecap", nil, init)) + init.Append(typecheck.Stmt(nif)) - // cap is constrained to [0,2^31) or [0,2^63) depending on whether - // we're in 32-bit or 64-bit systems. So it's safe to do: - // - // if uint64(len) > cap { - // if len < 0 { panicmakeslicelen() } - // panicmakeslicecap() - // } - nif := ir.NewIfStmt(base.Pos, ir.NewBinaryExpr(base.Pos, ir.OGT, typecheck.Conv(l, types.Types[types.TUINT64]), ir.NewInt(base.Pos, i)), nil, nil) - niflen := ir.NewIfStmt(base.Pos, ir.NewBinaryExpr(base.Pos, ir.OLT, l, ir.NewInt(base.Pos, 0)), nil, nil) - niflen.Body = []ir.Node{mkcall("panicmakeslicelen", nil, init)} - nif.Body.Append(niflen, mkcall("panicmakeslicecap", nil, init)) - init.Append(typecheck.Stmt(nif)) - - t = types.NewArray(t.Elem(), i) // [r]T - var_ := typecheck.TempAt(base.Pos, ir.CurFunc, t) - appendWalkStmt(init, ir.NewAssignStmt(base.Pos, var_, nil)) // zero temp - r := ir.NewSliceExpr(base.Pos, ir.OSLICE, var_, nil, l, nil) // arr[:l] - // The conv is necessary in case n.Type is named. - return walkExpr(typecheck.Expr(typecheck.Conv(r, n.Type())), init) + // var arr [cap]E + // s = arr[:len] + t := types.NewArray(t.Elem(), cap) // [cap]E + arr := typecheck.TempAt(base.Pos, ir.CurFunc, t) + appendWalkStmt(init, ir.NewAssignStmt(base.Pos, arr, nil)) // zero temp + s := ir.NewSliceExpr(base.Pos, ir.OSLICE, arr, nil, len, nil) // arr[:len] + // The conv is necessary in case n.Type is named. + return walkExpr(typecheck.Expr(typecheck.Conv(s, n.Type())), init) + } + if t.Elem().HasPointers() { + // TODO: remove this limitation (see ../escape/utils.go:HeapAllocReason). + base.Fatalf("%v can't have pointers", t.Elem()) + } + tryStack = true } - // n escapes; set up a call to makeslice. + // The final result is assigned to this variable. + slice := typecheck.TempAt(base.Pos, ir.CurFunc, n.Type()) // []E result (possibly named) + + if tryStack { + // K := maxStackSize/sizeof(E) + // if cap <= K { + // var arr [K]E + // slice = arr[:len:cap] + // } else { + // slice = makeslice(elemType, len, cap) + // } + const maxStackSize = 32 + K := maxStackSize / t.Elem().Size() // rounds down + if K > 0 { // skip if elem size is too big. + nif := ir.NewIfStmt(base.Pos, ir.NewBinaryExpr(base.Pos, ir.OLE, typecheck.Conv(cap, types.Types[types.TUINT64]), ir.NewInt(base.Pos, K)), nil, nil) + + // cap is in bounds after the K check, but len might not be. + // (Note that the slicing below would generate a panic for + // the same bad cases, but we want makeslice panics, not + // regular slicing panics.) + lenCap := ir.NewIfStmt(base.Pos, ir.NewBinaryExpr(base.Pos, ir.OGT, typecheck.Conv(len, types.Types[types.TUINT64]), typecheck.Conv(cap, types.Types[types.TUINT64])), nil, nil) + lenZero := ir.NewIfStmt(base.Pos, ir.NewBinaryExpr(base.Pos, ir.OLT, len, ir.NewInt(base.Pos, 0)), nil, nil) + lenZero.Body.Append(mkcall("panicmakeslicelen", nil, &lenZero.Body)) + lenCap.Body.Append(lenZero) + lenCap.Body.Append(mkcall("panicmakeslicecap", nil, &lenCap.Body)) + nif.Body.Append(lenCap) + + t := types.NewArray(t.Elem(), K) // [K]E + arr := typecheck.TempAt(base.Pos, ir.CurFunc, t) // var arr [K]E + nif.Body.Append(ir.NewAssignStmt(base.Pos, arr, nil)) // arr = {} (zero it) + s := ir.NewSliceExpr(base.Pos, ir.OSLICE, arr, nil, len, cap) // arr[:len:cap] + nif.Body.Append(ir.NewAssignStmt(base.Pos, slice, s)) // slice = arr[:len:cap] + + appendWalkStmt(init, typecheck.Stmt(nif)) + + // Put makeslice call below in the else branch. + init = &nif.Else + } + } + + // Set up a call to makeslice. // When len and cap can fit into int, use makeslice instead of // makeslice64, which is faster and shorter on 32 bit platforms. - - len, cap := l, r - fnname := "makeslice64" argtype := types.Types[types.TINT64] @@ -585,8 +634,10 @@ func walkMakeSlice(n *ir.MakeExpr, init *ir.Nodes) ir.Node { ptr.MarkNonNil() len = typecheck.Conv(len, types.Types[types.TINT]) cap = typecheck.Conv(cap, types.Types[types.TINT]) - sh := ir.NewSliceHeaderExpr(base.Pos, t, ptr, len, cap) - return walkExpr(typecheck.Expr(sh), init) + s := ir.NewSliceHeaderExpr(base.Pos, t, ptr, len, cap) + appendWalkStmt(init, ir.NewAssignStmt(base.Pos, slice, s)) + + return slice } // walkMakeSliceCopy walks an OMAKESLICECOPY node. diff --git a/src/runtime/pprof/protomem_test.go b/src/runtime/pprof/protomem_test.go index 43f4d3efe1..6f3231d42a 100644 --- a/src/runtime/pprof/protomem_test.go +++ b/src/runtime/pprof/protomem_test.go @@ -131,7 +131,7 @@ func TestGenericsHashKeyInPprofBuilder(t *testing.T) { for _, sz := range []int{128, 256} { genericAllocFunc[uint32](sz / 4) } - for _, sz := range []int{32, 64} { + for _, sz := range []int{64, 128} { genericAllocFunc[uint64](sz / 8) } @@ -149,8 +149,8 @@ func TestGenericsHashKeyInPprofBuilder(t *testing.T) { expected := []string{ "testing.tRunner;runtime/pprof.TestGenericsHashKeyInPprofBuilder;runtime/pprof.genericAllocFunc[go.shape.uint32] [1 128 0 0]", "testing.tRunner;runtime/pprof.TestGenericsHashKeyInPprofBuilder;runtime/pprof.genericAllocFunc[go.shape.uint32] [1 256 0 0]", - "testing.tRunner;runtime/pprof.TestGenericsHashKeyInPprofBuilder;runtime/pprof.genericAllocFunc[go.shape.uint64] [1 32 0 0]", "testing.tRunner;runtime/pprof.TestGenericsHashKeyInPprofBuilder;runtime/pprof.genericAllocFunc[go.shape.uint64] [1 64 0 0]", + "testing.tRunner;runtime/pprof.TestGenericsHashKeyInPprofBuilder;runtime/pprof.genericAllocFunc[go.shape.uint64] [1 128 0 0]", } for _, l := range expected { diff --git a/test/escape_make_non_const.go b/test/escape_make_non_const.go index b5f5cb2e71..7a9b28d5e3 100644 --- a/test/escape_make_non_const.go +++ b/test/escape_make_non_const.go @@ -65,7 +65,7 @@ func testSlices() { } { - _ = make([]byte, globalVarSize) // ERROR "make\(\[\]byte, globalVarSize\) escapes to heap" + _ = make([]byte, globalVarSize) // ERROR "make\(\[\]byte, globalVarSize\) does not escape" _ = make([]byte, globalVarSize, globalConstSize) // ERROR "make\(\[\]byte, globalVarSize, 128\) does not escape" } }