cmd/compile: aggregate scalar allocations for heap escapes
If multiple small scalars escape to the heap, allocate them together
with a single allocation. They are going to be aggregated together in
the tiny allocator anyway; we might as well do just one runtime call.

Change-Id: I4317e29235af63de378a26436a18d7fb0c39e41f
Reviewed-on: https://go-review.googlesource.com/c/go/+/648536
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: Carlos Amedee <carlos@golang.org>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
parent 6839e71d82
commit 5fc596ebe7
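To make the effect concrete, here is a minimal standalone sketch (not part of the CL; the escapes function and sink globals are invented for illustration, mirroring the test added at the end of this change). Four small escaping scalars total 15 bytes, so with this change they are served by one aggregated allocation instead of four runtime.newobject calls:

package main

import (
	"fmt"
	"testing"
)

var (
	sink64 *int64
	sink32 *int32
	sink16 *int16
	sink8  *int8
)

// escapes forces four small scalars to escape to the heap.
// 8 + 4 + 2 + 1 = 15 bytes, within the 16-byte aggregation budget.
func escapes() {
	var x int64
	var y int32
	var z int16
	var w int8
	sink64 = &x
	sink32 = &y
	sink16 = &z
	sink8 = &w
}

func main() {
	// Before this CL: 4 allocs per run. After: 1.
	fmt.Println(testing.AllocsPerRun(100, escapes))
}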
@@ -30,6 +30,7 @@ type symsStruct struct {
 	Goschedguarded  *obj.LSym
 	Growslice       *obj.LSym
 	InterfaceSwitch *obj.LSym
+	MallocGC        *obj.LSym
 	Memmove         *obj.LSym
 	Msanread        *obj.LSym
 	Msanwrite       *obj.LSym
@@ -333,6 +333,13 @@ func (v *Value) SetArgs3(a, b, c *Value) {
 	v.AddArg(b)
 	v.AddArg(c)
 }
+func (v *Value) SetArgs4(a, b, c, d *Value) {
+	v.resetArgs()
+	v.AddArg(a)
+	v.AddArg(b)
+	v.AddArg(c)
+	v.AddArg(d)
+}
 func (v *Value) resetArgs() {
 	for _, a := range v.Args {
@@ -7,6 +7,7 @@ package ssagen
 import (
 	"bufio"
 	"bytes"
+	"cmp"
 	"fmt"
 	"go/constant"
 	"html"
@ -47,6 +48,11 @@ const ssaDumpFile = "ssa.html"
|
|||||||
// ssaDumpInlined holds all inlined functions when ssaDump contains a function name.
|
// ssaDumpInlined holds all inlined functions when ssaDump contains a function name.
|
||||||
var ssaDumpInlined []*ir.Func
|
var ssaDumpInlined []*ir.Func
|
||||||
|
|
||||||
|
// Maximum size we will aggregate heap allocations of scalar locals.
|
||||||
|
// Almost certainly can't hurt to be as big as the tiny allocator.
|
||||||
|
// Might help to be a bit bigger.
|
||||||
|
const maxAggregatedHeapAllocation = 16
|
||||||
|
|
||||||
func DumpInline(fn *ir.Func) {
|
func DumpInline(fn *ir.Func) {
|
||||||
if ssaDump != "" && ssaDump == ir.FuncName(fn) {
|
if ssaDump != "" && ssaDump == ir.FuncName(fn) {
|
||||||
ssaDumpInlined = append(ssaDumpInlined, fn)
|
ssaDumpInlined = append(ssaDumpInlined, fn)
|
||||||
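A quick worked check of that budget (illustrative only, not part of the diff): an int64, int32, int16, and int8 together need 15 bytes, so all four fit under the 16-byte cap and can share one tiny-allocator block.

package main

import (
	"fmt"
	"unsafe"
)

func main() {
	const maxAggregatedHeapAllocation = 16 // mirrors the compiler constant above

	// 8 + 4 + 2 + 1 = 15 bytes: fits in a single aggregated allocation.
	total := unsafe.Sizeof(int64(0)) + unsafe.Sizeof(int32(0)) +
		unsafe.Sizeof(int16(0)) + unsafe.Sizeof(int8(0))
	fmt.Println(total, total <= maxAggregatedHeapAllocation) // 15 true
}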
@@ -122,6 +128,7 @@ func InitConfig() {
 	ir.Syms.Goschedguarded = typecheck.LookupRuntimeFunc("goschedguarded")
 	ir.Syms.Growslice = typecheck.LookupRuntimeFunc("growslice")
 	ir.Syms.InterfaceSwitch = typecheck.LookupRuntimeFunc("interfaceSwitch")
+	ir.Syms.MallocGC = typecheck.LookupRuntimeFunc("mallocgc")
 	ir.Syms.Memmove = typecheck.LookupRuntimeFunc("memmove")
 	ir.Syms.Msanread = typecheck.LookupRuntimeFunc("msanread")
 	ir.Syms.Msanwrite = typecheck.LookupRuntimeFunc("msanwrite")
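For context (an editorial annotation, not part of the diff): the runtime function looked up here has the following signature in src/runtime/malloc.go, which is what the three-argument call built in flushPendingHeapAllocations below targets — a raw size, a type pointer (nil here, meaning pointer-free memory), and a zeroing flag.

func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer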
@@ -696,7 +703,89 @@ func (s *state) paramsToHeap() {
 
 // newHeapaddr allocates heap memory for n and sets its heap address.
 func (s *state) newHeapaddr(n *ir.Name) {
-	s.setHeapaddr(n.Pos(), n, s.newObject(n.Type(), nil))
+	if n.Type().HasPointers() || n.Type().Size() >= maxAggregatedHeapAllocation || n.Type().Size() == 0 {
+		s.setHeapaddr(n.Pos(), n, s.newObject(n.Type(), nil))
+		return
+	}
+
+	// Do we have room together with our pending allocations?
+	// If not, flush all the current ones.
+	var size int64
+	for _, v := range s.pendingHeapAllocations {
+		size += v.Type.Elem().Size()
+	}
+	if size+n.Type().Size() > maxAggregatedHeapAllocation {
+		s.flushPendingHeapAllocations()
+	}
+
+	var allocCall *ssa.Value // (SelectN [0] (call of runtime.newobject))
+	if len(s.pendingHeapAllocations) == 0 {
+		// Make an allocation, but the type being allocated is just
+		// the first pending object. We will come back and update it
+		// later if needed.
+		allocCall = s.newObject(n.Type(), nil)
+	} else {
+		allocCall = s.pendingHeapAllocations[0].Args[0]
+	}
+	// v is an offset to the shared allocation. Offsets are dummy 0s for now.
+	v := s.newValue1I(ssa.OpOffPtr, n.Type().PtrTo(), 0, allocCall)
+
+	// Add to list of pending allocations.
+	s.pendingHeapAllocations = append(s.pendingHeapAllocations, v)
+
+	// Finally, record for posterity.
+	s.setHeapaddr(n.Pos(), n, v)
+}
+
+func (s *state) flushPendingHeapAllocations() {
+	pending := s.pendingHeapAllocations
+	if len(pending) == 0 {
+		return // nothing to do
+	}
+	s.pendingHeapAllocations = nil // reset state
+	ptr := pending[0].Args[0]      // The SelectN [0] op
+	call := ptr.Args[0]            // The runtime.newobject call
+
+	if len(pending) == 1 {
+		// Just a single object, do a standard allocation.
+		v := pending[0]
+		v.Op = ssa.OpCopy // instead of OffPtr [0]
+		return
+	}
+
+	// Sort in decreasing alignment.
+	// This way we never have to worry about padding.
+	// (Stable not required; just cleaner to keep program order among equal alignments.)
+	slices.SortStableFunc(pending, func(x, y *ssa.Value) int {
+		return cmp.Compare(y.Type.Elem().Alignment(), x.Type.Elem().Alignment())
+	})
+
+	// Figure out how much data we need to allocate.
+	var size int64
+	for _, v := range pending {
+		v.AuxInt = size // Adjust OffPtr to the right value while we are here.
+		size += v.Type.Elem().Size()
+	}
+	align := pending[0].Type.Elem().Alignment()
+	size = types.RoundUp(size, align)
+
+	// Convert newObject call to a mallocgc call.
+	args := []*ssa.Value{
+		s.constInt(types.Types[types.TUINTPTR], size),
+		s.constNil(call.Args[0].Type), // a nil *runtime._type
+		s.constBool(true),             // needZero TODO: false is ok?
+		call.Args[1],                  // memory
+	}
+	call.Aux = ssa.StaticAuxCall(ir.Syms.MallocGC, s.f.ABIDefault.ABIAnalyzeTypes(
+		[]*types.Type{args[0].Type, args[1].Type, args[2].Type},
+		[]*types.Type{types.Types[types.TUNSAFEPTR]},
+	))
+	call.AuxInt = 4 * s.config.PtrSize // arg+results size, uintptr/ptr/bool/ptr
+	call.SetArgs4(args[0], args[1], args[2], args[3])
+	// TODO: figure out how to pass alignment to runtime
+
+	call.Type = types.NewTuple(types.Types[types.TUNSAFEPTR], types.TypeMem)
+	ptr.Type = types.Types[types.TUNSAFEPTR]
 }
 
 // setHeapaddr allocates a new PAUTO variable to store ptr (which must be non-nil)
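The "never have to worry about padding" claim is easy to check outside the compiler. A standalone sketch (the item type is hypothetical, not compiler code) applying the same decreasing-alignment sort and greedy offset assignment:

package main

import (
	"cmp"
	"fmt"
	"slices"
)

// item stands in for a pending allocation's element type.
type item struct {
	name  string
	size  int64
	align int64
}

func main() {
	pending := []item{
		{"w", 1, 1}, {"x", 8, 8}, {"y", 4, 4}, {"z", 2, 2},
	}

	// Sort in decreasing alignment, as flushPendingHeapAllocations does.
	// Every scalar's size is a multiple of its own alignment, so once
	// items are ordered this way each running offset is already aligned
	// for the next item and no padding is ever needed.
	slices.SortStableFunc(pending, func(a, b item) int {
		return cmp.Compare(b.align, a.align)
	})

	var off int64
	for _, it := range pending {
		fmt.Printf("%s: offset %d\n", it.name, off) // x:0 y:8 z:12 w:14
		off += it.size
	}

	// Round the total up to the block's alignment (the largest item's).
	align := pending[0].align
	fmt.Println("total:", (off+align-1)/align*align) // 16
}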
@@ -937,6 +1026,11 @@ type state struct {
 	lastDeferCount int // Number of defers encountered at that point
 
 	prevCall *ssa.Value // the previous call; use this to tie results to the call op.
+
+	// List of allocations in the current block that are still pending.
+	// They are all (OffPtr (Select0 (runtime call))) and have the correct types,
+	// but the offsets are not set yet, and the type of the runtime call is also not final.
+	pendingHeapAllocations []*ssa.Value
 }
 
 type funcLine struct {
@@ -1005,6 +1099,9 @@ func (s *state) endBlock() *ssa.Block {
 	if b == nil {
 		return nil
 	}
+
+	s.flushPendingHeapAllocations()
+
 	for len(s.defvars) <= int(b.ID) {
 		s.defvars = append(s.defvars, nil)
 	}
src/cmd/compile/internal/test/locals_test.go (new file, 76 lines)
@@ -0,0 +1,76 @@
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package test
+
+import "testing"
+
+func locals() {
+	var x int64
+	var y int32
+	var z int16
+	var w int8
+	sink64 = &x
+	sink32 = &y
+	sink16 = &z
+	sink8 = &w
+}
+
+//go:noinline
+func args(x int64, y int32, z int16, w int8) {
+	sink64 = &x
+	sink32 = &y
+	sink16 = &z
+	sink8 = &w
+
+}
+
+//go:noinline
+func half(x int64, y int16) {
+	var z int32
+	var w int8
+	sink64 = &x
+	sink16 = &y
+	sink32 = &z
+	sink8 = &w
+}
+
+//go:noinline
+func closure() func() {
+	var x int64
+	var y int32
+	var z int16
+	var w int8
+	_, _, _, _ = x, y, z, w
+	return func() {
+		x = 1
+		y = 2
+		z = 3
+		w = 4
+	}
+}
+
+var sink64 *int64
+var sink32 *int32
+var sink16 *int16
+var sink8 *int8
+
+func TestLocalAllocations(t *testing.T) {
+	type test struct {
+		name string
+		f    func()
+		want int
+	}
+	for _, tst := range []test{
+		{"locals", locals, 1},
+		{"args", func() { args(1, 2, 3, 4) }, 1},
+		{"half", func() { half(1, 2) }, 1},
+		{"closure", func() { _ = closure() }, 2},
+	} {
+		allocs := testing.AllocsPerRun(100, tst.f)
+		if allocs != float64(tst.want) {
+			t.Errorf("test %s uses %v allocs, want %d", tst.name, allocs, tst.want)
+		}
+	}
+}
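In a Go checkout the new test can be run directly with: go test -run TestLocalAllocations cmd/compile/internal/test. Note the "closure" case expects 2 allocations, presumably because the closure object itself is allocated separately from the single aggregated block holding the captured x, y, z, and w.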