Mirror of https://github.com/golang/go.git
runtime: add per-p page allocation cache
This change adds a per-p free page cache which the page allocator may
allocate out of without a lock. The change also introduces a completely
lockless page allocator fast path.

Although the cache contains at most 64 pages (and usually less), the
vast majority (85%+) of page allocations are exactly 1 page in size.

Updates #35112.

Change-Id: I170bf0a9375873e7e3230845eb1df7e5cf741b78
Reviewed-on: https://go-review.googlesource.com/c/go/+/195701
Run-TryBot: Michael Knyszek <mknyszek@google.com>
Reviewed-by: Austin Clements <austin@google.com>
commit a2cd2bd55d
parent 81640ea38d
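The heart of the change is a small, P-owned cache: a base address plus a 64-bit bitmap in which each set bit marks a free page, so a single uint64 describes up to 64 pages and a one-page allocation is just "find the lowest set bit and clear it", with no lock needed because only the P that owns the cache can touch it. The standalone sketch below illustrates that idea; it is not the runtime's implementation (the real pageCache also tracks a scavenged-page bitmap, and the 8 KiB page size, the naive multi-page scan, and the main driver are assumptions for illustration only).

package main

import (
	"fmt"
	"math/bits"
)

// Illustrative sketch only: a simplified stand-in for the runtime's per-P
// page cache. The real pageCache also carries a scavenged-pages bitmap;
// this version keeps just the allocation path.
const pageSize = 8192 // assumed page size, for illustration

type pageCache struct {
	base  uintptr // base address of an aligned 64-page region
	cache uint64  // bitmap of free pages; bit i set means page i is free
}

func (c *pageCache) empty() bool { return c.cache == 0 }

// alloc returns the base address of npages contiguous free pages, or 0.
// No lock is taken: the cache is owned by exactly one P, so only the
// goroutine running on that P can touch it.
func (c *pageCache) alloc(npages uintptr) uintptr {
	if c.cache == 0 {
		return 0
	}
	if npages == 1 {
		// The hot path: most page allocations are exactly one page.
		i := uintptr(bits.TrailingZeros64(c.cache))
		c.cache &^= 1 << i // mark the page allocated
		return c.base + i*pageSize
	}
	return c.allocN(npages)
}

// allocN does a naive scan for a run of npages free pages. The runtime
// uses a smarter bitmap search, but the effect is the same.
func (c *pageCache) allocN(npages uintptr) uintptr {
	run := uintptr(0)
	for i := uintptr(0); i < 64; i++ {
		if c.cache&(1<<i) == 0 {
			run = 0
			continue
		}
		run++
		if run == npages {
			start := i + 1 - npages
			c.cache &^= ((uint64(1) << npages) - 1) << start
			return c.base + start*pageSize
		}
	}
	return 0
}

func main() {
	c := pageCache{base: 0x100000, cache: ^uint64(0)} // 64 free pages
	fmt.Printf("1-page alloc: %#x\n", c.alloc(1))
	fmt.Printf("4-page alloc: %#x\n", c.alloc(4))
	fmt.Printf("pages left:   %d\n", bits.OnesCount64(c.cache))
}

A uint64 bitmap covers 64 pages, which matches the commit message's "at most 64 pages"; in the diff below, allocSpan only consults the cache when npages < pageCachePages/4, so only small allocations take the lockless fast path.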
@@ -7,6 +7,7 @@
 package runtime
 
 import (
+	"math/bits"
 	"runtime/internal/atomic"
 	"runtime/internal/sys"
 	"unsafe"
@@ -358,6 +359,10 @@ func ReadMemStatsSlow() (base, slow MemStats) {
 			pg := mheap_.pages.chunks[i].scavenged.popcntRange(0, pallocChunkPages)
 			slow.HeapReleased += uint64(pg) * pageSize
 		}
+		for _, p := range allp {
+			pg := bits.OnesCount64(p.pcache.scav)
+			slow.HeapReleased += uint64(pg) * pageSize
+		}
 
 		// Unused space in the current arena also counts as released space.
 		slow.HeapReleased += uint64(mheap_.curArena.end - mheap_.curArena.base)
@@ -879,3 +884,20 @@ func CheckScavengedBitsCleared(mismatches []BitsMismatch) (n int, ok bool) {
 	})
 	return
 }
+
+func PageCachePagesLeaked() (leaked uintptr) {
+	stopTheWorld("PageCachePagesLeaked")
+
+	// Walk over destroyed Ps and look for unflushed caches.
+	deadp := allp[len(allp):cap(allp)]
+	for _, p := range deadp {
+		// Since we're going past len(allp) we may see nil Ps.
+		// Just ignore them.
+		if p != nil {
+			leaked += uintptr(bits.OnesCount64(p.pcache.cache))
+		}
+	}
+
+	startTheWorld()
+	return
+}
@@ -168,6 +168,14 @@ func TestTinyAlloc(t *testing.T) {
 	}
 }
 
+func TestPageCacheLeak(t *testing.T) {
+	defer GOMAXPROCS(GOMAXPROCS(1))
+	leaked := PageCachePagesLeaked()
+	if leaked != 0 {
+		t.Fatalf("found %d leaked pages in page caches", leaked)
+	}
+}
+
 func TestPhysicalMemoryUtilization(t *testing.T) {
 	got := runTestProg(t, "testprog", "GCPhys")
 	want := "OK\n"
@@ -1073,28 +1073,60 @@ func (h *mheap) allocSpan(npages uintptr, manual bool, spanclass spanClass, sysS
 	gp := getg()
 	base, scav := uintptr(0), uintptr(0)
 
-	// Try to allocate a cached span.
-	s = h.tryAllocMSpan()
+	// If the allocation is small enough, try the page cache!
+	pp := gp.m.p.ptr()
+	if pp != nil && npages < pageCachePages/4 {
+		c := &pp.pcache
 
-	// We failed to do what we need to do without the lock.
+		// If the cache is empty, refill it.
+		if c.empty() {
+			lock(&h.lock)
+			*c = h.pages.allocToCache()
+			unlock(&h.lock)
+		}
+
+		// Try to allocate from the cache.
+		base, scav = c.alloc(npages)
+		if base != 0 {
+			s = h.tryAllocMSpan()
+
+			if s != nil && gcBlackenEnabled == 0 && (manual || spanclass.sizeclass() != 0) {
+				goto HaveSpan
+			}
+			// We're either running duing GC, failed to acquire a mspan,
+			// or the allocation is for a large object. This means we
+			// have to lock the heap and do a bunch of extra work,
+			// so go down the HaveBaseLocked path.
+			//
+			// We must do this during GC to avoid skew with heap_scan
+			// since we flush mcache stats whenever we lock.
+			//
+			// TODO(mknyszek): It would be nice to not have to
+			// lock the heap if it's a large allocation, but
+			// it's fine for now. The critical section here is
+			// short and large object allocations are relatively
+			// infrequent.
+		}
+	}
+
+	// For one reason or another, we couldn't get the
+	// whole job done without the heap lock.
 	lock(&h.lock)
 
-	// Try to acquire a base address.
-	base, scav = h.pages.alloc(npages)
-	if base != 0 {
-		goto HaveBase
+	if base == 0 {
+		// Try to acquire a base address.
+		base, scav = h.pages.alloc(npages)
+		if base == 0 {
+			if !h.grow(npages) {
+				unlock(&h.lock)
+				return nil
+			}
+			base, scav = h.pages.alloc(npages)
+			if base == 0 {
+				throw("grew heap, but no adequate free space found")
+			}
+		}
 	}
-	if !h.grow(npages) {
-		unlock(&h.lock)
-		return nil
-	}
-	base, scav = h.pages.alloc(npages)
-	if base != 0 {
-		goto HaveBase
-	}
-	throw("grew heap, but no adequate free space found")
-
-HaveBase:
 	if s == nil {
 		// We failed to get an mspan earlier, so grab
 		// one now that we have the heap lock.
@@ -1124,7 +1156,9 @@ HaveBase:
 	}
 	unlock(&h.lock)
 
-	// Initialize the span.
+HaveSpan:
+	// At this point, both s != nil and base != 0, and the heap
+	// lock is no longer held. Initialize the span.
 	s.init(base, npages)
 	if h.allocNeedsZero(base, npages) {
 		s.needzero = 1
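When the per-P cache is empty, the fast path above briefly takes the heap lock and refills it with *c = h.pages.allocToCache(): the page allocator hands the P roughly one aligned 64-page window's worth of free bits and clears them from its own structures, so ownership of those pages moves wholesale to that P. The sketch below shows that shape using the same toy bitmap types as the earlier sketch; the real allocToCache lives in the page allocator and locates the window via its summary structures, so everything here beyond the names taken from the diff is an assumption.

package main

import (
	"fmt"
	"math/bits"
)

// Illustrative sketch only; the names mirror the diff but the bodies are
// simplified assumptions, not the runtime's code.
const (
	pageSize       = 8192
	pageCachePages = 64 // pages per cache: one uint64 bitmap's worth
)

type pageCache struct {
	base  uintptr
	cache uint64
}

// pageAlloc stands in for the heap's page allocator: a base address and a
// flat free-page bitmap (1 bit per page, 1 = free). The caller must hold
// the heap lock, exactly as allocSpan does around h.pages.allocToCache().
type pageAlloc struct {
	base uintptr
	free []uint64 // each word covers pageCachePages pages
}

// allocToCache carves out one aligned 64-page window containing free pages,
// transfers its free bits to a pageCache, and clears them here so no other
// P can hand out the same pages.
func (p *pageAlloc) allocToCache() pageCache {
	for i, w := range p.free {
		if w == 0 {
			continue // no free pages in this window
		}
		c := pageCache{
			base:  p.base + uintptr(i)*pageCachePages*pageSize,
			cache: w,
		}
		p.free[i] = 0 // these pages now belong to the cache's P
		return c
	}
	return pageCache{} // empty cache: the caller falls back to the slow path
}

func main() {
	p := pageAlloc{base: 0x200000, free: []uint64{0, 0xFF00}} // free pages in window 1
	c := p.allocToCache()
	fmt.Printf("cache base %#x with %d free pages\n", c.base, bits.OnesCount64(c.cache))
}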
@@ -4088,6 +4088,7 @@ func (pp *p) destroy() {
 			mheap_.spanalloc.free(unsafe.Pointer(pp.mspancache.buf[i]))
 		}
 		pp.mspancache.len = 0
+		pp.pcache.flush(&mheap_.pages)
 	})
 	freemcache(pp.mcache)
 	pp.mcache = nil
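The counterpart to the refill is pcache.flush, which p.destroy calls above so that pages still sitting free in a dead P's cache go back to the page allocator instead of leaking (PageCachePagesLeaked and TestPageCacheLeak earlier in the diff check exactly this). Below is a minimal sketch of that flush with the same toy types as the earlier sketches, under the assumption that the caller has stopped the world or otherwise synchronized with the allocator; the real flush differs in detail.

package main

import (
	"fmt"
	"math/bits"
)

// Illustrative sketch only: flush as the inverse of the refill above.
const (
	pageSize       = 8192
	pageCachePages = 64
)

type pageCache struct {
	base  uintptr
	cache uint64
}

type pageAlloc struct {
	base uintptr
	free []uint64 // 1 bit per page, 64 pages per word
}

// flush returns every page still free in the cache to the allocator and
// empties the cache. In the runtime this runs from p.destroy with the
// world stopped, so this sketch ignores locking.
func (c *pageCache) flush(p *pageAlloc) {
	if c.cache == 0 {
		return
	}
	i := (c.base - p.base) / (pageCachePages * pageSize) // which 64-page window
	p.free[i] |= c.cache                                 // give the pages back
	*c = pageCache{}
}

func main() {
	p := pageAlloc{base: 0x200000, free: []uint64{0, 0}}
	c := pageCache{base: p.base + pageCachePages*pageSize, cache: 0xF0}
	c.flush(&p)
	fmt.Printf("window 1 free pages after flush: %d\n", bits.OnesCount64(p.free[1]))
}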
@@ -555,6 +555,7 @@ type p struct {
 	sysmontick  sysmontick // last tick observed by sysmon
 	m           muintptr   // back-link to associated m (nil if idle)
 	mcache      *mcache
+	pcache      pageCache
 	raceprocctx uintptr
 
 	deferpool [5][]*_defer // pool of available defer structs of different sizes (see panic.go)
@@ -611,7 +612,7 @@ type p struct {
 
 	palloc persistentAlloc // per-P to avoid mutex
 
-	// _ uint32 // Alignment for atomic fields below
+	_ uint32 // Alignment for atomic fields below
 
 	// Per-P GC state
 	gcAssistTime int64 // Nanoseconds in assistAlloc