diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go
index 4122b7ba23..4562e82c37 100644
--- a/src/runtime/malloc.go
+++ b/src/runtime/malloc.go
@@ -154,6 +154,39 @@ const (
 	// since the arena starts at address 0.
 	_MaxMem = 1<<_MHeapMap_TotalBits - 1
 
+	// memLimitBits is the maximum number of bits in a heap address.
+	//
+	// On 64-bit platforms, we limit this to 48 bits because that
+	// is the maximum supported by Linux across all 64-bit
+	// architectures, with the exception of s390x.
+	// s390x supports full 64-bit addresses, but the allocator
+	// will panic in the unlikely event we exceed 48 bits.
+	//
+	// On 32-bit platforms, we accept the full 32-bit address
+	// space because doing so is cheap.
+	// mips32 only has access to the low 2GB of virtual memory, so
+	// we further limit it to 31 bits.
+	//
+	// The size of the arena index is proportional to
+	// 1<<memLimitBits.
diff --git a/src/runtime/mbitmap.go b/src/runtime/mbitmap.go
--- a/src/runtime/mbitmap.go
+++ b/src/runtime/mbitmap.go
-	if h.bitmap_mapped >= n {
-		return
-	}
-
-	sysMap(unsafe.Pointer(h.bitmap_start+h.bitmap_mapped), n-h.bitmap_mapped, h.arena_reserved, &memstats.gc_sys)
-	h.bitmap_mapped = n
-}
-
 // heapBits provides access to the bitmap bits for a single heap word.
 // The methods on heapBits take value receivers so that the compiler
 // can more easily inline calls to those methods and registerize the
@@ -166,8 +141,14 @@ func (h *mheap) mapBits(arena_used uintptr) {
 type heapBits struct {
 	bitp  *uint8
 	shift uint32
+	arena uint32 // Index of heap arena containing bitp
+	last  *uint8 // Last byte of the arena's bitmap
 }
+
+// Make the compiler check that heapBits.arena is large enough to hold
+// the maximum arena index.
+var _ = heapBits{arena: memLimit / heapArenaBytes}
+
 // markBits provides access to the mark bit for an object in the heap.
 // bytep points to the byte holding the mark bit.
 // mask is a byte with a single bit set that can be &ed with *bytep
@@ -349,14 +330,26 @@ func (m *markBits) advance() {
 }
 
 // heapBitsForAddr returns the heapBits for the address addr.
-// The caller must have already checked that addr is in the range [mheap_.arena_start, mheap_.arena_used).
+// The caller must ensure addr is in an allocated span.
+// In particular, be careful not to point past the end of an object.
 //
 // nosplit because it is used during write barriers and must not be preempted.
 //go:nosplit
 func heapBitsForAddr(addr uintptr) heapBits {
 	// 2 bits per word, 4 pairs per byte, and a mask is hard coded.
 	off := addr / sys.PtrSize
-	return heapBits{(*uint8)(unsafe.Pointer(mheap_.bitmap_delta + off/4)), uint32(off & 3)}
+	arena := addr / heapArenaBytes
+	ha := mheap_.arenas[arena]
+	// The compiler uses a load for nil checking ha, but in this
+	// case we'll almost never hit that cache line again, so it
+	// makes more sense to do a value check.
+	if ha == nil {
+		// addr is not in the heap. Crash without inhibiting inlining.
+		_ = *ha
+	}
+	bitp := &ha.bitmap[(off/4)%heapArenaBitmapBytes]
+	last := &ha.bitmap[len(ha.bitmap)-1]
+	return heapBits{bitp, uint32(off & 3), uint32(arena), last}
 }
 
 // heapBitsForSpan returns the heapBits for the span base address base.
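The heapBitsForAddr hunk above is the heart of the new scheme: the address first selects an entry in a flat arena index (one slot per heapArenaBytes-sized region), and only that arena's private bitmap is then indexed. The following stand-alone sketch mirrors the shape of that two-level lookup; arenaBytes, bitmapFor, and the 64 MB arena size are illustrative assumptions, not the runtime's definitions.

package main

import "fmt"

// Illustrative stand-ins for the runtime's constants: 64 MB arenas,
// 8-byte words, and 2 bitmap bits per word (all assumed values).
const (
	arenaBytes  = 64 << 20
	ptrSize     = 8
	bitmapBytes = arenaBytes / (ptrSize * 4) // each bitmap byte covers 4 words
)

// arenas plays the role of mheap_.arenas: one slot per possible arena
// in a 48-bit address space, nil for regions not backed by the heap.
var arenas [1 << (48 - 26)]*[bitmapBytes]byte

// bitmapFor mirrors the shape of heapBitsForAddr: pick the arena by
// dividing the address, then index that arena's private bitmap.
func bitmapFor(addr uintptr) (bitp *byte, shift uintptr) {
	off := addr / ptrSize // word index
	a := arenas[addr/arenaBytes]
	if a == nil {
		panic("address not in a mapped arena")
	}
	return &a[(off/4)%bitmapBytes], (off % 4) * 2
}

func main() {
	arenas[3] = new([bitmapBytes]byte) // pretend arena 3 is mapped
	b, s := bitmapFor(3*arenaBytes + 128)
	fmt.Println(b != nil, s) // true 0
}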
@@ -446,9 +439,24 @@ func findObject(p, refBase, refOff uintptr) (base uintptr, s *mspan, objIndex ui
 //go:nosplit
 func (h heapBits) next() heapBits {
 	if h.shift < 3*heapBitsShift {
-		return heapBits{h.bitp, h.shift + heapBitsShift}
+		h.shift += heapBitsShift
+	} else if h.bitp != h.last {
+		h.bitp, h.shift = add1(h.bitp), 0
+	} else {
+		// Move to the next arena.
+		h.arena++
+		a := mheap_.arenas[h.arena]
+		if a == nil {
+			// We just passed the end of the object, which
+			// was also the end of the heap. Poison h. It
+			// should never be dereferenced at this point.
+			h.bitp, h.last = nil, nil
+		} else {
+			h.bitp, h.shift = &a.bitmap[0], 0
+			h.last = &a.bitmap[len(a.bitmap)-1]
+		}
 	}
-	return heapBits{add1(h.bitp), 0}
+	return h
 }
 
 // forward returns the heapBits describing n pointer-sized words ahead of h in memory.
@@ -456,16 +464,37 @@ func (h heapBits) next() heapBits {
 // h.forward(1) is equivalent to h.next(), just slower.
 // Note that forward does not modify h. The caller must record the result.
 // bits returns the heap bits for the current word.
+//go:nosplit
 func (h heapBits) forward(n uintptr) heapBits {
 	n += uintptr(h.shift) / heapBitsShift
-	return heapBits{addb(h.bitp, n/4), uint32(n%4) * heapBitsShift}
+	nbitp := uintptr(unsafe.Pointer(h.bitp)) + n/4
+	h.shift = uint32(n%4) * heapBitsShift
+	if nbitp <= uintptr(unsafe.Pointer(h.last)) {
+		h.bitp = (*uint8)(unsafe.Pointer(nbitp))
+		return h
+	}
+
+	// We're in a new heap arena.
+	past := nbitp - (uintptr(unsafe.Pointer(h.last)) + 1)
+	h.arena += 1 + uint32(past/heapArenaBitmapBytes)
+	a := mheap_.arenas[h.arena]
+	if a == nil {
+		h.bitp, h.last = nil, nil
+	} else {
+		h.bitp = &a.bitmap[past%heapArenaBitmapBytes]
+		h.last = &a.bitmap[len(a.bitmap)-1]
+	}
+	return h
 }
 
 // forwardOrBoundary is like forward, but stops at boundaries between
 // contiguous sections of the bitmap. It returns the number of words
 // advanced over, which will be <= n.
 func (h heapBits) forwardOrBoundary(n uintptr) (heapBits, uintptr) {
-	// The bitmap is contiguous right now, so this is just forward.
+	maxn := 4 * ((uintptr(unsafe.Pointer(h.last)) + 1) - uintptr(unsafe.Pointer(h.bitp)))
+	if n > maxn {
+		n = maxn
+	}
 	return h.forward(n), n
 }
@@ -951,6 +980,16 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
 	// This is a lot of lines of code, but it compiles into relatively few
 	// machine instructions.
 
+	outOfPlace := false
+	if (x+size-1)/heapArenaBytes != uintptr(h.arena) {
+		// This object spans heap arenas, so the bitmap may be
+		// discontiguous. Unroll it into the object instead
+		// and then copy it out.
+		outOfPlace = true
+		h.bitp = (*uint8)(unsafe.Pointer(x))
+		h.last = nil
+	}
+
 	var (
 		// Ptrmask input.
 		p *byte // last ptrmask byte read
@@ -989,9 +1028,8 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
 			}
 			ptrmask = debugPtrmask.data
 			runGCProg(addb(typ.gcdata, 4), nil, ptrmask, 1)
-			goto Phase4
 		}
-		return
+		goto Phase4
 	}
 
 	// Note about sizes:
@@ -1109,7 +1147,7 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
 		nw = 2
 	}
 
-	// Phase 1: Special case for leading byte (shift==0) or half-byte (shift==4).
+	// Phase 1: Special case for leading byte (shift==0) or half-byte (shift==2).
 	// The leading byte is special because it contains the bits for word 1,
 	// which does not have the scan bit set.
 	// The leading half-byte is special because it's a half a byte,
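Because the bitmap is now split into per-arena chunks, next and forwardOrBoundary above have to notice when an iterator steps past the last byte of one arena's bitmap and re-anchor it in the next arena (or poison it at the end of the heap). A toy model of that boundary hop, using tiny invented sizes in place of heapArenaBitmapBytes; this is a sketch of the idea, not the runtime's code.

package main

import "fmt"

const bitmapLen = 4 // bytes of bitmap per toy arena (stand-in for heapArenaBitmapBytes)

type arena struct{ bitmap [bitmapLen]byte }

var arenas = []*arena{new(arena), new(arena), nil}

// iter is the analogue of heapBits: a byte position, a 2-bit shift
// within that byte, and the arena the position points into.
type iter struct {
	arena, byteIdx int
	shift          uint
}

// next advances by one word (one 2-bit pair), hopping to the next
// arena's bitmap when it runs off the end of the current one.
func (it iter) next() iter {
	if it.shift < 6 {
		it.shift += 2
		return it
	}
	it.shift = 0
	if it.byteIdx+1 < bitmapLen {
		it.byteIdx++
		return it
	}
	it.arena++
	it.byteIdx = 0
	if arenas[it.arena] == nil {
		it.byteIdx = -1 // poison, like h.bitp = nil in the real code
	}
	return it
}

func main() {
	it := iter{}
	for i := 0; i < bitmapLen*4; i++ { // walk one arena's worth of words
		it = it.next()
	}
	fmt.Println(it.arena, it.byteIdx, it.shift) // 1 0 0
}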
@@ -1280,9 +1318,81 @@ Phase3:
 	}
 
 Phase4:
-	// Phase 4: all done, but perhaps double check.
+	// Phase 4: Copy unrolled bitmap to per-arena bitmaps, if necessary.
+	if outOfPlace {
+		// TODO: We could probably make this faster by
+		// handling [x+dataSize, x+size) specially.
+		h := heapBitsForAddr(x)
+		// cnw is the number of heap words, or bit pairs
+		// remaining (like nw above).
+		cnw := size / sys.PtrSize
+		src := (*uint8)(unsafe.Pointer(x))
+		// We know the first and last byte of the bitmap are
+		// not the same, but it's still possible for a small
+		// object to span arenas, so it may share bitmap bytes
+		// with neighboring objects.
+		//
+		// Handle the first byte specially if it's shared. See
+		// Phase 1 for why this is the only special case we need.
+		if doubleCheck {
+			if !(h.shift == 0 || (sys.PtrSize == 8 && h.shift == 2)) {
+				print("x=", x, " size=", size, " shift=", h.shift, "\n")
+				throw("bad start shift")
+			}
+		}
+		if sys.PtrSize == 8 && h.shift == 2 {
+			*hbitp = *hbitp&^((bitPointer|bitScan|(bitPointer|bitScan)<
+		}
+		for cnw >= 4 {
+			hNext, words := h.forwardOrBoundary(cnw)
+
+			// n is the number of bitmap bytes to copy.
+			n := words / 4
+			memmove(unsafe.Pointer(h.bitp), unsafe.Pointer(src), n)
+			cnw -= words
+			h = hNext
+			src = addb(src, n)
+		}
+		// Handle the last byte if it's shared.
+		if cnw == 2 {
+			*h.bitp = *h.bitp&^(bitPointer|bitScan|(bitPointer|bitScan)<
+		}
+		if doubleCheck {
+			if uintptr(unsafe.Pointer(src)) > x+size {
+				throw("copy exceeded object size")
+			}
+			if !(cnw == 0 || cnw == 2) {
+				print("x=", x, " size=", size, " cnw=", cnw, "\n")
+				throw("bad number of remaining words")
+			}
+			// Set up hbitp so doubleCheck code below can check it.
+			hbitp = h.bitp
+		}
+		// Zero the object where we wrote the bitmap.
+		memclrNoHeapPointers(unsafe.Pointer(x), uintptr(unsafe.Pointer(src))-x)
+	}
+
+	// Double check the whole bitmap.
 	if doubleCheck {
-		end := heapBitsForAddr(x + size)
+		// x+size may not point to the heap, so back up one
+		// word and then call next().
+		end := heapBitsForAddr(x + size - sys.PtrSize).next()
+		if !outOfPlace && (end.bitp == nil || (end.shift == 0 && end.bitp == &mheap_.arenas[end.arena].bitmap[0])) {
+			// The unrolling code above walks hbitp just
+			// past the bitmap without moving to the next
+			// arena. Synthesize this for end.bitp.
+			end.bitp = addb(&mheap_.arenas[end.arena-1].bitmap[0], heapArenaBitmapBytes)
+			end.arena--
+			end.last = nil
+		}
 		if typ.kind&kindGCProg == 0 && (hbitp != end.bitp || (w == nw+2) != (end.shift == 2)) {
 			println("ended at wrong bitmap byte for", typ.string(), "x", dataSize/typ.size)
 			print("typ.size=", typ.size, " typ.ptrdata=", typ.ptrdata, " dataSize=", dataSize, " size=", size, "\n")
@@ -1322,7 +1432,7 @@ Phase4:
 		if have != want {
 			println("mismatch writing bits for", typ.string(), "x", dataSize/typ.size)
 			print("typ.size=", typ.size, " typ.ptrdata=", typ.ptrdata, " dataSize=", dataSize, " size=", size, "\n")
-			print("kindGCProg=", typ.kind&kindGCProg != 0, "\n")
+			print("kindGCProg=", typ.kind&kindGCProg != 0, " outOfPlace=", outOfPlace, "\n")
 			print("w=", w, " nw=", nw, " b=", hex(b), " nb=", nb, " hb=", hex(hb), "\n")
 			h0 := heapBitsForAddr(x)
 			print("initial bits h0.bitp=", h0.bitp, " h0.shift=", h0.shift, "\n")
@@ -1430,7 +1540,7 @@ func heapBitsSetTypeGCProg(h heapBits, progSize, elemSize, dataSize, allocSize u
 		totalBits = (elemSize*(count-1) + progSize) / sys.PtrSize
 	}
 	endProg := unsafe.Pointer(addb(h.bitp, (totalBits+3)/4))
-	endAlloc := unsafe.Pointer(addb(h.bitp, allocSize/heapBitmapScale))
+	endAlloc := unsafe.Pointer(addb(h.bitp, allocSize/sys.PtrSize/wordsPerBitmapByte))
 	memclrNoHeapPointers(endProg, uintptr(endAlloc)-uintptr(endProg))
 }
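The outOfPlace path in Phase 4 above unrolls the bitmap into the not-yet-initialized object itself, then copies it out to each arena's bitmap in chunks that never cross an arena boundary (which is what forwardOrBoundary bounds), and finally zeroes the scratch bytes in the object. A rough, self-contained model of just the chunked copy, with byte slices standing in for the object and the per-arena bitmaps; all names and sizes here are invented for illustration.

package main

import "fmt"

func main() {
	// src stands in for the bitmap bytes that were unrolled into the object.
	src := []byte{0xAA, 0xBB, 0xCC, 0xDD, 0xEE}

	// Two destination "arena bitmaps", 3 bytes each, and the offset at
	// which this object's bits begin in the first one.
	dst := [][]byte{make([]byte, 3), make([]byte, 3)}
	off := 1

	// Copy out in chunks that stop at each arena-bitmap boundary, the
	// way forwardOrBoundary limits each memmove in the real code.
	ai := 0
	for len(src) > 0 {
		n := copy(dst[ai][off:], src) // copy stops at the end of this arena's bitmap
		src = src[n:]
		ai, off = ai+1, 0
	}

	// The real code would now zero the scratch region inside the object.
	fmt.Printf("%x %x\n", dst[0], dst[1]) // 00aabb ccddee
}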
diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go
index 737161dfee..eb9418f0db 100644
--- a/src/runtime/mheap.go
+++ b/src/runtime/mheap.go
@@ -114,9 +114,6 @@ type mheap struct {
 	nsmallfree [_NumSizeClasses]uint64 // number of frees for small objects (<=maxsmallsize)
 
 	// range of addresses we might see in the heap
-	bitmap_start  uintptr // Points to first byte of bitmap
-	bitmap_mapped uintptr
-	bitmap_delta  uintptr // Used to map heap address to bitmap address
 
 	// The arena_* fields indicate the addresses of the Go heap.
 	//
@@ -143,6 +140,21 @@ type mheap struct {
 	// here and *must* clobber it to use it.
 	arena_reserved bool
 
+	// arenas is the heap arena index. arenas[va/heapArenaBytes]
+	// points to the metadata for the heap arena containing va.
+	//
+	// For regions of the address space that are not backed by the
+	// Go heap, the arena index contains nil.
+	//
+	// Modifications are protected by mheap_.lock. Reads can be
+	// performed without locking; however, a given entry can
+	// transition from nil to non-nil at any time when the lock
+	// isn't held. (Entries never transition back to nil.)
+	//
+	// This structure is fully mapped by mallocinit, so it's safe
+	// to probe any index.
+	arenas *[memLimit / heapArenaBytes]*heapArena
+
 	//_ uint32 // ensure 64-bit alignment
 
 	// central free lists for small size classes.
@@ -167,6 +179,23 @@
 var mheap_ mheap
 
+// A heapArena stores metadata for a heap arena. heapArenas are stored
+// outside of the Go heap and accessed via the mheap_.arenas index.
+//
+// This gets allocated directly from the OS, so ideally it should be a
+// multiple of the system page size. For example, avoid adding small
+// fields.
+//
+//go:notinheap
+type heapArena struct {
+	// bitmap stores the pointer/scalar bitmap for the words in
+	// this arena. See mbitmap.go for a description. Use the
+	// heapBits type to access this.
+	bitmap [heapArenaBitmapBytes]byte
+
+	// TODO: Also store the spans map here.
+}
+
 // An MSpan is a run of pages.
 //
 // When a MSpan is in the heap free list, state == MSpanFree
@@ -507,8 +536,21 @@ func (h *mheap) setArenaUsed(arena_used uintptr, racemap bool) {
 	// avoids faults when other threads try access these regions immediately
 	// after observing the change to arena_used.
 
-	// Map the bitmap.
-	h.mapBits(arena_used)
+	// Allocate heap arena metadata.
+	for ri := h.arena_used / heapArenaBytes; ri < (arena_used+heapArenaBytes-1)/heapArenaBytes; ri++ {
+		if h.arenas[ri] != nil {
+			continue
+		}
+		r := (*heapArena)(persistentalloc(unsafe.Sizeof(heapArena{}), sys.PtrSize, &memstats.gc_sys))
+		if r == nil {
+			throw("runtime: out of memory allocating heap arena metadata")
+		}
+		// Store atomically just in case an object from the
+		// new heap arena becomes visible before the heap lock
+		// is released (which shouldn't happen, but there's
+		// little downside to this).
+		atomic.StorepNoWB(unsafe.Pointer(&h.arenas[ri]), unsafe.Pointer(r))
+	}
 
 	// Map spans array.
 	h.mapSpans(arena_used)
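For a sense of scale of the mheap.arenas index added above: assuming the 48-bit address limit from malloc.go and 64 MB heap arenas (the heapArenaBytes definition itself is not part of this excerpt), the index is a flat 32 MB array of pointers, while each heapArena carries a 2 MB bitmap that is only allocated once its arena is actually used. A quick back-of-the-envelope check in Go:

package main

import "fmt"

func main() {
	const (
		memLimitBits = 48       // assumed 64-bit address limit, per the malloc.go comment
		memLimit     = 1 << memLimitBits
		arenaBytes   = 64 << 20 // assumed heap arena size
		ptrSize      = 8
	)
	entries := memLimit / arenaBytes              // slots in mheap_.arenas
	indexBytes := entries * ptrSize               // one pointer per slot
	bitmapBytes := arenaBytes / (ptrSize * 8 / 2) // 2 bitmap bits per heap word

	fmt.Printf("arena index: %d entries, %d MB of pointers\n", entries, indexBytes>>20)
	fmt.Printf("per-arena bitmap: %d MB, allocated only for arenas in use\n", bitmapBytes>>20)
}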