diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go index 385c569ed8..8a44cebc76 100644 --- a/src/runtime/export_test.go +++ b/src/runtime/export_test.go @@ -413,3 +413,35 @@ func TracebackSystemstack(stk []uintptr, i int) int { }) return n } + +func KeepNArenaHints(n int) { + hint := mheap_.arenaHints + for i := 1; i < n; i++ { + hint = hint.next + if hint == nil { + return + } + } + hint.next = nil +} + +// MapNextArenaHint reserves a page at the next arena growth hint, +// preventing the arena from growing there, and returns the range of +// addresses that are no longer viable. +func MapNextArenaHint() (start, end uintptr) { + hint := mheap_.arenaHints + addr := hint.addr + if hint.down { + start, end = addr-heapArenaBytes, addr + addr -= physPageSize + } else { + start, end = addr, addr+heapArenaBytes + } + var reserved bool + sysReserve(unsafe.Pointer(addr), physPageSize, &reserved) + return +} + +func GetNextArenaHint() uintptr { + return mheap_.arenaHints.addr +} diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go index a95a7fffde..02c0be6690 100644 --- a/src/runtime/malloc.go +++ b/src/runtime/malloc.go @@ -78,9 +78,32 @@ // // 3. We don't zero pages that never get reused. +// Virtual memory layout +// +// The heap consists of a set of arenas, which are 64MB on 64-bit and +// 4MB on 32-bit (heapArenaBytes). Each arena's start address is also +// aligned to the arena size. +// +// Each arena has an associated heapArena object that stores the +// metadata for that arena: the heap bitmap for all words in the arena +// and the span map for all pages in the arena. heapArena objects are +// themselves allocated off-heap. +// +// Since arenas are aligned, the address space can be viewed as a +// series of arena frames. The arena index (mheap_.arenas) maps from +// arena frame number to *heapArena, or nil for parts of the address +// space not backed by the Go heap. Since arenas are large, the arena +// index is just a single-level mapping. +// +// The arena index covers the entire possible address space, allowing +// the Go heap to use any part of the address space. The allocator +// attempts to keep arenas contiguous so that large spans (and hence +// large objects) can cross arenas. + package runtime import ( + "runtime/internal/atomic" "runtime/internal/sys" "unsafe" ) @@ -113,9 +136,8 @@ const ( _TinySize = 16 _TinySizeClass = int8(2) - _FixAllocChunk = 16 << 10 // Chunk size for FixAlloc - _MaxMHeapList = 1 << (20 - _PageShift) // Maximum page length for fixed-size list in MHeap. - _HeapAllocChunk = 1 << 20 // Chunk size for heap growth + _FixAllocChunk = 16 << 10 // Chunk size for FixAlloc + _MaxMHeapList = 1 << (20 - _PageShift) // Maximum page length for fixed-size list in MHeap. // Per-P, per order stack segment cache size. _StackCacheSize = 32 * 1024 @@ -134,26 +156,6 @@ const ( // plan9 | 4KB | 3 _NumStackOrders = 4 - sys.PtrSize/4*sys.GoosWindows - 1*sys.GoosPlan9 - // Number of bits in page to span calculations (4k pages). - // On Windows 64-bit we limit the arena to 32GB or 35 bits. - // Windows counts memory used by page table into committed memory - // of the process, so we can't reserve too much memory. - // See https://golang.org/issue/5402 and https://golang.org/issue/5236. - // On other 64-bit platforms, we limit the arena to 512GB, or 39 bits. - // On 32-bit, we don't bother limiting anything, so we use the full 32-bit address. - // The only exception is mips32 which only has access to low 2GB of virtual memory. 
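// Illustrative sketch, not part of the patch: a standalone model of the
// single-level arena index described in the new "Virtual memory layout"
// comment above. The address space is cut into aligned, fixed-size arena
// frames, and the frame number indexes a flat table of per-arena metadata
// (nil for frames not backed by the Go heap). With the real constants
// (48-bit address limit, 64MB arenas) that table has 1<<22 entries. The
// names toyArenaBytes, toyHeapArena, and toyIndex are invented here, and a
// map stands in for the runtime's fixed-size array; assumes a 64-bit build.
package main

import "fmt"

const toyArenaBytes = 64 << 20 // heapArenaBytes on 64-bit

type toyHeapArena struct {
	// In the runtime this holds the heap bitmap and the span map
	// for every page in the arena.
	id int
}

var toyIndex = map[uintptr]*toyHeapArena{}

// frameOf maps an address to its arena frame number, mirroring
// arenas[va/heapArenaBytes] in the patch.
func frameOf(va uintptr) uintptr { return va / toyArenaBytes }

func arenaOf(va uintptr) *toyHeapArena { return toyIndex[frameOf(va)] }

func main() {
	base := uintptr(0x00c0) << 32 // one of the 64-bit growth hints
	toyIndex[frameOf(base)] = &toyHeapArena{id: 1}
	fmt.Println(arenaOf(base) != nil)                 // true: heap-backed frame
	fmt.Println(arenaOf(base+toyArenaBytes) != nil)   // false: unmapped frame
}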
- // On Darwin/arm64, we cannot reserve more than ~5GB of virtual memory, - // but as most devices have less than 4GB of physical memory anyway, we - // try to be conservative here, and only ask for a 2GB heap. - _MHeapMap_TotalBits = (_64bit*sys.GoosWindows)*35 + (_64bit*(1-sys.GoosWindows)*(1-sys.GoosDarwin*sys.GoarchArm64))*39 + sys.GoosDarwin*sys.GoarchArm64*31 + (1-_64bit)*(32-(sys.GoarchMips+sys.GoarchMipsle)) - _MHeapMap_Bits = _MHeapMap_TotalBits - _PageShift - - // _MaxMem is the maximum heap arena size minus 1. - // - // On 32-bit, this is also the maximum heap pointer value, - // since the arena starts at address 0. - _MaxMem = 1<<_MHeapMap_TotalBits - 1 - // memLimitBits is the maximum number of bits in a heap address. // // On 64-bit platforms, we limit this to 48 bits because that @@ -174,14 +176,14 @@ const ( memLimitBits = _64bit*48 + (1-_64bit)*(32-(sys.GoarchMips+sys.GoarchMipsle)) // memLimit is one past the highest possible heap pointer value. + // + // This is also the maximum heap pointer value. memLimit = 1 << memLimitBits + _MaxMem = memLimit - 1 // heapArenaBytes is the size of a heap arena. The heap // consists of mappings of size heapArenaBytes, aligned to // heapArenaBytes. The initial heap mapping is one arena. - // - // TODO: Right now only the bitmap is divided into separate - // arenas, but shortly all of the heap will be. heapArenaBytes = (64<<20)*_64bit + (4<<20)*(1-_64bit) // heapArenaBitmapBytes is the size of each heap arena's bitmap. @@ -281,43 +283,53 @@ func mallocinit() { throw("bad system page size") } - // The auxiliary regions start at p and are laid out in the - // following order: spans, bitmap, arena. - var p, pSize uintptr - var reserved bool + // Map the arena index. Most of this will never be written to, + // so we don't account it. + var untracked uint64 + mheap_.arenas = (*[memLimit / heapArenaBytes]*heapArena)(persistentalloc(unsafe.Sizeof(*mheap_.arenas), sys.PtrSize, &untracked)) + if mheap_.arenas == nil { + throw("failed to allocate arena index") + } - // Set up the allocation arena, a contiguous area of memory where - // allocated data will be found. + // Initialize the heap. + mheap_.init() + _g_ := getg() + _g_.m.mcache = allocmcache() + + // Create initial arena growth hints. if sys.PtrSize == 8 { - // On a 64-bit machine, allocate from a single contiguous reservation. - // 512 GB (MaxMem) should be big enough for now. + // On a 64-bit machine, we pick the following hints + // because: // - // The code will work with the reservation at any address, but ask - // SysReserve to use 0x0000XXc000000000 if possible (XX=00...7f). - // Allocating a 512 GB region takes away 39 bits, and the amd64 - // doesn't let us choose the top 17 bits, so that leaves the 9 bits - // in the middle of 0x00c0 for us to choose. Choosing 0x00c0 means - // that the valid memory addresses will begin 0x00c0, 0x00c1, ..., 0x00df. - // In little-endian, that's c0 00, c1 00, ..., df 00. None of those are valid + // 1. Starting from the middle of the address space + // makes it easier to grow out a contiguous range + // without running in to some other mapping. + // + // 2. This makes Go heap addresses more easily + // recognizable when debugging. + // + // 3. Stack scanning in gccgo is still conservative, + // so it's important that addresses be distinguishable + // from other data. + // + // Starting at 0x00c0 means that the valid memory addresses + // will begin 0x00c0, 0x00c1, ... + // In little-endian, that's c0 00, c1 00, ... 
None of those are valid // UTF-8 sequences, and they are otherwise as far away from // ff (likely a common byte) as possible. If that fails, we try other 0xXXc0 // addresses. An earlier attempt to use 0x11f8 caused out of memory errors // on OS X during thread allocations. 0x00c0 causes conflicts with // AddressSanitizer which reserves all memory up to 0x0100. - // These choices are both for debuggability and to reduce the - // odds of a conservative garbage collector (as is still used in gccgo) + // These choices reduce the odds of a conservative garbage collector // not collecting memory because some non-pointer block of memory // had a bit pattern that matched a memory address. // - // If this fails we fall back to the 32 bit memory mechanism - // // However, on arm64, we ignore all this advice above and slam the // allocation at 0x40 << 32 because when using 4k pages with 3-level // translation buffers, the user address space is limited to 39 bits // On darwin/arm64, the address space is even smaller. - arenaSize := round(_MaxMem, _PageSize) - pSize = arenaSize + _PageSize - for i := 0; i <= 0x7f; i++ { + for i := 0x7f; i >= 0; i-- { + var p uintptr switch { case GOARCH == "arm64" && GOOS == "darwin": p = uintptr(i)<<40 | uintptrMask&(0x0013<<28) @@ -326,225 +338,240 @@ func mallocinit() { default: p = uintptr(i)<<40 | uintptrMask&(0x00c0<<32) } - p = uintptr(sysReserve(unsafe.Pointer(p), pSize, &reserved)) - if p != 0 { - break - } + hint := (*arenaHint)(mheap_.arenaHintAlloc.alloc()) + hint.addr = p + hint.next, mheap_.arenaHints = mheap_.arenaHints, hint } - } + } else { + // On a 32-bit machine, we're much more concerned + // about keeping the usable heap contiguous. + // Hence: + // + // 1. We reserve space for all heapArenas up front so + // they don't get interleaved with the heap. They're + // ~258MB, so this isn't too bad. (We could reserve a + // smaller amount of space up front if this is a + // problem.) + // + // 2. We hint the heap to start right above the end of + // the binary so we have the best chance of keeping it + // contiguous. + // + // 3. We try to stake out a reasonably large initial + // heap reservation. - if p == 0 { - // On a 32-bit machine, we can't typically get away - // with a giant virtual address space reservation. - // Instead we map the memory information bitmap - // immediately after the data segment, large enough - // to handle the entire 4GB address space (256 MB), - // along with a reservation for an initial arena. - // When that gets used up, we'll start asking the kernel - // for any memory anywhere. + const arenaMetaSize = unsafe.Sizeof(heapArena{}) * uintptr(len(*mheap_.arenas)) + var reserved bool + meta := uintptr(sysReserve(nil, arenaMetaSize, &reserved)) + if meta != 0 { + mheap_.heapArenaAlloc.init(meta, arenaMetaSize) + } // We want to start the arena low, but if we're linked // against C code, it's possible global constructors // have called malloc and adjusted the process' brk. // Query the brk so we can avoid trying to map the - // arena over it (which will cause the kernel to put - // the arena somewhere else, likely at a high + // region over it (which will cause the kernel to put + // the region somewhere else, likely at a high // address). procBrk := sbrk0() - // If we fail to allocate, try again with a smaller arena. - // This is necessary on Android L where we share a process - // with ART, which reserves virtual memory aggressively. 
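// Illustrative sketch, not part of the patch: the candidate addresses the
// 64-bit hint loop above produces for the default (non-darwin/arm64) case,
// and why it counts down from 0x7f. Each new hint is pushed onto the front
// of mheap_.arenaHints, so counting down leaves the list in increasing
// address order, with 0x00c0<<32 tried first. uintptrMask is dropped here
// since these values already fit in 48 bits; assumes a 64-bit build.
package main

import "fmt"

func main() {
	var hints []uintptr
	for i := 0x7f; i >= 0; i-- {
		p := uintptr(i)<<40 | uintptr(0x00c0)<<32
		hints = append([]uintptr{p}, hints...) // prepend, like the linked list
	}
	for _, p := range hints[:3] {
		fmt.Printf("%#x\n", p) // 0xc000000000, 0x1c000000000, 0x2c000000000
	}
}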
- // In the worst case, fall back to a 0-sized initial arena, - // in the hope that subsequent reservations will succeed. + // If we ask for the end of the data segment but the + // operating system requires a little more space + // before we can start allocating, it will give out a + // slightly higher pointer. Except QEMU, which is + // buggy, as usual: it won't adjust the pointer + // upward. So adjust it upward a little bit ourselves: + // 1/4 MB to get away from the running binary image. + p := firstmoduledata.end + if p < procBrk { + p = procBrk + } + if mheap_.heapArenaAlloc.next <= p && p < mheap_.heapArenaAlloc.end { + p = mheap_.heapArenaAlloc.end + } + p = round(p+(256<<10), heapArenaBytes) + // Because we're worried about fragmentation on + // 32-bit, we try to make a large initial reservation. arenaSizes := []uintptr{ 512 << 20, 256 << 20, 128 << 20, - 0, } - for _, arenaSize := range arenaSizes { - // SysReserve treats the address we ask for, end, as a hint, - // not as an absolute requirement. If we ask for the end - // of the data segment but the operating system requires - // a little more space before we can start allocating, it will - // give out a slightly higher pointer. Except QEMU, which - // is buggy, as usual: it won't adjust the pointer upward. - // So adjust it upward a little bit ourselves: 1/4 MB to get - // away from the running binary image and then round up - // to a MB boundary. - p = round(firstmoduledata.end+(1<<18), 1<<20) - pSize = arenaSize + _PageSize - if p <= procBrk && procBrk < p+pSize { - // Move the start above the brk, - // leaving some room for future brk - // expansion. - p = round(procBrk+(1<<20), 1<<20) - } - p = uintptr(sysReserve(unsafe.Pointer(p), pSize, &reserved)) - if p != 0 { + a, size := sysReserveAligned(unsafe.Pointer(p), arenaSize, heapArenaBytes, &reserved) + if a != nil { + mheap_.arena.init(uintptr(a), size) + p = uintptr(a) + size // For hint below break } } - if p == 0 { - throw("runtime: cannot reserve arena virtual address space") - } + hint := (*arenaHint)(mheap_.arenaHintAlloc.alloc()) + hint.addr = p + hint.next, mheap_.arenaHints = mheap_.arenaHints, hint } - - // PageSize can be larger than OS definition of page size, - // so SysReserve can give us a PageSize-unaligned pointer. - // To overcome this we ask for PageSize more and round up the pointer. - p1 := round(p, _PageSize) - pSize -= p1 - p - - if sys.PtrSize == 4 { - // Set arena_start such that we can accept memory - // reservations located anywhere in the 4GB virtual space. - mheap_.arena_start = 0 - } else { - mheap_.arena_start = p1 - } - mheap_.arena_end = p + pSize - mheap_.arena_used = p1 - mheap_.arena_alloc = p1 - mheap_.arena_reserved = reserved - - if mheap_.arena_start&(_PageSize-1) != 0 { - println("bad pagesize", hex(p), hex(p1), hex(_PageSize), "start", hex(mheap_.arena_start)) - throw("misrounded allocation in mallocinit") - } - - // Map the arena index. Most of this will never be touched. - var untracked uint64 - mheap_.arenas = (*[memLimit / heapArenaBytes]*heapArena)(persistentalloc(unsafe.Sizeof(*mheap_.arenas), sys.PtrSize, &untracked)) - if mheap_.arenas == nil { - throw("failed to allocate arena index") - } - - // Initialize the rest of the allocator. - mheap_.init() - _g_ := getg() - _g_.m.mcache = allocmcache() } -// sysAlloc allocates the next n bytes from the heap arena. The -// returned pointer is always _PageSize aligned and between -// h.arena_start and h.arena_end. sysAlloc returns nil on failure. 
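// Illustrative sketch, not part of the patch: how the 32-bit path above
// derives its first heap hint. round matches the runtime helper
// ((n+a-1) &^ (a-1)); the end-of-binary and brk addresses are invented for
// the example. The result is the first 4MB arena boundary at least 256KB
// past the binary (or past the brk, whichever is higher).
package main

import "fmt"

const heapArenaBytes = 4 << 20 // 32-bit arena size

func round(n, a uintptr) uintptr { return (n + a - 1) &^ (a - 1) }

func main() {
	end := uintptr(0x084a3170)     // hypothetical firstmoduledata.end
	procBrk := uintptr(0x08100000) // hypothetical brk, below the binary end
	p := end
	if p < procBrk {
		p = procBrk
	}
	p = round(p+(256<<10), heapArenaBytes)
	fmt.Printf("%#x\n", p) // 0x8800000
}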
+// sysAlloc allocates heap arena space for at least n bytes. The +// returned pointer is always heapArenaBytes-aligned and backed by +// h.arenas metadata. The returned size is always a multiple of +// heapArenaBytes. sysAlloc returns nil on failure. // There is no corresponding free function. -func (h *mheap) sysAlloc(n uintptr) unsafe.Pointer { - // strandLimit is the maximum number of bytes to strand from - // the current arena block. If we would need to strand more - // than this, we fall back to sysAlloc'ing just enough for - // this allocation. - const strandLimit = 16 << 20 +// +// h must be locked. +func (h *mheap) sysAlloc(n uintptr) (v unsafe.Pointer, size uintptr) { + n = round(n, heapArenaBytes) - if n > h.arena_end-h.arena_alloc { - // If we haven't grown the arena to _MaxMem yet, try - // to reserve some more address space. - p_size := round(n+_PageSize, 256<<20) - new_end := h.arena_end + p_size // Careful: can overflow - if h.arena_end <= new_end && new_end-h.arena_start-1 <= _MaxMem { - // TODO: It would be bad if part of the arena - // is reserved and part is not. - var reserved bool - p := uintptr(sysReserve(unsafe.Pointer(h.arena_end), p_size, &reserved)) - if p == 0 { - // TODO: Try smaller reservation - // growths in case we're in a crowded - // 32-bit address space. - goto reservationFailed + // First, try the arena pre-reservation. + v = h.arena.alloc(n, heapArenaBytes, &memstats.heap_sys) + if v != nil { + size = n + goto mapped + } + + // Try to grow the heap at a hint address. + for h.arenaHints != nil { + hint := h.arenaHints + p := hint.addr + if hint.down { + p -= n + } + if p+n < p || p+n >= memLimit-1 { + // We can't use this, so don't ask. + v = nil + } else { + v = sysReserve(unsafe.Pointer(p), n, &h.arena_reserved) + } + if p == uintptr(v) { + // Success. Update the hint. + if !hint.down { + p += n } - // p can be just about anywhere in the address - // space, including before arena_end. - if p == h.arena_end { - // The new block is contiguous with - // the current block. Extend the - // current arena block. - h.arena_end = new_end - h.arena_reserved = reserved - } else if h.arena_start <= p && p+p_size-h.arena_start-1 <= _MaxMem && h.arena_end-h.arena_alloc < strandLimit { - // We were able to reserve more memory - // within the arena space, but it's - // not contiguous with our previous - // reservation. It could be before or - // after our current arena_used. - // - // Keep everything page-aligned. - // Our pages are bigger than hardware pages. - h.arena_end = p + p_size - p = round(p, _PageSize) - h.arena_alloc = p - h.arena_reserved = reserved - } else { - // We got a mapping, but either - // - // 1) It's not in the arena, so we - // can't use it. (This should never - // happen on 32-bit.) - // - // 2) We would need to discard too - // much of our current arena block to - // use it. - // - // We haven't added this allocation to - // the stats, so subtract it from a - // fake stat (but avoid underflow). - // - // We'll fall back to a small sysAlloc. - stat := uint64(p_size) - sysFree(unsafe.Pointer(p), p_size, &stat) + hint.addr = p + size = n + break + } + // Failed. Discard this hint and try the next. + // + // TODO: This would be cleaner if sysReserve could be + // told to only return the requested address. In + // particular, this is already how Windows behaves, so + // it would simply things there. 
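// Illustrative sketch, not part of the patch: the hint bookkeeping in the
// loop above, as a pure simulation. An "up" hint hands out [addr, addr+n)
// and then advances addr past it; a "down" hint hands out [addr-n, addr)
// and then moves addr to the bottom of that range, so the heap keeps growing
// away from whatever the hint was created next to. No memory is reserved,
// sysReserve is assumed to always return the requested address, and the
// starting address assumes a 64-bit build.
package main

import "fmt"

type hint struct {
	addr uintptr
	down bool
}

// grow returns the base of an n-byte region taken at h and updates h,
// mirroring the hint update in sysAlloc on a successful reservation.
func grow(h *hint, n uintptr) uintptr {
	p := h.addr
	if h.down {
		p -= n
	}
	base := p
	if !h.down {
		p += n
	}
	h.addr = p
	return base
}

func main() {
	const n = 64 << 20 // one arena
	up := &hint{addr: 0xc000000000}
	fmt.Printf("%#x next=%#x\n", grow(up, n), up.addr) // 0xc000000000 next=0xc004000000
	dn := &hint{addr: 0xc000000000, down: true}
	fmt.Printf("%#x next=%#x\n", grow(dn, n), dn.addr) // 0xbffc000000 next=0xbffc000000
}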
+ if v != nil { + sysFree(v, n, nil) + } + h.arenaHints = hint.next + h.arenaHintAlloc.free(unsafe.Pointer(hint)) + } + + if size == 0 { + // All of the hints failed, so we'll take any + // (sufficiently aligned) address the kernel will give + // us. + v, size = sysReserveAligned(nil, n, heapArenaBytes, &h.arena_reserved) + if v == nil { + return nil, 0 + } + + // Create new hints for extending this region. + hint := (*arenaHint)(h.arenaHintAlloc.alloc()) + hint.addr, hint.down = uintptr(v), true + hint.next, mheap_.arenaHints = mheap_.arenaHints, hint + hint = (*arenaHint)(h.arenaHintAlloc.alloc()) + hint.addr = uintptr(v) + size + hint.next, mheap_.arenaHints = mheap_.arenaHints, hint + } + + if v := uintptr(v); v+size < v || v+size >= memLimit-1 { + // This should be impossible on most architectures, + // but it would be really confusing to debug. + print("runtime: memory allocated by OS [", hex(v), ", ", hex(v+size), ") exceeds address space limit (", hex(int64(memLimit)), ")\n") + throw("memory reservation exceeds address space limit") + } + + if uintptr(v)&(heapArenaBytes-1) != 0 { + throw("misrounded allocation in sysAlloc") + } + + // Back the reservation. + sysMap(v, size, h.arena_reserved, &memstats.heap_sys) + +mapped: + // Create arena metadata. + for ri := uintptr(v) / heapArenaBytes; ri < (uintptr(v)+size)/heapArenaBytes; ri++ { + if h.arenas[ri] != nil { + throw("arena already initialized") + } + var r *heapArena + r = (*heapArena)(h.heapArenaAlloc.alloc(unsafe.Sizeof(*r), sys.PtrSize, &memstats.gc_sys)) + if r == nil { + r = (*heapArena)(persistentalloc(unsafe.Sizeof(*r), sys.PtrSize, &memstats.gc_sys)) + if r == nil { + throw("out of memory allocating heap arena metadata") } } + + // Store atomically just in case an object from the + // new heap arena becomes visible before the heap lock + // is released (which shouldn't happen, but there's + // little downside to this). + atomic.StorepNoWB(unsafe.Pointer(&h.arenas[ri]), unsafe.Pointer(r)) } - if n <= h.arena_end-h.arena_alloc { - // Keep taking from our reservation. - p := h.arena_alloc - sysMap(unsafe.Pointer(p), n, h.arena_reserved, &memstats.heap_sys) - h.arena_alloc += n - if h.arena_alloc > h.arena_used { - h.setArenaUsed(h.arena_alloc, true) + // Tell the race detector about the new heap memory. + if raceenabled { + racemapshadow(v, size) + } + + return +} + +// sysReserveAligned is like sysReserve, but the returned pointer is +// aligned to align bytes. It may reserve either n or n+align bytes, +// so it returns the size that was reserved. +func sysReserveAligned(v unsafe.Pointer, size, align uintptr, reserved *bool) (unsafe.Pointer, uintptr) { + // Since the alignment is rather large in uses of this + // function, we're not likely to get it by chance, so we ask + // for a larger region and remove the parts we don't need. + retries := 0 +retry: + p := uintptr(sysReserve(v, size+align, reserved)) + switch { + case p == 0: + return nil, 0 + case p&(align-1) == 0: + // We got lucky and got an aligned region, so we can + // use the whole thing. + return unsafe.Pointer(p), size + align + case GOOS == "windows": + // On Windows we can't release pieces of a + // reservation, so we release the whole thing and + // re-reserve the aligned sub-region. This may race, + // so we may have to try again. + sysFree(unsafe.Pointer(p), size+align, nil) + p = round(p, align) + p2 := sysReserve(unsafe.Pointer(p), size, reserved) + if p != uintptr(p2) { + // Must have raced. Try again. 
+ sysFree(p2, size, nil) + if retries++; retries == 100 { + throw("failed to allocate aligned heap memory; too many retries") + } + goto retry } - - if p&(_PageSize-1) != 0 { - throw("misrounded allocation in MHeap_SysAlloc") + // Success. + return p2, size + default: + // Trim off the unaligned parts. + pAligned := round(p, align) + sysFree(unsafe.Pointer(p), pAligned-p, nil) + end := pAligned + size + endLen := (p + size + align) - end + if endLen > 0 { + sysFree(unsafe.Pointer(end), endLen, nil) } - return unsafe.Pointer(p) + return unsafe.Pointer(pAligned), size } - -reservationFailed: - // If using 64-bit, our reservation is all we have. - if sys.PtrSize != 4 { - return nil - } - - // On 32-bit, once the reservation is gone we can - // try to get memory at a location chosen by the OS. - p_size := round(n, _PageSize) + _PageSize - p := uintptr(sysAlloc(p_size, &memstats.heap_sys)) - if p == 0 { - return nil - } - - if p < h.arena_start || p+p_size-h.arena_start > _MaxMem { - // This shouldn't be possible because _MaxMem is the - // whole address space on 32-bit. - top := uint64(h.arena_start) + _MaxMem - print("runtime: memory allocated by OS (", hex(p), ") not in usable range [", hex(h.arena_start), ",", hex(top), ")\n") - sysFree(unsafe.Pointer(p), p_size, &memstats.heap_sys) - return nil - } - - p += -p & (_PageSize - 1) - if p+n > h.arena_used { - h.setArenaUsed(p+n, true) - } - - if p&(_PageSize-1) != 0 { - throw("misrounded allocation in MHeap_SysAlloc") - } - return unsafe.Pointer(p) } // base address for all 0-byte allocations @@ -1046,6 +1073,34 @@ func persistentalloc1(size, align uintptr, sysStat *uint64) *notInHeap { return p } +// linearAlloc is a simple linear allocator that pre-reserves a region +// of memory and then maps that region as needed. The caller is +// responsible for locking. +type linearAlloc struct { + next uintptr // next free byte + mapped uintptr // one byte past end of mapped space + end uintptr // end of reserved space +} + +func (l *linearAlloc) init(base, size uintptr) { + l.next, l.mapped = base, base + l.end = base + size +} + +func (l *linearAlloc) alloc(size, align uintptr, sysStat *uint64) unsafe.Pointer { + p := round(l.next, align) + if p+size > l.end { + return nil + } + l.next = p + size + if pEnd := round(l.next-1, physPageSize); pEnd > l.mapped { + // We need to map more of the reserved space. + sysMap(unsafe.Pointer(l.mapped), pEnd-l.mapped, true, sysStat) + l.mapped = pEnd + } + return unsafe.Pointer(p) +} + // notInHeap is off-heap memory allocated by a lower-level allocator // like sysAlloc or persistentAlloc. // diff --git a/src/runtime/malloc_test.go b/src/runtime/malloc_test.go index a56d9e6925..091fc21199 100644 --- a/src/runtime/malloc_test.go +++ b/src/runtime/malloc_test.go @@ -7,8 +7,12 @@ package runtime_test import ( "flag" "fmt" + "internal/testenv" + "os" + "os/exec" "reflect" . "runtime" + "strings" "testing" "time" "unsafe" @@ -152,6 +156,55 @@ func TestTinyAlloc(t *testing.T) { } } +type acLink struct { + x [1 << 20]byte +} + +var arenaCollisionSink []*acLink + +func TestArenaCollision(t *testing.T) { + if GOOS == "nacl" { + t.Skip("nacl can't self-exec a test") + } + // Test that mheap.sysAlloc handles collisions with other + // memory mappings. 
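// Illustrative sketch, not part of the patch: the over-reserve-and-trim
// arithmetic in sysReserveAligned's default case above. The kernel is asked
// for size+align bytes at an arbitrary address p; the aligned sub-region
// [pAligned, pAligned+size) is kept and the two leftover pieces are released.
// The starting address is invented and assumes a 64-bit build; no real
// reservation happens here.
package main

import "fmt"

func round(n, a uintptr) uintptr { return (n + a - 1) &^ (a - 1) }

func main() {
	const align = 64 << 20     // heapArenaBytes on 64-bit
	const size = 64 << 20      // one arena's worth
	p := uintptr(0xc001230000) // hypothetical unaligned sysReserve result
	pAligned := round(p, align)
	end := pAligned + size
	endLen := (p + size + align) - end
	fmt.Printf("keep    [%#x, %#x)\n", pAligned, end)
	fmt.Printf("trim lo [%#x, %#x)\n", p, pAligned)
	fmt.Printf("trim hi [%#x, %#x)\n", end, end+endLen)
	fmt.Println(pAligned-p+endLen == align) // the trimmed pieces total align bytes
}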
+ if os.Getenv("TEST_ARENA_COLLISION") != "1" { + cmd := testenv.CleanCmdEnv(exec.Command(os.Args[0], "-test.run=TestArenaCollision", "-test.v")) + cmd.Env = append(cmd.Env, "TEST_ARENA_COLLISION=1") + if out, err := cmd.CombinedOutput(); !strings.Contains(string(out), "PASS\n") || err != nil { + t.Fatalf("%s\n(exit status %v)", string(out), err) + } + return + } + disallowed := [][2]uintptr{} + // Drop all but the next 3 hints. 64-bit has a lot of hints, + // so it would take a lot of memory to go through all of them. + KeepNArenaHints(3) + // Consume these 3 hints and force the runtime to find some + // fallback hints. + for i := 0; i < 5; i++ { + // Reserve memory at the next hint so it can't be used + // for the heap. + start, end := MapNextArenaHint() + disallowed = append(disallowed, [2]uintptr{start, end}) + // Allocate until the runtime tries to use the hint we + // just mapped over. + hint := GetNextArenaHint() + for GetNextArenaHint() == hint { + ac := new(acLink) + arenaCollisionSink = append(arenaCollisionSink, ac) + // The allocation must not have fallen into + // one of the reserved regions. + p := uintptr(unsafe.Pointer(ac)) + for _, d := range disallowed { + if d[0] <= p && p < d[1] { + t.Fatalf("allocation %#x in reserved region [%#x, %#x)", p, d[0], d[1]) + } + } + } + } +} + var mallocSink uintptr func BenchmarkMalloc8(b *testing.B) { diff --git a/src/runtime/mem_windows.go b/src/runtime/mem_windows.go index c37c82ab67..c7ee2950ea 100644 --- a/src/runtime/mem_windows.go +++ b/src/runtime/mem_windows.go @@ -102,6 +102,7 @@ func sysReserve(v unsafe.Pointer, n uintptr, reserved *bool) unsafe.Pointer { *reserved = true // v is just a hint. // First try at v. + // This will fail if any of [v, v+n) is already reserved. v = unsafe.Pointer(stdcall4(_VirtualAlloc, uintptr(v), n, _MEM_RESERVE, _PAGE_READWRITE)) if v != nil { return v diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go index 9fafcb7ffd..7c469b1049 100644 --- a/src/runtime/mheap.go +++ b/src/runtime/mheap.go @@ -96,31 +96,13 @@ type mheap struct { nlargefree uint64 // number of frees for large objects (>maxsmallsize) nsmallfree [_NumSizeClasses]uint64 // number of frees for small objects (<=maxsmallsize) - // range of addresses we might see in the heap - - // The arena_* fields indicate the addresses of the Go heap. - // - // The maximum range of the Go heap is - // [arena_start, arena_start+_MaxMem+1). - // - // The range of the current Go heap is - // [arena_start, arena_used). Parts of this range may not be - // mapped, but the metadata structures are always mapped for - // the full range. - arena_start uintptr - arena_used uintptr // Set with setArenaUsed. - - // The heap is grown using a linear allocator that allocates - // from the block [arena_alloc, arena_end). arena_alloc is - // often, but *not always* equal to arena_used. - arena_alloc uintptr - arena_end uintptr - // arena_reserved indicates that the memory [arena_alloc, // arena_end) is reserved (e.g., mapped PROT_NONE). If this is // false, we have to be careful not to clobber existing // mappings here. If this is true, then we own the mapping // here and *must* clobber it to use it. + // + // TODO(austin): Remove. arena_reserved bool // arenas is the heap arena index. arenas[va/heapArenaBytes] @@ -138,7 +120,22 @@ type mheap struct { // to probe any index. arenas *[memLimit / heapArenaBytes]*heapArena - //_ uint32 // ensure 64-bit alignment of central + // heapArenaAlloc is pre-reserved space for allocating heapArena + // objects. 
This is only used on 32-bit, where we pre-reserve + // this space to avoid interleaving it with the heap itself. + heapArenaAlloc linearAlloc + + // arenaHints is a list of addresses at which to attempt to + // add more heap arenas. This is initially populated with a + // set of general hint addresses, and grown with the bounds of + // actual heap arena ranges. + arenaHints *arenaHint + + // arena is a pre-reserved space for allocating heap arenas + // (the actual arenas). This is only used on 32-bit. + arena linearAlloc + + _ uint32 // ensure 64-bit alignment of central // central free lists for small size classes. // the padding makes sure that the MCentrals are @@ -156,6 +153,7 @@ type mheap struct { specialfinalizeralloc fixalloc // allocator for specialfinalizer* specialprofilealloc fixalloc // allocator for specialprofile* speciallock mutex // lock for special record allocators. + arenaHintAlloc fixalloc // allocator for arenaHints unused *specialfinalizer // never set, just here to force the specialfinalizer type into DWARF } @@ -190,6 +188,16 @@ type heapArena struct { spans [pagesPerArena]*mspan } +// arenaHint is a hint for where to grow the heap arenas. See +// mheap_.arenaHints. +// +//go:notinheap +type arenaHint struct { + addr uintptr + down bool + next *arenaHint +} + // An MSpan is a run of pages. // // When a MSpan is in the heap free list, state == MSpanFree @@ -458,8 +466,7 @@ func spanOf(p uintptr) *mspan { } // spanOfUnchecked is equivalent to spanOf, but the caller must ensure -// that p points into the heap (that is, mheap_.arena_start <= p < -// mheap_.arena_used). +// that p points into an allocated heap arena. // // Must be nosplit because it has callers that are nosplit. // @@ -491,6 +498,7 @@ func (h *mheap) init() { h.cachealloc.init(unsafe.Sizeof(mcache{}), nil, nil, &memstats.mcache_sys) h.specialfinalizeralloc.init(unsafe.Sizeof(specialfinalizer{}), nil, nil, &memstats.other_sys) h.specialprofilealloc.init(unsafe.Sizeof(specialprofile{}), nil, nil, &memstats.other_sys) + h.arenaHintAlloc.init(unsafe.Sizeof(arenaHint{}), nil, nil, &memstats.other_sys) // Don't zero mspan allocations. Background sweeping can // inspect a span concurrently with allocating it, so it's @@ -511,46 +519,6 @@ func (h *mheap) init() { for i := range h.central { h.central[i].mcentral.init(spanClass(i)) } - - // Map metadata structures. But don't map race detector memory - // since we're not actually growing the arena here (and TSAN - // gets mad if you map 0 bytes). - h.setArenaUsed(h.arena_used, false) -} - -// setArenaUsed extends the usable arena to address arena_used and -// maps auxiliary VM regions for any newly usable arena space. -// -// racemap indicates that this memory should be managed by the race -// detector. racemap should be true unless this is covering a VM hole. -func (h *mheap) setArenaUsed(arena_used uintptr, racemap bool) { - // Map auxiliary structures *before* h.arena_used is updated. - // Waiting to update arena_used until after the memory has been mapped - // avoids faults when other threads try access these regions immediately - // after observing the change to arena_used. - - // Allocate heap arena metadata. 
- for ri := h.arena_used / heapArenaBytes; ri < (arena_used+heapArenaBytes-1)/heapArenaBytes; ri++ { - if h.arenas[ri] != nil { - continue - } - r := (*heapArena)(persistentalloc(unsafe.Sizeof(heapArena{}), sys.PtrSize, &memstats.gc_sys)) - if r == nil { - throw("runtime: out of memory allocating heap arena metadata") - } - // Store atomically just in case an object from the - // new heap arena becomes visible before the heap lock - // is released (which shouldn't happen, but there's - // little downside to this). - atomic.StorepNoWB(unsafe.Pointer(&h.arenas[ri]), unsafe.Pointer(r)) - } - - // Tell the race detector about the new heap memory. - if racemap && raceenabled { - racemapshadow(unsafe.Pointer(h.arena_used), arena_used-h.arena_used) - } - - h.arena_used = arena_used } // Sweeps spans in list until reclaims at least npages into heap. @@ -886,32 +854,17 @@ func (h *mheap) allocLarge(npage uintptr) *mspan { // // h must be locked. func (h *mheap) grow(npage uintptr) bool { - // Ask for a big chunk, to reduce the number of mappings - // the operating system needs to track; also amortizes - // the overhead of an operating system mapping. - // Allocate a multiple of 64kB. - npage = round(npage, (64<<10)/_PageSize) ask := npage << _PageShift - if ask < _HeapAllocChunk { - ask = _HeapAllocChunk - } - - v := h.sysAlloc(ask) + v, size := h.sysAlloc(ask) if v == nil { - if ask > npage<<_PageShift { - ask = npage << _PageShift - v = h.sysAlloc(ask) - } - if v == nil { - print("runtime: out of memory: cannot allocate ", ask, "-byte block (", memstats.heap_sys, " in use)\n") - return false - } + print("runtime: out of memory: cannot allocate ", ask, "-byte block (", memstats.heap_sys, " in use)\n") + return false } // Create a fake "in use" span and free it, so that the // right coalescing happens. s := (*mspan)(h.spanalloc.alloc()) - s.init(uintptr(v), ask>>_PageShift) + s.init(uintptr(v), size/pageSize) h.setSpans(s.base(), s.npages, s) atomic.Store(&s.sweepgen, h.sweepgen) s.state = _MSpanInUse diff --git a/src/runtime/mstats.go b/src/runtime/mstats.go index c75ca747d0..f67d05414d 100644 --- a/src/runtime/mstats.go +++ b/src/runtime/mstats.go @@ -662,6 +662,9 @@ func purgecachedstats(c *mcache) { // overflow errors. //go:nosplit func mSysStatInc(sysStat *uint64, n uintptr) { + if sysStat == nil { + return + } if sys.BigEndian { atomic.Xadd64(sysStat, int64(n)) return @@ -676,6 +679,9 @@ func mSysStatInc(sysStat *uint64, n uintptr) { // mSysStatInc apply. //go:nosplit func mSysStatDec(sysStat *uint64, n uintptr) { + if sysStat == nil { + return + } if sys.BigEndian { atomic.Xadd64(sysStat, -int64(n)) return diff --git a/src/runtime/stack.go b/src/runtime/stack.go index 9ed6b1d774..029bff5af4 100644 --- a/src/runtime/stack.go +++ b/src/runtime/stack.go @@ -144,7 +144,7 @@ var stackpoolmu mutex // Global pool of large stack spans. 
var stackLarge struct { lock mutex - free [_MHeapMap_Bits]mSpanList // free lists by log_2(s.npages) + free [memLimitBits - pageShift]mSpanList // free lists by log_2(s.npages) } func stackinit() { diff --git a/test/chancap.go b/test/chancap.go index b08478a13c..9675e38bdb 100644 --- a/test/chancap.go +++ b/test/chancap.go @@ -42,11 +42,10 @@ func main() { shouldPanic("makechan: size out of range", func() { _ = make(T, n) }) shouldPanic("makechan: size out of range", func() { _ = make(T, int64(n)) }) if ptrSize == 8 { - n = 1 << 20 - n <<= 20 - shouldPanic("makechan: size out of range", func() { _ = make(T, n) }) - n <<= 20 - shouldPanic("makechan: size out of range", func() { _ = make(T, n) }) + var n2 int64 = 1 << 50 + shouldPanic("makechan: size out of range", func() { _ = make(T, int(n2)) }) + n2 = 1<<63 - 1 + shouldPanic("makechan: size out of range", func() { _ = make(T, int(n2)) }) } else { n = 1<<31 - 1 shouldPanic("makechan: size out of range", func() { _ = make(T, n) }) diff --git a/test/fixedbugs/bug273.go b/test/fixedbugs/bug273.go index c04f2116c5..7305c6063c 100644 --- a/test/fixedbugs/bug273.go +++ b/test/fixedbugs/bug273.go @@ -8,13 +8,15 @@ package main +import "unsafe" + var bug = false var minus1 = -1 var five = 5 -var big int64 = 10 | 1<<32 +var big int64 = 10 | 1<<40 -type block [1<<19]byte +type block [1 << 19]byte var g1 []block @@ -48,9 +50,10 @@ func bigcap() { g1 = make([]block, 10, big) } -type cblock [1<<16-1]byte +type cblock [1<<16 - 1]byte var g4 chan cblock + func badchancap() { g4 = make(chan cblock, minus1) } @@ -60,7 +63,8 @@ func bigchancap() { } func overflowchan() { - g4 = make(chan cblock, 1<<30) + const ptrSize = unsafe.Sizeof(uintptr(0)) + g4 = make(chan cblock, 1<<(30*(ptrSize/4))) } func main() { diff --git a/test/fixedbugs/issue4085b.go b/test/fixedbugs/issue4085b.go index b91bbd748a..db9a15894b 100644 --- a/test/fixedbugs/issue4085b.go +++ b/test/fixedbugs/issue4085b.go @@ -21,13 +21,12 @@ func main() { shouldPanic("cap out of range", func() { _ = make(T, 0, int64(n)) }) var t *byte if unsafe.Sizeof(t) == 8 { - n = 1 << 20 - n <<= 20 - shouldPanic("len out of range", func() { _ = make(T, n) }) - shouldPanic("cap out of range", func() { _ = make(T, 0, n) }) - n <<= 20 - shouldPanic("len out of range", func() { _ = make(T, n) }) - shouldPanic("cap out of range", func() { _ = make(T, 0, n) }) + var n2 int64 = 1 << 50 + shouldPanic("len out of range", func() { _ = make(T, int(n2)) }) + shouldPanic("cap out of range", func() { _ = make(T, 0, int(n2)) }) + n2 = 1<<63 - 1 + shouldPanic("len out of range", func() { _ = make(T, int(n2)) }) + shouldPanic("cap out of range", func() { _ = make(T, 0, int(n2)) }) } else { n = 1<<31 - 1 shouldPanic("len out of range", func() { _ = make(T, n) })
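// Illustrative sketch, not part of the patch: why the test limits above were
// raised. With the old layout the 64-bit heap was capped at 512GB (2^39), so
// a length of 1<<40 was already "out of range"; with memLimitBits = 48 the
// cap is 2^48-1 bytes, so the tests now use 1<<50 and 1<<63-1, and bug273's
// overflowchan scales its element count with the word size (1<<30 on 32-bit,
// 1<<60 on 64-bit) so the total allocation still exceeds the new limit. The
// mips adjustment to memLimitBits is dropped for simplicity; assumes 64-bit.
package main

import "fmt"

func main() {
	const _64bit = 1 // assume a 64-bit build for the comparison
	const memLimitBits = _64bit*48 + (1-_64bit)*32
	const maxMem = uint64(1)<<memLimitBits - 1
	fmt.Println(uint64(1)<<40 <= maxMem) // true: the old test value no longer overflows
	fmt.Println(uint64(1)<<50 > maxMem)  // true: the new value still triggers the panic
}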