mirror of
https://github.com/golang/go.git
synced 2025-05-28 19:02:22 +00:00
Currently the runtime marks all new memory as MADV_HUGEPAGE on Linux and manages its hugepage eligibility status. Unfortunately, the default THP behavior on most Linux distros is that MADV_HUGEPAGE blocks while the kernel eagerly reclaims and compacts memory to allocate a hugepage. This direct reclaim and compaction is unbounded, and may result in significant application thread stalls. In really bad cases, this can exceed 100s of ms or even seconds. Really all we want is to undo MADV_NOHUGEPAGE marks and let the default Linux paging behavior take over, but the only way to unmark a region as MADV_NOHUGEPAGE is to also mark it MADV_HUGEPAGE. The overall strategy of trying to keep hugepages for the heap unbroken however is sound. So instead let's use the new shiny MADV_COLLAPSE if it exists. MADV_COLLAPSE makes a best-effort synchronous attempt at collapsing the physical memory backing a memory region into a hugepage. We'll use MADV_COLLAPSE where we would've used MADV_HUGEPAGE, and stop using MADV_NOHUGEPAGE altogether. Because MADV_COLLAPSE is synchronous, it's also important to not re-collapse huge pages if the huge pages are likely part of some large allocation. Although in many cases it's advantageous to back these allocations with hugepages because they're contiguous, eagerly collapsing every hugepage means having to page in at least part of the large allocation. However, because we won't use MADV_NOHUGEPAGE anymore, we'll no longer handle the fact that khugepaged might come in and back some memory we returned to the OS with a hugepage. I've come to the conclusion that this is basically unavoidable without a new madvise flag and that it's just not a good default. If this change lands, advice about Linux huge page settings will be added to the GC guide. 
Verified that this change doesn't regress Sweet, at least not on my machine with: /sys/kernel/mm/transparent_hugepage/enabled [always or madvise] /sys/kernel/mm/transparent_hugepage/defrag [madvise] /sys/kernel/mm/transparent_hugepage/khugepaged/max_ptes_none [0 or 511] Unfortunately, this workaround means that we only get forced hugepages on Linux 6.1+. Fixes #61718. Change-Id: I7f4a7ba397847de29f800a99f9cb66cb2720a533 Reviewed-on: https://go-review.googlesource.com/c/go/+/516795 Reviewed-by: Austin Clements <austin@google.com> TryBot-Result: Gopher Robot <gobot@golang.org> Run-TryBot: Michael Knyszek <mknyszek@google.com> Auto-Submit: Michael Knyszek <mknyszek@google.com>
193 lines
3.2 KiB
Go
193 lines
3.2 KiB
Go
// Copyright 2016 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package runtime
|
|
|
|
import "unsafe"
|
|
|
|
// Numeric ABI constants used by the runtime.
//
// NOTE(review): the values are presumably copied from the Linux kernel
// headers for this architecture; confirm against the kernel UAPI before
// changing any of them.
const (
	// Error numbers.
	_EINTR  = 0x4
	_EAGAIN = 0xb
	_ENOMEM = 0xc

	// PROT_* memory protection bits.
	_PROT_NONE  = 0x0
	_PROT_READ  = 0x1
	_PROT_WRITE = 0x2
	_PROT_EXEC  = 0x4

	// MAP_* mapping flags.
	_MAP_ANON    = 0x20
	_MAP_PRIVATE = 0x2
	_MAP_FIXED   = 0x10

	// MADV_* madvise advice values.
	_MADV_DONTNEED   = 0x4
	_MADV_FREE       = 0x8
	_MADV_HUGEPAGE   = 0xe
	_MADV_NOHUGEPAGE = 0xf
	_MADV_COLLAPSE   = 0x19

	// SA_* signal action flags.
	_SA_RESTART = 0x10000000
	_SA_ONSTACK = 0x8000000
	_SA_SIGINFO = 0x4

	// SI_* signal origin codes.
	_SI_KERNEL = 0x80
	_SI_TIMER  = -0x2

	// Signal numbers.
	_SIGHUP    = 0x1
	_SIGINT    = 0x2
	_SIGQUIT   = 0x3
	_SIGILL    = 0x4
	_SIGTRAP   = 0x5
	_SIGABRT   = 0x6
	_SIGBUS    = 0x7
	_SIGFPE    = 0x8
	_SIGKILL   = 0x9
	_SIGUSR1   = 0xa
	_SIGSEGV   = 0xb
	_SIGUSR2   = 0xc
	_SIGPIPE   = 0xd
	_SIGALRM   = 0xe
	_SIGSTKFLT = 0x10
	_SIGCHLD   = 0x11
	_SIGCONT   = 0x12
	_SIGSTOP   = 0x13
	_SIGTSTP   = 0x14
	_SIGTTIN   = 0x15
	_SIGTTOU   = 0x16
	_SIGURG    = 0x17
	_SIGXCPU   = 0x18
	_SIGXFSZ   = 0x19
	_SIGVTALRM = 0x1a
	_SIGPROF   = 0x1b
	_SIGWINCH  = 0x1c
	_SIGIO     = 0x1d
	_SIGPWR    = 0x1e
	_SIGSYS    = 0x1f

	_SIGRTMIN = 0x20

	// FPE_* codes.
	_FPE_INTDIV = 0x1
	_FPE_INTOVF = 0x2
	_FPE_FLTDIV = 0x3
	_FPE_FLTOVF = 0x4
	_FPE_FLTUND = 0x5
	_FPE_FLTRES = 0x6
	_FPE_FLTINV = 0x7
	_FPE_FLTSUB = 0x8

	// BUS_* codes.
	_BUS_ADRALN = 0x1
	_BUS_ADRERR = 0x2
	_BUS_OBJERR = 0x3

	// SEGV_* codes.
	_SEGV_MAPERR = 0x1
	_SEGV_ACCERR = 0x2

	// ITIMER_* interval timer selectors.
	_ITIMER_REAL    = 0x0
	_ITIMER_VIRTUAL = 0x1
	_ITIMER_PROF    = 0x2

	_CLOCK_THREAD_CPUTIME_ID = 0x3

	_SIGEV_THREAD_ID = 0x4
)
|
|
|
|
// timespec holds a time value as two 64-bit fields: whole seconds and
// the remaining nanoseconds.
type timespec struct {
	tv_sec, tv_nsec int64
}

// setNsec stores ns, a nanosecond count, into ts: tv_sec receives the
// quotient of ns divided by 1e9 and tv_nsec receives the remainder.
// Go integer division truncates toward zero, so for negative ns both
// fields are non-positive.
//
//go:nosplit
func (ts *timespec) setNsec(ns int64) {
	ts.tv_sec, ts.tv_nsec = ns/1e9, ns%1e9
}
|
|
|
|
// timeval holds a time value as two 64-bit fields: whole seconds and
// microseconds.
type timeval struct {
	tv_sec, tv_usec int64
}

// set_usec widens x to 64 bits and stores it in tv_usec. tv_sec is not
// modified.
func (tv *timeval) set_usec(x int32) {
	usec := int64(x)
	tv.tv_usec = usec
}
|
|
|
|
// sigactiont describes a signal action: a handler address, a flags
// word, a restorer address and a 64-bit signal mask.
//
// NOTE(review): field order and widths presumably mirror the kernel's
// sigaction layout for this architecture; do not reorder without
// checking.
type sigactiont struct {
	sa_handler  uintptr
	sa_flags    uint64
	sa_restorer uintptr
	sa_mask     uint64
}
|
|
|
|
// siginfoFields is the named prefix of a signal-info record: three
// 32-bit integers (signal number, errno, code) followed by a 64-bit
// address.
type siginfoFields struct {
	si_signo, si_errno, si_code int32
	// below here is a union; si_addr is the only field we use
	si_addr uint64
}
|
|
|
|
type siginfo struct {
|
|
siginfoFields
|
|
|
|
// Pad struct to the max size in the kernel.
|
|
_ [_si_max_size - unsafe.Sizeof(siginfoFields{})]byte
|
|
}
|
|
|
|
type itimerspec struct {
|
|
it_interval timespec
|
|
it_value timespec
|
|
}
|
|
|
|
type itimerval struct {
|
|
it_interval timeval
|
|
it_value timeval
|
|
}
|
|
|
|
// sigeventFields is the named prefix of a signal-event record: a
// pointer-sized value, two 32-bit integers (signo, notify) and a
// 32-bit thread id.
type sigeventFields struct {
	value         uintptr
	signo, notify int32
	// below here is a union; sigev_notify_thread_id is the only field we use
	sigev_notify_thread_id int32
}
|
|
|
|
type sigevent struct {
|
|
sigeventFields
|
|
|
|
// Pad struct to the max size in the kernel.
|
|
_ [_sigev_max_size - unsafe.Sizeof(sigeventFields{})]byte
|
|
}
|
|
|
|
// File-open flags plus the _SA_RESTORER flag value.
//
// NOTE(review): the values are presumably copied from the Linux kernel
// headers for this architecture; confirm before changing any of them.
const (
	// O_* open flags.
	_O_RDONLY   = 0x0
	_O_WRONLY   = 0x1
	_O_CREAT    = 0x40
	_O_TRUNC    = 0x200
	_O_NONBLOCK = 0x800
	_O_CLOEXEC  = 0x80000

	// _SA_RESTORER is zero in this file.
	_SA_RESTORER = 0
)
|
|
|
|
// stackt describes a stack region by a pointer to its memory, a 32-bit
// flags word and a size.
//
// NOTE(review): presumably mirrors the kernel's signal-stack
// descriptor; keep the field order as is.
type stackt struct {
	ss_sp    *byte
	ss_flags int32
	ss_size  uintptr
}
|
|
|
|
// sigcontext is a saved register set: two 64-bit PSW words, sixteen
// 64-bit gregs, sixteen 32-bit aregs, a 32-bit fpc word and sixteen
// 64-bit fpregs.
//
// NOTE(review): the register names suggest the s390x machine context;
// confirm against the kernel headers before changing the layout.
type sigcontext struct {
	psw_mask, psw_addr uint64

	gregs  [16]uint64
	aregs  [16]uint32
	fpc    uint32
	fpregs [16]uint64
}
|
|
|
|
type ucontext struct {
|
|
uc_flags uint64
|
|
uc_link *ucontext
|
|
uc_stack stackt
|
|
uc_mcontext sigcontext
|
|
uc_sigmask uint64
|
|
}
|