mirror of
https://github.com/golang/go.git
synced 2025-05-18 22:04:38 +00:00
runtime: traceAcquire and traceRelease across all P steals
Currently there are a few places where a P can get stolen where the runtime doesn't traceAcquire and traceRelease across the steal itself. What can happen then is the following scenario: - Thread 1 enters a syscall and writes an event about it. - Thread 2 steals Thread 1's P. - Thread 1 exits the syscall and writes one or more events about it. - Tracing ends (trace.gen is set to 0). - Thread 2 checks to see if it should write an event for the P it just stole, sees that tracing is disabled, and doesn't. This results in broken traces, because there's a missing ProcSteal event. The parser always waits for a ProcSteal to advance a GoSyscallEndBlocked event, and in this case, it never comes. Fixes #65181. Change-Id: I437629499bb7669bf7fe2fc6fc4f64c53002916b Reviewed-on: https://go-review.googlesource.com/c/go/+/560235 Reviewed-by: Michael Pratt <mpratt@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
parent
829f2ce3ba
commit
c9d88ea2aa
@ -4404,8 +4404,8 @@ func entersyscall_gcwait() {
|
||||
pp := gp.m.oldp.ptr()
|
||||
|
||||
lock(&sched.lock)
|
||||
if sched.stopwait > 0 && atomic.Cas(&pp.status, _Psyscall, _Pgcstop) {
|
||||
trace := traceAcquire()
|
||||
if sched.stopwait > 0 && atomic.Cas(&pp.status, _Psyscall, _Pgcstop) {
|
||||
if trace.ok() {
|
||||
if goexperiment.ExecTracer2 {
|
||||
// This is a steal in the new tracer. While it's very likely
|
||||
@ -4428,6 +4428,8 @@ func entersyscall_gcwait() {
|
||||
if sched.stopwait--; sched.stopwait == 0 {
|
||||
notewakeup(&sched.stopnote)
|
||||
}
|
||||
} else if trace.ok() {
|
||||
traceRelease(trace)
|
||||
}
|
||||
unlock(&sched.lock)
|
||||
}
|
||||
@ -4605,12 +4607,19 @@ func exitsyscallfast(oldp *p) bool {
|
||||
}
|
||||
|
||||
// Try to re-acquire the last P.
|
||||
trace := traceAcquire()
|
||||
if oldp != nil && oldp.status == _Psyscall && atomic.Cas(&oldp.status, _Psyscall, _Pidle) {
|
||||
// There's a cpu for us, so we can run.
|
||||
wirep(oldp)
|
||||
exitsyscallfast_reacquired()
|
||||
exitsyscallfast_reacquired(trace)
|
||||
if trace.ok() {
|
||||
traceRelease(trace)
|
||||
}
|
||||
return true
|
||||
}
|
||||
if trace.ok() {
|
||||
traceRelease(trace)
|
||||
}
|
||||
|
||||
// Try to get any other idle P.
|
||||
if sched.pidle != 0 {
|
||||
@ -4646,10 +4655,9 @@ func exitsyscallfast(oldp *p) bool {
|
||||
// syscall.
|
||||
//
|
||||
//go:nosplit
|
||||
func exitsyscallfast_reacquired() {
|
||||
func exitsyscallfast_reacquired(trace traceLocker) {
|
||||
gp := getg()
|
||||
if gp.m.syscalltick != gp.m.p.ptr().syscalltick {
|
||||
trace := traceAcquire()
|
||||
if trace.ok() {
|
||||
// The p was retaken and then enter into syscall again (since gp.m.syscalltick has changed).
|
||||
// traceGoSysBlock for this syscall was already emitted,
|
||||
@ -4666,7 +4674,6 @@ func exitsyscallfast_reacquired() {
|
||||
// Denote completion of the current syscall.
|
||||
trace.GoSysExit(true)
|
||||
}
|
||||
traceRelease(trace)
|
||||
})
|
||||
}
|
||||
gp.m.p.ptr().syscalltick++
|
||||
@ -6146,8 +6153,8 @@ func retake(now int64) uint32 {
|
||||
// Otherwise the M from which we retake can exit the syscall,
|
||||
// increment nmidle and report deadlock.
|
||||
incidlelocked(-1)
|
||||
if atomic.Cas(&pp.status, s, _Pidle) {
|
||||
trace := traceAcquire()
|
||||
if atomic.Cas(&pp.status, s, _Pidle) {
|
||||
if trace.ok() {
|
||||
trace.GoSysBlock(pp)
|
||||
trace.ProcSteal(pp, false)
|
||||
@ -6156,6 +6163,8 @@ func retake(now int64) uint32 {
|
||||
n++
|
||||
pp.syscalltick++
|
||||
handoffp(pp)
|
||||
} else if trace.ok() {
|
||||
traceRelease(trace)
|
||||
}
|
||||
incidlelocked(1)
|
||||
lock(&allpLock)
|
||||
|
Loading…
x
Reference in New Issue
Block a user