diff --git a/src/runtime/trace2.go b/src/runtime/trace2.go
index 5fd09ed1ea..26e9867f08 100644
--- a/src/runtime/trace2.go
+++ b/src/runtime/trace2.go
@@ -71,7 +71,8 @@ var trace struct {
     stringTab [2]traceStringTable // maps strings to unique ids
 
     // cpuLogRead accepts CPU profile samples from the signal handler where
-    // they're generated. It uses a three-word header to hold the IDs of the P, G,
+    // they're generated. There are two profBufs here: one for gen%2, one for
+    // 1-gen%2. These profBufs use a three-word header to hold the IDs of the P, G,
     // and M (respectively) that were active at the time of the sample. Because
     // profBuf uses a record with all zeros in its header to indicate overflow,
     // we make sure to make the P field always non-zero: The ID of a real P will
@@ -82,9 +83,9 @@ var trace struct {
     // when sampling g0.
     //
    // Initialization and teardown of these fields is protected by traceAdvanceSema.
-    cpuLogRead  *profBuf
-    signalLock  atomic.Uint32           // protects use of the following member, only usable in signal handlers
-    cpuLogWrite atomic.Pointer[profBuf] // copy of cpuLogRead for use in signal handlers, set without signalLock
+    cpuLogRead  [2]*profBuf
+    signalLock  atomic.Uint32              // protects use of the following member, only usable in signal handlers
+    cpuLogWrite [2]atomic.Pointer[profBuf] // copy of cpuLogRead for use in signal handlers, set without signalLock
     cpuSleep    *wakeableSleep
     cpuLogDone  <-chan struct{}
     cpuBuf      [2]*traceBuf
diff --git a/src/runtime/trace2cpu.go b/src/runtime/trace2cpu.go
index a33c0b6b6d..95c62c44b4 100644
--- a/src/runtime/trace2cpu.go
+++ b/src/runtime/trace2cpu.go
@@ -16,8 +16,9 @@ func traceInitReadCPU() {
         throw("traceInitReadCPU called with trace enabled")
     }
     // Create new profBuf for CPU samples that will be emitted as events.
-    profBuf := newProfBuf(3, profBufWordCount, profBufTagCount) // after the timestamp, header is [pp.id, gp.goid, mp.procid]
-    trace.cpuLogRead = profBuf
+    // Format: after the timestamp, header is [pp.id, gp.goid, mp.procid].
+    trace.cpuLogRead[0] = newProfBuf(3, profBufWordCount, profBufTagCount)
+    trace.cpuLogRead[1] = newProfBuf(3, profBufWordCount, profBufTagCount)
     // We must not acquire trace.signalLock outside of a signal handler: a
     // profiling signal may arrive at any time and try to acquire it, leading to
     // deadlock. Because we can't use that lock to protect updates to
@@ -25,7 +26,8 @@ func traceInitReadCPU() {
     // writes of the pointer must be atomic. (And although this field is never
     // the sole pointer to the profBuf value, it's best to allow a write barrier
     // here.)
-    trace.cpuLogWrite.Store(profBuf)
+    trace.cpuLogWrite[0].Store(trace.cpuLogRead[0])
+    trace.cpuLogWrite[1].Store(trace.cpuLogRead[1])
 }
 
 // traceStartReadCPU creates a goroutine to start reading CPU profile
@@ -52,7 +54,15 @@ func traceStartReadCPU() {
             // we would still want to do a goroutine-level sleep in between
             // reads to avoid frequent wakeups.
             trace.cpuSleep.sleep(100_000_000)
-            if !traceReadCPU(trace.cpuLogRead) {
+
+            tl := traceAcquire()
+            if !tl.ok() {
+                // Tracing disabled.
+                break
+            }
+            keepGoing := traceReadCPU(tl.gen)
+            traceRelease(tl)
+            if !keepGoing {
                 break
             }
         }
@@ -76,8 +86,10 @@ func traceStopReadCPU() {
     //
     // Wake the goroutine so it can observe that the buffer is
     // closed and exit.
-    trace.cpuLogWrite.Store(nil)
-    trace.cpuLogRead.close()
+    trace.cpuLogWrite[0].Store(nil)
+    trace.cpuLogWrite[1].Store(nil)
+    trace.cpuLogRead[0].close()
+    trace.cpuLogRead[1].close()
     trace.cpuSleep.wake()
 
     // Wait until the logger goroutine exits.
@@ -85,20 +97,25 @@ func traceStopReadCPU() {
 
     // Clear state for the next trace.
     trace.cpuLogDone = nil
-    trace.cpuLogRead = nil
+    trace.cpuLogRead[0] = nil
+    trace.cpuLogRead[1] = nil
     trace.cpuSleep.close()
 }
 
-// traceReadCPU attempts to read from the provided profBuf and write
+// traceReadCPU attempts to read from trace.cpuLogRead[gen%2] and write
 // into the trace. Returns true if there might be more to read or false
 // if the profBuf is closed or the caller should otherwise stop reading.
 //
+// The caller is responsible for ensuring that gen does not change. Either
+// the caller must be in a traceAcquire/traceRelease block, or must be calling
+// with traceAdvanceSema held.
+//
 // No more than one goroutine may be in traceReadCPU for the same
 // profBuf at a time.
-func traceReadCPU(pb *profBuf) bool {
+func traceReadCPU(gen uintptr) bool {
     var pcBuf [traceStackSize]uintptr
 
-    data, tags, eof := pb.read(profBufNonBlocking)
+    data, tags, eof := trace.cpuLogRead[gen%2].read(profBufNonBlocking)
     for len(data) > 0 {
         if len(data) < 4 || data[0] > uint64(len(data)) {
             break // truncated profile
@@ -147,12 +164,7 @@ func traceReadCPU(pb *profBuf) bool {
         }
 
         // Write out a trace event.
-        tl := traceAcquire()
-        if !tl.ok() {
-            // Tracing disabled, exit without continuing.
-            return false
-        }
-        w := unsafeTraceWriter(tl.gen, trace.cpuBuf[tl.gen%2])
+        w := unsafeTraceWriter(gen, trace.cpuBuf[gen%2])
 
         // Ensure we have a place to write to.
         var flushed bool
@@ -163,7 +175,7 @@ func traceReadCPU(pb *profBuf) bool {
         }
 
         // Add the stack to the table.
-        stackID := trace.stackTab[tl.gen%2].put(pcBuf[:nstk])
+        stackID := trace.stackTab[gen%2].put(pcBuf[:nstk])
 
         // Write out the CPU sample.
         w.byte(byte(traceEvCPUSample))
@@ -173,8 +185,7 @@ func traceReadCPU(pb *profBuf) bool {
         w.varint(goid)
         w.varint(stackID)
 
-        trace.cpuBuf[tl.gen%2] = w.traceBuf
-        traceRelease(tl)
+        trace.cpuBuf[gen%2] = w.traceBuf
     }
     return !eof
 }
@@ -187,6 +198,10 @@ func traceReadCPU(pb *profBuf) bool {
 //
 //go:systemstack
 func traceCPUFlush(gen uintptr) {
+    // Read everything out of the last gen's CPU profile buffer.
+    traceReadCPU(gen)
+
+    // Flush any remaining trace buffers containing CPU samples.
     if buf := trace.cpuBuf[gen%2]; buf != nil {
         lock(&trace.lock)
         traceBufFlush(buf, gen)
@@ -197,13 +212,38 @@ func traceCPUFlush(gen uintptr) {
 
 // traceCPUSample writes a CPU profile sample stack to the execution tracer's
 // profiling buffer. It is called from a signal handler, so is limited in what
-// it can do.
+// it can do. mp must be the thread that is currently stopped in a signal.
 func traceCPUSample(gp *g, mp *m, pp *p, stk []uintptr) {
     if !traceEnabled() {
         // Tracing is usually turned off; don't spend time acquiring the signal
         // lock unless it's active.
         return
     }
+    if mp == nil {
+        // Drop samples that don't have an identifiable thread. We can't render
+        // this in any useful way anyway.
+        return
+    }
+
+    // We're going to conditionally write to one of two buffers based on the
+    // generation. To make sure we write to the correct one, we need to make
+    // sure this thread's trace seqlock is held. If it already is, then we're
+    // in the tracer and we can just take advantage of that. If it isn't, then
+    // we need to acquire it and read the generation.
+    locked := false
+    if mp.trace.seqlock.Load()%2 == 0 {
+        mp.trace.seqlock.Add(1)
+        locked = true
+    }
+    gen := trace.gen.Load()
+    if gen == 0 {
+        // Tracing is disabled, as it turns out. Release the seqlock if necessary
+        // and exit.
+        if locked {
+            mp.trace.seqlock.Add(1)
+        }
+        return
+    }
 
     now := traceClockNow()
     // The "header" here is the ID of the M that was running the profiled code,
@@ -231,7 +271,7 @@ func traceCPUSample(gp *g, mp *m, pp *p, stk []uintptr) {
         osyield()
     }
 
-    if log := trace.cpuLogWrite.Load(); log != nil {
+    if log := trace.cpuLogWrite[gen%2].Load(); log != nil {
         // Note: we don't pass a tag pointer here (how should profiling tags
         // interact with the execution tracer?), but if we did we'd need to be
         // careful about write barriers. See the long comment in profBuf.write.
@@ -239,4 +279,9 @@ func traceCPUSample(gp *g, mp *m, pp *p, stk []uintptr) {
     }
 
     trace.signalLock.Store(0)
+
+    // Release the seqlock if we acquired it earlier.
+    if locked {
+        mp.trace.seqlock.Add(1)
+    }
 }
diff --git a/src/runtime/trace2runtime.go b/src/runtime/trace2runtime.go
index a9c8d8a590..9638e4b924 100644
--- a/src/runtime/trace2runtime.go
+++ b/src/runtime/trace2runtime.go
@@ -192,7 +192,12 @@ func traceAcquireEnabled() traceLocker {
     // Prevent preemption.
     mp := acquirem()
 
-    // Acquire the trace seqlock.
+    // Acquire the trace seqlock. This prevents traceAdvance from moving forward
+    // until all Ms are observed to be outside of their seqlock critical section.
+    //
+    // Note: The seqlock is mutated here and also in traceCPUSample. If you update
+    // usage of the seqlock here, make sure to also look at what traceCPUSample is
+    // doing.
     seq := mp.trace.seqlock.Add(1)
     if debugTraceReentrancy && seq%2 != 1 {
         throw("bad use of trace.seqlock or tracer is reentrant")
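For readers following the locking discipline above, here is a minimal, self-contained Go sketch (not part of the patch) of the pattern the change relies on: samples go into one of two buffers selected by gen%2, writers pin the generation by holding a per-thread seqlock (odd while writing), and the advancer bumps the generation and waits for every seqlock to be observed even before flushing the retired buffer. All identifiers here (tracer, thread, sample, advance) are invented for illustration and do not exist in the runtime.

// sketch.go: generation-indexed double buffering guarded by per-thread seqlocks.
package main

import (
    "fmt"
    "sync"
    "sync/atomic"
)

type thread struct {
    seqlock atomic.Uint64 // odd while this thread is writing a sample
}

type tracer struct {
    gen     atomic.Uint64 // current generation; 0 means tracing is off
    mu      [2]sync.Mutex // stands in for trace.signalLock in this sketch
    buf     [2][]uint64   // buf[gen%2] collects samples for that generation
    threads []*thread
}

// sample plays the role of traceCPUSample: it pins the generation by holding
// the thread's seqlock, then writes into the buffer for that generation.
func (t *tracer) sample(th *thread, v uint64) {
    th.seqlock.Add(1) // enter critical section (even -> odd)
    defer th.seqlock.Add(1)

    gen := t.gen.Load()
    if gen == 0 {
        return // tracing disabled
    }
    idx := gen % 2
    t.mu[idx].Lock()
    t.buf[idx] = append(t.buf[idx], v)
    t.mu[idx].Unlock()
}

// advance plays the role of traceAdvance: it bumps the generation, waits for
// every thread to be observed outside its critical section, and only then
// flushes the buffer of the previous generation.
func (t *tracer) advance() []uint64 {
    old := t.gen.Load()
    t.gen.Store(old + 1)

    for _, th := range t.threads {
        // Once th is seen outside a critical section, any later sample it
        // takes will observe the new generation.
        for th.seqlock.Load()%2 != 0 {
            // busy-wait (the runtime uses osyield and more care here)
        }
    }

    idx := old % 2
    t.mu[idx].Lock()
    out := t.buf[idx]
    t.buf[idx] = nil
    t.mu[idx].Unlock()
    return out
}

func main() {
    th := &thread{}
    tr := &tracer{threads: []*thread{th}}
    tr.gen.Store(1)

    tr.sample(th, 42)
    fmt.Println("flushed:", tr.advance()) // flushed: [42]
}

In the real patch the writer side runs in a signal handler and uses trace.signalLock plus the cpuLogWrite atomic pointers instead of a mutex, but the generation-pinning idea is the same: holding the seqlock keeps the sample from landing in the buffer of a generation that traceAdvance has already started flushing.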