diff --git a/src/runtime/proc.go b/src/runtime/proc.go
index 845e25da6e..5ac32fb259 100644
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@@ -4173,7 +4173,7 @@ func exitsyscall() {
 		// Tracing code can invoke write barriers that cannot run without a P.
 		// So instead we remember the syscall exit time and emit the event
 		// in execute when we have a P.
-		gp.trace.sysExitTicks = cputicks()
+		gp.trace.sysExitTime = traceClockNow()
 	}
 
 	gp.m.locks--
diff --git a/src/runtime/trace.go b/src/runtime/trace.go
index 2fe6d2d13f..29b9886b8a 100644
--- a/src/runtime/trace.go
+++ b/src/runtime/trace.go
@@ -86,9 +86,7 @@ const (
 	// The suggested increment frequency for PowerPC's time base register is
 	// 512 MHz according to Power ISA v2.07 section 6.2, so we use 16 on ppc64
 	// and ppc64le.
-	// Tracing won't work reliably for architectures where cputicks is emulated
-	// by nanotime, so the value doesn't matter for those architectures.
-	traceTickDiv = 16 + 48*(goarch.Is386|goarch.IsAmd64)
+	traceTimeDiv = 16 + 48*(goarch.Is386|goarch.IsAmd64)
 	// Maximum number of PCs in a single stack trace.
 	// Since events contain only stack id rather than whole stack trace,
 	// we can allow quite large values here.
@@ -113,10 +111,12 @@ var trace struct {
 	footerWritten bool        // whether ReadTrace has emitted trace footer
 	shutdownSema  uint32      // used to wait for ReadTrace completion
 	seqStart      uint64      // sequence number when tracing was started
-	ticksStart    int64       // cputicks when tracing was started
-	ticksEnd      int64       // cputicks when tracing was stopped
-	timeStart     int64       // nanotime when tracing was started
-	timeEnd       int64       // nanotime when tracing was stopped
+	startTicks    int64       // cputicks when tracing was started
+	endTicks      int64       // cputicks when tracing was stopped
+	startNanotime int64       // nanotime when tracing was started
+	endNanotime   int64       // nanotime when tracing was stopped
+	startTime     traceTime   // traceClockNow when tracing started
+	endTime       traceTime   // traceClockNow when tracing stopped
 	seqGC         uint64      // GC start/done sequencer
 	reading       traceBufPtr // buffer currently handed off to user
 	empty         traceBufPtr // stack of empty buffers
@@ -163,10 +163,10 @@ var trace struct {
 
 // gTraceState is per-G state for the tracer.
 type gTraceState struct {
-	sysExitTicks       int64    // cputicks when syscall has returned
-	tracedSyscallEnter bool     // syscall or cgo was entered while trace was enabled or StartTrace has emitted EvGoInSyscall about this goroutine
-	seq                uint64   // trace event sequencer
-	lastP              puintptr // last P emitted an event for this goroutine
+	sysExitTime        traceTime // timestamp when syscall has returned
+	tracedSyscallEnter bool      // syscall or cgo was entered while trace was enabled or StartTrace has emitted EvGoInSyscall about this goroutine
+	seq                uint64    // trace event sequencer
+	lastP              puintptr  // last P emitted an event for this goroutine
 }
 
 // mTraceState is per-M state for the tracer.
@@ -199,10 +199,10 @@ func traceLockInit() {
 
 // traceBufHeader is per-P tracing buffer.
 type traceBufHeader struct {
-	link      traceBufPtr             // in trace.empty/full
-	lastTicks uint64                  // when we wrote the last event
-	pos       int                     // next write offset in arr
-	stk       [traceStackSize]uintptr // scratch buffer for traceback
+	link     traceBufPtr             // in trace.empty/full
+	lastTime traceTime               // when we wrote the last event
+	pos      int                     // next write offset in arr
+	stk      [traceStackSize]uintptr // scratch buffer for traceback
 }
 
 // traceBuf is per-P tracing buffer.
@@ -336,12 +336,13 @@ func StartTrace() error {
 	})
 	traceProcStart()
 	traceGoStart()
-	// Note: ticksStart needs to be set after we emit traceEvGoInSyscall events.
+	// Note: startTicks needs to be set after we emit traceEvGoInSyscall events.
 	// If we do it the other way around, it is possible that exitsyscall will
-	// query sysExitTicks after ticksStart but before traceEvGoInSyscall timestamp.
+	// query sysExitTime after startTicks but before traceEvGoInSyscall timestamp.
 	// It will lead to a false conclusion that cputicks is broken.
-	trace.ticksStart = cputicks()
-	trace.timeStart = nanotime()
+	trace.startTime = traceClockNow()
+	trace.startTicks = cputicks()
+	trace.startNanotime = nanotime()
 	trace.headerWritten = false
 	trace.footerWritten = false
 
@@ -424,10 +425,11 @@ func StopTrace() {
 	}
 
 	for {
-		trace.ticksEnd = cputicks()
-		trace.timeEnd = nanotime()
+		trace.endTime = traceClockNow()
+		trace.endTicks = cputicks()
+		trace.endNanotime = nanotime()
 		// Windows time can tick only every 15ms, wait for at least one tick.
-		if trace.timeEnd != trace.timeStart {
+		if trace.endNanotime != trace.startNanotime {
 			break
 		}
 		osyield()
@@ -591,8 +593,7 @@ newFull:
 	// Write footer with timer frequency.
 	if !trace.footerWritten {
 		trace.footerWritten = true
-		// Use float64 because (trace.ticksEnd - trace.ticksStart) * 1e9 can overflow int64.
-		freq := float64(trace.ticksEnd-trace.ticksStart) * 1e9 / float64(trace.timeEnd-trace.timeStart) / traceTickDiv
+		freq := (float64(trace.endTicks-trace.startTicks) / traceTimeDiv) / (float64(trace.endNanotime-trace.startNanotime) / 1e9)
 		if freq <= 0 {
 			throw("trace: ReadTrace got invalid frequency")
 		}
@@ -768,16 +769,12 @@ func traceEventLocked(extraBytes int, mp *m, pid int32, bufp *traceBufPtr, ev by
 		bufp.set(buf)
 	}
 
-	// NOTE: ticks might be same after tick division, although the real cputicks is
-	// linear growth.
-	ticks := uint64(cputicks()) / traceTickDiv
-	tickDiff := ticks - buf.lastTicks
-	if tickDiff == 0 {
-		ticks = buf.lastTicks + 1
-		tickDiff = 1
+	ts := traceClockNow()
+	if ts <= buf.lastTime {
+		ts = buf.lastTime + 1
 	}
-
-	buf.lastTicks = ticks
+	tsDiff := uint64(ts - buf.lastTime)
+	buf.lastTime = ts
 	narg := byte(len(args))
 	if stackID != 0 || skip >= 0 {
 		narg++
@@ -795,7 +792,7 @@ func traceEventLocked(extraBytes int, mp *m, pid int32, bufp *traceBufPtr, ev by
 		buf.varint(0)
 		lenp = &buf.arr[buf.pos-1]
 	}
-	buf.varint(tickDiff)
+	buf.varint(tsDiff)
 	for _, a := range args {
 		buf.varint(a)
 	}
@@ -827,7 +824,7 @@ func traceCPUSample(gp *g, pp *p, stk []uintptr) {
 	}
 
 	// Match the clock used in traceEventLocked
-	now := cputicks()
+	now := traceClockNow()
 	// The "header" here is the ID of the P that was running the profiled code,
 	// followed by the ID of the goroutine. (For normal CPU profiling, it's
 	// usually the number of samples with the given stack.) Near syscalls, pp
@@ -854,7 +851,7 @@ func traceCPUSample(gp *g, pp *p, stk []uintptr) {
 		// Note: we don't pass a tag pointer here (how should profiling tags
 		// interact with the execution tracer?), but if we did we'd need to be
 		// careful about write barriers. See the long comment in profBuf.write.
-		log.write(nil, now, hdr[:], stk)
+		log.write(nil, int64(now), hdr[:], stk)
 	}
 
 	trace.signalLock.Store(0)
@@ -917,7 +914,7 @@ func traceReadCPU() {
 			}
 			stackID := trace.stackTab.put(buf.stk[:nstk])
 
-			traceEventLocked(0, nil, 0, bufp, traceEvCPUSample, stackID, 1, timestamp/traceTickDiv, ppid, goid)
+			traceEventLocked(0, nil, 0, bufp, traceEvCPUSample, stackID, 1, uint64(timestamp), ppid, goid)
 		}
 	}
 }
@@ -1055,14 +1052,14 @@ func traceFlush(buf traceBufPtr, pid int32) traceBufPtr {
 	bufp.pos = 0
 
 	// initialize the buffer for a new batch
-	ticks := uint64(cputicks()) / traceTickDiv
-	if ticks == bufp.lastTicks {
-		ticks = bufp.lastTicks + 1
+	ts := traceClockNow()
+	if ts <= bufp.lastTime {
+		ts = bufp.lastTime + 1
 	}
-	bufp.lastTicks = ticks
+	bufp.lastTime = ts
 	bufp.byte(traceEvBatch | 1<
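
The hunks above replace every inline cputicks()/traceTickDiv computation with a call to traceClockNow, which returns a traceTime, but neither the type nor the function is defined in the portion of the diff shown here. As a rough sketch only, with the names taken from the call sites and the body an assumption consistent with how StartTrace, traceEventLocked, and traceFlush use it rather than code from this patch, the wrapper would look something like:

// Sketch, not part of the patch: a trace-clock wrapper consistent with the
// call sites above, assuming it simply centralizes the old inline math.
type traceTime uint64

// traceClockNow returns a timestamp on the trace clock, i.e. cputicks
// scaled down by traceTimeDiv, which is what the deleted code computed
// inline at each event-emission site.
func traceClockNow() traceTime {
	return traceTime(cputicks() / traceTimeDiv)
}

Centralizing the clock read is what lets traceEventLocked and traceFlush compare and clamp whole timestamps (ts <= buf.lastTime) instead of special-casing a zero tick delta.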
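
The footer-frequency hunk still computes the same quantity, trace-clock ticks per second, but regroups it as (ticks / traceTimeDiv) / (nanoseconds / 1e9), so each operand is converted to float64 before anything is multiplied; that is why the overflow comment could be deleted. A standalone check of the two groupings with made-up deltas (the numbers and the traceTimeDiv value of 64, i.e. 16+48 as on amd64, are illustrative only, not taken from the patch):

package main

import "fmt"

func main() {
	const traceTimeDiv = 64 // 16 + 48 on 386/amd64, per the constant above

	// Illustrative deltas: ~2.4e9 cputicks over 1s of nanotime.
	ticksDelta := int64(2_400_000_000)
	nanosDelta := int64(1_000_000_000)

	// Old grouping (already done in float64 to dodge the int64 overflow
	// the deleted comment warned about).
	oldFreq := float64(ticksDelta) * 1e9 / float64(nanosDelta) / traceTimeDiv

	// New grouping from the patch: trace-clock ticks divided by seconds.
	newFreq := (float64(ticksDelta) / traceTimeDiv) / (float64(nanosDelta) / 1e9)

	fmt.Println(oldFreq, newFreq) // both print 3.75e+07 ticks per second
}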
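
The traceEventLocked and traceFlush hunks enforce per-buffer monotonicity the same way: if the new reading is at or before buf.lastTime (possible because the divided clock can repeat a value), the event is bumped one trace-clock tick forward, so the varint-encoded delta is never zero. A small isolated sketch of that clamp; the helper name nextEventTime is made up for illustration and does not appear in the patch:

package main

import "fmt"

type traceTime uint64

// nextEventTime mirrors the clamp in the patched traceEventLocked: it
// returns the timestamp to record for an event and the always-nonzero
// delta to varint-encode against the previous event in the same buffer.
func nextEventTime(last, now traceTime) (ts traceTime, delta uint64) {
	if now <= last {
		now = last + 1 // never emit a zero (or wrapped) delta
	}
	return now, uint64(now - last)
}

func main() {
	last := traceTime(100)
	for _, now := range []traceTime{100, 99, 105} {
		ts, delta := nextEventTime(last, now)
		fmt.Println(ts, delta) // prints: 101 1, then 102 1, then 105 3
		last = ts
	}
}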