diff --git a/src/internal/trace/parser.go b/src/internal/trace/parser.go
index bfe6b73015..294a52f976 100644
--- a/src/internal/trace/parser.go
+++ b/src/internal/trace/parser.go
@@ -20,12 +20,13 @@ import (
 type Event struct {
 	Off   int       // offset in input file (for debugging and error reporting)
 	Type  byte      // one of Ev*
+	Seq   int64     // sequence number
 	Ts    int64     // timestamp in nanoseconds
 	P     int       // P on which the event happened (can be one of TimerP, NetpollP, SyscallP)
 	G     uint64    // G on which the event happened
 	StkID uint64    // unique stack ID
 	Stk   []*Frame  // stack trace (can be empty)
-	Args  [2]uint64 // event-type-specific arguments
+	Args  [3]uint64 // event-type-specific arguments
 	// linked event (can be nil), depends on event type:
 	// for GCStart: the GCStop
 	// for GCScanStart: the GCScanDone
@@ -88,12 +89,12 @@ type rawEvent struct {
 // It does not care about specific event types and argument meaning.
 func readTrace(r io.Reader) ([]rawEvent, error) {
 	// Read and validate trace header.
-	var buf [8]byte
+	var buf [16]byte
 	off, err := r.Read(buf[:])
-	if off != 8 || err != nil {
+	if off != 16 || err != nil {
 		return nil, fmt.Errorf("failed to read header: read %v, err %v", off, err)
 	}
-	if bytes.Compare(buf[:], []byte("gotrace\x00")) != 0 {
+	if bytes.Compare(buf[:], []byte("go 1.5 trace\x00\x00\x00\x00")) != 0 {
 		return nil, fmt.Errorf("not a trace file")
 	}
 
@@ -111,10 +112,10 @@ func readTrace(r io.Reader) ([]rawEvent, error) {
 		}
 		off += n
 		typ := buf[0] << 2 >> 2
-		narg := buf[0]>>6 + 1
+		narg := buf[0] >> 6
 		ev := rawEvent{typ: typ, off: off0}
-		if narg <= 3 {
-			for i := 0; i < int(narg); i++ {
+		if narg < 3 {
+			for i := 0; i < int(narg)+2; i++ { // sequence number and time stamp are present but not counted in narg
				var v uint64
				v, off, err = readVal(r, off)
				if err != nil {
@@ -123,7 +124,7 @@ func readTrace(r io.Reader) ([]rawEvent, error) {
				ev.args = append(ev.args, v)
			}
		} else {
-			// If narg == 4, the first value is length of the event in bytes.
+			// If narg == 3, the first value is length of the event in bytes.
			var v uint64
			v, off, err = readVal(r, off)
			if err != nil {
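Aside: every number in the trace wire format is a base-128 varint, which is what readVal (further down in this file) decodes. A minimal self-contained sketch of the encoding and its inverse; encodeVal and decodeVal are illustrative names, not part of the parser:

package main

import "fmt"

// encodeVal writes v as a base-128 varint: 7 payload bits per byte,
// high bit set on every byte except the last.
func encodeVal(v uint64) []byte {
	var buf []byte
	for v >= 0x80 {
		buf = append(buf, byte(v)|0x80)
		v >>= 7
	}
	return append(buf, byte(v))
}

// decodeVal mirrors readVal's loop: accumulate 7 bits per byte until
// a byte with the high bit clear terminates the value.
func decodeVal(buf []byte) (v uint64, n int) {
	for i := 0; ; i++ {
		b := buf[i]
		v |= uint64(b&0x7f) << (uint(i) * 7)
		if b&0x80 == 0 {
			return v, i + 1
		}
	}
}

func main() {
	enc := encodeVal(300)
	fmt.Printf("%x\n", enc) // ac02
	v, n := decodeVal(enc)
	fmt.Println(v, n) // 300 2
}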
@@ -150,7 +151,7 @@ func readTrace(r io.Reader) ([]rawEvent, error) {
 // Parse events transforms raw events into events.
 // It does analyze and verify per-event-type arguments.
 func parseEvents(rawEvents []rawEvent) (events []*Event, err error) {
-	var ticksPerSec, lastTs int64
+	var ticksPerSec, lastSeq, lastTs int64
 	var lastG, timerGoid uint64
 	var lastP int
 	lastGs := make(map[int]uint64) // last goroutine running on P
@@ -171,6 +172,7 @@ func parseEvents(rawEvents []rawEvent) (events []*Event, err error) {
			narg++
		}
		if raw.typ != EvBatch && raw.typ != EvFrequency && raw.typ != EvTimerGoroutine {
+			narg++ // sequence number
			narg++ // timestamp
		}
		if len(raw.args) != narg {
@@ -184,7 +186,8 @@ func parseEvents(rawEvents []rawEvent) (events []*Event, err error) {
			lastGs[lastP] = lastG
			lastP = int(raw.args[0])
			lastG = lastGs[lastP]
-			lastTs = int64(raw.args[1])
+			lastSeq = int64(raw.args[1])
+			lastTs = int64(raw.args[2])
		case EvFrequency:
			ticksPerSec = int64(raw.args[0])
			if ticksPerSec <= 0 {
@@ -221,13 +224,15 @@ func parseEvents(rawEvents []rawEvent) (events []*Event, err error) {
			}
		default:
			e := &Event{Off: raw.off, Type: raw.typ, P: lastP, G: lastG}
-			e.Ts = lastTs + int64(raw.args[0])
+			e.Seq = lastSeq + int64(raw.args[0])
+			e.Ts = lastTs + int64(raw.args[1])
+			lastSeq = e.Seq
			lastTs = e.Ts
			for i := range desc.Args {
-				e.Args[i] = raw.args[i+1]
+				e.Args[i] = raw.args[i+2]
			}
			if desc.Stack {
-				e.StkID = raw.args[len(desc.Args)+1]
+				e.StkID = raw.args[len(desc.Args)+2]
			}
			switch raw.typ {
			case EvGoStart:
@@ -241,10 +246,11 @@ func parseEvents(rawEvents []rawEvent) (events []*Event, err error) {
				EvGoSysBlock:
				lastG = 0
			case EvGoSysExit:
-				if e.Args[1] != 0 {
-					// EvGoSysExit emission is delayed until the thread has a P.
-					// Give it the real timestamp.
-					e.Ts = int64(e.Args[1])
+				// EvGoSysExit emission is delayed until the thread has a P.
+				// Give it the real sequence number and time stamp.
+				e.Seq = int64(e.Args[1])
+				if e.Args[2] != 0 {
+					e.Ts = int64(e.Args[2])
				}
			}
			events = append(events, e)
@@ -262,10 +268,8 @@ func parseEvents(rawEvents []rawEvent) (events []*Event, err error) {
		}
	}
 
-	// Sort by time and translate cpu ticks to real time.
-	// Use stable sort because adjacent events in a batch can have the same timestamp
-	// (this was observed on some VMs).
-	sort.Stable(eventList(events))
+	// Sort by sequence number and translate cpu ticks to real time.
+	sort.Sort(eventList(events))
	if ticksPerSec == 0 {
		err = fmt.Errorf("no EvFrequency event")
		return
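Aside: the switch from sort.Stable on timestamps to sort.Sort on sequence numbers is the heart of this change. cputicks timestamps can tie (or even regress) across Ps on VMs with coarse or unsynchronized TSCs, so they only give a partial order; the global sequence number gives a total one. A toy illustration with a simplified event type standing in for trace.Event:

package main

import (
	"fmt"
	"sort"
)

// event is a simplified stand-in for trace.Event (illustration only).
type event struct {
	Seq int64 // global sequence number issued by tracestamp
	Ts  int64 // coarse tick timestamp; ties happen across Ps on some VMs
}

// bySeq mirrors the parser's eventList: a total order by sequence number.
type bySeq []event

func (l bySeq) Len() int           { return len(l) }
func (l bySeq) Less(i, j int) bool { return l[i].Seq < l[j].Seq }
func (l bySeq) Swap(i, j int)      { l[i], l[j] = l[j], l[i] }

func main() {
	// Three events from two Ps whose clocks ticked identically:
	// Ts alone cannot recover the true interleaving, Seq can.
	events := []event{{Seq: 3, Ts: 100}, {Seq: 1, Ts: 100}, {Seq: 2, Ts: 100}}
	sort.Sort(bySeq(events))
	fmt.Println(events) // [{1 100} {2 100} {3 100}]
}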
@@ -341,6 +345,10 @@ func removeFutile(events []*Event) ([]*Event, error) {
	return newEvents, nil
 }
 
+// ErrTimeOrder is returned by Parse when the trace contains
+// time stamps that do not respect actual event ordering.
+var ErrTimeOrder = fmt.Errorf("time stamps out of order")
+
 // postProcessTrace does inter-event verification and information restoration.
 // The resulting trace is guaranteed to be consistent
 // (for example, a P does not run two Gs at the same time, or a G is indeed
@@ -555,6 +563,18 @@ func postProcessTrace(events []*Event) error {
 
	// TODO(dvyukov): restore stacks for EvGoStart events.
	// TODO(dvyukov): test that all EvGoStart events has non-nil Link.
 
+	// Last, after all the other consistency checks,
+	// make sure time stamps respect sequence numbers.
+	// The tests will skip (not fail) the test case if they see this error,
+	// so check everything else that could possibly be wrong first.
+	lastTs := int64(0)
+	for _, ev := range events {
+		if ev.Ts < lastTs {
+			return ErrTimeOrder
+		}
+		lastTs = ev.Ts
+	}
+
	return nil
 }
@@ -639,7 +659,7 @@ func readVal(r io.Reader, off0 int) (v uint64, off int, err error) {
		var n int
		n, err = r.Read(buf[:])
		if err != nil || n != 1 {
-			return 0, 0, fmt.Errorf("failed to read trace at offset: read %v, error %v", off0, n, err)
+			return 0, 0, fmt.Errorf("failed to read trace at offset %d: read %v, error %v", off0, n, err)
		}
		off++
		v |= uint64(buf[0]&0x7f) << (uint(i) * 7)
@@ -657,7 +677,7 @@ func (l eventList) Len() int {
 }
 
 func (l eventList) Less(i, j int) bool {
-	return l[i].Ts < l[j].Ts
+	return l[i].Seq < l[j].Seq
 }
 
 func (l eventList) Swap(i, j int) {
@@ -725,8 +745,8 @@ var EventDescriptions = [EvCount]struct {
	Args []string
 }{
	EvNone:           {"None", false, []string{}},
-	EvBatch:          {"Batch", false, []string{"p", "ticks"}},
-	EvFrequency:      {"Frequency", false, []string{"freq"}},
+	EvBatch:          {"Batch", false, []string{"p", "seq", "ticks"}},
+	EvFrequency:      {"Frequency", false, []string{"freq", "unused"}},
	EvStack:          {"Stack", false, []string{"id", "siz"}},
	EvGomaxprocs:     {"Gomaxprocs", true, []string{"procs"}},
	EvProcStart:      {"ProcStart", false, []string{"thread"}},
@@ -753,12 +773,12 @@ var EventDescriptions = [EvCount]struct {
	EvGoBlockCond:    {"GoBlockCond", true, []string{}},
	EvGoBlockNet:     {"GoBlockNet", true, []string{}},
	EvGoSysCall:      {"GoSysCall", true, []string{}},
-	EvGoSysExit:      {"GoSysExit", false, []string{"g", "ts"}},
+	EvGoSysExit:      {"GoSysExit", false, []string{"g", "seq", "ts"}},
	EvGoSysBlock:     {"GoSysBlock", false, []string{}},
	EvGoWaiting:      {"GoWaiting", false, []string{"g"}},
	EvGoInSyscall:    {"GoInSyscall", false, []string{"g"}},
	EvHeapAlloc:      {"HeapAlloc", false, []string{"mem"}},
	EvNextGC:         {"NextGC", false, []string{"mem"}},
-	EvTimerGoroutine: {"TimerGoroutine", false, []string{"g"}},
+	EvTimerGoroutine: {"TimerGoroutine", false, []string{"g", "unused"}},
	EvFutileWakeup:   {"FutileWakeup", false, []string{}},
 }
diff --git a/src/internal/trace/parser_test.go b/src/internal/trace/parser_test.go
index 7d066e86fe..0eeb3e600e 100644
--- a/src/internal/trace/parser_test.go
+++ b/src/internal/trace/parser_test.go
@@ -16,6 +16,10 @@ func TestCorruptedInputs(t *testing.T) {
		"gotrace\x00Q00\x020",
		"gotrace\x00T00\x020",
		"gotrace\x00\xc3\x0200",
+		"go 1.5 trace\x00\x00\x00\x00\x020",
+		"go 1.5 trace\x00\x00\x00\x00Q00\x020",
+		"go 1.5 trace\x00\x00\x00\x00T00\x020",
+		"go 1.5 trace\x00\x00\x00\x00\xc3\x0200",
	}
	for _, data := range tests {
		events, err := Parse(strings.NewReader(data))
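Aside: once events are ordered, the parser still converts tick counts into nanoseconds using the rate recorded by EvFrequency ("translate cpu ticks to real time" above). A sketch of that conversion under illustrative names; ticksToNs is not the parser's actual helper, and the real code additionally guards against a zero frequency and overflow on large traces:

package main

import "fmt"

// ticksToNs rebases a raw cputicks value against the trace's first
// event and scales by the ticks-per-second rate from EvFrequency.
// Assumes the delta is small enough that delta*1e9 fits in int64.
func ticksToNs(ticks, minTicks, ticksPerSec int64) int64 {
	return (ticks - minTicks) * 1e9 / ticksPerSec
}

func main() {
	const ticksPerSec = 2500000000 // 2.5 GHz TSC, as EvFrequency might report
	fmt.Println(ticksToNs(1000000300, 1000000000, ticksPerSec)) // 120
}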
diff --git a/src/runtime/proc1.go b/src/runtime/proc1.go
index 788f4fd3b4..23beaf537c 100644
--- a/src/runtime/proc1.go
+++ b/src/runtime/proc1.go
@@ -1348,7 +1348,7 @@ func execute(gp *g, inheritTime bool) {
		// GoSysExit has to happen when we have a P, but before GoStart.
		// So we emit it here.
		if gp.syscallsp != 0 && gp.sysblocktraced {
-			traceGoSysExit(gp.sysexitticks)
+			traceGoSysExit(gp.sysexitseq, gp.sysexitticks)
		}
		traceGoStart()
	}
@@ -1986,6 +1986,7 @@ func exitsyscall(dummy int32) {
	}
 
	_g_.sysexitticks = 0
+	_g_.sysexitseq = 0
	if trace.enabled {
		// Wait till traceGoSysBlock event is emitted.
		// This ensures consistency of the trace (the goroutine is started after it is blocked).
@@ -1996,7 +1997,7 @@ func exitsyscall(dummy int32) {
		// Tracing code can invoke write barriers that cannot run without a P.
		// So instead we remember the syscall exit time and emit the event
		// in execute when we have a P.
-		_g_.sysexitticks = cputicks()
+		_g_.sysexitseq, _g_.sysexitticks = tracestamp()
	}
 
	_g_.m.locks--
@@ -2044,7 +2045,7 @@ func exitsyscallfast() bool {
				// Denote blocking of the new syscall.
				traceGoSysBlock(_g_.m.p.ptr())
				// Denote completion of the current syscall.
-				traceGoSysExit(0)
+				traceGoSysExit(tracestamp())
			})
		}
		_g_.m.p.ptr().syscalltick++
@@ -2068,7 +2069,7 @@ func exitsyscallfast() bool {
					osyield()
				}
			}
-			traceGoSysExit(0)
+			traceGoSysExit(tracestamp())
		}
	})
	if ok {
diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go
index dc600ae578..a1c790fa85 100644
--- a/src/runtime/runtime2.go
+++ b/src/runtime/runtime2.go
@@ -235,16 +235,17 @@ type g struct {
	waitsince      int64  // approx time when the g become blocked
	waitreason     string // if status==Gwaiting
	schedlink      guintptr
-	preempt        bool // preemption signal, duplicates stackguard0 = stackpreempt
-	paniconfault   bool // panic (instead of crash) on unexpected fault address
-	preemptscan    bool // preempted g does scan for gc
-	gcscandone     bool // g has scanned stack; protected by _Gscan bit in status
-	gcscanvalid    bool // false at start of gc cycle, true if G has not run since last scan
-	throwsplit     bool // must not split stack
-	raceignore     int8 // ignore race detection events
-	sysblocktraced bool // StartTrace has emitted EvGoInSyscall about this goroutine
-	sysexitticks   int64 // cputicks when syscall has returned (for tracing)
-	m              *m // for debuggers, but offset not hard-coded
+	preempt        bool   // preemption signal, duplicates stackguard0 = stackpreempt
+	paniconfault   bool   // panic (instead of crash) on unexpected fault address
+	preemptscan    bool   // preempted g does scan for gc
+	gcscandone     bool   // g has scanned stack; protected by _Gscan bit in status
+	gcscanvalid    bool   // false at start of gc cycle, true if G has not run since last scan
+	throwsplit     bool   // must not split stack
+	raceignore     int8   // ignore race detection events
+	sysblocktraced bool   // StartTrace has emitted EvGoInSyscall about this goroutine
+	sysexitticks   int64  // cputicks when syscall has returned (for tracing)
+	sysexitseq     uint64 // trace seq when syscall has returned (for tracing)
+	m              *m     // for debuggers, but offset not hard-coded
	lockedm        *m
	sig            uint32
	writebuf       []byte
diff --git a/src/runtime/trace.go b/src/runtime/trace.go
index 8cd95893a0..c81846221a 100644
--- a/src/runtime/trace.go
+++ b/src/runtime/trace.go
@@ -95,6 +95,7 @@ var trace struct {
	headerWritten bool   // whether ReadTrace has emitted trace header
	footerWritten bool   // whether ReadTrace has emitted trace footer
	shutdownSema  uint32 // used to wait for ReadTrace completion
+	seqStart      uint64 // sequence number when tracing was started
	ticksStart    int64  // cputicks when tracing was started
	ticksEnd      int64  // cputicks when tracing was stopped
	timeStart     int64  // nanotime when tracing was started
@@ -110,9 +111,31 @@ var trace struct {
	buf     *traceBuf // global trace buffer, used when running without a p
 }
 
+var traceseq uint64 // global trace sequence number
+
+// tracestamp returns a consistent sequence number, time stamp pair
+// for use in a trace. We need to make sure that time stamp ordering
+// (assuming synchronized CPUs) and sequence ordering match.
+// To do that, we increment traceseq, grab ticks, and increment traceseq again.
+// We treat odd traceseq as a sign that another thread is in the middle
+// of the sequence and spin until it is done.
+// Not splitting stack to avoid preemption, just in case the call sites
+// that used to call xadd64 and cputicks are sensitive to that.
+//go:nosplit
+func tracestamp() (seq uint64, ts int64) {
+	seq = atomicload64(&traceseq)
+	for seq&1 != 0 || !cas64(&traceseq, seq, seq+1) {
+		seq = atomicload64(&traceseq)
+	}
+	ts = cputicks()
+	atomicstore64(&traceseq, seq+2)
+	return seq >> 1, ts
+}
+
 // traceBufHeader is per-P tracing buffer.
 type traceBufHeader struct {
	link      *traceBuf // in trace.empty/full
+	lastSeq   uint64    // sequence number of last event
	lastTicks uint64    // when we wrote the last event
	buf       []byte    // trace data, always points to traceBuf.arr
	stk       [traceStackSize]uintptr // scratch buffer for traceback
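Aside: the odd/even protocol above is a tiny spinlock around the clock read. A stamper makes traceseq odd, reads the clock, then makes it even again, so no two clock reads can ever be ordered inconsistently with their sequence numbers. A portable user-space sketch, with sync/atomic standing in for the runtime's atomicload64/cas64/atomicstore64 and time.Now for cputicks:

package main

import (
	"fmt"
	"sync"
	"sync/atomic"
	"time"
)

var traceseq uint64 // low bit: stamp in progress; upper 63 bits: counter

func tracestamp() (seq uint64, ts int64) {
	seq = atomic.LoadUint64(&traceseq)
	// An odd value means another goroutine is between its two
	// increments; spin until it finishes, then claim the slot.
	for seq&1 != 0 || !atomic.CompareAndSwapUint64(&traceseq, seq, seq+1) {
		seq = atomic.LoadUint64(&traceseq)
	}
	ts = time.Now().UnixNano() // the runtime reads cputicks() here
	atomic.StoreUint64(&traceseq, seq+2)
	return seq >> 1, ts // counter advances by 2 per stamp, so halve it
}

func main() {
	var wg sync.WaitGroup
	for i := 0; i < 4; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			seq, ts := tracestamp()
			fmt.Println(seq, ts) // sequence order matches clock order
		}()
	}
	wg.Wait()
}

Because clock reads are serialized inside the odd "critical section", a stamp with a smaller sequence number always carries an earlier (or equal) clock reading.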
@@ -147,7 +170,7 @@ func StartTrace() error {
		return errorString("tracing is already enabled")
	}
 
-	trace.ticksStart = cputicks()
+	trace.seqStart, trace.ticksStart = tracestamp()
	trace.timeStart = nanotime()
	trace.headerWritten = false
	trace.footerWritten = false
@@ -308,7 +331,7 @@ func ReadTrace() []byte {
		trace.headerWritten = true
		trace.lockOwner = nil
		unlock(&trace.lock)
-		return []byte("gotrace\x00")
+		return []byte("go 1.5 trace\x00\x00\x00\x00")
	}
	// Wait for new data.
	if trace.fullHead == nil && !trace.shutdown {
@@ -334,9 +357,11 @@ func ReadTrace() []byte {
		var data []byte
		data = append(data, traceEvFrequency|0<<traceArgCountShift)
		data = traceAppend(data, uint64(freq))
+		data = traceAppend(data, 0)
		if timers.gp != nil {
			data = append(data, traceEvTimerGoroutine|0<<traceArgCountShift)
			data = traceAppend(data, uint64(timers.gp.goid))
+			data = traceAppend(data, 0)
		}
		return data
	}
@@ -437,16 +462,21 @@ func traceEvent(ev byte, skip int, args ...uint64) {
		buf = traceFlush(buf)
		*bufp = buf
	}
-	ticks := uint64(cputicks()) / traceTickDiv
+	seq, ticksraw := tracestamp()
+	seqDiff := seq - buf.lastSeq
+	ticks := uint64(ticksraw) / traceTickDiv
	tickDiff := ticks - buf.lastTicks
	if len(buf.buf) == 0 {
		data := buf.buf
		data = append(data, traceEvBatch|1<<traceArgCountShift)
		data = traceAppend(data, uint64(pid))
+		data = traceAppend(data, seq)
		data = traceAppend(data, ticks)
		buf.buf = data
+		seqDiff = 0
		tickDiff = 0
	}
+	buf.lastSeq = seq
	buf.lastTicks = ticks
	narg := byte(len(args))
	if skip >= 0 {
@@ -469,6 +499,7 @@ func traceEvent(ev byte, skip int, args ...uint64) {
		data = append(data, 0)
		lenp = &data[len(data)-1]
	}
+	data = traceAppend(data, seqDiff)
	data = traceAppend(data, tickDiff)
	for _, a := range args {
		data = traceAppend(data, a)
@@ -800,12 +831,12 @@ func traceGoSysCall() {
	traceEvent(traceEvGoSysCall, 4)
 }
 
-func traceGoSysExit(ts int64) {
-	if ts != 0 && ts < trace.ticksStart {
+func traceGoSysExit(seq uint64, ts int64) {
+	if int64(seq)-int64(trace.seqStart) < 0 {
		// The timestamp was obtained during a previous tracing session, ignore.
		return
	}
-	traceEvent(traceEvGoSysExit, -1, uint64(getg().m.curg.goid), uint64(ts)/traceTickDiv)
+	traceEvent(traceEvGoSysExit, -1, uint64(getg().m.curg.goid), seq, uint64(ts)/traceTickDiv)
 }
 
 func traceGoSysBlock(pp *p) {
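Aside: the guard in traceGoSysExit compares sequence numbers by signed difference rather than with a plain `<`. That is the usual idiom for monotonically increasing counters snapshotted across sessions: it stays correct even if the unsigned counter wraps, as long as the two values are within 2^63 of each other. The diff's `int64(seq)-int64(trace.seqStart) < 0` is the same value modulo 2^64 as the `int64(seq-start)` used in this small illustration (stampedBefore is an illustrative name):

package main

import "fmt"

// stampedBefore reports whether seq was taken before start, by checking
// the sign of the (wrap-tolerant) unsigned difference.
func stampedBefore(seq, start uint64) bool {
	return int64(seq-start) < 0
}

func main() {
	fmt.Println(stampedBefore(99, 100))         // true: stamped in a previous session
	fmt.Println(stampedBefore(100, 100))        // false
	fmt.Println(stampedBefore(^uint64(0), 100)) // true: 2^64-1 acts as "just below" 100 after a wrap
}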
diff --git a/src/runtime/trace/trace_stack_test.go b/src/runtime/trace/trace_stack_test.go
index 1fca454ed0..58139ba144 100644
--- a/src/runtime/trace/trace_stack_test.go
+++ b/src/runtime/trace/trace_stack_test.go
@@ -125,7 +125,7 @@ func TestTraceSymbolize(t *testing.T) {
	<-pipeReadDone
 
	Stop()
-	events, _, err := parseTrace(buf)
+	events, _, err := parseTrace(t, buf)
	if err != nil {
		t.Fatalf("failed to parse trace: %v", err)
	}
diff --git a/src/runtime/trace/trace_test.go b/src/runtime/trace/trace_test.go
index e987564d8b..76d3d44ae7 100644
--- a/src/runtime/trace/trace_test.go
+++ b/src/runtime/trace/trace_test.go
@@ -75,13 +75,19 @@ func TestTrace(t *testing.T) {
	}
	Stop()
	_, err := trace.Parse(buf)
+	if err == trace.ErrTimeOrder {
+		t.Skipf("skipping trace: %v", err)
+	}
	if err != nil {
		t.Fatalf("failed to parse trace: %v", err)
	}
 }
 
-func parseTrace(r io.Reader) ([]*trace.Event, map[uint64]*trace.GDesc, error) {
+func parseTrace(t *testing.T, r io.Reader) ([]*trace.Event, map[uint64]*trace.GDesc, error) {
	events, err := trace.Parse(r)
+	if err == trace.ErrTimeOrder {
+		t.Skipf("skipping trace: %v", err)
+	}
	if err != nil {
		return nil, nil, err
	}
@@ -221,7 +227,7 @@ func TestTraceStress(t *testing.T) {
	runtime.GOMAXPROCS(procs)
	Stop()
 
-	_, _, err = parseTrace(buf)
+	_, _, err = parseTrace(t, buf)
	if err != nil {
		t.Fatalf("failed to parse trace: %v", err)
	}
@@ -361,7 +367,7 @@ func TestTraceStressStartStop(t *testing.T) {
	}
	time.Sleep(time.Millisecond)
	Stop()
-	if _, _, err := parseTrace(buf); err != nil {
+	if _, _, err := parseTrace(t, buf); err != nil {
		t.Fatalf("failed to parse trace: %v", err)
	}
 }
@@ -428,7 +434,7 @@ func TestTraceFutileWakeup(t *testing.T) {
	done.Wait()
	Stop()
 
-	events, _, err := parseTrace(buf)
+	events, _, err := parseTrace(t, buf)
	if err != nil {
		t.Fatalf("failed to parse trace: %v", err)
	}