From 0906d648aa173f65aaafd6296d43247b426f05f3 Mon Sep 17 00:00:00 2001
From: Austin Clements <austin@google.com>
Date: Sun, 26 Aug 2018 21:33:26 -0400
Subject: [PATCH] runtime: eliminate gchelper mechanism

Now that we do no mark work during mark termination, we no longer need
the gchelper mechanism.

Updates #26903.
Updates #17503.

Change-Id: Ie94e5c0f918cfa047e88cae1028fece106955c1b
Reviewed-on: https://go-review.googlesource.com/c/134785
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Rick Hudson <rlh@golang.org>
---
 src/runtime/mgc.go      | 68 ++-----------------------------
 src/runtime/mgcmark.go  |  4 --
 src/runtime/proc.go     | 89 ++---------------------------------------
 src/runtime/runtime2.go |  1 -
 src/runtime/stack.go    |  4 +-
 5 files changed, 9 insertions(+), 157 deletions(-)

diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go
index 253b2df9e4..9dfee5a4dc 100644
--- a/src/runtime/mgc.go
+++ b/src/runtime/mgc.go
@@ -938,11 +938,10 @@ var work struct {
 	markrootNext uint32 // next markroot job
 	markrootJobs uint32 // number of markroot jobs
 
-	nproc   uint32
-	tstart  int64
-	nwait   uint32
-	ndone   uint32
-	alldone note
+	nproc  uint32
+	tstart int64
+	nwait  uint32
+	ndone  uint32
 
 	// Number of roots of various root types. Set by gcMarkRootPrepare.
 	nFlushCacheRoots                               int
@@ -1898,30 +1897,12 @@ func gcMark(start_time int64) {
 	}
 	work.tstart = start_time
 
-	work.nwait = 0
-	work.ndone = 0
-	work.nproc = uint32(gcprocs())
-
 	// Check that there's no marking work remaining.
 	if work.full != 0 || work.markrootNext < work.markrootJobs {
 		print("runtime: full=", hex(work.full), " next=", work.markrootNext, " jobs=", work.markrootJobs, " nDataRoots=", work.nDataRoots, " nBSSRoots=", work.nBSSRoots, " nSpanRoots=", work.nSpanRoots, " nStackRoots=", work.nStackRoots, "\n")
 		panic("non-empty mark queue after concurrent mark")
 	}
 
-	// Clear root marking queue.
-	work.markrootNext = 0
-	work.markrootJobs = 0
-
-	if work.nproc > 1 {
-		noteclear(&work.alldone)
-		helpgc(int32(work.nproc))
-	}
-
-	gchelperstart()
-
-	gcw := &getg().m.p.ptr().gcw
-	gcDrain(gcw, 0)
-
 	if debug.gccheckmark > 0 {
 		// This is expensive when there's a large number of
 		// Gs, so only do it if checkmark is also enabled.
@@ -1931,10 +1912,6 @@ func gcMark(start_time int64) {
 		throw("work.full != 0")
 	}
 
-	if work.nproc > 1 {
-		notesleep(&work.alldone)
-	}
-
 	// Clear out buffers and double-check that all gcWork caches
 	// are empty. This should be ensured by gcMarkDone before we
 	// enter mark termination.
@@ -2094,43 +2071,6 @@ func clearpools() {
 	unlock(&sched.deferlock)
 }
 
-// gchelper runs mark termination tasks on Ps other than the P
-// coordinating mark termination.
-//
-// The caller is responsible for ensuring that this has a P to run on,
-// even though it's running during STW. Because of this, it's allowed
-// to have write barriers.
-//
-//go:yeswritebarrierrec
-func gchelper() {
-	_g_ := getg()
-	_g_.m.traceback = 2
-	gchelperstart()
-
-	// Parallel mark over GC roots and heap
-	if gcphase == _GCmarktermination {
-		gcw := &_g_.m.p.ptr().gcw
-		gcDrain(gcw, 0)
-	}
-
-	nproc := atomic.Load(&work.nproc) // work.nproc can change right after we increment work.ndone
-	if atomic.Xadd(&work.ndone, +1) == nproc-1 {
-		notewakeup(&work.alldone)
-	}
-	_g_.m.traceback = 0
-}
-
-func gchelperstart() {
-	_g_ := getg()
-
-	if _g_.m.helpgc < 0 || _g_.m.helpgc >= _MaxGcproc {
-		throw("gchelperstart: bad m->helpgc")
-	}
-	if _g_ != _g_.m.g0 {
-		throw("gchelper not running on g0 stack")
-	}
-}
-
 // Timing
 
 // itoaDiv formats val/(10**dec) into buf.
diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go
index 0f220dd1b9..34e9776d27 100644
--- a/src/runtime/mgcmark.go
+++ b/src/runtime/mgcmark.go
@@ -685,10 +685,6 @@ func scanstack(gp *g, gcw *gcWork) {
 	if gp == getg() {
 		throw("can't scan our own stack")
 	}
-	mp := gp.m
-	if mp != nil && mp.helpgc != 0 {
-		throw("can't scan gchelper stack")
-	}
 
 	// Shrink the stack if not much of it is being used.
 	shrinkstack(gp)
diff --git a/src/runtime/proc.go b/src/runtime/proc.go
index 910918f4b4..db6f908e8c 100644
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@@ -663,59 +663,6 @@ func ready(gp *g, traceskip int, next bool) {
 	}
 }
 
-func gcprocs() int32 {
-	// Figure out how many CPUs to use during GC.
-	// Limited by gomaxprocs, number of actual CPUs, and MaxGcproc.
-	lock(&sched.lock)
-	n := gomaxprocs
-	if n > ncpu {
-		n = ncpu
-	}
-	if n > _MaxGcproc {
-		n = _MaxGcproc
-	}
-	if n > sched.nmidle+1 { // one M is currently running
-		n = sched.nmidle + 1
-	}
-	unlock(&sched.lock)
-	return n
-}
-
-func needaddgcproc() bool {
-	lock(&sched.lock)
-	n := gomaxprocs
-	if n > ncpu {
-		n = ncpu
-	}
-	if n > _MaxGcproc {
-		n = _MaxGcproc
-	}
-	n -= sched.nmidle + 1 // one M is currently running
-	unlock(&sched.lock)
-	return n > 0
-}
-
-func helpgc(nproc int32) {
-	_g_ := getg()
-	lock(&sched.lock)
-	pos := 0
-	for n := int32(1); n < nproc; n++ { // one M is currently running
-		if allp[pos].mcache == _g_.m.mcache {
-			pos++
-		}
-		mp := mget()
-		if mp == nil {
-			throw("gcprocs inconsistency")
-		}
-		mp.helpgc = n
-		mp.p.set(allp[pos])
-		mp.mcache = allp[pos].mcache
-		pos++
-		notewakeup(&mp.park)
-	}
-	unlock(&sched.lock)
-}
-
 // freezeStopWait is a large value that freezetheworld sets
 // sched.stopwait to in order to request that all Gs permanently stop.
 const freezeStopWait = 0x7fffffff
@@ -1132,11 +1079,6 @@ func stopTheWorldWithSema() {
 	}
 }
 
-func mhelpgc() {
-	_g_ := getg()
-	_g_.m.helpgc = -1
-}
-
 func startTheWorldWithSema(emitTraceEvent bool) int64 {
 	_g_ := getg()
 
@@ -1145,7 +1087,6 @@ func startTheWorldWithSema(emitTraceEvent bool) int64 {
 		list := netpoll(false) // non-blocking
 		injectglist(&list)
 	}
-	add := needaddgcproc()
 	lock(&sched.lock)
 
 	procs := gomaxprocs
@@ -1175,7 +1116,6 @@ func startTheWorldWithSema(emitTraceEvent bool) int64 {
 		} else {
 			// Start M to run P.  Do not start another M below.
 			newm(nil, p)
-			add = false
 		}
 	}
 
@@ -1192,16 +1132,6 @@ func startTheWorldWithSema(emitTraceEvent bool) int64 {
 		wakep()
 	}
 
-	if add {
-		// If GC could have used another helper proc, start one now,
-		// in the hope that it will be available next time.
-		// It would have been even better to start it before the collection,
-		// but doing so requires allocating memory, so it's tricky to
-		// coordinate. This lazy approach works out in practice:
-		// we don't mind if the first couple gc rounds don't have quite
-		// the maximum number of procs.
-		newm(mhelpgc, nil)
-	}
 	_g_.m.locks--
 	if _g_.m.locks == 0 && _g_.preempt { // restore the preemption request in case we've cleared it in newstack
 		_g_.stackguard0 = stackPreempt
@@ -1276,10 +1206,7 @@ func mstart1() {
 		fn()
 	}
 
-	if _g_.m.helpgc != 0 {
-		_g_.m.helpgc = 0
-		stopm()
-	} else if _g_.m != &m0 {
+	if _g_.m != &m0 {
 		acquirep(_g_.m.nextp.ptr())
 		_g_.m.nextp = 0
 	}
@@ -2003,21 +1930,11 @@ func stopm() {
 		throw("stopm spinning")
 	}
 
-retry:
 	lock(&sched.lock)
 	mput(_g_.m)
 	unlock(&sched.lock)
 	notesleep(&_g_.m.park)
 	noteclear(&_g_.m.park)
-	if _g_.m.helpgc != 0 {
-		// helpgc() set _g_.m.p and _g_.m.mcache, so we have a P.
-		gchelper()
-		// Undo the effects of helpgc().
-		_g_.m.helpgc = 0
-		_g_.m.mcache = nil
-		_g_.m.p = 0
-		goto retry
-	}
 	acquirep(_g_.m.nextp.ptr())
 	_g_.m.nextp = 0
 }
@@ -3857,7 +3774,7 @@ func sigprof(pc, sp, lr uintptr, gp *g, mp *m) {
 				pc = funcPC(_ExternalCode) + sys.PCQuantum
 			}
 			stk[0] = pc
-			if mp.preemptoff != "" || mp.helpgc != 0 {
+			if mp.preemptoff != "" {
 				stk[1] = funcPC(_GC) + sys.PCQuantum
 			} else {
 				stk[1] = funcPC(_System) + sys.PCQuantum
@@ -4634,7 +4551,7 @@ func schedtrace(detailed bool) {
 		if lockedg != nil {
 			id3 = lockedg.goid
 		}
-		print("  M", mp.id, ": p=", id1, " curg=", id2, " mallocing=", mp.mallocing, " throwing=", mp.throwing, " preemptoff=", mp.preemptoff, ""+" locks=", mp.locks, " dying=", mp.dying, " helpgc=", mp.helpgc, " spinning=", mp.spinning, " blocked=", mp.blocked, " lockedg=", id3, "\n")
+		print("  M", mp.id, ": p=", id1, " curg=", id2, " mallocing=", mp.mallocing, " throwing=", mp.throwing, " preemptoff=", mp.preemptoff, ""+" locks=", mp.locks, " dying=", mp.dying, " spinning=", mp.spinning, " blocked=", mp.blocked, " lockedg=", id3, "\n")
 	}
 
 	lock(&allglock)
diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go
index fbca3d3ba6..2f009abdbb 100644
--- a/src/runtime/runtime2.go
+++ b/src/runtime/runtime2.go
@@ -424,7 +424,6 @@ type m struct {
 	locks         int32
 	dying         int32
 	profilehz     int32
-	helpgc        int32
 	spinning      bool // m is out of work and is actively looking for work
 	blocked       bool // m is blocked on a note
 	inwb          bool // m is executing a write barrier
diff --git a/src/runtime/stack.go b/src/runtime/stack.go
index fd9aafd15b..582e94e9d0 100644
--- a/src/runtime/stack.go
+++ b/src/runtime/stack.go
@@ -350,7 +350,7 @@ func stackalloc(n uint32) stack {
 		}
 		var x gclinkptr
 		c := thisg.m.mcache
-		if stackNoCache != 0 || c == nil || thisg.m.preemptoff != "" || thisg.m.helpgc != 0 {
+		if stackNoCache != 0 || c == nil || thisg.m.preemptoff != "" {
 			// c == nil can happen in the guts of exitsyscall or
 			// procresize. Just get a stack from the global pool.
 			// Also don't touch stackcache during gc
@@ -445,7 +445,7 @@ func stackfree(stk stack) {
 		}
 		x := gclinkptr(v)
 		c := gp.m.mcache
-		if stackNoCache != 0 || c == nil || gp.m.preemptoff != "" || gp.m.helpgc != 0 {
+		if stackNoCache != 0 || c == nil || gp.m.preemptoff != "" {
 			lock(&stackpoolmu)
 			stackpoolfree(x, order)
 			unlock(&stackpoolmu)