diff --git a/src/runtime/proc.go b/src/runtime/proc.go index d386797784..c30ce7a5a3 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -1787,11 +1787,12 @@ func findrunnable() (gp *g, inheritTime bool) { // an M. top: + _p_ := _g_.m.p.ptr() if sched.gcwaiting != 0 { gcstopm() goto top } - if _g_.m.p.ptr().runSafePointFn != 0 { + if _p_.runSafePointFn != 0 { runSafePointFn() } if fingwait && fingwake { @@ -1801,14 +1802,14 @@ top: } // local runq - if gp, inheritTime := runqget(_g_.m.p.ptr()); gp != nil { + if gp, inheritTime := runqget(_p_); gp != nil { return gp, inheritTime } // global runq if sched.runqsize != 0 { lock(&sched.lock) - gp := globrunqget(_g_.m.p.ptr(), 0) + gp := globrunqget(_p_, 0) unlock(&sched.lock) if gp != nil { return gp, false @@ -1833,31 +1834,33 @@ top: } } + // Steal work from other P's. + procs := uint32(gomaxprocs) + if atomic.Load(&sched.npidle) == procs-1 { + // Either GOMAXPROCS=1 or everybody, except for us, is idle already. + // New work can appear from returning syscall/cgocall, network or timers. + // None of these submits to local run queues, so there is no point in stealing. + goto stop + } // If number of spinning M's >= number of busy P's, block. // This is necessary to prevent excessive CPU consumption // when GOMAXPROCS>>1 but the program parallelism is low. 
- if !_g_.m.spinning && 2*atomic.Load(&sched.nmspinning) >= uint32(gomaxprocs)-atomic.Load(&sched.npidle) { // TODO: fast atomic + if !_g_.m.spinning && 2*atomic.Load(&sched.nmspinning) >= procs-atomic.Load(&sched.npidle) { // TODO: fast atomic goto stop } if !_g_.m.spinning { _g_.m.spinning = true atomic.Xadd(&sched.nmspinning, 1) } - // random steal from other P's - for i := 0; i < int(4*gomaxprocs); i++ { - if sched.gcwaiting != 0 { - goto top - } - _p_ := allp[fastrand1()%uint32(gomaxprocs)] - var gp *g - if _p_ == _g_.m.p.ptr() { - gp, _ = runqget(_p_) - } else { - stealRunNextG := i > 2*int(gomaxprocs) // first look for ready queues with more than 1 g - gp = runqsteal(_g_.m.p.ptr(), _p_, stealRunNextG) - } - if gp != nil { - return gp, false + for i := 0; i < 4; i++ { + for enum := stealOrder.start(fastrand1()); !enum.done(); enum.next() { + if sched.gcwaiting != 0 { + goto top + } + stealRunNextG := i > 2 // first look for ready queues with more than 1 g + if gp := runqsteal(_p_, allp[enum.position()], stealRunNextG); gp != nil { + return gp, false + } } } @@ -1866,7 +1869,7 @@ stop: // We have nothing to do. If we're in the GC mark phase, can // safely scan and blacken objects, and have work to do, run // idle-time marking rather than give up the P. 
- if _p_ := _g_.m.p.ptr(); gcBlackenEnabled != 0 && _p_.gcBgMarkWorker != 0 && gcMarkWorkAvailable(_p_) { + if gcBlackenEnabled != 0 && _p_.gcBgMarkWorker != 0 && gcMarkWorkAvailable(_p_) { _p_.gcMarkWorkerMode = gcMarkWorkerIdleMode gp := _p_.gcBgMarkWorker.ptr() casgstatus(gp, _Gwaiting, _Grunnable) @@ -1878,16 +1881,18 @@ stop: // return P and block lock(&sched.lock) - if sched.gcwaiting != 0 || _g_.m.p.ptr().runSafePointFn != 0 { + if sched.gcwaiting != 0 || _p_.runSafePointFn != 0 { unlock(&sched.lock) goto top } if sched.runqsize != 0 { - gp := globrunqget(_g_.m.p.ptr(), 0) + gp := globrunqget(_p_, 0) unlock(&sched.lock) return gp, false } - _p_ := releasep() + if releasep() != _p_ { + throw("findrunnable: wrong p") + } pidleput(_p_) unlock(&sched.lock) @@ -3265,6 +3270,7 @@ func procresize(nprocs int32) *p { runnablePs = p } } + stealOrder.reset(uint32(nprocs)) var int32p *int32 = &gomaxprocs // make compiler check that gomaxprocs is an int32 atomic.Store((*uint32)(unsafe.Pointer(int32p)), uint32(nprocs)) return runnablePs @@ -4121,3 +4127,59 @@ func sync_runtime_canSpin(i int) bool { func sync_runtime_doSpin() { procyield(active_spin_cnt) } + +var stealOrder randomOrder + +// randomOrder/randomEnum are helper types for randomized work stealing. +// They allow one to enumerate all Ps in different pseudo-random orders without repetitions. +// The algorithm is based on the fact that if we have X such that X and GOMAXPROCS +// are coprime, then a sequence of (i + X) % GOMAXPROCS gives the required enumeration. 
+type randomOrder struct { + count uint32 + coprimes []uint32 +} + +type randomEnum struct { + i uint32 + count uint32 + pos uint32 + inc uint32 +} + +func (ord *randomOrder) reset(count uint32) { + ord.count = count + ord.coprimes = ord.coprimes[:0] + for i := uint32(1); i <= count; i++ { + if gcd(i, count) == 1 { + ord.coprimes = append(ord.coprimes, i) + } + } +} + +func (ord *randomOrder) start(i uint32) randomEnum { + return randomEnum{ + count: ord.count, + pos: i % ord.count, + inc: ord.coprimes[i%uint32(len(ord.coprimes))], + } +} + +func (enum *randomEnum) done() bool { + return enum.i == enum.count +} + +func (enum *randomEnum) next() { + enum.i++ + enum.pos = (enum.pos + enum.inc) % enum.count +} + +func (enum *randomEnum) position() uint32 { + return enum.pos +} + +func gcd(a, b uint32) uint32 { + for b != 0 { + a, b = b, a%b + } + return a +} diff --git a/src/runtime/proc_runtime_test.go b/src/runtime/proc_runtime_test.go new file mode 100644 index 0000000000..a7bde2c6df --- /dev/null +++ b/src/runtime/proc_runtime_test.go @@ -0,0 +1,33 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Proc unit tests. In runtime package so can use runtime guts. 
+ +package runtime + +func RunStealOrderTest() { + var ord randomOrder + for procs := 1; procs <= 64; procs++ { + ord.reset(uint32(procs)) + if procs >= 3 && len(ord.coprimes) < 2 { + panic("too few coprimes") + } + for co := 0; co < len(ord.coprimes); co++ { + enum := ord.start(uint32(co)) + checked := make([]bool, procs) + for p := 0; p < procs; p++ { + x := enum.position() + if checked[x] { + println("procs:", procs, "inc:", enum.inc) + panic("duplicate during enumeration") + } + checked[x] = true + enum.next() + } + if !enum.done() { + panic("not done") + } + } + } +} diff --git a/src/runtime/proc_test.go b/src/runtime/proc_test.go index fd12945be0..b1d7f75870 100644 --- a/src/runtime/proc_test.go +++ b/src/runtime/proc_test.go @@ -689,3 +689,7 @@ func matmult(done chan<- struct{}, A, B, C Matrix, i0, i1, j0, j1, k0, k1, thres done <- struct{}{} } } + +func TestStealOrder(t *testing.T) { + runtime.RunStealOrderTest() +}