From a0fc306023d77e5605203c14ca92f368bdbce3ae Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Wed, 13 May 2015 17:08:16 -0400 Subject: [PATCH] runtime: eliminate runqvictims and a copy from runqsteal Currently, runqsteal steals Gs from another P into an intermediate buffer and then copies those Gs into the current P's run queue. This intermediate buffer itself was moved from the stack to the P in commit c4fe503 to eliminate the cost of zeroing it on every steal. This commit follows up c4fe503 by stealing directly into the current P's run queue, which eliminates the copy and the need for the intermediate buffer. The update to the tail pointer is only committed once the entire steal operation has succeeded, so the semantics of stealing do not change. Change-Id: Icdd7a0eb82668980bf42c0154b51eef6419fdd51 Reviewed-on: https://go-review.googlesource.com/9998 Reviewed-by: Russ Cox Run-TryBot: Austin Clements --- src/runtime/proc1.go | 21 ++++++++++----------- src/runtime/runtime2.go | 7 +++---- 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/src/runtime/proc1.go b/src/runtime/proc1.go index 8aeacee747..4ce756b692 100644 --- a/src/runtime/proc1.go +++ b/src/runtime/proc1.go @@ -3460,10 +3460,11 @@ func runqget(_p_ *p) (gp *g, inheritTime bool) { } } -// Grabs a batch of goroutines from local runnable queue. -// batch array must be of size len(p->runq)/2. Returns number of grabbed goroutines. +// Grabs a batch of goroutines from _p_'s runnable queue into batch. +// Batch is a ring buffer starting at batchHead. +// Returns number of grabbed goroutines. // Can be executed by any P. -func runqgrab(_p_ *p, batch []*g, stealRunNextG bool) uint32 { +func runqgrab(_p_ *p, batch *[256]*g, batchHead uint32, stealRunNextG bool) uint32 { for { h := atomicload(&_p_.runqhead) // load-acquire, synchronize with other consumers t := atomicload(&_p_.runqtail) // load-acquire, synchronize with the producer @@ -3484,7 +3485,7 @@ func runqgrab(_p_ *p, batch []*g, stealRunNextG bool) uint32 { if !_p_.runnext.cas(next, 0) { continue } - batch[0] = next.ptr() + batch[batchHead%uint32(len(batch))] = next.ptr() return 1 } } @@ -3494,7 +3495,8 @@ func runqgrab(_p_ *p, batch []*g, stealRunNextG bool) uint32 { continue } for i := uint32(0); i < n; i++ { - batch[i] = _p_.runq[(h+i)%uint32(len(_p_.runq))] + g := _p_.runq[(h+i)%uint32(len(_p_.runq))] + batch[(batchHead+i)%uint32(len(batch))] = g } if cas(&_p_.runqhead, h, h+n) { // cas-release, commits consume return n @@ -3506,23 +3508,20 @@ func runqgrab(_p_ *p, batch []*g, stealRunNextG bool) uint32 { // and put onto local runnable queue of p. // Returns one of the stolen elements (or nil if failed). func runqsteal(_p_, p2 *p, stealRunNextG bool) *g { - n := runqgrab(p2, _p_.runqvictims[:], stealRunNextG) + t := _p_.runqtail + n := runqgrab(p2, &_p_.runq, t, stealRunNextG) if n == 0 { return nil } n-- - gp := _p_.runqvictims[n] + gp := _p_.runq[(t+n)%uint32(len(_p_.runq))] if n == 0 { return gp } h := atomicload(&_p_.runqhead) // load-acquire, synchronize with consumers - t := _p_.runqtail if t-h+n >= uint32(len(_p_.runq)) { throw("runqsteal: runq overflow") } - for i := uint32(0); i < n; i++ { - _p_.runq[(t+i)%uint32(len(_p_.runq))] = _p_.runqvictims[i] - } atomicstore(&_p_.runqtail, t+n) // store-release, makes the item available for consumption return gp } diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go index ae93bb8dcb..8dfece5845 100644 --- a/src/runtime/runtime2.go +++ b/src/runtime/runtime2.go @@ -353,10 +353,9 @@ type p struct { goidcacheend uint64 // Queue of runnable goroutines. Accessed without lock. - runqhead uint32 - runqtail uint32 - runq [256]*g - runqvictims [128]*g // Used to stage victims from another p's runq + runqhead uint32 + runqtail uint32 + runq [256]*g // runnext, if non-nil, is a runnable G that was ready'd by // the current G and should be run next instead of what's in // runq if there's time remaining in the running G's time