From 2fbca94db7e14fc1d18162cd203d7afc19b520e8 Mon Sep 17 00:00:00 2001
From: Ian Lance Taylor
Date: Thu, 23 Jan 2020 20:38:20 -0800
Subject: [PATCH] runtime: add goroutines returned by poller to local run queue

In Go 1.13, when the network poller found a list of ready goroutines,
they were added to the global run queue. The timer goroutine would
typically sleep in a futex with a timeout, and when the timeout
expired, the timer goroutine would either be handed off to an idle P
or added to the global run queue. The effect was that on a busy
system with no idle P's, goroutines waiting for timeouts and
goroutines waiting for the network would start at the same priority.

That changed on tip with the new timer code. Now timer functions are
invoked directly from a P, and the functions used by time.Sleep,
time.After, and time.Ticker happen to add the newly ready goroutines
to the local run queue. When a P looks for work, it prefers
goroutines on the local run queue; it only occasionally looks at the
global run queue, and even then it pulls off just one goroutine. So
on a busy system with both active timers and active network
connections, the scheduler can noticeably prefer to run goroutines
waiting for timers over goroutines waiting for the network.

This CL corrects that imbalance by adding goroutines waiting for the
network, when possible, to the local run queue of the P that checked
the network poller. This doesn't affect network poller checks done by
sysmon, but it does affect network poller checks done as each P
enters the scheduler.

This CL also makes injecting a list of goroutines into either the
local or global run queue more efficient by using bulk operations
rather than individual ones.

Change-Id: I85a66ad74e4fc3b458256fb7ab395d06f0d2ffac
Reviewed-on: https://go-review.googlesource.com/c/go/+/216198
Reviewed-by: Michael Knyszek
---
 src/runtime/proc.go | 90 ++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 81 insertions(+), 9 deletions(-)

diff --git a/src/runtime/proc.go b/src/runtime/proc.go
index 2174564637..6f143cbe18 100644
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@@ -2417,7 +2417,13 @@ func resetspinning() {
 	}
 }
 
-// Injects the list of runnable G's into the scheduler and clears glist.
+// injectglist adds each runnable G on the list to some run queue,
+// and clears glist. If there is no current P, they are added to the
+// global queue, and up to npidle M's are started to run them.
+// Otherwise, for each idle P, this adds a G to the global queue
+// and starts an M. Any remaining G's are added to the current P's
+// local run queue.
+// This may temporarily acquire the scheduler lock.
 // Can run concurrently with GC.
 func injectglist(glist *gList) {
 	if glist.empty() {
@@ -2428,18 +2434,52 @@ func injectglist(glist *gList) {
 			traceGoUnpark(gp, 0)
 		}
 	}
-	lock(&sched.lock)
-	var n int
-	for n = 0; !glist.empty(); n++ {
-		gp := glist.pop()
+
+	// Mark all the goroutines as runnable before we put them
+	// on the run queues.
+	head := glist.head.ptr()
+	var tail *g
+	qsize := 0
+	for gp := head; gp != nil; gp = gp.schedlink.ptr() {
+		tail = gp
+		qsize++
 		casgstatus(gp, _Gwaiting, _Grunnable)
-		globrunqput(gp)
+	}
+
+	// Turn the gList into a gQueue.
+	var q gQueue
+	q.head.set(head)
+	q.tail.set(tail)
+	*glist = gList{}
+
+	startIdle := func(n int) {
+		for ; n != 0 && sched.npidle != 0; n-- {
+			startm(nil, false)
+		}
+	}
+
+	pp := getg().m.p.ptr()
+	if pp == nil {
+		lock(&sched.lock)
+		globrunqputbatch(&q, int32(qsize))
+		unlock(&sched.lock)
+		startIdle(qsize)
+		return
+	}
+
+	lock(&sched.lock)
+	npidle := int(sched.npidle)
+	var n int
+	for n = 0; n < npidle && !q.empty(); n++ {
+		globrunqput(q.pop())
 	}
 	unlock(&sched.lock)
-	for ; n != 0 && sched.npidle != 0; n-- {
-		startm(nil, false)
+	startIdle(n)
+	qsize -= n
+
+	if !q.empty() {
+		runqputbatch(pp, &q, qsize)
 	}
-	*glist = gList{}
 }
 
 // One round of scheduler: find a runnable goroutine and execute it.
@@ -5015,6 +5055,38 @@ func runqputslow(_p_ *p, gp *g, h, t uint32) bool {
 	return true
 }
 
+// runqputbatch tries to put all the G's on q on the local runnable queue.
+// If the queue is full, they are put on the global queue; in that case
+// this will temporarily acquire the scheduler lock.
+// Executed only by the owner P.
+func runqputbatch(pp *p, q *gQueue, qsize int) {
+	h := atomic.LoadAcq(&pp.runqhead)
+	t := pp.runqtail
+	n := uint32(0)
+	for !q.empty() && t-h < uint32(len(pp.runq)) {
+		gp := q.pop()
+		pp.runq[t%uint32(len(pp.runq))].set(gp)
+		t++
+		n++
+	}
+	qsize -= int(n)
+
+	if randomizeScheduler {
+		off := func(o uint32) uint32 {
+			return (pp.runqtail + o) % uint32(len(pp.runq))
+		}
+		for i := uint32(1); i < n; i++ {
+			j := fastrandn(i + 1)
+			pp.runq[off(i)], pp.runq[off(j)] = pp.runq[off(j)], pp.runq[off(i)]
+		}
+	}
+
+	atomic.StoreRel(&pp.runqtail, t)
+	if !q.empty() {
+		globrunqputbatch(q, int32(qsize))
+	}
+}
+
 // Get g from local runnable queue.
 // If inheritTime is true, gp should inherit the remaining time in the
 // current time slice. Otherwise, it should start a new time slice.
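
Note (not part of the patch): a minimal, self-contained sketch of the distribution policy the new injectglist follows. The distribute function, its slice-based queues, and the names ready, idlePs, and localCap are invented for this illustration and do not exist in the runtime; they stand in for the gQueue, the global run queue, and the per-P ring buffer. One goroutine per idle P is handed to the global run queue so an idle M can be started for it, and the remainder goes onto the current P's local run queue in one batch, spilling to the global queue only if the local ring is full.

package main

import "fmt"

// distribute is a toy model of how injectglist splits a batch of newly
// runnable goroutines after this change: one G per idle P goes to the
// global run queue (so an M can be started for it), and the rest are
// appended to the current P's local run queue in bulk. Plain slices
// stand in for the runtime's queues; this is an illustration, not the
// actual scheduler code.
func distribute(ready []string, idlePs, localCap int) (global, local []string) {
	// Hand one goroutine to the global queue per idle P.
	for idlePs > 0 && len(ready) > 0 {
		global = append(global, ready[0])
		ready = ready[1:]
		idlePs--
	}
	// Batch the remainder onto the local run queue. Anything that does
	// not fit (the local queue is a fixed-size ring in the runtime)
	// spills to the global queue, as runqputbatch does.
	for _, g := range ready {
		if len(local) < localCap {
			local = append(local, g)
		} else {
			global = append(global, g)
		}
	}
	return global, local
}

func main() {
	ready := []string{"g1", "g2", "g3", "g4", "g5"}
	global, local := distribute(ready, 2, 256)
	fmt.Println("global run queue:", global) // g1, g2: picked up by idle Ps
	fmt.Println("local run queue:", local)   // g3, g4, g5: stay with the polling P
}

Keeping the bulk of the batch on the local run queue preserves locality with the P that polled the network, while the per-idle-P handoff to the global queue keeps otherwise-idle Ps busy.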