
runtime: add goroutines returned by poller to local run queue

In Go 1.13, when the network poller found a list of ready goroutines,
they were added to the global run queue. The timer goroutine would
typically sleep in a futex with a timeout, and when the timeout
expired the timer goroutine would either be handed off to an idle P
or added to the global run queue. The effect was that on a busy system
with no idle P's, goroutines waiting for timeouts and goroutines waiting
for the network would start at the same priority.

That changed on tip with the new timer code. Now timer functions are
invoked directly from a P, and it happens that the functions used
by time.Sleep and time.After and time.Ticker add the newly ready
goroutines to the local run queue. When a P looks for work it will
prefer goroutines on the local run queue; in fact it will only
occasionally look at the global run queue, and even when it does it
will just pull one goroutine off. So on a busy system with both active
timers and active network connections the system can noticeably prefer
to run goroutines waiting for timers rather than goroutines waiting
for the network.
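
To make the bias concrete, here is a toy simulation, not runtime code
and with made-up names: a P that always prefers its local queue and,
mirroring the scheduler's occasional fairness check (once per 61
scheduler ticks in schedule, taking a single G), only rarely touches
the global queue.

package main

import "fmt"

func main() {
	local, global := 100, 5 // timer-readied vs. poller-readied G's
	var order []string
	for tick := 1; local+global > 0; tick++ {
		switch {
		case tick%61 == 0 && global > 0:
			// Occasional fairness check: pull one G off the global queue.
			global--
			order = append(order, "net")
		case local > 0:
			// Prefer the local run queue.
			local--
			order = append(order, "timer")
		default:
			global--
			order = append(order, "net")
		}
	}
	// Only one "net" entry appears before all 100 "timer" entries finish.
	fmt.Println(order)
}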

This CL undoes that change by, when possible, adding goroutines
waiting for the network to the local run queue of the P that checked
the network poller.
This doesn't affect network poller checks done by sysmon, but it
does affect network poller checks done as each P enters the scheduler.

This CL also makes injecting a list into either the local or global run
queue more efficient, using bulk operations rather than individual ones.
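
The bulk path relies on the run queues being linked lists that can be
spliced in constant time. Below is a minimal standalone sketch of that
idea; the types only loosely mirror the runtime's gQueue and the names
are illustrative.

package main

import "fmt"

type g struct {
	id   int
	next *g
}

type gQueue struct {
	head, tail *g
}

// push appends a single G: the per-element operation the old code
// performed under the scheduler lock once per goroutine.
func (q *gQueue) push(gp *g) {
	if q.head == nil {
		q.head = gp
	} else {
		q.tail.next = gp
	}
	q.tail = gp
}

// pushBackAll splices a whole queue onto q in O(1), so a batch can be
// injected with a single lock acquisition.
func (q *gQueue) pushBackAll(q2 gQueue) {
	if q2.head == nil {
		return
	}
	if q.head == nil {
		*q = q2
		return
	}
	q.tail.next = q2.head
	q.tail = q2.tail
}

func main() {
	var batch, global gQueue
	for i := 1; i <= 3; i++ {
		batch.push(&g{id: i})
	}
	global.pushBackAll(batch) // one splice instead of three pushes
	for gp := global.head; gp != nil; gp = gp.next {
		fmt.Println(gp.id)
	}
}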

Change-Id: I85a66ad74e4fc3b458256fb7ab395d06f0d2ffac
Reviewed-on: https://go-review.googlesource.com/c/go/+/216198
Reviewed-by: Michael Knyszek <mknyszek@google.com>
Ian Lance Taylor 2020-01-23 20:38:20 -08:00
parent ff1eb42865
commit 2fbca94db7

src/runtime/proc.go

@@ -2417,7 +2417,13 @@ func resetspinning() {
 	}
 }
 
-// Injects the list of runnable G's into the scheduler and clears glist.
+// injectglist adds each runnable G on the list to some run queue,
+// and clears glist. If there is no current P, they are added to the
+// global queue, and up to npidle M's are started to run them.
+// Otherwise, for each idle P, this adds a G to the global queue
+// and starts an M. Any remaining G's are added to the current P's
+// local run queue.
+// This may temporarily acquire the scheduler lock.
 // Can run concurrently with GC.
 func injectglist(glist *gList) {
 	if glist.empty() {
@@ -2428,18 +2434,52 @@ func injectglist(glist *gList) {
 			traceGoUnpark(gp, 0)
 		}
 	}
-	lock(&sched.lock)
-	var n int
-	for n = 0; !glist.empty(); n++ {
-		gp := glist.pop()
-		casgstatus(gp, _Gwaiting, _Grunnable)
-		globrunqput(gp)
-	}
-	unlock(&sched.lock)
-	for ; n != 0 && sched.npidle != 0; n-- {
-		startm(nil, false)
-	}
-	*glist = gList{}
+
+	// Mark all the goroutines as runnable before we put them
+	// on the run queues.
+	head := glist.head.ptr()
+	var tail *g
+	qsize := 0
+	for gp := head; gp != nil; gp = gp.schedlink.ptr() {
+		tail = gp
+		qsize++
+		casgstatus(gp, _Gwaiting, _Grunnable)
+	}
+
+	// Turn the gList into a gQueue.
+	var q gQueue
+	q.head.set(head)
+	q.tail.set(tail)
+	*glist = gList{}
+
+	startIdle := func(n int) {
+		for ; n != 0 && sched.npidle != 0; n-- {
+			startm(nil, false)
+		}
+	}
+
+	pp := getg().m.p.ptr()
+	if pp == nil {
+		lock(&sched.lock)
+		globrunqputbatch(&q, int32(qsize))
+		unlock(&sched.lock)
+		startIdle(qsize)
+		return
+	}
+
+	lock(&sched.lock)
+	npidle := int(sched.npidle)
+	var n int
+	for n = 0; n < npidle && !q.empty(); n++ {
+		globrunqput(q.pop())
+	}
+	unlock(&sched.lock)
+	startIdle(n)
+	qsize -= n
+
+	if !q.empty() {
+		runqputbatch(pp, &q, qsize)
+	}
 }
 
 // One round of scheduler: find a runnable goroutine and execute it.
@@ -5015,6 +5055,40 @@ func runqputslow(_p_ *p, gp *g, h, t uint32) bool {
 	return true
 }
 
+// runqputbatch tries to put all the G's on q on the local runnable queue.
+// If the queue is full, they are put on the global queue; in that case
+// this will temporarily acquire the scheduler lock.
+// Executed only by the owner P.
+func runqputbatch(pp *p, q *gQueue, qsize int) {
+	h := atomic.LoadAcq(&pp.runqhead)
+	t := pp.runqtail
+	n := uint32(0)
+	for !q.empty() && t-h < uint32(len(pp.runq)) {
+		gp := q.pop()
+		pp.runq[t%uint32(len(pp.runq))].set(gp)
+		t++
+		n++
+	}
+	qsize -= int(n)
+
+	if randomizeScheduler {
+		off := func(o uint32) uint32 {
+			return (pp.runqtail + o) % uint32(len(pp.runq))
+		}
+		for i := uint32(1); i < n; i++ {
+			j := fastrandn(i + 1)
+			pp.runq[off(i)], pp.runq[off(j)] = pp.runq[off(j)], pp.runq[off(i)]
+		}
+	}
+
+	atomic.StoreRel(&pp.runqtail, t)
+	if !q.empty() {
+		lock(&sched.lock)
+		globrunqputbatch(q, int32(qsize))
+		unlock(&sched.lock)
+	}
+}
+
 // Get g from local runnable queue.
 // If inheritTime is true, gp should inherit the remaining time in the
 // current time slice. Otherwise, it should start a new time slice.
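
One detail of runqputbatch worth calling out: the randomizeScheduler
block is a Fisher–Yates shuffle over just the newly appended slots of
the ring buffer. A standalone sketch of the same pass on a plain
slice, substituting math/rand for the runtime's fastrandn:

package main

import (
	"fmt"
	"math/rand"
)

func main() {
	ring := []int{10, 20, 30, 40, 50} // stands in for the new tail of pp.runq
	for i := 1; i < len(ring); i++ {
		j := rand.Intn(i + 1) // fastrandn(i+1) in the runtime
		ring[i], ring[j] = ring[j], ring[i]
	}
	fmt.Println(ring) // a uniformly random permutation of the batch
}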