From e09dbaa1de2e323d35a6b8c2617fc0c4ae0505f8 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Thu, 5 Oct 2017 12:16:45 -0400 Subject: [PATCH] runtime: schedule fractional workers on all Ps Currently only a single P can run a fractional mark worker at a time. This doesn't let us spread out the load, so it gets concentrated on whatever unlucky P picks up the token to run a fractional worker. This can significantly delay goroutines on that P. This commit changes this scheduling rule so each P separately schedules fractional workers. This can significantly reduce the load on any individual P and allows workers to self-preempt earlier. It does have the downside that it's possible for all Ps to be in fractional workers simultaneously (an effect STW). Updates #21698. Change-Id: Ia1e300c422043fa62bb4e3dd23c6232d81e4419c Reviewed-on: https://go-review.googlesource.com/68574 Run-TryBot: Austin Clements TryBot-Result: Gobot Gobot Reviewed-by: Rick Hudson --- src/runtime/mgc.go | 72 +++++++++++++++-------------------------- src/runtime/runtime2.go | 7 ++-- 2 files changed, 30 insertions(+), 49 deletions(-) diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index f070fc2f3d..c1edd17842 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -396,23 +396,18 @@ type gcControllerState struct { assistBytesPerWork float64 // fractionalUtilizationGoal is the fraction of wall clock - // time that should be spent in the fractional mark worker. - // For example, if the overall mark utilization goal is 25% - // and GOMAXPROCS is 6, one P will be a dedicated mark worker - // and this will be set to 0.5 so that 50% of the time some P - // is in a fractional mark worker. This is computed at the - // beginning of each cycle. + // time that should be spent in the fractional mark worker on + // each P that isn't running a dedicated worker. + // + // For example, if the utilization goal is 25% and there are + // no dedicated workers, this will be 0.25. If there goal is + // 25%, there is one dedicated worker, and GOMAXPROCS is 5, + // this will be 0.05 to make up the missing 5%. + // + // If this is zero, no fractional workers are needed. fractionalUtilizationGoal float64 _ [sys.CacheLineSize]byte - - // fractionalMarkWorkersNeeded is the number of fractional - // mark workers that need to be started. This is either 0 or - // 1. This is potentially updated atomically at every - // scheduling point (hence it gets its own cache line). - fractionalMarkWorkersNeeded int64 - - _ [sys.CacheLineSize]byte } // startCycle resets the GC controller's state and computes estimates @@ -471,19 +466,15 @@ func (c *gcControllerState) startCycle() { // Too many dedicated workers. c.dedicatedMarkWorkersNeeded-- } - c.fractionalUtilizationGoal = totalUtilizationGoal - float64(c.dedicatedMarkWorkersNeeded) + c.fractionalUtilizationGoal = (totalUtilizationGoal - float64(c.dedicatedMarkWorkersNeeded)) / float64(gomaxprocs) } else { c.fractionalUtilizationGoal = 0 } - if c.fractionalUtilizationGoal > 0 { - c.fractionalMarkWorkersNeeded = 1 - } else { - c.fractionalMarkWorkersNeeded = 0 - } // Clear per-P state for _, p := range allp { p.gcAssistTime = 0 + p.gcFractionalMarkTime = 0 } // Compute initial values for controls that are updated @@ -496,7 +487,7 @@ func (c *gcControllerState) startCycle() { work.initialHeapLive>>20, "->", memstats.next_gc>>20, " MB)", " workers=", c.dedicatedMarkWorkersNeeded, - "+", c.fractionalMarkWorkersNeeded, "\n") + "+", c.fractionalUtilizationGoal, "\n") } } @@ -702,31 +693,20 @@ func (c *gcControllerState) findRunnableGCWorker(_p_ *p) *g { // This P is now dedicated to marking until the end of // the concurrent mark phase. _p_.gcMarkWorkerMode = gcMarkWorkerDedicatedMode + } else if c.fractionalUtilizationGoal == 0 { + // No need for fractional workers. + return nil } else { - if !decIfPositive(&c.fractionalMarkWorkersNeeded) { - // No more workers are need right now. - return nil - } - - // This P has picked the token for the fractional worker. - // Is the GC currently under or at the utilization goal? - // If so, do more work. + // Is this P behind on the fractional utilization + // goal? // // This should be kept in sync with pollFractionalWorkerExit. - - // TODO(austin): We could fast path this and basically - // eliminate contention on c.fractionalMarkWorkersNeeded by - // precomputing the minimum time at which it's worth - // next scheduling the fractional worker. Then Ps - // don't have to fight in the window where we've - // passed that deadline and no one has started the - // worker yet. - delta := nanotime() - c.markStartTime - if delta > 0 && float64(c.fractionalMarkTime)/float64(delta) > c.fractionalUtilizationGoal { - // Nope, we'd overshoot the utilization goal - atomic.Xaddint64(&c.fractionalMarkWorkersNeeded, +1) + delta := nanotime() - gcController.markStartTime + if delta > 0 && float64(_p_.gcFractionalMarkTime)/float64(delta) > c.fractionalUtilizationGoal { + // Nope. No need to run a fractional worker. return nil } + // Run a fractional worker. _p_.gcMarkWorkerMode = gcMarkWorkerFractionalMode } @@ -751,8 +731,7 @@ func pollFractionalWorkerExit() bool { return true } p := getg().m.p.ptr() - // Account for time since starting this worker. - selfTime := gcController.fractionalMarkTime + (now - p.gcMarkWorkerStartTime) + selfTime := p.gcFractionalMarkTime + (now - p.gcMarkWorkerStartTime) // Add some slack to the utilization goal so that the // fractional worker isn't behind again the instant it exits. return float64(selfTime)/float64(delta) > 1.2*gcController.fractionalUtilizationGoal @@ -1387,7 +1366,8 @@ top: // TODO(austin): Should dedicated workers keep an eye on this // and exit gcDrain promptly? atomic.Xaddint64(&gcController.dedicatedMarkWorkersNeeded, -0xffffffff) - atomic.Xaddint64(&gcController.fractionalMarkWorkersNeeded, -0xffffffff) + prevFractionalGoal := gcController.fractionalUtilizationGoal + gcController.fractionalUtilizationGoal = 0 if !gcBlackenPromptly { // Transition from mark 1 to mark 2. @@ -1430,7 +1410,7 @@ top: // Now we can start up mark 2 workers. atomic.Xaddint64(&gcController.dedicatedMarkWorkersNeeded, 0xffffffff) - atomic.Xaddint64(&gcController.fractionalMarkWorkersNeeded, 0xffffffff) + gcController.fractionalUtilizationGoal = prevFractionalGoal incnwait := atomic.Xadd(&work.nwait, +1) if incnwait == work.nproc && !gcMarkWorkAvailable(nil) { @@ -1849,7 +1829,7 @@ func gcBgMarkWorker(_p_ *p) { atomic.Xaddint64(&gcController.dedicatedMarkWorkersNeeded, 1) case gcMarkWorkerFractionalMode: atomic.Xaddint64(&gcController.fractionalMarkTime, duration) - atomic.Xaddint64(&gcController.fractionalMarkWorkersNeeded, 1) + atomic.Xaddint64(&_p_.gcFractionalMarkTime, duration) case gcMarkWorkerIdleMode: atomic.Xaddint64(&gcController.idleMarkTime, duration) } diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go index ff8b3ff74c..ca796169fe 100644 --- a/src/runtime/runtime2.go +++ b/src/runtime/runtime2.go @@ -522,9 +522,10 @@ type p struct { palloc persistentAlloc // per-P to avoid mutex // Per-P GC state - gcAssistTime int64 // Nanoseconds in assistAlloc - gcBgMarkWorker guintptr - gcMarkWorkerMode gcMarkWorkerMode + gcAssistTime int64 // Nanoseconds in assistAlloc + gcFractionalMarkTime int64 // Nanoseconds in fractional mark worker + gcBgMarkWorker guintptr + gcMarkWorkerMode gcMarkWorkerMode // gcMarkWorkerStartTime is the nanotime() at which this mark // worker started.