1
0
mirror of https://github.com/golang/go synced 2024-11-26 03:57:57 -07:00

runtime,runtime/metrics: add metric to track scheduling latencies

This change adds a metric to track scheduling latencies, defined as the
cumulative amount of time a goroutine spends being runnable before
running again. The metric is an approximations and samples instead of
trying to record every goroutine scheduling latency.

This change was primarily authored by mknyszek@google.com.

Change-Id: Ie0be7e6e7be421572eb2317d3dd8dd6f3d6aa152
Reviewed-on: https://go-review.googlesource.com/c/go/+/308933
Trust: Michael Pratt <mpratt@google.com>
Run-TryBot: Michael Pratt <mpratt@google.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
This commit is contained in:
Michael Pratt 2020-08-07 16:28:35 +00:00
parent 105a6e9518
commit bedfeed54a
7 changed files with 81 additions and 1 deletions

View File

@ -81,6 +81,10 @@ type timeHistogram struct {
}
// record adds the given duration to the distribution.
//
// Disallow preemptions and stack growths because this function
// may run in sensitive locations.
//go:nosplit
func (h *timeHistogram) record(duration int64) {
if duration < 0 {
atomic.Xadd64(&h.underflow, 1)

View File

@ -245,6 +245,15 @@ func initMetrics() {
out.scalar = uint64(gcount())
},
},
"/sched/latencies:seconds": {
compute: func(_ *statAggregate, out *metricValue) {
hist := out.float64HistOrInit(timeHistBuckets)
hist.counts[0] = atomic.Load64(&sched.timeToRun.underflow)
for i := range sched.timeToRun.counts {
hist.counts[i+1] = atomic.Load64(&sched.timeToRun.counts[i])
}
},
},
}
metricsInit = true
}

View File

@ -176,6 +176,11 @@ var allDesc = []Description{
Description: "Count of live goroutines.",
Kind: KindUint64,
},
{
Name: "/sched/latencies:seconds",
Description: "Distribution of the time goroutines have spent in the scheduler in a runnable state before actually running.",
Kind: KindFloat64Histogram,
},
}
// All returns a slice of containing metric descriptions for all supported metrics.

View File

@ -139,5 +139,9 @@ Below is the full list of supported metrics, ordered lexicographically.
/sched/goroutines:goroutines
Count of live goroutines.
/sched/latencies:seconds
Distribution of the time goroutines have spent in the scheduler
in a runnable state before actually running.
*/
package metrics

View File

@ -697,6 +697,11 @@ func schedinit() {
sigsave(&_g_.m.sigmask)
initSigmask = _g_.m.sigmask
if offset := unsafe.Offsetof(sched.timeToRun); offset%8 != 0 {
println(offset)
throw("sched.timeToRun not aligned to 8 bytes")
}
goargs()
goenvs()
parsedebugvars()
@ -973,6 +978,37 @@ func casgstatus(gp *g, oldval, newval uint32) {
nextYield = nanotime() + yieldDelay/2
}
}
// Handle tracking for scheduling latencies.
if oldval == _Grunning {
// Track every 8th time a goroutine transitions out of running.
if gp.trackingSeq%gTrackingPeriod == 0 {
gp.tracking = true
}
gp.trackingSeq++
}
if gp.tracking {
now := nanotime()
if oldval == _Grunnable {
// We transitioned out of runnable, so measure how much
// time we spent in this state and add it to
// runnableTime.
gp.runnableTime += now - gp.runnableStamp
gp.runnableStamp = 0
}
if newval == _Grunnable {
// We just transitioned into runnable, so record what
// time that happened.
gp.runnableStamp = now
} else if newval == _Grunning {
// We're transitioning into running, so turn off
// tracking and record how much time we spent in
// runnable.
gp.tracking = false
sched.timeToRun.record(gp.runnableTime)
gp.runnableTime = 0
}
}
}
// casgstatus(gp, oldstatus, Gcopystack), assuming oldstatus is Gwaiting or Grunnable.
@ -4286,6 +4322,11 @@ func newproc1(fn *funcval, argp unsafe.Pointer, narg int32, callergp *g, callerp
if isSystemGoroutine(newg, false) {
atomic.Xadd(&sched.ngsys, +1)
}
// Track initial transition?
newg.trackingSeq = uint8(fastrand())
if newg.trackingSeq%gTrackingPeriod == 0 {
newg.tracking = true
}
casgstatus(newg, _Gdead, _Grunnable)
if _p_.goidcache == _p_.goidcacheend {

View File

@ -462,6 +462,10 @@ type g struct {
raceignore int8 // ignore race detection events
sysblocktraced bool // StartTrace has emitted EvGoInSyscall about this goroutine
tracking bool // whether we're tracking this G for sched latency statistics
trackingSeq uint8 // used to decide whether to track this G
runnableStamp int64 // timestamp of when the G last became runnable, only used when tracking
runnableTime int64 // the amount of time spent runnable, cleared when running, only used when tracking
sysexitticks int64 // cputicks when syscall has returned (for tracing)
traceseq uint64 // trace event sequencer
tracelastp puintptr // last P emitted an event for this goroutine
@ -493,6 +497,10 @@ type g struct {
gcAssistBytes int64
}
// gTrackingPeriod is the number of transitions out of _Grunning between
// latency tracking runs.
const gTrackingPeriod = 8
const (
// tlsSlots is the number of pointer-sized slots reserved for TLS on some platforms,
// like Windows.
@ -824,6 +832,15 @@ type schedt struct {
// Acquire and hold this mutex to block sysmon from interacting
// with the rest of the runtime.
sysmonlock mutex
_ uint32 // ensure timeToRun has 8-byte alignment
// timeToRun is a distribution of scheduling latencies, defined
// as the sum of time a G spends in the _Grunnable state before
// it transitions to _Grunning.
//
// timeToRun is protected by sched.lock.
timeToRun timeHistogram
}
// Values for the flags field of a sigTabT.

View File

@ -21,7 +21,7 @@ func TestSizeof(t *testing.T) {
_32bit uintptr // size on 32bit platforms
_64bit uintptr // size on 64bit platforms
}{
{runtime.G{}, 216, 376}, // g, but exported for testing
{runtime.G{}, 236, 392}, // g, but exported for testing
{runtime.Sudog{}, 56, 88}, // sudog, but exported for testing
}