mirror of
https://github.com/golang/go
synced 2024-11-26 03:57:57 -07:00
runtime,runtime/metrics: add metric to track scheduling latencies
This change adds a metric to track scheduling latencies, defined as the cumulative amount of time a goroutine spends being runnable before running again. The metric is an approximations and samples instead of trying to record every goroutine scheduling latency. This change was primarily authored by mknyszek@google.com. Change-Id: Ie0be7e6e7be421572eb2317d3dd8dd6f3d6aa152 Reviewed-on: https://go-review.googlesource.com/c/go/+/308933 Trust: Michael Pratt <mpratt@google.com> Run-TryBot: Michael Pratt <mpratt@google.com> TryBot-Result: Go Bot <gobot@golang.org> Reviewed-by: Michael Knyszek <mknyszek@google.com>
This commit is contained in:
parent
105a6e9518
commit
bedfeed54a
@ -81,6 +81,10 @@ type timeHistogram struct {
|
||||
}
|
||||
|
||||
// record adds the given duration to the distribution.
|
||||
//
|
||||
// Disallow preemptions and stack growths because this function
|
||||
// may run in sensitive locations.
|
||||
//go:nosplit
|
||||
func (h *timeHistogram) record(duration int64) {
|
||||
if duration < 0 {
|
||||
atomic.Xadd64(&h.underflow, 1)
|
||||
|
@ -245,6 +245,15 @@ func initMetrics() {
|
||||
out.scalar = uint64(gcount())
|
||||
},
|
||||
},
|
||||
"/sched/latencies:seconds": {
|
||||
compute: func(_ *statAggregate, out *metricValue) {
|
||||
hist := out.float64HistOrInit(timeHistBuckets)
|
||||
hist.counts[0] = atomic.Load64(&sched.timeToRun.underflow)
|
||||
for i := range sched.timeToRun.counts {
|
||||
hist.counts[i+1] = atomic.Load64(&sched.timeToRun.counts[i])
|
||||
}
|
||||
},
|
||||
},
|
||||
}
|
||||
metricsInit = true
|
||||
}
|
||||
|
@ -176,6 +176,11 @@ var allDesc = []Description{
|
||||
Description: "Count of live goroutines.",
|
||||
Kind: KindUint64,
|
||||
},
|
||||
{
|
||||
Name: "/sched/latencies:seconds",
|
||||
Description: "Distribution of the time goroutines have spent in the scheduler in a runnable state before actually running.",
|
||||
Kind: KindFloat64Histogram,
|
||||
},
|
||||
}
|
||||
|
||||
// All returns a slice of containing metric descriptions for all supported metrics.
|
||||
|
@ -139,5 +139,9 @@ Below is the full list of supported metrics, ordered lexicographically.
|
||||
|
||||
/sched/goroutines:goroutines
|
||||
Count of live goroutines.
|
||||
|
||||
/sched/latencies:seconds
|
||||
Distribution of the time goroutines have spent in the scheduler
|
||||
in a runnable state before actually running.
|
||||
*/
|
||||
package metrics
|
||||
|
@ -697,6 +697,11 @@ func schedinit() {
|
||||
sigsave(&_g_.m.sigmask)
|
||||
initSigmask = _g_.m.sigmask
|
||||
|
||||
if offset := unsafe.Offsetof(sched.timeToRun); offset%8 != 0 {
|
||||
println(offset)
|
||||
throw("sched.timeToRun not aligned to 8 bytes")
|
||||
}
|
||||
|
||||
goargs()
|
||||
goenvs()
|
||||
parsedebugvars()
|
||||
@ -973,6 +978,37 @@ func casgstatus(gp *g, oldval, newval uint32) {
|
||||
nextYield = nanotime() + yieldDelay/2
|
||||
}
|
||||
}
|
||||
|
||||
// Handle tracking for scheduling latencies.
|
||||
if oldval == _Grunning {
|
||||
// Track every 8th time a goroutine transitions out of running.
|
||||
if gp.trackingSeq%gTrackingPeriod == 0 {
|
||||
gp.tracking = true
|
||||
}
|
||||
gp.trackingSeq++
|
||||
}
|
||||
if gp.tracking {
|
||||
now := nanotime()
|
||||
if oldval == _Grunnable {
|
||||
// We transitioned out of runnable, so measure how much
|
||||
// time we spent in this state and add it to
|
||||
// runnableTime.
|
||||
gp.runnableTime += now - gp.runnableStamp
|
||||
gp.runnableStamp = 0
|
||||
}
|
||||
if newval == _Grunnable {
|
||||
// We just transitioned into runnable, so record what
|
||||
// time that happened.
|
||||
gp.runnableStamp = now
|
||||
} else if newval == _Grunning {
|
||||
// We're transitioning into running, so turn off
|
||||
// tracking and record how much time we spent in
|
||||
// runnable.
|
||||
gp.tracking = false
|
||||
sched.timeToRun.record(gp.runnableTime)
|
||||
gp.runnableTime = 0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// casgstatus(gp, oldstatus, Gcopystack), assuming oldstatus is Gwaiting or Grunnable.
|
||||
@ -4286,6 +4322,11 @@ func newproc1(fn *funcval, argp unsafe.Pointer, narg int32, callergp *g, callerp
|
||||
if isSystemGoroutine(newg, false) {
|
||||
atomic.Xadd(&sched.ngsys, +1)
|
||||
}
|
||||
// Track initial transition?
|
||||
newg.trackingSeq = uint8(fastrand())
|
||||
if newg.trackingSeq%gTrackingPeriod == 0 {
|
||||
newg.tracking = true
|
||||
}
|
||||
casgstatus(newg, _Gdead, _Grunnable)
|
||||
|
||||
if _p_.goidcache == _p_.goidcacheend {
|
||||
|
@ -462,6 +462,10 @@ type g struct {
|
||||
|
||||
raceignore int8 // ignore race detection events
|
||||
sysblocktraced bool // StartTrace has emitted EvGoInSyscall about this goroutine
|
||||
tracking bool // whether we're tracking this G for sched latency statistics
|
||||
trackingSeq uint8 // used to decide whether to track this G
|
||||
runnableStamp int64 // timestamp of when the G last became runnable, only used when tracking
|
||||
runnableTime int64 // the amount of time spent runnable, cleared when running, only used when tracking
|
||||
sysexitticks int64 // cputicks when syscall has returned (for tracing)
|
||||
traceseq uint64 // trace event sequencer
|
||||
tracelastp puintptr // last P emitted an event for this goroutine
|
||||
@ -493,6 +497,10 @@ type g struct {
|
||||
gcAssistBytes int64
|
||||
}
|
||||
|
||||
// gTrackingPeriod is the number of transitions out of _Grunning between
|
||||
// latency tracking runs.
|
||||
const gTrackingPeriod = 8
|
||||
|
||||
const (
|
||||
// tlsSlots is the number of pointer-sized slots reserved for TLS on some platforms,
|
||||
// like Windows.
|
||||
@ -824,6 +832,15 @@ type schedt struct {
|
||||
// Acquire and hold this mutex to block sysmon from interacting
|
||||
// with the rest of the runtime.
|
||||
sysmonlock mutex
|
||||
|
||||
_ uint32 // ensure timeToRun has 8-byte alignment
|
||||
|
||||
// timeToRun is a distribution of scheduling latencies, defined
|
||||
// as the sum of time a G spends in the _Grunnable state before
|
||||
// it transitions to _Grunning.
|
||||
//
|
||||
// timeToRun is protected by sched.lock.
|
||||
timeToRun timeHistogram
|
||||
}
|
||||
|
||||
// Values for the flags field of a sigTabT.
|
||||
|
@ -21,7 +21,7 @@ func TestSizeof(t *testing.T) {
|
||||
_32bit uintptr // size on 32bit platforms
|
||||
_64bit uintptr // size on 64bit platforms
|
||||
}{
|
||||
{runtime.G{}, 216, 376}, // g, but exported for testing
|
||||
{runtime.G{}, 236, 392}, // g, but exported for testing
|
||||
{runtime.Sudog{}, 56, 88}, // sudog, but exported for testing
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user