1
0
mirror of https://github.com/golang/go synced 2024-11-23 18:30:06 -07:00

runtime: profile with per-thread timers on Linux

Using setitimer on Linux to request SIGPROF signal deliveries in
proportion to the process's on-CPU time results in under-reporting when
the program uses several goroutines in parallel. Linux calculates the
process's total CPU spend on a regular basis (often every 4ms); if the
process has spent enough CPU time since the last calculation to warrant
more than one SIGPROF (usually 10ms for the default sample rate of 100
Hz), the kernel is often able to deliver only one of them. With these
common settings, that results in Go CPU profiles being attenuated for
programs that use more than 2.5 goroutines in parallel.

To avoid in effect overflowing the kernel's process-wide CPU counter,
and relying on Linux's typical behavior of having the active thread
handle the resulting process-targeted signal, use timer_create to
request a timer for each OS thread that the Go runtime manages. Have
each timer track the CPU time of a single thread, with the resulting
SIGPROF going directly to that thread.

To continue tracking CPU time spent on threads that don't interact with
the Go runtime (such as those created and used in cgo), keep using
setitimer in addition to the new mechanism. When a SIGPROF signal
arrives, check whether it's due to setitimer or timer_create and filter
as appropriate: If the thread is known to Go (has an M) and has a
timer_create timer, ignore SIGPROF signals from setitimer. If the thread
is not known to Go (does not have an M), ignore SIGPROF signals that are
not from setitimer.

Counteract the new bias that per-thread profiling adds against
short-lived threads (or those that are only active on occasion for a
short time, such as garbage collection workers on mostly-idle systems)
by configuring the timers' initial trigger to be from a uniform random
distribution between "immediate trigger" and the full requested sample
period.

Updates #35057

Change-Id: Iab753c4e5101bdc09ef9132eec84a75478e05579
Reviewed-on: https://go-review.googlesource.com/c/go/+/324129
Run-TryBot: Rhys Hiltner <rhys@justin.tv>
TryBot-Result: Go Bot <gobot@golang.org>
Trust: David Chase <drchase@google.com>
Reviewed-by: Michael Pratt <mpratt@google.com>
This commit is contained in:
Rhys Hiltner 2021-08-13 08:58:40 -07:00 committed by Michael Pratt
parent f9e90f7e74
commit 8cfd8c3db8
4 changed files with 286 additions and 9 deletions

View File

@ -291,6 +291,13 @@ func cgocallbackg1(fn, frame unsafe.Pointer, ctxt uintptr) {
<-main_init_done
}
// Check whether the profiler needs to be turned on or off; this route to
// run Go code does not use runtime.execute, so bypasses the check there.
hz := sched.profilehz
if gp.m.profilehz != hz {
setThreadCPUProfiler(hz)
}
// Add entry to defer stack in case of panic.
restore := true
defer unwindm(&restore)

View File

@ -7,10 +7,21 @@ package runtime
import (
"internal/abi"
"internal/goarch"
"runtime/internal/atomic"
"unsafe"
)
type mOS struct{}
type mOS struct {
// profileTimer holds the ID of the POSIX interval timer for profiling CPU
// usage on this thread.
//
// It is valid when the profileTimerValid field is non-zero. A thread
// creates and manages its own timer, and these fields are read and written
// only by this thread. But because some of the reads on profileTimerValid
// are in signal handling code, access to that field uses atomic operations.
profileTimer int32
profileTimerValid uint32
}
//go:noescape
func futex(addr unsafe.Pointer, op int32, val uint32, ts, addr2 unsafe.Pointer, val3 uint32) int32
@ -518,18 +529,136 @@ func signalM(mp *m, sig int) {
tgkill(getpid(), int(mp.procid), sig)
}
// go118UseTimerCreateProfiler enables the per-thread CPU profiler. Platforms
// with support for SetCgoTraceback do some signal handling in assembly; do not
// enable it for them until the changes to those code paths are in place.
const go118UseTimerCreateProfiler = GOARCH != "amd64" && GOARCH != "ppc64le"
// validSIGPROF compares this signal delivery's code against the signal sources
// that the profiler uses, returning whether the delivery should be processed.
// To be processed, a signal delivery from a known profiling mechanism should
// correspond to the best profiling mechanism available to this thread. Signals
// from other sources are always considered valid.
//
//go:nosplit
func validSIGPROF(mp *m, c *sigctxt) bool {
code := int32(c.sigcode())
setitimer := code == _SI_KERNEL
timer_create := code == _SI_TIMER
if !(setitimer || timer_create) {
// The signal doesn't correspond to a profiling mechanism that the
// runtime enables itself. There's no reason to process it, but there's
// no reason to ignore it either.
return true
}
if mp == nil {
// Since we don't have an M, we can't check if there's an active
// per-thread timer for this thread. We don't know how long this thread
// has been around, and if it happened to interact with the Go scheduler
// at a time when profiling was active (causing it to have a per-thread
// timer). But it may have never interacted with the Go scheduler, or
// never while profiling was active. To avoid double-counting, process
// only signals from setitimer.
//
// When a custom cgo traceback function has been registered (on
// platforms that support runtime.SetCgoTraceback), SIGPROF signals
// delivered to a thread that cannot find a matching M do this check in
// the assembly implementations of runtime.cgoSigtramp.
return setitimer
}
// Having an M means the thread interacts with the Go scheduler, and we can
// check whether there's an active per-thread timer for this thread.
if atomic.Load(&mp.profileTimerValid) != 0 {
// If this M has its own per-thread CPU profiling interval timer, we
// should track the SIGPROF signals that come from that timer (for
// accurate reporting of its CPU usage; see issue 35057) and ignore any
// that it gets from the process-wide setitimer (to not over-count its
// CPU consumption).
return timer_create
}
// No active per-thread timer means the only valid profiler is setitimer.
return setitimer
}
func setProcessCPUProfiler(hz int32) {
setProcessCPUProfilerTimer(hz)
}
func setThreadCPUProfiler(hz int32) {
setThreadCPUProfilerHz(hz)
}
mp := getg().m
mp.profilehz = hz
//go:nosplit
func validSIGPROF(mp *m, c *sigctxt) bool {
// Once the profiler for Linux uses timer_create, this function will need to
// do more than return true. But so long as the only profiling timer in use
// is setitimer, returning true is correct.
return true
if !go118UseTimerCreateProfiler {
return
}
// destroy any active timer
if atomic.Load(&mp.profileTimerValid) != 0 {
timerid := mp.profileTimer
atomic.Store(&mp.profileTimerValid, 0)
mp.profileTimer = 0
ret := timer_delete(timerid)
if ret != 0 {
print("runtime: failed to disable profiling timer; timer_delete(", timerid, ") errno=", -ret, "\n")
throw("timer_delete")
}
}
if hz == 0 {
// If the goal was to disable profiling for this thread, then the job's done.
return
}
// The period of the timer should be 1/Hz. For every "1/Hz" of additional
// work, the user should expect one additional sample in the profile.
//
// But to scale down to very small amounts of application work, to observe
// even CPU usage of "one tenth" of the requested period, set the initial
// timing delay in a different way: So that "one tenth" of a period of CPU
// spend shows up as a 10% chance of one sample (for an expected value of
// 0.1 samples), and so that "two and six tenths" periods of CPU spend show
// up as a 60% chance of 3 samples and a 40% chance of 2 samples (for an
// expected value of 2.6). Set the initial delay to a value in the unifom
// random distribution between 0 and the desired period. And because "0"
// means "disable timer", add 1 so the half-open interval [0,period) turns
// into (0,period].
//
// Otherwise, this would show up as a bias away from short-lived threads and
// from threads that are only occasionally active: for example, when the
// garbage collector runs on a mostly-idle system, the additional threads it
// activates may do a couple milliseconds of GC-related work and nothing
// else in the few seconds that the profiler observes.
spec := new(itimerspec)
spec.it_value.setNsec(1 + int64(fastrandn(uint32(1e9/hz))))
spec.it_interval.setNsec(1e9 / int64(hz))
var timerid int32
sevp := &sigevent{
notify: _SIGEV_THREAD_ID,
signo: _SIGPROF,
sigev_notify_thread_id: int32(mp.procid),
}
ret := timer_create(_CLOCK_THREAD_CPUTIME_ID, sevp, &timerid)
if ret != 0 {
// If we cannot create a timer for this M, leave profileTimerValid false
// to fall back to the process-wide setitimer profiler.
return
}
ret = timer_settime(timerid, 0, spec, nil)
if ret != 0 {
print("runtime: failed to configure profiling timer; timer_settime(", timerid,
", 0, {interval: {",
spec.it_interval.tv_sec, "s + ", spec.it_interval.tv_nsec, "ns} value: {",
spec.it_value.tv_sec, "s + ", spec.it_value.tv_nsec, "ns}}, nil) errno=", -ret, "\n")
throw("timer_settime")
}
mp.profileTimer = timerid
atomic.Store(&mp.profileTimerValid, 1)
}

View File

@ -79,6 +79,10 @@ func cpuHog2(x int) int {
return foo
}
func cpuHog3(x int) int {
return cpuHog0(x, 1e5)
}
// Return a list of functions that we don't want to ever appear in CPU
// profiles. For gccgo, that list includes the sigprof handler itself.
func avoidFunctions() []string {
@ -107,6 +111,93 @@ func TestCPUProfileMultithreaded(t *testing.T) {
})
}
func TestCPUProfileMultithreadMagnitude(t *testing.T) {
if runtime.GOOS != "linux" {
t.Skip("issue 35057 is only confirmed on Linux")
}
// Run a workload in a single goroutine, then run copies of the same
// workload in several goroutines. For both the serial and parallel cases,
// the CPU time the process measures with its own profiler should match the
// total CPU usage that the OS reports.
//
// We could also check that increases in parallelism (GOMAXPROCS) lead to a
// linear increase in the CPU usage reported by both the OS and the
// profiler, but without a guarantee of exclusive access to CPU resources
// that is likely to be a flaky test.
// Require the smaller value to be within 10%, or 40% in short mode.
maxDiff := 0.10
if testing.Short() {
maxDiff = 0.40
}
parallelism := runtime.GOMAXPROCS(0)
var cpuTime1, cpuTimeN time.Duration
p := testCPUProfile(t, stackContains, []string{"runtime/pprof.cpuHog1", "runtime/pprof.cpuHog3"}, avoidFunctions(), func(dur time.Duration) {
cpuTime1 = diffCPUTime(t, func() {
// Consume CPU in one goroutine
cpuHogger(cpuHog1, &salt1, dur)
})
cpuTimeN = diffCPUTime(t, func() {
// Next, consume CPU in several goroutines
var wg sync.WaitGroup
var once sync.Once
for i := 0; i < parallelism; i++ {
wg.Add(1)
go func() {
defer wg.Done()
var salt = 0
cpuHogger(cpuHog3, &salt, dur)
once.Do(func() { salt1 = salt })
}()
}
wg.Wait()
})
})
for i, unit := range []string{"count", "nanoseconds"} {
if have, want := p.SampleType[i].Unit, unit; have != want {
t.Errorf("pN SampleType[%d]; %q != %q", i, have, want)
}
}
var value1, valueN time.Duration
for _, sample := range p.Sample {
if stackContains("runtime/pprof.cpuHog1", uintptr(sample.Value[0]), sample.Location, sample.Label) {
value1 += time.Duration(sample.Value[1]) * time.Nanosecond
}
if stackContains("runtime/pprof.cpuHog3", uintptr(sample.Value[0]), sample.Location, sample.Label) {
valueN += time.Duration(sample.Value[1]) * time.Nanosecond
}
}
compare := func(a, b time.Duration, maxDiff float64) func(*testing.T) {
return func(t *testing.T) {
t.Logf("compare %s vs %s", a, b)
if a <= 0 || b <= 0 {
t.Errorf("Expected both time reports to be positive")
return
}
if a < b {
a, b = b, a
}
diff := float64(a-b) / float64(a)
if diff > maxDiff {
t.Errorf("CPU usage reports are too different (limit -%.1f%%, got -%.1f%%)", maxDiff*100, diff*100)
}
}
}
// check that the OS's perspective matches what the Go runtime measures
t.Run("serial execution OS vs pprof", compare(cpuTime1, value1, maxDiff))
t.Run("parallel execution OS vs pprof", compare(cpuTimeN, valueN, maxDiff))
}
// containsInlinedCall reports whether the function body for the function f is
// known to contain an inlined function call within the first maxBytes bytes.
func containsInlinedCall(f interface{}, maxBytes int) bool {
@ -350,6 +441,16 @@ func testCPUProfile(t *testing.T, matches matchFunc, need []string, avoid []stri
return nil
}
var diffCPUTimeImpl func(f func()) time.Duration
func diffCPUTime(t *testing.T, f func()) time.Duration {
if fn := diffCPUTimeImpl; fn != nil {
return fn(f)
}
t.Fatalf("cannot measure CPU time on GOOS=%s GOARCH=%s", runtime.GOOS, runtime.GOARCH)
return 0
}
func contains(slice []string, s string) bool {
for i := range slice {
if slice[i] == s {

View File

@ -0,0 +1,40 @@
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build darwin || freebsd || linux || netbsd || openbsd
// +build darwin freebsd linux netbsd openbsd
package pprof
import (
"syscall"
"time"
)
func init() {
diffCPUTimeImpl = diffCPUTimeRUsage
}
func diffCPUTimeRUsage(f func()) time.Duration {
ok := true
var before, after syscall.Rusage
err := syscall.Getrusage(syscall.RUSAGE_SELF, &before)
if err != nil {
ok = false
}
f()
err = syscall.Getrusage(syscall.RUSAGE_SELF, &after)
if err != nil {
ok = false
}
if !ok {
return 0
}
return time.Duration((after.Utime.Nano() + after.Stime.Nano()) - (before.Utime.Nano() + before.Stime.Nano()))
}