Mirror of https://github.com/golang/go, synced 2024-11-17 17:54:48 -07:00
Revert "runtime: profile contended lock calls"
This reverts commit go.dev/cl/528657.

Reason for revert: broke a lot of builders.

Change-Id: I70c33062020e997c4df67b3eaa2e886cf0da961e
Reviewed-on: https://go-review.googlesource.com/c/go/+/543660
Reviewed-by: Than McIntosh <thanm@google.com>
Auto-Submit: Matthew Dempsky <mdempsky@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
parent 237715cf7c
commit 468bc94188
@@ -52,17 +52,6 @@ func NumCgoCall() int64 {
	return n
}

func totalMutexWaitTimeNanos() int64 {
	total := sched.totalMutexWaitTime.Load()

	total += sched.totalRuntimeLockWaitTime.Load()
	for mp := (*m)(atomic.Loadp(unsafe.Pointer(&allm))); mp != nil; mp = mp.alllink {
		total += mp.mLockProfile.waitTime.Load()
	}

	return total
}

// NumGoroutine returns the number of goroutines that currently exist.
func NumGoroutine() int {
	return int(gcount())
@@ -1340,27 +1340,6 @@ func PageCachePagesLeaked() (leaked uintptr) {
	return
}

type Mutex = mutex

var Lock = lock
var Unlock = unlock

func MutexContended(l *mutex) bool {
	switch atomic.Loaduintptr(&l.key) {
	case 0: // unlocked
		return false
	case 1: // locked
		return false
	default: // an M is sleeping
		return true
	}
}

func SemRootLock(addr *uint32) *mutex {
	root := semtable.rootFor(addr)
	return &root.lock
}

var Semacquire = semacquire
var Semrelease1 = semrelease1
@@ -145,14 +145,6 @@ It is a comma-separated list of name=val pairs setting these named variables:
	risk in that scenario. Currently not supported on Windows, plan9 or js/wasm. Setting this
	option for some applications can produce large traces, so use with care.

	profileruntimelocks: setting profileruntimelocks=1 includes call stacks related to
	contention on runtime-internal locks in the "mutex" profile, subject to the
	MutexProfileFraction setting. The call stacks will correspond to the unlock call that
	released the lock. But instead of the value corresponding to the amount of contention that
	call stack caused, it corresponds to the amount of time the caller of unlock had to wait
	in its original call to lock. A future release is expected to align those and remove this
	setting.

	invalidptr: invalidptr=1 (the default) causes the garbage collector and stack
	copier to crash the program if an invalid pointer value (for example, 1)
	is found in a pointer-typed location. Setting invalidptr=0 disables this check.
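For context, a minimal sketch (not part of this change) of collecting the user-facing "mutex" profile with the stable runtime and runtime/pprof APIs, which are unaffected by this revert; the contending workload below is purely illustrative:

package main

import (
	"os"
	"runtime"
	"runtime/pprof"
	"sync"
	"time"
)

func main() {
	// Record every contention event (0 disables, N records roughly 1 in N).
	prev := runtime.SetMutexProfileFraction(1)
	defer runtime.SetMutexProfileFraction(prev)

	// Illustrative workload: several goroutines contend on one sync.Mutex.
	var mu sync.Mutex
	var wg sync.WaitGroup
	for i := 0; i < 4; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			mu.Lock()
			time.Sleep(time.Millisecond) // hold the lock so the others wait
			mu.Unlock()
		}()
	}
	wg.Wait()

	// Dump the "mutex" profile; debug=0 writes the binary pprof format.
	pprof.Lookup("mutex").WriteTo(os.Stdout, 0)
}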
@@ -71,8 +71,6 @@ func lock2(l *mutex) {
	// its wakeup call.
	wait := v

	timer := &lockTimer{lock: l}
	timer.begin()
	// On uniprocessors, no point spinning.
	// On multiprocessors, spin for ACTIVE_SPIN attempts.
	spin := 0
@@ -84,7 +82,6 @@ func lock2(l *mutex) {
		for i := 0; i < spin; i++ {
			for l.key == mutex_unlocked {
				if atomic.Cas(key32(&l.key), mutex_unlocked, wait) {
					timer.end()
					return
				}
			}
@@ -95,7 +92,6 @@ func lock2(l *mutex) {
		for i := 0; i < passive_spin; i++ {
			for l.key == mutex_unlocked {
				if atomic.Cas(key32(&l.key), mutex_unlocked, wait) {
					timer.end()
					return
				}
			}
@@ -105,7 +101,6 @@ func lock2(l *mutex) {
		// Sleep.
		v = atomic.Xchg(key32(&l.key), mutex_sleeping)
		if v == mutex_unlocked {
			timer.end()
			return
		}
		wait = mutex_sleeping
@@ -127,7 +122,6 @@ func unlock2(l *mutex) {
	}

	gp := getg()
	gp.m.mLockProfile.recordUnlock(l)
	gp.m.locks--
	if gp.m.locks < 0 {
		throw("runtime·unlock: lock count")
@@ -48,8 +48,6 @@ func lock2(l *mutex) {
	}
	semacreate(gp.m)

	timer := &lockTimer{lock: l}
	timer.begin()
	// On uniprocessor's, no point spinning.
	// On multiprocessors, spin for ACTIVE_SPIN attempts.
	spin := 0
@@ -62,7 +60,6 @@ Loop:
		if v&locked == 0 {
			// Unlocked. Try to lock.
			if atomic.Casuintptr(&l.key, v, v|locked) {
				timer.end()
				return
			}
			i = 0
@@ -122,7 +119,6 @@ func unlock2(l *mutex) {
			}
		}
	}
	gp.m.mLockProfile.recordUnlock(l)
	gp.m.locks--
	if gp.m.locks < 0 {
		throw("runtime·unlock: lock count")
@@ -470,7 +470,7 @@ func initMetrics() {
		"/sync/mutex/wait/total:seconds": {
			compute: func(_ *statAggregate, out *metricValue) {
				out.kind = metricKindFloat64
				out.scalar = float64bits(nsToSec(totalMutexWaitTimeNanos()))
				out.scalar = float64bits(nsToSec(sched.totalMutexWaitTime.Load()))
			},
		},
	}
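For reference, a minimal sketch (not part of this change) of how the metric registered above is consumed through the public runtime/metrics API:

package main

import (
	"fmt"
	"runtime/metrics"
)

func main() {
	// Ask for the total time goroutines have spent blocked on mutexes.
	samples := []metrics.Sample{{Name: "/sync/mutex/wait/total:seconds"}}
	metrics.Read(samples)
	if samples[0].Value.Kind() == metrics.KindFloat64 {
		fmt.Printf("total mutex wait: %fs\n", samples[0].Value.Float64())
	}
}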
@@ -6,21 +6,15 @@ package runtime_test

import (
	"bytes"
	"fmt"
	"internal/goexperiment"
	"internal/profile"
	"os"
	"reflect"
	"runtime"
	"runtime/debug"
	"runtime/metrics"
	"runtime/pprof"
	"runtime/trace"
	"slices"
	"sort"
	"strings"
	"sync"
	"sync/atomic"
	"testing"
	"time"
	"unsafe"
@@ -945,305 +939,3 @@ func TestSchedPauseMetrics(t *testing.T) {
		})
	}
}

func TestRuntimeLockMetricsAndProfile(t *testing.T) {
	old := runtime.SetMutexProfileFraction(0) // enabled during sub-tests
	defer runtime.SetMutexProfileFraction(old)
	if old != 0 {
		t.Fatalf("need MutexProfileRate 0, got %d", old)
	}

	{
		before := os.Getenv("GODEBUG")
		for _, s := range strings.Split(before, ",") {
			if strings.HasPrefix(s, "profileruntimelocks=") {
				t.Logf("GODEBUG includes explicit setting %q", s)
			}
		}
		defer func() { os.Setenv("GODEBUG", before) }()
		os.Setenv("GODEBUG", fmt.Sprintf("%s,profileruntimelocks=1", before))
	}

	loadProfile := func(t *testing.T) *profile.Profile {
		var w bytes.Buffer
		pprof.Lookup("mutex").WriteTo(&w, 0)
		p, err := profile.Parse(&w)
		if err != nil {
			t.Fatalf("failed to parse profile: %v", err)
		}
		if err := p.CheckValid(); err != nil {
			t.Fatalf("invalid profile: %v", err)
		}
		return p
	}

	measureDelta := func(t *testing.T, fn func()) (metricGrowth, profileGrowth float64, p *profile.Profile) {
		beforeProfile := loadProfile(t)
		beforeMetrics := []metrics.Sample{{Name: "/sync/mutex/wait/total:seconds"}}
		metrics.Read(beforeMetrics)

		fn()

		afterProfile := loadProfile(t)
		afterMetrics := []metrics.Sample{{Name: "/sync/mutex/wait/total:seconds"}}
		metrics.Read(afterMetrics)

		sumSamples := func(p *profile.Profile, i int) int64 {
			var sum int64
			for _, s := range p.Sample {
				sum += s.Value[i]
			}
			return sum
		}

		metricGrowth = afterMetrics[0].Value.Float64() - beforeMetrics[0].Value.Float64()
		profileGrowth = float64(sumSamples(afterProfile, 1)-sumSamples(beforeProfile, 1)) * time.Nanosecond.Seconds()

		// The internal/profile package does not support compaction; this delta
		// profile will include separate positive and negative entries.
		p = afterProfile.Copy()
		if len(beforeProfile.Sample) > 0 {
			err := p.Merge(beforeProfile, -1)
			if err != nil {
				t.Fatalf("Merge profiles: %v", err)
			}
		}

		return metricGrowth, profileGrowth, p
	}

	testcase := func(stk []string, workers int, fn func() bool) func(t *testing.T) (metricGrowth, profileGrowth float64, n, value int64) {
		return func(t *testing.T) (metricGrowth, profileGrowth float64, n, value int64) {
			metricGrowth, profileGrowth, p := measureDelta(t, func() {
				var started, stopped sync.WaitGroup
				started.Add(workers)
				stopped.Add(workers)
				for i := 0; i < workers; i++ {
					w := &contentionWorker{
						before: func() {
							started.Done()
							started.Wait()
						},
						after: func() {
							stopped.Done()
						},
						fn: fn,
					}
					go w.run()
				}
				stopped.Wait()
			})

			if profileGrowth == 0 {
				t.Errorf("no increase in mutex profile")
			}
			if metricGrowth == 0 {
				t.Errorf("no increase in /sync/mutex/wait/total:seconds metric")
			}
			// This comparison is possible because the time measurements in support of
			// runtime/pprof and runtime/metrics for runtime-internal locks are so close
			// together. It doesn't work as well for user-space contention, where the
			// involved goroutines are not _Grunnable the whole time and so need to pass
			// through the scheduler.
			t.Logf("lock contention growth in runtime/pprof's view (%fs)", profileGrowth)
			t.Logf("lock contention growth in runtime/metrics' view (%fs)", metricGrowth)

			if goexperiment.StaticLockRanking {
				if !slices.ContainsFunc(stk, func(s string) bool {
					return s == "runtime.systemstack" || s == "runtime.mcall" || s == "runtime.mstart"
				}) {
					// stk is a call stack that is still on the user stack when
					// it calls runtime.unlock. Add the extra function that
					// we'll see, when the static lock ranking implementation of
					// runtime.unlockWithRank switches to the system stack.
					stk = append([]string{"runtime.unlockWithRank"}, stk...)
				}
			}

			var stks [][]string
			for _, s := range p.Sample {
				var have []string
				for _, loc := range s.Location {
					for _, line := range loc.Line {
						have = append(have, line.Function.Name)
					}
				}
				stks = append(stks, have)
				if slices.Equal(have, stk) {
					n += s.Value[0]
					value += s.Value[1]
				}
			}
			t.Logf("stack %v has samples totaling n=%d value=%d", stk, n, value)
			if n == 0 && value == 0 {
				t.Logf("profile:\n%s", p)
				for _, have := range stks {
					t.Logf("have stack %v", have)
				}
				t.Errorf("want stack %v", stk)
			}

			return metricGrowth, profileGrowth, n, value
		}
	}

	name := t.Name()

	t.Run("runtime.lock", func(t *testing.T) {
		mus := make([]runtime.Mutex, 100)
		var needContention atomic.Int64
		delay := 10 * time.Microsecond
		delayMicros := delay.Microseconds()

		// The goroutine that acquires the lock will only proceed when it
		// detects that its partner is contended for the lock. That will lead to
		// live-lock if anything (such as a STW) prevents the partner goroutine
		// from running. Allowing the contention workers to pause and restart
		// (to allow a STW to proceed) makes it harder to confirm that we're
		// counting the correct number of contention events, since some locks
		// will end up contended twice. Instead, disable the GC.
		defer debug.SetGCPercent(debug.SetGCPercent(-1))

		const workers = 2
		if runtime.GOMAXPROCS(0) < workers {
			t.Skipf("contention on runtime-internal locks requires GOMAXPROCS >= %d", workers)
		}

		fn := func() bool {
			n := int(needContention.Load())
			if n < 0 {
				return false
			}
			mu := &mus[n]

			runtime.Lock(mu)
			for int(needContention.Load()) == n {
				if runtime.MutexContended(mu) {
					// make them wait a little while
					for start := runtime.Nanotime(); (runtime.Nanotime()-start)/1000 < delayMicros; {
						runtime.Usleep(uint32(delayMicros))
					}
					break
				}
			}
			runtime.Unlock(mu)
			needContention.Store(int64(n - 1))

			return true
		}

		stk := []string{
			"runtime.unlock",
			"runtime_test." + name + ".func5.1",
			"runtime_test.(*contentionWorker).run",
		}

		t.Run("sample-1", func(t *testing.T) {
			old := runtime.SetMutexProfileFraction(1)
			defer runtime.SetMutexProfileFraction(old)

			needContention.Store(int64(len(mus) - 1))
			metricGrowth, profileGrowth, n, _ := testcase(stk, workers, fn)(t)

			if have, want := metricGrowth, delay.Seconds()*float64(len(mus)); have < want {
				// The test imposes a delay with usleep, verified with calls to
				// nanotime. Compare against the runtime/metrics package's view
				// (based on nanotime) rather than runtime/pprof's view (based
				// on cputicks).
				t.Errorf("runtime/metrics reported less than the known minimum contention duration (%fs < %fs)", have, want)
			}
			if have, want := n, int64(len(mus)); have != want {
				t.Errorf("mutex profile reported contention count different from the known true count (%d != %d)", have, want)
			}

			const slop = 1.5 // account for nanotime vs cputicks
			if profileGrowth > slop*metricGrowth || metricGrowth > slop*profileGrowth {
				t.Errorf("views differ by more than %fx", slop)
			}
		})

		t.Run("sample-2", func(t *testing.T) {
			old := runtime.SetMutexProfileFraction(2)
			defer runtime.SetMutexProfileFraction(old)

			needContention.Store(int64(len(mus) - 1))
			metricGrowth, profileGrowth, n, _ := testcase(stk, workers, fn)(t)

			// With 100 trials and profile fraction of 2, we expect to capture
			// 50 samples. Allow the test to pass if we get at least 20 samples;
			// the CDF of the binomial distribution says there's less than a
			// 1e-9 chance of that, which is an acceptably low flakiness rate.
			const samplingSlop = 2.5

			if have, want := metricGrowth, delay.Seconds()*float64(len(mus)); samplingSlop*have < want {
				// The test imposes a delay with usleep, verified with calls to
				// nanotime. Compare against the runtime/metrics package's view
				// (based on nanotime) rather than runtime/pprof's view (based
				// on cputicks).
				t.Errorf("runtime/metrics reported less than the known minimum contention duration (%f * %fs < %fs)", samplingSlop, have, want)
			}
			if have, want := n, int64(len(mus)); float64(have) > float64(want)*samplingSlop || float64(want) > float64(have)*samplingSlop {
				t.Errorf("mutex profile reported contention count too different from the expected count (%d far from %d)", have, want)
			}

			const timerSlop = 1.5 * samplingSlop // account for nanotime vs cputicks, plus the two views' independent sampling
			if profileGrowth > timerSlop*metricGrowth || metricGrowth > timerSlop*profileGrowth {
				t.Errorf("views differ by more than %fx", timerSlop)
			}
		})
	})

	t.Run("runtime.semrelease", func(t *testing.T) {
		old := runtime.SetMutexProfileFraction(1)
		defer runtime.SetMutexProfileFraction(old)

		const workers = 3
		if runtime.GOMAXPROCS(0) < workers {
			t.Skipf("creating and observing contention on runtime-internal semaphores requires GOMAXPROCS >= %d", workers)
		}

		var sem uint32 = 1
		var sawContention atomic.Int32
		var need int32 = 1000 // counteract low timer resolution by requiring more samples
		fn := func() bool {
			if sawContention.Load() >= need {
				return false
			}
			runtime.Semacquire(&sem)
			runtime.Semrelease1(&sem, false, 0)
			if runtime.MutexContended(runtime.SemRootLock(&sem)) {
				sawContention.Add(1)
			}
			return true
		}

		stk := []string{
			"runtime.unlock",
			"runtime.semrelease1",
			"runtime_test.TestRuntimeLockMetricsAndProfile.func6.1",
			"runtime_test.(*contentionWorker).run",
		}

		// Verify that we get call stack we expect, with anything more than zero
		// nanoseconds / zero samples. The duration of each contention event is
		// too small relative to the expected overhead for us to verify its
		// value more directly. Leave that to the explicit lock/unlock test.

		testcase(stk, workers, fn)(t)
	})
}

// contentionWorker provides cleaner call stacks for lock contention profile tests
type contentionWorker struct {
	before func()
	fn     func() bool
	after  func()
}

func (w *contentionWorker) run() {
	defer w.after()
	w.before()

	for w.fn() {
	}
}
@@ -234,10 +234,6 @@ func newBucket(typ bucketType, nstk int) *bucket {
// stk returns the slice in b holding the stack.
func (b *bucket) stk() []uintptr {
	stk := (*[maxStack]uintptr)(add(unsafe.Pointer(b), unsafe.Sizeof(*b)))
	if b.nstk > maxStack {
		// prove that slicing works; otherwise a failure requires a P
		throw("bad profile stack count")
	}
	return stk[:b.nstk:b.nstk]
}

@@ -513,237 +509,7 @@ func saveblockevent(cycles, rate int64, skip int, which bucketType) {
	} else {
		nstk = gcallers(gp.m.curg, skip, stk[:])
	}

	saveBlockEventStack(cycles, rate, stk[:nstk], which)
}

// lockTimer assists with profiling contention on runtime-internal locks.
//
// There are several steps between the time that an M experiences contention and
// when that contention may be added to the profile. This comes from our
// constraints: We need to keep the critical section of each lock small,
// especially when those locks are contended. The reporting code cannot acquire
// new locks until the M has released all other locks, which means no memory
// allocations and encourages use of (temporary) M-local storage.
//
// The M will have space for storing one call stack that caused contention, and
// for the magnitude of that contention. It will also have space to store the
// magnitude of additional contention the M caused, since it only has space to
// remember one call stack and might encounter several contention events before
// it releases all of its locks and is thus able to transfer the local buffer
// into the profile.
//
// The M will collect the call stack when it unlocks the contended lock. That
// minimizes the impact on the critical section of the contended lock, and
// matches the mutex profile's behavior for contention in sync.Mutex: measured
// at the Unlock method.
//
// The profile for contention on sync.Mutex blames the caller of Unlock for the
// amount of contention experienced by the callers of Lock which had to wait.
// When there are several critical sections, this allows identifying which of
// them is responsible.
//
// Matching that behavior for runtime-internal locks will require identifying
// which Ms are blocked on the mutex. The semaphore-based implementation is
// ready to allow that, but the futex-based implementation will require a bit
// more work. Until then, we report contention on runtime-internal locks with a
// call stack taken from the unlock call (like the rest of the user-space
// "mutex" profile), but assign it a duration value based on how long the
// previous lock call took (like the user-space "block" profile).
//
// Thus, reporting the call stacks of runtime-internal lock contention is
// guarded by GODEBUG for now. Set GODEBUG=profileruntimelocks=1 to enable.
//
// TODO(rhysh): plumb through the delay duration, remove GODEBUG, update comment
//
// The M will track this by storing a pointer to the lock; lock/unlock pairs for
// runtime-internal locks are always on the same M.
//
// Together, that demands several steps for recording contention. First, when
// finally acquiring a contended lock, the M decides whether it should plan to
// profile that event by storing a pointer to the lock in its "to be profiled
// upon unlock" field. If that field is already set, it uses the relative
// magnitudes to weight a random choice between itself and the other lock, with
// the loser's time being added to the "additional contention" field. Otherwise
// if the M's call stack buffer is occupied, it does the comparison against that
// sample's magnitude.
//
// Second, having unlocked a mutex the M checks to see if it should capture the
// call stack into its local buffer. Finally, when the M unlocks its last mutex,
// it transfers the local buffer into the profile. As part of that step, it also
// transfers any "additional contention" time to the profile. Any lock
// contention that it experiences while adding samples to the profile will be
// recorded later as "additional contention" and not include a call stack, to
// avoid an echo.
type lockTimer struct {
	lock      *mutex
	timeRate  int64
	timeStart int64
	tickStart int64
}

func (lt *lockTimer) begin() {
	rate := int64(atomic.Load64(&mutexprofilerate))

	lt.timeRate = gTrackingPeriod
	if rate != 0 && rate < lt.timeRate {
		lt.timeRate = rate
	}
	if int64(fastrand())%lt.timeRate == 0 {
		lt.timeStart = nanotime()
	}

	if rate > 0 && int64(fastrand())%rate == 0 {
		lt.tickStart = cputicks()
	}
}

func (lt *lockTimer) end() {
	gp := getg()

	if lt.timeStart != 0 {
		nowTime := nanotime()
		gp.m.mLockProfile.waitTime.Add((nowTime - lt.timeStart) * lt.timeRate)
	}

	if lt.tickStart != 0 {
		nowTick := cputicks()
		gp.m.mLockProfile.recordLock(nowTick-lt.tickStart, lt.lock)
	}
}

type mLockProfile struct {
	waitTime   atomic.Int64      // total nanoseconds spent waiting in runtime.lockWithRank
	stack      [maxStack]uintptr // stack that experienced contention in runtime.lockWithRank
	pending    uintptr           // *mutex that experienced contention (to be traceback-ed)
	cycles     int64             // cycles attributable to "pending" (if set), otherwise to "stack"
	cyclesLost int64             // contention for which we weren't able to record a call stack
	disabled   bool              // attribute all time to "lost"
}

func (prof *mLockProfile) recordLock(cycles int64, l *mutex) {
	if cycles <= 0 {
		return
	}

	if prof.disabled {
		// We're experiencing contention while attempting to report contention.
		// Make a note of its magnitude, but don't allow it to be the sole cause
		// of another contention report.
		prof.cyclesLost += cycles
		return
	}

	if uintptr(unsafe.Pointer(l)) == prof.pending {
		// Optimization: we'd already planned to profile this same lock (though
		// possibly from a different unlock site).
		prof.cycles += cycles
		return
	}

	if prev := prof.cycles; prev > 0 {
		// We can only store one call stack for runtime-internal lock contention
		// on this M, and we've already got one. Decide which should stay, and
		// add the other to the report for runtime._LostContendedLock.
		prevScore := fastrand64() % uint64(prev)
		thisScore := fastrand64() % uint64(cycles)
		if prevScore > thisScore {
			prof.cyclesLost += cycles
			return
		} else {
			prof.cyclesLost += prev
		}
	}
	// Saving the *mutex as a uintptr is safe because:
	// - lockrank_on.go does this too, which gives it regular exercise
	// - the lock would only move if it's stack allocated, which means it
	//   cannot experience multi-M contention
	prof.pending = uintptr(unsafe.Pointer(l))
	prof.cycles = cycles
}

// From unlock2, we might not be holding a p in this code.
//
//go:nowritebarrierrec
func (prof *mLockProfile) recordUnlock(l *mutex) {
	if uintptr(unsafe.Pointer(l)) == prof.pending {
		prof.captureStack()
	}
	if gp := getg(); gp.m.locks == 1 && gp.m.mLockProfile.cycles != 0 {
		prof.store()
	}
}

func (prof *mLockProfile) captureStack() {
	skip := 3 // runtime.(*mLockProfile).recordUnlock runtime.unlock2 runtime.unlockWithRank
	if staticLockRanking {
		// When static lock ranking is enabled, we'll always be on the system
		// stack at this point. There will be a runtime.unlockWithRank.func1
		// frame, and if the call to runtime.unlock took place on a user stack
		// then there'll also be a runtime.systemstack frame. To keep stack
		// traces somewhat consistent whether or not static lock ranking is
		// enabled, we'd like to skip those. But it's hard to tell how long
		// we've been on the system stack so accept an extra frame in that case,
		// with a leaf of "runtime.unlockWithRank runtime.unlock" instead of
		// "runtime.unlock".
		skip += 1 // runtime.unlockWithRank.func1
	}
	prof.pending = 0

	if debug.profileruntimelocks.Load() == 0 {
		prof.stack[0] = abi.FuncPCABIInternal(_LostContendedLock) + sys.PCQuantum
		prof.stack[1] = 0
		return
	}

	var nstk int
	gp := getg()
	sp := getcallersp()
	pc := getcallerpc()
	systemstack(func() {
		var u unwinder
		u.initAt(pc, sp, 0, gp, unwindSilentErrors|unwindJumpStack)
		nstk = tracebackPCs(&u, skip, prof.stack[:])
	})
	if nstk < len(prof.stack) {
		prof.stack[nstk] = 0
	}
}

func (prof *mLockProfile) store() {
	// Report any contention we experience within this function as "lost"; it's
	// important that the act of reporting a contention event not lead to a
	// reportable contention event. This also means we can use prof.stack
	// without copying, since it won't change during this function.
	mp := acquirem()
	prof.disabled = true

	nstk := maxStack
	for i := 0; i < nstk; i++ {
		if pc := prof.stack[i]; pc == 0 {
			nstk = i
			break
		}
	}

	cycles, lost := prof.cycles, prof.cyclesLost
	prof.cycles, prof.cyclesLost = 0, 0

	rate := int64(atomic.Load64(&mutexprofilerate))
	saveBlockEventStack(cycles, rate, prof.stack[:nstk], mutexProfile)
	if lost > 0 {
		lostStk := [...]uintptr{
			abi.FuncPCABIInternal(_LostContendedLock) + sys.PCQuantum,
		}
		saveBlockEventStack(lost, rate, lostStk[:], mutexProfile)
	}

	prof.disabled = false
	releasem(mp)
}

func saveBlockEventStack(cycles, rate int64, stk []uintptr, which bucketType) {
	b := stkbucket(which, 0, stk, true)
	b := stkbucket(which, 0, stk[:nstk], true)
	bp := b.bp()

	lock(&profBlockLock)
@@ -919,7 +919,7 @@ func mcommoninit(mp *m, id int64) {
	// when it is just in a register or thread-local storage.
	mp.alllink = allm

	// NumCgoCall() and others iterate over allm w/o schedlock,
	// NumCgoCall() iterates over allm w/o schedlock,
	// so we need to publish it safely.
	atomicstorep(unsafe.Pointer(&allm), unsafe.Pointer(mp))
	unlock(&sched.lock)
@@ -1852,7 +1852,6 @@ found:
	unlock(&sched.lock)

	atomic.Xadd64(&ncgocall, int64(mp.ncgocall))
	sched.totalRuntimeLockWaitTime.Add(mp.mLockProfile.waitTime.Load())

	// Release the P.
	handoffp(releasep())
@@ -5271,7 +5270,6 @@ func _ExternalCode() { _ExternalCode() }
func _LostExternalCode() { _LostExternalCode() }
func _GC() { _GC() }
func _LostSIGPROFDuringAtomic64() { _LostSIGPROFDuringAtomic64() }
func _LostContendedLock() { _LostContendedLock() }
func _VDSO() { _VDSO() }

// Called if we receive a SIGPROF signal.
@@ -307,27 +307,26 @@ type dbgVar struct {
// existing int var for that value, which may
// already have an initial value.
var debug struct {
	cgocheck int32
	clobberfree int32
	dontfreezetheworld int32
	efence int32
	gccheckmark int32
	gcpacertrace int32
	gcshrinkstackoff int32
	gcstoptheworld int32
	gctrace int32
	invalidptr int32
	madvdontneed int32 // for Linux; issue 28466
	profileruntimelocks atomic.Int32
	scavtrace int32
	scheddetail int32
	schedtrace int32
	tracebackancestors int32
	asyncpreemptoff int32
	harddecommit int32
	adaptivestackstart int32
	tracefpunwindoff int32
	traceadvanceperiod int32
	cgocheck int32
	clobberfree int32
	dontfreezetheworld int32
	efence int32
	gccheckmark int32
	gcpacertrace int32
	gcshrinkstackoff int32
	gcstoptheworld int32
	gctrace int32
	invalidptr int32
	madvdontneed int32 // for Linux; issue 28466
	scavtrace int32
	scheddetail int32
	schedtrace int32
	tracebackancestors int32
	asyncpreemptoff int32
	harddecommit int32
	adaptivestackstart int32
	tracefpunwindoff int32
	traceadvanceperiod int32

	// debug.malloc is used as a combined debug check
	// in the malloc function and should be set
@@ -353,7 +352,6 @@ var dbgvars = []*dbgVar{
	{name: "gctrace", value: &debug.gctrace},
	{name: "invalidptr", value: &debug.invalidptr},
	{name: "madvdontneed", value: &debug.madvdontneed},
	{name: "profileruntimelocks", atomic: &debug.profileruntimelocks},
	{name: "sbrk", value: &debug.sbrk},
	{name: "scavtrace", value: &debug.scavtrace},
	{name: "scheddetail", value: &debug.scheddetail},
@@ -593,8 +593,6 @@ type m struct {
	lockedInt uint32 // tracking for internal lockOSThread
	nextwaitm muintptr // next m waiting for lock

	mLockProfile mLockProfile // fields relating to runtime.lock contention

	// wait* are used to carry arguments from gopark into park_m, because
	// there's no stack to put them on. That is their sole purpose.
	waitunlockf func(*g, unsafe.Pointer) bool
@@ -902,12 +900,6 @@ type schedt struct {
	// stwTotalTimeOther covers the others.
	stwTotalTimeGC timeHistogram
	stwTotalTimeOther timeHistogram

	// totalRuntimeLockWaitTime (plus the value of lockWaitTime on each M in
	// allm) is the sum of time goroutines have spent in _Grunnable and with an
	// M, but waiting for locks within the runtime. This field stores the value
	// for Ms that have exited.
	totalRuntimeLockWaitTime atomic.Int64
}

// Values for the flags field of a sigTabT.
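The totalRuntimeLockWaitTime comment above describes the same aggregation scheme that totalMutexWaitTimeNanos (removed at the top of this diff) implements: live Ms carry per-M counters, a scheduler-global counter absorbs the totals of Ms that exit, and a reader sums both. A standalone sketch of that pattern, with hypothetical names and outside the runtime:

package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

// worker plays the role of an M with its own wait-time counter.
type worker struct {
	waitNanos atomic.Int64 // analogous to m.mLockProfile.waitTime
}

// pool plays the role of the scheduler-global state.
type pool struct {
	mu          sync.Mutex
	live        []*worker
	exitedNanos atomic.Int64 // analogous to sched.totalRuntimeLockWaitTime
}

// retire folds an exiting worker's total into the global counter so the
// value is not lost once the worker is gone.
func (p *pool) retire(w *worker) {
	p.mu.Lock()
	defer p.mu.Unlock()
	p.exitedNanos.Add(w.waitNanos.Load())
	for i, x := range p.live {
		if x == w {
			p.live = append(p.live[:i], p.live[i+1:]...)
			break
		}
	}
}

// totalWaitNanos sums the global counter plus every live worker's counter,
// mirroring what a metrics reader does.
func (p *pool) totalWaitNanos() int64 {
	p.mu.Lock()
	defer p.mu.Unlock()
	total := p.exitedNanos.Load()
	for _, w := range p.live {
		total += w.waitNanos.Load()
	}
	return total
}

func main() {
	p := &pool{}
	w := &worker{}
	p.live = append(p.live, w)
	w.waitNanos.Add(1500)
	p.retire(w)
	fmt.Println(p.totalWaitNanos()) // 1500
}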