From 468bc941886ae83b8d16af5deaaf12ea5cabb05b Mon Sep 17 00:00:00 2001
From: Matthew Dempsky
Date: Mon, 20 Nov 2023 06:27:20 +0000
Subject: [PATCH] Revert "runtime: profile contended lock calls"

This reverts commit go.dev/cl/528657.

Reason for revert: broke a lot of builders.

Change-Id: I70c33062020e997c4df67b3eaa2e886cf0da961e
Reviewed-on: https://go-review.googlesource.com/c/go/+/543660
Reviewed-by: Than McIntosh
Auto-Submit: Matthew Dempsky
LUCI-TryBot-Result: Go LUCI
---
 src/runtime/debug.go        |  11 --
 src/runtime/export_test.go  |  21 ---
 src/runtime/extern.go       |   8 -
 src/runtime/lock_futex.go   |   6 -
 src/runtime/lock_sema.go    |   4 -
 src/runtime/metrics.go      |   2 +-
 src/runtime/metrics_test.go | 308 ------------------------------------
 src/runtime/mprof.go        | 236 +--------------------------
 src/runtime/proc.go         |   4 +-
 src/runtime/runtime1.go     |  42 +++--
 src/runtime/runtime2.go     |   8 -
 11 files changed, 23 insertions(+), 627 deletions(-)

diff --git a/src/runtime/debug.go b/src/runtime/debug.go
index 3233ce8ee7..0e61692f3d 100644
--- a/src/runtime/debug.go
+++ b/src/runtime/debug.go
@@ -52,17 +52,6 @@ func NumCgoCall() int64 {
 	return n
 }
 
-func totalMutexWaitTimeNanos() int64 {
-	total := sched.totalMutexWaitTime.Load()
-
-	total += sched.totalRuntimeLockWaitTime.Load()
-	for mp := (*m)(atomic.Loadp(unsafe.Pointer(&allm))); mp != nil; mp = mp.alllink {
-		total += mp.mLockProfile.waitTime.Load()
-	}
-
-	return total
-}
-
 // NumGoroutine returns the number of goroutines that currently exist.
 func NumGoroutine() int {
 	return int(gcount())
diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go
index 5f0e24d4f1..d2f3563956 100644
--- a/src/runtime/export_test.go
+++ b/src/runtime/export_test.go
@@ -1340,27 +1340,6 @@ func PageCachePagesLeaked() (leaked uintptr) {
 	return
 }
 
-type Mutex = mutex
-
-var Lock = lock
-var Unlock = unlock
-
-func MutexContended(l *mutex) bool {
-	switch atomic.Loaduintptr(&l.key) {
-	case 0: // unlocked
-		return false
-	case 1: // locked
-		return false
-	default: // an M is sleeping
-		return true
-	}
-}
-
-func SemRootLock(addr *uint32) *mutex {
-	root := semtable.rootFor(addr)
-	return &root.lock
-}
-
 var Semacquire = semacquire
 var Semrelease1 = semrelease1
diff --git a/src/runtime/extern.go b/src/runtime/extern.go
index d199720b9b..f8db296e6b 100644
--- a/src/runtime/extern.go
+++ b/src/runtime/extern.go
@@ -145,14 +145,6 @@ It is a comma-separated list of name=val pairs setting these named variables:
 	risk in that scenario. Currently not supported on Windows, plan9 or js/wasm.
 	Setting this option for some applications can produce large traces, so use with care.
 
-	profileruntimelocks: setting profileruntimelocks=1 includes call stacks related to
-	contention on runtime-internal locks in the "mutex" profile, subject to the
-	MutexProfileFraction setting. The call stacks will correspond to the unlock call that
-	released the lock. But instead of the value corresponding to the amount of contention that
-	call stack caused, it corresponds to the amount of time the caller of unlock had to wait
-	in its original call to lock. A future release is expected to align those and remove this
-	setting.
-
 	invalidptr: invalidptr=1 (the default) causes the garbage collector and stack
 	copier to crash the program if an invalid pointer value (for example, 1) is
 	found in a pointer-typed location. Setting invalidptr=0 disables this check.
diff --git a/src/runtime/lock_futex.go b/src/runtime/lock_futex.go
index b4f57d5259..cc7d465ef1 100644
--- a/src/runtime/lock_futex.go
+++ b/src/runtime/lock_futex.go
@@ -71,8 +71,6 @@ func lock2(l *mutex) {
 	// its wakeup call.
 	wait := v
 
-	timer := &lockTimer{lock: l}
-	timer.begin()
 	// On uniprocessors, no point spinning.
 	// On multiprocessors, spin for ACTIVE_SPIN attempts.
 	spin := 0
@@ -84,7 +82,6 @@ func lock2(l *mutex) {
 		for i := 0; i < spin; i++ {
 			for l.key == mutex_unlocked {
 				if atomic.Cas(key32(&l.key), mutex_unlocked, wait) {
-					timer.end()
 					return
 				}
 			}
@@ -95,7 +92,6 @@ func lock2(l *mutex) {
 		for i := 0; i < passive_spin; i++ {
 			for l.key == mutex_unlocked {
 				if atomic.Cas(key32(&l.key), mutex_unlocked, wait) {
-					timer.end()
 					return
 				}
 			}
@@ -105,7 +101,6 @@ func lock2(l *mutex) {
 		// Sleep.
 		v = atomic.Xchg(key32(&l.key), mutex_sleeping)
 		if v == mutex_unlocked {
-			timer.end()
 			return
 		}
 		wait = mutex_sleeping
@@ -127,7 +122,6 @@ func unlock2(l *mutex) {
 	}
 
 	gp := getg()
-	gp.m.mLockProfile.recordUnlock(l)
 	gp.m.locks--
 	if gp.m.locks < 0 {
 		throw("runtimeĀ·unlock: lock count")
diff --git a/src/runtime/lock_sema.go b/src/runtime/lock_sema.go
index 84cd344db8..9afba08b0b 100644
--- a/src/runtime/lock_sema.go
+++ b/src/runtime/lock_sema.go
@@ -48,8 +48,6 @@ func lock2(l *mutex) {
 	}
 	semacreate(gp.m)
 
-	timer := &lockTimer{lock: l}
-	timer.begin()
 	// On uniprocessor's, no point spinning.
 	// On multiprocessors, spin for ACTIVE_SPIN attempts.
 	spin := 0
@@ -62,7 +60,6 @@ Loop:
 		if v&locked == 0 {
 			// Unlocked. Try to lock.
 			if atomic.Casuintptr(&l.key, v, v|locked) {
-				timer.end()
 				return
 			}
 			i = 0
@@ -122,7 +119,6 @@ func unlock2(l *mutex) {
 			}
 		}
 	}
-	gp.m.mLockProfile.recordUnlock(l)
 	gp.m.locks--
 	if gp.m.locks < 0 {
 		throw("runtimeĀ·unlock: lock count")
diff --git a/src/runtime/metrics.go b/src/runtime/metrics.go
index f97a3804ab..4bd167135d 100644
--- a/src/runtime/metrics.go
+++ b/src/runtime/metrics.go
@@ -470,7 +470,7 @@ func initMetrics() {
 		"/sync/mutex/wait/total:seconds": {
 			compute: func(_ *statAggregate, out *metricValue) {
 				out.kind = metricKindFloat64
-				out.scalar = float64bits(nsToSec(totalMutexWaitTimeNanos()))
+				out.scalar = float64bits(nsToSec(sched.totalMutexWaitTime.Load()))
 			},
 		},
 	}
diff --git a/src/runtime/metrics_test.go b/src/runtime/metrics_test.go
index 56f3340b16..1e82897381 100644
--- a/src/runtime/metrics_test.go
+++ b/src/runtime/metrics_test.go
@@ -6,21 +6,15 @@ package runtime_test
 import (
 	"bytes"
-	"fmt"
-	"internal/goexperiment"
-	"internal/profile"
 	"os"
 	"reflect"
 	"runtime"
 	"runtime/debug"
 	"runtime/metrics"
-	"runtime/pprof"
 	"runtime/trace"
-	"slices"
 	"sort"
 	"strings"
 	"sync"
-	"sync/atomic"
 	"testing"
 	"time"
 	"unsafe"
 )
@@ -945,305 +939,3 @@ func TestSchedPauseMetrics(t *testing.T) {
 		})
 	}
 }
-
-func TestRuntimeLockMetricsAndProfile(t *testing.T) {
-	old := runtime.SetMutexProfileFraction(0) // enabled during sub-tests
-	defer runtime.SetMutexProfileFraction(old)
-	if old != 0 {
-		t.Fatalf("need MutexProfileRate 0, got %d", old)
-	}
-
-	{
-		before := os.Getenv("GODEBUG")
-		for _, s := range strings.Split(before, ",") {
-			if strings.HasPrefix(s, "profileruntimelocks=") {
-				t.Logf("GODEBUG includes explicit setting %q", s)
-			}
-		}
-		defer func() { os.Setenv("GODEBUG", before) }()
-		os.Setenv("GODEBUG", fmt.Sprintf("%s,profileruntimelocks=1", before))
-	}
-
-	loadProfile := func(t *testing.T) *profile.Profile {
-		var w bytes.Buffer
-		pprof.Lookup("mutex").WriteTo(&w, 0)
-		p, err := profile.Parse(&w)
-		if err != nil {
-			t.Fatalf("failed to parse profile: %v", err)
-		}
-		if err := p.CheckValid(); err != nil {
-			t.Fatalf("invalid profile: %v", err)
-		}
-		return p
-	}
-
-	measureDelta := func(t *testing.T, fn func()) (metricGrowth, profileGrowth float64, p *profile.Profile) {
-		beforeProfile := loadProfile(t)
-		beforeMetrics := []metrics.Sample{{Name: "/sync/mutex/wait/total:seconds"}}
-		metrics.Read(beforeMetrics)
-
-		fn()
-
-		afterProfile := loadProfile(t)
-		afterMetrics := []metrics.Sample{{Name: "/sync/mutex/wait/total:seconds"}}
-		metrics.Read(afterMetrics)
-
-		sumSamples := func(p *profile.Profile, i int) int64 {
-			var sum int64
-			for _, s := range p.Sample {
-				sum += s.Value[i]
-			}
-			return sum
-		}
-
-		metricGrowth = afterMetrics[0].Value.Float64() - beforeMetrics[0].Value.Float64()
-		profileGrowth = float64(sumSamples(afterProfile, 1)-sumSamples(beforeProfile, 1)) * time.Nanosecond.Seconds()
-
-		// The internal/profile package does not support compaction; this delta
-		// profile will include separate positive and negative entries.
-		p = afterProfile.Copy()
-		if len(beforeProfile.Sample) > 0 {
-			err := p.Merge(beforeProfile, -1)
-			if err != nil {
-				t.Fatalf("Merge profiles: %v", err)
-			}
-		}
-
-		return metricGrowth, profileGrowth, p
-	}
-
-	testcase := func(stk []string, workers int, fn func() bool) func(t *testing.T) (metricGrowth, profileGrowth float64, n, value int64) {
-		return func(t *testing.T) (metricGrowth, profileGrowth float64, n, value int64) {
-			metricGrowth, profileGrowth, p := measureDelta(t, func() {
-				var started, stopped sync.WaitGroup
-				started.Add(workers)
-				stopped.Add(workers)
-				for i := 0; i < workers; i++ {
-					w := &contentionWorker{
-						before: func() {
-							started.Done()
-							started.Wait()
-						},
-						after: func() {
-							stopped.Done()
-						},
-						fn: fn,
-					}
-					go w.run()
-				}
-				stopped.Wait()
-			})
-
-			if profileGrowth == 0 {
-				t.Errorf("no increase in mutex profile")
-			}
-			if metricGrowth == 0 {
-				t.Errorf("no increase in /sync/mutex/wait/total:seconds metric")
-			}
-			// This comparison is possible because the time measurements in support of
-			// runtime/pprof and runtime/metrics for runtime-internal locks are so close
-			// together. It doesn't work as well for user-space contention, where the
-			// involved goroutines are not _Grunnable the whole time and so need to pass
-			// through the scheduler.
-			t.Logf("lock contention growth in runtime/pprof's view (%fs)", profileGrowth)
-			t.Logf("lock contention growth in runtime/metrics' view (%fs)", metricGrowth)
-
-			if goexperiment.StaticLockRanking {
-				if !slices.ContainsFunc(stk, func(s string) bool {
-					return s == "runtime.systemstack" || s == "runtime.mcall" || s == "runtime.mstart"
-				}) {
-					// stk is a call stack that is still on the user stack when
-					// it calls runtime.unlock. Add the extra function that
-					// we'll see, when the static lock ranking implementation of
-					// runtime.unlockWithRank switches to the system stack.
-					stk = append([]string{"runtime.unlockWithRank"}, stk...)
-				}
-			}
-
-			var stks [][]string
-			for _, s := range p.Sample {
-				var have []string
-				for _, loc := range s.Location {
-					for _, line := range loc.Line {
-						have = append(have, line.Function.Name)
-					}
-				}
-				stks = append(stks, have)
-				if slices.Equal(have, stk) {
-					n += s.Value[0]
-					value += s.Value[1]
-				}
-			}
-			t.Logf("stack %v has samples totaling n=%d value=%d", stk, n, value)
-			if n == 0 && value == 0 {
-				t.Logf("profile:\n%s", p)
-				for _, have := range stks {
-					t.Logf("have stack %v", have)
-				}
-				t.Errorf("want stack %v", stk)
-			}
-
-			return metricGrowth, profileGrowth, n, value
-		}
-	}
-
-	name := t.Name()
-
-	t.Run("runtime.lock", func(t *testing.T) {
-		mus := make([]runtime.Mutex, 100)
-		var needContention atomic.Int64
-		delay := 10 * time.Microsecond
-		delayMicros := delay.Microseconds()
-
-		// The goroutine that acquires the lock will only proceed when it
-		// detects that its partner is contended for the lock. That will lead to
-		// live-lock if anything (such as a STW) prevents the partner goroutine
-		// from running. Allowing the contention workers to pause and restart
-		// (to allow a STW to proceed) makes it harder to confirm that we're
-		// counting the correct number of contention events, since some locks
-		// will end up contended twice. Instead, disable the GC.
-		defer debug.SetGCPercent(debug.SetGCPercent(-1))
-
-		const workers = 2
-		if runtime.GOMAXPROCS(0) < workers {
-			t.Skipf("contention on runtime-internal locks requires GOMAXPROCS >= %d", workers)
-		}
-
-		fn := func() bool {
-			n := int(needContention.Load())
-			if n < 0 {
-				return false
-			}
-			mu := &mus[n]
-
-			runtime.Lock(mu)
-			for int(needContention.Load()) == n {
-				if runtime.MutexContended(mu) {
-					// make them wait a little while
-					for start := runtime.Nanotime(); (runtime.Nanotime()-start)/1000 < delayMicros; {
-						runtime.Usleep(uint32(delayMicros))
-					}
-					break
-				}
-			}
-			runtime.Unlock(mu)
-			needContention.Store(int64(n - 1))
-
-			return true
-		}
-
-		stk := []string{
-			"runtime.unlock",
-			"runtime_test." + name + ".func5.1",
-			"runtime_test.(*contentionWorker).run",
-		}
-
-		t.Run("sample-1", func(t *testing.T) {
-			old := runtime.SetMutexProfileFraction(1)
-			defer runtime.SetMutexProfileFraction(old)
-
-			needContention.Store(int64(len(mus) - 1))
-			metricGrowth, profileGrowth, n, _ := testcase(stk, workers, fn)(t)
-
-			if have, want := metricGrowth, delay.Seconds()*float64(len(mus)); have < want {
-				// The test imposes a delay with usleep, verified with calls to
-				// nanotime. Compare against the runtime/metrics package's view
-				// (based on nanotime) rather than runtime/pprof's view (based
-				// on cputicks).
-				t.Errorf("runtime/metrics reported less than the known minimum contention duration (%fs < %fs)", have, want)
-			}
-			if have, want := n, int64(len(mus)); have != want {
-				t.Errorf("mutex profile reported contention count different from the known true count (%d != %d)", have, want)
-			}
-
-			const slop = 1.5 // account for nanotime vs cputicks
-			if profileGrowth > slop*metricGrowth || metricGrowth > slop*profileGrowth {
-				t.Errorf("views differ by more than %fx", slop)
-			}
-		})
-
-		t.Run("sample-2", func(t *testing.T) {
-			old := runtime.SetMutexProfileFraction(2)
-			defer runtime.SetMutexProfileFraction(old)
-
-			needContention.Store(int64(len(mus) - 1))
-			metricGrowth, profileGrowth, n, _ := testcase(stk, workers, fn)(t)
-
-			// With 100 trials and profile fraction of 2, we expect to capture
-			// 50 samples. Allow the test to pass if we get at least 20 samples;
-			// the CDF of the binomial distribution says there's less than a
-			// 1e-9 chance of that, which is an acceptably low flakiness rate.
-			const samplingSlop = 2.5
-
-			if have, want := metricGrowth, delay.Seconds()*float64(len(mus)); samplingSlop*have < want {
-				// The test imposes a delay with usleep, verified with calls to
-				// nanotime. Compare against the runtime/metrics package's view
-				// (based on nanotime) rather than runtime/pprof's view (based
-				// on cputicks).
-				t.Errorf("runtime/metrics reported less than the known minimum contention duration (%f * %fs < %fs)", samplingSlop, have, want)
-			}
-			if have, want := n, int64(len(mus)); float64(have) > float64(want)*samplingSlop || float64(want) > float64(have)*samplingSlop {
-				t.Errorf("mutex profile reported contention count too different from the expected count (%d far from %d)", have, want)
-			}
-
-			const timerSlop = 1.5 * samplingSlop // account for nanotime vs cputicks, plus the two views' independent sampling
-			if profileGrowth > timerSlop*metricGrowth || metricGrowth > timerSlop*profileGrowth {
-				t.Errorf("views differ by more than %fx", timerSlop)
-			}
-		})
-	})
-
-	t.Run("runtime.semrelease", func(t *testing.T) {
-		old := runtime.SetMutexProfileFraction(1)
-		defer runtime.SetMutexProfileFraction(old)
-
-		const workers = 3
-		if runtime.GOMAXPROCS(0) < workers {
-			t.Skipf("creating and observing contention on runtime-internal semaphores requires GOMAXPROCS >= %d", workers)
-		}
-
-		var sem uint32 = 1
-		var sawContention atomic.Int32
-		var need int32 = 1000 // counteract low timer resolution by requiring more samples
-		fn := func() bool {
-			if sawContention.Load() >= need {
-				return false
-			}
-			runtime.Semacquire(&sem)
-			runtime.Semrelease1(&sem, false, 0)
-			if runtime.MutexContended(runtime.SemRootLock(&sem)) {
-				sawContention.Add(1)
-			}
-			return true
-		}
-
-		stk := []string{
-			"runtime.unlock",
-			"runtime.semrelease1",
-			"runtime_test.TestRuntimeLockMetricsAndProfile.func6.1",
-			"runtime_test.(*contentionWorker).run",
-		}
-
-		// Verify that we get call stack we expect, with anything more than zero
-		// nanoseconds / zero samples. The duration of each contention event is
-		// too small relative to the expected overhead for us to verify its
-		// value more directly. Leave that to the explicit lock/unlock test.
-
-		testcase(stk, workers, fn)(t)
-	})
-}
-
-// contentionWorker provides cleaner call stacks for lock contention profile tests
-type contentionWorker struct {
-	before func()
-	fn     func() bool
-	after  func()
-}
-
-func (w *contentionWorker) run() {
-	defer w.after()
-	w.before()
-
-	for w.fn() {
-	}
-}
diff --git a/src/runtime/mprof.go b/src/runtime/mprof.go
index b1930b3020..af461eef1a 100644
--- a/src/runtime/mprof.go
+++ b/src/runtime/mprof.go
@@ -234,10 +234,6 @@ func newBucket(typ bucketType, nstk int) *bucket {
 // stk returns the slice in b holding the stack.
 func (b *bucket) stk() []uintptr {
 	stk := (*[maxStack]uintptr)(add(unsafe.Pointer(b), unsafe.Sizeof(*b)))
-	if b.nstk > maxStack {
-		// prove that slicing works; otherwise a failure requires a P
-		throw("bad profile stack count")
-	}
 	return stk[:b.nstk:b.nstk]
 }
 
@@ -513,237 +509,7 @@ func saveblockevent(cycles, rate int64, skip int, which bucketType) {
 	} else {
 		nstk = gcallers(gp.m.curg, skip, stk[:])
 	}
-
-	saveBlockEventStack(cycles, rate, stk[:nstk], which)
-}
-
-// lockTimer assists with profiling contention on runtime-internal locks.
-//
-// There are several steps between the time that an M experiences contention and
-// when that contention may be added to the profile. This comes from our
-// constraints: We need to keep the critical section of each lock small,
-// especially when those locks are contended. The reporting code cannot acquire
-// new locks until the M has released all other locks, which means no memory
-// allocations and encourages use of (temporary) M-local storage.
-//
-// The M will have space for storing one call stack that caused contention, and
-// for the magnitude of that contention. It will also have space to store the
-// magnitude of additional contention the M caused, since it only has space to
-// remember one call stack and might encounter several contention events before
-// it releases all of its locks and is thus able to transfer the local buffer
-// into the profile.
-//
-// The M will collect the call stack when it unlocks the contended lock. That
-// minimizes the impact on the critical section of the contended lock, and
-// matches the mutex profile's behavior for contention in sync.Mutex: measured
-// at the Unlock method.
-//
-// The profile for contention on sync.Mutex blames the caller of Unlock for the
-// amount of contention experienced by the callers of Lock which had to wait.
-// When there are several critical sections, this allows identifying which of
-// them is responsible.
-//
-// Matching that behavior for runtime-internal locks will require identifying
-// which Ms are blocked on the mutex. The semaphore-based implementation is
-// ready to allow that, but the futex-based implementation will require a bit
-// more work. Until then, we report contention on runtime-internal locks with a
-// call stack taken from the unlock call (like the rest of the user-space
-// "mutex" profile), but assign it a duration value based on how long the
-// previous lock call took (like the user-space "block" profile).
-//
-// Thus, reporting the call stacks of runtime-internal lock contention is
-// guarded by GODEBUG for now. Set GODEBUG=profileruntimelocks=1 to enable.
-//
-// TODO(rhysh): plumb through the delay duration, remove GODEBUG, update comment
-//
-// The M will track this by storing a pointer to the lock; lock/unlock pairs for
-// runtime-internal locks are always on the same M.
-//
-// Together, that demands several steps for recording contention. First, when
-// finally acquiring a contended lock, the M decides whether it should plan to
-// profile that event by storing a pointer to the lock in its "to be profiled
-// upon unlock" field. If that field is already set, it uses the relative
-// magnitudes to weight a random choice between itself and the other lock, with
-// the loser's time being added to the "additional contention" field. Otherwise
-// if the M's call stack buffer is occupied, it does the comparison against that
-// sample's magnitude.
-//
-// Second, having unlocked a mutex the M checks to see if it should capture the
-// call stack into its local buffer. Finally, when the M unlocks its last mutex,
-// it transfers the local buffer into the profile. As part of that step, it also
-// transfers any "additional contention" time to the profile. Any lock
-// contention that it experiences while adding samples to the profile will be
-// recorded later as "additional contention" and not include a call stack, to
-// avoid an echo.
-type lockTimer struct {
-	lock      *mutex
-	timeRate  int64
-	timeStart int64
-	tickStart int64
-}
-
-func (lt *lockTimer) begin() {
-	rate := int64(atomic.Load64(&mutexprofilerate))
-
-	lt.timeRate = gTrackingPeriod
-	if rate != 0 && rate < lt.timeRate {
-		lt.timeRate = rate
-	}
-	if int64(fastrand())%lt.timeRate == 0 {
-		lt.timeStart = nanotime()
-	}
-
-	if rate > 0 && int64(fastrand())%rate == 0 {
-		lt.tickStart = cputicks()
-	}
-}
-
-func (lt *lockTimer) end() {
-	gp := getg()
-
-	if lt.timeStart != 0 {
-		nowTime := nanotime()
-		gp.m.mLockProfile.waitTime.Add((nowTime - lt.timeStart) * lt.timeRate)
-	}
-
-	if lt.tickStart != 0 {
-		nowTick := cputicks()
-		gp.m.mLockProfile.recordLock(nowTick-lt.tickStart, lt.lock)
-	}
-}
-
-type mLockProfile struct {
-	waitTime   atomic.Int64      // total nanoseconds spent waiting in runtime.lockWithRank
-	stack      [maxStack]uintptr // stack that experienced contention in runtime.lockWithRank
-	pending    uintptr           // *mutex that experienced contention (to be traceback-ed)
-	cycles     int64             // cycles attributable to "pending" (if set), otherwise to "stack"
-	cyclesLost int64             // contention for which we weren't able to record a call stack
-	disabled   bool              // attribute all time to "lost"
-}
-
-func (prof *mLockProfile) recordLock(cycles int64, l *mutex) {
-	if cycles <= 0 {
-		return
-	}
-
-	if prof.disabled {
-		// We're experiencing contention while attempting to report contention.
-		// Make a note of its magnitude, but don't allow it to be the sole cause
-		// of another contention report.
-		prof.cyclesLost += cycles
-		return
-	}
-
-	if uintptr(unsafe.Pointer(l)) == prof.pending {
-		// Optimization: we'd already planned to profile this same lock (though
-		// possibly from a different unlock site).
-		prof.cycles += cycles
-		return
-	}
-
-	if prev := prof.cycles; prev > 0 {
-		// We can only store one call stack for runtime-internal lock contention
-		// on this M, and we've already got one. Decide which should stay, and
-		// add the other to the report for runtime._LostContendedLock.
-		prevScore := fastrand64() % uint64(prev)
-		thisScore := fastrand64() % uint64(cycles)
-		if prevScore > thisScore {
-			prof.cyclesLost += cycles
-			return
-		} else {
-			prof.cyclesLost += prev
-		}
-	}
-	// Saving the *mutex as a uintptr is safe because:
-	// - lockrank_on.go does this too, which gives it regular exercise
-	// - the lock would only move if it's stack allocated, which means it
-	//   cannot experience multi-M contention
-	prof.pending = uintptr(unsafe.Pointer(l))
-	prof.cycles = cycles
-}
-
-// From unlock2, we might not be holding a p in this code.
-//
-//go:nowritebarrierrec
-func (prof *mLockProfile) recordUnlock(l *mutex) {
-	if uintptr(unsafe.Pointer(l)) == prof.pending {
-		prof.captureStack()
-	}
-	if gp := getg(); gp.m.locks == 1 && gp.m.mLockProfile.cycles != 0 {
-		prof.store()
-	}
-}
-
-func (prof *mLockProfile) captureStack() {
-	skip := 3 // runtime.(*mLockProfile).recordUnlock runtime.unlock2 runtime.unlockWithRank
-	if staticLockRanking {
-		// When static lock ranking is enabled, we'll always be on the system
-		// stack at this point. There will be a runtime.unlockWithRank.func1
-		// frame, and if the call to runtime.unlock took place on a user stack
-		// then there'll also be a runtime.systemstack frame. To keep stack
-		// traces somewhat consistent whether or not static lock ranking is
-		// enabled, we'd like to skip those. But it's hard to tell how long
-		// we've been on the system stack so accept an extra frame in that case,
-		// with a leaf of "runtime.unlockWithRank runtime.unlock" instead of
-		// "runtime.unlock".
-		skip += 1 // runtime.unlockWithRank.func1
-	}
-	prof.pending = 0
-
-	if debug.profileruntimelocks.Load() == 0 {
-		prof.stack[0] = abi.FuncPCABIInternal(_LostContendedLock) + sys.PCQuantum
-		prof.stack[1] = 0
-		return
-	}
-
-	var nstk int
-	gp := getg()
-	sp := getcallersp()
-	pc := getcallerpc()
-	systemstack(func() {
-		var u unwinder
-		u.initAt(pc, sp, 0, gp, unwindSilentErrors|unwindJumpStack)
-		nstk = tracebackPCs(&u, skip, prof.stack[:])
-	})
-	if nstk < len(prof.stack) {
-		prof.stack[nstk] = 0
-	}
-}
-
-func (prof *mLockProfile) store() {
-	// Report any contention we experience within this function as "lost"; it's
-	// important that the act of reporting a contention event not lead to a
-	// reportable contention event. This also means we can use prof.stack
-	// without copying, since it won't change during this function.
-	mp := acquirem()
-	prof.disabled = true
-
-	nstk := maxStack
-	for i := 0; i < nstk; i++ {
-		if pc := prof.stack[i]; pc == 0 {
-			nstk = i
-			break
-		}
-	}
-
-	cycles, lost := prof.cycles, prof.cyclesLost
-	prof.cycles, prof.cyclesLost = 0, 0
-
-	rate := int64(atomic.Load64(&mutexprofilerate))
-	saveBlockEventStack(cycles, rate, prof.stack[:nstk], mutexProfile)
-	if lost > 0 {
-		lostStk := [...]uintptr{
-			abi.FuncPCABIInternal(_LostContendedLock) + sys.PCQuantum,
-		}
-		saveBlockEventStack(lost, rate, lostStk[:], mutexProfile)
-	}
-
-	prof.disabled = false
-	releasem(mp)
-}
-
-func saveBlockEventStack(cycles, rate int64, stk []uintptr, which bucketType) {
-	b := stkbucket(which, 0, stk, true)
+	b := stkbucket(which, 0, stk[:nstk], true)
 	bp := b.bp()
 
 	lock(&profBlockLock)
diff --git a/src/runtime/proc.go b/src/runtime/proc.go
index 66d7da887e..ea011e3192 100644
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@@ -919,7 +919,7 @@ func mcommoninit(mp *m, id int64) {
 	// when it is just in a register or thread-local storage.
 	mp.alllink = allm
 
-	// NumCgoCall() and others iterate over allm w/o schedlock,
+	// NumCgoCall() iterates over allm w/o schedlock,
 	// so we need to publish it safely.
 	atomicstorep(unsafe.Pointer(&allm), unsafe.Pointer(mp))
 	unlock(&sched.lock)
@@ -1852,7 +1852,6 @@ found:
 	unlock(&sched.lock)
 
 	atomic.Xadd64(&ncgocall, int64(mp.ncgocall))
-	sched.totalRuntimeLockWaitTime.Add(mp.mLockProfile.waitTime.Load())
 
 	// Release the P.
 	handoffp(releasep())
@@ -5271,7 +5270,6 @@ func _ExternalCode() { _ExternalCode() }
 func _LostExternalCode()          { _LostExternalCode() }
 func _GC()                        { _GC() }
 func _LostSIGPROFDuringAtomic64() { _LostSIGPROFDuringAtomic64() }
-func _LostContendedLock()         { _LostContendedLock() }
 func _VDSO() { _VDSO() }
 
 // Called if we receive a SIGPROF signal.
diff --git a/src/runtime/runtime1.go b/src/runtime/runtime1.go
index 877d94eef2..489dcdd79c 100644
--- a/src/runtime/runtime1.go
+++ b/src/runtime/runtime1.go
@@ -307,27 +307,26 @@ type dbgVar struct {
 // existing int var for that value, which may
 // already have an initial value.
 var debug struct {
-	cgocheck            int32
-	clobberfree         int32
-	dontfreezetheworld  int32
-	efence              int32
-	gccheckmark         int32
-	gcpacertrace        int32
-	gcshrinkstackoff    int32
-	gcstoptheworld      int32
-	gctrace             int32
-	invalidptr          int32
-	madvdontneed        int32 // for Linux; issue 28466
-	profileruntimelocks atomic.Int32
-	scavtrace           int32
-	scheddetail         int32
-	schedtrace          int32
-	tracebackancestors  int32
-	asyncpreemptoff     int32
-	harddecommit        int32
-	adaptivestackstart  int32
-	tracefpunwindoff    int32
-	traceadvanceperiod  int32
+	cgocheck           int32
+	clobberfree        int32
+	dontfreezetheworld int32
+	efence             int32
+	gccheckmark        int32
+	gcpacertrace       int32
+	gcshrinkstackoff   int32
+	gcstoptheworld     int32
+	gctrace            int32
+	invalidptr         int32
+	madvdontneed       int32 // for Linux; issue 28466
+	scavtrace          int32
+	scheddetail        int32
+	schedtrace         int32
+	tracebackancestors int32
+	asyncpreemptoff    int32
+	harddecommit       int32
+	adaptivestackstart int32
+	tracefpunwindoff   int32
+	traceadvanceperiod int32
 
 	// debug.malloc is used as a combined debug check
 	// in the malloc function and should be set
@@ -353,7 +352,6 @@ var dbgvars = []*dbgVar{
 	{name: "gctrace", value: &debug.gctrace},
 	{name: "invalidptr", value: &debug.invalidptr},
 	{name: "madvdontneed", value: &debug.madvdontneed},
-	{name: "profileruntimelocks", atomic: &debug.profileruntimelocks},
 	{name: "sbrk", value: &debug.sbrk},
 	{name: "scavtrace", value: &debug.scavtrace},
 	{name: "scheddetail", value: &debug.scheddetail},
diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go
index 6bdd66766d..e64c3c5695 100644
--- a/src/runtime/runtime2.go
+++ b/src/runtime/runtime2.go
@@ -593,8 +593,6 @@ type m struct {
 	lockedInt     uint32   // tracking for internal lockOSThread
 	nextwaitm     muintptr // next m waiting for lock
 
-	mLockProfile mLockProfile // fields relating to runtime.lock contention
-
 	// wait* are used to carry arguments from gopark into park_m, because
 	// there's no stack to put them on. That is their sole purpose.
 	waitunlockf          func(*g, unsafe.Pointer) bool
@@ -902,12 +900,6 @@ type schedt struct {
 	// stwTotalTimeOther covers the others.
 	stwTotalTimeGC    timeHistogram
 	stwTotalTimeOther timeHistogram
-
-	// totalRuntimeLockWaitTime (plus the value of lockWaitTime on each M in
-	// allm) is the sum of time goroutines have spent in _Grunnable and with an
-	// M, but waiting for locks within the runtime. This field stores the value
-	// for Ms that have exited.
-	totalRuntimeLockWaitTime atomic.Int64
 }
 
 // Values for the flags field of a sigTabT.