mirror of
https://github.com/golang/go
synced 2024-11-23 00:30:07 -07:00
runtime: put ReadMemStats debug assertions behind a double-check mode
ReadMemStats has a few assertions it makes about the consistency of the stats it's about to produce. Specifically, how those stats line up with runtime-internal stats. These checks are generally useful, but crashing just because some stats are wrong is a heavy price to pay. For a long time this wasn't a problem, but very recently it became a real problem. It turns out that there's real benign skew that can happen wherein sysmon (which doesn't synchronize with a STW) generates a trace event when tracing is enabled, and may mutate some stats while ReadMemStats is running its checks. Fix this by synchronizing with both sysmon and the tracer. This is a bit heavy-handed, but better that than false positives. Also, put the checks behind a debug mode. We want to reduce the risk of backporting this change, and again, it's not great to crash just because user-facing stats are off. Still, enable this debug mode during the runtime tests so we don't lose quite as much coverage from disabling these checks by default. Fixes #64401. Change-Id: I9adb3e5c7161d207648d07373a11da8a5f0fda9a Reviewed-on: https://go-review.googlesource.com/c/go/+/545277 LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: Michael Pratt <mpratt@google.com> Reviewed-by: Felix Geisendörfer <felix.geisendoerfer@datadoghq.com>
This commit is contained in:
parent
5df4a6376f
commit
b2efd1de97
@ -464,6 +464,8 @@ func ReadMetricsSlow(memStats *MemStats, samplesp unsafe.Pointer, len, cap int)
|
||||
startTheWorld(stw)
|
||||
}
|
||||
|
||||
var DoubleCheckReadMemStats = &doubleCheckReadMemStats
|
||||
|
||||
// ReadMemStatsSlow returns both the runtime-computed MemStats and
|
||||
// MemStats accumulated by scanning the heap.
|
||||
func ReadMemStatsSlow() (base, slow MemStats) {
|
||||
|
@ -577,6 +577,11 @@ func TestPageAccounting(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func init() {
|
||||
// Enable ReadMemStats' double-check mode.
|
||||
*runtime.DoubleCheckReadMemStats = true
|
||||
}
|
||||
|
||||
func TestReadMemStats(t *testing.T) {
|
||||
base, slow := runtime.ReadMemStatsSlow()
|
||||
if base != slow {
|
||||
|
@ -361,6 +361,11 @@ func ReadMemStats(m *MemStats) {
|
||||
startTheWorld(stw)
|
||||
}
|
||||
|
||||
// doubleCheckReadMemStats controls a double-check mode for ReadMemStats that
|
||||
// ensures consistency between the values that ReadMemStats is using and the
|
||||
// runtime-internal stats.
|
||||
var doubleCheckReadMemStats = false
|
||||
|
||||
// readmemstats_m populates stats for internal runtime values.
|
||||
//
|
||||
// The world must be stopped.
|
||||
@ -435,56 +440,65 @@ func readmemstats_m(stats *MemStats) {
|
||||
|
||||
heapGoal := gcController.heapGoal()
|
||||
|
||||
// The world is stopped, so the consistent stats (after aggregation)
|
||||
// should be identical to some combination of memstats. In particular:
|
||||
//
|
||||
// * memstats.heapInUse == inHeap
|
||||
// * memstats.heapReleased == released
|
||||
// * memstats.heapInUse + memstats.heapFree == committed - inStacks - inWorkBufs - inPtrScalarBits
|
||||
// * memstats.totalAlloc == totalAlloc
|
||||
// * memstats.totalFree == totalFree
|
||||
//
|
||||
// Check if that's actually true.
|
||||
//
|
||||
// TODO(mknyszek): Maybe don't throw here. It would be bad if a
|
||||
// bug in otherwise benign accounting caused the whole application
|
||||
// to crash.
|
||||
if gcController.heapInUse.load() != uint64(consStats.inHeap) {
|
||||
print("runtime: heapInUse=", gcController.heapInUse.load(), "\n")
|
||||
print("runtime: consistent value=", consStats.inHeap, "\n")
|
||||
throw("heapInUse and consistent stats are not equal")
|
||||
}
|
||||
if gcController.heapReleased.load() != uint64(consStats.released) {
|
||||
print("runtime: heapReleased=", gcController.heapReleased.load(), "\n")
|
||||
print("runtime: consistent value=", consStats.released, "\n")
|
||||
throw("heapReleased and consistent stats are not equal")
|
||||
}
|
||||
heapRetained := gcController.heapInUse.load() + gcController.heapFree.load()
|
||||
consRetained := uint64(consStats.committed - consStats.inStacks - consStats.inWorkBufs - consStats.inPtrScalarBits)
|
||||
if heapRetained != consRetained {
|
||||
print("runtime: global value=", heapRetained, "\n")
|
||||
print("runtime: consistent value=", consRetained, "\n")
|
||||
throw("measures of the retained heap are not equal")
|
||||
}
|
||||
if gcController.totalAlloc.Load() != totalAlloc {
|
||||
print("runtime: totalAlloc=", gcController.totalAlloc.Load(), "\n")
|
||||
print("runtime: consistent value=", totalAlloc, "\n")
|
||||
throw("totalAlloc and consistent stats are not equal")
|
||||
}
|
||||
if gcController.totalFree.Load() != totalFree {
|
||||
print("runtime: totalFree=", gcController.totalFree.Load(), "\n")
|
||||
print("runtime: consistent value=", totalFree, "\n")
|
||||
throw("totalFree and consistent stats are not equal")
|
||||
}
|
||||
// Also check that mappedReady lines up with totalMapped - released.
|
||||
// This isn't really the same type of "make sure consistent stats line up" situation,
|
||||
// but this is an opportune time to check.
|
||||
if gcController.mappedReady.Load() != totalMapped-uint64(consStats.released) {
|
||||
print("runtime: mappedReady=", gcController.mappedReady.Load(), "\n")
|
||||
print("runtime: totalMapped=", totalMapped, "\n")
|
||||
print("runtime: released=", uint64(consStats.released), "\n")
|
||||
print("runtime: totalMapped-released=", totalMapped-uint64(consStats.released), "\n")
|
||||
throw("mappedReady and other memstats are not equal")
|
||||
if doubleCheckReadMemStats {
|
||||
// Only check this if we're debugging. It would be bad to crash an application
|
||||
// just because the debugging stats are wrong. We mostly rely on tests to catch
|
||||
// these issues, and we enable the double check mode for tests.
|
||||
//
|
||||
// The world is stopped, so the consistent stats (after aggregation)
|
||||
// should be identical to some combination of memstats. In particular:
|
||||
//
|
||||
// * memstats.heapInUse == inHeap
|
||||
// * memstats.heapReleased == released
|
||||
// * memstats.heapInUse + memstats.heapFree == committed - inStacks - inWorkBufs - inPtrScalarBits
|
||||
// * memstats.totalAlloc == totalAlloc
|
||||
// * memstats.totalFree == totalFree
|
||||
//
|
||||
// Check if that's actually true.
|
||||
//
|
||||
// Prevent sysmon and the tracer from skewing the stats since they can
|
||||
// act without synchronizing with a STW. See #64401.
|
||||
lock(&sched.sysmonlock)
|
||||
lock(&trace.lock)
|
||||
if gcController.heapInUse.load() != uint64(consStats.inHeap) {
|
||||
print("runtime: heapInUse=", gcController.heapInUse.load(), "\n")
|
||||
print("runtime: consistent value=", consStats.inHeap, "\n")
|
||||
throw("heapInUse and consistent stats are not equal")
|
||||
}
|
||||
if gcController.heapReleased.load() != uint64(consStats.released) {
|
||||
print("runtime: heapReleased=", gcController.heapReleased.load(), "\n")
|
||||
print("runtime: consistent value=", consStats.released, "\n")
|
||||
throw("heapReleased and consistent stats are not equal")
|
||||
}
|
||||
heapRetained := gcController.heapInUse.load() + gcController.heapFree.load()
|
||||
consRetained := uint64(consStats.committed - consStats.inStacks - consStats.inWorkBufs - consStats.inPtrScalarBits)
|
||||
if heapRetained != consRetained {
|
||||
print("runtime: global value=", heapRetained, "\n")
|
||||
print("runtime: consistent value=", consRetained, "\n")
|
||||
throw("measures of the retained heap are not equal")
|
||||
}
|
||||
if gcController.totalAlloc.Load() != totalAlloc {
|
||||
print("runtime: totalAlloc=", gcController.totalAlloc.Load(), "\n")
|
||||
print("runtime: consistent value=", totalAlloc, "\n")
|
||||
throw("totalAlloc and consistent stats are not equal")
|
||||
}
|
||||
if gcController.totalFree.Load() != totalFree {
|
||||
print("runtime: totalFree=", gcController.totalFree.Load(), "\n")
|
||||
print("runtime: consistent value=", totalFree, "\n")
|
||||
throw("totalFree and consistent stats are not equal")
|
||||
}
|
||||
// Also check that mappedReady lines up with totalMapped - released.
|
||||
// This isn't really the same type of "make sure consistent stats line up" situation,
|
||||
// but this is an opportune time to check.
|
||||
if gcController.mappedReady.Load() != totalMapped-uint64(consStats.released) {
|
||||
print("runtime: mappedReady=", gcController.mappedReady.Load(), "\n")
|
||||
print("runtime: totalMapped=", totalMapped, "\n")
|
||||
print("runtime: released=", uint64(consStats.released), "\n")
|
||||
print("runtime: totalMapped-released=", totalMapped-uint64(consStats.released), "\n")
|
||||
throw("mappedReady and other memstats are not equal")
|
||||
}
|
||||
unlock(&trace.lock)
|
||||
unlock(&sched.sysmonlock)
|
||||
}
|
||||
|
||||
// We've calculated all the values we need. Now, populate stats.
|
||||
|
Loading…
Reference in New Issue
Block a user