// Source: go/src/runtime/mstats.go
// (mirror of https://github.com/golang/go, synced 2024-10-02 08:18:32 -06:00)

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Memory statistics
package runtime
import (
"runtime/internal/atomic"
"runtime/internal/sys"
"unsafe"
)
// Statistics.
// If you edit this structure, also edit type MemStats below.
// Their layouts must match exactly.
//
// For detailed descriptions see the documentation for MemStats.
// Fields that differ from MemStats are further documented here.
//
// Many of these fields are updated on the fly, while others are only
// updated when updatememstats is called.
type mstats struct {
// General statistics.
alloc uint64 // bytes allocated and not yet freed
total_alloc uint64 // bytes allocated (even if freed)
sys uint64 // bytes obtained from system (should be sum of xxx_sys below, no locking, approximate)
nlookup uint64 // number of pointer lookups
nmalloc uint64 // number of mallocs
nfree uint64 // number of frees
// Statistics about malloc heap.
// Protected by mheap.lock
//
// Like MemStats, heap_sys and heap_inuse do not count memory
// in manually-managed spans.
heap_alloc uint64 // bytes allocated and not yet freed (same as alloc above)
heap_sys uint64 // virtual address space obtained from system for GC'd heap
heap_idle uint64 // bytes in idle spans
heap_inuse uint64 // bytes in _MSpanInUse spans
heap_released uint64 // bytes released to the os
heap_objects uint64 // total number of allocated objects
// TODO(austin): heap_released is both useless and inaccurate
// in its current form. It's useless because, from the user's
// and OS's perspectives, there's no difference between a page
// that has not yet been faulted in and a page that has been
// released back to the OS. We could fix this by considering
// newly mapped spans to be "released". It's inaccurate
// because when we split a large span for allocation, we
// "unrelease" all pages in the large span and not just the
// ones we split off for use. This is trickier to fix because
// we currently don't know which pages of a span we've
// released. We could fix it by separating "free" and
// "released" spans, but then we have to allocate from runs of
// free and released spans.
// Statistics about allocation of low-level fixed-size structures.
// Protected by FixAlloc locks.
stacks_inuse uint64 // bytes in manually-managed stack spans
stacks_sys uint64 // only counts newosproc0 stack in mstats; differs from MemStats.StackSys
mspan_inuse uint64 // mspan structures
mspan_sys uint64
mcache_inuse uint64 // mcache structures
mcache_sys uint64
buckhash_sys uint64 // profiling bucket hash table
gc_sys uint64
other_sys uint64
// Statistics about garbage collector.
// Protected by mheap or stopping the world during GC.
next_gc uint64 // goal heap_live for when next GC ends; ^0 if disabled
last_gc_unix uint64 // last gc (in unix time)
pause_total_ns uint64
pause_ns [256]uint64 // circular buffer of recent gc pause lengths
pause_end [256]uint64 // circular buffer of recent gc end times (nanoseconds since 1970)
numgc uint32
numforcedgc uint32 // number of user-forced GCs
gc_cpu_fraction float64 // fraction of CPU time used by GC
enablegc bool
debuggc bool
// Statistics about allocation size classes.
by_size [_NumSizeClasses]struct {
size uint32
nmalloc uint64
nfree uint64
}
// Statistics below here are not exported to MemStats directly.
runtime: introduce heap_live; replace use of heap_alloc in GC Currently there are two main consumers of memstats.heap_alloc: updatememstats (aka ReadMemStats) and shouldtriggergc. updatememstats recomputes heap_alloc from the ground up, so we don't need to keep heap_alloc up to date for it. shouldtriggergc wants to know how many bytes were marked by the previous GC plus how many bytes have been allocated since then, but this *isn't* what heap_alloc tracks. heap_alloc also includes objects that are not marked and haven't yet been swept. Introduce a new memstat called heap_live that actually tracks what shouldtriggergc wants to know and stop keeping heap_alloc up to date. Unlike heap_alloc, heap_live follows a simple sawtooth that drops during each mark termination and increases monotonically between GCs. heap_alloc, on the other hand, has much more complicated behavior: it may drop during sweep termination, slowly decreases from background sweeping between GCs, is roughly unaffected by allocation as long as there are unswept spans (because we sweep and allocate at the same rate), and may go up after background sweeping is done depending on the GC trigger. heap_live simplifies computing next_gc and using it to figure out when to trigger garbage collection. Currently, we guess next_gc at the end of a cycle and update it as we sweep and get a better idea of how much heap was marked. Now, since we're directly tracking how much heap is marked, we can directly compute next_gc. This also corrects bugs that could cause us to trigger GC early. Currently, in any case where sweep termination actually finds spans to sweep, heap_alloc is an overestimation of live heap, so we'll trigger GC too early. heap_live, on the other hand, is unaffected by sweeping. Change-Id: I1f96807b6ed60d4156e8173a8e68745ffc742388 Reviewed-on: https://go-review.googlesource.com/8389 Reviewed-by: Russ Cox <rsc@golang.org>
2015-03-30 16:01:32 -06:00
last_gc_nanotime uint64 // last gc (monotonic time)
tinyallocs uint64 // number of tiny allocations that didn't cause actual allocation; not exported to go directly
runtime: introduce heap_live; replace use of heap_alloc in GC Currently there are two main consumers of memstats.heap_alloc: updatememstats (aka ReadMemStats) and shouldtriggergc. updatememstats recomputes heap_alloc from the ground up, so we don't need to keep heap_alloc up to date for it. shouldtriggergc wants to know how many bytes were marked by the previous GC plus how many bytes have been allocated since then, but this *isn't* what heap_alloc tracks. heap_alloc also includes objects that are not marked and haven't yet been swept. Introduce a new memstat called heap_live that actually tracks what shouldtriggergc wants to know and stop keeping heap_alloc up to date. Unlike heap_alloc, heap_live follows a simple sawtooth that drops during each mark termination and increases monotonically between GCs. heap_alloc, on the other hand, has much more complicated behavior: it may drop during sweep termination, slowly decreases from background sweeping between GCs, is roughly unaffected by allocation as long as there are unswept spans (because we sweep and allocate at the same rate), and may go up after background sweeping is done depending on the GC trigger. heap_live simplifies computing next_gc and using it to figure out when to trigger garbage collection. Currently, we guess next_gc at the end of a cycle and update it as we sweep and get a better idea of how much heap was marked. Now, since we're directly tracking how much heap is marked, we can directly compute next_gc. This also corrects bugs that could cause us to trigger GC early. Currently, in any case where sweep termination actually finds spans to sweep, heap_alloc is an overestimation of live heap, so we'll trigger GC too early. heap_live, on the other hand, is unaffected by sweeping. Change-Id: I1f96807b6ed60d4156e8173a8e68745ffc742388 Reviewed-on: https://go-review.googlesource.com/8389 Reviewed-by: Russ Cox <rsc@golang.org>
2015-03-30 16:01:32 -06:00
// triggerRatio is the heap growth ratio that triggers marking.
//
// E.g., if this is 0.6, then GC should start when the live
// heap has reached 1.6 times the heap size marked by the
// previous cycle. This should be ≤ GOGC/100 so the trigger
// heap size is less than the goal heap size. This is set
// during mark termination for the next cycle's trigger.
triggerRatio float64
// gc_trigger is the heap size that triggers marking.
//
// When heap_live ≥ gc_trigger, the mark phase will start.
// This is also the heap size by which proportional sweeping
// must be complete.
//
// This is computed from triggerRatio during mark termination
// for the next cycle's trigger.
gc_trigger uint64
runtime: introduce heap_live; replace use of heap_alloc in GC Currently there are two main consumers of memstats.heap_alloc: updatememstats (aka ReadMemStats) and shouldtriggergc. updatememstats recomputes heap_alloc from the ground up, so we don't need to keep heap_alloc up to date for it. shouldtriggergc wants to know how many bytes were marked by the previous GC plus how many bytes have been allocated since then, but this *isn't* what heap_alloc tracks. heap_alloc also includes objects that are not marked and haven't yet been swept. Introduce a new memstat called heap_live that actually tracks what shouldtriggergc wants to know and stop keeping heap_alloc up to date. Unlike heap_alloc, heap_live follows a simple sawtooth that drops during each mark termination and increases monotonically between GCs. heap_alloc, on the other hand, has much more complicated behavior: it may drop during sweep termination, slowly decreases from background sweeping between GCs, is roughly unaffected by allocation as long as there are unswept spans (because we sweep and allocate at the same rate), and may go up after background sweeping is done depending on the GC trigger. heap_live simplifies computing next_gc and using it to figure out when to trigger garbage collection. Currently, we guess next_gc at the end of a cycle and update it as we sweep and get a better idea of how much heap was marked. Now, since we're directly tracking how much heap is marked, we can directly compute next_gc. This also corrects bugs that could cause us to trigger GC early. Currently, in any case where sweep termination actually finds spans to sweep, heap_alloc is an overestimation of live heap, so we'll trigger GC too early. heap_live, on the other hand, is unaffected by sweeping. Change-Id: I1f96807b6ed60d4156e8173a8e68745ffc742388 Reviewed-on: https://go-review.googlesource.com/8389 Reviewed-by: Russ Cox <rsc@golang.org>
2015-03-30 16:01:32 -06:00
// heap_live is the number of bytes considered live by the GC.
// That is: retained by the most recent GC plus allocated
runtime: fix (sometimes major) underestimation of heap_live Currently, we update memstats.heap_live from mcache.local_cachealloc whenever we lock the heap (e.g., to obtain a fresh span or to release an unused span). However, under the right circumstances, local_cachealloc can accumulate allocations up to the size of the *entire heap* without flushing them to heap_live. Specifically, since span allocations from an mcentral don't lock the heap, if a large number of pages are held in an mcentral and the application continues to use and free objects of that size class (e.g., the BinaryTree17 benchmark), local_cachealloc won't be flushed until the mcentral runs out of spans. This is a problem because, unlike many of the memory statistics that are purely informative, heap_live is used to determine when the garbage collector should start and how hard it should work. This commit eliminates local_cachealloc, instead atomically updating heap_live directly. To control contention, we do this only when obtaining a span from an mcentral. Furthermore, we make heap_live conservative: allocating a span assumes that all free slots in that span will be used and accounts for these when the span is allocated, *before* the objects themselves are. This is important because 1) this triggers the GC earlier than necessary rather than potentially too late and 2) this leads to a conservative GC rate rather than a GC rate that is potentially too low. Alternatively, we could have flushed local_cachealloc when it passed some threshold, but this would require determining a threshold and would cause heap_live to underestimate the true value rather than overestimate. Fixes #12199. 
name old time/op new time/op delta BinaryTree17-12 2.88s ± 4% 2.88s ± 1% ~ (p=0.470 n=19+19) Fannkuch11-12 2.48s ± 1% 2.48s ± 1% ~ (p=0.243 n=16+19) FmtFprintfEmpty-12 50.9ns ± 2% 50.7ns ± 1% ~ (p=0.238 n=15+14) FmtFprintfString-12 175ns ± 1% 171ns ± 1% -2.48% (p=0.000 n=18+18) FmtFprintfInt-12 159ns ± 1% 158ns ± 1% -0.78% (p=0.000 n=19+18) FmtFprintfIntInt-12 270ns ± 1% 265ns ± 2% -1.67% (p=0.000 n=18+18) FmtFprintfPrefixedInt-12 235ns ± 1% 234ns ± 0% ~ (p=0.362 n=18+19) FmtFprintfFloat-12 309ns ± 1% 308ns ± 1% -0.41% (p=0.001 n=18+19) FmtManyArgs-12 1.10µs ± 1% 1.08µs ± 0% -1.96% (p=0.000 n=19+18) GobDecode-12 7.81ms ± 1% 7.80ms ± 1% ~ (p=0.425 n=18+19) GobEncode-12 6.53ms ± 1% 6.53ms ± 1% ~ (p=0.817 n=19+19) Gzip-12 312ms ± 1% 312ms ± 2% ~ (p=0.967 n=19+20) Gunzip-12 42.0ms ± 1% 41.9ms ± 1% ~ (p=0.172 n=19+19) HTTPClientServer-12 63.7µs ± 1% 63.8µs ± 1% ~ (p=0.639 n=19+19) JSONEncode-12 16.4ms ± 1% 16.4ms ± 1% ~ (p=0.954 n=19+19) JSONDecode-12 58.5ms ± 1% 57.8ms ± 1% -1.27% (p=0.000 n=18+19) Mandelbrot200-12 3.86ms ± 1% 3.88ms ± 0% +0.44% (p=0.000 n=18+18) GoParse-12 3.67ms ± 2% 3.66ms ± 1% -0.52% (p=0.001 n=18+19) RegexpMatchEasy0_32-12 100ns ± 1% 100ns ± 0% ~ (p=0.257 n=19+18) RegexpMatchEasy0_1K-12 347ns ± 1% 347ns ± 1% ~ (p=0.527 n=18+18) RegexpMatchEasy1_32-12 83.7ns ± 2% 83.1ns ± 2% ~ (p=0.096 n=18+19) RegexpMatchEasy1_1K-12 509ns ± 1% 505ns ± 1% -0.75% (p=0.000 n=18+19) RegexpMatchMedium_32-12 130ns ± 2% 129ns ± 1% ~ (p=0.962 n=20+20) RegexpMatchMedium_1K-12 39.5µs ± 2% 39.4µs ± 1% ~ (p=0.376 n=20+19) RegexpMatchHard_32-12 2.04µs ± 0% 2.04µs ± 1% ~ (p=0.195 n=18+17) RegexpMatchHard_1K-12 61.4µs ± 1% 61.4µs ± 1% ~ (p=0.885 n=19+19) Revcomp-12 540ms ± 2% 542ms ± 4% ~ (p=0.552 n=19+17) Template-12 69.6ms ± 1% 71.2ms ± 1% +2.39% (p=0.000 n=20+20) TimeParse-12 357ns ± 1% 357ns ± 1% ~ (p=0.883 n=18+20) TimeFormat-12 379ns ± 1% 362ns ± 1% -4.53% (p=0.000 n=18+19) [Geo mean] 62.0µs 61.8µs -0.44% name old time/op new time/op delta XBenchGarbage-12 5.89ms ± 2% 
5.81ms ± 2% -1.41% (p=0.000 n=19+18) Change-Id: I96b31cca6ae77c30693a891cff3fe663fa2447a0 Reviewed-on: https://go-review.googlesource.com/17748 Run-TryBot: Austin Clements <austin@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Russ Cox <rsc@golang.org>
2015-12-11 15:49:14 -07:00
// since then. heap_live <= heap_alloc, since heap_alloc
// includes unmarked objects that have not yet been swept (and
// hence goes up as we allocate and down as we sweep) while
// heap_live excludes these objects (and hence only goes up
// between GCs).
//
// This is updated atomically without locking. To reduce
// contention, this is updated only when obtaining a span from
// an mcentral and at this point it counts all of the
// unallocated slots in that span (which will be allocated
// before that mcache obtains another span from that
// mcentral). Hence, it slightly overestimates the "true" live
// heap size. It's better to overestimate than to
// underestimate because 1) this triggers the GC earlier than
// necessary rather than potentially too late and 2) this
// leads to a conservative GC rate rather than a GC rate that
// is potentially too low.
//
// Reads should likewise be atomic (or during STW).
//
runtime: fix (sometimes major) underestimation of heap_live Currently, we update memstats.heap_live from mcache.local_cachealloc whenever we lock the heap (e.g., to obtain a fresh span or to release an unused span). However, under the right circumstances, local_cachealloc can accumulate allocations up to the size of the *entire heap* without flushing them to heap_live. Specifically, since span allocations from an mcentral don't lock the heap, if a large number of pages are held in an mcentral and the application continues to use and free objects of that size class (e.g., the BinaryTree17 benchmark), local_cachealloc won't be flushed until the mcentral runs out of spans. This is a problem because, unlike many of the memory statistics that are purely informative, heap_live is used to determine when the garbage collector should start and how hard it should work. This commit eliminates local_cachealloc, instead atomically updating heap_live directly. To control contention, we do this only when obtaining a span from an mcentral. Furthermore, we make heap_live conservative: allocating a span assumes that all free slots in that span will be used and accounts for these when the span is allocated, *before* the objects themselves are. This is important because 1) this triggers the GC earlier than necessary rather than potentially too late and 2) this leads to a conservative GC rate rather than a GC rate that is potentially too low. Alternatively, we could have flushed local_cachealloc when it passed some threshold, but this would require determining a threshold and would cause heap_live to underestimate the true value rather than overestimate. Fixes #12199. 
name old time/op new time/op delta BinaryTree17-12 2.88s ± 4% 2.88s ± 1% ~ (p=0.470 n=19+19) Fannkuch11-12 2.48s ± 1% 2.48s ± 1% ~ (p=0.243 n=16+19) FmtFprintfEmpty-12 50.9ns ± 2% 50.7ns ± 1% ~ (p=0.238 n=15+14) FmtFprintfString-12 175ns ± 1% 171ns ± 1% -2.48% (p=0.000 n=18+18) FmtFprintfInt-12 159ns ± 1% 158ns ± 1% -0.78% (p=0.000 n=19+18) FmtFprintfIntInt-12 270ns ± 1% 265ns ± 2% -1.67% (p=0.000 n=18+18) FmtFprintfPrefixedInt-12 235ns ± 1% 234ns ± 0% ~ (p=0.362 n=18+19) FmtFprintfFloat-12 309ns ± 1% 308ns ± 1% -0.41% (p=0.001 n=18+19) FmtManyArgs-12 1.10µs ± 1% 1.08µs ± 0% -1.96% (p=0.000 n=19+18) GobDecode-12 7.81ms ± 1% 7.80ms ± 1% ~ (p=0.425 n=18+19) GobEncode-12 6.53ms ± 1% 6.53ms ± 1% ~ (p=0.817 n=19+19) Gzip-12 312ms ± 1% 312ms ± 2% ~ (p=0.967 n=19+20) Gunzip-12 42.0ms ± 1% 41.9ms ± 1% ~ (p=0.172 n=19+19) HTTPClientServer-12 63.7µs ± 1% 63.8µs ± 1% ~ (p=0.639 n=19+19) JSONEncode-12 16.4ms ± 1% 16.4ms ± 1% ~ (p=0.954 n=19+19) JSONDecode-12 58.5ms ± 1% 57.8ms ± 1% -1.27% (p=0.000 n=18+19) Mandelbrot200-12 3.86ms ± 1% 3.88ms ± 0% +0.44% (p=0.000 n=18+18) GoParse-12 3.67ms ± 2% 3.66ms ± 1% -0.52% (p=0.001 n=18+19) RegexpMatchEasy0_32-12 100ns ± 1% 100ns ± 0% ~ (p=0.257 n=19+18) RegexpMatchEasy0_1K-12 347ns ± 1% 347ns ± 1% ~ (p=0.527 n=18+18) RegexpMatchEasy1_32-12 83.7ns ± 2% 83.1ns ± 2% ~ (p=0.096 n=18+19) RegexpMatchEasy1_1K-12 509ns ± 1% 505ns ± 1% -0.75% (p=0.000 n=18+19) RegexpMatchMedium_32-12 130ns ± 2% 129ns ± 1% ~ (p=0.962 n=20+20) RegexpMatchMedium_1K-12 39.5µs ± 2% 39.4µs ± 1% ~ (p=0.376 n=20+19) RegexpMatchHard_32-12 2.04µs ± 0% 2.04µs ± 1% ~ (p=0.195 n=18+17) RegexpMatchHard_1K-12 61.4µs ± 1% 61.4µs ± 1% ~ (p=0.885 n=19+19) Revcomp-12 540ms ± 2% 542ms ± 4% ~ (p=0.552 n=19+17) Template-12 69.6ms ± 1% 71.2ms ± 1% +2.39% (p=0.000 n=20+20) TimeParse-12 357ns ± 1% 357ns ± 1% ~ (p=0.883 n=18+20) TimeFormat-12 379ns ± 1% 362ns ± 1% -4.53% (p=0.000 n=18+19) [Geo mean] 62.0µs 61.8µs -0.44% name old time/op new time/op delta XBenchGarbage-12 5.89ms ± 2% 
5.81ms ± 2% -1.41% (p=0.000 n=19+18) Change-Id: I96b31cca6ae77c30693a891cff3fe663fa2447a0 Reviewed-on: https://go-review.googlesource.com/17748 Run-TryBot: Austin Clements <austin@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Russ Cox <rsc@golang.org>
2015-12-11 15:49:14 -07:00
// Whenever this is updated, call traceHeapAlloc() and
// gcController.revise().
runtime: introduce heap_live; replace use of heap_alloc in GC Currently there are two main consumers of memstats.heap_alloc: updatememstats (aka ReadMemStats) and shouldtriggergc. updatememstats recomputes heap_alloc from the ground up, so we don't need to keep heap_alloc up to date for it. shouldtriggergc wants to know how many bytes were marked by the previous GC plus how many bytes have been allocated since then, but this *isn't* what heap_alloc tracks. heap_alloc also includes objects that are not marked and haven't yet been swept. Introduce a new memstat called heap_live that actually tracks what shouldtriggergc wants to know and stop keeping heap_alloc up to date. Unlike heap_alloc, heap_live follows a simple sawtooth that drops during each mark termination and increases monotonically between GCs. heap_alloc, on the other hand, has much more complicated behavior: it may drop during sweep termination, slowly decreases from background sweeping between GCs, is roughly unaffected by allocation as long as there are unswept spans (because we sweep and allocate at the same rate), and may go up after background sweeping is done depending on the GC trigger. heap_live simplifies computing next_gc and using it to figure out when to trigger garbage collection. Currently, we guess next_gc at the end of a cycle and update it as we sweep and get a better idea of how much heap was marked. Now, since we're directly tracking how much heap is marked, we can directly compute next_gc. This also corrects bugs that could cause us to trigger GC early. Currently, in any case where sweep termination actually finds spans to sweep, heap_alloc is an overestimation of live heap, so we'll trigger GC too early. heap_live, on the other hand, is unaffected by sweeping. Change-Id: I1f96807b6ed60d4156e8173a8e68745ffc742388 Reviewed-on: https://go-review.googlesource.com/8389 Reviewed-by: Russ Cox <rsc@golang.org>
2015-03-30 16:01:32 -06:00
heap_live uint64
// heap_scan is the number of bytes of "scannable" heap. This
// is the live heap (as counted by heap_live), but omitting
// no-scan objects and no-scan tails of objects.
runtime: fix (sometimes major) underestimation of heap_live Currently, we update memstats.heap_live from mcache.local_cachealloc whenever we lock the heap (e.g., to obtain a fresh span or to release an unused span). However, under the right circumstances, local_cachealloc can accumulate allocations up to the size of the *entire heap* without flushing them to heap_live. Specifically, since span allocations from an mcentral don't lock the heap, if a large number of pages are held in an mcentral and the application continues to use and free objects of that size class (e.g., the BinaryTree17 benchmark), local_cachealloc won't be flushed until the mcentral runs out of spans. This is a problem because, unlike many of the memory statistics that are purely informative, heap_live is used to determine when the garbage collector should start and how hard it should work. This commit eliminates local_cachealloc, instead atomically updating heap_live directly. To control contention, we do this only when obtaining a span from an mcentral. Furthermore, we make heap_live conservative: allocating a span assumes that all free slots in that span will be used and accounts for these when the span is allocated, *before* the objects themselves are. This is important because 1) this triggers the GC earlier than necessary rather than potentially too late and 2) this leads to a conservative GC rate rather than a GC rate that is potentially too low. Alternatively, we could have flushed local_cachealloc when it passed some threshold, but this would require determining a threshold and would cause heap_live to underestimate the true value rather than overestimate. Fixes #12199. 
name old time/op new time/op delta BinaryTree17-12 2.88s ± 4% 2.88s ± 1% ~ (p=0.470 n=19+19) Fannkuch11-12 2.48s ± 1% 2.48s ± 1% ~ (p=0.243 n=16+19) FmtFprintfEmpty-12 50.9ns ± 2% 50.7ns ± 1% ~ (p=0.238 n=15+14) FmtFprintfString-12 175ns ± 1% 171ns ± 1% -2.48% (p=0.000 n=18+18) FmtFprintfInt-12 159ns ± 1% 158ns ± 1% -0.78% (p=0.000 n=19+18) FmtFprintfIntInt-12 270ns ± 1% 265ns ± 2% -1.67% (p=0.000 n=18+18) FmtFprintfPrefixedInt-12 235ns ± 1% 234ns ± 0% ~ (p=0.362 n=18+19) FmtFprintfFloat-12 309ns ± 1% 308ns ± 1% -0.41% (p=0.001 n=18+19) FmtManyArgs-12 1.10µs ± 1% 1.08µs ± 0% -1.96% (p=0.000 n=19+18) GobDecode-12 7.81ms ± 1% 7.80ms ± 1% ~ (p=0.425 n=18+19) GobEncode-12 6.53ms ± 1% 6.53ms ± 1% ~ (p=0.817 n=19+19) Gzip-12 312ms ± 1% 312ms ± 2% ~ (p=0.967 n=19+20) Gunzip-12 42.0ms ± 1% 41.9ms ± 1% ~ (p=0.172 n=19+19) HTTPClientServer-12 63.7µs ± 1% 63.8µs ± 1% ~ (p=0.639 n=19+19) JSONEncode-12 16.4ms ± 1% 16.4ms ± 1% ~ (p=0.954 n=19+19) JSONDecode-12 58.5ms ± 1% 57.8ms ± 1% -1.27% (p=0.000 n=18+19) Mandelbrot200-12 3.86ms ± 1% 3.88ms ± 0% +0.44% (p=0.000 n=18+18) GoParse-12 3.67ms ± 2% 3.66ms ± 1% -0.52% (p=0.001 n=18+19) RegexpMatchEasy0_32-12 100ns ± 1% 100ns ± 0% ~ (p=0.257 n=19+18) RegexpMatchEasy0_1K-12 347ns ± 1% 347ns ± 1% ~ (p=0.527 n=18+18) RegexpMatchEasy1_32-12 83.7ns ± 2% 83.1ns ± 2% ~ (p=0.096 n=18+19) RegexpMatchEasy1_1K-12 509ns ± 1% 505ns ± 1% -0.75% (p=0.000 n=18+19) RegexpMatchMedium_32-12 130ns ± 2% 129ns ± 1% ~ (p=0.962 n=20+20) RegexpMatchMedium_1K-12 39.5µs ± 2% 39.4µs ± 1% ~ (p=0.376 n=20+19) RegexpMatchHard_32-12 2.04µs ± 0% 2.04µs ± 1% ~ (p=0.195 n=18+17) RegexpMatchHard_1K-12 61.4µs ± 1% 61.4µs ± 1% ~ (p=0.885 n=19+19) Revcomp-12 540ms ± 2% 542ms ± 4% ~ (p=0.552 n=19+17) Template-12 69.6ms ± 1% 71.2ms ± 1% +2.39% (p=0.000 n=20+20) TimeParse-12 357ns ± 1% 357ns ± 1% ~ (p=0.883 n=18+20) TimeFormat-12 379ns ± 1% 362ns ± 1% -4.53% (p=0.000 n=18+19) [Geo mean] 62.0µs 61.8µs -0.44% name old time/op new time/op delta XBenchGarbage-12 5.89ms ± 2% 
5.81ms ± 2% -1.41% (p=0.000 n=19+18) Change-Id: I96b31cca6ae77c30693a891cff3fe663fa2447a0 Reviewed-on: https://go-review.googlesource.com/17748 Run-TryBot: Austin Clements <austin@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Russ Cox <rsc@golang.org>
2015-12-11 15:49:14 -07:00
//
// Whenever this is updated, call gcController.revise().
heap_scan uint64
// heap_marked is the number of bytes marked by the previous
// GC. After mark termination, heap_live == heap_marked, but
// unlike heap_live, heap_marked does not change until the
// next mark termination.
heap_marked uint64
}
// memstats is the package-level mstats value.
var memstats mstats
// A MemStats records statistics about the memory allocator.
type MemStats struct {
// General statistics.
// Alloc is bytes of allocated heap objects.
//
// This is the same as HeapAlloc (see below).
Alloc uint64
// TotalAlloc is cumulative bytes allocated for heap objects.
//
// TotalAlloc increases as heap objects are allocated, but
// unlike Alloc and HeapAlloc, it does not decrease when
// objects are freed.
TotalAlloc uint64
// Sys is the total bytes of memory obtained from the OS.
//
// Sys is the sum of the XSys fields below. Sys measures the
// virtual address space reserved by the Go runtime for the
// heap, stacks, and other internal data structures. It's
// likely that not all of the virtual address space is backed
// by physical memory at any given moment, though in general
// it all was at some point.
Sys uint64
// Lookups is the number of pointer lookups performed by the
// runtime.
//
// This is primarily useful for debugging runtime internals.
Lookups uint64
// Mallocs is the cumulative count of heap objects allocated.
// The number of live objects is Mallocs - Frees.
Mallocs uint64
// Frees is the cumulative count of heap objects freed.
Frees uint64
// Heap memory statistics.
//
// Interpreting the heap statistics requires some knowledge of
// how Go organizes memory. Go divides the virtual address
// space of the heap into "spans", which are contiguous
// regions of memory 8K or larger. A span may be in one of
// three states:
//
// An "idle" span contains no objects or other data. The
// physical memory backing an idle span can be released back
// to the OS (but the virtual address space never is), or it
// can be converted into an "in use" or "stack" span.
//
// An "in use" span contains at least one heap object and may
// have free space available to allocate more heap objects.
//
// A "stack" span is used for goroutine stacks. Stack spans
// are not considered part of the heap. A span can change
// between heap and stack memory; it is never used for both
// simultaneously.
// HeapAlloc is bytes of allocated heap objects.
//
// "Allocated" heap objects include all reachable objects, as
// well as unreachable objects that the garbage collector has
// not yet freed. Specifically, HeapAlloc increases as heap
// objects are allocated and decreases as the heap is swept
// and unreachable objects are freed. Sweeping occurs
// incrementally between GC cycles, so these two processes
// occur simultaneously, and as a result HeapAlloc tends to
// change smoothly (in contrast with the sawtooth that is
// typical of stop-the-world garbage collectors).
HeapAlloc uint64
// HeapSys is bytes of heap memory obtained from the OS.
//
// HeapSys measures the amount of virtual address space
// reserved for the heap. This includes virtual address space
// that has been reserved but not yet used, which consumes no
// physical memory, but tends to be small, as well as virtual
// address space for which the physical memory has been
// returned to the OS after it became unused (see HeapReleased
// for a measure of the latter).
//
// HeapSys estimates the largest size the heap has had.
HeapSys uint64
// HeapIdle is bytes in idle (unused) spans.
//
// Idle spans have no objects in them. These spans could be
// (and may already have been) returned to the OS, or they can
// be reused for heap allocations, or they can be reused as
// stack memory.
//
// HeapIdle minus HeapReleased estimates the amount of memory
// that could be returned to the OS, but is being retained by
// the runtime so it can grow the heap without requesting more
// memory from the OS. If this difference is significantly
// larger than the heap size, it indicates there was a recent
// transient spike in live heap size.
HeapIdle uint64
// HeapInuse is bytes in in-use spans.
//
// In-use spans have at least one object in them. These spans
// can only be used for other objects of roughly the same
// size.
//
// HeapInuse minus HeapAlloc estimates the amount of memory
// that has been dedicated to particular size classes, but is
// not currently being used. This is an upper bound on
// fragmentation, but in general this memory can be reused
// efficiently.
HeapInuse uint64
// HeapReleased is bytes of physical memory returned to the OS.
//
// This counts heap memory from idle spans that was returned
// to the OS and has not yet been reacquired for the heap.
HeapReleased uint64
// HeapObjects is the number of allocated heap objects.
//
// Like HeapAlloc, this increases as objects are allocated and
// decreases as the heap is swept and unreachable objects are
// freed.
HeapObjects uint64
// Stack memory statistics.
//
// Stacks are not considered part of the heap, but the runtime
// can reuse a span of heap memory for stack memory, and
// vice-versa.
// StackInuse is bytes in stack spans.
//
// In-use stack spans have at least one stack in them. These
// spans can only be used for other stacks of the same size.
//
// There is no StackIdle because unused stack spans are
// returned to the heap (and hence counted toward HeapIdle).
StackInuse uint64
// StackSys is bytes of stack memory obtained from the OS.
//
// StackSys is StackInuse, plus any memory obtained directly
// from the OS for OS thread stacks (which should be minimal).
StackSys uint64
// Off-heap memory statistics.
//
// The following statistics measure runtime-internal
// structures that are not allocated from heap memory (usually
// because they are part of implementing the heap). Unlike
// heap or stack memory, any memory allocated to these
// structures is dedicated to these structures.
//
// These are primarily useful for debugging runtime memory
// overheads.
// MSpanInuse is bytes of allocated mspan structures.
MSpanInuse uint64
// MSpanSys is bytes of memory obtained from the OS for mspan
// structures.
MSpanSys uint64
// MCacheInuse is bytes of allocated mcache structures.
MCacheInuse uint64
// MCacheSys is bytes of memory obtained from the OS for
// mcache structures.
MCacheSys uint64
// BuckHashSys is bytes of memory in profiling bucket hash tables.
BuckHashSys uint64
// GCSys is bytes of memory in garbage collection metadata.
GCSys uint64
// OtherSys is bytes of memory in miscellaneous off-heap
// runtime allocations.
OtherSys uint64
// Garbage collector statistics.
// NextGC is the target heap size of the next GC cycle.
//
// The garbage collector's goal is to keep HeapAlloc ≤ NextGC.
// At the end of each GC cycle, the target for the next cycle
// is computed based on the amount of reachable data and the
// value of GOGC.
NextGC uint64
// LastGC is the time the last garbage collection finished, as
// nanoseconds since 1970 (the UNIX epoch).
LastGC uint64
// PauseTotalNs is the cumulative nanoseconds in GC
// stop-the-world pauses since the program started.
//
// During a stop-the-world pause, all goroutines are paused
// and only the garbage collector can run.
PauseTotalNs uint64
// PauseNs is a circular buffer of recent GC stop-the-world
// pause times in nanoseconds.
//
// The most recent pause is at PauseNs[(NumGC+255)%256]. In
// general, PauseNs[N%256] records the time paused in the most
// recent N%256th GC cycle. There may be multiple pauses per
// GC cycle; this is the sum of all pauses during a cycle.
PauseNs [256]uint64
// PauseEnd is a circular buffer of recent GC pause end times,
// as nanoseconds since 1970 (the UNIX epoch).
//
// This buffer is filled the same way as PauseNs. There may be
// multiple pauses per GC cycle; this records the end of the
// last pause in a cycle.
PauseEnd [256]uint64
// NumGC is the number of completed GC cycles.
NumGC uint32
// NumForcedGC is the number of GC cycles that were forced by
// the application calling the GC function.
NumForcedGC uint32
// GCCPUFraction is the fraction of this program's available
// CPU time used by the GC since the program started.
//
// GCCPUFraction is expressed as a number between 0 and 1,
// where 0 means GC has consumed none of this program's CPU. A
// program's available CPU time is defined as the integral of
// GOMAXPROCS since the program started. That is, if
// GOMAXPROCS is 2 and a program has been running for 10
// seconds, its "available CPU" is 20 seconds. GCCPUFraction
// does not include CPU time used for write barrier activity.
//
// This is the same as the fraction of CPU reported by
// GODEBUG=gctrace=1.
GCCPUFraction float64
// EnableGC indicates that GC is enabled. It is always true,
// even if GOGC=off.
EnableGC bool
// DebugGC is currently unused.
DebugGC bool
// BySize reports per-size class allocation statistics.
//
// BySize[N] gives statistics for allocations of size S where
// BySize[N-1].Size < S ≤ BySize[N].Size.
//
// This does not report allocations larger than BySize[60].Size.
BySize [61]struct {
// Size is the maximum byte size of an object in this
// size class.
Size uint32
// Mallocs is the cumulative count of heap objects
// allocated in this size class. The cumulative bytes
// of allocation is Size*Mallocs. The number of live
// objects in this size class is Mallocs - Frees.
Mallocs uint64
// Frees is the cumulative count of heap objects freed
// in this size class.
Frees uint64
}
}
// sizeof_C_MStats is the byte size of the leading portion of mstats
// that has a counterpart in MemStats. It is expected to be equal to
// unsafe.Sizeof(MemStats{}).
//
// The two structures diverge at the trailing by_size array: MemStats
// stays at 61 entries for backward compatibility even though
// NumSizeClasses has changed, and everything that follows by_size in
// mstats is private to the runtime and is not exported.
var sizeof_C_MStats = unsafe.Offsetof(memstats.by_size) + unsafe.Sizeof(memstats.by_size[0])*61
// init checks two layout invariants of memstats at startup and throws
// if either one does not hold:
//
//   - the MemStats-compatible prefix of mstats (sizeof_C_MStats) has
//     exactly the size of MemStats, and
//   - memstats.heap_live sits at an offset that is a multiple of 8.
func init() {
	var memStats MemStats
	if exported := unsafe.Sizeof(memStats); sizeof_C_MStats != exported {
		// Print both sizes so the mismatch can be diagnosed.
		println(sizeof_C_MStats, exported)
		throw("MStats vs MemStatsType size mismatch")
	}
	if off := unsafe.Offsetof(memstats.heap_live); off%8 != 0 {
		println(off)
		throw("memstats.heap_live not aligned to 8 bytes")
	}
}
// ReadMemStats fills in m with statistics about the memory allocator.
//
// The statistics stored in m are current as of the call to
// ReadMemStats. A heap profile, by contrast, is a snapshot taken as
// of the most recently completed garbage collection cycle.
func ReadMemStats(m *MemStats) {
	// The world stays stopped while the statistics are gathered, and
	// the gathering itself is run through systemstack.
	stopTheWorld("read mem stats")
	systemstack(func() { readmemstats_m(m) })
	startTheWorld()
}
// readmemstats_m refreshes the runtime's memstats and copies the
// MemStats-compatible prefix of it into *stats.
func readmemstats_m(stats *MemStats) {
	updatememstats()

	// Only the first sizeof_C_MStats bytes are copied: mstats and
	// MemStats have trailing by_size arrays of different lengths
	// (NumSizeClasses changed, but MemStats cannot change because of
	// backward compatibility).
	dst, src := unsafe.Pointer(stats), unsafe.Pointer(&memstats)
	memmove(dst, src, sizeof_C_MStats)

	// memstats.stacks_sys covers only memory mapped directly for OS
	// stacks. For user consumption, heap-allocated stack memory is
	// added on top of it.
	stats.StackSys = stats.StackSys + stats.StackInuse
}
// readGCStats is made available to runtime/debug as
// runtime/debug.readGCStats through the linkname directive below. It
// runs readGCStats_m via systemstack, passing pauses through unchanged.
//
//go:linkname readGCStats runtime/debug.readGCStats
func readGCStats(pauses *[]uint64) {
	systemstack(func() { readGCStats_m(pauses) })
}
// readGCStats_m copies the recorded GC pause history into *pauses.
//
// With n = min(numgc, len(pause_ns)), *pauses is resliced to length
// 2n+3 and filled as follows:
//
//	[0, n)   pause durations, most recent first
//	[n, 2n)  the matching pause end times, most recent first
//	[2n]     time of the last GC (absolute)
//	[2n+1]   number of GCs
//	[2n+2]   total pause time in nanoseconds
//
// The backing array of *pauses must have room for the worst case of
// 2*len(pause_ns)+3 entries; otherwise the function throws.
func readGCStats_m(pauses *[]uint64) {
	p := *pauses
	// Calling code in runtime/debug should make the slice large enough.
	// Both the pause durations and the pause end times are written, so
	// up to 2*len(pause_ns) entries plus the three trailing values are
	// needed. Checking for only len(pause_ns)+3 would let a too-short
	// slice through and index out of range in the loop below.
	if cap(p) < 2*len(memstats.pause_ns)+3 {
		throw("short slice passed to readGCStats")
	}

	// Pass back: pauses, pause ends, last gc (absolute time), number of gc, total pause ns.
	lock(&mheap_.lock)

	// Never report more cycles than the circular buffers can hold.
	n := memstats.numgc
	if n > uint32(len(memstats.pause_ns)) {
		n = uint32(len(memstats.pause_ns))
	}

	// The pause buffer is circular. The most recent pause is at
	// pause_ns[(numgc-1)%len(pause_ns)], and then backward
	// from there to go back farther in time. We deliver the times
	// most recent first (in p[0]).
	p = p[:cap(p)]
	for i := uint32(0); i < n; i++ {
		j := (memstats.numgc - 1 - i) % uint32(len(memstats.pause_ns))
		p[i] = memstats.pause_ns[j]
		p[n+i] = memstats.pause_end[j]
	}

	p[n+n] = memstats.last_gc_unix
	p[n+n+1] = uint64(memstats.numgc)
	p[n+n+2] = memstats.pause_total_ns
	unlock(&mheap_.lock)
	*pauses = p[:n+n+3]
}
// updatememstats recomputes the fields of memstats that are not kept
// up to date on the fly: the in-use sizes of the mcache and mspan
// allocators, the sys total, and the allocation statistics (alloc,
// total_alloc, nmalloc, nfree, the per-size-class counts, heap_alloc
// and heap_objects).
//
// The allocation statistics are collected without further
// synchronization, which is safe and consistent only because the
// world is stopped.
//
//go:nowritebarrier
func updatememstats() {
	memstats.mcache_inuse = uint64(mheap_.cachealloc.inuse)
	memstats.mspan_inuse = uint64(mheap_.spanalloc.inuse)

	// sys is the sum of the per-category *_sys counters. Stacks in
	// use are counted as sys memory as well.
	sysTotal := memstats.heap_sys + memstats.stacks_sys + memstats.mspan_sys +
		memstats.mcache_sys + memstats.buckhash_sys + memstats.gc_sys + memstats.other_sys
	sysTotal += memstats.stacks_inuse
	memstats.sys = sysTotal

	// The allocator statistics below are rebuilt from scratch on
	// every call, so clear the previous values first.
	memstats.alloc, memstats.total_alloc = 0, 0
	memstats.nmalloc, memstats.nfree = 0, 0
	for i := range memstats.by_size {
		bs := &memstats.by_size[i]
		bs.nmalloc, bs.nfree = 0, 0
	}

	// Flush MCache's to MCentral.
	systemstack(flushallmcaches)

	// Fold the per-mcache counters into the global ones.
	cachestats()

	// Running byte totals of everything allocated and everything freed.
	var totalAlloc, totalFree uint64

	// Per-span-class allocation counts. The mcaches were emptied
	// above, so the mcentral counters are up to date.
	for spc := range mheap_.central {
		count := mheap_.central[spc].mcentral.nmalloc
		sc := spanClass(spc).sizeclass()
		memstats.nmalloc += count
		memstats.by_size[sc].nmalloc += count
		totalAlloc += count * uint64(class_to_size[sc])
	}

	// Size class 0 is accounted for through the dedicated
	// large-object counters in mheap_.
	memstats.nmalloc += mheap_.nlargealloc
	memstats.nfree += mheap_.nlargefree
	totalAlloc += mheap_.largealloc
	totalFree += mheap_.largefree

	// Remaining size classes: the per-mcache free counts have been
	// flushed to mheap_.nsmallfree.
	for sc := 1; sc < _NumSizeClasses; sc++ {
		freed := mheap_.nsmallfree[sc]
		memstats.nfree += freed
		memstats.by_size[sc].nfree = freed
		totalFree += freed * uint64(class_to_size[sc])
	}

	// Tiny allocations are added to both the malloc and the free
	// counts, so they leave heap_objects unchanged.
	memstats.nfree += memstats.tinyallocs
	memstats.nmalloc += memstats.tinyallocs

	// Calculate derived stats.
	live := totalAlloc - totalFree
	memstats.total_alloc = totalAlloc
	memstats.alloc = live
	memstats.heap_alloc = live
	memstats.heap_objects = memstats.nmalloc - memstats.nfree
}
// cachestats purges the cached statistics of every P's mcache by
// calling purgecachedstats on it. Ps that have no mcache are skipped.
//
// The world must be stopped.
//
//go:nowritebarrier
func cachestats() {
	for _, p := range allp {
		if c := p.mcache; c != nil {
			purgecachedstats(c)
		}
	}
}
// flushmcache flushes the mcache belonging to allp[i]: it calls
// releaseAll on the cache and then clears its stack cache. It does
// nothing when that P has no mcache.
//
// The world must be stopped.
//
//go:nowritebarrier
func flushmcache(i int) {
	if c := allp[i].mcache; c != nil {
		c.releaseAll()
		stackcache_clear(c)
	}
}
// flushallmcaches calls flushmcache for each of the first gomaxprocs
// Ps, in index order.
//
// The world must be stopped.
//
//go:nowritebarrier
func flushallmcaches() {
	for i, n := 0, int(gomaxprocs); i < n; i++ {
		flushmcache(i)
	}
}
// purgecachedstats moves the statistics accumulated locally in c into
// the global counters in memstats and mheap_, resetting each local
// counter to zero once it has been added.
//
//go:nosplit
func purgecachedstats(c *mcache) {
	// Protected by either heap or GC lock.

	// Counters that live in memstats.
	memstats.heap_scan += uint64(c.local_scan)
	c.local_scan = 0
	memstats.tinyallocs += uint64(c.local_tinyallocs)
	c.local_tinyallocs = 0
	memstats.nlookup += uint64(c.local_nlookup)
	c.local_nlookup = 0

	// Free counters that live in mheap_.
	mheap_.largefree += uint64(c.local_largefree)
	c.local_largefree = 0
	mheap_.nlargefree += uint64(c.local_nlargefree)
	c.local_nlargefree = 0
	for i := range c.local_nsmallfree {
		mheap_.nsmallfree[i] += uint64(c.local_nsmallfree[i])
		c.local_nsmallfree[i] = 0
	}
}
// mSysStatInc atomically adds n to the *system* memory stat *sysStat.
// It relies on the stat never overflowing a uintptr, so it must only
// be used for system memory stats.
//
// On little endian architectures the update is currently done with
// xadduintptr(), which is less than ideal: xadd64() is what should
// really be used. xadduintptr() is a stop-gap until arm supports an
// xadd64() that doesn't use locks. (Locks are a problem because they
// require a valid G, which restricts their usability.)
//
// As a side effect of using xadduintptr(), overflow has to be checked
// for explicitly; on overflow a message is printed and the process
// exits with status 2.
//
//go:nosplit
func mSysStatInc(sysStat *uint64, n uintptr) {
	if sys.BigEndian {
		atomic.Xadd64(sysStat, int64(n))
		return
	}
	word := (*uintptr)(unsafe.Pointer(sysStat))
	sum := atomic.Xadduintptr(word, n)
	if sum >= n {
		return
	}
	// The new value is smaller than the amount added: the word wrapped.
	print("runtime: stat overflow: val ", sum, ", n ", n, "\n")
	exit(2)
}
// mSysStatDec atomically subtracts n from the *system* memory stat
// *sysStat. It is the counterpart of mSysStatInc and the same caveats
// apply: big endian targets use xadd64(), all others use
// xadduintptr() followed by an explicit check, here for underflow.
// On underflow a message is printed and the process exits with
// status 2.
//
//go:nosplit
func mSysStatDec(sysStat *uint64, n uintptr) {
	if sys.BigEndian {
		atomic.Xadd64(sysStat, -int64(n))
		return
	}
	word := (*uintptr)(unsafe.Pointer(sysStat))
	// Adding the two's complement of n subtracts n from the word.
	rem := atomic.Xadduintptr(word, uintptr(-int64(n)))
	if rem+n >= n {
		return
	}
	// Adding n back wraps around, so the stat was smaller than n.
	print("runtime: stat underflow: val ", rem, ", n ", n, "\n")
	exit(2)
}