2016-03-01 15:57:46 -07:00
|
|
|
// Copyright 2009 The Go Authors. All rights reserved.
|
2015-02-19 11:38:46 -07:00
|
|
|
// Use of this source code is governed by a BSD-style
|
|
|
|
// license that can be found in the LICENSE file.
|
|
|
|
|
|
|
|
// Memory statistics
|
|
|
|
|
|
|
|
package runtime
|
|
|
|
|
2015-11-02 12:09:24 -07:00
|
|
|
import (
|
|
|
|
"runtime/internal/atomic"
|
2015-11-11 10:39:30 -07:00
|
|
|
"runtime/internal/sys"
|
2015-11-02 12:09:24 -07:00
|
|
|
"unsafe"
|
|
|
|
)
|
2015-02-19 11:38:46 -07:00
|
|
|
|
|
|
|
// Statistics.
|
2015-07-29 12:02:34 -06:00
|
|
|
// If you edit this structure, also edit type MemStats below.
|
2016-09-09 19:43:50 -06:00
|
|
|
// Their layouts must match exactly.
|
|
|
|
//
|
|
|
|
// For detailed descriptions see the documentation for MemStats.
|
|
|
|
// Fields that differ from MemStats are further documented here.
|
|
|
|
//
|
|
|
|
// Many of these fields are updated on the fly, while others are only
|
|
|
|
// updated when updatememstats is called.
|
2015-02-19 11:38:46 -07:00
|
|
|
type mstats struct {
|
|
|
|
// General statistics.
|
2015-03-30 14:59:09 -06:00
|
|
|
alloc uint64 // bytes allocated and not yet freed
|
2015-02-19 11:38:46 -07:00
|
|
|
total_alloc uint64 // bytes allocated (even if freed)
|
|
|
|
sys uint64 // bytes obtained from system (should be sum of xxx_sys below, no locking, approximate)
|
|
|
|
nlookup uint64 // number of pointer lookups
|
|
|
|
nmalloc uint64 // number of mallocs
|
|
|
|
nfree uint64 // number of frees
|
|
|
|
|
|
|
|
// Statistics about malloc heap.
|
2016-09-09 19:43:50 -06:00
|
|
|
// Protected by mheap.lock
|
|
|
|
//
|
2017-03-22 11:45:12 -06:00
|
|
|
// Like MemStats, heap_sys and heap_inuse do not count memory
|
|
|
|
// in manually-managed spans.
|
2015-03-30 14:59:09 -06:00
|
|
|
heap_alloc uint64 // bytes allocated and not yet freed (same as alloc above)
|
2017-03-22 11:45:12 -06:00
|
|
|
heap_sys uint64 // virtual address space obtained from system for GC'd heap
|
2015-02-19 11:38:46 -07:00
|
|
|
heap_idle uint64 // bytes in idle spans
|
2017-03-22 11:45:12 -06:00
|
|
|
heap_inuse uint64 // bytes in _MSpanInUse spans
|
2015-02-19 11:38:46 -07:00
|
|
|
heap_released uint64 // bytes released to the os
|
|
|
|
heap_objects uint64 // total number of allocated objects
|
|
|
|
|
2016-09-09 19:43:50 -06:00
|
|
|
// TODO(austin): heap_released is both useless and inaccurate
|
|
|
|
// in its current form. It's useless because, from the user's
|
|
|
|
// and OS's perspectives, there's no difference between a page
|
|
|
|
// that has not yet been faulted in and a page that has been
|
|
|
|
// released back to the OS. We could fix this by considering
|
|
|
|
// newly mapped spans to be "released". It's inaccurate
|
|
|
|
// because when we split a large span for allocation, we
|
|
|
|
// "unrelease" all pages in the large span and not just the
|
|
|
|
// ones we split off for use. This is trickier to fix because
|
|
|
|
// we currently don't know which pages of a span we've
|
|
|
|
// released. We could fix it by separating "free" and
|
|
|
|
// "released" spans, but then we have to allocate from runs of
|
|
|
|
// free and released spans.
|
|
|
|
|
2015-02-19 11:38:46 -07:00
|
|
|
// Statistics about allocation of low-level fixed-size structures.
|
|
|
|
// Protected by FixAlloc locks.
|
2017-03-22 11:45:12 -06:00
|
|
|
stacks_inuse uint64 // bytes in manually-managed stack spans
|
2016-09-09 19:43:50 -06:00
|
|
|
stacks_sys uint64 // only counts newosproc0 stack in mstats; differs from MemStats.StackSys
|
2015-02-19 11:38:46 -07:00
|
|
|
mspan_inuse uint64 // mspan structures
|
|
|
|
mspan_sys uint64
|
|
|
|
mcache_inuse uint64 // mcache structures
|
|
|
|
mcache_sys uint64
|
|
|
|
buckhash_sys uint64 // profiling bucket hash table
|
|
|
|
gc_sys uint64
|
|
|
|
other_sys uint64
|
|
|
|
|
|
|
|
// Statistics about garbage collector.
|
|
|
|
// Protected by mheap or stopping the world during GC.
|
2016-09-29 09:46:53 -06:00
|
|
|
next_gc uint64 // goal heap_live for when next GC ends; ^0 if disabled
|
2017-02-03 17:26:13 -07:00
|
|
|
last_gc_unix uint64 // last gc (in unix time)
|
2015-07-29 12:02:34 -06:00
|
|
|
pause_total_ns uint64
|
|
|
|
pause_ns [256]uint64 // circular buffer of recent gc pause lengths
|
|
|
|
pause_end [256]uint64 // circular buffer of recent gc end times (nanoseconds since 1970)
|
|
|
|
numgc uint32
|
2016-12-06 15:42:42 -07:00
|
|
|
numforcedgc uint32 // number of user-forced GCs
|
2015-07-29 12:02:34 -06:00
|
|
|
gc_cpu_fraction float64 // fraction of CPU time used by GC
|
|
|
|
enablegc bool
|
|
|
|
debuggc bool
|
2015-02-19 11:38:46 -07:00
|
|
|
|
|
|
|
// Statistics about allocation size classes.
|
|
|
|
|
|
|
|
by_size [_NumSizeClasses]struct {
|
|
|
|
size uint32
|
|
|
|
nmalloc uint64
|
|
|
|
nfree uint64
|
|
|
|
}
|
|
|
|
|
2016-09-09 19:43:50 -06:00
|
|
|
// Statistics below here are not exported to MemStats directly.
|
runtime: introduce heap_live; replace use of heap_alloc in GC
Currently there are two main consumers of memstats.heap_alloc:
updatememstats (aka ReadMemStats) and shouldtriggergc.
updatememstats recomputes heap_alloc from the ground up, so we don't
need to keep heap_alloc up to date for it. shouldtriggergc wants to
know how many bytes were marked by the previous GC plus how many bytes
have been allocated since then, but this *isn't* what heap_alloc
tracks. heap_alloc also includes objects that are not marked and
haven't yet been swept.
Introduce a new memstat called heap_live that actually tracks what
shouldtriggergc wants to know and stop keeping heap_alloc up to date.
Unlike heap_alloc, heap_live follows a simple sawtooth that drops
during each mark termination and increases monotonically between GCs.
heap_alloc, on the other hand, has much more complicated behavior: it
may drop during sweep termination, slowly decreases from background
sweeping between GCs, is roughly unaffected by allocation as long as
there are unswept spans (because we sweep and allocate at the same
rate), and may go up after background sweeping is done depending on
the GC trigger.
heap_live simplifies computing next_gc and using it to figure out when
to trigger garbage collection. Currently, we guess next_gc at the end
of a cycle and update it as we sweep and get a better idea of how much
heap was marked. Now, since we're directly tracking how much heap is
marked, we can directly compute next_gc.
This also corrects bugs that could cause us to trigger GC early.
Currently, in any case where sweep termination actually finds spans to
sweep, heap_alloc is an overestimation of live heap, so we'll trigger
GC too early. heap_live, on the other hand, is unaffected by sweeping.
Change-Id: I1f96807b6ed60d4156e8173a8e68745ffc742388
Reviewed-on: https://go-review.googlesource.com/8389
Reviewed-by: Russ Cox <rsc@golang.org>
2015-03-30 16:01:32 -06:00
|
|
|
|
2017-02-03 17:26:13 -07:00
|
|
|
last_gc_nanotime uint64 // last gc (monotonic time)
|
|
|
|
tinyallocs uint64 // number of tiny allocations that didn't cause actual allocation; not exported to go directly
|
runtime: introduce heap_live; replace use of heap_alloc in GC
Currently there are two main consumers of memstats.heap_alloc:
updatememstats (aka ReadMemStats) and shouldtriggergc.
updatememstats recomputes heap_alloc from the ground up, so we don't
need to keep heap_alloc up to date for it. shouldtriggergc wants to
know how many bytes were marked by the previous GC plus how many bytes
have been allocated since then, but this *isn't* what heap_alloc
tracks. heap_alloc also includes objects that are not marked and
haven't yet been swept.
Introduce a new memstat called heap_live that actually tracks what
shouldtriggergc wants to know and stop keeping heap_alloc up to date.
Unlike heap_alloc, heap_live follows a simple sawtooth that drops
during each mark termination and increases monotonically between GCs.
heap_alloc, on the other hand, has much more complicated behavior: it
may drop during sweep termination, slowly decreases from background
sweeping between GCs, is roughly unaffected by allocation as long as
there are unswept spans (because we sweep and allocate at the same
rate), and may go up after background sweeping is done depending on
the GC trigger.
heap_live simplifies computing next_gc and using it to figure out when
to trigger garbage collection. Currently, we guess next_gc at the end
of a cycle and update it as we sweep and get a better idea of how much
heap was marked. Now, since we're directly tracking how much heap is
marked, we can directly compute next_gc.
This also corrects bugs that could cause us to trigger GC early.
Currently, in any case where sweep termination actually finds spans to
sweep, heap_alloc is an overestimation of live heap, so we'll trigger
GC too early. heap_live, on the other hand, is unaffected by sweeping.
Change-Id: I1f96807b6ed60d4156e8173a8e68745ffc742388
Reviewed-on: https://go-review.googlesource.com/8389
Reviewed-by: Russ Cox <rsc@golang.org>
2015-03-30 16:01:32 -06:00
|
|
|
|
2017-03-31 15:09:41 -06:00
|
|
|
// triggerRatio is the heap growth ratio that triggers marking.
|
|
|
|
//
|
|
|
|
// E.g., if this is 0.6, then GC should start when the live
|
|
|
|
// heap has reached 1.6 times the heap size marked by the
|
|
|
|
// previous cycle. This should be ≤ GOGC/100 so the trigger
|
|
|
|
// heap size is less than the goal heap size. This is set
|
|
|
|
// during mark termination for the next cycle's trigger.
|
|
|
|
triggerRatio float64
|
|
|
|
|
2016-09-15 12:08:04 -06:00
|
|
|
// gc_trigger is the heap size that triggers marking.
|
|
|
|
//
|
|
|
|
// When heap_live ≥ gc_trigger, the mark phase will start.
|
|
|
|
// This is also the heap size by which proportional sweeping
|
|
|
|
// must be complete.
|
2017-03-31 15:09:41 -06:00
|
|
|
//
|
|
|
|
// This is computed from triggerRatio during mark termination
|
|
|
|
// for the next cycle's trigger.
|
2016-09-15 12:08:04 -06:00
|
|
|
gc_trigger uint64
|
|
|
|
|
runtime: introduce heap_live; replace use of heap_alloc in GC
Currently there are two main consumers of memstats.heap_alloc:
updatememstats (aka ReadMemStats) and shouldtriggergc.
updatememstats recomputes heap_alloc from the ground up, so we don't
need to keep heap_alloc up to date for it. shouldtriggergc wants to
know how many bytes were marked by the previous GC plus how many bytes
have been allocated since then, but this *isn't* what heap_alloc
tracks. heap_alloc also includes objects that are not marked and
haven't yet been swept.
Introduce a new memstat called heap_live that actually tracks what
shouldtriggergc wants to know and stop keeping heap_alloc up to date.
Unlike heap_alloc, heap_live follows a simple sawtooth that drops
during each mark termination and increases monotonically between GCs.
heap_alloc, on the other hand, has much more complicated behavior: it
may drop during sweep termination, slowly decreases from background
sweeping between GCs, is roughly unaffected by allocation as long as
there are unswept spans (because we sweep and allocate at the same
rate), and may go up after background sweeping is done depending on
the GC trigger.
heap_live simplifies computing next_gc and using it to figure out when
to trigger garbage collection. Currently, we guess next_gc at the end
of a cycle and update it as we sweep and get a better idea of how much
heap was marked. Now, since we're directly tracking how much heap is
marked, we can directly compute next_gc.
This also corrects bugs that could cause us to trigger GC early.
Currently, in any case where sweep termination actually finds spans to
sweep, heap_alloc is an overestimation of live heap, so we'll trigger
GC too early. heap_live, on the other hand, is unaffected by sweeping.
Change-Id: I1f96807b6ed60d4156e8173a8e68745ffc742388
Reviewed-on: https://go-review.googlesource.com/8389
Reviewed-by: Russ Cox <rsc@golang.org>
2015-03-30 16:01:32 -06:00
|
|
|
// heap_live is the number of bytes considered live by the GC.
|
|
|
|
// That is: retained by the most recent GC plus allocated
|
runtime: fix (sometimes major) underestimation of heap_live
Currently, we update memstats.heap_live from mcache.local_cachealloc
whenever we lock the heap (e.g., to obtain a fresh span or to release
an unused span). However, under the right circumstances,
local_cachealloc can accumulate allocations up to the size of
the *entire heap* without flushing them to heap_live. Specifically,
since span allocations from an mcentral don't lock the heap, if a
large number of pages are held in an mcentral and the application
continues to use and free objects of that size class (e.g., the
BinaryTree17 benchmark), local_cachealloc won't be flushed until the
mcentral runs out of spans.
This is a problem because, unlike many of the memory statistics that
are purely informative, heap_live is used to determine when the
garbage collector should start and how hard it should work.
This commit eliminates local_cachealloc, instead atomically updating
heap_live directly. To control contention, we do this only when
obtaining a span from an mcentral. Furthermore, we make heap_live
conservative: allocating a span assumes that all free slots in that
span will be used and accounts for these when the span is
allocated, *before* the objects themselves are. This is important
because 1) this triggers the GC earlier than necessary rather than
potentially too late and 2) this leads to a conservative GC rate
rather than a GC rate that is potentially too low.
Alternatively, we could have flushed local_cachealloc when it passed
some threshold, but this would require determining a threshold and
would cause heap_live to underestimate the true value rather than
overestimate.
Fixes #12199.
name old time/op new time/op delta
BinaryTree17-12 2.88s ± 4% 2.88s ± 1% ~ (p=0.470 n=19+19)
Fannkuch11-12 2.48s ± 1% 2.48s ± 1% ~ (p=0.243 n=16+19)
FmtFprintfEmpty-12 50.9ns ± 2% 50.7ns ± 1% ~ (p=0.238 n=15+14)
FmtFprintfString-12 175ns ± 1% 171ns ± 1% -2.48% (p=0.000 n=18+18)
FmtFprintfInt-12 159ns ± 1% 158ns ± 1% -0.78% (p=0.000 n=19+18)
FmtFprintfIntInt-12 270ns ± 1% 265ns ± 2% -1.67% (p=0.000 n=18+18)
FmtFprintfPrefixedInt-12 235ns ± 1% 234ns ± 0% ~ (p=0.362 n=18+19)
FmtFprintfFloat-12 309ns ± 1% 308ns ± 1% -0.41% (p=0.001 n=18+19)
FmtManyArgs-12 1.10µs ± 1% 1.08µs ± 0% -1.96% (p=0.000 n=19+18)
GobDecode-12 7.81ms ± 1% 7.80ms ± 1% ~ (p=0.425 n=18+19)
GobEncode-12 6.53ms ± 1% 6.53ms ± 1% ~ (p=0.817 n=19+19)
Gzip-12 312ms ± 1% 312ms ± 2% ~ (p=0.967 n=19+20)
Gunzip-12 42.0ms ± 1% 41.9ms ± 1% ~ (p=0.172 n=19+19)
HTTPClientServer-12 63.7µs ± 1% 63.8µs ± 1% ~ (p=0.639 n=19+19)
JSONEncode-12 16.4ms ± 1% 16.4ms ± 1% ~ (p=0.954 n=19+19)
JSONDecode-12 58.5ms ± 1% 57.8ms ± 1% -1.27% (p=0.000 n=18+19)
Mandelbrot200-12 3.86ms ± 1% 3.88ms ± 0% +0.44% (p=0.000 n=18+18)
GoParse-12 3.67ms ± 2% 3.66ms ± 1% -0.52% (p=0.001 n=18+19)
RegexpMatchEasy0_32-12 100ns ± 1% 100ns ± 0% ~ (p=0.257 n=19+18)
RegexpMatchEasy0_1K-12 347ns ± 1% 347ns ± 1% ~ (p=0.527 n=18+18)
RegexpMatchEasy1_32-12 83.7ns ± 2% 83.1ns ± 2% ~ (p=0.096 n=18+19)
RegexpMatchEasy1_1K-12 509ns ± 1% 505ns ± 1% -0.75% (p=0.000 n=18+19)
RegexpMatchMedium_32-12 130ns ± 2% 129ns ± 1% ~ (p=0.962 n=20+20)
RegexpMatchMedium_1K-12 39.5µs ± 2% 39.4µs ± 1% ~ (p=0.376 n=20+19)
RegexpMatchHard_32-12 2.04µs ± 0% 2.04µs ± 1% ~ (p=0.195 n=18+17)
RegexpMatchHard_1K-12 61.4µs ± 1% 61.4µs ± 1% ~ (p=0.885 n=19+19)
Revcomp-12 540ms ± 2% 542ms ± 4% ~ (p=0.552 n=19+17)
Template-12 69.6ms ± 1% 71.2ms ± 1% +2.39% (p=0.000 n=20+20)
TimeParse-12 357ns ± 1% 357ns ± 1% ~ (p=0.883 n=18+20)
TimeFormat-12 379ns ± 1% 362ns ± 1% -4.53% (p=0.000 n=18+19)
[Geo mean] 62.0µs 61.8µs -0.44%
name old time/op new time/op delta
XBenchGarbage-12 5.89ms ± 2% 5.81ms ± 2% -1.41% (p=0.000 n=19+18)
Change-Id: I96b31cca6ae77c30693a891cff3fe663fa2447a0
Reviewed-on: https://go-review.googlesource.com/17748
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Russ Cox <rsc@golang.org>
2015-12-11 15:49:14 -07:00
|
|
|
// since then. heap_live <= heap_alloc, since heap_alloc
|
|
|
|
// includes unmarked objects that have not yet been swept (and
|
|
|
|
// hence goes up as we allocate and down as we sweep) while
|
|
|
|
// heap_live excludes these objects (and hence only goes up
|
|
|
|
// between GCs).
|
|
|
|
//
|
|
|
|
// This is updated atomically without locking. To reduce
|
|
|
|
// contention, this is updated only when obtaining a span from
|
|
|
|
// an mcentral and at this point it counts all of the
|
|
|
|
// unallocated slots in that span (which will be allocated
|
|
|
|
// before that mcache obtains another span from that
|
|
|
|
// mcentral). Hence, it slightly overestimates the "true" live
|
|
|
|
// heap size. It's better to overestimate than to
|
|
|
|
// underestimate because 1) this triggers the GC earlier than
|
|
|
|
// necessary rather than potentially too late and 2) this
|
|
|
|
// leads to a conservative GC rate rather than a GC rate that
|
|
|
|
// is potentially too low.
|
|
|
|
//
|
2017-04-21 09:45:44 -06:00
|
|
|
// Reads should likewise be atomic (or during STW).
|
|
|
|
//
|
runtime: fix (sometimes major) underestimation of heap_live
Currently, we update memstats.heap_live from mcache.local_cachealloc
whenever we lock the heap (e.g., to obtain a fresh span or to release
an unused span). However, under the right circumstances,
local_cachealloc can accumulate allocations up to the size of
the *entire heap* without flushing them to heap_live. Specifically,
since span allocations from an mcentral don't lock the heap, if a
large number of pages are held in an mcentral and the application
continues to use and free objects of that size class (e.g., the
BinaryTree17 benchmark), local_cachealloc won't be flushed until the
mcentral runs out of spans.
This is a problem because, unlike many of the memory statistics that
are purely informative, heap_live is used to determine when the
garbage collector should start and how hard it should work.
This commit eliminates local_cachealloc, instead atomically updating
heap_live directly. To control contention, we do this only when
obtaining a span from an mcentral. Furthermore, we make heap_live
conservative: allocating a span assumes that all free slots in that
span will be used and accounts for these when the span is
allocated, *before* the objects themselves are. This is important
because 1) this triggers the GC earlier than necessary rather than
potentially too late and 2) this leads to a conservative GC rate
rather than a GC rate that is potentially too low.
Alternatively, we could have flushed local_cachealloc when it passed
some threshold, but this would require determining a threshold and
would cause heap_live to underestimate the true value rather than
overestimate.
Fixes #12199.
name old time/op new time/op delta
BinaryTree17-12 2.88s ± 4% 2.88s ± 1% ~ (p=0.470 n=19+19)
Fannkuch11-12 2.48s ± 1% 2.48s ± 1% ~ (p=0.243 n=16+19)
FmtFprintfEmpty-12 50.9ns ± 2% 50.7ns ± 1% ~ (p=0.238 n=15+14)
FmtFprintfString-12 175ns ± 1% 171ns ± 1% -2.48% (p=0.000 n=18+18)
FmtFprintfInt-12 159ns ± 1% 158ns ± 1% -0.78% (p=0.000 n=19+18)
FmtFprintfIntInt-12 270ns ± 1% 265ns ± 2% -1.67% (p=0.000 n=18+18)
FmtFprintfPrefixedInt-12 235ns ± 1% 234ns ± 0% ~ (p=0.362 n=18+19)
FmtFprintfFloat-12 309ns ± 1% 308ns ± 1% -0.41% (p=0.001 n=18+19)
FmtManyArgs-12 1.10µs ± 1% 1.08µs ± 0% -1.96% (p=0.000 n=19+18)
GobDecode-12 7.81ms ± 1% 7.80ms ± 1% ~ (p=0.425 n=18+19)
GobEncode-12 6.53ms ± 1% 6.53ms ± 1% ~ (p=0.817 n=19+19)
Gzip-12 312ms ± 1% 312ms ± 2% ~ (p=0.967 n=19+20)
Gunzip-12 42.0ms ± 1% 41.9ms ± 1% ~ (p=0.172 n=19+19)
HTTPClientServer-12 63.7µs ± 1% 63.8µs ± 1% ~ (p=0.639 n=19+19)
JSONEncode-12 16.4ms ± 1% 16.4ms ± 1% ~ (p=0.954 n=19+19)
JSONDecode-12 58.5ms ± 1% 57.8ms ± 1% -1.27% (p=0.000 n=18+19)
Mandelbrot200-12 3.86ms ± 1% 3.88ms ± 0% +0.44% (p=0.000 n=18+18)
GoParse-12 3.67ms ± 2% 3.66ms ± 1% -0.52% (p=0.001 n=18+19)
RegexpMatchEasy0_32-12 100ns ± 1% 100ns ± 0% ~ (p=0.257 n=19+18)
RegexpMatchEasy0_1K-12 347ns ± 1% 347ns ± 1% ~ (p=0.527 n=18+18)
RegexpMatchEasy1_32-12 83.7ns ± 2% 83.1ns ± 2% ~ (p=0.096 n=18+19)
RegexpMatchEasy1_1K-12 509ns ± 1% 505ns ± 1% -0.75% (p=0.000 n=18+19)
RegexpMatchMedium_32-12 130ns ± 2% 129ns ± 1% ~ (p=0.962 n=20+20)
RegexpMatchMedium_1K-12 39.5µs ± 2% 39.4µs ± 1% ~ (p=0.376 n=20+19)
RegexpMatchHard_32-12 2.04µs ± 0% 2.04µs ± 1% ~ (p=0.195 n=18+17)
RegexpMatchHard_1K-12 61.4µs ± 1% 61.4µs ± 1% ~ (p=0.885 n=19+19)
Revcomp-12 540ms ± 2% 542ms ± 4% ~ (p=0.552 n=19+17)
Template-12 69.6ms ± 1% 71.2ms ± 1% +2.39% (p=0.000 n=20+20)
TimeParse-12 357ns ± 1% 357ns ± 1% ~ (p=0.883 n=18+20)
TimeFormat-12 379ns ± 1% 362ns ± 1% -4.53% (p=0.000 n=18+19)
[Geo mean] 62.0µs 61.8µs -0.44%
name old time/op new time/op delta
XBenchGarbage-12 5.89ms ± 2% 5.81ms ± 2% -1.41% (p=0.000 n=19+18)
Change-Id: I96b31cca6ae77c30693a891cff3fe663fa2447a0
Reviewed-on: https://go-review.googlesource.com/17748
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Russ Cox <rsc@golang.org>
2015-12-11 15:49:14 -07:00
|
|
|
// Whenever this is updated, call traceHeapAlloc() and
|
|
|
|
// gcController.revise().
|
runtime: introduce heap_live; replace use of heap_alloc in GC
Currently there are two main consumers of memstats.heap_alloc:
updatememstats (aka ReadMemStats) and shouldtriggergc.
updatememstats recomputes heap_alloc from the ground up, so we don't
need to keep heap_alloc up to date for it. shouldtriggergc wants to
know how many bytes were marked by the previous GC plus how many bytes
have been allocated since then, but this *isn't* what heap_alloc
tracks. heap_alloc also includes objects that are not marked and
haven't yet been swept.
Introduce a new memstat called heap_live that actually tracks what
shouldtriggergc wants to know and stop keeping heap_alloc up to date.
Unlike heap_alloc, heap_live follows a simple sawtooth that drops
during each mark termination and increases monotonically between GCs.
heap_alloc, on the other hand, has much more complicated behavior: it
may drop during sweep termination, slowly decreases from background
sweeping between GCs, is roughly unaffected by allocation as long as
there are unswept spans (because we sweep and allocate at the same
rate), and may go up after background sweeping is done depending on
the GC trigger.
heap_live simplifies computing next_gc and using it to figure out when
to trigger garbage collection. Currently, we guess next_gc at the end
of a cycle and update it as we sweep and get a better idea of how much
heap was marked. Now, since we're directly tracking how much heap is
marked, we can directly compute next_gc.
This also corrects bugs that could cause us to trigger GC early.
Currently, in any case where sweep termination actually finds spans to
sweep, heap_alloc is an overestimation of live heap, so we'll trigger
GC too early. heap_live, on the other hand, is unaffected by sweeping.
Change-Id: I1f96807b6ed60d4156e8173a8e68745ffc742388
Reviewed-on: https://go-review.googlesource.com/8389
Reviewed-by: Russ Cox <rsc@golang.org>
2015-03-30 16:01:32 -06:00
|
|
|
heap_live uint64
|
2015-03-12 15:56:14 -06:00
|
|
|
|
2015-05-04 14:10:49 -06:00
|
|
|
// heap_scan is the number of bytes of "scannable" heap. This
|
|
|
|
// is the live heap (as counted by heap_live), but omitting
|
|
|
|
// no-scan objects and no-scan tails of objects.
|
runtime: fix (sometimes major) underestimation of heap_live
Currently, we update memstats.heap_live from mcache.local_cachealloc
whenever we lock the heap (e.g., to obtain a fresh span or to release
an unused span). However, under the right circumstances,
local_cachealloc can accumulate allocations up to the size of
the *entire heap* without flushing them to heap_live. Specifically,
since span allocations from an mcentral don't lock the heap, if a
large number of pages are held in an mcentral and the application
continues to use and free objects of that size class (e.g., the
BinaryTree17 benchmark), local_cachealloc won't be flushed until the
mcentral runs out of spans.
This is a problem because, unlike many of the memory statistics that
are purely informative, heap_live is used to determine when the
garbage collector should start and how hard it should work.
This commit eliminates local_cachealloc, instead atomically updating
heap_live directly. To control contention, we do this only when
obtaining a span from an mcentral. Furthermore, we make heap_live
conservative: allocating a span assumes that all free slots in that
span will be used and accounts for these when the span is
allocated, *before* the objects themselves are. This is important
because 1) this triggers the GC earlier than necessary rather than
potentially too late and 2) this leads to a conservative GC rate
rather than a GC rate that is potentially too low.
Alternatively, we could have flushed local_cachealloc when it passed
some threshold, but this would require determining a threshold and
would cause heap_live to underestimate the true value rather than
overestimate.
Fixes #12199.
name old time/op new time/op delta
BinaryTree17-12 2.88s ± 4% 2.88s ± 1% ~ (p=0.470 n=19+19)
Fannkuch11-12 2.48s ± 1% 2.48s ± 1% ~ (p=0.243 n=16+19)
FmtFprintfEmpty-12 50.9ns ± 2% 50.7ns ± 1% ~ (p=0.238 n=15+14)
FmtFprintfString-12 175ns ± 1% 171ns ± 1% -2.48% (p=0.000 n=18+18)
FmtFprintfInt-12 159ns ± 1% 158ns ± 1% -0.78% (p=0.000 n=19+18)
FmtFprintfIntInt-12 270ns ± 1% 265ns ± 2% -1.67% (p=0.000 n=18+18)
FmtFprintfPrefixedInt-12 235ns ± 1% 234ns ± 0% ~ (p=0.362 n=18+19)
FmtFprintfFloat-12 309ns ± 1% 308ns ± 1% -0.41% (p=0.001 n=18+19)
FmtManyArgs-12 1.10µs ± 1% 1.08µs ± 0% -1.96% (p=0.000 n=19+18)
GobDecode-12 7.81ms ± 1% 7.80ms ± 1% ~ (p=0.425 n=18+19)
GobEncode-12 6.53ms ± 1% 6.53ms ± 1% ~ (p=0.817 n=19+19)
Gzip-12 312ms ± 1% 312ms ± 2% ~ (p=0.967 n=19+20)
Gunzip-12 42.0ms ± 1% 41.9ms ± 1% ~ (p=0.172 n=19+19)
HTTPClientServer-12 63.7µs ± 1% 63.8µs ± 1% ~ (p=0.639 n=19+19)
JSONEncode-12 16.4ms ± 1% 16.4ms ± 1% ~ (p=0.954 n=19+19)
JSONDecode-12 58.5ms ± 1% 57.8ms ± 1% -1.27% (p=0.000 n=18+19)
Mandelbrot200-12 3.86ms ± 1% 3.88ms ± 0% +0.44% (p=0.000 n=18+18)
GoParse-12 3.67ms ± 2% 3.66ms ± 1% -0.52% (p=0.001 n=18+19)
RegexpMatchEasy0_32-12 100ns ± 1% 100ns ± 0% ~ (p=0.257 n=19+18)
RegexpMatchEasy0_1K-12 347ns ± 1% 347ns ± 1% ~ (p=0.527 n=18+18)
RegexpMatchEasy1_32-12 83.7ns ± 2% 83.1ns ± 2% ~ (p=0.096 n=18+19)
RegexpMatchEasy1_1K-12 509ns ± 1% 505ns ± 1% -0.75% (p=0.000 n=18+19)
RegexpMatchMedium_32-12 130ns ± 2% 129ns ± 1% ~ (p=0.962 n=20+20)
RegexpMatchMedium_1K-12 39.5µs ± 2% 39.4µs ± 1% ~ (p=0.376 n=20+19)
RegexpMatchHard_32-12 2.04µs ± 0% 2.04µs ± 1% ~ (p=0.195 n=18+17)
RegexpMatchHard_1K-12 61.4µs ± 1% 61.4µs ± 1% ~ (p=0.885 n=19+19)
Revcomp-12 540ms ± 2% 542ms ± 4% ~ (p=0.552 n=19+17)
Template-12 69.6ms ± 1% 71.2ms ± 1% +2.39% (p=0.000 n=20+20)
TimeParse-12 357ns ± 1% 357ns ± 1% ~ (p=0.883 n=18+20)
TimeFormat-12 379ns ± 1% 362ns ± 1% -4.53% (p=0.000 n=18+19)
[Geo mean] 62.0µs 61.8µs -0.44%
name old time/op new time/op delta
XBenchGarbage-12 5.89ms ± 2% 5.81ms ± 2% -1.41% (p=0.000 n=19+18)
Change-Id: I96b31cca6ae77c30693a891cff3fe663fa2447a0
Reviewed-on: https://go-review.googlesource.com/17748
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Russ Cox <rsc@golang.org>
2015-12-11 15:49:14 -07:00
|
|
|
//
|
|
|
|
// Whenever this is updated, call gcController.revise().
|
2015-05-04 14:10:49 -06:00
|
|
|
heap_scan uint64
|
|
|
|
|
2015-03-12 15:56:14 -06:00
|
|
|
// heap_marked is the number of bytes marked by the previous
|
|
|
|
// GC. After mark termination, heap_live == heap_marked, but
|
|
|
|
// unlike heap_live, heap_marked does not change until the
|
|
|
|
// next mark termination.
|
|
|
|
heap_marked uint64
|
2015-02-19 11:38:46 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
// memstats is the package-level mstats value holding the runtime's
// memory statistics.
var memstats mstats
|
|
|
|
|
|
|
|
// A MemStats records statistics about the memory allocator.
|
|
|
|
type MemStats struct {
|
|
|
|
// General statistics.
|
2016-09-09 19:43:50 -06:00
|
|
|
|
|
|
|
// Alloc is bytes of allocated heap objects.
|
|
|
|
//
|
|
|
|
// This is the same as HeapAlloc (see below).
|
|
|
|
Alloc uint64
|
|
|
|
|
|
|
|
// TotalAlloc is cumulative bytes allocated for heap objects.
|
|
|
|
//
|
|
|
|
// TotalAlloc increases as heap objects are allocated, but
|
|
|
|
// unlike Alloc and HeapAlloc, it does not decrease when
|
|
|
|
// objects are freed.
|
|
|
|
TotalAlloc uint64
|
|
|
|
|
|
|
|
// Sys is the total bytes of memory obtained from the OS.
|
|
|
|
//
|
|
|
|
// Sys is the sum of the XSys fields below. Sys measures the
|
|
|
|
// virtual address space reserved by the Go runtime for the
|
|
|
|
// heap, stacks, and other internal data structures. It's
|
|
|
|
// likely that not all of the virtual address space is backed
|
|
|
|
// by physical memory at any given moment, though in general
|
|
|
|
// it all was at some point.
|
|
|
|
Sys uint64
|
|
|
|
|
|
|
|
// Lookups is the number of pointer lookups performed by the
|
|
|
|
// runtime.
|
|
|
|
//
|
|
|
|
// This is primarily useful for debugging runtime internals.
|
|
|
|
Lookups uint64
|
|
|
|
|
|
|
|
// Mallocs is the cumulative count of heap objects allocated.
|
2016-12-22 18:17:32 -07:00
|
|
|
// The number of live objects is Mallocs - Frees.
|
2016-09-09 19:43:50 -06:00
|
|
|
Mallocs uint64
|
|
|
|
|
|
|
|
// Frees is the cumulative count of heap objects freed.
|
|
|
|
Frees uint64
|
|
|
|
|
|
|
|
// Heap memory statistics.
|
|
|
|
//
|
|
|
|
// Interpreting the heap statistics requires some knowledge of
|
|
|
|
// how Go organizes memory. Go divides the virtual address
|
|
|
|
// space of the heap into "spans", which are contiguous
|
|
|
|
// regions of memory 8K or larger. A span may be in one of
|
|
|
|
// three states:
|
|
|
|
//
|
|
|
|
// An "idle" span contains no objects or other data. The
|
|
|
|
// physical memory backing an idle span can be released back
|
|
|
|
// to the OS (but the virtual address space never is), or it
|
|
|
|
// can be converted into an "in use" or "stack" span.
|
|
|
|
//
|
|
|
|
// An "in use" span contains at least one heap object and may
|
|
|
|
// have free space available to allocate more heap objects.
|
|
|
|
//
|
|
|
|
// A "stack" span is used for goroutine stacks. Stack spans
|
|
|
|
// are not considered part of the heap. A span can change
|
|
|
|
// between heap and stack memory; it is never used for both
|
|
|
|
// simultaneously.
|
|
|
|
|
|
|
|
// HeapAlloc is bytes of allocated heap objects.
|
|
|
|
//
|
|
|
|
// "Allocated" heap objects include all reachable objects, as
|
|
|
|
// well as unreachable objects that the garbage collector has
|
|
|
|
// not yet freed. Specifically, HeapAlloc increases as heap
|
|
|
|
// objects are allocated and decreases as the heap is swept
|
|
|
|
// and unreachable objects are freed. Sweeping occurs
|
|
|
|
// incrementally between GC cycles, so these two processes
|
|
|
|
// occur simultaneously, and as a result HeapAlloc tends to
|
|
|
|
// change smoothly (in contrast with the sawtooth that is
|
|
|
|
// typical of stop-the-world garbage collectors).
|
|
|
|
HeapAlloc uint64
|
|
|
|
|
|
|
|
// HeapSys is bytes of heap memory obtained from the OS.
|
|
|
|
//
|
|
|
|
// HeapSys measures the amount of virtual address space
|
|
|
|
// reserved for the heap. This includes virtual address space
|
|
|
|
// that has been reserved but not yet used, which consumes no
|
|
|
|
// physical memory, but tends to be small, as well as virtual
|
|
|
|
// address space for which the physical memory has been
|
|
|
|
// returned to the OS after it became unused (see HeapReleased
|
|
|
|
// for a measure of the latter).
|
|
|
|
//
|
|
|
|
// HeapSys estimates the largest size the heap has had.
|
|
|
|
HeapSys uint64
|
|
|
|
|
|
|
|
// HeapIdle is bytes in idle (unused) spans.
|
|
|
|
//
|
|
|
|
// Idle spans have no objects in them. These spans could be
|
|
|
|
// (and may already have been) returned to the OS, or they can
|
|
|
|
// be reused for heap allocations, or they can be reused as
|
|
|
|
// stack memory.
|
|
|
|
//
|
|
|
|
// HeapIdle minus HeapReleased estimates the amount of memory
|
|
|
|
// that could be returned to the OS, but is being retained by
|
|
|
|
// the runtime so it can grow the heap without requesting more
|
|
|
|
// memory from the OS. If this difference is significantly
|
|
|
|
// larger than the heap size, it indicates there was a recent
|
|
|
|
// transient spike in live heap size.
|
|
|
|
HeapIdle uint64
|
|
|
|
|
|
|
|
// HeapInuse is bytes in in-use spans.
|
|
|
|
//
|
|
|
|
// In-use spans have at least one object in them. These spans
|
|
|
|
// can only be used for other objects of roughly the same
|
|
|
|
// size.
|
|
|
|
//
|
|
|
|
// HeapInuse minus HeapAlloc estimates the amount of memory
|
|
|
|
// that has been dedicated to particular size classes, but is
|
|
|
|
// not currently being used. This is an upper bound on
|
|
|
|
// fragmentation, but in general this memory can be reused
|
|
|
|
// efficiently.
|
|
|
|
HeapInuse uint64
|
|
|
|
|
|
|
|
// HeapReleased is bytes of physical memory returned to the OS.
|
|
|
|
//
|
|
|
|
// This counts heap memory from idle spans that was returned
|
|
|
|
// to the OS and has not yet been reacquired for the heap.
|
|
|
|
HeapReleased uint64
|
|
|
|
|
|
|
|
// HeapObjects is the number of allocated heap objects.
|
|
|
|
//
|
|
|
|
// Like HeapAlloc, this increases as objects are allocated and
|
|
|
|
// decreases as the heap is swept and unreachable objects are
|
|
|
|
// freed.
|
|
|
|
HeapObjects uint64
|
|
|
|
|
|
|
|
// Stack memory statistics.
|
|
|
|
//
|
|
|
|
// Stacks are not considered part of the heap, but the runtime
|
|
|
|
// can reuse a span of heap memory for stack memory, and
|
|
|
|
// vice-versa.
|
|
|
|
|
|
|
|
// StackInuse is bytes in stack spans.
|
|
|
|
//
|
|
|
|
// In-use stack spans have at least one stack in them. These
|
|
|
|
// spans can only be used for other stacks of the same size.
|
|
|
|
//
|
|
|
|
// There is no StackIdle because unused stack spans are
|
|
|
|
// returned to the heap (and hence counted toward HeapIdle).
|
|
|
|
StackInuse uint64
|
|
|
|
|
|
|
|
// StackSys is bytes of stack memory obtained from the OS.
|
|
|
|
//
|
|
|
|
// StackSys is StackInuse, plus any memory obtained directly
|
|
|
|
// from the OS for OS thread stacks (which should be minimal).
|
|
|
|
StackSys uint64
|
|
|
|
|
|
|
|
// Off-heap memory statistics.
|
|
|
|
//
|
|
|
|
// The following statistics measure runtime-internal
|
|
|
|
// structures that are not allocated from heap memory (usually
|
|
|
|
// because they are part of implementing the heap). Unlike
|
|
|
|
// heap or stack memory, any memory allocated to these
|
|
|
|
// structures is dedicated to these structures.
|
|
|
|
//
|
|
|
|
// These are primarily useful for debugging runtime memory
|
|
|
|
// overheads.
|
|
|
|
|
|
|
|
// MSpanInuse is bytes of allocated mspan structures.
|
|
|
|
MSpanInuse uint64
|
|
|
|
|
|
|
|
// MSpanSys is bytes of memory obtained from the OS for mspan
|
|
|
|
// structures.
|
|
|
|
MSpanSys uint64
|
|
|
|
|
|
|
|
// MCacheInuse is bytes of allocated mcache structures.
|
|
|
|
MCacheInuse uint64
|
|
|
|
|
|
|
|
// MCacheSys is bytes of memory obtained from the OS for
|
|
|
|
// mcache structures.
|
|
|
|
MCacheSys uint64
|
|
|
|
|
|
|
|
// BuckHashSys is bytes of memory in profiling bucket hash tables.
|
|
|
|
BuckHashSys uint64
|
|
|
|
|
|
|
|
// GCSys is bytes of memory in garbage collection metadata.
|
|
|
|
GCSys uint64
|
|
|
|
|
|
|
|
// OtherSys is bytes of memory in miscellaneous off-heap
|
|
|
|
// runtime allocations.
|
|
|
|
OtherSys uint64
|
2015-02-19 11:38:46 -07:00
|
|
|
|
|
|
|
// Garbage collector statistics.
|
2016-09-09 19:43:50 -06:00
|
|
|
|
|
|
|
// NextGC is the target heap size of the next GC cycle.
|
|
|
|
//
|
|
|
|
// The garbage collector's goal is to keep HeapAlloc ≤ NextGC.
|
|
|
|
// At the end of each GC cycle, the target for the next cycle
|
|
|
|
// is computed based on the amount of reachable data and the
|
|
|
|
// value of GOGC.
|
|
|
|
NextGC uint64
|
|
|
|
|
|
|
|
// LastGC is the time the last garbage collection finished, as
|
|
|
|
// nanoseconds since 1970 (the UNIX epoch).
|
|
|
|
LastGC uint64
|
|
|
|
|
|
|
|
// PauseTotalNs is the cumulative nanoseconds in GC
|
|
|
|
// stop-the-world pauses since the program started.
|
|
|
|
//
|
|
|
|
// During a stop-the-world pause, all goroutines are paused
|
|
|
|
// and only the garbage collector can run.
|
|
|
|
PauseTotalNs uint64
|
|
|
|
|
|
|
|
// PauseNs is a circular buffer of recent GC stop-the-world
|
|
|
|
// pause times in nanoseconds.
|
|
|
|
//
|
|
|
|
// The most recent pause is at PauseNs[(NumGC+255)%256]. In
|
|
|
|
// general, PauseNs[N%256] records the time paused in the most
|
|
|
|
// recent N%256th GC cycle. There may be multiple pauses per
|
|
|
|
// GC cycle; this is the sum of all pauses during a cycle.
|
|
|
|
PauseNs [256]uint64
|
|
|
|
|
|
|
|
// PauseEnd is a circular buffer of recent GC pause end times,
|
|
|
|
// as nanoseconds since 1970 (the UNIX epoch).
|
|
|
|
//
|
|
|
|
// This buffer is filled the same way as PauseNs. There may be
|
|
|
|
// multiple pauses per GC cycle; this records the end of the
|
|
|
|
// last pause in a cycle.
|
|
|
|
PauseEnd [256]uint64
|
|
|
|
|
|
|
|
// NumGC is the number of completed GC cycles.
|
|
|
|
NumGC uint32
|
|
|
|
|
2016-12-06 15:42:42 -07:00
|
|
|
// NumForcedGC is the number of GC cycles that were forced by
|
|
|
|
// the application calling the GC function.
|
|
|
|
NumForcedGC uint32
|
|
|
|
|
2016-09-09 19:43:50 -06:00
|
|
|
// GCCPUFraction is the fraction of this program's available
|
|
|
|
// CPU time used by the GC since the program started.
|
|
|
|
//
|
|
|
|
// GCCPUFraction is expressed as a number between 0 and 1,
|
|
|
|
// where 0 means GC has consumed none of this program's CPU. A
|
|
|
|
// program's available CPU time is defined as the integral of
|
|
|
|
// GOMAXPROCS since the program started. That is, if
|
|
|
|
// GOMAXPROCS is 2 and a program has been running for 10
|
|
|
|
// seconds, its "available CPU" is 20 seconds. GCCPUFraction
|
|
|
|
// does not include CPU time used for write barrier activity.
|
|
|
|
//
|
|
|
|
// This is the same as the fraction of CPU reported by
|
|
|
|
// GODEBUG=gctrace=1.
|
|
|
|
GCCPUFraction float64
|
|
|
|
|
|
|
|
// EnableGC indicates that GC is enabled. It is always true,
|
|
|
|
// even if GOGC=off.
|
|
|
|
EnableGC bool
|
|
|
|
|
|
|
|
// DebugGC is currently unused.
|
|
|
|
DebugGC bool
|
|
|
|
|
|
|
|
// BySize reports per-size class allocation statistics.
|
|
|
|
//
|
|
|
|
// BySize[N] gives statistics for allocations of size S where
|
|
|
|
// BySize[N-1].Size < S ≤ BySize[N].Size.
|
|
|
|
//
|
|
|
|
// This does not report allocations larger than BySize[60].Size.
|
2015-02-19 11:38:46 -07:00
|
|
|
BySize [61]struct {
|
2016-12-22 18:17:32 -07:00
|
|
|
// Size is the maximum byte size of an object in this
|
|
|
|
// size class.
|
|
|
|
Size uint32
|
|
|
|
|
|
|
|
// Mallocs is the cumulative count of heap objects
|
|
|
|
// allocated in this size class. The cumulative bytes
|
|
|
|
// of allocation is Size*Mallocs. The number of live
|
|
|
|
// objects in this size class is Mallocs - Frees.
|
2015-02-19 11:38:46 -07:00
|
|
|
Mallocs uint64
|
2016-12-22 18:17:32 -07:00
|
|
|
|
|
|
|
// Frees is the cumulative count of heap objects freed
|
|
|
|
// in this size class.
|
|
|
|
Frees uint64
|
2015-02-19 11:38:46 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-09-09 19:43:50 -06:00
|
|
|
// Size of the trailing by_size array differs between mstats and MemStats,
|
2015-02-19 11:38:46 -07:00
|
|
|
// and all data after by_size is local to runtime, not exported.
|
2016-09-09 19:43:50 -06:00
|
|
|
// NumSizeClasses was changed, but we cannot change MemStats because of backward compatibility.
|
|
|
|
// sizeof_C_MStats is the size of the prefix of mstats that
|
|
|
|
// corresponds to MemStats. It should match Sizeof(MemStats{}).
|
2015-02-19 11:38:46 -07:00
|
|
|
var sizeof_C_MStats = unsafe.Offsetof(memstats.by_size) + 61*unsafe.Sizeof(memstats.by_size[0])
|
|
|
|
|
|
|
|
func init() {
|
|
|
|
var memStats MemStats
|
|
|
|
if sizeof_C_MStats != unsafe.Sizeof(memStats) {
|
|
|
|
println(sizeof_C_MStats, unsafe.Sizeof(memStats))
|
|
|
|
throw("MStats vs MemStatsType size mismatch")
|
|
|
|
}
|
2016-08-31 08:58:56 -06:00
|
|
|
|
|
|
|
if unsafe.Offsetof(memstats.heap_live)%8 != 0 {
|
|
|
|
println(unsafe.Offsetof(memstats.heap_live))
|
|
|
|
throw("memstats.heap_live not aligned to 8 bytes")
|
|
|
|
}
|
2015-02-19 11:38:46 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
// ReadMemStats populates m with memory allocator statistics.
|
2016-09-09 19:43:50 -06:00
|
|
|
//
|
|
|
|
// The returned memory allocator statistics are up to date as of the
|
|
|
|
// call to ReadMemStats. This is in contrast with a heap profile,
|
|
|
|
// which is a snapshot as of the most recently completed garbage
|
|
|
|
// collection cycle.
|
2015-02-19 11:38:46 -07:00
|
|
|
func ReadMemStats(m *MemStats) {
|
2015-05-15 14:00:50 -06:00
|
|
|
stopTheWorld("read mem stats")
|
2015-02-19 11:38:46 -07:00
|
|
|
|
|
|
|
systemstack(func() {
|
|
|
|
readmemstats_m(m)
|
|
|
|
})
|
|
|
|
|
2015-05-15 14:00:50 -06:00
|
|
|
startTheWorld()
|
2015-02-19 11:38:46 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
func readmemstats_m(stats *MemStats) {
|
2016-12-22 17:45:55 -07:00
|
|
|
updatememstats()
|
2015-02-19 11:38:46 -07:00
|
|
|
|
2016-09-09 19:43:50 -06:00
|
|
|
// The size of the trailing by_size array differs between
|
|
|
|
// mstats and MemStats. NumSizeClasses was changed, but we
|
|
|
|
// cannot change MemStats because of backward compatibility.
|
2015-02-19 11:38:46 -07:00
|
|
|
memmove(unsafe.Pointer(stats), unsafe.Pointer(&memstats), sizeof_C_MStats)
|
|
|
|
|
2017-03-22 11:45:12 -06:00
|
|
|
// memstats.stacks_sys is only memory mapped directly for OS stacks.
|
|
|
|
// Add in heap-allocated stack memory for user consumption.
|
2015-04-27 12:50:42 -06:00
|
|
|
stats.StackSys += stats.StackInuse
|
2015-02-19 11:38:46 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
//go:linkname readGCStats runtime/debug.readGCStats
|
|
|
|
func readGCStats(pauses *[]uint64) {
|
|
|
|
systemstack(func() {
|
|
|
|
readGCStats_m(pauses)
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
func readGCStats_m(pauses *[]uint64) {
|
|
|
|
p := *pauses
|
|
|
|
// Calling code in runtime/debug should make the slice large enough.
|
|
|
|
if cap(p) < len(memstats.pause_ns)+3 {
|
|
|
|
throw("short slice passed to readGCStats")
|
|
|
|
}
|
|
|
|
|
|
|
|
// Pass back: pauses, pause ends, last gc (absolute time), number of gc, total pause ns.
|
|
|
|
lock(&mheap_.lock)
|
|
|
|
|
|
|
|
n := memstats.numgc
|
|
|
|
if n > uint32(len(memstats.pause_ns)) {
|
|
|
|
n = uint32(len(memstats.pause_ns))
|
|
|
|
}
|
|
|
|
|
|
|
|
// The pause buffer is circular. The most recent pause is at
|
|
|
|
// pause_ns[(numgc-1)%len(pause_ns)], and then backward
|
|
|
|
// from there to go back farther in time. We deliver the times
|
|
|
|
// most recent first (in p[0]).
|
|
|
|
p = p[:cap(p)]
|
|
|
|
for i := uint32(0); i < n; i++ {
|
|
|
|
j := (memstats.numgc - 1 - i) % uint32(len(memstats.pause_ns))
|
|
|
|
p[i] = memstats.pause_ns[j]
|
|
|
|
p[n+i] = memstats.pause_end[j]
|
|
|
|
}
|
|
|
|
|
2017-02-03 17:26:13 -07:00
|
|
|
p[n+n] = memstats.last_gc_unix
|
2015-02-19 11:38:46 -07:00
|
|
|
p[n+n+1] = uint64(memstats.numgc)
|
|
|
|
p[n+n+2] = memstats.pause_total_ns
|
|
|
|
unlock(&mheap_.lock)
|
|
|
|
*pauses = p[:n+n+3]
|
|
|
|
}
|
|
|
|
|
|
|
|
//go:nowritebarrier
|
2016-12-22 17:45:55 -07:00
|
|
|
func updatememstats() {
|
2015-02-19 11:38:46 -07:00
|
|
|
memstats.mcache_inuse = uint64(mheap_.cachealloc.inuse)
|
|
|
|
memstats.mspan_inuse = uint64(mheap_.spanalloc.inuse)
|
|
|
|
memstats.sys = memstats.heap_sys + memstats.stacks_sys + memstats.mspan_sys +
|
|
|
|
memstats.mcache_sys + memstats.buckhash_sys + memstats.gc_sys + memstats.other_sys
|
|
|
|
|
2017-03-22 11:45:12 -06:00
|
|
|
// We also count stacks_inuse as sys memory.
|
|
|
|
memstats.sys += memstats.stacks_inuse
|
|
|
|
|
2015-02-19 11:38:46 -07:00
|
|
|
// Calculate memory allocator stats.
|
|
|
|
// During program execution we only count number of frees and amount of freed memory.
|
|
|
|
// Current number of alive object in the heap and amount of alive heap memory
|
|
|
|
// are calculated by scanning all spans.
|
|
|
|
// Total number of mallocs is calculated as number of frees plus number of alive objects.
|
|
|
|
// Similarly, total amount of allocated memory is calculated as amount of freed memory
|
|
|
|
// plus amount of alive heap memory.
|
|
|
|
memstats.alloc = 0
|
|
|
|
memstats.total_alloc = 0
|
|
|
|
memstats.nmalloc = 0
|
|
|
|
memstats.nfree = 0
|
|
|
|
for i := 0; i < len(memstats.by_size); i++ {
|
|
|
|
memstats.by_size[i].nmalloc = 0
|
|
|
|
memstats.by_size[i].nfree = 0
|
|
|
|
}
|
|
|
|
|
|
|
|
// Flush MCache's to MCentral.
|
|
|
|
systemstack(flushallmcaches)
|
|
|
|
|
|
|
|
// Aggregate local stats.
|
|
|
|
cachestats()
|
|
|
|
|
2017-01-03 10:15:55 -07:00
|
|
|
// Collect allocation stats. This is safe and consistent
|
|
|
|
// because the world is stopped.
|
|
|
|
var smallFree, totalAlloc, totalFree uint64
|
2016-02-09 15:53:07 -07:00
|
|
|
// Collect per-spanclass stats.
|
|
|
|
for spc := range mheap_.central {
|
|
|
|
// The mcaches are now empty, so mcentral stats are
|
|
|
|
// up-to-date.
|
|
|
|
c := &mheap_.central[spc].mcentral
|
|
|
|
memstats.nmalloc += c.nmalloc
|
|
|
|
i := spanClass(spc).sizeclass()
|
|
|
|
memstats.by_size[i].nmalloc += c.nmalloc
|
|
|
|
totalAlloc += c.nmalloc * uint64(class_to_size[i])
|
|
|
|
}
|
|
|
|
// Collect per-sizeclass stats.
|
|
|
|
for i := 0; i < _NumSizeClasses; i++ {
|
2017-01-03 10:15:55 -07:00
|
|
|
if i == 0 {
|
|
|
|
memstats.nmalloc += mheap_.nlargealloc
|
|
|
|
totalAlloc += mheap_.largealloc
|
|
|
|
totalFree += mheap_.largefree
|
|
|
|
memstats.nfree += mheap_.nlargefree
|
2015-02-19 11:38:46 -07:00
|
|
|
continue
|
|
|
|
}
|
2017-01-03 10:15:55 -07:00
|
|
|
|
|
|
|
// The mcache stats have been flushed to mheap_.
|
2015-02-19 11:38:46 -07:00
|
|
|
memstats.nfree += mheap_.nsmallfree[i]
|
|
|
|
memstats.by_size[i].nfree = mheap_.nsmallfree[i]
|
2017-01-03 10:15:55 -07:00
|
|
|
smallFree += mheap_.nsmallfree[i] * uint64(class_to_size[i])
|
2015-02-19 11:38:46 -07:00
|
|
|
}
|
2017-01-03 10:15:55 -07:00
|
|
|
totalFree += smallFree
|
|
|
|
|
2015-02-19 11:38:46 -07:00
|
|
|
memstats.nfree += memstats.tinyallocs
|
2017-01-03 10:15:55 -07:00
|
|
|
memstats.nmalloc += memstats.tinyallocs
|
2015-02-19 11:38:46 -07:00
|
|
|
|
|
|
|
// Calculate derived stats.
|
2017-01-03 10:15:55 -07:00
|
|
|
memstats.total_alloc = totalAlloc
|
|
|
|
memstats.alloc = totalAlloc - totalFree
|
2015-02-19 11:38:46 -07:00
|
|
|
memstats.heap_alloc = memstats.alloc
|
|
|
|
memstats.heap_objects = memstats.nmalloc - memstats.nfree
|
|
|
|
}
|
|
|
|
|
2017-06-13 09:32:17 -06:00
|
|
|
// cachestats flushes all mcache stats.
|
|
|
|
//
|
|
|
|
// The world must be stopped.
|
|
|
|
//
|
2015-02-19 11:38:46 -07:00
|
|
|
//go:nowritebarrier
|
|
|
|
func cachestats() {
|
2017-06-13 09:32:17 -06:00
|
|
|
for _, p := range allp {
|
2015-02-19 11:38:46 -07:00
|
|
|
c := p.mcache
|
|
|
|
if c == nil {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
purgecachedstats(c)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-10-25 11:56:37 -06:00
|
|
|
// flushmcache flushes the mcache of allp[i].
|
|
|
|
//
|
|
|
|
// The world must be stopped.
|
|
|
|
//
|
|
|
|
//go:nowritebarrier
|
|
|
|
func flushmcache(i int) {
|
|
|
|
p := allp[i]
|
|
|
|
c := p.mcache
|
|
|
|
if c == nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
c.releaseAll()
|
|
|
|
stackcache_clear(c)
|
|
|
|
}
|
|
|
|
|
|
|
|
// flushallmcaches flushes the mcaches of all Ps.
|
|
|
|
//
|
|
|
|
// The world must be stopped.
|
|
|
|
//
|
2015-02-19 11:38:46 -07:00
|
|
|
//go:nowritebarrier
|
|
|
|
func flushallmcaches() {
|
2016-10-25 11:56:37 -06:00
|
|
|
for i := 0; i < int(gomaxprocs); i++ {
|
|
|
|
flushmcache(i)
|
2015-02-19 11:38:46 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
//go:nosplit
|
|
|
|
func purgecachedstats(c *mcache) {
|
|
|
|
// Protected by either heap or GC lock.
|
|
|
|
h := &mheap_
|
2015-05-04 14:10:49 -06:00
|
|
|
memstats.heap_scan += uint64(c.local_scan)
|
|
|
|
c.local_scan = 0
|
2015-02-19 11:38:46 -07:00
|
|
|
memstats.tinyallocs += uint64(c.local_tinyallocs)
|
|
|
|
c.local_tinyallocs = 0
|
|
|
|
memstats.nlookup += uint64(c.local_nlookup)
|
|
|
|
c.local_nlookup = 0
|
|
|
|
h.largefree += uint64(c.local_largefree)
|
|
|
|
c.local_largefree = 0
|
|
|
|
h.nlargefree += uint64(c.local_nlargefree)
|
|
|
|
c.local_nlargefree = 0
|
|
|
|
for i := 0; i < len(c.local_nsmallfree); i++ {
|
|
|
|
h.nsmallfree[i] += uint64(c.local_nsmallfree[i])
|
|
|
|
c.local_nsmallfree[i] = 0
|
|
|
|
}
|
|
|
|
}
|
2015-04-16 15:32:18 -06:00
|
|
|
|
2016-03-01 16:21:55 -07:00
|
|
|
// Atomically increases a given *system* memory stat. We are counting on this
|
2015-04-16 15:32:18 -06:00
|
|
|
// stat never overflowing a uintptr, so this function must only be used for
|
|
|
|
// system memory stats.
|
|
|
|
//
|
|
|
|
// The current implementation for little endian architectures is based on
|
|
|
|
// xadduintptr(), which is less than ideal: xadd64() should really be used.
|
|
|
|
// Using xadduintptr() is a stop-gap solution until arm supports xadd64() that
|
|
|
|
// doesn't use locks. (Locks are a problem as they require a valid G, which
|
|
|
|
// restricts their useability.)
|
|
|
|
//
|
|
|
|
// A side-effect of using xadduintptr() is that we need to check for
|
|
|
|
// overflow errors.
|
|
|
|
//go:nosplit
|
|
|
|
func mSysStatInc(sysStat *uint64, n uintptr) {
|
2015-11-11 10:39:30 -07:00
|
|
|
if sys.BigEndian != 0 {
|
2015-11-02 12:09:24 -07:00
|
|
|
atomic.Xadd64(sysStat, int64(n))
|
2015-04-16 15:32:18 -06:00
|
|
|
return
|
|
|
|
}
|
2015-11-02 12:09:24 -07:00
|
|
|
if val := atomic.Xadduintptr((*uintptr)(unsafe.Pointer(sysStat)), n); val < n {
|
2015-04-16 15:32:18 -06:00
|
|
|
print("runtime: stat overflow: val ", val, ", n ", n, "\n")
|
|
|
|
exit(2)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-03-01 16:21:55 -07:00
|
|
|
// Atomically decreases a given *system* memory stat. Same comments as
|
2015-04-16 15:32:18 -06:00
|
|
|
// mSysStatInc apply.
|
|
|
|
//go:nosplit
|
|
|
|
func mSysStatDec(sysStat *uint64, n uintptr) {
|
2015-11-11 10:39:30 -07:00
|
|
|
if sys.BigEndian != 0 {
|
2015-11-02 12:09:24 -07:00
|
|
|
atomic.Xadd64(sysStat, -int64(n))
|
2015-04-16 15:32:18 -06:00
|
|
|
return
|
|
|
|
}
|
2015-11-02 12:09:24 -07:00
|
|
|
if val := atomic.Xadduintptr((*uintptr)(unsafe.Pointer(sysStat)), uintptr(-int64(n))); val+n < n {
|
2015-04-16 15:32:18 -06:00
|
|
|
print("runtime: stat underflow: val ", val, ", n ", n, "\n")
|
|
|
|
exit(2)
|
|
|
|
}
|
|
|
|
}
|