// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Memory statistics

package runtime

import (
	"runtime/internal/atomic"
	"runtime/internal/sys"
	"unsafe"
)

// Statistics.
// If you edit this structure, also edit type MemStats below.
type mstats struct {
	// General statistics.
	alloc       uint64 // bytes allocated and not yet freed
	total_alloc uint64 // bytes allocated (even if freed)
	sys         uint64 // bytes obtained from system (should be sum of xxx_sys below, no locking, approximate)
	nlookup     uint64 // number of pointer lookups
	nmalloc     uint64 // number of mallocs
	nfree       uint64 // number of frees

	// Statistics about the malloc heap.
	// Protected by mheap.lock.
	heap_alloc    uint64 // bytes allocated and not yet freed (same as alloc above)
	heap_sys      uint64 // bytes obtained from system
	heap_idle     uint64 // bytes in idle spans
	heap_inuse    uint64 // bytes in non-idle spans
	heap_released uint64 // bytes released to the OS
	heap_objects  uint64 // total number of allocated objects

	// Statistics about allocation of low-level fixed-size structures.
	// Protected by FixAlloc locks.
	stacks_inuse uint64 // this number is included in heap_inuse above
	stacks_sys   uint64 // always 0 in mstats
	mspan_inuse  uint64 // mspan structures
	mspan_sys    uint64
	mcache_inuse uint64 // mcache structures
	mcache_sys   uint64
	buckhash_sys uint64 // profiling bucket hash table
	gc_sys       uint64
	other_sys    uint64

	// Statistics about the garbage collector.
	// Protected by mheap or by stopping the world during GC.
	next_gc         uint64 // next gc (in heap_live time)
	last_gc         uint64 // last gc (in absolute time)
	pause_total_ns  uint64
	pause_ns        [256]uint64 // circular buffer of recent gc pause lengths
	pause_end       [256]uint64 // circular buffer of recent gc end times (nanoseconds since 1970)
	numgc           uint32
	gc_cpu_fraction float64 // fraction of CPU time used by GC
	enablegc        bool
	debuggc         bool

	// Statistics about allocation size classes.
	by_size [_NumSizeClasses]struct {
		size    uint32
		nmalloc uint64
		nfree   uint64
	}

	// Statistics below here are not exported to Go directly.

	tinyallocs uint64 // number of tiny allocations that didn't cause actual allocation; not exported to Go directly

	// heap_live is the number of bytes considered live by the GC.
	// That is: retained by the most recent GC plus allocated
	// since then. heap_live <= heap_alloc, since heap_alloc
	// includes unmarked objects that have not yet been swept (and
	// hence goes up as we allocate and down as we sweep) while
	// heap_live excludes these objects (and hence only goes up
	// between GCs).
	//
	// This is updated atomically without locking. To reduce
	// contention, this is updated only when obtaining a span from
	// an mcentral and at this point it counts all of the
	// unallocated slots in that span (which will be allocated
	// before that mcache obtains another span from that
	// mcentral). Hence, it slightly overestimates the "true" live
	// heap size. It's better to overestimate than to
	// underestimate because 1) this triggers the GC earlier than
	// necessary rather than potentially too late and 2) this
	// leads to a conservative GC rate rather than a GC rate that
	// is potentially too low.
	//
	// Whenever this is updated, call traceHeapAlloc() and
	// gcController.revise().
	heap_live uint64
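
	// Illustration of the accounting described above (hypothetical sketch,
	// not code from this file): when an mcache obtains a span s from an
	// mcentral, every currently-unallocated slot in s is credited to
	// heap_live immediately, roughly
	//
	//	free := s.nelems - s.ref // unallocated slots, assuming these mspan field names
	//	atomic.Xadd64(&memstats.heap_live, int64(uintptr(free)*s.elemsize))
	//
	// The real update lives in the mcentral/mcache code, not here; the field
	// names above are illustrative only.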

	// heap_scan is the number of bytes of "scannable" heap. This
	// is the live heap (as counted by heap_live), but omitting
	// no-scan objects and no-scan tails of objects.
	//
	// Whenever this is updated, call gcController.revise().
	heap_scan uint64

	// heap_marked is the number of bytes marked by the previous
	// GC. After mark termination, heap_live == heap_marked, but
	// unlike heap_live, heap_marked does not change until the
	// next mark termination.
	heap_marked uint64

	// heap_reachable is an estimate of the reachable heap bytes
	// at the end of the previous GC.
	heap_reachable uint64
}

var memstats mstats

// A MemStats records statistics about the memory allocator.
type MemStats struct {
	// General statistics.
	Alloc      uint64 // bytes allocated and not yet freed
	TotalAlloc uint64 // bytes allocated (even if freed)
	Sys        uint64 // bytes obtained from system (sum of XxxSys below)
	Lookups    uint64 // number of pointer lookups
	Mallocs    uint64 // number of mallocs
	Frees      uint64 // number of frees

	// Main allocation heap statistics.
	HeapAlloc    uint64 // bytes allocated and not yet freed (same as Alloc above)
	HeapSys      uint64 // bytes obtained from system
	HeapIdle     uint64 // bytes in idle spans
	HeapInuse    uint64 // bytes in non-idle spans
	HeapReleased uint64 // bytes released to the OS
	HeapObjects  uint64 // total number of allocated objects

	// Low-level fixed-size structure allocator statistics.
	// Inuse is bytes used now.
	// Sys is bytes obtained from system.
	StackInuse  uint64 // bytes used by stack allocator
	StackSys    uint64
	MSpanInuse  uint64 // mspan structures
	MSpanSys    uint64
	MCacheInuse uint64 // mcache structures
	MCacheSys   uint64
	BuckHashSys uint64 // profiling bucket hash table
	GCSys       uint64 // GC metadata
	OtherSys    uint64 // other system allocations

	// Garbage collector statistics.
	NextGC        uint64 // next collection will happen when HeapAlloc ≥ this amount
	LastGC        uint64 // end time of last collection (nanoseconds since 1970)
	PauseTotalNs  uint64
	PauseNs       [256]uint64 // circular buffer of recent GC pause durations, most recent at [(NumGC+255)%256]
	PauseEnd      [256]uint64 // circular buffer of recent GC pause end times
	NumGC         uint32
	GCCPUFraction float64 // fraction of CPU time used by GC
	EnableGC      bool
	DebugGC       bool

	// Per-size allocation statistics.
	// 61 is NumSizeClasses in the C code.
	BySize [61]struct {
		Size    uint32
		Mallocs uint64
		Frees   uint64
	}
}

// Size of the trailing by_size array differs between Go and C,
// and all data after by_size is local to runtime, not exported.
// NumSizeClasses was changed, but we cannot change the Go struct because of
// backward compatibility. sizeof_C_MStats is what C thinks the size of the
// Go struct is.
var sizeof_C_MStats = unsafe.Offsetof(memstats.by_size) + 61*unsafe.Sizeof(memstats.by_size[0])

func init() {
	var memStats MemStats
	if sizeof_C_MStats != unsafe.Sizeof(memStats) {
		println(sizeof_C_MStats, unsafe.Sizeof(memStats))
		throw("MStats vs MemStatsType size mismatch")
	}
}

// ReadMemStats populates m with memory allocator statistics.
func ReadMemStats(m *MemStats) {
	stopTheWorld("read mem stats")

	systemstack(func() {
		readmemstats_m(m)
	})

	startTheWorld()
}
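
// Typical use from user code (illustrative):
//
//	var m runtime.MemStats
//	runtime.ReadMemStats(&m)
//	fmt.Printf("live heap: %d bytes, GC cycles: %d\n", m.HeapAlloc, m.NumGC)
//
// Because ReadMemStats stops the world, it is comparatively expensive and is
// best kept out of hot paths.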

func readmemstats_m(stats *MemStats) {
	updatememstats(nil)

	// Size of the trailing by_size array differs between Go and C.
	// NumSizeClasses was changed, but we cannot change the Go struct because of
	// backward compatibility.
	memmove(unsafe.Pointer(stats), unsafe.Pointer(&memstats), sizeof_C_MStats)

	// Stack numbers are part of the heap numbers; separate those out for user consumption.
	stats.StackSys += stats.StackInuse
	stats.HeapInuse -= stats.StackInuse
	stats.HeapSys -= stats.StackInuse
}
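
// Worked example of the adjustment above (made-up numbers): if the raw copy
// reports HeapSys=100MB, HeapInuse=60MB, StackInuse=8MB, and StackSys=0, the
// user-visible stats become StackSys=8MB, HeapInuse=52MB, and HeapSys=92MB,
// so stack memory is reported under the stack stats rather than the heap
// stats while the overall Sys total is unchanged.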

//go:linkname readGCStats runtime/debug.readGCStats
func readGCStats(pauses *[]uint64) {
	systemstack(func() {
		readGCStats_m(pauses)
	})
}

func readGCStats_m(pauses *[]uint64) {
	p := *pauses
	// Calling code in runtime/debug should make the slice large enough.
	if cap(p) < len(memstats.pause_ns)+3 {
		throw("short slice passed to readGCStats")
	}

	// Pass back: pauses, pause ends, last gc (absolute time), number of gc, total pause ns.
	lock(&mheap_.lock)

	n := memstats.numgc
	if n > uint32(len(memstats.pause_ns)) {
		n = uint32(len(memstats.pause_ns))
	}

	// The pause buffer is circular. The most recent pause is at
	// pause_ns[(numgc-1)%len(pause_ns)], and then backward
	// from there to go back farther in time. We deliver the times
	// most recent first (in p[0]).
	p = p[:cap(p)]
	for i := uint32(0); i < n; i++ {
		j := (memstats.numgc - 1 - i) % uint32(len(memstats.pause_ns))
		p[i] = memstats.pause_ns[j]
		p[n+i] = memstats.pause_end[j]
	}

	p[n+n] = memstats.last_gc
	p[n+n+1] = uint64(memstats.numgc)
	p[n+n+2] = memstats.pause_total_ns
	unlock(&mheap_.lock)

	*pauses = p[:n+n+3]
}
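
// Layout of the slice returned above, for reference: with n recorded pauses,
//
//	p[0:n]    pause durations, most recent first
//	p[n:2n]   matching pause end times, in the same order
//	p[2n]     last_gc (absolute end time of the last GC)
//	p[2n+1]   numgc
//	p[2n+2]   pause_total_ns
//
// The runtime/debug package unpacks this packed form (see the //go:linkname
// directive above).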

//go:nowritebarrier
func updatememstats(stats *gcstats) {
	if stats != nil {
		*stats = gcstats{}
	}
	for mp := allm; mp != nil; mp = mp.alllink {
		if stats != nil {
			src := (*[unsafe.Sizeof(gcstats{}) / 8]uint64)(unsafe.Pointer(&mp.gcstats))
			dst := (*[unsafe.Sizeof(gcstats{}) / 8]uint64)(unsafe.Pointer(stats))
			for i, v := range src {
				dst[i] += v
			}
			mp.gcstats = gcstats{}
		}
	}

	memstats.mcache_inuse = uint64(mheap_.cachealloc.inuse)
	memstats.mspan_inuse = uint64(mheap_.spanalloc.inuse)
	memstats.sys = memstats.heap_sys + memstats.stacks_sys + memstats.mspan_sys +
		memstats.mcache_sys + memstats.buckhash_sys + memstats.gc_sys + memstats.other_sys

	// Calculate memory allocator stats.
	// During program execution we only count the number of frees and the amount of freed memory.
	// The current number of alive objects in the heap and the amount of alive heap memory
	// are calculated by scanning all spans.
	// The total number of mallocs is calculated as the number of frees plus the number of alive objects.
	// Similarly, the total amount of allocated memory is calculated as the amount of freed memory
	// plus the amount of alive heap memory.
	memstats.alloc = 0
	memstats.total_alloc = 0
	memstats.nmalloc = 0
	memstats.nfree = 0
	for i := 0; i < len(memstats.by_size); i++ {
		memstats.by_size[i].nmalloc = 0
		memstats.by_size[i].nfree = 0
	}

	// Flush MCaches to MCentral.
	systemstack(flushallmcaches)

	// Aggregate local stats.
	cachestats()

	// Scan all spans and count the number of alive objects.
	lock(&mheap_.lock)
	for i := uint32(0); i < mheap_.nspan; i++ {
		s := h_allspans[i]
		if s.state != mSpanInUse {
			continue
		}
		if s.sizeclass == 0 {
			memstats.nmalloc++
			memstats.alloc += uint64(s.elemsize)
		} else {
			memstats.nmalloc += uint64(s.ref)
			memstats.by_size[s.sizeclass].nmalloc += uint64(s.ref)
			memstats.alloc += uint64(s.ref) * uint64(s.elemsize)
		}
	}
	unlock(&mheap_.lock)

	// Aggregate by size class.
	smallfree := uint64(0)
	memstats.nfree = mheap_.nlargefree
	for i := 0; i < len(memstats.by_size); i++ {
		memstats.nfree += mheap_.nsmallfree[i]
		memstats.by_size[i].nfree = mheap_.nsmallfree[i]
		memstats.by_size[i].nmalloc += mheap_.nsmallfree[i]
		smallfree += uint64(mheap_.nsmallfree[i]) * uint64(class_to_size[i])
	}
	memstats.nfree += memstats.tinyallocs
	memstats.nmalloc += memstats.nfree

	// Calculate derived stats.
	memstats.total_alloc = uint64(memstats.alloc) + uint64(mheap_.largefree) + smallfree
	memstats.heap_alloc = memstats.alloc
	memstats.heap_objects = memstats.nmalloc - memstats.nfree
}
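
// Worked example of the identities used above (illustrative numbers only):
// if scanning the in-use spans finds 1,000 alive objects occupying 64KB, and
// the flushed free counters record 4,000 frees totalling 256KB, then
//
//	nmalloc      = 1,000 + 4,000   = 5,000
//	total_alloc  = 64KB + 256KB    = 320KB
//	heap_objects = nmalloc - nfree = 1,000
//
// which is exactly the "frees plus alive objects" calculation described in
// the comments above.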

//go:nowritebarrier
func cachestats() {
	for i := 0; ; i++ {
		p := allp[i]
		if p == nil {
			break
		}
		c := p.mcache
		if c == nil {
			continue
		}
		purgecachedstats(c)
	}
}

//go:nowritebarrier
func flushallmcaches() {
	for i := 0; ; i++ {
		p := allp[i]
		if p == nil {
			break
		}
		c := p.mcache
		if c == nil {
			continue
		}
		c.releaseAll()
		stackcache_clear(c)
	}
}

//go:nosplit
func purgecachedstats(c *mcache) {
	// Protected by either heap or GC lock.
	h := &mheap_
	memstats.heap_scan += uint64(c.local_scan)
	c.local_scan = 0
	memstats.tinyallocs += uint64(c.local_tinyallocs)
	c.local_tinyallocs = 0
	memstats.nlookup += uint64(c.local_nlookup)
	c.local_nlookup = 0
	h.largefree += uint64(c.local_largefree)
	c.local_largefree = 0
	h.nlargefree += uint64(c.local_nlargefree)
	c.local_nlargefree = 0
	for i := 0; i < len(c.local_nsmallfree); i++ {
		h.nsmallfree[i] += uint64(c.local_nsmallfree[i])
		c.local_nsmallfree[i] = 0
	}
}

// mSysStatInc atomically increases a given *system* memory stat. We are
// counting on this stat never overflowing a uintptr, so this function must
// only be used for system memory stats.
//
// The current implementation for little-endian architectures is based on
// xadduintptr(), which is less than ideal: xadd64() should really be used.
// Using xadduintptr() is a stop-gap solution until arm supports xadd64()
// that doesn't use locks. (Locks are a problem as they require a valid G,
// which restricts their usability.)
//
// A side effect of using xadduintptr() is that we need to check for
// overflow errors.
//go:nosplit
func mSysStatInc(sysStat *uint64, n uintptr) {
	if sys.BigEndian != 0 {
		atomic.Xadd64(sysStat, int64(n))
		return
	}
	if val := atomic.Xadduintptr((*uintptr)(unsafe.Pointer(sysStat)), n); val < n {
		print("runtime: stat overflow: val ", val, ", n ", n, "\n")
		exit(2)
	}
}
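
// Illustrative call site (the real callers are the low-level memory routines
// that grow a *_sys stat): after obtaining n bytes from the OS for GC
// metadata, one would do
//
//	mSysStatInc(&memstats.gc_sys, n)
//
// paired with a matching mSysStatDec when that memory is returned.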

// mSysStatDec atomically decreases a given *system* memory stat. The same
// comments as for mSysStatInc apply.
//go:nosplit
func mSysStatDec(sysStat *uint64, n uintptr) {
	if sys.BigEndian != 0 {
		atomic.Xadd64(sysStat, -int64(n))
		return
	}
	if val := atomic.Xadduintptr((*uintptr)(unsafe.Pointer(sysStat)), uintptr(-int64(n))); val+n < n {
		print("runtime: stat underflow: val ", val, ", n ", n, "\n")
		exit(2)
	}
}
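
// Note on the BigEndian check in the two functions above: on 32-bit
// platforms, xadduintptr only modifies the first 4 bytes of the uint64. On
// little-endian machines those are the least-significant bytes, so the add
// behaves like a 64-bit add as long as the stat never exceeds a uintptr; on
// big-endian machines they are the most-significant bytes, so the full
// 64-bit Xadd64 must be used instead.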