1
0
mirror of https://github.com/golang/go synced 2024-11-19 23:24:45 -07:00
go/src/runtime/mcache.go
Austin Clements 87d939dee8 runtime: fix (sometimes major) underestimation of heap_live
Currently, we update memstats.heap_live from mcache.local_cachealloc
whenever we lock the heap (e.g., to obtain a fresh span or to release
an unused span). However, under the right circumstances,
local_cachealloc can accumulate allocations up to the size of
the *entire heap* without flushing them to heap_live. Specifically,
since span allocations from an mcentral don't lock the heap, if a
large number of pages are held in an mcentral and the application
continues to use and free objects of that size class (e.g., the
BinaryTree17 benchmark), local_cachealloc won't be flushed until the
mcentral runs out of spans.

This is a problem because, unlike many of the memory statistics that
are purely informative, heap_live is used to determine when the
garbage collector should start and how hard it should work.

This commit eliminates local_cachealloc, instead atomically updating
heap_live directly. To control contention, we do this only when
obtaining a span from an mcentral. Furthermore, we make heap_live
conservative: allocating a span assumes that all free slots in that
span will be used and accounts for these when the span is
allocated, *before* the objects themselves are. This is important
because 1) this triggers the GC earlier than necessary rather than
potentially too late and 2) this leads to a conservative GC rate
rather than a GC rate that is potentially too low.

Alternatively, we could have flushed local_cachealloc when it passed
some threshold, but this would require determining a threshold and
would cause heap_live to underestimate the true value rather than
overestimate.

Fixes #12199.

name                      old time/op    new time/op    delta
BinaryTree17-12              2.88s ± 4%     2.88s ± 1%    ~     (p=0.470 n=19+19)
Fannkuch11-12                2.48s ± 1%     2.48s ± 1%    ~     (p=0.243 n=16+19)
FmtFprintfEmpty-12          50.9ns ± 2%    50.7ns ± 1%    ~     (p=0.238 n=15+14)
FmtFprintfString-12          175ns ± 1%     171ns ± 1%  -2.48%  (p=0.000 n=18+18)
FmtFprintfInt-12             159ns ± 1%     158ns ± 1%  -0.78%  (p=0.000 n=19+18)
FmtFprintfIntInt-12          270ns ± 1%     265ns ± 2%  -1.67%  (p=0.000 n=18+18)
FmtFprintfPrefixedInt-12     235ns ± 1%     234ns ± 0%    ~     (p=0.362 n=18+19)
FmtFprintfFloat-12           309ns ± 1%     308ns ± 1%  -0.41%  (p=0.001 n=18+19)
FmtManyArgs-12              1.10µs ± 1%    1.08µs ± 0%  -1.96%  (p=0.000 n=19+18)
GobDecode-12                7.81ms ± 1%    7.80ms ± 1%    ~     (p=0.425 n=18+19)
GobEncode-12                6.53ms ± 1%    6.53ms ± 1%    ~     (p=0.817 n=19+19)
Gzip-12                      312ms ± 1%     312ms ± 2%    ~     (p=0.967 n=19+20)
Gunzip-12                   42.0ms ± 1%    41.9ms ± 1%    ~     (p=0.172 n=19+19)
HTTPClientServer-12         63.7µs ± 1%    63.8µs ± 1%    ~     (p=0.639 n=19+19)
JSONEncode-12               16.4ms ± 1%    16.4ms ± 1%    ~     (p=0.954 n=19+19)
JSONDecode-12               58.5ms ± 1%    57.8ms ± 1%  -1.27%  (p=0.000 n=18+19)
Mandelbrot200-12            3.86ms ± 1%    3.88ms ± 0%  +0.44%  (p=0.000 n=18+18)
GoParse-12                  3.67ms ± 2%    3.66ms ± 1%  -0.52%  (p=0.001 n=18+19)
RegexpMatchEasy0_32-12       100ns ± 1%     100ns ± 0%    ~     (p=0.257 n=19+18)
RegexpMatchEasy0_1K-12       347ns ± 1%     347ns ± 1%    ~     (p=0.527 n=18+18)
RegexpMatchEasy1_32-12      83.7ns ± 2%    83.1ns ± 2%    ~     (p=0.096 n=18+19)
RegexpMatchEasy1_1K-12       509ns ± 1%     505ns ± 1%  -0.75%  (p=0.000 n=18+19)
RegexpMatchMedium_32-12      130ns ± 2%     129ns ± 1%    ~     (p=0.962 n=20+20)
RegexpMatchMedium_1K-12     39.5µs ± 2%    39.4µs ± 1%    ~     (p=0.376 n=20+19)
RegexpMatchHard_32-12       2.04µs ± 0%    2.04µs ± 1%    ~     (p=0.195 n=18+17)
RegexpMatchHard_1K-12       61.4µs ± 1%    61.4µs ± 1%    ~     (p=0.885 n=19+19)
Revcomp-12                   540ms ± 2%     542ms ± 4%    ~     (p=0.552 n=19+17)
Template-12                 69.6ms ± 1%    71.2ms ± 1%  +2.39%  (p=0.000 n=20+20)
TimeParse-12                 357ns ± 1%     357ns ± 1%    ~     (p=0.883 n=18+20)
TimeFormat-12                379ns ± 1%     362ns ± 1%  -4.53%  (p=0.000 n=18+19)
[Geo mean]                  62.0µs         61.8µs       -0.44%

name              old time/op  new time/op  delta
XBenchGarbage-12  5.89ms ± 2%  5.81ms ± 2%  -1.41%  (p=0.000 n=19+18)

Change-Id: I96b31cca6ae77c30693a891cff3fe663fa2447a0
Reviewed-on: https://go-review.googlesource.com/17748
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Russ Cox <rsc@golang.org>
2015-12-15 16:16:08 +00:00

144 lines
4.1 KiB
Go

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package runtime
import "unsafe"
// Per-thread (in Go, per-P) cache for small objects.
// No locking needed because it is per-thread (per-P).
//
// mcaches are allocated from non-GC'd memory, so any heap pointers
// must be specially handled.
type mcache struct {
// The following members are accessed on every malloc,
// so they are grouped here for better caching.
next_sample int32 // trigger heap sample after allocating this many bytes
local_scan uintptr // bytes of scannable heap allocated
// Allocator cache for tiny objects w/o pointers.
// See "Tiny allocator" comment in malloc.go.
// tiny points to the beginning of the current tiny block, or
// nil if there is no current tiny block.
//
// tiny is a heap pointer. Since mcache is in non-GC'd memory,
// we handle it by clearing it in releaseAll during mark
// termination.
tiny uintptr
tinyoffset uintptr
local_tinyallocs uintptr // number of tiny allocs not counted in other stats
// The rest is not accessed on every malloc.
alloc [_NumSizeClasses]*mspan // spans to allocate from
stackcache [_NumStackOrders]stackfreelist
// Local allocator stats, flushed during GC.
local_nlookup uintptr // number of pointer lookups
local_largefree uintptr // bytes freed for large objects (>maxsmallsize)
local_nlargefree uintptr // number of frees for large objects (>maxsmallsize)
local_nsmallfree [_NumSizeClasses]uintptr // number of frees for small objects (<=maxsmallsize)
}
// A gclink is a node in a linked list of blocks, like mlink,
// but it is opaque to the garbage collector.
// The GC does not trace the pointers during collection,
// and the compiler does not emit write barriers for assignments
// of gclinkptr values. Code should store references to gclinks
// as gclinkptr, not as *gclink.
type gclink struct {
next gclinkptr
}
// A gclinkptr is a pointer to a gclink, but it is opaque
// to the garbage collector.
type gclinkptr uintptr
// ptr returns the *gclink form of p.
// The result should be used for accessing fields, not stored
// in other data structures.
func (p gclinkptr) ptr() *gclink {
return (*gclink)(unsafe.Pointer(p))
}
type stackfreelist struct {
list gclinkptr // linked list of free stacks
size uintptr // total size of stacks in list
}
// dummy MSpan that contains no free objects.
var emptymspan mspan
func allocmcache() *mcache {
lock(&mheap_.lock)
c := (*mcache)(mheap_.cachealloc.alloc())
unlock(&mheap_.lock)
memclr(unsafe.Pointer(c), unsafe.Sizeof(*c))
for i := 0; i < _NumSizeClasses; i++ {
c.alloc[i] = &emptymspan
}
c.next_sample = nextSample()
return c
}
func freemcache(c *mcache) {
systemstack(func() {
c.releaseAll()
stackcache_clear(c)
// NOTE(rsc,rlh): If gcworkbuffree comes back, we need to coordinate
// with the stealing of gcworkbufs during garbage collection to avoid
// a race where the workbuf is double-freed.
// gcworkbuffree(c.gcworkbuf)
lock(&mheap_.lock)
purgecachedstats(c)
mheap_.cachealloc.free(unsafe.Pointer(c))
unlock(&mheap_.lock)
})
}
// Gets a span that has a free object in it and assigns it
// to be the cached span for the given sizeclass. Returns this span.
func (c *mcache) refill(sizeclass int32) *mspan {
_g_ := getg()
_g_.m.locks++
// Return the current cached span to the central lists.
s := c.alloc[sizeclass]
if s.freelist.ptr() != nil {
throw("refill on a nonempty span")
}
if s != &emptymspan {
s.incache = false
}
// Get a new cached span from the central lists.
s = mheap_.central[sizeclass].mcentral.cacheSpan()
if s == nil {
throw("out of memory")
}
if s.freelist.ptr() == nil {
println(s.ref, (s.npages<<_PageShift)/s.elemsize)
throw("empty span")
}
c.alloc[sizeclass] = s
_g_.m.locks--
return s
}
func (c *mcache) releaseAll() {
for i := 0; i < _NumSizeClasses; i++ {
s := c.alloc[i]
if s != &emptymspan {
mheap_.central[i].mcentral.uncacheSpan(s)
c.alloc[i] = &emptymspan
}
}
// Clear tinyalloc pool.
c.tiny = 0
c.tinyoffset = 0
}