mirror of
https://github.com/golang/go
synced 2024-11-19 23:24:45 -07:00
87d939dee8
Currently, we update memstats.heap_live from mcache.local_cachealloc whenever we lock the heap (e.g., to obtain a fresh span or to release an unused span). However, under the right circumstances, local_cachealloc can accumulate allocations up to the size of the *entire heap* without flushing them to heap_live. Specifically, since span allocations from an mcentral don't lock the heap, if a large number of pages are held in an mcentral and the application continues to use and free objects of that size class (e.g., the BinaryTree17 benchmark), local_cachealloc won't be flushed until the mcentral runs out of spans. This is a problem because, unlike many of the memory statistics that are purely informative, heap_live is used to determine when the garbage collector should start and how hard it should work. This commit eliminates local_cachealloc, instead atomically updating heap_live directly. To control contention, we do this only when obtaining a span from an mcentral. Furthermore, we make heap_live conservative: allocating a span assumes that all free slots in that span will be used and accounts for these when the span is allocated, *before* the objects themselves are. This is important because 1) this triggers the GC earlier than necessary rather than potentially too late and 2) this leads to a conservative GC rate rather than a GC rate that is potentially too low. Alternatively, we could have flushed local_cachealloc when it passed some threshold, but this would require determining a threshold and would cause heap_live to underestimate the true value rather than overestimate. Fixes #12199. 
name                      old time/op  new time/op  delta
BinaryTree17-12           2.88s ± 4%   2.88s ± 1%   ~       (p=0.470 n=19+19)
Fannkuch11-12             2.48s ± 1%   2.48s ± 1%   ~       (p=0.243 n=16+19)
FmtFprintfEmpty-12        50.9ns ± 2%  50.7ns ± 1%  ~       (p=0.238 n=15+14)
FmtFprintfString-12       175ns ± 1%   171ns ± 1%   -2.48%  (p=0.000 n=18+18)
FmtFprintfInt-12          159ns ± 1%   158ns ± 1%   -0.78%  (p=0.000 n=19+18)
FmtFprintfIntInt-12       270ns ± 1%   265ns ± 2%   -1.67%  (p=0.000 n=18+18)
FmtFprintfPrefixedInt-12  235ns ± 1%   234ns ± 0%   ~       (p=0.362 n=18+19)
FmtFprintfFloat-12        309ns ± 1%   308ns ± 1%   -0.41%  (p=0.001 n=18+19)
FmtManyArgs-12            1.10µs ± 1%  1.08µs ± 0%  -1.96%  (p=0.000 n=19+18)
GobDecode-12              7.81ms ± 1%  7.80ms ± 1%  ~       (p=0.425 n=18+19)
GobEncode-12              6.53ms ± 1%  6.53ms ± 1%  ~       (p=0.817 n=19+19)
Gzip-12                   312ms ± 1%   312ms ± 2%   ~       (p=0.967 n=19+20)
Gunzip-12                 42.0ms ± 1%  41.9ms ± 1%  ~       (p=0.172 n=19+19)
HTTPClientServer-12       63.7µs ± 1%  63.8µs ± 1%  ~       (p=0.639 n=19+19)
JSONEncode-12             16.4ms ± 1%  16.4ms ± 1%  ~       (p=0.954 n=19+19)
JSONDecode-12             58.5ms ± 1%  57.8ms ± 1%  -1.27%  (p=0.000 n=18+19)
Mandelbrot200-12          3.86ms ± 1%  3.88ms ± 0%  +0.44%  (p=0.000 n=18+18)
GoParse-12                3.67ms ± 2%  3.66ms ± 1%  -0.52%  (p=0.001 n=18+19)
RegexpMatchEasy0_32-12    100ns ± 1%   100ns ± 0%   ~       (p=0.257 n=19+18)
RegexpMatchEasy0_1K-12    347ns ± 1%   347ns ± 1%   ~       (p=0.527 n=18+18)
RegexpMatchEasy1_32-12    83.7ns ± 2%  83.1ns ± 2%  ~       (p=0.096 n=18+19)
RegexpMatchEasy1_1K-12    509ns ± 1%   505ns ± 1%   -0.75%  (p=0.000 n=18+19)
RegexpMatchMedium_32-12   130ns ± 2%   129ns ± 1%   ~       (p=0.962 n=20+20)
RegexpMatchMedium_1K-12   39.5µs ± 2%  39.4µs ± 1%  ~       (p=0.376 n=20+19)
RegexpMatchHard_32-12     2.04µs ± 0%  2.04µs ± 1%  ~       (p=0.195 n=18+17)
RegexpMatchHard_1K-12     61.4µs ± 1%  61.4µs ± 1%  ~       (p=0.885 n=19+19)
Revcomp-12                540ms ± 2%   542ms ± 4%   ~       (p=0.552 n=19+17)
Template-12               69.6ms ± 1%  71.2ms ± 1%  +2.39%  (p=0.000 n=20+20)
TimeParse-12              357ns ± 1%   357ns ± 1%   ~       (p=0.883 n=18+20)
TimeFormat-12             379ns ± 1%   362ns ± 1%   -4.53%  (p=0.000 n=18+19)
[Geo mean]                62.0µs       61.8µs       -0.44%

name              old time/op  new time/op  delta
XBenchGarbage-12  5.89ms ± 2%  5.81ms ± 2%  -1.41%  (p=0.000 n=19+18)

Change-Id: I96b31cca6ae77c30693a891cff3fe663fa2447a0
Reviewed-on: https://go-review.googlesource.com/17748
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Russ Cox <rsc@golang.org>
144 lines
4.1 KiB
Go
144 lines
4.1 KiB
Go
// Copyright 2009 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package runtime
|
|
|
|
import "unsafe"
|
|
|
|
// Per-thread (in Go, per-P) cache for small objects.
|
|
// No locking needed because it is per-thread (per-P).
|
|
//
|
|
// mcaches are allocated from non-GC'd memory, so any heap pointers
|
|
// must be specially handled.
|
|
type mcache struct {
|
|
// The following members are accessed on every malloc,
|
|
// so they are grouped here for better caching.
|
|
next_sample int32 // trigger heap sample after allocating this many bytes
|
|
local_scan uintptr // bytes of scannable heap allocated
|
|
|
|
// Allocator cache for tiny objects w/o pointers.
|
|
// See "Tiny allocator" comment in malloc.go.
|
|
|
|
// tiny points to the beginning of the current tiny block, or
|
|
// nil if there is no current tiny block.
|
|
//
|
|
// tiny is a heap pointer. Since mcache is in non-GC'd memory,
|
|
// we handle it by clearing it in releaseAll during mark
|
|
// termination.
|
|
tiny uintptr
|
|
tinyoffset uintptr
|
|
local_tinyallocs uintptr // number of tiny allocs not counted in other stats
|
|
|
|
// The rest is not accessed on every malloc.
|
|
alloc [_NumSizeClasses]*mspan // spans to allocate from
|
|
|
|
stackcache [_NumStackOrders]stackfreelist
|
|
|
|
// Local allocator stats, flushed during GC.
|
|
local_nlookup uintptr // number of pointer lookups
|
|
local_largefree uintptr // bytes freed for large objects (>maxsmallsize)
|
|
local_nlargefree uintptr // number of frees for large objects (>maxsmallsize)
|
|
local_nsmallfree [_NumSizeClasses]uintptr // number of frees for small objects (<=maxsmallsize)
|
|
}
|
|
|
|
// A gclink is a node in a linked list of blocks, like mlink,
|
|
// but it is opaque to the garbage collector.
|
|
// The GC does not trace the pointers during collection,
|
|
// and the compiler does not emit write barriers for assignments
|
|
// of gclinkptr values. Code should store references to gclinks
|
|
// as gclinkptr, not as *gclink.
|
|
type gclink struct {
|
|
next gclinkptr
|
|
}
|
|
|
|
// A gclinkptr refers to a gclink but is held as a uintptr, which makes
// it opaque to the garbage collector.
type gclinkptr uintptr
|
|
|
|
// ptr returns the *gclink form of p.
|
|
// The result should be used for accessing fields, not stored
|
|
// in other data structures.
|
|
func (p gclinkptr) ptr() *gclink {
|
|
return (*gclink)(unsafe.Pointer(p))
|
|
}
|
|
|
|
type stackfreelist struct {
|
|
list gclinkptr // linked list of free stacks
|
|
size uintptr // total size of stacks in list
|
|
}
|
|
|
|
// dummy MSpan that contains no free objects.
|
|
var emptymspan mspan
|
|
|
|
func allocmcache() *mcache {
|
|
lock(&mheap_.lock)
|
|
c := (*mcache)(mheap_.cachealloc.alloc())
|
|
unlock(&mheap_.lock)
|
|
memclr(unsafe.Pointer(c), unsafe.Sizeof(*c))
|
|
for i := 0; i < _NumSizeClasses; i++ {
|
|
c.alloc[i] = &emptymspan
|
|
}
|
|
c.next_sample = nextSample()
|
|
return c
|
|
}
|
|
|
|
func freemcache(c *mcache) {
|
|
systemstack(func() {
|
|
c.releaseAll()
|
|
stackcache_clear(c)
|
|
|
|
// NOTE(rsc,rlh): If gcworkbuffree comes back, we need to coordinate
|
|
// with the stealing of gcworkbufs during garbage collection to avoid
|
|
// a race where the workbuf is double-freed.
|
|
// gcworkbuffree(c.gcworkbuf)
|
|
|
|
lock(&mheap_.lock)
|
|
purgecachedstats(c)
|
|
mheap_.cachealloc.free(unsafe.Pointer(c))
|
|
unlock(&mheap_.lock)
|
|
})
|
|
}
|
|
|
|
// Gets a span that has a free object in it and assigns it
|
|
// to be the cached span for the given sizeclass. Returns this span.
|
|
func (c *mcache) refill(sizeclass int32) *mspan {
|
|
_g_ := getg()
|
|
|
|
_g_.m.locks++
|
|
// Return the current cached span to the central lists.
|
|
s := c.alloc[sizeclass]
|
|
if s.freelist.ptr() != nil {
|
|
throw("refill on a nonempty span")
|
|
}
|
|
if s != &emptymspan {
|
|
s.incache = false
|
|
}
|
|
|
|
// Get a new cached span from the central lists.
|
|
s = mheap_.central[sizeclass].mcentral.cacheSpan()
|
|
if s == nil {
|
|
throw("out of memory")
|
|
}
|
|
if s.freelist.ptr() == nil {
|
|
println(s.ref, (s.npages<<_PageShift)/s.elemsize)
|
|
throw("empty span")
|
|
}
|
|
c.alloc[sizeclass] = s
|
|
_g_.m.locks--
|
|
return s
|
|
}
|
|
|
|
func (c *mcache) releaseAll() {
|
|
for i := 0; i < _NumSizeClasses; i++ {
|
|
s := c.alloc[i]
|
|
if s != &emptymspan {
|
|
mheap_.central[i].mcentral.uncacheSpan(s)
|
|
c.alloc[i] = &emptymspan
|
|
}
|
|
}
|
|
// Clear tinyalloc pool.
|
|
c.tiny = 0
|
|
c.tinyoffset = 0
|
|
}
|