Mirror of https://github.com/golang/go, synced 2024-11-17 12:54:47 -07:00
runtime: flush mcaches lazily
Currently, all mcaches are flushed during STW mark termination as a
root marking job. This is necessary because all spans must be out of
these caches before sweeping begins to avoid races with allocation
and to ensure the spans are in the state expected by sweeping. We do
it as a root marking job because mcache flushing is somewhat expensive
and O(GOMAXPROCS), and making it a root job parallelizes the work
across the Ps. However, it's also the last remaining root marking job
performed during mark termination.

This CL moves mcache flushing out of mark termination and performs it
lazily. We keep track of the last sweepgen at which each mcache was
flushed and, as each P is woken from STW, it observes that its mcache
is out-of-date and flushes it.

This introduces a complication for spans cached in stale mcaches.
These may now be observed by background or proportional sweeping or
when attempting to add a finalizer, but aren't in a stable state. For
example, they are likely to be on the wrong mcentral list. To fix
this, this CL extends the sweepgen protocol to also capture whether a
span is cached and, if so, whether or not its cache is stale. This
protocol blocks asynchronous sweeping from touching cached spans and
makes it the responsibility of mcache flushing to sweep the flushed
spans.

This eliminates the last mark termination root marking job, which
means we can now eliminate that entire infrastructure.

Updates #26903.
This implements lazy mcache flushing.

Change-Id: Iadda7aabe540b2026cffc5195da7be37d5b4125e
Reviewed-on: https://go-review.googlesource.com/c/134783
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Rick Hudson <rlh@golang.org>
commit 873bd47dfb
parent 457c8f4fe9
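The extended sweepgen protocol described in the message is easiest to
see laid out by state. A minimal sketch (the helper name and state
labels below are illustrative only, not runtime code):

	package main

	import "fmt"

	// spanSweepState labels a span's state from its sweepgen relative
	// to the heap's sweepgen (sg). The two new states (sg+1, sg+3)
	// mark cached spans; see the mspan.sweepgen comment in mheap.go
	// further down in the diff.
	func spanSweepState(spanGen, sg uint32) string {
		switch spanGen {
		case sg - 2:
			return "unswept: needs sweeping"
		case sg - 1:
			return "being swept"
		case sg:
			return "swept, ready to use"
		case sg + 1:
			return "cached before sweep began; mcache flush must sweep it"
		case sg + 3:
			return "swept, then cached"
		}
		return "invalid"
	}

	func main() {
		sg := uint32(6) // sweepgen advances by 2 each GC cycle
		for _, g := range []uint32{sg - 2, sg - 1, sg, sg + 1, sg + 3} {
			fmt.Printf("sweepgen=%d: %s\n", g, spanSweepState(g, sg))
		}
	}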
src/runtime/mcache.go

@@ -4,7 +4,10 @@
 
 package runtime
 
-import "unsafe"
+import (
+	"runtime/internal/atomic"
+	"unsafe"
+)
 
 // Per-thread (in Go, per-P) cache for small objects.
 // No locking needed because it is per-thread (per-P).
@@ -42,6 +45,12 @@ type mcache struct {
 	local_largefree  uintptr                  // bytes freed for large objects (>maxsmallsize)
 	local_nlargefree uintptr                  // number of frees for large objects (>maxsmallsize)
 	local_nsmallfree [_NumSizeClasses]uintptr // number of frees for small objects (<=maxsmallsize)
+
+	// flushGen indicates the sweepgen during which this mcache
+	// was last flushed. If flushGen != mheap_.sweepgen, the spans
+	// in this mcache are stale and need to be flushed so they
+	// can be swept. This is done in acquirep.
+	flushGen uint32
 }
 
 // A gclink is a node in a linked list of blocks, like mlink,
@@ -76,6 +85,7 @@ var emptymspan mspan
 func allocmcache() *mcache {
 	lock(&mheap_.lock)
 	c := (*mcache)(mheap_.cachealloc.alloc())
+	c.flushGen = mheap_.sweepgen
 	unlock(&mheap_.lock)
 	for i := range c.alloc {
 		c.alloc[i] = &emptymspan
@@ -113,9 +123,12 @@ func (c *mcache) refill(spc spanClass) {
 	if uintptr(s.allocCount) != s.nelems {
 		throw("refill of span with free space remaining")
 	}
-
 	if s != &emptymspan {
-		s.incache = false
+		// Mark this span as no longer cached.
+		if s.sweepgen != mheap_.sweepgen+3 {
+			throw("bad sweepgen in refill")
+		}
+		atomic.Store(&s.sweepgen, mheap_.sweepgen)
 	}
 
 	// Get a new cached span from the central lists.
@@ -128,6 +141,10 @@ func (c *mcache) refill(spc spanClass) {
 		throw("span has no free space")
 	}
 
+	// Indicate that this span is cached and prevent asynchronous
+	// sweeping in the next sweep phase.
+	s.sweepgen = mheap_.sweepgen + 3
+
 	c.alloc[spc] = s
 }
 
@@ -143,3 +160,26 @@ func (c *mcache) releaseAll() {
 	c.tiny = 0
 	c.tinyoffset = 0
 }
+
+// prepareForSweep flushes c if the system has entered a new sweep phase
+// since c was populated. This must happen between the sweep phase
+// starting and the first allocation from c.
+func (c *mcache) prepareForSweep() {
+	// Alternatively, instead of making sure we do this on every P
+	// between starting the world and allocating on that P, we
+	// could leave allocate-black on, allow allocation to continue
+	// as usual, use a ragged barrier at the beginning of sweep to
+	// ensure all cached spans are swept, and then disable
+	// allocate-black. However, with this approach it's difficult
+	// to avoid spilling mark bits into the *next* GC cycle.
+	sg := mheap_.sweepgen
+	if c.flushGen == sg {
+		return
+	} else if c.flushGen != sg-2 {
+		println("bad flushGen", c.flushGen, "in prepareForSweep; sweepgen", sg)
+		throw("bad flushGen")
+	}
+	c.releaseAll()
+	stackcache_clear(c)
+	atomic.Store(&c.flushGen, mheap_.sweepgen) // Synchronizes with gcStart
+}
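prepareForSweep publishes its flush with an atomic store so that the
check added to gcStart (further down in this diff) can safely read
flushGen across Ps. A minimal standalone sketch of that handshake,
with simplified names that are not the runtime's own:

	package main

	import (
		"fmt"
		"sync/atomic"
	)

	var sweepgen uint32 = 4 // incremented by 2 after every GC

	type cache struct{ flushGen uint32 }

	// flushIfStale models mcache.prepareForSweep: flush at most once
	// per sweep generation, then publish completion atomically.
	func (c *cache) flushIfStale() {
		sg := atomic.LoadUint32(&sweepgen)
		if atomic.LoadUint32(&c.flushGen) == sg {
			return // already flushed this cycle
		}
		// ... return cached spans so they can be swept ...
		atomic.StoreUint32(&c.flushGen, sg) // synchronizes with the GC's check
	}

	func main() {
		c := &cache{flushGen: 2} // stale: one generation behind
		c.flushIfStale()
		fmt.Println(atomic.LoadUint32(&c.flushGen) == sweepgen) // true
	}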
src/runtime/mcentral.go

@@ -135,7 +135,6 @@ havespan:
 		// heap_live changed.
 		gcController.revise()
 	}
-	s.incache = true
 	freeByteBase := s.freeindex &^ (64 - 1)
 	whichByte := freeByteBase / 8
 	// Init alloc bits cache.
@@ -150,29 +149,55 @@ havespan:
 
 // Return span from an MCache.
 func (c *mcentral) uncacheSpan(s *mspan) {
-	lock(&c.lock)
-
-	s.incache = false
-
 	if s.allocCount == 0 {
 		throw("uncaching span but s.allocCount == 0")
 	}
 
 	cap := int32((s.npages << _PageShift) / s.elemsize)
 	n := cap - int32(s.allocCount)
+
+	// cacheSpan updated alloc assuming all objects on s were
+	// going to be allocated. Adjust for any that weren't. We must
+	// do this before potentially sweeping the span.
 	if n > 0 {
-		c.empty.remove(s)
-		c.nonempty.insert(s)
-		// mCentral_CacheSpan conservatively counted
-		// unallocated slots in heap_live. Undo this.
-		atomic.Xadd64(&memstats.heap_live, -int64(n)*int64(s.elemsize))
-		// cacheSpan updated alloc assuming all objects on s
-		// were going to be allocated. Adjust for any that
-		// weren't.
 		atomic.Xadd64(&c.nmalloc, -int64(n))
 	}
+
+	sg := mheap_.sweepgen
+	stale := s.sweepgen == sg+1
+	if stale {
+		// Span was cached before sweep began. It's our
+		// responsibility to sweep it.
+		//
+		// Set sweepgen to indicate it's not cached but needs
+		// sweeping. sweep will set s.sweepgen to indicate s
+		// is swept.
+		s.sweepgen = sg - 1
+		s.sweep(true)
+		// sweep may have freed objects, so recompute n.
+		n = cap - int32(s.allocCount)
+	} else {
+		// Indicate that s is no longer cached.
+		s.sweepgen = sg
+	}
+
+	if n > 0 {
+		lock(&c.lock)
+		c.empty.remove(s)
+		c.nonempty.insert(s)
+		if !stale {
+			// mCentral_CacheSpan conservatively counted
+			// unallocated slots in heap_live. Undo this.
+			//
+			// If this span was cached before sweep, then
+			// heap_live was totally recomputed since
+			// caching this span, so we don't do this for
+			// stale spans.
+			atomic.Xadd64(&memstats.heap_live, -int64(n)*int64(s.elemsize))
+		}
 		unlock(&c.lock)
 	}
+}
 
 // freeSpan updates c and s after sweeping s.
 // It sets s's sweepgen to the latest generation,
@@ -183,13 +208,13 @@ func (c *mcentral) uncacheSpan(s *mspan) {
 // If preserve=true, it does not move s (the caller
 // must take care of it).
 func (c *mcentral) freeSpan(s *mspan, preserve bool, wasempty bool) bool {
-	if s.incache {
+	if sg := mheap_.sweepgen; s.sweepgen == sg+1 || s.sweepgen == sg+3 {
 		throw("freeSpan given cached span")
 	}
 	s.needzero = 1
 
 	if preserve {
-		// preserve is set only when called from MCentral_CacheSpan above,
+		// preserve is set only when called from (un)cacheSpan above,
 		// the span must be in the empty list.
 		if !s.inList() {
 			throw("can't preserve unlinked span")
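To make uncacheSpan's accounting adjustment concrete: cacheSpan
credits every free slot on a span as if it will be allocated, and
uncacheSpan returns the credit for slots that never were. A worked
example with assumed numbers (one 8 KiB span of 1 KiB objects; values
are illustrative only):

	package main

	import "fmt"

	func main() {
		// Assumed values: one 8 KiB page holding 1 KiB objects.
		npages, pageShift := uintptr(1), uintptr(13)
		elemsize := uintptr(1 << 10)
		allocCount := uint16(5) // objects actually allocated from the cached span

		cap := int32((npages << pageShift) / elemsize) // 8 slots total
		n := cap - int32(allocCount)                   // 3 slots were over-credited

		// uncacheSpan's corrections:
		fmt.Println("nmalloc -=", n)                          // 3 allocations undone
		fmt.Println("heap_live -=", int64(n)*int64(elemsize)) // 3072 bytes undone
	}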
src/runtime/mgc.go

@@ -1262,6 +1262,14 @@ func gcStart(trigger gcTrigger) {
 		traceGCStart()
 	}
 
+	// Check that all Ps have finished deferred mcache flushes.
+	for _, p := range allp {
+		if fg := atomic.Load(&p.mcache.flushGen); fg != mheap_.sweepgen {
+			println("runtime: p", p.id, "flushGen", fg, "!= sweepgen", mheap_.sweepgen)
+			throw("p mcache not flushed")
+		}
+	}
+
 	gcBgMarkStartWorkers()
 
 	gcResetMarkState()
@@ -1606,6 +1614,16 @@ func gcMarkTermination(nextTriggerRatio float64) {
 	// Free stack spans. This must be done between GC cycles.
 	systemstack(freeStackSpans)
 
+	// Ensure all mcaches are flushed. Each P will flush its own
+	// mcache before allocating, but idle Ps may not. Since this
+	// is necessary to sweep all spans, we need to ensure all
+	// mcaches are flushed before we start the next GC cycle.
+	systemstack(func() {
+		forEachP(func(_p_ *p) {
+			_p_.mcache.prepareForSweep()
+		})
+	})
+
 	// Print gctrace before dropping worldsema. As soon as we drop
 	// worldsema another cycle could start and smash the stats
 	// we're trying to print.
src/runtime/mgcmark.go

@@ -52,11 +52,7 @@ const (
 //
 //go:nowritebarrier
 func gcMarkRootPrepare() {
-	if gcphase == _GCmarktermination {
-		work.nFlushCacheRoots = int(gomaxprocs)
-	} else {
-		work.nFlushCacheRoots = 0
-	}
+	work.nFlushCacheRoots = 0
 
 	// Compute how many data and BSS root blocks there are.
 	nBlocks := func(bytes uintptr) int {
@@ -344,7 +340,8 @@ func markrootSpans(gcw *gcWork, shard int) {
 		if s.state != mSpanInUse {
 			continue
 		}
-		if !useCheckmark && s.sweepgen != sg {
+		// Check that this span was swept (it may be cached or uncached).
+		if !useCheckmark && !(s.sweepgen == sg || s.sweepgen == sg+3) {
 			// sweepgen was updated (+2) during non-checkmark GC pass
 			print("sweep ", s.sweepgen, " ", sg, "\n")
 			throw("gc: unswept span")
src/runtime/mgcsweep.go

@@ -161,7 +161,8 @@ func (s *mspan) ensureSwept() {
 	}
 
 	sg := mheap_.sweepgen
-	if atomic.Load(&s.sweepgen) == sg {
+	spangen := atomic.Load(&s.sweepgen)
+	if spangen == sg || spangen == sg+3 {
 		return
 	}
 	// The caller must be sure that the span is a mSpanInUse span.
@@ -170,7 +171,11 @@ func (s *mspan) ensureSwept() {
 		return
 	}
 	// unfortunate condition, and we don't have efficient means to wait
-	for atomic.Load(&s.sweepgen) != sg {
+	for {
+		spangen := atomic.Load(&s.sweepgen)
+		if spangen == sg || spangen == sg+3 {
+			break
+		}
 		osyield()
 	}
 }
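For context on why the new sg+1 and sg+3 states block asynchronous
sweeping: a sweeper only claims a span by CAS-ing its sweepgen from
sg-2 to sg-1, so spans parked in the cached states never match. A
simplified, standalone sketch of that claim step (modeled on the
runtime's sweep loop, not part of this diff):

	package main

	import (
		"fmt"
		"sync/atomic"
	)

	// claimForSweep models how an asynchronous sweeper acquires a span:
	// only a span whose sweepgen is exactly sg-2 can be CAS'd to sg-1
	// (being swept). Cached spans sit at sg+1 or sg+3 and never match,
	// so background and proportional sweeping leave them alone.
	func claimForSweep(spanGen *uint32, sg uint32) bool {
		return atomic.LoadUint32(spanGen) == sg-2 &&
			atomic.CompareAndSwapUint32(spanGen, sg-2, sg-1)
	}

	func main() {
		sg := uint32(6)
		unswept := sg - 2 // needs sweeping
		cached := sg + 1  // cached before sweep began

		fmt.Println(claimForSweep(&unswept, sg)) // true: sweeper claims it
		fmt.Println(claimForSweep(&cached, sg))  // false: left to the mcache flush
	}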
src/runtime/mheap.go

@@ -317,6 +317,8 @@ type mspan struct {
 	// if sweepgen == h->sweepgen - 2, the span needs sweeping
 	// if sweepgen == h->sweepgen - 1, the span is currently being swept
 	// if sweepgen == h->sweepgen, the span is swept and ready to use
+	// if sweepgen == h->sweepgen + 1, the span was cached before sweep began and is still cached, and needs sweeping
+	// if sweepgen == h->sweepgen + 3, the span was swept and then cached and is still cached
 	// h->sweepgen is incremented by 2 after every GC
 
 	sweepgen    uint32
@@ -324,7 +326,6 @@ type mspan struct {
 	baseMask    uint16     // if non-0, elemsize is a power of 2, & this will get object allocation base
 	allocCount  uint16     // number of allocated objects
 	spanclass   spanClass  // size class and noscan (uint8)
-	incache     bool       // being used by an mcache
 	state       mSpanState // mspaninuse etc
 	needzero    uint8      // needs to be zeroed before allocation
 	divShift    uint8      // for divide by elemsize - divMagic.shift
@@ -1185,7 +1186,6 @@ func (span *mspan) init(base uintptr, npages uintptr) {
 	span.npages = npages
 	span.allocCount = 0
 	span.spanclass = 0
-	span.incache = false
 	span.elemsize = 0
 	span.state = mSpanDead
 	span.unusedsince = 0
src/runtime/proc.go

@@ -4119,6 +4119,7 @@ func procresize(nprocs int32) *p {
 	if _g_.m.p != 0 && _g_.m.p.ptr().id < nprocs {
 		// continue to use the current P
 		_g_.m.p.ptr().status = _Prunning
+		_g_.m.p.ptr().mcache.prepareForSweep()
 	} else {
 		// release the current P and acquire allp[0]
 		if _g_.m.p != 0 {
@@ -4169,6 +4170,10 @@ func acquirep(_p_ *p) {
 	_g_ := getg()
 	_g_.m.mcache = _p_.mcache
 
+	// Perform deferred mcache flush before this P can allocate
+	// from a potentially stale mcache.
+	_p_.mcache.prepareForSweep()
+
 	if trace.enabled {
 		traceProcStart()
 	}