diff --git a/src/runtime/mcache.go b/src/runtime/mcache.go
index 8486f69569..e20e92cdf4 100644
--- a/src/runtime/mcache.go
+++ b/src/runtime/mcache.go
@@ -4,7 +4,10 @@
 
 package runtime
 
-import "unsafe"
+import (
+	"runtime/internal/atomic"
+	"unsafe"
+)
 
 // Per-thread (in Go, per-P) cache for small objects.
 // No locking needed because it is per-thread (per-P).
@@ -42,6 +45,12 @@ type mcache struct {
 	local_largefree  uintptr                  // bytes freed for large objects (>maxsmallsize)
 	local_nlargefree uintptr                  // number of frees for large objects (>maxsmallsize)
 	local_nsmallfree [_NumSizeClasses]uintptr // number of frees for small objects (<=maxsmallsize)
+
+	// flushGen indicates the sweepgen during which this mcache
+	// was last flushed. If flushGen != mheap_.sweepgen, the spans
+	// in this mcache are stale and need to be flushed so they
+	// can be swept. This is done in acquirep.
+	flushGen uint32
 }
 
 // A gclink is a node in a linked list of blocks, like mlink,
@@ -76,6 +85,7 @@ var emptymspan mspan
 func allocmcache() *mcache {
 	lock(&mheap_.lock)
 	c := (*mcache)(mheap_.cachealloc.alloc())
+	c.flushGen = mheap_.sweepgen
 	unlock(&mheap_.lock)
 	for i := range c.alloc {
 		c.alloc[i] = &emptymspan
@@ -113,9 +123,12 @@ func (c *mcache) refill(spc spanClass) {
 	if uintptr(s.allocCount) != s.nelems {
 		throw("refill of span with free space remaining")
 	}
-
 	if s != &emptymspan {
-		s.incache = false
+		// Mark this span as no longer cached.
+		if s.sweepgen != mheap_.sweepgen+3 {
+			throw("bad sweepgen in refill")
+		}
+		atomic.Store(&s.sweepgen, mheap_.sweepgen)
 	}
 
 	// Get a new cached span from the central lists.
@@ -128,6 +141,10 @@ func (c *mcache) refill(spc spanClass) {
 		throw("span has no free space")
 	}
 
+	// Indicate that this span is cached and prevent asynchronous
+	// sweeping in the next sweep phase.
+	s.sweepgen = mheap_.sweepgen + 3
+
 	c.alloc[spc] = s
 }
 
@@ -143,3 +160,26 @@ func (c *mcache) releaseAll() {
 	c.tiny = 0
 	c.tinyoffset = 0
 }
+
+// prepareForSweep flushes c if the system has entered a new sweep phase
+// since c was populated. This must happen between the sweep phase
+// starting and the first allocation from c.
+func (c *mcache) prepareForSweep() {
+	// Alternatively, instead of making sure we do this on every P
+	// between starting the world and allocating on that P, we
+	// could leave allocate-black on, allow allocation to continue
+	// as usual, use a ragged barrier at the beginning of sweep to
+	// ensure all cached spans are swept, and then disable
+	// allocate-black. However, with this approach it's difficult
+	// to avoid spilling mark bits into the *next* GC cycle.
+	sg := mheap_.sweepgen
+	if c.flushGen == sg {
+		return
+	} else if c.flushGen != sg-2 {
+		println("bad flushGen", c.flushGen, "in prepareForSweep; sweepgen", sg)
+		throw("bad flushGen")
+	}
+	c.releaseAll()
+	stackcache_clear(c)
+	atomic.Store(&c.flushGen, mheap_.sweepgen) // Synchronizes with gcStart
+}
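
The flushGen handshake above works because mheap_.sweepgen advances by exactly 2 per GC cycle and every P must run prepareForSweep before its first allocation of a cycle, so a healthy mcache is either current (flushGen == sweepgen) or exactly one cycle behind (flushGen == sweepgen-2). The stand-alone sketch below models that handshake; it is illustrative only and not part of the patch, and gcHeap, pCache, and their fields are invented stand-ins for mheap_ and mcache.

package main

import (
	"fmt"
	"sync/atomic"
)

// gcHeap stands in for mheap_. sweepgen advances by 2 when a new
// sweep phase begins.
type gcHeap struct{ sweepgen uint32 }

// pCache stands in for a per-P mcache. flushGen records the sweepgen
// for which this cache was last flushed.
type pCache struct{ flushGen uint32 }

// prepareForSweep mirrors the logic of mcache.prepareForSweep: do
// nothing if already flushed for this cycle, flush if exactly one
// cycle behind, and treat anything else as a missed flush.
func (c *pCache) prepareForSweep(h *gcHeap) {
	sg := h.sweepgen
	switch fg := atomic.LoadUint32(&c.flushGen); {
	case fg == sg:
		return // already flushed for this sweep generation
	case fg == sg-2:
		// The real code releases cached spans (releaseAll) and
		// clears the stack cache here before publishing the flush.
		atomic.StoreUint32(&c.flushGen, sg) // synchronizes with the gcStart check
	default:
		panic(fmt.Sprintf("bad flushGen %d in prepareForSweep; sweepgen %d", fg, sg))
	}
}

func main() {
	h := &gcHeap{sweepgen: 2}
	c := &pCache{flushGen: h.sweepgen} // as in allocmcache

	h.sweepgen += 2      // a GC cycle finishes marking; sweeping begins
	c.prepareForSweep(h) // deferred flush, as acquirep/procresize would do
	c.prepareForSweep(h) // second call is a no-op

	// gcStart's new assertion: every P's cache must be flushed by now.
	fmt.Println(atomic.LoadUint32(&c.flushGen) == h.sweepgen) // true
}
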
diff --git a/src/runtime/mcentral.go b/src/runtime/mcentral.go
index c1e0b472bc..9ca8e5d222 100644
--- a/src/runtime/mcentral.go
+++ b/src/runtime/mcentral.go
@@ -135,7 +135,6 @@ havespan:
 		// heap_live changed.
 		gcController.revise()
 	}
-	s.incache = true
 	freeByteBase := s.freeindex &^ (64 - 1)
 	whichByte := freeByteBase / 8
 	// Init alloc bits cache.
@@ -150,28 +149,54 @@ havespan:
 
 // Return span from an MCache.
 func (c *mcentral) uncacheSpan(s *mspan) {
-	lock(&c.lock)
-
-	s.incache = false
-
 	if s.allocCount == 0 {
 		throw("uncaching span but s.allocCount == 0")
 	}
 
 	cap := int32((s.npages << _PageShift) / s.elemsize)
 	n := cap - int32(s.allocCount)
+
+	// cacheSpan updated alloc assuming all objects on s were
+	// going to be allocated. Adjust for any that weren't. We must
+	// do this before potentially sweeping the span.
 	if n > 0 {
-		c.empty.remove(s)
-		c.nonempty.insert(s)
-		// mCentral_CacheSpan conservatively counted
-		// unallocated slots in heap_live. Undo this.
-		atomic.Xadd64(&memstats.heap_live, -int64(n)*int64(s.elemsize))
-		// cacheSpan updated alloc assuming all objects on s
-		// were going to be allocated. Adjust for any that
-		// weren't.
 		atomic.Xadd64(&c.nmalloc, -int64(n))
 	}
-	unlock(&c.lock)
+
+	sg := mheap_.sweepgen
+	stale := s.sweepgen == sg+1
+	if stale {
+		// Span was cached before sweep began. It's our
+		// responsibility to sweep it.
+		//
+		// Set sweepgen to indicate it's not cached but needs
+		// sweeping. sweep will set s.sweepgen to indicate s
+		// is swept.
+		s.sweepgen = sg - 1
+		s.sweep(true)
+		// sweep may have freed objects, so recompute n.
+		n = cap - int32(s.allocCount)
+	} else {
+		// Indicate that s is no longer cached.
+		s.sweepgen = sg
+	}
+
+	if n > 0 {
+		lock(&c.lock)
+		c.empty.remove(s)
+		c.nonempty.insert(s)
+		if !stale {
+			// mCentral_CacheSpan conservatively counted
+			// unallocated slots in heap_live. Undo this.
+			//
+			// If this span was cached before sweep, then
+			// heap_live was totally recomputed since
+			// caching this span, so we don't do this for
+			// stale spans.
+			atomic.Xadd64(&memstats.heap_live, -int64(n)*int64(s.elemsize))
+		}
+		unlock(&c.lock)
+	}
 }
 
 // freeSpan updates c and s after sweeping s.
@@ -183,13 +208,13 @@ func (c *mcentral) uncacheSpan(s *mspan) {
 // If preserve=true, it does not move s (the caller
 // must take care of it).
 func (c *mcentral) freeSpan(s *mspan, preserve bool, wasempty bool) bool {
-	if s.incache {
+	if sg := mheap_.sweepgen; s.sweepgen == sg+1 || s.sweepgen == sg+3 {
 		throw("freeSpan given cached span")
 	}
 	s.needzero = 1
 
 	if preserve {
-		// preserve is set only when called from MCentral_CacheSpan above,
+		// preserve is set only when called from (un)cacheSpan above,
 		// the span must be in the empty list.
 		if !s.inList() {
 			throw("can't preserve unlinked span")
diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go
index d52c8cd791..83980e6020 100644
--- a/src/runtime/mgc.go
+++ b/src/runtime/mgc.go
@@ -1262,6 +1262,14 @@ func gcStart(trigger gcTrigger) {
 		traceGCStart()
 	}
 
+	// Check that all Ps have finished deferred mcache flushes.
+	for _, p := range allp {
+		if fg := atomic.Load(&p.mcache.flushGen); fg != mheap_.sweepgen {
+			println("runtime: p", p.id, "flushGen", fg, "!= sweepgen", mheap_.sweepgen)
+			throw("p mcache not flushed")
+		}
+	}
+
 	gcBgMarkStartWorkers()
 
 	gcResetMarkState()
@@ -1606,6 +1614,16 @@ func gcMarkTermination(nextTriggerRatio float64) {
 	// Free stack spans. This must be done between GC cycles.
 	systemstack(freeStackSpans)
 
+	// Ensure all mcaches are flushed. Each P will flush its own
+	// mcache before allocating, but idle Ps may not. Since this
+	// is necessary to sweep all spans, we need to ensure all
+	// mcaches are flushed before we start the next GC cycle.
+	systemstack(func() {
+		forEachP(func(_p_ *p) {
+			_p_.mcache.prepareForSweep()
+		})
+	})
+
 	// Print gctrace before dropping worldsema. As soon as we drop
 	// worldsema another cycle could start and smash the stats
 	// we're trying to print.
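
The two branches in uncacheSpan fall out of the sweepgen arithmetic: a cached span is tagged sweepgen+3, and because sweepgen advances by 2 per cycle, a span that stays cached across a GC reads as sweepgen+1 in the new cycle, i.e. "cached before sweep began, needs sweeping". A minimal illustration of that relabeling follows; it is not part of the patch, and plain local values stand in for mheap_.sweepgen and s.sweepgen.

package main

import "fmt"

func main() {
	heapSweepgen := uint32(6) // mheap_.sweepgen for the current cycle

	// refill tags a freshly cached span as swept-and-cached.
	spanSweepgen := heapSweepgen + 3

	// Uncached within the same cycle: uncacheSpan just clears the
	// cached state by storing heapSweepgen (the non-stale branch).
	fmt.Println(spanSweepgen == heapSweepgen+3) // true: not stale

	// A new GC cycle begins while the span stays cached.
	heapSweepgen += 2

	// The stored value did not change, but relative to the new
	// generation it now reads as "cached before sweep began and
	// needs sweeping", so uncacheSpan must sweep it itself (the
	// stale branch).
	fmt.Println(spanSweepgen == heapSweepgen+1) // true: stale
}
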
diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go
index cdbe988a1e..78a597f007 100644
--- a/src/runtime/mgcmark.go
+++ b/src/runtime/mgcmark.go
@@ -52,11 +52,7 @@ const (
 //
 //go:nowritebarrier
 func gcMarkRootPrepare() {
-	if gcphase == _GCmarktermination {
-		work.nFlushCacheRoots = int(gomaxprocs)
-	} else {
-		work.nFlushCacheRoots = 0
-	}
+	work.nFlushCacheRoots = 0
 
 	// Compute how many data and BSS root blocks there are.
 	nBlocks := func(bytes uintptr) int {
@@ -344,7 +340,8 @@ func markrootSpans(gcw *gcWork, shard int) {
 		if s.state != mSpanInUse {
 			continue
 		}
-		if !useCheckmark && s.sweepgen != sg {
+		// Check that this span was swept (it may be cached or uncached).
+		if !useCheckmark && !(s.sweepgen == sg || s.sweepgen == sg+3) {
 			// sweepgen was updated (+2) during non-checkmark GC pass
 			print("sweep ", s.sweepgen, " ", sg, "\n")
 			throw("gc: unswept span")
diff --git a/src/runtime/mgcsweep.go b/src/runtime/mgcsweep.go
index 5cdede002a..00950aede2 100644
--- a/src/runtime/mgcsweep.go
+++ b/src/runtime/mgcsweep.go
@@ -161,7 +161,8 @@ func (s *mspan) ensureSwept() {
 	}
 
 	sg := mheap_.sweepgen
-	if atomic.Load(&s.sweepgen) == sg {
+	spangen := atomic.Load(&s.sweepgen)
+	if spangen == sg || spangen == sg+3 {
 		return
 	}
 	// The caller must be sure that the span is a mSpanInUse span.
@@ -170,7 +171,11 @@ func (s *mspan) ensureSwept() {
 		return
 	}
 	// unfortunate condition, and we don't have efficient means to wait
-	for atomic.Load(&s.sweepgen) != sg {
+	for {
+		spangen := atomic.Load(&s.sweepgen)
+		if spangen == sg || spangen == sg+3 {
+			break
+		}
 		osyield()
 	}
 }
diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go
index 2dd66f7c2b..e29af677a2 100644
--- a/src/runtime/mheap.go
+++ b/src/runtime/mheap.go
@@ -317,6 +317,8 @@ type mspan struct {
 	// if sweepgen == h->sweepgen - 2, the span needs sweeping
 	// if sweepgen == h->sweepgen - 1, the span is currently being swept
 	// if sweepgen == h->sweepgen, the span is swept and ready to use
+	// if sweepgen == h->sweepgen + 1, the span was cached before sweep began and is still cached, and needs sweeping
+	// if sweepgen == h->sweepgen + 3, the span was swept and then cached and is still cached
 	// h->sweepgen is incremented by 2 after every GC
 	sweepgen uint32
 
@@ -324,7 +326,6 @@ type mspan struct {
 	baseMask    uint16     // if non-0, elemsize is a power of 2, & this will get object allocation base
 	allocCount  uint16     // number of allocated objects
 	spanclass   spanClass  // size class and noscan (uint8)
-	incache     bool       // being used by an mcache
 	state       mSpanState // mspaninuse etc
 	needzero    uint8      // needs to be zeroed before allocation
 	divShift    uint8      // for divide by elemsize - divMagic.shift
@@ -1185,7 +1186,6 @@ func (span *mspan) init(base uintptr, npages uintptr) {
 	span.npages = npages
 	span.allocCount = 0
 	span.spanclass = 0
-	span.incache = false
 	span.elemsize = 0
 	span.state = mSpanDead
 	span.unusedsince = 0
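
The five sweepgen states documented in mheap.go above are only ever expressed in the runtime as comparisons against mheap_.sweepgen, as in the markrootSpans and ensureSwept hunks. The stand-alone decoder below simply restates that table for reference; it is an illustrative sketch, not part of the patch, and describeSweepgen and its output format are invented names.

package main

import "fmt"

// describeSweepgen restates the mspan.sweepgen comment as code,
// using unsigned arithmetic just like the runtime's comparisons.
func describeSweepgen(spanGen, heapGen uint32) string {
	switch spanGen {
	case heapGen - 2:
		return "needs sweeping"
	case heapGen - 1:
		return "being swept"
	case heapGen:
		return "swept and ready to use"
	case heapGen + 1:
		return "cached before sweep began; still cached, needs sweeping"
	case heapGen + 3:
		return "swept and then cached; still cached"
	default:
		return "invalid for an in-use span"
	}
}

func main() {
	const heapGen = uint32(10)
	for _, spanGen := range []uint32{heapGen - 2, heapGen - 1, heapGen, heapGen + 1, heapGen + 3} {
		fmt.Printf("s.sweepgen = h.sweepgen%+d: %s\n", int32(spanGen-heapGen), describeSweepgen(spanGen, heapGen))
	}
}

This also shows why ensureSwept and markrootSpans now accept both sg and sg+3 as "already swept": a span currently cached by an mcache (sg+3) was swept for the current cycle before it was cached.
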
diff --git a/src/runtime/proc.go b/src/runtime/proc.go
index 0a7321254c..910918f4b4 100644
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@@ -4119,6 +4119,7 @@ func procresize(nprocs int32) *p {
 	if _g_.m.p != 0 && _g_.m.p.ptr().id < nprocs {
 		// continue to use the current P
 		_g_.m.p.ptr().status = _Prunning
+		_g_.m.p.ptr().mcache.prepareForSweep()
 	} else {
 		// release the current P and acquire allp[0]
 		if _g_.m.p != 0 {
@@ -4169,6 +4170,10 @@ func acquirep(_p_ *p) {
 	_g_ := getg()
 	_g_.m.mcache = _p_.mcache
 
+	// Perform deferred mcache flush before this P can allocate
+	// from a potentially stale mcache.
+	_p_.mcache.prepareForSweep()
+
 	if trace.enabled {
 		traceProcStart()
 	}