diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index db589c3f8fa..7747e5409c9 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -1363,6 +1363,19 @@ func gcStart(trigger gcTrigger) { // This is protected by markDoneSema. var gcMarkDoneFlushed uint32 +// debugCachedWork enables extra checks for debugging premature mark +// termination. +// +// For debugging issue #27993. +const debugCachedWork = true + +// gcWorkPauseGen is for debugging the mark completion algorithm. +// gcWork put operations spin while gcWork.pauseGen == gcWorkPauseGen. +// Only used if debugCachedWork is true. +// +// For debugging issue #27993. +var gcWorkPauseGen uint32 = 1 + // gcMarkDone transitions the GC from mark to mark termination if all // reachable objects have been marked (that is, there are no grey // objects and can be no more in the future). Otherwise, it flushes @@ -1408,6 +1421,14 @@ top: // Flush the write barrier buffer, since this may add // work to the gcWork. wbBufFlush1(_p_) + // For debugging, shrink the write barrier + // buffer so it flushes immediately. + // wbBuf.reset will keep it at this size as + // long as throwOnGCWork is set. + if debugCachedWork { + b := &_p_.wbBuf + b.end = uintptr(unsafe.Pointer(&b.buf[wbBufEntryPointers])) + } // Flush the gcWork, since this may create global work // and set the flushedWork flag. // @@ -1418,11 +1439,23 @@ top: if _p_.gcw.flushedWork { atomic.Xadd(&gcMarkDoneFlushed, 1) _p_.gcw.flushedWork = false + } else if debugCachedWork { + // For debugging, freeze the gcWork + // until we know whether we've reached + // completion or not. If we think + // we've reached completion, but + // there's a paused gcWork, then + // that's a bug. + _p_.gcw.pauseGen = gcWorkPauseGen } }) }) if gcMarkDoneFlushed != 0 { + if debugCachedWork { + // Release paused gcWorks. + atomic.Xadd(&gcWorkPauseGen, 1) + } // More grey objects were discovered since the // previous termination check, so there may be more // work to do. Keep going. It's possible the @@ -1431,7 +1464,12 @@ top: goto top } - throwOnGCWork = true + if debugCachedWork { + throwOnGCWork = true + // Release paused gcWorks. If there are any, they + // should now observe throwOnGCWork and panic. + atomic.Xadd(&gcWorkPauseGen, 1) + } // There was no global work, no local work, and no Ps // communicated work since we took markDoneSema. Therefore @@ -1449,6 +1487,30 @@ top: // below. The important thing is that the wb remains active until // all marking is complete. This includes writes made by the GC. + if debugCachedWork { + // For debugging, double check that no work was added after we + // went around above and disable write barrier buffering. + for _, p := range allp { + gcw := &p.gcw + if !gcw.empty() { + printlock() + print("runtime: P ", p.id, " flushedWork ", gcw.flushedWork) + if gcw.wbuf1 == nil { + print(" wbuf1=") + } else { + print(" wbuf1.n=", gcw.wbuf1.nobj) + } + if gcw.wbuf2 == nil { + print(" wbuf2=") + } else { + print(" wbuf2.n=", gcw.wbuf2.nobj) + } + print("\n") + throw("throwOnGCWork") + } + } + } + // Disable assists and background workers. We must do // this before waking blocked assists. atomic.Store(&gcBlackenEnabled, 0) diff --git a/src/runtime/mgcwork.go b/src/runtime/mgcwork.go index da2129ee508..8a77ff55e4d 100644 --- a/src/runtime/mgcwork.go +++ b/src/runtime/mgcwork.go @@ -93,6 +93,10 @@ type gcWork struct { // termination check. Specifically, this indicates that this // gcWork may have communicated work to another gcWork. flushedWork bool + + // pauseGen causes put operations to spin while pauseGen == + // gcWorkPauseGen if debugCachedWork is true. + pauseGen uint32 } // Most of the methods of gcWork are go:nowritebarrierrec because the @@ -111,13 +115,21 @@ func (w *gcWork) init() { w.wbuf2 = wbuf2 } +func (w *gcWork) checkPut() { + if debugCachedWork { + for atomic.Load(&gcWorkPauseGen) == w.pauseGen { + } + if throwOnGCWork { + throw("throwOnGCWork") + } + } +} + // put enqueues a pointer for the garbage collector to trace. // obj must point to the beginning of a heap object or an oblet. //go:nowritebarrierrec func (w *gcWork) put(obj uintptr) { - if throwOnGCWork { - throw("throwOnGCWork") - } + w.checkPut() flushed := false wbuf := w.wbuf1 @@ -153,9 +165,7 @@ func (w *gcWork) put(obj uintptr) { // otherwise it returns false and the caller needs to call put. //go:nowritebarrierrec func (w *gcWork) putFast(obj uintptr) bool { - if throwOnGCWork { - throw("throwOnGCWork") - } + w.checkPut() wbuf := w.wbuf1 if wbuf == nil { @@ -178,9 +188,7 @@ func (w *gcWork) putBatch(obj []uintptr) { return } - if throwOnGCWork { - throw("throwOnGCWork") - } + w.checkPut() flushed := false wbuf := w.wbuf1 @@ -303,16 +311,12 @@ func (w *gcWork) balance() { return } if wbuf := w.wbuf2; wbuf.nobj != 0 { - if throwOnGCWork { - throw("throwOnGCWork") - } + w.checkPut() putfull(wbuf) w.flushedWork = true w.wbuf2 = getempty() } else if wbuf := w.wbuf1; wbuf.nobj > 4 { - if throwOnGCWork { - throw("throwOnGCWork") - } + w.checkPut() w.wbuf1 = handoff(wbuf) w.flushedWork = true // handoff did putfull } else { diff --git a/src/runtime/mwbbuf.go b/src/runtime/mwbbuf.go index c91cea254e9..a698493a0a2 100644 --- a/src/runtime/mwbbuf.go +++ b/src/runtime/mwbbuf.go @@ -79,7 +79,7 @@ const ( func (b *wbBuf) reset() { start := uintptr(unsafe.Pointer(&b.buf[0])) b.next = start - if writeBarrier.cgo { + if writeBarrier.cgo || (debugCachedWork && throwOnGCWork) { // Effectively disable the buffer by forcing a flush // on every barrier. b.end = uintptr(unsafe.Pointer(&b.buf[wbBufEntryPointers]))