runtime: check more work flushing races
This adds several new checks to help debug #27993. It adds a mechanism
for freezing write barriers and gcWork puts during the mark completion
algorithm. This way, if we do detect mark completion, we can catch any
puts that happened during the completion algorithm. Based on build
dashboard failures, this seems to be the window of time when these are
happening.

This also double-checks that all work buffers are empty immediately
upon entering mark termination (much earlier than the current check).
This is unlikely to trigger based on the current failures, but is a
good safety net.

Change-Id: I03f56c48c4322069e28c50fbc3c15b2fee2130c2
Reviewed-on: https://go-review.googlesource.com/c/151797
Run-TryBot: Austin Clements <austin@google.com>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
Commit: 438b9544a0
Parent: fbdaa96563
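The heart of the new check is a generation-counter handshake between gcMarkDone and each per-P gcWork: while the completion check runs, a quiescent gcWork is frozen by setting its pauseGen to the current gcWorkPauseGen, so any put that races with the check spins in checkPut; once gcMarkDone has decided the outcome it bumps gcWorkPauseGen, and the released put throws if throwOnGCWork was set in the meantime. Below is a minimal standalone sketch of that handshake, not the runtime code from the diff: the coordinator/worker setup is invented for illustration, and it uses sync/atomic on uint32 flags where the runtime uses its internal atomics and a plain bool for throwOnGCWork.

// pausegen_sketch.go — illustration only; not part of the runtime.
package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

var (
	gcWorkPauseGen uint32 = 1 // global pause generation (mirrors runtime's gcWorkPauseGen)
	throwOnGCWork  uint32     // non-zero once completion has been declared
)

// work stands in for a per-P gcWork.
type work struct {
	pauseGen uint32 // puts spin while pauseGen == gcWorkPauseGen
}

// checkPut mimics gcWork.checkPut: a put that races with the completion
// check is held here until the coordinator bumps gcWorkPauseGen, and it
// fails loudly if completion had already been declared.
func (w *work) checkPut() {
	for atomic.LoadUint32(&gcWorkPauseGen) == atomic.LoadUint32(&w.pauseGen) {
		// Frozen: the completion check is still deciding.
	}
	if atomic.LoadUint32(&throwOnGCWork) != 0 {
		panic("work enqueued after mark completion was declared")
	}
}

func main() {
	w := &work{}

	// Coordinator: freeze this worker's puts while checking for completion.
	atomic.StoreUint32(&w.pauseGen, atomic.LoadUint32(&gcWorkPauseGen))

	var wg sync.WaitGroup
	wg.Add(1)
	go func() {
		defer wg.Done()
		w.checkPut() // a racing "put" spins here until released
		fmt.Println("put released: completion was not declared")
	}()

	// The check found more work, so completion is not declared
	// (throwOnGCWork stays zero); bump the generation to release the put.
	atomic.AddUint32(&gcWorkPauseGen, 1)
	wg.Wait()
}

With that picture in mind, the diff reads in three parts: mgc.go freezes and releases the gcWorks around the completion check and re-verifies that every work buffer is empty before mark termination; mgcwork.go routes all puts through checkPut; and mwbbuf.go keeps the write barrier buffer effectively disabled while throwOnGCWork is set.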
--- a/src/runtime/mgc.go
+++ b/src/runtime/mgc.go
@@ -1363,6 +1363,19 @@ func gcStart(trigger gcTrigger) {
 // This is protected by markDoneSema.
 var gcMarkDoneFlushed uint32
 
+// debugCachedWork enables extra checks for debugging premature mark
+// termination.
+//
+// For debugging issue #27993.
+const debugCachedWork = true
+
+// gcWorkPauseGen is for debugging the mark completion algorithm.
+// gcWork put operations spin while gcWork.pauseGen == gcWorkPauseGen.
+// Only used if debugCachedWork is true.
+//
+// For debugging issue #27993.
+var gcWorkPauseGen uint32 = 1
+
 // gcMarkDone transitions the GC from mark to mark termination if all
 // reachable objects have been marked (that is, there are no grey
 // objects and can be no more in the future). Otherwise, it flushes
@@ -1408,6 +1421,14 @@ top:
 			// Flush the write barrier buffer, since this may add
 			// work to the gcWork.
 			wbBufFlush1(_p_)
+			// For debugging, shrink the write barrier
+			// buffer so it flushes immediately.
+			// wbBuf.reset will keep it at this size as
+			// long as throwOnGCWork is set.
+			if debugCachedWork {
+				b := &_p_.wbBuf
+				b.end = uintptr(unsafe.Pointer(&b.buf[wbBufEntryPointers]))
+			}
 			// Flush the gcWork, since this may create global work
 			// and set the flushedWork flag.
 			//
@@ -1418,11 +1439,23 @@ top:
 			if _p_.gcw.flushedWork {
 				atomic.Xadd(&gcMarkDoneFlushed, 1)
 				_p_.gcw.flushedWork = false
-			}
+			} else if debugCachedWork {
+				// For debugging, freeze the gcWork
+				// until we know whether we've reached
+				// completion or not. If we think
+				// we've reached completion, but
+				// there's a paused gcWork, then
+				// that's a bug.
+				_p_.gcw.pauseGen = gcWorkPauseGen
+			}
 		})
 	})
 
 	if gcMarkDoneFlushed != 0 {
+		if debugCachedWork {
+			// Release paused gcWorks.
+			atomic.Xadd(&gcWorkPauseGen, 1)
+		}
 		// More grey objects were discovered since the
 		// previous termination check, so there may be more
 		// work to do. Keep going. It's possible the
@@ -1431,7 +1464,12 @@ top:
 		goto top
 	}
 
-	throwOnGCWork = true
+	if debugCachedWork {
+		throwOnGCWork = true
+		// Release paused gcWorks. If there are any, they
+		// should now observe throwOnGCWork and panic.
+		atomic.Xadd(&gcWorkPauseGen, 1)
+	}
 
 	// There was no global work, no local work, and no Ps
 	// communicated work since we took markDoneSema. Therefore
@@ -1449,6 +1487,30 @@ top:
 	// below. The important thing is that the wb remains active until
 	// all marking is complete. This includes writes made by the GC.
 
+	if debugCachedWork {
+		// For debugging, double check that no work was added after we
+		// went around above and disable write barrier buffering.
+		for _, p := range allp {
+			gcw := &p.gcw
+			if !gcw.empty() {
+				printlock()
+				print("runtime: P ", p.id, " flushedWork ", gcw.flushedWork)
+				if gcw.wbuf1 == nil {
+					print(" wbuf1=<nil>")
+				} else {
+					print(" wbuf1.n=", gcw.wbuf1.nobj)
+				}
+				if gcw.wbuf2 == nil {
+					print(" wbuf2=<nil>")
+				} else {
+					print(" wbuf2.n=", gcw.wbuf2.nobj)
+				}
+				print("\n")
+				throw("throwOnGCWork")
+			}
+		}
+	}
+
 	// Disable assists and background workers. We must do
 	// this before waking blocked assists.
 	atomic.Store(&gcBlackenEnabled, 0)
--- a/src/runtime/mgcwork.go
+++ b/src/runtime/mgcwork.go
@@ -93,6 +93,10 @@ type gcWork struct {
 	// termination check. Specifically, this indicates that this
 	// gcWork may have communicated work to another gcWork.
 	flushedWork bool
+
+	// pauseGen causes put operations to spin while pauseGen ==
+	// gcWorkPauseGen if debugCachedWork is true.
+	pauseGen uint32
 }
 
 // Most of the methods of gcWork are go:nowritebarrierrec because the
@@ -111,13 +115,21 @@ func (w *gcWork) init() {
 	w.wbuf2 = wbuf2
 }
 
+func (w *gcWork) checkPut() {
+	if debugCachedWork {
+		for atomic.Load(&gcWorkPauseGen) == w.pauseGen {
+		}
+		if throwOnGCWork {
+			throw("throwOnGCWork")
+		}
+	}
+}
+
 // put enqueues a pointer for the garbage collector to trace.
 // obj must point to the beginning of a heap object or an oblet.
 //go:nowritebarrierrec
 func (w *gcWork) put(obj uintptr) {
-	if throwOnGCWork {
-		throw("throwOnGCWork")
-	}
+	w.checkPut()
 
 	flushed := false
 	wbuf := w.wbuf1
@@ -153,9 +165,7 @@ func (w *gcWork) put(obj uintptr) {
 // otherwise it returns false and the caller needs to call put.
 //go:nowritebarrierrec
 func (w *gcWork) putFast(obj uintptr) bool {
-	if throwOnGCWork {
-		throw("throwOnGCWork")
-	}
+	w.checkPut()
 
 	wbuf := w.wbuf1
 	if wbuf == nil {
@@ -178,9 +188,7 @@ func (w *gcWork) putBatch(obj []uintptr) {
 		return
 	}
 
-	if throwOnGCWork {
-		throw("throwOnGCWork")
-	}
+	w.checkPut()
 
 	flushed := false
 	wbuf := w.wbuf1
@@ -303,16 +311,12 @@ func (w *gcWork) balance() {
 		return
 	}
 	if wbuf := w.wbuf2; wbuf.nobj != 0 {
-		if throwOnGCWork {
-			throw("throwOnGCWork")
-		}
+		w.checkPut()
 		putfull(wbuf)
 		w.flushedWork = true
 		w.wbuf2 = getempty()
 	} else if wbuf := w.wbuf1; wbuf.nobj > 4 {
-		if throwOnGCWork {
-			throw("throwOnGCWork")
-		}
+		w.checkPut()
 		w.wbuf1 = handoff(wbuf)
 		w.flushedWork = true // handoff did putfull
 	} else {
--- a/src/runtime/mwbbuf.go
+++ b/src/runtime/mwbbuf.go
@@ -79,7 +79,7 @@ const (
 func (b *wbBuf) reset() {
 	start := uintptr(unsafe.Pointer(&b.buf[0]))
 	b.next = start
-	if writeBarrier.cgo {
+	if writeBarrier.cgo || (debugCachedWork && throwOnGCWork) {
 		// Effectively disable the buffer by forcing a flush
 		// on every barrier.
 		b.end = uintptr(unsafe.Pointer(&b.buf[wbBufEntryPointers]))