mirror of
https://github.com/golang/go
synced 2024-11-23 17:30:02 -07:00
runtime: don't take allglock in tracebackothers
tracebackothers is called from fatal throw/panic. A fatal throw may be taken with allglock held (notably in the allocator when allglock is held), which would cause a deadlock in tracebackothers when we try to take allglock again. Locking allglock here is also often a lock order violation w.r.t. the locks held when throw was called. Avoid the deadlock and ordering issues by skipping locking altogether. It is OK to miss concurrently created Gs (which are generally avoided by freezetheworld(), and which were possible previously anyways if created after the loop). Fatal throw/panic freezetheworld(), which should freeze other threads that may be racing to modify allgs. However, freezetheworld() does _not_ guarantee that it stops all other threads, so we can't simply drop the lock. Fixes #42669 Updates #43175 Change-Id: I657aec46ed35fd5d1b3f1ba25b500128ab26b088 Reviewed-on: https://go-review.googlesource.com/c/go/+/270861 Reviewed-by: Michael Knyszek <mknyszek@google.com> Trust: Michael Pratt <mpratt@google.com>
This commit is contained in:
parent
9eef49cfa6
commit
6b37b15d95
@ -132,7 +132,6 @@ fail:
|
||||
println("gp", gp, "goid", gp.goid,
|
||||
"status", readgstatus(gp),
|
||||
"gcscandone", gp.gcscandone)
|
||||
unlock(&allglock) // Avoid self-deadlock with traceback.
|
||||
throw("scan missed a g")
|
||||
}
|
||||
|
||||
|
@ -490,8 +490,29 @@ func lockedOSThread() bool {
|
||||
}
|
||||
|
||||
var (
|
||||
allgs []*g
|
||||
// allgs contains all Gs ever created (including dead Gs), and thus
|
||||
// never shrinks.
|
||||
//
|
||||
// Access via the slice is protected by allglock or stop-the-world.
|
||||
// Readers that cannot take the lock may (carefully!) use the atomic
|
||||
// variables below.
|
||||
allglock mutex
|
||||
allgs []*g
|
||||
|
||||
// allglen and allgptr are atomic variables that contain len(allg) and
|
||||
// &allg[0] respectively. Proper ordering depends on totally-ordered
|
||||
// loads and stores. Writes are protected by allglock.
|
||||
//
|
||||
// allgptr is updated before allglen. Readers should read allglen
|
||||
// before allgptr to ensure that allglen is always <= len(allgptr). New
|
||||
// Gs appended during the race can be missed. For a consistent view of
|
||||
// all Gs, allglock must be held.
|
||||
//
|
||||
// allgptr copies should always be stored as a concrete type or
|
||||
// unsafe.Pointer, not uintptr, to ensure that GC can still reach it
|
||||
// even if it points to a stale array.
|
||||
allglen uintptr
|
||||
allgptr **g
|
||||
)
|
||||
|
||||
func allgadd(gp *g) {
|
||||
@ -501,10 +522,25 @@ func allgadd(gp *g) {
|
||||
|
||||
lock(&allglock)
|
||||
allgs = append(allgs, gp)
|
||||
allglen = uintptr(len(allgs))
|
||||
if &allgs[0] != allgptr {
|
||||
atomicstorep(unsafe.Pointer(&allgptr), unsafe.Pointer(&allgs[0]))
|
||||
}
|
||||
atomic.Storeuintptr(&allglen, uintptr(len(allgs)))
|
||||
unlock(&allglock)
|
||||
}
|
||||
|
||||
// atomicAllG returns &allgs[0] and len(allgs) for use with atomicAllGIndex.
|
||||
func atomicAllG() (**g, uintptr) {
|
||||
length := atomic.Loaduintptr(&allglen)
|
||||
ptr := (**g)(atomic.Loadp(unsafe.Pointer(&allgptr)))
|
||||
return ptr, length
|
||||
}
|
||||
|
||||
// atomicAllGIndex returns ptr[i] with the allgptr returned from atomicAllG.
|
||||
func atomicAllGIndex(ptr **g, i uintptr) *g {
|
||||
return *(**g)(add(unsafe.Pointer(ptr), i*sys.PtrSize))
|
||||
}
|
||||
|
||||
const (
|
||||
// Number of goroutine ids to grab from sched.goidgen to local per-P cache at once.
|
||||
// 16 seems to provide enough amortization, but other than that it's mostly arbitrary number.
|
||||
@ -4266,7 +4302,7 @@ func badunlockosthread() {
|
||||
}
|
||||
|
||||
func gcount() int32 {
|
||||
n := int32(allglen) - sched.gFree.n - int32(atomic.Load(&sched.ngsys))
|
||||
n := int32(atomic.Loaduintptr(&allglen)) - sched.gFree.n - int32(atomic.Load(&sched.ngsys))
|
||||
for _, _p_ := range allp {
|
||||
n -= _p_.gFree.n
|
||||
}
|
||||
@ -4970,7 +5006,6 @@ func checkdead() {
|
||||
case _Grunnable,
|
||||
_Grunning,
|
||||
_Gsyscall:
|
||||
unlock(&allglock)
|
||||
print("runtime: checkdead: find g ", gp.goid, " in status ", s, "\n")
|
||||
throw("checkdead: runnable g")
|
||||
}
|
||||
|
@ -1052,7 +1052,6 @@ func (w waitReason) String() string {
|
||||
}
|
||||
|
||||
var (
|
||||
allglen uintptr
|
||||
allm *m
|
||||
gomaxprocs int32
|
||||
ncpu int32
|
||||
|
@ -917,17 +917,25 @@ func tracebackothers(me *g) {
|
||||
level, _, _ := gotraceback()
|
||||
|
||||
// Show the current goroutine first, if we haven't already.
|
||||
g := getg()
|
||||
gp := g.m.curg
|
||||
if gp != nil && gp != me {
|
||||
curgp := getg().m.curg
|
||||
if curgp != nil && curgp != me {
|
||||
print("\n")
|
||||
goroutineheader(gp)
|
||||
traceback(^uintptr(0), ^uintptr(0), 0, gp)
|
||||
goroutineheader(curgp)
|
||||
traceback(^uintptr(0), ^uintptr(0), 0, curgp)
|
||||
}
|
||||
|
||||
lock(&allglock)
|
||||
for _, gp := range allgs {
|
||||
if gp == me || gp == g.m.curg || readgstatus(gp) == _Gdead || isSystemGoroutine(gp, false) && level < 2 {
|
||||
// We can't take allglock here because this may be during fatal
|
||||
// throw/panic, where locking allglock could be out-of-order or a
|
||||
// direct deadlock.
|
||||
//
|
||||
// Instead, use atomic access to allgs which requires no locking. We
|
||||
// don't lock against concurrent creation of new Gs, but even with
|
||||
// allglock we may miss Gs created after this loop.
|
||||
ptr, length := atomicAllG()
|
||||
for i := uintptr(0); i < length; i++ {
|
||||
gp := atomicAllGIndex(ptr, i)
|
||||
|
||||
if gp == me || gp == curgp || readgstatus(gp) == _Gdead || isSystemGoroutine(gp, false) && level < 2 {
|
||||
continue
|
||||
}
|
||||
print("\n")
|
||||
@ -936,14 +944,13 @@ func tracebackothers(me *g) {
|
||||
// called from a signal handler initiated during a
|
||||
// systemstack call. The original G is still in the
|
||||
// running state, and we want to print its stack.
|
||||
if gp.m != g.m && readgstatus(gp)&^_Gscan == _Grunning {
|
||||
if gp.m != getg().m && readgstatus(gp)&^_Gscan == _Grunning {
|
||||
print("\tgoroutine running on other thread; stack unavailable\n")
|
||||
printcreatedby(gp)
|
||||
} else {
|
||||
traceback(^uintptr(0), ^uintptr(0), 0, gp)
|
||||
}
|
||||
}
|
||||
unlock(&allglock)
|
||||
}
|
||||
|
||||
// tracebackHexdump hexdumps part of stk around frame.sp and frame.fp
|
||||
|
Loading…
Reference in New Issue
Block a user