1
0
mirror of https://github.com/golang/go synced 2024-11-15 00:40:31 -07:00

runtime: ensure startm new M is consistently visible to checkdead

If no M is available, startm first grabs an idle P, then drops
sched.lock and calls newm to start a new M to run than P.

Unfortunately, that leaves a window in which a G (e.g., returning from a
syscall) may find no idle P, add to the global runq, and then in stopm
discover that there are no running M's, a condition that should be
impossible with runnable G's.

To avoid this condition, we pre-allocate the new M ID in startm before
dropping sched.lock. This ensures that checkdead will see the M as
running, and since that new M must eventually run the scheduler, it will
handle any pending work as necessary.

Outside of startm, most other calls to newm/allocm don't have a P at
all. The only exception is startTheWorldWithSema, which always has an M
if there is 1 P (i.e., the currently running M), and if there is >1 P
the findrunnable spinning dance ensures the problem never occurs.

This has been tested with strategically placed sleeps in the runtime to
help induce the correct race ordering, but the timing on this is too
narrow for a test that can be checked in.

Fixes #40368

Change-Id: If5e0293a430cc85154b7ed55bc6dadf9b340abe2
Reviewed-on: https://go-review.googlesource.com/c/go/+/245018
Run-TryBot: Michael Pratt <mpratt@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Austin Clements <austin@google.com>
This commit is contained in:
Michael Pratt 2020-07-27 15:04:17 -04:00
parent c4fed25553
commit 85afa2eb19

View File

@ -132,7 +132,7 @@ func main() {
if GOARCH != "wasm" { // no threads on wasm yet, so no sysmon
systemstack(func() {
newm(sysmon, nil)
newm(sysmon, nil, -1)
})
}
@ -562,7 +562,7 @@ func schedinit() {
stackinit()
mallocinit()
fastrandinit() // must run before mcommoninit
mcommoninit(_g_.m)
mcommoninit(_g_.m, -1)
cpuinit() // must run before alginit
alginit() // maps must not be used before this call
modulesinit() // provides activeModules
@ -623,7 +623,22 @@ func checkmcount() {
}
}
func mcommoninit(mp *m) {
// mReserveID returns the next ID to use for a new m. This new m is immediately
// considered 'running' by checkdead.
//
// sched.lock must be held.
func mReserveID() int64 {
if sched.mnext+1 < sched.mnext {
throw("runtime: thread ID overflow")
}
id := sched.mnext
sched.mnext++
checkmcount()
return id
}
// Pre-allocated ID may be passed as 'id', or omitted by passing -1.
func mcommoninit(mp *m, id int64) {
_g_ := getg()
// g0 stack won't make sense for user (and is not necessary unwindable).
@ -632,12 +647,12 @@ func mcommoninit(mp *m) {
}
lock(&sched.lock)
if sched.mnext+1 < sched.mnext {
throw("runtime: thread ID overflow")
if id >= 0 {
mp.id = id
} else {
mp.id = mReserveID()
}
mp.id = sched.mnext
sched.mnext++
checkmcount()
mp.fastrand[0] = uint32(int64Hash(uint64(mp.id), fastrandseed))
mp.fastrand[1] = uint32(int64Hash(uint64(cputicks()), ^fastrandseed))
@ -1068,7 +1083,7 @@ func startTheWorldWithSema(emitTraceEvent bool) int64 {
notewakeup(&mp.park)
} else {
// Start M to run P. Do not start another M below.
newm(nil, p)
newm(nil, p, -1)
}
}
@ -1413,12 +1428,13 @@ type cgothreadstart struct {
// Allocate a new m unassociated with any thread.
// Can use p for allocation context if needed.
// fn is recorded as the new m's m.mstartfn.
// id is optional pre-allocated m ID. Omit by passing -1.
//
// This function is allowed to have write barriers even if the caller
// isn't because it borrows _p_.
//
//go:yeswritebarrierrec
func allocm(_p_ *p, fn func()) *m {
func allocm(_p_ *p, fn func(), id int64) *m {
_g_ := getg()
acquirem() // disable GC because it can be called from sysmon
if _g_.m.p == 0 {
@ -1447,7 +1463,7 @@ func allocm(_p_ *p, fn func()) *m {
mp := new(m)
mp.mstartfn = fn
mcommoninit(mp)
mcommoninit(mp, id)
// In case of cgo or Solaris or illumos or Darwin, pthread_create will make us a stack.
// Windows and Plan 9 will layout sched stack on OS stack.
@ -1586,7 +1602,7 @@ func oneNewExtraM() {
// The sched.pc will never be returned to, but setting it to
// goexit makes clear to the traceback routines where
// the goroutine stack ends.
mp := allocm(nil, nil)
mp := allocm(nil, nil, -1)
gp := malg(4096)
gp.sched.pc = funcPC(goexit) + sys.PCQuantum
gp.sched.sp = gp.stack.hi
@ -1757,9 +1773,11 @@ var newmHandoff struct {
// Create a new m. It will start off with a call to fn, or else the scheduler.
// fn needs to be static and not a heap allocated closure.
// May run with m.p==nil, so write barriers are not allowed.
//
// id is optional pre-allocated m ID. Omit by passing -1.
//go:nowritebarrierrec
func newm(fn func(), _p_ *p) {
mp := allocm(_p_, fn)
func newm(fn func(), _p_ *p, id int64) {
mp := allocm(_p_, fn, id)
mp.nextp.set(_p_)
mp.sigmask = initSigmask
if gp := getg(); gp != nil && gp.m != nil && (gp.m.lockedExt != 0 || gp.m.incgo) && GOOS != "plan9" {
@ -1828,7 +1846,7 @@ func startTemplateThread() {
releasem(mp)
return
}
newm(templateThread, nil)
newm(templateThread, nil, -1)
releasem(mp)
}
@ -1923,16 +1941,31 @@ func startm(_p_ *p, spinning bool) {
}
}
mp := mget()
unlock(&sched.lock)
if mp == nil {
// No M is available, we must drop sched.lock and call newm.
// However, we already own a P to assign to the M.
//
// Once sched.lock is released, another G (e.g., in a syscall),
// could find no idle P while checkdead finds a runnable G but
// no running M's because this new M hasn't started yet, thus
// throwing in an apparent deadlock.
//
// Avoid this situation by pre-allocating the ID for the new M,
// thus marking it as 'running' before we drop sched.lock. This
// new M will eventually run the scheduler to execute any
// queued G's.
id := mReserveID()
unlock(&sched.lock)
var fn func()
if spinning {
// The caller incremented nmspinning, so set m.spinning in the new M.
fn = mspinning
}
newm(fn, _p_)
newm(fn, _p_, id)
return
}
unlock(&sched.lock)
if mp.spinning {
throw("startm: m is spinning")
}