
runtime: fix net poll races

The netpoll code was written long ago, when the
only multiprocessors that Go ran on were x86.
It assumed that an atomic store would trigger a
full memory barrier and then used that barrier
to order otherwise racy access to a handful of fields,
including pollDesc.closing.

On ARM64, this code has finally failed, because
the atomic store is on a value completely unrelated
to any of the racily-accessed fields, and the ARMv8
hardware, unlike x86, is clever enough not to do a
full memory barrier for a simple atomic store.
We are seeing a constant background rate of trybot
failures where the net/http tests deadlock - a netpollblock
has clearly happened after the pollDesc has begun to close.
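
In outline, the broken pattern was the following (a simplified, hypothetical sketch, not the actual runtime code): the closing path stores to a plain field and then performs an atomic store to an unrelated word, while the checking path reads the plain field with no synchronization at all. Nothing in the Go memory model orders those two accesses; x86 hardware merely made the race invisible.

package netpollsketch

import "sync/atomic"

// pollState is a hypothetical stand-in for pollDesc.
type pollState struct {
	closing bool    // written under a lock, but read without it
	rg      uintptr // an unrelated field that happens to be stored atomically
}

// close models the old unblock path: a plain store to closing,
// followed by an atomic store to a different word. The old code
// assumed the atomic store implied a full barrier that would
// also publish closing.
func (p *pollState) close() {
	p.closing = true
	atomic.StoreUintptr(&p.rg, 0)
}

// check models netpollcheckerr: a lock-free plain load. The atomic
// store above orders nothing with respect to this load, so on ARM64
// a concurrent check can still observe closing == false: a data race.
func (p *pollState) check() bool {
	return p.closing
}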

The code that does the racy reads is netpollcheckerr,
which needs to be able to run without acquiring a lock.
This CL fixes the race, without introducing unnecessary
inefficiency or deadlock, by arranging for every updater
of the relevant fields to publish a summary as a single
atomic uint32, and then having netpollcheckerr use a
single atomic load to fetch the relevant bits and then
proceed as before.
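
The shape of the fix, again as a hedged sketch with hypothetical names rather than the real code (which follows in the diffs below; the sketch uses sync/atomic.Uint32, while the runtime uses its internal atomic package): writers keep updating the plain fields under the lock, but every such update republishes a one-word atomic summary, and the lock-free reader consults only that word. The reader's single atomic load pairs with the publisher's single atomic store, so the reader cannot miss a published update.

package netpollsketch

import (
	"sync"
	"sync/atomic"
)

const (
	infoClosing = 1 << iota
	infoExpiredReadDeadline
)

// summarized is a hypothetical stand-in for pollDesc with atomicInfo.
type summarized struct {
	mu   sync.Mutex
	info atomic.Uint32 // summary bits; the only state readers touch

	closing bool  // guarded by mu
	rd      int64 // guarded by mu; < 0 means the read deadline expired
}

// publish recomputes the summary. It must be called with mu held,
// after any change to closing or rd and before waking any waiter.
// (The real publishInfo uses a CAS loop because one bit, the event
// error, is owned by a different writer and must be preserved.)
func (s *summarized) publish() {
	var info uint32
	if s.closing {
		info |= infoClosing
	}
	if s.rd < 0 {
		info |= infoExpiredReadDeadline
	}
	s.info.Store(info)
}

func (s *summarized) close() {
	s.mu.Lock()
	s.closing = true
	s.publish()
	s.mu.Unlock()
}

// check is the lock-free reader: one atomic load replaces the old
// racy plain loads of closing and rd.
func (s *summarized) check() bool {
	return s.info.Load()&infoClosing != 0
}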

Fixes #45211 (until proven otherwise!).

Change-Id: Ib6788c8da4d00b7bda84d55ca3fdffb5a64c1a0a
Reviewed-on: https://go-review.googlesource.com/c/go/+/378234
Trust: Russ Cox <rsc@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
Trust: Bryan Mills <bcmills@google.com>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
Russ Cox committed 2022-01-12 17:22:09 -05:00
parent b41185c5c3
commit 17b2fb1b65
6 changed files with 119 additions and 57 deletions

src/runtime/netpoll.go

@@ -71,31 +71,99 @@ const pollBlockSize = 4 * 1024
 //go:notinheap
 type pollDesc struct {
 	link *pollDesc // in pollcache, protected by pollcache.lock
+	fd   uintptr   // constant for pollDesc usage lifetime
 
-	// The lock protects pollOpen, pollSetDeadline, pollUnblock and deadlineimpl operations.
-	// This fully covers seq, rt and wt variables. fd is constant throughout the PollDesc lifetime.
-	// pollReset, pollWait, pollWaitCanceled and runtime·netpollready (IO readiness notification)
-	// proceed w/o taking the lock. So closing, everr, rg, rd, wg and wd are manipulated
-	// in a lock-free way by all operations.
-	// TODO(golang.org/issue/49008): audit these lock-free fields for continued correctness.
-	// NOTE(dvyukov): the following code uses uintptr to store *g (rg/wg),
-	// that will blow up when GC starts moving objects.
+	// atomicInfo holds bits from closing, rd, and wd,
+	// which are only ever written while holding the lock,
+	// summarized for use by netpollcheckerr,
+	// which cannot acquire the lock.
+	// After writing these fields under lock in a way that
+	// might change the summary, code must call publishInfo
+	// before releasing the lock.
+	// Code that changes fields and then calls netpollunblock
+	// (while still holding the lock) must call publishInfo
+	// before calling netpollunblock, because publishInfo is what
+	// stops netpollblock from blocking anew
+	// (by changing the result of netpollcheckerr).
+	// atomicInfo also holds the eventErr bit,
+	// recording whether a poll event on the fd got an error;
+	// atomicInfo is the only source of truth for that bit.
+	atomicInfo atomic.Uint32 // atomic pollInfo
+
+	// rg, wg are accessed atomically and hold g pointers.
+	// (Using atomic.Uintptr here is similar to using guintptr elsewhere.)
+	rg atomic.Uintptr // pdReady, pdWait, G waiting for read or nil
+	wg atomic.Uintptr // pdReady, pdWait, G waiting for write or nil
+
 	lock    mutex // protects the following fields
-	fd      uintptr
 	closing bool
-	everr   bool      // marks event scanning error happened
 	user    uint32    // user settable cookie
 	rseq    uintptr   // protects from stale read timers
-	rg      uintptr   // pdReady, pdWait, G waiting for read or nil. Accessed atomically.
 	rt      timer     // read deadline timer (set if rt.f != nil)
-	rd      int64     // read deadline
+	rd      int64     // read deadline (a nanotime in the future, -1 when expired)
 	wseq    uintptr   // protects from stale write timers
-	wg      uintptr   // pdReady, pdWait, G waiting for write or nil. Accessed atomically.
 	wt      timer     // write deadline timer
-	wd      int64     // write deadline
+	wd      int64     // write deadline (a nanotime in the future, -1 when expired)
 	self    *pollDesc // storage for indirect interface. See (*pollDesc).makeArg.
 }
 
+// pollInfo is the bits needed by netpollcheckerr, stored atomically,
+// mostly duplicating state that is manipulated under lock in pollDesc.
+// The one exception is the pollEventErr bit, which is maintained only
+// in the pollInfo.
+type pollInfo uint32
+
+const (
+	pollClosing = 1 << iota
+	pollEventErr
+	pollExpiredReadDeadline
+	pollExpiredWriteDeadline
+)
+
+func (i pollInfo) closing() bool              { return i&pollClosing != 0 }
+func (i pollInfo) eventErr() bool             { return i&pollEventErr != 0 }
+func (i pollInfo) expiredReadDeadline() bool  { return i&pollExpiredReadDeadline != 0 }
+func (i pollInfo) expiredWriteDeadline() bool { return i&pollExpiredWriteDeadline != 0 }
+
+// info returns the pollInfo corresponding to pd.
+func (pd *pollDesc) info() pollInfo {
+	return pollInfo(pd.atomicInfo.Load())
+}
+
+// publishInfo updates pd.atomicInfo (returned by pd.info)
+// using the other values in pd.
+// It must be called while holding pd.lock,
+// and it must be called after changing anything
+// that might affect the info bits.
+// In practice this means after changing closing
+// or changing rd or wd from < 0 to >= 0.
+func (pd *pollDesc) publishInfo() {
+	var info uint32
+	if pd.closing {
+		info |= pollClosing
+	}
+	if pd.rd < 0 {
+		info |= pollExpiredReadDeadline
+	}
+	if pd.wd < 0 {
+		info |= pollExpiredWriteDeadline
+	}
+
+	// Set all of x except the pollEventErr bit.
+	x := pd.atomicInfo.Load()
+	for !pd.atomicInfo.CompareAndSwap(x, (x&pollEventErr)|info) {
+		x = pd.atomicInfo.Load()
+	}
+}
+
+// setEventErr sets the result of pd.info().eventErr() to b.
+func (pd *pollDesc) setEventErr(b bool) {
+	x := pd.atomicInfo.Load()
+	for (x&pollEventErr != 0) != b && !pd.atomicInfo.CompareAndSwap(x, x^pollEventErr) {
+		x = pd.atomicInfo.Load()
+	}
+}
+
 type pollCache struct {
 	lock  mutex
 	first *pollDesc
@@ -147,24 +215,25 @@ func poll_runtime_isPollServerDescriptor(fd uintptr) bool {
 func poll_runtime_pollOpen(fd uintptr) (*pollDesc, int) {
 	pd := pollcache.alloc()
 	lock(&pd.lock)
-	wg := atomic.Loaduintptr(&pd.wg)
+	wg := pd.wg.Load()
 	if wg != 0 && wg != pdReady {
 		throw("runtime: blocked write on free polldesc")
 	}
-	rg := atomic.Loaduintptr(&pd.rg)
+	rg := pd.rg.Load()
 	if rg != 0 && rg != pdReady {
 		throw("runtime: blocked read on free polldesc")
 	}
 	pd.fd = fd
 	pd.closing = false
-	pd.everr = false
+	pd.setEventErr(false)
 	pd.rseq++
-	atomic.Storeuintptr(&pd.rg, 0)
+	pd.rg.Store(0)
 	pd.rd = 0
 	pd.wseq++
-	atomic.Storeuintptr(&pd.wg, 0)
+	pd.wg.Store(0)
 	pd.wd = 0
 	pd.self = pd
+	pd.publishInfo()
 	unlock(&pd.lock)
 
 	errno := netpollopen(fd, pd)
@@ -180,11 +249,11 @@ func poll_runtime_pollClose(pd *pollDesc) {
 	if !pd.closing {
 		throw("runtime: close polldesc w/o unblock")
 	}
-	wg := atomic.Loaduintptr(&pd.wg)
+	wg := pd.wg.Load()
 	if wg != 0 && wg != pdReady {
 		throw("runtime: blocked write on closing polldesc")
 	}
-	rg := atomic.Loaduintptr(&pd.rg)
+	rg := pd.rg.Load()
 	if rg != 0 && rg != pdReady {
 		throw("runtime: blocked read on closing polldesc")
 	}
@@ -209,9 +278,9 @@ func poll_runtime_pollReset(pd *pollDesc, mode int) int {
 		return errcode
 	}
 	if mode == 'r' {
-		atomic.Storeuintptr(&pd.rg, 0)
+		pd.rg.Store(0)
 	} else if mode == 'w' {
-		atomic.Storeuintptr(&pd.wg, 0)
+		pd.wg.Store(0)
 	}
 	return pollNoError
 }
@@ -273,6 +342,7 @@ func poll_runtime_pollSetDeadline(pd *pollDesc, d int64, mode int) {
 	if mode == 'w' || mode == 'r'+'w' {
 		pd.wd = d
 	}
+	pd.publishInfo()
 	combo := pd.rd > 0 && pd.rd == pd.wd
 	rtf := netpollReadDeadline
 	if combo {
@@ -314,15 +384,13 @@ func poll_runtime_pollSetDeadline(pd *pollDesc, d int64, mode int) {
 		}
 	}
 	// If we set the new deadline in the past, unblock currently pending IO if any.
+	// Note that pd.publishInfo has already been called, above, immediately after modifying rd and wd.
 	var rg, wg *g
-	if pd.rd < 0 || pd.wd < 0 {
-		atomic.StorepNoWB(noescape(unsafe.Pointer(&wg)), nil) // full memory barrier between stores to rd/wd and load of rg/wg in netpollunblock
-		if pd.rd < 0 {
-			rg = netpollunblock(pd, 'r', false)
-		}
-		if pd.wd < 0 {
-			wg = netpollunblock(pd, 'w', false)
-		}
+	if pd.rd < 0 {
+		rg = netpollunblock(pd, 'r', false)
+	}
+	if pd.wd < 0 {
+		wg = netpollunblock(pd, 'w', false)
 	}
 	unlock(&pd.lock)
 	if rg != nil {
@@ -343,7 +411,7 @@ func poll_runtime_pollUnblock(pd *pollDesc) {
 	pd.rseq++
 	pd.wseq++
 	var rg, wg *g
-	atomic.StorepNoWB(noescape(unsafe.Pointer(&rg)), nil) // full memory barrier between store to closing and read of rg/wg in netpollunblock
+	pd.publishInfo()
 	rg = netpollunblock(pd, 'r', false)
 	wg = netpollunblock(pd, 'w', false)
 	if pd.rt.f != nil {
@@ -388,16 +456,17 @@ func netpollready(toRun *gList, pd *pollDesc, mode int32) {
 }
 
 func netpollcheckerr(pd *pollDesc, mode int32) int {
-	if pd.closing {
+	info := pd.info()
+	if info.closing() {
 		return pollErrClosing
 	}
-	if (mode == 'r' && pd.rd < 0) || (mode == 'w' && pd.wd < 0) {
+	if (mode == 'r' && info.expiredReadDeadline()) || (mode == 'w' && info.expiredWriteDeadline()) {
 		return pollErrTimeout
 	}
 	// Report an event scanning error only on a read event.
 	// An error on a write event will be captured in a subsequent
 	// write call that is able to report a more specific error.
-	if mode == 'r' && pd.everr {
+	if mode == 'r' && info.eventErr() {
 		return pollErrNotPollable
 	}
 	return pollNoError
@@ -432,28 +501,28 @@ func netpollblock(pd *pollDesc, mode int32, waitio bool) bool {
 	// set the gpp semaphore to pdWait
 	for {
 		// Consume notification if already ready.
-		if atomic.Casuintptr(gpp, pdReady, 0) {
+		if gpp.CompareAndSwap(pdReady, 0) {
 			return true
 		}
-		if atomic.Casuintptr(gpp, 0, pdWait) {
+		if gpp.CompareAndSwap(0, pdWait) {
 			break
 		}
 
 		// Double check that this isn't corrupt; otherwise we'd loop
 		// forever.
-		if v := atomic.Loaduintptr(gpp); v != pdReady && v != 0 {
+		if v := gpp.Load(); v != pdReady && v != 0 {
 			throw("runtime: double wait")
 		}
 	}
 
 	// need to recheck error states after setting gpp to pdWait
 	// this is necessary because runtime_pollUnblock/runtime_pollSetDeadline/deadlineimpl
-	// do the opposite: store to closing/rd/wd, membarrier, load of rg/wg
+	// do the opposite: store to closing/rd/wd, publishInfo, load of rg/wg
 	if waitio || netpollcheckerr(pd, mode) == pollNoError {
 		gopark(netpollblockcommit, unsafe.Pointer(gpp), waitReasonIOWait, traceEvGoBlockNet, 5)
 	}
 	// be careful to not lose concurrent pdReady notification
-	old := atomic.Xchguintptr(gpp, 0)
+	old := gpp.Swap(0)
 	if old > pdWait {
 		throw("runtime: corrupted polldesc")
 	}
@@ -467,7 +536,7 @@ func netpollunblock(pd *pollDesc, mode int32, ioready bool) *g {
 	}
 
 	for {
-		old := atomic.Loaduintptr(gpp)
+		old := gpp.Load()
 		if old == pdReady {
 			return nil
 		}
@@ -480,7 +549,7 @@ func netpollunblock(pd *pollDesc, mode int32, ioready bool) *g {
 		if ioready {
 			new = pdReady
 		}
-		if atomic.Casuintptr(gpp, old, new) {
+		if gpp.CompareAndSwap(old, new) {
 			if old == pdWait {
 				old = 0
 			}
@@ -508,7 +577,7 @@ func netpolldeadlineimpl(pd *pollDesc, seq uintptr, read, write bool) {
 			throw("runtime: inconsistent read deadline")
 		}
 		pd.rd = -1
-		atomic.StorepNoWB(unsafe.Pointer(&pd.rt.f), nil) // full memory barrier between store to rd and load of rg in netpollunblock
+		pd.publishInfo()
 		rg = netpollunblock(pd, 'r', false)
 	}
 	var wg *g
@@ -517,7 +586,7 @@ func netpolldeadlineimpl(pd *pollDesc, seq uintptr, read, write bool) {
 			throw("runtime: inconsistent write deadline")
 		}
 		pd.wd = -1
-		atomic.StorepNoWB(unsafe.Pointer(&pd.wt.f), nil) // full memory barrier between store to wd and load of wg in netpollunblock
+		pd.publishInfo()
 		wg = netpollunblock(pd, 'w', false)
 	}
 	unlock(&pd.lock)

src/runtime/netpoll_aix.go

@@ -212,10 +212,7 @@ retry:
 			pfd.events &= ^_POLLOUT
 		}
 		if mode != 0 {
-			pds[i].everr = false
-			if pfd.revents == _POLLERR {
-				pds[i].everr = true
-			}
+			pds[i].setEventErr(pfd.revents == _POLLERR)
 			netpollready(&toRun, pds[i], mode)
 			n--
 		}

src/runtime/netpoll_epoll.go

@@ -168,10 +168,7 @@ retry:
 		}
 		if mode != 0 {
 			pd := *(**pollDesc)(unsafe.Pointer(&ev.data))
-			pd.everr = false
-			if ev.events == _EPOLLERR {
-				pd.everr = true
-			}
+			pd.setEventErr(ev.events == _EPOLLERR)
 			netpollready(&toRun, pd, mode)
 		}
 	}

src/runtime/netpoll_kqueue.go

@@ -179,10 +179,7 @@ retry:
 		}
 		if mode != 0 {
 			pd := (*pollDesc)(unsafe.Pointer(ev.udata))
-			pd.everr = false
-			if ev.flags == _EV_ERROR {
-				pd.everr = true
-			}
+			pd.setEventErr(ev.flags == _EV_ERROR)
 			netpollready(&toRun, pd, mode)
 		}
 	}

src/runtime/netpoll_solaris.go

@@ -158,7 +158,7 @@ func netpollclose(fd uintptr) int32 {
 // this call, port_getn will return one and only one event for that
 // particular descriptor, so this function needs to be called again.
 func netpollupdate(pd *pollDesc, set, clear uint32) {
-	if pd.closing {
+	if pd.info().closing() {
 		return
 	}

src/runtime/runtime2.go

@@ -255,6 +255,8 @@ func efaceOf(ep *any) *eface {
 // so I can't see them ever moving. If we did want to start moving data
 // in the GC, we'd need to allocate the goroutine structs from an
 // alternate arena. Using guintptr doesn't make that problem any worse.
+// Note that pollDesc.rg, pollDesc.wg also store g in uintptr form,
+// so they would need to be updated too if g's start moving.
 type guintptr uintptr
 
 //go:nosplit