1
0
mirror of https://github.com/golang/go synced 2024-11-26 20:11:26 -07:00

runtime: add and use pollDesc fd sequence field

It is possible for a netpoll file to be closed and for the pollDesc
to be reused while a netpoll is running. This normally only causes
spurious wakeups, but if there is an error on the old file then the
new file can be incorrectly marked as having an error.

Fix this problem on most systems by introducing an fd sequence field
and using that as a tag in a taggedPointer. The taggedPointer is
stored in epoll or kqueue or whatever is being used. If the taggedPointer
returned by the kernel has a tag that does not match the fd
sequence field, the notification is for a closed file, and we
can ignore it. We check the tag stored in the pollDesc, and we also
check the tag stored in the pollDesc.atomicInfo.

This approach does not work on 32-bit systems where the kernel
only provides a 32-bit field to hold a user value. On those systems
we continue to use the older method without the sequence protection.
This is not ideal, but it is not an issue on Linux because the kernel
provides a 64-bit field, and it is not an issue on Windows because
there are no poller errors on Windows. It is potentially an issue
on *BSD systems, but on those systems we already call fstat in newFile
in os/file_unix.go to avoid adding non-pollable files to kqueue.
So we currently don't know of any cases that will fail.

Fixes #59545

Change-Id: I9a61e20dc39b4266a7a2978fc16446567fe683ac
Reviewed-on: https://go-review.googlesource.com/c/go/+/484837
Auto-Submit: Ian Lance Taylor <iant@golang.org>
Auto-Submit: Ian Lance Taylor <iant@google.com>
Run-TryBot: Ian Lance Taylor <iant@golang.org>
Reviewed-by: Orlando Labao <orlando.labao43@gmail.com>
Reviewed-by: Ian Lance Taylor <iant@google.com>
TryBot-Bypass: Ian Lance Taylor <iant@google.com>
Reviewed-by: Bryan Mills <bcmills@google.com>
TryBot-Bypass: Ian Lance Taylor <iant@golang.org>
This commit is contained in:
Ian Lance Taylor 2023-04-14 15:34:10 -07:00 committed by Gopher Robot
parent d959dd97b1
commit d5514013b6
7 changed files with 189 additions and 16 deletions

View File

@ -7,8 +7,13 @@
package os_test package os_test
import ( import (
"errors"
"internal/testenv"
"io/fs"
"os" "os"
"path/filepath" "path/filepath"
"strconv"
"sync"
"syscall" "syscall"
"testing" "testing"
) )
@ -59,3 +64,94 @@ func TestFifoEOF(t *testing.T) {
testPipeEOF(t, r, w) testPipeEOF(t, r, w)
} }
// Issue #59545.
func TestNonPollable(t *testing.T) {
if testing.Short() {
t.Skip("skipping test with tight loops in short mode")
}
// We need to open a non-pollable file.
// This is almost certainly Linux-specific,
// but if other systems have non-pollable files,
// we can add them here.
const nonPollable = "/dev/net/tun"
f, err := os.OpenFile(nonPollable, os.O_RDWR, 0)
if err != nil {
if errors.Is(err, fs.ErrExist) || errors.Is(err, fs.ErrPermission) || testenv.SyscallIsNotSupported(err) {
t.Skipf("can't open %q: %v", nonPollable, err)
}
t.Fatal(err)
}
f.Close()
// On a Linux laptop, before the problem was fixed,
// this test failed about 50% of the time with this
// number of iterations.
// It takes about 1/2 second when it passes.
const attempts = 20000
start := make(chan bool)
var wg sync.WaitGroup
wg.Add(1)
defer wg.Wait()
go func() {
defer wg.Done()
close(start)
for i := 0; i < attempts; i++ {
f, err := os.OpenFile(nonPollable, os.O_RDWR, 0)
if err != nil {
t.Error(err)
return
}
if err := f.Close(); err != nil {
t.Error(err)
return
}
}
}()
dir := t.TempDir()
<-start
for i := 0; i < attempts; i++ {
name := filepath.Join(dir, strconv.Itoa(i))
if err := syscall.Mkfifo(name, 0o600); err != nil {
t.Fatal(err)
}
// The problem only occurs if we use O_NONBLOCK here.
rd, err := os.OpenFile(name, os.O_RDONLY|syscall.O_NONBLOCK, 0o600)
if err != nil {
t.Fatal(err)
}
wr, err := os.OpenFile(name, os.O_WRONLY|syscall.O_NONBLOCK, 0o600)
if err != nil {
t.Fatal(err)
}
const msg = "message"
if _, err := wr.Write([]byte(msg)); err != nil {
if errors.Is(err, syscall.EAGAIN) || errors.Is(err, syscall.ENOBUFS) {
t.Logf("ignoring write error %v", err)
rd.Close()
wr.Close()
continue
}
t.Fatalf("write to fifo %d failed: %v", i, err)
}
if _, err := rd.Read(make([]byte, len(msg))); err != nil {
if errors.Is(err, syscall.EAGAIN) || errors.Is(err, syscall.ENOBUFS) {
t.Logf("ignoring read error %v", err)
rd.Close()
wr.Close()
continue
}
t.Fatalf("read from fifo %d failed; %v", i, err)
}
if err := rd.Close(); err != nil {
t.Fatal(err)
}
if err := wr.Close(); err != nil {
t.Fatal(err)
}
}
}

View File

@ -74,6 +74,7 @@ type pollDesc struct {
_ sys.NotInHeap _ sys.NotInHeap
link *pollDesc // in pollcache, protected by pollcache.lock link *pollDesc // in pollcache, protected by pollcache.lock
fd uintptr // constant for pollDesc usage lifetime fd uintptr // constant for pollDesc usage lifetime
fdseq atomic.Uintptr // protects against stale pollDesc
// atomicInfo holds bits from closing, rd, and wd, // atomicInfo holds bits from closing, rd, and wd,
// which are only ever written while holding the lock, // which are only ever written while holding the lock,
@ -120,6 +121,12 @@ const (
pollEventErr pollEventErr
pollExpiredReadDeadline pollExpiredReadDeadline
pollExpiredWriteDeadline pollExpiredWriteDeadline
pollFDSeq // 20 bit field, low 20 bits of fdseq field
)
const (
pollFDSeqBits = 20 // number of bits in pollFDSeq
pollFDSeqMask = 1<<pollFDSeqBits - 1 // mask for pollFDSeq
) )
func (i pollInfo) closing() bool { return i&pollClosing != 0 } func (i pollInfo) closing() bool { return i&pollClosing != 0 }
@ -150,6 +157,7 @@ func (pd *pollDesc) publishInfo() {
if pd.wd < 0 { if pd.wd < 0 {
info |= pollExpiredWriteDeadline info |= pollExpiredWriteDeadline
} }
info |= uint32(pd.fdseq.Load()&pollFDSeqMask) << pollFDSeq
// Set all of x except the pollEventErr bit. // Set all of x except the pollEventErr bit.
x := pd.atomicInfo.Load() x := pd.atomicInfo.Load()
@ -159,10 +167,21 @@ func (pd *pollDesc) publishInfo() {
} }
// setEventErr sets the result of pd.info().eventErr() to b. // setEventErr sets the result of pd.info().eventErr() to b.
func (pd *pollDesc) setEventErr(b bool) { // We only change the error bit if seq == 0 or if seq matches pollFDSeq
// (issue #59545).
func (pd *pollDesc) setEventErr(b bool, seq uintptr) {
mSeq := uint32(seq & pollFDSeqMask)
x := pd.atomicInfo.Load() x := pd.atomicInfo.Load()
xSeq := (x >> pollFDSeq) & pollFDSeqMask
if seq != 0 && xSeq != mSeq {
return
}
for (x&pollEventErr != 0) != b && !pd.atomicInfo.CompareAndSwap(x, x^pollEventErr) { for (x&pollEventErr != 0) != b && !pd.atomicInfo.CompareAndSwap(x, x^pollEventErr) {
x = pd.atomicInfo.Load() x = pd.atomicInfo.Load()
xSeq := (x >> pollFDSeq) & pollFDSeqMask
if seq != 0 && xSeq != mSeq {
return
}
} }
} }
@ -226,8 +245,12 @@ func poll_runtime_pollOpen(fd uintptr) (*pollDesc, int) {
throw("runtime: blocked read on free polldesc") throw("runtime: blocked read on free polldesc")
} }
pd.fd = fd pd.fd = fd
if pd.fdseq.Load() == 0 {
// The value 0 is special in setEventErr, so don't use it.
pd.fdseq.Store(1)
}
pd.closing = false pd.closing = false
pd.setEventErr(false) pd.setEventErr(false, 0)
pd.rseq++ pd.rseq++
pd.rg.Store(pdNil) pd.rg.Store(pdNil)
pd.rd = 0 pd.rd = 0
@ -264,6 +287,12 @@ func poll_runtime_pollClose(pd *pollDesc) {
} }
func (c *pollCache) free(pd *pollDesc) { func (c *pollCache) free(pd *pollDesc) {
// Increment the fdseq field, so that any currently
// running netpoll calls will not mark pd as ready.
fdseq := pd.fdseq.Load()
fdseq = (fdseq + 1) & (1<<taggedPointerBits - 1)
pd.fdseq.Store(fdseq)
lock(&c.lock) lock(&c.lock)
pd.link = c.first pd.link = c.first
c.first = pd c.first = pd

View File

@ -88,6 +88,9 @@ func netpollopen(fd uintptr, pd *pollDesc) int32 {
lock(&mtxset) lock(&mtxset)
unlock(&mtxpoll) unlock(&mtxpoll)
// We don't worry about pd.fdseq here,
// as mtxset protects us from stale pollDescs.
pd.user = uint32(len(pfds)) pd.user = uint32(len(pfds))
pfds = append(pfds, pollfd{fd: int32(fd)}) pfds = append(pfds, pollfd{fd: int32(fd)})
pds = append(pds, pd) pds = append(pds, pd)
@ -216,7 +219,7 @@ retry:
pfd.events &= ^_POLLOUT pfd.events &= ^_POLLOUT
} }
if mode != 0 { if mode != 0 {
pds[i].setEventErr(pfd.revents == _POLLERR) pds[i].setEventErr(pfd.revents == _POLLERR, 0)
netpollready(&toRun, pds[i], mode) netpollready(&toRun, pds[i], mode)
n-- n--
} }

View File

@ -52,7 +52,8 @@ func netpollIsPollDescriptor(fd uintptr) bool {
func netpollopen(fd uintptr, pd *pollDesc) uintptr { func netpollopen(fd uintptr, pd *pollDesc) uintptr {
var ev syscall.EpollEvent var ev syscall.EpollEvent
ev.Events = syscall.EPOLLIN | syscall.EPOLLOUT | syscall.EPOLLRDHUP | syscall.EPOLLET ev.Events = syscall.EPOLLIN | syscall.EPOLLOUT | syscall.EPOLLRDHUP | syscall.EPOLLET
*(**pollDesc)(unsafe.Pointer(&ev.Data)) = pd tp := taggedPointerPack(unsafe.Pointer(pd), pd.fdseq.Load())
*(*taggedPointer)(unsafe.Pointer(&ev.Data)) = tp
return syscall.EpollCtl(epfd, syscall.EPOLL_CTL_ADD, int32(fd), &ev) return syscall.EpollCtl(epfd, syscall.EPOLL_CTL_ADD, int32(fd), &ev)
} }
@ -158,10 +159,14 @@ retry:
mode += 'w' mode += 'w'
} }
if mode != 0 { if mode != 0 {
pd := *(**pollDesc)(unsafe.Pointer(&ev.Data)) tp := *(*taggedPointer)(unsafe.Pointer(&ev.Data))
pd.setEventErr(ev.Events == syscall.EPOLLERR) pd := (*pollDesc)(tp.pointer())
tag := tp.tag()
if pd.fdseq.Load() == tag {
pd.setEventErr(ev.Events == syscall.EPOLLERR, tag)
netpollready(&toRun, pd, mode) netpollready(&toRun, pd, mode)
} }
} }
}
return toRun return toRun
} }

View File

@ -9,6 +9,7 @@ package runtime
// Integrated network poller (kqueue-based implementation). // Integrated network poller (kqueue-based implementation).
import ( import (
"internal/goarch"
"runtime/internal/atomic" "runtime/internal/atomic"
"unsafe" "unsafe"
) )
@ -61,7 +62,17 @@ func netpollopen(fd uintptr, pd *pollDesc) int32 {
ev[0].flags = _EV_ADD | _EV_CLEAR ev[0].flags = _EV_ADD | _EV_CLEAR
ev[0].fflags = 0 ev[0].fflags = 0
ev[0].data = 0 ev[0].data = 0
if goarch.PtrSize == 4 {
// We only have a pointer-sized field to store into,
// so on a 32-bit system we get no sequence protection.
// TODO(iant): If we notice any problems we could at leaset
// steal the low-order 2 bits for a tiny sequence number.
ev[0].udata = (*byte)(unsafe.Pointer(pd)) ev[0].udata = (*byte)(unsafe.Pointer(pd))
} else {
tp := taggedPointerPack(unsafe.Pointer(pd), pd.fdseq.Load())
ev[0].udata = (*byte)(unsafe.Pointer(uintptr(tp)))
}
ev[1] = ev[0] ev[1] = ev[0]
ev[1].filter = _EVFILT_WRITE ev[1].filter = _EVFILT_WRITE
n := kevent(kq, &ev[0], 2, nil, 0, nil) n := kevent(kq, &ev[0], 2, nil, 0, nil)
@ -181,8 +192,22 @@ retry:
mode += 'w' mode += 'w'
} }
if mode != 0 { if mode != 0 {
pd := (*pollDesc)(unsafe.Pointer(ev.udata)) var pd *pollDesc
pd.setEventErr(ev.flags == _EV_ERROR) var tag uintptr
if goarch.PtrSize == 4 {
// No sequence protection on 32-bit systems.
// See netpollopen for details.
pd = (*pollDesc)(unsafe.Pointer(ev.udata))
tag = 0
} else {
tp := taggedPointer(uintptr(unsafe.Pointer(ev.udata)))
pd = (*pollDesc)(tp.pointer())
tag = tp.tag()
if pd.fdseq.Load() != tag {
continue
}
}
pd.setEventErr(ev.flags == _EV_ERROR, tag)
netpollready(&toRun, pd, mode) netpollready(&toRun, pd, mode)
} }
} }

View File

@ -5,6 +5,7 @@
package runtime package runtime
import ( import (
"internal/goarch"
"runtime/internal/atomic" "runtime/internal/atomic"
"unsafe" "unsafe"
) )
@ -145,7 +146,14 @@ func netpollopen(fd uintptr, pd *pollDesc) int32 {
// with the interested event set) will unblock port_getn right away // with the interested event set) will unblock port_getn right away
// because of the I/O readiness notification. // because of the I/O readiness notification.
pd.user = 0 pd.user = 0
r := port_associate(portfd, _PORT_SOURCE_FD, fd, 0, uintptr(unsafe.Pointer(pd))) tp := taggedPointerPack(unsafe.Pointer(pd), pd.fdseq.Load())
// Note that this won't work on a 32-bit system,
// as taggedPointer is always 64-bits but uintptr will be 32 bits.
// Fortunately we only support Solaris on amd64.
if goarch.PtrSize != 8 {
throw("runtime: netpollopen: unsupported pointer size")
}
r := port_associate(portfd, _PORT_SOURCE_FD, fd, 0, uintptr(tp))
unlock(&pd.lock) unlock(&pd.lock)
return r return r
} }
@ -168,7 +176,8 @@ func netpollupdate(pd *pollDesc, set, clear uint32) {
return return
} }
if events != 0 && port_associate(portfd, _PORT_SOURCE_FD, pd.fd, events, uintptr(unsafe.Pointer(pd))) != 0 { tp := taggedPointerPack(unsafe.Pointer(pd), pd.fdseq.Load())
if events != 0 && port_associate(portfd, _PORT_SOURCE_FD, pd.fd, events, uintptr(tp)) != 0 {
print("runtime: port_associate failed (errno=", errno(), ")\n") print("runtime: port_associate failed (errno=", errno(), ")\n")
throw("runtime: netpollupdate failed") throw("runtime: netpollupdate failed")
} }
@ -285,7 +294,12 @@ retry:
if ev.portev_events == 0 { if ev.portev_events == 0 {
continue continue
} }
pd := (*pollDesc)(unsafe.Pointer(ev.portev_user))
tp := taggedPointer(uintptr(unsafe.Pointer(ev.portev_user)))
pd := (*pollDesc)(tp.pointer())
if pd.fdseq.Load() != tp.tag() {
continue
}
var mode, clear int32 var mode, clear int32
if (ev.portev_events & (_POLLIN | _POLLHUP | _POLLERR)) != 0 { if (ev.portev_events & (_POLLIN | _POLLHUP | _POLLERR)) != 0 {

View File

@ -51,6 +51,7 @@ func netpollIsPollDescriptor(fd uintptr) bool {
} }
func netpollopen(fd uintptr, pd *pollDesc) int32 { func netpollopen(fd uintptr, pd *pollDesc) int32 {
// TODO(iant): Consider using taggedPointer on 64-bit systems.
if stdcall4(_CreateIoCompletionPort, fd, iocphandle, uintptr(unsafe.Pointer(pd)), 0) == 0 { if stdcall4(_CreateIoCompletionPort, fd, iocphandle, uintptr(unsafe.Pointer(pd)), 0) == 0 {
return int32(getlasterror()) return int32(getlasterror())
} }