mirror of
https://github.com/golang/go
synced 2024-11-22 14:04:48 -07:00
runtime: use a high res timer to signal io completion ports on windows
GetQueuedCompletionStatusEx has a ~16ms timeout resolution. Use a WaitCompletionPacket associated with the I/O Completion Port (IOCP) and a high resolution timer so the IOCP is signaled on timer expiry, therefore improving the GetQueuedCompletionStatusEx timeout resolution. BenchmarkSleep from the time package shows an important improvement: goos: windows goarch: amd64 pkg: time cpu: Intel(R) Core(TM) i7-10850H CPU @ 2.70GHz │ old.txt │ new.txt │ │ sec/op │ sec/op vs base │ Sleep-12 1258.5µ ± 5% 250.7µ ± 1% -80.08% (p=0.000 n=20) Fixes #44343. Change-Id: I79fc09e34dddfc49e0e23c3d1d0603926c22a11d Reviewed-on: https://go-review.googlesource.com/c/go/+/488675 Reviewed-by: Alex Brainman <alex.brainman@gmail.com> Run-TryBot: Quim Muntal <quimmuntal@gmail.com> TryBot-Result: Gopher Robot <gobot@golang.org> Reviewed-by: Bryan Mills <bcmills@google.com> Reviewed-by: Michael Knyszek <mknyszek@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
parent
5c92f43c51
commit
cf52e70997
@ -21,6 +21,7 @@ const _INVALID_HANDLE_VALUE = ^uintptr(0)
|
||||
const (
|
||||
netpollSourceReady = iota + 1
|
||||
netpollSourceBreak
|
||||
netpollSourceTimer
|
||||
)
|
||||
|
||||
const (
|
||||
@ -148,31 +149,45 @@ func netpollBreak() {
|
||||
// delay == 0: does not block, just polls
|
||||
// delay > 0: block for up to that many nanoseconds
|
||||
func netpoll(delay int64) (gList, int32) {
|
||||
var entries [64]overlappedEntry
|
||||
var wait, n, i uint32
|
||||
var errno int32
|
||||
var toRun gList
|
||||
|
||||
mp := getg().m
|
||||
|
||||
if iocphandle == _INVALID_HANDLE_VALUE {
|
||||
return gList{}, 0
|
||||
}
|
||||
|
||||
var entries [64]overlappedEntry
|
||||
var wait uint32
|
||||
var toRun gList
|
||||
mp := getg().m
|
||||
|
||||
if delay >= 1e15 {
|
||||
// An arbitrary cap on how long to wait for a timer.
|
||||
// 1e15 ns == ~11.5 days.
|
||||
delay = 1e15
|
||||
}
|
||||
|
||||
if delay > 0 && mp.waitIocpHandle != 0 {
|
||||
// GetQueuedCompletionStatusEx doesn't use a high resolution timer internally,
|
||||
// so we use a separate higher resolution timer associated with a wait completion
|
||||
// packet to wake up the poller. Note that the completion packet can be delivered
|
||||
// to another thread, and the Go scheduler expects netpoll to only block up to delay,
|
||||
// so we still need to use a timeout with GetQueuedCompletionStatusEx.
|
||||
// TODO: Improve the Go scheduler to support non-blocking timers.
|
||||
signaled := netpollQueueTimer(delay)
|
||||
if signaled {
|
||||
// There is a small window between the SetWaitableTimer and the NtAssociateWaitCompletionPacket
|
||||
// where the timer can expire. We can return immediately in this case.
|
||||
return gList{}, 0
|
||||
}
|
||||
}
|
||||
if delay < 0 {
|
||||
wait = _INFINITE
|
||||
} else if delay == 0 {
|
||||
wait = 0
|
||||
} else if delay < 1e6 {
|
||||
wait = 1
|
||||
} else if delay < 1e15 {
|
||||
wait = uint32(delay / 1e6)
|
||||
} else {
|
||||
// An arbitrary cap on how long to wait for a timer.
|
||||
// 1e9 ms == ~11.5 days.
|
||||
wait = 1e9
|
||||
wait = uint32(delay / 1e6)
|
||||
}
|
||||
|
||||
n = uint32(len(entries) / int(gomaxprocs))
|
||||
n := len(entries) / int(gomaxprocs)
|
||||
if n < 8 {
|
||||
n = 8
|
||||
}
|
||||
@ -181,7 +196,7 @@ func netpoll(delay int64) (gList, int32) {
|
||||
}
|
||||
if stdcall6(_GetQueuedCompletionStatusEx, iocphandle, uintptr(unsafe.Pointer(&entries[0])), uintptr(n), uintptr(unsafe.Pointer(&n)), uintptr(wait), 0) == 0 {
|
||||
mp.blocked = false
|
||||
errno = int32(getlasterror())
|
||||
errno := getlasterror()
|
||||
if errno == _WAIT_TIMEOUT {
|
||||
return gList{}, 0
|
||||
}
|
||||
@ -190,7 +205,7 @@ func netpoll(delay int64) (gList, int32) {
|
||||
}
|
||||
mp.blocked = false
|
||||
delta := int32(0)
|
||||
for i = 0; i < n; i++ {
|
||||
for i := 0; i < n; i++ {
|
||||
e := &entries[i]
|
||||
switch unpackNetpollSource(e.key) {
|
||||
case netpollSourceReady:
|
||||
@ -212,6 +227,8 @@ func netpoll(delay int64) (gList, int32) {
|
||||
// Forward the notification to the blocked poller.
|
||||
netpollBreak()
|
||||
}
|
||||
case netpollSourceTimer:
|
||||
// TODO: We could avoid calling NtCancelWaitCompletionPacket for expired wait completion packets.
|
||||
default:
|
||||
println("runtime: GetQueuedCompletionStatusEx returned net_op with invalid key=", e.key)
|
||||
throw("runtime: netpoll failed")
|
||||
@ -219,3 +236,49 @@ func netpoll(delay int64) (gList, int32) {
|
||||
}
|
||||
return toRun, delta
|
||||
}
|
||||
|
||||
// netpollQueueTimer queues a timer to wake up the poller after the given delay.
|
||||
// It returns true if the timer expired during this call.
|
||||
func netpollQueueTimer(delay int64) (signaled bool) {
|
||||
const (
|
||||
STATUS_SUCCESS = 0x00000000
|
||||
STATUS_PENDING = 0x00000103
|
||||
STATUS_CANCELLED = 0xC0000120
|
||||
)
|
||||
mp := getg().m
|
||||
// A wait completion packet can only be associated with one timer at a time,
|
||||
// so we need to cancel the previous one if it exists. This wouldn't be necessary
|
||||
// if the poller would only be woken up by the timer, in which case the association
|
||||
// would be automatically cancelled, but it can also be woken up by other events,
|
||||
// such as a netpollBreak, so we can get to this point with a timer that hasn't
|
||||
// expired yet. In this case, the completion packet can still be picked up by
|
||||
// another thread, so defer the cancellation until it is really necessary.
|
||||
errno := stdcall2(_NtCancelWaitCompletionPacket, mp.waitIocpHandle, 1)
|
||||
switch errno {
|
||||
case STATUS_CANCELLED:
|
||||
// STATUS_CANCELLED is returned when the associated timer has already expired,
|
||||
// in which automatically cancels the wait completion packet.
|
||||
fallthrough
|
||||
case STATUS_SUCCESS:
|
||||
dt := -delay / 100 // relative sleep (negative), 100ns units
|
||||
if stdcall6(_SetWaitableTimer, mp.waitIocpTimer, uintptr(unsafe.Pointer(&dt)), 0, 0, 0, 0) == 0 {
|
||||
println("runtime: SetWaitableTimer failed; errno=", getlasterror())
|
||||
throw("runtime: netpoll failed")
|
||||
}
|
||||
key := packNetpollKey(netpollSourceTimer, nil)
|
||||
if errno := stdcall8(_NtAssociateWaitCompletionPacket, mp.waitIocpHandle, iocphandle, mp.waitIocpTimer, key, 0, 0, 0, uintptr(unsafe.Pointer(&signaled))); errno != 0 {
|
||||
println("runtime: NtAssociateWaitCompletionPacket failed; errno=", errno)
|
||||
throw("runtime: netpoll failed")
|
||||
}
|
||||
case STATUS_PENDING:
|
||||
// STATUS_PENDING is returned if the wait operation can't be cancelled yet.
|
||||
// This can happen if this thread was woken up by another event, such as a netpollBreak,
|
||||
// and the timer expired just while calling NtCancelWaitCompletionPacket, in which case
|
||||
// this call fails to cancel the association to avoid a race condition.
|
||||
// This is a rare case, so we can just avoid using the high resolution timer this time.
|
||||
default:
|
||||
println("runtime: NtCancelWaitCompletionPacket failed; errno=", errno)
|
||||
throw("runtime: netpoll failed")
|
||||
}
|
||||
return signaled
|
||||
}
|
||||
|
@ -12,6 +12,8 @@ package runtime
|
||||
// timer precision to keep the timer error acceptable.
|
||||
const osRelaxMinNS = 0
|
||||
|
||||
var haveHighResSleep = true
|
||||
|
||||
// osRelax is called by the scheduler when transitioning to and from
|
||||
// all Ps being idle.
|
||||
func osRelax(relax bool) {}
|
||||
|
@ -131,8 +131,11 @@ var (
|
||||
// Load ntdll.dll manually during startup, otherwise Mingw
|
||||
// links wrong printf function to cgo executable (see issue
|
||||
// 12030 for details).
|
||||
_RtlGetCurrentPeb stdFunction
|
||||
_RtlGetNtVersionNumbers stdFunction
|
||||
_NtCreateWaitCompletionPacket stdFunction
|
||||
_NtAssociateWaitCompletionPacket stdFunction
|
||||
_NtCancelWaitCompletionPacket stdFunction
|
||||
_RtlGetCurrentPeb stdFunction
|
||||
_RtlGetNtVersionNumbers stdFunction
|
||||
|
||||
// These are from non-kernel32.dll, so we prefer to LoadLibraryEx them.
|
||||
_timeBeginPeriod,
|
||||
@ -161,7 +164,9 @@ type mOS struct {
|
||||
waitsema uintptr // semaphore for parking on locks
|
||||
resumesema uintptr // semaphore to indicate suspend/resume
|
||||
|
||||
highResTimer uintptr // high resolution timer handle used in usleep
|
||||
highResTimer uintptr // high resolution timer handle used in usleep
|
||||
waitIocpTimer uintptr // high resolution timer handle used in netpoll
|
||||
waitIocpHandle uintptr // wait completion handle used in netpoll
|
||||
|
||||
// preemptExtLock synchronizes preemptM with entry/exit from
|
||||
// external C code.
|
||||
@ -250,6 +255,18 @@ func loadOptionalSyscalls() {
|
||||
if n32 == 0 {
|
||||
throw("ntdll.dll not found")
|
||||
}
|
||||
_NtCreateWaitCompletionPacket = windowsFindfunc(n32, []byte("NtCreateWaitCompletionPacket\000"))
|
||||
if _NtCreateWaitCompletionPacket != nil {
|
||||
// These functions should exists if NtCreateWaitCompletionPacket exists.
|
||||
_NtAssociateWaitCompletionPacket = windowsFindfunc(n32, []byte("NtAssociateWaitCompletionPacket\000"))
|
||||
if _NtAssociateWaitCompletionPacket == nil {
|
||||
throw("NtCreateWaitCompletionPacket exists but NtAssociateWaitCompletionPacket does not")
|
||||
}
|
||||
_NtCancelWaitCompletionPacket = windowsFindfunc(n32, []byte("NtCancelWaitCompletionPacket\000"))
|
||||
if _NtCancelWaitCompletionPacket == nil {
|
||||
throw("NtCreateWaitCompletionPacket exists but NtCancelWaitCompletionPacket does not")
|
||||
}
|
||||
}
|
||||
_RtlGetCurrentPeb = windowsFindfunc(n32, []byte("RtlGetCurrentPeb\000"))
|
||||
_RtlGetNtVersionNumbers = windowsFindfunc(n32, []byte("RtlGetNtVersionNumbers\000"))
|
||||
}
|
||||
@ -374,6 +391,13 @@ func osRelax(relax bool) uint32 {
|
||||
// CREATE_WAITABLE_TIMER_HIGH_RESOLUTION flag is available.
|
||||
var haveHighResTimer = false
|
||||
|
||||
// haveHighResSleep indicates that NtCreateWaitCompletionPacket
|
||||
// exists and haveHighResTimer is true.
|
||||
// NtCreateWaitCompletionPacket has been available since Windows 10,
|
||||
// but has just been publicly documented, so some platforms, like Wine,
|
||||
// doesn't support it yet.
|
||||
var haveHighResSleep = false
|
||||
|
||||
// createHighResTimer calls CreateWaitableTimerEx with
|
||||
// CREATE_WAITABLE_TIMER_HIGH_RESOLUTION flag to create high
|
||||
// resolution timer. createHighResTimer returns new timer
|
||||
@ -397,6 +421,7 @@ func initHighResTimer() {
|
||||
h := createHighResTimer()
|
||||
if h != 0 {
|
||||
haveHighResTimer = true
|
||||
haveHighResSleep = _NtCreateWaitCompletionPacket != nil
|
||||
stdcall1(_CloseHandle, h)
|
||||
} else {
|
||||
// Only load winmm.dll if we need it.
|
||||
@ -797,7 +822,7 @@ func sigblock(exiting bool) {
|
||||
}
|
||||
|
||||
// Called to initialize a new m (including the bootstrap m).
|
||||
// Called on the new thread, cannot allocate memory.
|
||||
// Called on the new thread, cannot allocate Go memory.
|
||||
func minit() {
|
||||
var thandle uintptr
|
||||
if stdcall7(_DuplicateHandle, currentProcess, currentThread, currentProcess, uintptr(unsafe.Pointer(&thandle)), 0, 0, _DUPLICATE_SAME_ACCESS) == 0 {
|
||||
@ -818,6 +843,19 @@ func minit() {
|
||||
throw("CreateWaitableTimerEx when creating timer failed")
|
||||
}
|
||||
}
|
||||
if mp.waitIocpHandle == 0 && haveHighResSleep {
|
||||
mp.waitIocpTimer = createHighResTimer()
|
||||
if mp.waitIocpTimer == 0 {
|
||||
print("runtime: CreateWaitableTimerEx failed; errno=", getlasterror(), "\n")
|
||||
throw("CreateWaitableTimerEx when creating timer failed")
|
||||
}
|
||||
const GENERIC_ALL = 0x10000000
|
||||
errno := stdcall3(_NtCreateWaitCompletionPacket, uintptr(unsafe.Pointer(&mp.waitIocpHandle)), GENERIC_ALL, 0)
|
||||
if mp.waitIocpHandle == 0 {
|
||||
print("runtime: NtCreateWaitCompletionPacket failed; errno=", errno, "\n")
|
||||
throw("NtCreateWaitCompletionPacket failed")
|
||||
}
|
||||
}
|
||||
unlock(&mp.threadLock)
|
||||
|
||||
// Query the true stack base from the OS. Currently we're
|
||||
@ -872,6 +910,14 @@ func mdestroy(mp *m) {
|
||||
stdcall1(_CloseHandle, mp.highResTimer)
|
||||
mp.highResTimer = 0
|
||||
}
|
||||
if mp.waitIocpTimer != 0 {
|
||||
stdcall1(_CloseHandle, mp.waitIocpTimer)
|
||||
mp.waitIocpTimer = 0
|
||||
}
|
||||
if mp.waitIocpHandle != 0 {
|
||||
stdcall1(_CloseHandle, mp.waitIocpHandle)
|
||||
mp.waitIocpHandle = 0
|
||||
}
|
||||
if mp.waitsema != 0 {
|
||||
stdcall1(_CloseHandle, mp.waitsema)
|
||||
mp.waitsema = 0
|
||||
|
@ -15,15 +15,33 @@ import (
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
. "time"
|
||||
_ "unsafe" // for go:linkname
|
||||
)
|
||||
|
||||
// haveHighResSleep is true if the system supports at least ~1ms sleeps.
|
||||
//
|
||||
//go:linkname haveHighResSleep runtime.haveHighResSleep
|
||||
var haveHighResSleep bool
|
||||
|
||||
// adjustDelay returns an adjusted delay based on the system sleep resolution.
|
||||
// Go runtime uses different Windows timers for time.Now and sleeping.
|
||||
// These can tick at different frequencies and can arrive out of sync.
|
||||
// The effect can be seen, for example, as time.Sleep(100ms) is actually
|
||||
// shorter then 100ms when measured as difference between time.Now before and
|
||||
// after time.Sleep call. This was observed on Windows XP SP3 (windows/386).
|
||||
// windowsInaccuracy is to ignore such errors.
|
||||
const windowsInaccuracy = 17 * Millisecond
|
||||
func adjustDelay(t *testing.T, delay Duration) Duration {
|
||||
if haveHighResSleep {
|
||||
return delay
|
||||
}
|
||||
t.Log("adjusting delay for low resolution sleep")
|
||||
switch runtime.GOOS {
|
||||
case "windows":
|
||||
return delay - 17*Millisecond
|
||||
default:
|
||||
t.Fatal("adjustDelay unimplemented on " + runtime.GOOS)
|
||||
return 0
|
||||
}
|
||||
}
|
||||
|
||||
func TestSleep(t *testing.T) {
|
||||
const delay = 100 * Millisecond
|
||||
@ -33,10 +51,7 @@ func TestSleep(t *testing.T) {
|
||||
}()
|
||||
start := Now()
|
||||
Sleep(delay)
|
||||
delayadj := delay
|
||||
if runtime.GOOS == "windows" {
|
||||
delayadj -= windowsInaccuracy
|
||||
}
|
||||
delayadj := adjustDelay(t, delay)
|
||||
duration := Now().Sub(start)
|
||||
if duration < delayadj {
|
||||
t.Fatalf("Sleep(%s) slept for only %s", delay, duration)
|
||||
@ -247,10 +262,7 @@ func TestAfter(t *testing.T) {
|
||||
const delay = 100 * Millisecond
|
||||
start := Now()
|
||||
end := <-After(delay)
|
||||
delayadj := delay
|
||||
if runtime.GOOS == "windows" {
|
||||
delayadj -= windowsInaccuracy
|
||||
}
|
||||
delayadj := adjustDelay(t, delay)
|
||||
if duration := Now().Sub(start); duration < delayadj {
|
||||
t.Fatalf("After(%s) slept for only %d ns", delay, duration)
|
||||
}
|
||||
|
@ -81,8 +81,9 @@
|
||||
//
|
||||
// Timer resolution varies depending on the Go runtime, the operating system
|
||||
// and the underlying hardware.
|
||||
// On Unix, the resolution is approximately 1ms.
|
||||
// On Windows, the default resolution is approximately 16ms, but
|
||||
// On Unix, the resolution is ~1ms.
|
||||
// On Windows version 1803 and newer, the resolution is ~0.5ms.
|
||||
// On older Windows versions, the default resolution is ~16ms, but
|
||||
// a higher resolution may be requested using [golang.org/x/sys/windows.TimeBeginPeriod].
|
||||
package time
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user