mirror of
https://github.com/golang/go
synced 2024-11-19 12:34:47 -07:00
runtime: use vDSO on linux/386 to improve time.Now performance
This change adds support for accelerating time.Now by using the __vdso_clock_gettime fast-path via the vDSO on linux/386 if it is available. When the vDSO path to the clocks is available, it is typically 5x-10x faster than the syscall path (see benchmark extract below). Two such calls are made for each time.Now() call on most platforms as of go 1.9. - Add vdso_linux_386.go, containing the ELF32 definitions for use by vdso_linux.go, the maximum array size, and the symbols to be located in the vDSO. - Modify runtime.walltime and runtime.nanotime to check for and use the vDSO fast-path if available, or fall back to the existing syscall path. - Reduce the stack reservations for runtime.walltime and runtime.monotime from 32 to 16 bytes. It appears the syscall path actually only needed 8 bytes, but 16 is now needed to cover the syscall and vDSO paths. - Remove clearing DX from the syscall paths as clock_gettime only takes 2 args (BX, CX in syscall calling convention), so there should be no need to clear DX. The included BenchmarkTimeNow was run with -cpu=1 -count=20 on an "Intel(R) Celeron(R) CPU J1900 @ 1.99GHz", comparing released go 1.9.1 vs this change. This shows a gain in performance on linux/386 (6.89x), and that no regression occurred on linux/amd64 due to this change. Kernel: linux/i686, GOOS=linux GOARCH=386 name old time/op new time/op delta TimeNow 978ns ± 0% 142ns ± 0% -85.48% (p=0.000 n=16+20) Kernel: linux/x86_64, GOOS=linux GOARCH=amd64 name old time/op new time/op delta TimeNow 125ns ± 0% 125ns ± 0% ~ (all equal) Gains are more dramatic in virtualized environments, presumably due to the overhead of virtualizing the syscall. Fixes #22190 Change-Id: I2f83ce60cb1b8b310c9ced0706bb463c1b3aedf8 Reviewed-on: https://go-review.googlesource.com/69390 Run-TryBot: Ian Lance Taylor <iant@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
This commit is contained in:
parent
bf237f534c
commit
af40cbe83c
@ -2,7 +2,7 @@
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build !amd64,!arm,!arm64,!mips,!mipsle,!mips64,!mips64le,!s390x,!ppc64,!ppc64le
|
||||
// +build !386,!amd64,!arm,!arm64,!mips,!mipsle,!mips64,!mips64le,!s390x,!ppc64,!ppc64le
|
||||
|
||||
package runtime
|
||||
|
||||
|
@ -203,12 +203,34 @@ TEXT runtime·mincore(SB),NOSPLIT,$0-16
|
||||
RET
|
||||
|
||||
// func walltime() (sec int64, nsec int32)
|
||||
TEXT runtime·walltime(SB), NOSPLIT, $32
|
||||
TEXT runtime·walltime(SB), NOSPLIT, $16
|
||||
// Stack layout, depending on call path:
|
||||
// x(SP) vDSO INVOKE_SYSCALL
|
||||
// 12 ts.tv_nsec ts.tv_nsec
|
||||
// 8 ts.tv_sec ts.tv_sec
|
||||
// 4 &ts -
|
||||
// 0 CLOCK_<id> -
|
||||
//
|
||||
// If we take the vDSO path, we're calling a function with gcc calling convention.
|
||||
// We're guaranteed 128 bytes on entry. We've taken 16, and the call uses another 4,
|
||||
// leaving 108 for __vdso_clock_gettime to use.
|
||||
MOVL runtime·__vdso_clock_gettime_sym(SB), AX
|
||||
CMPL AX, $0
|
||||
JEQ fallback
|
||||
|
||||
LEAL 8(SP), BX // &ts (struct timespec)
|
||||
MOVL BX, 4(SP)
|
||||
MOVL $0, 0(SP) // CLOCK_REALTIME
|
||||
CALL AX
|
||||
JMP finish
|
||||
|
||||
fallback:
|
||||
MOVL $SYS_clock_gettime, AX
|
||||
MOVL $0, BX // CLOCK_REALTIME
|
||||
LEAL 8(SP), CX
|
||||
MOVL $0, DX
|
||||
INVOKE_SYSCALL
|
||||
|
||||
finish:
|
||||
MOVL 8(SP), AX // sec
|
||||
MOVL 12(SP), BX // nsec
|
||||
|
||||
@ -220,12 +242,25 @@ TEXT runtime·walltime(SB), NOSPLIT, $32
|
||||
|
||||
// int64 nanotime(void) so really
|
||||
// void nanotime(int64 *nsec)
|
||||
TEXT runtime·nanotime(SB), NOSPLIT, $32
|
||||
TEXT runtime·nanotime(SB), NOSPLIT, $16
|
||||
// See comments above in walltime() about stack space usage and layout.
|
||||
MOVL runtime·__vdso_clock_gettime_sym(SB), AX
|
||||
CMPL AX, $0
|
||||
JEQ fallback
|
||||
|
||||
LEAL 8(SP), BX // &ts (struct timespec)
|
||||
MOVL BX, 4(SP)
|
||||
MOVL $1, 0(SP) // CLOCK_MONOTONIC
|
||||
CALL AX
|
||||
JMP finish
|
||||
|
||||
fallback:
|
||||
MOVL $SYS_clock_gettime, AX
|
||||
MOVL $1, BX // CLOCK_MONOTONIC
|
||||
LEAL 8(SP), CX
|
||||
MOVL $0, DX
|
||||
INVOKE_SYSCALL
|
||||
|
||||
finish:
|
||||
MOVL 8(SP), AX // sec
|
||||
MOVL 12(SP), BX // nsec
|
||||
|
||||
|
@ -3,7 +3,7 @@
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build linux
|
||||
// +build amd64
|
||||
// +build 386 amd64
|
||||
|
||||
package runtime
|
||||
|
||||
|
93
src/runtime/vdso_linux_386.go
Normal file
93
src/runtime/vdso_linux_386.go
Normal file
@ -0,0 +1,93 @@
|
||||
// Copyright 2012 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package runtime
|
||||
|
||||
// ELF32 structure definitions for use by the Linux vDSO loader
|
||||
|
||||
type elfSym struct {
|
||||
st_name uint32
|
||||
st_value uint32
|
||||
st_size uint32
|
||||
st_info byte
|
||||
st_other byte
|
||||
st_shndx uint16
|
||||
}
|
||||
|
||||
type elfVerdef struct {
|
||||
vd_version uint16 /* Version revision */
|
||||
vd_flags uint16 /* Version information */
|
||||
vd_ndx uint16 /* Version Index */
|
||||
vd_cnt uint16 /* Number of associated aux entries */
|
||||
vd_hash uint32 /* Version name hash value */
|
||||
vd_aux uint32 /* Offset in bytes to verdaux array */
|
||||
vd_next uint32 /* Offset in bytes to next verdef entry */
|
||||
}
|
||||
|
||||
type elfEhdr struct {
|
||||
e_ident [_EI_NIDENT]byte /* Magic number and other info */
|
||||
e_type uint16 /* Object file type */
|
||||
e_machine uint16 /* Architecture */
|
||||
e_version uint32 /* Object file version */
|
||||
e_entry uint32 /* Entry point virtual address */
|
||||
e_phoff uint32 /* Program header table file offset */
|
||||
e_shoff uint32 /* Section header table file offset */
|
||||
e_flags uint32 /* Processor-specific flags */
|
||||
e_ehsize uint16 /* ELF header size in bytes */
|
||||
e_phentsize uint16 /* Program header table entry size */
|
||||
e_phnum uint16 /* Program header table entry count */
|
||||
e_shentsize uint16 /* Section header table entry size */
|
||||
e_shnum uint16 /* Section header table entry count */
|
||||
e_shstrndx uint16 /* Section header string table index */
|
||||
}
|
||||
|
||||
type elfPhdr struct {
|
||||
p_type uint32 /* Segment type */
|
||||
p_offset uint32 /* Segment file offset */
|
||||
p_vaddr uint32 /* Segment virtual address */
|
||||
p_paddr uint32 /* Segment physical address */
|
||||
p_filesz uint32 /* Segment size in file */
|
||||
p_memsz uint32 /* Segment size in memory */
|
||||
p_flags uint32 /* Segment flags */
|
||||
p_align uint32 /* Segment alignment */
|
||||
}
|
||||
|
||||
type elfShdr struct {
|
||||
sh_name uint32 /* Section name (string tbl index) */
|
||||
sh_type uint32 /* Section type */
|
||||
sh_flags uint32 /* Section flags */
|
||||
sh_addr uint32 /* Section virtual addr at execution */
|
||||
sh_offset uint32 /* Section file offset */
|
||||
sh_size uint32 /* Section size in bytes */
|
||||
sh_link uint32 /* Link to another section */
|
||||
sh_info uint32 /* Additional section information */
|
||||
sh_addralign uint32 /* Section alignment */
|
||||
sh_entsize uint32 /* Entry size if section holds table */
|
||||
}
|
||||
|
||||
type elfDyn struct {
|
||||
d_tag int32 /* Dynamic entry type */
|
||||
d_val uint32 /* Integer value */
|
||||
}
|
||||
|
||||
type elfVerdaux struct {
|
||||
vda_name uint32 /* Version or dependency names */
|
||||
vda_next uint32 /* Offset in bytes to next verdaux entry */
|
||||
}
|
||||
|
||||
const (
|
||||
// vdsoArrayMax is the byte-size of a maximally sized array on this architecture.
|
||||
// See cmd/compile/internal/x86/galign.go arch.MAXWIDTH initialization, but must also
|
||||
// be constrained to max +ve int.
|
||||
vdsoArrayMax = 1<<31 - 1
|
||||
)
|
||||
|
||||
var sym_keys = []symbol_key{
|
||||
{"__vdso_clock_gettime", 0xd35ec75, 0x6e43a318, &__vdso_clock_gettime_sym},
|
||||
}
|
||||
|
||||
// initialize to fall back to syscall
|
||||
var (
|
||||
__vdso_clock_gettime_sym uintptr = 0
|
||||
)
|
63
src/runtime/vdso_linux_test.go
Normal file
63
src/runtime/vdso_linux_test.go
Normal file
@ -0,0 +1,63 @@
|
||||
// Copyright 2017 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build linux
|
||||
// +build 386 amd64
|
||||
|
||||
package runtime_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
_ "unsafe"
|
||||
)
|
||||
|
||||
// These tests are a little risky because they overwrite the __vdso_clock_gettime_sym value.
|
||||
// It's normally initialized at startup and remains unchanged after that.
|
||||
|
||||
//go:linkname __vdso_clock_gettime_sym runtime.__vdso_clock_gettime_sym
|
||||
var __vdso_clock_gettime_sym uintptr
|
||||
|
||||
func TestClockVDSOAndFallbackPaths(t *testing.T) {
|
||||
// Check that we can call walltime() and nanotime() with and without their (1st) fast-paths.
|
||||
// This just checks that fast and fallback paths can be called, rather than testing their
|
||||
// results.
|
||||
//
|
||||
// Call them indirectly via time.Now(), so we don't need auxiliary .s files to allow us to
|
||||
// use go:linkname to refer to the functions directly.
|
||||
|
||||
save := __vdso_clock_gettime_sym
|
||||
if save == 0 {
|
||||
t.Log("__vdso_clock_gettime symbol not found; fallback path will be used by default")
|
||||
}
|
||||
|
||||
// Call with fast-path enabled (if vDSO symbol found at startup)
|
||||
time.Now()
|
||||
|
||||
// Call with fast-path disabled
|
||||
__vdso_clock_gettime_sym = 0
|
||||
time.Now()
|
||||
__vdso_clock_gettime_sym = save
|
||||
}
|
||||
|
||||
func BenchmarkClockVDSOAndFallbackPaths(b *testing.B) {
|
||||
run := func(b *testing.B) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
// Call via time.Now() - see comment in test above.
|
||||
time.Now()
|
||||
}
|
||||
}
|
||||
|
||||
save := __vdso_clock_gettime_sym
|
||||
b.Run("vDSO", run)
|
||||
__vdso_clock_gettime_sym = 0
|
||||
b.Run("Fallback", run)
|
||||
__vdso_clock_gettime_sym = save
|
||||
}
|
||||
|
||||
func BenchmarkTimeNow(b *testing.B) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
time.Now()
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user