1
0
mirror of https://github.com/golang/go synced 2024-09-29 14:14:29 -06:00

runtime: change time.now to ABIInternal

This reduces the number of instructions executed for time.now by nine,
by eliminating the wrapper. Somehow BenchmarkNow is 0.2ns slower.
On the other hand BenchmarkNowUnixNano is 0.8ns faster.

name                                                       old time/op      new time/op      delta
AfterFunc-12                                                   66.7µs ± 4%      67.3µs ± 2%     ~     (p=0.573 n=20+18)
After-12                                                       97.6µs ± 4%      97.4µs ± 4%     ~     (p=0.758 n=20+20)
Stop-12                                                        66.7µs ±12%      64.8µs ±10%     ~     (p=0.072 n=20+20)
SimultaneousAfterFunc-12                                        109µs ± 0%       110µs ± 1%   +1.47%  (p=0.000 n=17+20)
StartStop-12                                                   31.9µs ±15%      32.7µs ±14%     ~     (p=0.799 n=20+20)
Reset-12                                                       3.67µs ± 2%      3.68µs ± 2%     ~     (p=0.132 n=20+20)
Sleep-12                                                        132µs ± 2%       133µs ± 2%   +0.70%  (p=0.035 n=20+19)
Ticker-12                                                      32.4µs ± 1%      32.3µs ± 2%     ~     (p=0.270 n=20+19)
TickerReset-12                                                 3.71µs ± 2%      3.74µs ± 2%   +0.89%  (p=0.012 n=20+20)
TickerResetNaive-12                                            65.7µs ±10%      67.2µs ±10%     ~     (p=0.174 n=20+20)
Now-12                                                         29.6ns ± 1%      29.8ns ± 0%   +0.78%  (p=0.000 n=17+17)
NowUnixNano-12                                                 31.1ns ± 1%      30.3ns ± 0%   -2.69%  (p=0.000 n=19+18)
NowUnixMilli-12                                                30.9ns ± 0%      31.1ns ± 0%   +0.90%  (p=0.000 n=18+20)
NowUnixMicro-12                                                30.9ns ± 0%      31.1ns ± 1%   +0.68%  (p=0.000 n=20+18)
Format-12                                                       304ns ± 1%       301ns ± 2%   -0.81%  (p=0.004 n=18+19)
FormatNow-12                                                    187ns ± 2%       185ns ± 2%   -0.90%  (p=0.036 n=20+18)
MarshalJSON-12                                                  267ns ± 3%       265ns ± 3%   -1.00%  (p=0.004 n=18+18)
MarshalText-12                                                  267ns ± 2%       265ns ± 3%   -0.87%  (p=0.038 n=19+20)
Parse-12                                                        150ns ± 1%       149ns ± 1%   -0.83%  (p=0.000 n=18+20)
ParseDuration-12                                               79.6ns ± 0%      80.1ns ± 1%   +0.61%  (p=0.000 n=20+20)
Hour-12                                                        4.42ns ± 1%      4.45ns ± 0%   +0.83%  (p=0.000 n=20+20)
Second-12                                                      4.42ns ± 0%      4.42ns ± 1%     ~     (p=0.075 n=18+20)
Year-12                                                        11.1ns ± 1%      11.1ns ± 1%     ~     (p=0.489 n=20+19)
Day-12                                                         14.8ns ± 1%      14.8ns ± 0%     ~     (p=0.616 n=20+18)
ISOWeek-12                                                     17.2ns ± 1%      17.2ns ± 0%     ~     (p=0.179 n=20+19)

name                                                       old avg-late-ns  new avg-late-ns  delta
ParallelTimerLatency-12                                          380k ± 4%        379k ± 3%     ~     (p=0.879 n=20+19)
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=1-12         137k ± 3%        137k ± 2%     ~     (p=0.261 n=19+18)
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=2-12         106k ±16%         95k ± 8%   -9.76%  (p=0.003 n=19+20)
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=3-12        88.6k ±22%       74.6k ± 3%  -15.78%  (p=0.000 n=19+20)
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=4-12        76.1k ±18%       70.8k ± 5%   -7.04%  (p=0.020 n=20+20)
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=5-12        67.3k ±27%       65.6k ±13%     ~     (p=0.211 n=16+18)
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=6-12        59.5k ±24%       57.3k ±32%     ~     (p=0.607 n=19+20)
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=7-12        41.8k ±34%       46.2k ±33%  +10.54%  (p=0.039 n=17+20)
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=8-12        57.5k ±37%       65.6k ±46%     ~     (p=0.283 n=17+20)
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=9-12         118k ±60%        136k ±59%     ~     (p=0.169 n=19+18)
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=10-12      3.66M ±236%       2.55M ±36%     ~     (p=0.158 n=16+20)
StaggeredTickerLatency/work-dur=2ms/tickers-per-P=1-12          81.7k ± 4%       80.7k ± 5%     ~     (p=0.107 n=20+19)

name                                                       old max-late-ns  new max-late-ns  delta
ParallelTimerLatency-12                                        5.88M ±124%      7.28M ±183%     ~     (p=0.640 n=20+20)
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=1-12         384k ±17%        371k ±11%     ~     (p=0.540 n=17+17)
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=2-12        503k ±180%        373k ±19%     ~     (p=0.057 n=17+18)
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=3-12        519k ±129%        340k ±17%  -34.47%  (p=0.000 n=18+19)
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=4-12        491k ±141%        341k ±26%  -30.52%  (p=0.015 n=18+17)
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=5-12        457k ±123%        405k ±48%     ~     (p=0.786 n=17+17)
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=6-12         491k ±85%        502k ±74%     ~     (p=0.916 n=18+19)
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=7-12        572k ±100%        574k ±65%     ~     (p=0.858 n=18+17)
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=8-12       1.95M ±205%      1.65M ±155%     ~     (p=0.641 n=18+19)
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=9-12       7.77M ±104%      8.72M ±103%     ~     (p=0.512 n=20+20)
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=10-12      29.5M ±187%       18.5M ±43%     ~     (p=0.186 n=18+20)
StaggeredTickerLatency/work-dur=2ms/tickers-per-P=1-12           981k ±14%       1033k ±12%   +5.30%  (p=0.048 n=20+18)

Change-Id: Ie794a932a929b46053a6c3020b67d640b98d2335
Reviewed-on: https://go-review.googlesource.com/c/go/+/315369
Trust: Ian Lance Taylor <iant@golang.org>
Run-TryBot: Ian Lance Taylor <iant@golang.org>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
This commit is contained in:
Ian Lance Taylor 2021-04-29 19:08:54 -07:00
parent 146e8d4994
commit 71adc658de

View File

@@ -12,14 +12,11 @@
#define SYS_clock_gettime 228
// func time.now() (sec int64, nsec int32, mono int64)
TEXT time·now(SB),NOSPLIT,$16-24
TEXT time·now<ABIInternal>(SB),NOSPLIT,$16-24
MOVQ SP, R12 // Save old SP; R12 unchanged by C code.
MOVQ g_m(R14), BX // BX unchanged by C code.
// Store CLOCK_REALTIME results directly to return space.
LEAQ sec+0(FP), SI
// Set vdsoPC and vdsoSP for SIGPROF traceback.
// Save the old values on stack and restore them on exit,
// so this function is reentrant.
@@ -28,9 +25,10 @@ TEXT time·now(SB),NOSPLIT,$16-24
MOVQ CX, 0(SP)
MOVQ DX, 8(SP)
MOVQ -8(SI), CX // Sets CX to function return address.
LEAQ sec+0(FP), DX
MOVQ -8(DX), CX // Sets CX to function return address.
MOVQ CX, m_vdsoPC(BX)
MOVQ SI, m_vdsoSP(BX)
MOVQ DX, m_vdsoSP(BX)
CMPQ R14, m_curg(BX) // Only switch if on curg.
JNE noswitch
@@ -39,10 +37,11 @@ TEXT time·now(SB),NOSPLIT,$16-24
MOVQ (g_sched+gobuf_sp)(DX), SP // Set SP to g0 stack
noswitch:
SUBQ $16, SP // Space for monotonic time results
SUBQ $32, SP // Space for two time results
ANDQ $~15, SP // Align for C code
MOVL $0, DI // CLOCK_REALTIME
LEAQ 16(SP), SI
MOVQ runtime·vdsoClockgettimeSym(SB), AX
CMPQ AX, $0
JEQ fallback
@@ -54,25 +53,27 @@ noswitch:
CALL AX
ret:
MOVQ 0(SP), AX // sec
MOVQ 8(SP), DX // nsec
MOVQ 16(SP), AX // realtime sec
MOVQ 24(SP), DI // realtime nsec (moved to BX below)
MOVQ 0(SP), CX // monotonic sec
IMULQ $1000000000, CX
MOVQ 8(SP), DX // monotonic nsec
MOVQ R12, SP // Restore real SP
// Restore vdsoPC, vdsoSP
// We don't worry about being signaled between the two stores.
// If we are not in a signal handler, we'll restore vdsoSP to 0,
// and no one will care about vdsoPC. If we are in a signal handler,
// we cannot receive another signal.
MOVQ 8(SP), CX
MOVQ CX, m_vdsoSP(BX)
MOVQ 0(SP), CX
MOVQ CX, m_vdsoPC(BX)
MOVQ 8(SP), SI
MOVQ SI, m_vdsoSP(BX)
MOVQ 0(SP), SI
MOVQ SI, m_vdsoPC(BX)
// sec is in AX, nsec in DX
// return nsec in AX
IMULQ $1000000000, AX
ADDQ DX, AX
MOVQ AX, mono+16(FP)
// set result registers; AX is already correct
MOVQ DI, BX
ADDQ DX, CX
RET
fallback: