1
0
mirror of https://github.com/golang/go synced 2024-11-26 04:58:00 -07:00

internal/bytealg: optimize Equal for arm64 target

Remove the redundant intermediate jump in runtime.memequal.
Remove the redundant a.ptr==b.ptr check in runtime.memequal_varlen; it now branches to runtime.memequal, which performs that check itself.
Add 16-byte alignment before some labels in runtime.memequal.

goos: linux
goarch: arm64
pkg: bytes
                                │ ./master.log │              ./opt.log              │
                                │    sec/op    │    sec/op     vs base               │
Equal/0-4                         0.8342n ± 0%   0.5254n ± 3%  -37.01% (p=0.000 n=8)
Equal/same/1-4                     2.720n ± 0%    2.720n ± 2%        ~ (p=0.779 n=8)
Equal/same/6-4                     2.720n ± 5%    2.720n ± 2%        ~ (p=0.908 n=8)
Equal/same/9-4                     2.722n ± 2%    2.721n ± 2%        ~ (p=0.779 n=8)
Equal/same/15-4                    2.719n ± 0%    2.719n ± 0%        ~ (p=0.641 n=8)
Equal/same/16-4                    2.721n ± 2%    2.719n ± 0%   -0.07% (p=0.014 n=8)
Equal/same/20-4                    2.720n ± 0%    2.721n ± 2%        ~ (p=0.236 n=8)
Equal/same/32-4                    2.720n ± 1%    2.720n ± 0%        ~ (p=0.396 n=8)
Equal/same/4K-4                    2.719n ± 0%    2.720n ± 0%        ~ (p=0.663 n=8)
Equal/same/4M-4                    2.721n ± 0%    2.720n ± 0%        ~ (p=0.075 n=8)
Equal/same/64M-4                   2.720n ± 0%    2.720n ± 2%        ~ (p=0.806 n=8)
Equal/1-4                          6.671n ± 0%    5.449n ± 0%  -18.33% (p=0.000 n=8)
Equal/6-4                          8.761n ± 2%    7.508n ± 0%  -14.30% (p=0.000 n=8)
Equal/9-4                          8.343n ± 0%    7.091n ± 0%  -15.01% (p=0.000 n=8)
Equal/15-4                         8.339n ± 2%    7.090n ± 0%  -14.98% (p=0.000 n=8)
Equal/16-4                         9.173n ± 0%    7.925n ± 2%  -13.61% (p=0.000 n=8)
Equal/20-4                         11.26n ± 0%    10.01n ± 0%  -11.10% (p=0.000 n=8)
Equal/32-4                        10.425n ± 0%    9.176n ± 0%  -11.98% (p=0.000 n=8)
Equal/4K-4                         192.9n ± 0%    192.7n ± 0%   -0.10% (p=0.044 n=8)
Equal/4M-4                         191.3µ ± 0%    191.3µ ± 0%        ~ (p=0.798 n=8)
Equal/64M-4                        3.066m ± 2%    3.065m ± 0%        ~ (p=0.083 n=8)
EqualBothUnaligned/64_0-4          7.506n ± 2%    7.090n ± 2%   -5.55% (p=0.000 n=8)
EqualBothUnaligned/64_1-4          7.850n ± 1%    7.423n ± 0%   -5.43% (p=0.000 n=8)
EqualBothUnaligned/64_4-4          7.505n ± 0%    7.088n ± 0%   -5.56% (p=0.000 n=8)
EqualBothUnaligned/64_7-4          7.840n ± 0%    7.413n ± 0%   -5.44% (p=0.000 n=8)
EqualBothUnaligned/4096_0-4        193.0n ± 4%    190.9n ± 0%   -1.09% (p=0.004 n=8)
EqualBothUnaligned/4096_1-4        223.9n ± 0%    223.1n ± 0%   -0.36% (p=0.000 n=8)
EqualBothUnaligned/4096_4-4        191.9n ± 2%    191.5n ± 0%   -0.21% (p=0.004 n=8)
EqualBothUnaligned/4096_7-4        223.8n ± 0%    223.1n ± 1%        ~ (p=0.098 n=8)
EqualBothUnaligned/4194304_0-4     191.8µ ± 0%    191.8µ ± 0%        ~ (p=0.504 n=8)
EqualBothUnaligned/4194304_1-4     225.4µ ± 2%    225.5µ ± 0%        ~ (p=0.065 n=8)
EqualBothUnaligned/4194304_4-4     192.6µ ± 0%    192.7µ ± 2%   +0.06% (p=0.041 n=8)
EqualBothUnaligned/4194304_7-4     225.4µ ± 0%    225.5µ ± 0%   +0.05% (p=0.050 n=8)
EqualBothUnaligned/67108864_0-4    3.069m ± 0%    3.069m ± 0%        ~ (p=0.314 n=8)
EqualBothUnaligned/67108864_1-4    3.589m ± 0%    3.588m ± 0%        ~ (p=0.959 n=8)
EqualBothUnaligned/67108864_4-4    3.083m ± 0%    3.083m ± 2%        ~ (p=0.505 n=8)
EqualBothUnaligned/67108864_7-4    3.588m ± 0%    3.588m ± 0%        ~ (p=1.000 n=8)
geomean                            199.9n         190.5n        -4.70%

Change-Id: Ib8d0d4006dd39162a600ac98a5f44a0f05136ed3
Reviewed-on: https://go-review.googlesource.com/c/go/+/601135
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Auto-Submit: Keith Randall <khr@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Keith Randall <khr@golang.org>
Auto-Submit: Keith Randall <khr@golang.org>
This commit is contained in:
Vasily Leonenko 2024-07-23 23:23:33 +03:00 committed by Gopher Robot
parent 1f0c044d60
commit b915399e7e

View File

@@ -5,25 +5,11 @@
#include "go_asm.h" #include "go_asm.h"
#include "textflag.h" #include "textflag.h"
// memequal(a, b unsafe.Pointer, size uintptr) bool
TEXT runtime·memequal<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-25
// short path to handle 0-byte case
CBZ R2, equal
// short path to handle equal pointers
CMP R0, R1
BEQ equal
B memeqbody<>(SB)
equal:
MOVD $1, R0
RET
// memequal_varlen(a, b unsafe.Pointer) bool // memequal_varlen(a, b unsafe.Pointer) bool
TEXT runtime·memequal_varlen<ABIInternal>(SB),NOSPLIT,$0-17 TEXT runtime·memequal_varlen<ABIInternal>(SB),NOSPLIT,$0-17
CMP R0, R1
BEQ eq
MOVD 8(R26), R2 // compiler stores size at offset 8 in the closure MOVD 8(R26), R2 // compiler stores size at offset 8 in the closure
CBZ R2, eq CBZ R2, eq
B memeqbody<>(SB) B runtime·memequal<ABIInternal>(SB)
eq: eq:
MOVD $1, R0 MOVD $1, R0
RET RET
@@ -33,7 +19,13 @@ eq:
// R1: pointer b // R1: pointer b
// R2: data len // R2: data len
// at return: result in R0 // at return: result in R0
TEXT memeqbody<>(SB),NOSPLIT,$0 // memequal(a, b unsafe.Pointer, size uintptr) bool
TEXT runtime·memequal<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-25
// short path to handle 0-byte case
CBZ R2, equal
// short path to handle equal pointers
CMP R0, R1
BEQ equal
CMP $1, R2 CMP $1, R2
// handle 1-byte special case for better performance // handle 1-byte special case for better performance
BEQ one BEQ one
@@ -91,6 +83,7 @@ tail:
EOR R4, R5 EOR R4, R5
CBNZ R5, not_equal CBNZ R5, not_equal
B equal B equal
PCALIGN $16
lt_8: lt_8:
TBZ $2, R2, lt_4 TBZ $2, R2, lt_4
MOVWU (R0), R4 MOVWU (R0), R4
@@ -103,6 +96,7 @@ lt_8:
EOR R4, R5 EOR R4, R5
CBNZ R5, not_equal CBNZ R5, not_equal
B equal B equal
PCALIGN $16
lt_4: lt_4:
TBZ $1, R2, lt_2 TBZ $1, R2, lt_2
MOVHU.P 2(R0), R4 MOVHU.P 2(R0), R4