From b915399e7e91cba31d4031df507b9c97c33f35d8 Mon Sep 17 00:00:00 2001
From: Vasily Leonenko
Date: Tue, 23 Jul 2024 23:23:33 +0300
Subject: [PATCH] internal/bytealg: optimize Equal for arm64 target
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Remove redundant intermediate jump in runtime.memequal
Remove redundant a.ptr==b.ptr check in runtime.memequal_varlen
Add 16-bytes alignment before some labels in runtime.memequal

goos: linux
goarch: arm64
pkg: bytes
                                │ ./master.log │              ./opt.log              │
                                │    sec/op    │    sec/op     vs base               │
Equal/0-4                        0.8342n ± 0%   0.5254n ± 3%  -37.01% (p=0.000 n=8)
Equal/same/1-4                    2.720n ± 0%    2.720n ± 2%        ~ (p=0.779 n=8)
Equal/same/6-4                    2.720n ± 5%    2.720n ± 2%        ~ (p=0.908 n=8)
Equal/same/9-4                    2.722n ± 2%    2.721n ± 2%        ~ (p=0.779 n=8)
Equal/same/15-4                   2.719n ± 0%    2.719n ± 0%        ~ (p=0.641 n=8)
Equal/same/16-4                   2.721n ± 2%    2.719n ± 0%   -0.07% (p=0.014 n=8)
Equal/same/20-4                   2.720n ± 0%    2.721n ± 2%        ~ (p=0.236 n=8)
Equal/same/32-4                   2.720n ± 1%    2.720n ± 0%        ~ (p=0.396 n=8)
Equal/same/4K-4                   2.719n ± 0%    2.720n ± 0%        ~ (p=0.663 n=8)
Equal/same/4M-4                   2.721n ± 0%    2.720n ± 0%        ~ (p=0.075 n=8)
Equal/same/64M-4                  2.720n ± 0%    2.720n ± 2%        ~ (p=0.806 n=8)
Equal/1-4                         6.671n ± 0%    5.449n ± 0%  -18.33% (p=0.000 n=8)
Equal/6-4                         8.761n ± 2%    7.508n ± 0%  -14.30% (p=0.000 n=8)
Equal/9-4                         8.343n ± 0%    7.091n ± 0%  -15.01% (p=0.000 n=8)
Equal/15-4                        8.339n ± 2%    7.090n ± 0%  -14.98% (p=0.000 n=8)
Equal/16-4                        9.173n ± 0%    7.925n ± 2%  -13.61% (p=0.000 n=8)
Equal/20-4                        11.26n ± 0%    10.01n ± 0%  -11.10% (p=0.000 n=8)
Equal/32-4                       10.425n ± 0%    9.176n ± 0%  -11.98% (p=0.000 n=8)
Equal/4K-4                        192.9n ± 0%    192.7n ± 0%   -0.10% (p=0.044 n=8)
Equal/4M-4                        191.3µ ± 0%    191.3µ ± 0%        ~ (p=0.798 n=8)
Equal/64M-4                       3.066m ± 2%    3.065m ± 0%        ~ (p=0.083 n=8)
EqualBothUnaligned/64_0-4         7.506n ± 2%    7.090n ± 2%   -5.55% (p=0.000 n=8)
EqualBothUnaligned/64_1-4         7.850n ± 1%    7.423n ± 0%   -5.43% (p=0.000 n=8)
EqualBothUnaligned/64_4-4         7.505n ± 0%    7.088n ± 0%   -5.56% (p=0.000 n=8)
EqualBothUnaligned/64_7-4         7.840n ± 0%    7.413n ± 0%   -5.44% (p=0.000 n=8)
EqualBothUnaligned/4096_0-4       193.0n ± 4%    190.9n ± 0%   -1.09% (p=0.004 n=8)
EqualBothUnaligned/4096_1-4       223.9n ± 0%    223.1n ± 0%   -0.36% (p=0.000 n=8)
EqualBothUnaligned/4096_4-4       191.9n ± 2%    191.5n ± 0%   -0.21% (p=0.004 n=8)
EqualBothUnaligned/4096_7-4       223.8n ± 0%    223.1n ± 1%        ~ (p=0.098 n=8)
EqualBothUnaligned/4194304_0-4    191.8µ ± 0%    191.8µ ± 0%        ~ (p=0.504 n=8)
EqualBothUnaligned/4194304_1-4    225.4µ ± 2%    225.5µ ± 0%        ~ (p=0.065 n=8)
EqualBothUnaligned/4194304_4-4    192.6µ ± 0%    192.7µ ± 2%   +0.06% (p=0.041 n=8)
EqualBothUnaligned/4194304_7-4    225.4µ ± 0%    225.5µ ± 0%   +0.05% (p=0.050 n=8)
EqualBothUnaligned/67108864_0-4   3.069m ± 0%    3.069m ± 0%        ~ (p=0.314 n=8)
EqualBothUnaligned/67108864_1-4   3.589m ± 0%    3.588m ± 0%        ~ (p=0.959 n=8)
EqualBothUnaligned/67108864_4-4   3.083m ± 0%    3.083m ± 2%        ~ (p=0.505 n=8)
EqualBothUnaligned/67108864_7-4   3.588m ± 0%    3.588m ± 0%        ~ (p=1.000 n=8)
geomean                           199.9n         190.5n        -4.70%

Change-Id: Ib8d0d4006dd39162a600ac98a5f44a0f05136ed3
Reviewed-on: https://go-review.googlesource.com/c/go/+/601135
Reviewed-by: Keith Randall
Reviewed-by: Cherry Mui
Auto-Submit: Keith Randall
LUCI-TryBot-Result: Go LUCI
Reviewed-by: Keith Randall
Auto-Submit: Keith Randall
---
 src/internal/bytealg/equal_arm64.s | 26 ++++++++++----------------
 1 file changed, 10 insertions(+), 16 deletions(-)

diff --git a/src/internal/bytealg/equal_arm64.s b/src/internal/bytealg/equal_arm64.s
index 4db9515474..408ab374e6 100644
--- a/src/internal/bytealg/equal_arm64.s
+++ b/src/internal/bytealg/equal_arm64.s
@@ -5,25 +5,11 @@
 #include "go_asm.h"
 #include "textflag.h"
 
-// memequal(a, b unsafe.Pointer, size uintptr) bool
-TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25
-	// short path to handle 0-byte case
-	CBZ	R2, equal
-	// short path to handle equal pointers
-	CMP	R0, R1
-	BEQ	equal
-	B	memeqbody<>(SB)
-equal:
-	MOVD	$1, R0
-	RET
-
 // memequal_varlen(a, b unsafe.Pointer) bool
 TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-17
-	CMP	R0, R1
-	BEQ	eq
 	MOVD	8(R26), R2    // compiler stores size at offset 8 in the closure
 	CBZ	R2, eq
-	B	memeqbody<>(SB)
+	B	runtime·memequal(SB)
 eq:
 	MOVD	$1, R0
 	RET
@@ -33,7 +19,13 @@ eq:
 // R1: pointer b
 // R2: data len
 // at return: result in R0
-TEXT memeqbody<>(SB),NOSPLIT,$0
+// memequal(a, b unsafe.Pointer, size uintptr) bool
+TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25
+	// short path to handle 0-byte case
+	CBZ	R2, equal
+	// short path to handle equal pointers
+	CMP	R0, R1
+	BEQ	equal
 	CMP	$1, R2
 	// handle 1-byte special case for better performance
 	BEQ	one
@@ -91,6 +83,7 @@ tail:
 	EOR	R4, R5
 	CBNZ	R5, not_equal
 	B	equal
+	PCALIGN	$16
 lt_8:
 	TBZ	$2, R2, lt_4
 	MOVWU	(R0), R4
@@ -103,6 +96,7 @@ lt_8:
 	EOR	R4, R5
 	CBNZ	R5, not_equal
 	B	equal
+	PCALIGN	$16
 lt_4:
 	TBZ	$1, R2, lt_2
 	MOVHU.P	2(R0), R4