mirror of
https://github.com/golang/go
synced 2024-11-23 20:10:08 -07:00
internal/bytealg: use word-wise comparison for Compare on arm
Use word-wise comparison for aligned buffers, otherwise fall back to the current byte-wise comparison. name old time/op new time/op delta BytesCompare/1-4 41.3ns ± 0% 36.4ns ± 1% -11.73% (p=0.008 n=5+5) BytesCompare/2-4 39.5ns ± 0% 39.5ns ± 1% ~ (p=0.960 n=5+5) BytesCompare/4-4 45.3ns ± 0% 41.0ns ± 1% -9.40% (p=0.008 n=5+5) BytesCompare/8-4 64.8ns ± 1% 44.7ns ± 0% -31.12% (p=0.008 n=5+5) BytesCompare/16-4 86.3ns ± 0% 55.1ns ± 0% -36.21% (p=0.008 n=5+5) BytesCompare/32-4 135ns ± 0% 70ns ± 1% -47.73% (p=0.008 n=5+5) BytesCompare/64-4 231ns ± 1% 99ns ± 0% -57.27% (p=0.016 n=5+4) BytesCompare/128-4 424ns ± 0% 147ns ± 0% -65.31% (p=0.000 n=4+5) BytesCompare/256-4 810ns ± 0% 243ns ± 0% -69.96% (p=0.008 n=5+5) BytesCompare/512-4 1.59µs ± 0% 0.44µs ± 0% -72.43% (p=0.008 n=5+5) BytesCompare/1024-4 3.14µs ± 1% 0.83µs ± 1% -73.56% (p=0.008 n=5+5) BytesCompare/2048-4 6.23µs ± 0% 1.61µs ± 1% -74.21% (p=0.008 n=5+5) CompareBytesEqual-4 79.4ns ± 0% 52.2ns ± 0% -34.23% (p=0.008 n=5+5) CompareBytesToNil-4 31.0ns ± 0% 30.3ns ± 0% -2.32% (p=0.008 n=5+5) CompareBytesEmpty-4 25.7ns ± 0% 25.7ns ± 0% ~ (p=0.556 n=4+5) CompareBytesIdentical-4 25.7ns ± 0% 25.7ns ± 0% ~ (p=1.000 n=5+5) CompareBytesSameLength-4 49.1ns ± 0% 48.5ns ± 0% -1.26% (p=0.008 n=5+5) CompareBytesDifferentLength-4 49.8ns ± 1% 49.3ns ± 0% -1.08% (p=0.008 n=5+5) CompareBytesBigUnaligned-4 5.71ms ± 1% 5.68ms ± 1% ~ (p=0.222 n=5+5) CompareBytesBig-4 4.95ms ± 0% 2.28ms ± 1% -53.81% (p=0.008 n=5+5) CompareBytesBigIdentical-4 27.2ns ± 1% 27.3ns ± 1% ~ (p=0.310 n=5+5) name old speed new speed delta CompareBytesBigUnaligned-4 184MB/s ± 1% 185MB/s ± 1% ~ (p=0.222 n=5+5) CompareBytesBig-4 212MB/s ± 0% 459MB/s ± 1% +116.51% (p=0.008 n=5+5) CompareBytesBigIdentical-4 38.5TB/s ± 0% 38.4TB/s ± 1% ~ (p=0.421 n=5+5) Also, this reduces time for TestCompareBytes by about 20 sec on a linux-arm builder via gomote. Updates #29001 Change-Id: I25f148739b9ccb7cb1fc97b3d8763549b0a66c16 Reviewed-on: https://go-review.googlesource.com/c/go/+/165338 Run-TryBot: Tobias Klauser <tobias.klauser@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Cherry Zhang <cherryyz@google.com>
This commit is contained in:
parent
f0749c74fd
commit
029a5af6a1
@ -29,27 +29,44 @@ TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-20
|
||||
// R7 points to return value (-1/0/1 will be written here)
|
||||
//
|
||||
// On exit:
|
||||
// R4, R5, and R6 are clobbered
|
||||
// R4, R5, R6 and R8 are clobbered
|
||||
TEXT cmpbody<>(SB),NOSPLIT|NOFRAME,$0-0
|
||||
CMP R2, R3
|
||||
BEQ samebytes
|
||||
CMP R0, R1
|
||||
MOVW R0, R6
|
||||
MOVW.LT R1, R6 // R6 is min(R0, R1)
|
||||
MOVW.LT R1, R6 // R6 is min(R0, R1)
|
||||
|
||||
ADD R2, R6 // R2 is current byte in a, R6 is last byte in a to compare
|
||||
loop:
|
||||
CMP $0, R6
|
||||
BEQ samebytes
|
||||
CMP $4, R6
|
||||
ADD R2, R6 // R2 is current byte in a, R6 is the end of the range to compare
|
||||
BLT byte_loop // length < 4
|
||||
AND $3, R2, R8
|
||||
CMP $0, R8
|
||||
BNE byte_loop // unaligned a, use byte-wise compare (TODO: try to align a)
|
||||
aligned_a:
|
||||
AND $3, R3, R8
|
||||
CMP $0, R8
|
||||
BNE byte_loop // unaligned b, use byte-wise compare
|
||||
AND $0xfffffffc, R6, R8
|
||||
// length >= 4
|
||||
chunk4_loop:
|
||||
MOVW.P 4(R2), R4
|
||||
MOVW.P 4(R3), R5
|
||||
CMP R4, R5
|
||||
BNE cmp
|
||||
CMP R2, R8
|
||||
BNE chunk4_loop
|
||||
CMP R2, R6
|
||||
BEQ samebytes // all compared bytes were the same; compare lengths
|
||||
BEQ samebytes // all compared bytes were the same; compare lengths
|
||||
byte_loop:
|
||||
MOVBU.P 1(R2), R4
|
||||
MOVBU.P 1(R3), R5
|
||||
CMP R4, R5
|
||||
BEQ loop
|
||||
// bytes differed
|
||||
MOVW.LT $1, R0
|
||||
MOVW.GT $-1, R0
|
||||
MOVW R0, (R7)
|
||||
RET
|
||||
BNE ret
|
||||
CMP R2, R6
|
||||
BNE byte_loop
|
||||
samebytes:
|
||||
CMP R0, R1
|
||||
MOVW.LT $1, R0
|
||||
@ -57,3 +74,13 @@ samebytes:
|
||||
MOVW.EQ $0, R0
|
||||
MOVW R0, (R7)
|
||||
RET
|
||||
ret:
|
||||
// bytes differed
|
||||
MOVW.LT $1, R0
|
||||
MOVW.GT $-1, R0
|
||||
MOVW R0, (R7)
|
||||
RET
|
||||
cmp:
|
||||
SUB $4, R2, R2
|
||||
SUB $4, R3, R3
|
||||
B byte_loop
|
||||
|
Loading…
Reference in New Issue
Block a user