1
0
mirror of https://github.com/golang/go synced 2024-09-28 20:14:28 -06:00

internal/bytealg: optimize indexbyte in amd64

goos: windows
goarch: amd64
pkg: bytes
cpu: AMD Ryzen 7 7840HS w/ Radeon 780M Graphics
                         │   old.txt   │               new.txt               │
                         │   sec/op    │   sec/op     vs base                │
IndexByte/10-16            2.613n ± 1%   2.558n ± 1%   -2.09% (p=0.014 n=10)
IndexByte/32-16            3.034n ± 1%   3.010n ± 2%        ~ (p=0.305 n=10)
IndexByte/4K-16            57.20n ± 2%   39.58n ± 2%  -30.81% (p=0.000 n=10)
IndexByte/4M-16            34.48µ ± 1%   33.83µ ± 2%   -1.87% (p=0.023 n=10)
IndexByte/64M-16           1.493m ± 2%   1.450m ± 2%   -2.89% (p=0.000 n=10)
IndexBytePortable/10-16    3.172n ± 4%   3.163n ± 2%        ~ (p=0.684 n=10)
IndexBytePortable/32-16    8.465n ± 2%   8.375n ± 3%        ~ (p=0.631 n=10)
IndexBytePortable/4K-16    852.0n ± 1%   846.6n ± 3%        ~ (p=0.971 n=10)
IndexBytePortable/4M-16    868.2µ ± 2%   856.6µ ± 2%        ~ (p=0.393 n=10)
IndexBytePortable/64M-16   13.81m ± 2%   13.88m ± 3%        ~ (p=0.684 n=10)
geomean                    1.204µ        1.148µ        -4.63%

                         │   old.txt    │               new.txt                │
                         │     B/s      │     B/s       vs base                │
IndexByte/10-16            3.565Gi ± 1%   3.641Gi ± 1%   +2.15% (p=0.015 n=10)
IndexByte/32-16            9.821Gi ± 1%   9.899Gi ± 2%        ~ (p=0.315 n=10)
IndexByte/4K-16            66.70Gi ± 2%   96.39Gi ± 2%  +44.52% (p=0.000 n=10)
IndexByte/4M-16            113.3Gi ± 1%   115.5Gi ± 2%   +1.91% (p=0.023 n=10)
IndexByte/64M-16           41.85Gi ± 2%   43.10Gi ± 2%   +2.98% (p=0.000 n=10)
IndexBytePortable/10-16    2.936Gi ± 4%   2.945Gi ± 2%        ~ (p=0.684 n=10)
IndexBytePortable/32-16    3.521Gi ± 2%   3.559Gi ± 3%        ~ (p=0.631 n=10)
IndexBytePortable/4K-16    4.477Gi ± 1%   4.506Gi ± 3%        ~ (p=0.971 n=10)
IndexBytePortable/4M-16    4.499Gi ± 2%   4.560Gi ± 2%        ~ (p=0.393 n=10)
IndexBytePortable/64M-16   4.525Gi ± 2%   4.504Gi ± 3%        ~ (p=0.684 n=10)
geomean                    10.04Gi        10.53Gi        +4.86%
This commit is contained in:
qiulaidongfeng 2023-10-31 21:53:44 +08:00
parent 0aa2197279
commit 7e95b8bfb0

View File

@ -45,6 +45,7 @@ sse:
LEAQ -16(SI)(BX*1), AX // AX = address of last 16 bytes
JMP sseloopentry
PCALIGN $16
sseloop:
// Move the next 16-byte chunk of the data into X1.
MOVOU (DI), X1
@ -124,6 +125,8 @@ avx2:
MOVD AX, X0
LEAQ -32(SI)(BX*1), R11
VPBROADCASTB X0, Y1
PCALIGN $32
avx2_loop:
VMOVDQU (DI), Y2
VPCMPEQB Y1, Y2, Y3