mirror of
https://github.com/golang/go
synced 2024-11-19 16:24:45 -07:00
5cf281a9b7
Use movups to copy 16 bytes at a time.

Results (haswell):

name            old time/op  new time/op  delta
CopyFat8-48     0.62ns ± 3%  0.63ns ± 3%     ~     (p=0.535 n=20+20)
CopyFat12-48    0.92ns ± 2%  0.93ns ± 3%     ~     (p=0.594 n=17+18)
CopyFat16-48    1.23ns ± 2%  1.23ns ± 2%     ~     (p=0.839 n=20+19)
CopyFat24-48    1.85ns ± 2%  1.84ns ± 0%   -0.48%  (p=0.014 n=19+20)
CopyFat32-48    2.45ns ± 0%  2.45ns ± 1%     ~     (p=1.000 n=16+16)
CopyFat64-48    3.30ns ± 2%  2.14ns ± 1%  -35.00%  (p=0.000 n=20+18)
CopyFat128-48   6.05ns ± 0%  3.98ns ± 0%  -34.22%  (p=0.000 n=18+17)
CopyFat256-48   11.9ns ± 3%   7.7ns ± 0%  -35.87%  (p=0.000 n=20+17)
CopyFat512-48   23.0ns ± 2%  15.1ns ± 2%  -34.52%  (p=0.000 n=20+18)
CopyFat1024-48  44.8ns ± 1%  29.8ns ± 2%  -33.48%  (p=0.000 n=17+19)

Change-Id: I8a78773c656d400726a020894461e00c59f896bf
Reviewed-on: https://go-review.googlesource.com/14836
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
428 lines
5.5 KiB
ArmAsm
428 lines
5.5 KiB
ArmAsm
// AUTO-GENERATED by mkduff.go
|
|
// Run go generate from src/runtime to update.
|
|
// See mkduff.go for comments.
|
|
|
|
#include "textflag.h"
|
|
|
|
TEXT runtime·duffzero(SB), NOSPLIT, $0-0
|
|
MOVUPS X0,(DI)
|
|
MOVUPS X0,16(DI)
|
|
MOVUPS X0,32(DI)
|
|
MOVUPS X0,48(DI)
|
|
ADDQ $64,DI
|
|
|
|
MOVUPS X0,(DI)
|
|
MOVUPS X0,16(DI)
|
|
MOVUPS X0,32(DI)
|
|
MOVUPS X0,48(DI)
|
|
ADDQ $64,DI
|
|
|
|
MOVUPS X0,(DI)
|
|
MOVUPS X0,16(DI)
|
|
MOVUPS X0,32(DI)
|
|
MOVUPS X0,48(DI)
|
|
ADDQ $64,DI
|
|
|
|
MOVUPS X0,(DI)
|
|
MOVUPS X0,16(DI)
|
|
MOVUPS X0,32(DI)
|
|
MOVUPS X0,48(DI)
|
|
ADDQ $64,DI
|
|
|
|
MOVUPS X0,(DI)
|
|
MOVUPS X0,16(DI)
|
|
MOVUPS X0,32(DI)
|
|
MOVUPS X0,48(DI)
|
|
ADDQ $64,DI
|
|
|
|
MOVUPS X0,(DI)
|
|
MOVUPS X0,16(DI)
|
|
MOVUPS X0,32(DI)
|
|
MOVUPS X0,48(DI)
|
|
ADDQ $64,DI
|
|
|
|
MOVUPS X0,(DI)
|
|
MOVUPS X0,16(DI)
|
|
MOVUPS X0,32(DI)
|
|
MOVUPS X0,48(DI)
|
|
ADDQ $64,DI
|
|
|
|
MOVUPS X0,(DI)
|
|
MOVUPS X0,16(DI)
|
|
MOVUPS X0,32(DI)
|
|
MOVUPS X0,48(DI)
|
|
ADDQ $64,DI
|
|
|
|
MOVUPS X0,(DI)
|
|
MOVUPS X0,16(DI)
|
|
MOVUPS X0,32(DI)
|
|
MOVUPS X0,48(DI)
|
|
ADDQ $64,DI
|
|
|
|
MOVUPS X0,(DI)
|
|
MOVUPS X0,16(DI)
|
|
MOVUPS X0,32(DI)
|
|
MOVUPS X0,48(DI)
|
|
ADDQ $64,DI
|
|
|
|
MOVUPS X0,(DI)
|
|
MOVUPS X0,16(DI)
|
|
MOVUPS X0,32(DI)
|
|
MOVUPS X0,48(DI)
|
|
ADDQ $64,DI
|
|
|
|
MOVUPS X0,(DI)
|
|
MOVUPS X0,16(DI)
|
|
MOVUPS X0,32(DI)
|
|
MOVUPS X0,48(DI)
|
|
ADDQ $64,DI
|
|
|
|
MOVUPS X0,(DI)
|
|
MOVUPS X0,16(DI)
|
|
MOVUPS X0,32(DI)
|
|
MOVUPS X0,48(DI)
|
|
ADDQ $64,DI
|
|
|
|
MOVUPS X0,(DI)
|
|
MOVUPS X0,16(DI)
|
|
MOVUPS X0,32(DI)
|
|
MOVUPS X0,48(DI)
|
|
ADDQ $64,DI
|
|
|
|
MOVUPS X0,(DI)
|
|
MOVUPS X0,16(DI)
|
|
MOVUPS X0,32(DI)
|
|
MOVUPS X0,48(DI)
|
|
ADDQ $64,DI
|
|
|
|
MOVUPS X0,(DI)
|
|
MOVUPS X0,16(DI)
|
|
MOVUPS X0,32(DI)
|
|
MOVUPS X0,48(DI)
|
|
ADDQ $64,DI
|
|
|
|
RET
|
|
|
|
TEXT runtime·duffcopy(SB), NOSPLIT, $0-0
|
|
MOVUPS (SI), X0
|
|
ADDQ $16, SI
|
|
MOVUPS X0, (DI)
|
|
ADDQ $16, DI
|
|
|
|
MOVUPS (SI), X0
|
|
ADDQ $16, SI
|
|
MOVUPS X0, (DI)
|
|
ADDQ $16, DI
|
|
|
|
MOVUPS (SI), X0
|
|
ADDQ $16, SI
|
|
MOVUPS X0, (DI)
|
|
ADDQ $16, DI
|
|
|
|
MOVUPS (SI), X0
|
|
ADDQ $16, SI
|
|
MOVUPS X0, (DI)
|
|
ADDQ $16, DI
|
|
|
|
MOVUPS (SI), X0
|
|
ADDQ $16, SI
|
|
MOVUPS X0, (DI)
|
|
ADDQ $16, DI
|
|
|
|
MOVUPS (SI), X0
|
|
ADDQ $16, SI
|
|
MOVUPS X0, (DI)
|
|
ADDQ $16, DI
|
|
|
|
MOVUPS (SI), X0
|
|
ADDQ $16, SI
|
|
MOVUPS X0, (DI)
|
|
ADDQ $16, DI
|
|
|
|
MOVUPS (SI), X0
|
|
ADDQ $16, SI
|
|
MOVUPS X0, (DI)
|
|
ADDQ $16, DI
|
|
|
|
MOVUPS (SI), X0
|
|
ADDQ $16, SI
|
|
MOVUPS X0, (DI)
|
|
ADDQ $16, DI
|
|
|
|
MOVUPS (SI), X0
|
|
ADDQ $16, SI
|
|
MOVUPS X0, (DI)
|
|
ADDQ $16, DI
|
|
|
|
MOVUPS (SI), X0
|
|
ADDQ $16, SI
|
|
MOVUPS X0, (DI)
|
|
ADDQ $16, DI
|
|
|
|
MOVUPS (SI), X0
|
|
ADDQ $16, SI
|
|
MOVUPS X0, (DI)
|
|
ADDQ $16, DI
|
|
|
|
MOVUPS (SI), X0
|
|
ADDQ $16, SI
|
|
MOVUPS X0, (DI)
|
|
ADDQ $16, DI
|
|
|
|
MOVUPS (SI), X0
|
|
ADDQ $16, SI
|
|
MOVUPS X0, (DI)
|
|
ADDQ $16, DI
|
|
|
|
MOVUPS (SI), X0
|
|
ADDQ $16, SI
|
|
MOVUPS X0, (DI)
|
|
ADDQ $16, DI
|
|
|
|
MOVUPS (SI), X0
|
|
ADDQ $16, SI
|
|
MOVUPS X0, (DI)
|
|
ADDQ $16, DI
|
|
|
|
MOVUPS (SI), X0
|
|
ADDQ $16, SI
|
|
MOVUPS X0, (DI)
|
|
ADDQ $16, DI
|
|
|
|
MOVUPS (SI), X0
|
|
ADDQ $16, SI
|
|
MOVUPS X0, (DI)
|
|
ADDQ $16, DI
|
|
|
|
MOVUPS (SI), X0
|
|
ADDQ $16, SI
|
|
MOVUPS X0, (DI)
|
|
ADDQ $16, DI
|
|
|
|
MOVUPS (SI), X0
|
|
ADDQ $16, SI
|
|
MOVUPS X0, (DI)
|
|
ADDQ $16, DI
|
|
|
|
MOVUPS (SI), X0
|
|
ADDQ $16, SI
|
|
MOVUPS X0, (DI)
|
|
ADDQ $16, DI
|
|
|
|
MOVUPS (SI), X0
|
|
ADDQ $16, SI
|
|
MOVUPS X0, (DI)
|
|
ADDQ $16, DI
|
|
|
|
MOVUPS (SI), X0
|
|
ADDQ $16, SI
|
|
MOVUPS X0, (DI)
|
|
ADDQ $16, DI
|
|
|
|
MOVUPS (SI), X0
|
|
ADDQ $16, SI
|
|
MOVUPS X0, (DI)
|
|
ADDQ $16, DI
|
|
|
|
MOVUPS (SI), X0
|
|
ADDQ $16, SI
|
|
MOVUPS X0, (DI)
|
|
ADDQ $16, DI
|
|
|
|
MOVUPS (SI), X0
|
|
ADDQ $16, SI
|
|
MOVUPS X0, (DI)
|
|
ADDQ $16, DI
|
|
|
|
MOVUPS (SI), X0
|
|
ADDQ $16, SI
|
|
MOVUPS X0, (DI)
|
|
ADDQ $16, DI
|
|
|
|
MOVUPS (SI), X0
|
|
ADDQ $16, SI
|
|
MOVUPS X0, (DI)
|
|
ADDQ $16, DI
|
|
|
|
MOVUPS (SI), X0
|
|
ADDQ $16, SI
|
|
MOVUPS X0, (DI)
|
|
ADDQ $16, DI
|
|
|
|
MOVUPS (SI), X0
|
|
ADDQ $16, SI
|
|
MOVUPS X0, (DI)
|
|
ADDQ $16, DI
|
|
|
|
MOVUPS (SI), X0
|
|
ADDQ $16, SI
|
|
MOVUPS X0, (DI)
|
|
ADDQ $16, DI
|
|
|
|
MOVUPS (SI), X0
|
|
ADDQ $16, SI
|
|
MOVUPS X0, (DI)
|
|
ADDQ $16, DI
|
|
|
|
MOVUPS (SI), X0
|
|
ADDQ $16, SI
|
|
MOVUPS X0, (DI)
|
|
ADDQ $16, DI
|
|
|
|
MOVUPS (SI), X0
|
|
ADDQ $16, SI
|
|
MOVUPS X0, (DI)
|
|
ADDQ $16, DI
|
|
|
|
MOVUPS (SI), X0
|
|
ADDQ $16, SI
|
|
MOVUPS X0, (DI)
|
|
ADDQ $16, DI
|
|
|
|
MOVUPS (SI), X0
|
|
ADDQ $16, SI
|
|
MOVUPS X0, (DI)
|
|
ADDQ $16, DI
|
|
|
|
MOVUPS (SI), X0
|
|
ADDQ $16, SI
|
|
MOVUPS X0, (DI)
|
|
ADDQ $16, DI
|
|
|
|
MOVUPS (SI), X0
|
|
ADDQ $16, SI
|
|
MOVUPS X0, (DI)
|
|
ADDQ $16, DI
|
|
|
|
MOVUPS (SI), X0
|
|
ADDQ $16, SI
|
|
MOVUPS X0, (DI)
|
|
ADDQ $16, DI
|
|
|
|
MOVUPS (SI), X0
|
|
ADDQ $16, SI
|
|
MOVUPS X0, (DI)
|
|
ADDQ $16, DI
|
|
|
|
MOVUPS (SI), X0
|
|
ADDQ $16, SI
|
|
MOVUPS X0, (DI)
|
|
ADDQ $16, DI
|
|
|
|
MOVUPS (SI), X0
|
|
ADDQ $16, SI
|
|
MOVUPS X0, (DI)
|
|
ADDQ $16, DI
|
|
|
|
MOVUPS (SI), X0
|
|
ADDQ $16, SI
|
|
MOVUPS X0, (DI)
|
|
ADDQ $16, DI
|
|
|
|
MOVUPS (SI), X0
|
|
ADDQ $16, SI
|
|
MOVUPS X0, (DI)
|
|
ADDQ $16, DI
|
|
|
|
MOVUPS (SI), X0
|
|
ADDQ $16, SI
|
|
MOVUPS X0, (DI)
|
|
ADDQ $16, DI
|
|
|
|
MOVUPS (SI), X0
|
|
ADDQ $16, SI
|
|
MOVUPS X0, (DI)
|
|
ADDQ $16, DI
|
|
|
|
MOVUPS (SI), X0
|
|
ADDQ $16, SI
|
|
MOVUPS X0, (DI)
|
|
ADDQ $16, DI
|
|
|
|
MOVUPS (SI), X0
|
|
ADDQ $16, SI
|
|
MOVUPS X0, (DI)
|
|
ADDQ $16, DI
|
|
|
|
MOVUPS (SI), X0
|
|
ADDQ $16, SI
|
|
MOVUPS X0, (DI)
|
|
ADDQ $16, DI
|
|
|
|
MOVUPS (SI), X0
|
|
ADDQ $16, SI
|
|
MOVUPS X0, (DI)
|
|
ADDQ $16, DI
|
|
|
|
MOVUPS (SI), X0
|
|
ADDQ $16, SI
|
|
MOVUPS X0, (DI)
|
|
ADDQ $16, DI
|
|
|
|
MOVUPS (SI), X0
|
|
ADDQ $16, SI
|
|
MOVUPS X0, (DI)
|
|
ADDQ $16, DI
|
|
|
|
MOVUPS (SI), X0
|
|
ADDQ $16, SI
|
|
MOVUPS X0, (DI)
|
|
ADDQ $16, DI
|
|
|
|
MOVUPS (SI), X0
|
|
ADDQ $16, SI
|
|
MOVUPS X0, (DI)
|
|
ADDQ $16, DI
|
|
|
|
MOVUPS (SI), X0
|
|
ADDQ $16, SI
|
|
MOVUPS X0, (DI)
|
|
ADDQ $16, DI
|
|
|
|
MOVUPS (SI), X0
|
|
ADDQ $16, SI
|
|
MOVUPS X0, (DI)
|
|
ADDQ $16, DI
|
|
|
|
MOVUPS (SI), X0
|
|
ADDQ $16, SI
|
|
MOVUPS X0, (DI)
|
|
ADDQ $16, DI
|
|
|
|
MOVUPS (SI), X0
|
|
ADDQ $16, SI
|
|
MOVUPS X0, (DI)
|
|
ADDQ $16, DI
|
|
|
|
MOVUPS (SI), X0
|
|
ADDQ $16, SI
|
|
MOVUPS X0, (DI)
|
|
ADDQ $16, DI
|
|
|
|
MOVUPS (SI), X0
|
|
ADDQ $16, SI
|
|
MOVUPS X0, (DI)
|
|
ADDQ $16, DI
|
|
|
|
MOVUPS (SI), X0
|
|
ADDQ $16, SI
|
|
MOVUPS X0, (DI)
|
|
ADDQ $16, DI
|
|
|
|
MOVUPS (SI), X0
|
|
ADDQ $16, SI
|
|
MOVUPS X0, (DI)
|
|
ADDQ $16, DI
|
|
|
|
MOVUPS (SI), X0
|
|
ADDQ $16, SI
|
|
MOVUPS X0, (DI)
|
|
ADDQ $16, DI
|
|
|
|
MOVUPS (SI), X0
|
|
ADDQ $16, SI
|
|
MOVUPS X0, (DI)
|
|
ADDQ $16, DI
|
|
|
|
RET
|