1
0
mirror of https://github.com/golang/go synced 2024-11-19 16:24:45 -07:00
go/src/runtime/duff_amd64.s
Ilya Tocar 5cf281a9b7 runtime: optimize duffcopy on amd64
Use movups to copy 16 bytes at a time.
Results (haswell):

name            old time/op  new time/op  delta
CopyFat8-48     0.62ns ± 3%  0.63ns ± 3%     ~     (p=0.535 n=20+20)
CopyFat12-48    0.92ns ± 2%  0.93ns ± 3%     ~     (p=0.594 n=17+18)
CopyFat16-48    1.23ns ± 2%  1.23ns ± 2%     ~     (p=0.839 n=20+19)
CopyFat24-48    1.85ns ± 2%  1.84ns ± 0%   -0.48%  (p=0.014 n=19+20)
CopyFat32-48    2.45ns ± 0%  2.45ns ± 1%     ~     (p=1.000 n=16+16)
CopyFat64-48    3.30ns ± 2%  2.14ns ± 1%  -35.00%  (p=0.000 n=20+18)
CopyFat128-48   6.05ns ± 0%  3.98ns ± 0%  -34.22%  (p=0.000 n=18+17)
CopyFat256-48   11.9ns ± 3%   7.7ns ± 0%  -35.87%  (p=0.000 n=20+17)
CopyFat512-48   23.0ns ± 2%  15.1ns ± 2%  -34.52%  (p=0.000 n=20+18)
CopyFat1024-48  44.8ns ± 1%  29.8ns ± 2%  -33.48%  (p=0.000 n=17+19)

Change-Id: I8a78773c656d400726a020894461e00c59f896bf
Reviewed-on: https://go-review.googlesource.com/14836
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2015-09-22 15:02:37 +00:00

428 lines
5.5 KiB
ArmAsm

// AUTO-GENERATED by mkduff.go
// Run go generate from src/runtime to update.
// See mkduff.go for comments.
#include "textflag.h"
// duffzero stores the 16-byte register X0 to consecutive memory starting at
// the address held in DI.
//
// Layout: 16 identical blocks. Each block issues four 16-byte MOVUPS stores
// (offsets 0, 16, 32 and 48 from DI) and then advances DI by 64. Running the
// routine from its first instruction therefore writes 16 * 64 = 1024 bytes
// and leaves DI 1024 bytes past its initial value.
//
// In:    DI = destination address. MOVUPS has no alignment requirement, so
//             DI does not need to be 16-byte aligned.
//        X0 = 16-byte value to store. This routine never writes X0;
//             presumably the caller zeroes it first (verify against the
//             code generator's call sites).
// Out:   DI = address just past the last byte written.
// Clobb: flags (via ADDQ).
// Stack: none ($0-0 declares no frame and no stack arguments).
//
// NOTE(review): as a Duff's-device body this is presumably entered at an
// interior block boundary to fill fewer than 1024 bytes. If so, the encoded
// size of every block is part of the contract with the code generator, and
// instructions here must not be hand-edited. The file header states that this
// file is generated by mkduff.go.
TEXT runtime·duffzero(SB), NOSPLIT, $0-0
// Block 1 of 16: store X0 over the 64 bytes at (DI), then DI += 64.
MOVUPS X0,(DI)
MOVUPS X0,16(DI)
MOVUPS X0,32(DI)
MOVUPS X0,48(DI)
ADDQ $64,DI
// Block 2 of 16.
MOVUPS X0,(DI)
MOVUPS X0,16(DI)
MOVUPS X0,32(DI)
MOVUPS X0,48(DI)
ADDQ $64,DI
// Block 3 of 16.
MOVUPS X0,(DI)
MOVUPS X0,16(DI)
MOVUPS X0,32(DI)
MOVUPS X0,48(DI)
ADDQ $64,DI
// Block 4 of 16.
MOVUPS X0,(DI)
MOVUPS X0,16(DI)
MOVUPS X0,32(DI)
MOVUPS X0,48(DI)
ADDQ $64,DI
// Block 5 of 16.
MOVUPS X0,(DI)
MOVUPS X0,16(DI)
MOVUPS X0,32(DI)
MOVUPS X0,48(DI)
ADDQ $64,DI
// Block 6 of 16.
MOVUPS X0,(DI)
MOVUPS X0,16(DI)
MOVUPS X0,32(DI)
MOVUPS X0,48(DI)
ADDQ $64,DI
// Block 7 of 16.
MOVUPS X0,(DI)
MOVUPS X0,16(DI)
MOVUPS X0,32(DI)
MOVUPS X0,48(DI)
ADDQ $64,DI
// Block 8 of 16.
MOVUPS X0,(DI)
MOVUPS X0,16(DI)
MOVUPS X0,32(DI)
MOVUPS X0,48(DI)
ADDQ $64,DI
// Block 9 of 16.
MOVUPS X0,(DI)
MOVUPS X0,16(DI)
MOVUPS X0,32(DI)
MOVUPS X0,48(DI)
ADDQ $64,DI
// Block 10 of 16.
MOVUPS X0,(DI)
MOVUPS X0,16(DI)
MOVUPS X0,32(DI)
MOVUPS X0,48(DI)
ADDQ $64,DI
// Block 11 of 16.
MOVUPS X0,(DI)
MOVUPS X0,16(DI)
MOVUPS X0,32(DI)
MOVUPS X0,48(DI)
ADDQ $64,DI
// Block 12 of 16.
MOVUPS X0,(DI)
MOVUPS X0,16(DI)
MOVUPS X0,32(DI)
MOVUPS X0,48(DI)
ADDQ $64,DI
// Block 13 of 16.
MOVUPS X0,(DI)
MOVUPS X0,16(DI)
MOVUPS X0,32(DI)
MOVUPS X0,48(DI)
ADDQ $64,DI
// Block 14 of 16.
MOVUPS X0,(DI)
MOVUPS X0,16(DI)
MOVUPS X0,32(DI)
MOVUPS X0,48(DI)
ADDQ $64,DI
// Block 15 of 16.
MOVUPS X0,(DI)
MOVUPS X0,16(DI)
MOVUPS X0,32(DI)
MOVUPS X0,48(DI)
ADDQ $64,DI
// Block 16 of 16.
MOVUPS X0,(DI)
MOVUPS X0,16(DI)
MOVUPS X0,32(DI)
MOVUPS X0,48(DI)
ADDQ $64,DI
// Single exit shared by every entry point into the sequence above.
RET
// duffcopy copies memory from the address in SI to the address in DI,
// 16 bytes at a time, using X0 as the transfer register.
//
// Layout: 64 identical groups of four instructions. Each group loads 16 bytes
// from (SI) into X0, advances SI by 16, stores X0 to (DI), and advances DI by
// 16. Running the routine from its first instruction therefore copies
// 64 * 16 = 1024 bytes in ascending address order and leaves both SI and DI
// 1024 bytes past their initial values.
//
// In:    SI = source address, DI = destination address. MOVUPS has no
//        alignment requirement, so neither pointer needs 16-byte alignment.
// Out:   SI, DI = addresses just past the last byte read / written.
// Clobb: X0 (holds the last 16 bytes copied), flags (via ADDQ).
// Stack: none ($0-0 declares no frame and no stack arguments).
//
// NOTE(review): as a Duff's-device body this is presumably entered at an
// interior group boundary to copy fewer than 1024 bytes. If so, the encoded
// size of every group is part of the contract with the code generator, and
// instructions here must not be hand-edited. The file header states that this
// file is generated by mkduff.go.
TEXT runtime·duffcopy(SB), NOSPLIT, $0-0
// Group 1 of 64: X0 = 16 bytes at (SI); SI += 16; store X0 at (DI); DI += 16.
MOVUPS (SI), X0
ADDQ $16, SI
MOVUPS X0, (DI)
ADDQ $16, DI
MOVUPS (SI), X0
ADDQ $16, SI
MOVUPS X0, (DI)
ADDQ $16, DI
MOVUPS (SI), X0
ADDQ $16, SI
MOVUPS X0, (DI)
ADDQ $16, DI
MOVUPS (SI), X0
ADDQ $16, SI
MOVUPS X0, (DI)
ADDQ $16, DI
MOVUPS (SI), X0
ADDQ $16, SI
MOVUPS X0, (DI)
ADDQ $16, DI
MOVUPS (SI), X0
ADDQ $16, SI
MOVUPS X0, (DI)
ADDQ $16, DI
MOVUPS (SI), X0
ADDQ $16, SI
MOVUPS X0, (DI)
ADDQ $16, DI
MOVUPS (SI), X0
ADDQ $16, SI
MOVUPS X0, (DI)
ADDQ $16, DI
MOVUPS (SI), X0
ADDQ $16, SI
MOVUPS X0, (DI)
ADDQ $16, DI
MOVUPS (SI), X0
ADDQ $16, SI
MOVUPS X0, (DI)
ADDQ $16, DI
MOVUPS (SI), X0
ADDQ $16, SI
MOVUPS X0, (DI)
ADDQ $16, DI
MOVUPS (SI), X0
ADDQ $16, SI
MOVUPS X0, (DI)
ADDQ $16, DI
MOVUPS (SI), X0
ADDQ $16, SI
MOVUPS X0, (DI)
ADDQ $16, DI
MOVUPS (SI), X0
ADDQ $16, SI
MOVUPS X0, (DI)
ADDQ $16, DI
MOVUPS (SI), X0
ADDQ $16, SI
MOVUPS X0, (DI)
ADDQ $16, DI
MOVUPS (SI), X0
ADDQ $16, SI
MOVUPS X0, (DI)
ADDQ $16, DI
// Groups 17-32 follow (groups 1-16 above cover the first 256 bytes of a full run).
MOVUPS (SI), X0
ADDQ $16, SI
MOVUPS X0, (DI)
ADDQ $16, DI
MOVUPS (SI), X0
ADDQ $16, SI
MOVUPS X0, (DI)
ADDQ $16, DI
MOVUPS (SI), X0
ADDQ $16, SI
MOVUPS X0, (DI)
ADDQ $16, DI
MOVUPS (SI), X0
ADDQ $16, SI
MOVUPS X0, (DI)
ADDQ $16, DI
MOVUPS (SI), X0
ADDQ $16, SI
MOVUPS X0, (DI)
ADDQ $16, DI
MOVUPS (SI), X0
ADDQ $16, SI
MOVUPS X0, (DI)
ADDQ $16, DI
MOVUPS (SI), X0
ADDQ $16, SI
MOVUPS X0, (DI)
ADDQ $16, DI
MOVUPS (SI), X0
ADDQ $16, SI
MOVUPS X0, (DI)
ADDQ $16, DI
MOVUPS (SI), X0
ADDQ $16, SI
MOVUPS X0, (DI)
ADDQ $16, DI
MOVUPS (SI), X0
ADDQ $16, SI
MOVUPS X0, (DI)
ADDQ $16, DI
MOVUPS (SI), X0
ADDQ $16, SI
MOVUPS X0, (DI)
ADDQ $16, DI
MOVUPS (SI), X0
ADDQ $16, SI
MOVUPS X0, (DI)
ADDQ $16, DI
MOVUPS (SI), X0
ADDQ $16, SI
MOVUPS X0, (DI)
ADDQ $16, DI
MOVUPS (SI), X0
ADDQ $16, SI
MOVUPS X0, (DI)
ADDQ $16, DI
MOVUPS (SI), X0
ADDQ $16, SI
MOVUPS X0, (DI)
ADDQ $16, DI
MOVUPS (SI), X0
ADDQ $16, SI
MOVUPS X0, (DI)
ADDQ $16, DI
// Groups 33-48 follow (groups 1-32 above cover the first 512 bytes of a full run).
MOVUPS (SI), X0
ADDQ $16, SI
MOVUPS X0, (DI)
ADDQ $16, DI
MOVUPS (SI), X0
ADDQ $16, SI
MOVUPS X0, (DI)
ADDQ $16, DI
MOVUPS (SI), X0
ADDQ $16, SI
MOVUPS X0, (DI)
ADDQ $16, DI
MOVUPS (SI), X0
ADDQ $16, SI
MOVUPS X0, (DI)
ADDQ $16, DI
MOVUPS (SI), X0
ADDQ $16, SI
MOVUPS X0, (DI)
ADDQ $16, DI
MOVUPS (SI), X0
ADDQ $16, SI
MOVUPS X0, (DI)
ADDQ $16, DI
MOVUPS (SI), X0
ADDQ $16, SI
MOVUPS X0, (DI)
ADDQ $16, DI
MOVUPS (SI), X0
ADDQ $16, SI
MOVUPS X0, (DI)
ADDQ $16, DI
MOVUPS (SI), X0
ADDQ $16, SI
MOVUPS X0, (DI)
ADDQ $16, DI
MOVUPS (SI), X0
ADDQ $16, SI
MOVUPS X0, (DI)
ADDQ $16, DI
MOVUPS (SI), X0
ADDQ $16, SI
MOVUPS X0, (DI)
ADDQ $16, DI
MOVUPS (SI), X0
ADDQ $16, SI
MOVUPS X0, (DI)
ADDQ $16, DI
MOVUPS (SI), X0
ADDQ $16, SI
MOVUPS X0, (DI)
ADDQ $16, DI
MOVUPS (SI), X0
ADDQ $16, SI
MOVUPS X0, (DI)
ADDQ $16, DI
MOVUPS (SI), X0
ADDQ $16, SI
MOVUPS X0, (DI)
ADDQ $16, DI
MOVUPS (SI), X0
ADDQ $16, SI
MOVUPS X0, (DI)
ADDQ $16, DI
// Groups 49-64 follow (groups 1-48 above cover the first 768 bytes of a full run).
MOVUPS (SI), X0
ADDQ $16, SI
MOVUPS X0, (DI)
ADDQ $16, DI
MOVUPS (SI), X0
ADDQ $16, SI
MOVUPS X0, (DI)
ADDQ $16, DI
MOVUPS (SI), X0
ADDQ $16, SI
MOVUPS X0, (DI)
ADDQ $16, DI
MOVUPS (SI), X0
ADDQ $16, SI
MOVUPS X0, (DI)
ADDQ $16, DI
MOVUPS (SI), X0
ADDQ $16, SI
MOVUPS X0, (DI)
ADDQ $16, DI
MOVUPS (SI), X0
ADDQ $16, SI
MOVUPS X0, (DI)
ADDQ $16, DI
MOVUPS (SI), X0
ADDQ $16, SI
MOVUPS X0, (DI)
ADDQ $16, DI
MOVUPS (SI), X0
ADDQ $16, SI
MOVUPS X0, (DI)
ADDQ $16, DI
MOVUPS (SI), X0
ADDQ $16, SI
MOVUPS X0, (DI)
ADDQ $16, DI
MOVUPS (SI), X0
ADDQ $16, SI
MOVUPS X0, (DI)
ADDQ $16, DI
MOVUPS (SI), X0
ADDQ $16, SI
MOVUPS X0, (DI)
ADDQ $16, DI
MOVUPS (SI), X0
ADDQ $16, SI
MOVUPS X0, (DI)
ADDQ $16, DI
MOVUPS (SI), X0
ADDQ $16, SI
MOVUPS X0, (DI)
ADDQ $16, DI
MOVUPS (SI), X0
ADDQ $16, SI
MOVUPS X0, (DI)
ADDQ $16, DI
MOVUPS (SI), X0
ADDQ $16, SI
MOVUPS X0, (DI)
ADDQ $16, DI
MOVUPS (SI), X0
ADDQ $16, SI
MOVUPS X0, (DI)
ADDQ $16, DI
// Single exit shared by every entry point into the sequence above.
RET