mirror of
https://github.com/golang/go
synced 2024-11-12 10:20:27 -07:00
bytes: improve Compare function on amd64 for large byte arrays
This patch contains only loop unrolling change for size > 63B Following are the performance numbers for various sizes on On Haswell based system: Intel(R) Core(TM) i7-4770 CPU @ 3.40GHz. benchcmp go.head.8.25.15.txt go.head.8.25.15.opt.txt benchmark old ns/op new ns/op delta BenchmarkBytesCompare1-4 5.37 5.37 +0.00% BenchmarkBytesCompare2-4 5.37 5.38 +0.19% BenchmarkBytesCompare4-4 5.37 5.37 +0.00% BenchmarkBytesCompare8-4 4.42 4.38 -0.90% BenchmarkBytesCompare16-4 4.27 4.45 +4.22% BenchmarkBytesCompare32-4 5.30 5.36 +1.13% BenchmarkBytesCompare64-4 6.93 6.78 -2.16% BenchmarkBytesCompare128-4 10.3 9.50 -7.77% BenchmarkBytesCompare256-4 17.1 13.8 -19.30% BenchmarkBytesCompare512-4 31.3 22.1 -29.39% BenchmarkBytesCompare1024-4 62.5 39.0 -37.60% BenchmarkBytesCompare2048-4 112 73.2 -34.64% Change-Id: I4eeb1c22732fd62cbac97ba757b0d29f648d4ef1 Reviewed-on: https://go-review.googlesource.com/11871 Reviewed-by: Keith Randall <khr@golang.org>
This commit is contained in:
parent
abab21b1d1
commit
32add8d7c8
@ -1255,3 +1255,34 @@ func BenchmarkRepeat(b *testing.B) {
|
||||
Repeat([]byte("-"), 80)
|
||||
}
|
||||
}
|
||||
|
||||
func benchmarkBytesCompare(b *testing.B, n int) {
|
||||
var x = make([]byte, n)
|
||||
var y = make([]byte, n)
|
||||
|
||||
for i := 0; i < n; i++ {
|
||||
x[i] = 'a'
|
||||
}
|
||||
|
||||
for i := 0; i < n; i++ {
|
||||
y[i] = 'a'
|
||||
}
|
||||
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
Compare(x, y)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkBytesCompare1(b *testing.B) { benchmarkBytesCompare(b, 1) }
|
||||
func BenchmarkBytesCompare2(b *testing.B) { benchmarkBytesCompare(b, 2) }
|
||||
func BenchmarkBytesCompare4(b *testing.B) { benchmarkBytesCompare(b, 4) }
|
||||
func BenchmarkBytesCompare8(b *testing.B) { benchmarkBytesCompare(b, 8) }
|
||||
func BenchmarkBytesCompare16(b *testing.B) { benchmarkBytesCompare(b, 16) }
|
||||
func BenchmarkBytesCompare32(b *testing.B) { benchmarkBytesCompare(b, 32) }
|
||||
func BenchmarkBytesCompare64(b *testing.B) { benchmarkBytesCompare(b, 64) }
|
||||
func BenchmarkBytesCompare128(b *testing.B) { benchmarkBytesCompare(b, 128) }
|
||||
func BenchmarkBytesCompare256(b *testing.B) { benchmarkBytesCompare(b, 256) }
|
||||
func BenchmarkBytesCompare512(b *testing.B) { benchmarkBytesCompare(b, 512) }
|
||||
func BenchmarkBytesCompare1024(b *testing.B) { benchmarkBytesCompare(b, 1024) }
|
||||
func BenchmarkBytesCompare2048(b *testing.B) { benchmarkBytesCompare(b, 2048) }
|
||||
|
@ -1445,6 +1445,8 @@ TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
|
||||
CMPQ R8, $8
|
||||
JB small
|
||||
|
||||
CMPQ R8, $63
|
||||
JA big_loop
|
||||
loop:
|
||||
CMPQ R8, $16
|
||||
JBE _0through16
|
||||
@ -1459,6 +1461,17 @@ loop:
|
||||
SUBQ $16, R8
|
||||
JMP loop
|
||||
|
||||
diff64:
|
||||
ADDQ $48, SI
|
||||
ADDQ $48, DI
|
||||
JMP diff16
|
||||
diff48:
|
||||
ADDQ $32, SI
|
||||
ADDQ $32, DI
|
||||
JMP diff16
|
||||
diff32:
|
||||
ADDQ $16, SI
|
||||
ADDQ $16, DI
|
||||
// AX = bit mask of differences
|
||||
diff16:
|
||||
BSFQ AX, BX // index of first byte that differs
|
||||
@ -1545,6 +1558,43 @@ allsame:
|
||||
MOVQ AX, (R9)
|
||||
RET
|
||||
|
||||
// this works for >= 64 bytes of data.
|
||||
big_loop:
|
||||
MOVOU (SI), X0
|
||||
MOVOU (DI), X1
|
||||
PCMPEQB X0, X1
|
||||
PMOVMSKB X1, AX
|
||||
XORQ $0xffff, AX
|
||||
JNE diff16
|
||||
|
||||
MOVOU 16(SI), X0
|
||||
MOVOU 16(DI), X1
|
||||
PCMPEQB X0, X1
|
||||
PMOVMSKB X1, AX
|
||||
XORQ $0xffff, AX
|
||||
JNE diff32
|
||||
|
||||
MOVOU 32(SI), X0
|
||||
MOVOU 32(DI), X1
|
||||
PCMPEQB X0, X1
|
||||
PMOVMSKB X1, AX
|
||||
XORQ $0xffff, AX
|
||||
JNE diff48
|
||||
|
||||
MOVOU 48(SI), X0
|
||||
MOVOU 48(DI), X1
|
||||
PCMPEQB X0, X1
|
||||
PMOVMSKB X1, AX
|
||||
XORQ $0xffff, AX
|
||||
JNE diff64
|
||||
|
||||
ADDQ $64, SI
|
||||
ADDQ $64, DI
|
||||
SUBQ $64, R8
|
||||
CMPQ R8, $64
|
||||
JBE loop
|
||||
JMP big_loop
|
||||
|
||||
TEXT bytes·IndexByte(SB),NOSPLIT,$0-40
|
||||
MOVQ s+0(FP), SI
|
||||
MOVQ s_len+8(FP), BX
|
||||
|
Loading…
Reference in New Issue
Block a user