mirror of
https://github.com/golang/go
synced 2024-09-25 03:20:13 -06:00
runtime: Optimize aeshash a bit. Use a better predicted branch
for checking for page boundary. Also avoid boundary check when >=16 bytes are hashed. benchmark old ns/op new ns/op delta BenchmarkHashStringSpeed 23 22 -0.43% BenchmarkHashBytesSpeed 44 42 -3.61% BenchmarkHashStringArraySpeed 71 68 -4.05% R=iant, khr CC=gobot, golang-dev, google https://golang.org/cl/9123046
This commit is contained in:
parent
23ad563119
commit
ee66972dce
@ -755,31 +755,39 @@ TEXT runtime·aeshashbody(SB),7,$0
|
||||
PINSRD $1, CX, X0 // size to next 32 bits of xmm0
|
||||
MOVO runtime·aeskeysched+0(SB), X2
|
||||
MOVO runtime·aeskeysched+16(SB), X3
|
||||
CMPL CX, $16
|
||||
JB aessmall
|
||||
aesloop:
|
||||
CMPL CX, $16
|
||||
JB aesloopend
|
||||
JBE aesloopend
|
||||
MOVOU (AX), X1
|
||||
AESENC X2, X0
|
||||
AESENC X1, X0
|
||||
SUBL $16, CX
|
||||
ADDL $16, AX
|
||||
JMP aesloop
|
||||
// 1-16 bytes remaining
|
||||
aesloopend:
|
||||
// This load may overlap with the previous load above.
|
||||
// We'll hash some bytes twice, but that's ok.
|
||||
MOVOU -16(AX)(CX*1), X1
|
||||
JMP partial
|
||||
// 0-15 bytes
|
||||
aessmall:
|
||||
TESTL CX, CX
|
||||
JE finalize // no partial block
|
||||
JE finalize // 0 bytes
|
||||
|
||||
TESTL $16, AX
|
||||
JNE highpartial
|
||||
CMPB AX, $0xf0
|
||||
JA highpartial
|
||||
|
||||
// address ends in 0xxxx. 16 bytes loaded
|
||||
// at this address won't cross a page boundary, so
|
||||
// we can load it directly.
|
||||
// 16 bytes loaded at this address won't cross
|
||||
// a page boundary, so we can load it directly.
|
||||
MOVOU (AX), X1
|
||||
ADDL CX, CX
|
||||
PAND masks(SB)(CX*8), X1
|
||||
JMP partial
|
||||
highpartial:
|
||||
// address ends in 1xxxx. Might be up against
|
||||
// address ends in 1111xxxx. Might be up against
|
||||
// a page boundary, so load ending at last byte.
|
||||
// Then shift bytes down using pshufb.
|
||||
MOVOU -16(AX)(CX*1), X1
|
||||
|
@ -772,31 +772,39 @@ TEXT runtime·aeshashbody(SB),7,$0
|
||||
PINSRQ $1, CX, X0 // size to high 64 bits of xmm0
|
||||
MOVO runtime·aeskeysched+0(SB), X2
|
||||
MOVO runtime·aeskeysched+16(SB), X3
|
||||
CMPQ CX, $16
|
||||
JB aessmall
|
||||
aesloop:
|
||||
CMPQ CX, $16
|
||||
JB aesloopend
|
||||
JBE aesloopend
|
||||
MOVOU (AX), X1
|
||||
AESENC X2, X0
|
||||
AESENC X1, X0
|
||||
SUBQ $16, CX
|
||||
ADDQ $16, AX
|
||||
JMP aesloop
|
||||
// 1-16 bytes remaining
|
||||
aesloopend:
|
||||
// This load may overlap with the previous load above.
|
||||
// We'll hash some bytes twice, but that's ok.
|
||||
MOVOU -16(AX)(CX*1), X1
|
||||
JMP partial
|
||||
// 0-15 bytes
|
||||
aessmall:
|
||||
TESTQ CX, CX
|
||||
JE finalize // no partial block
|
||||
JE finalize // 0 bytes
|
||||
|
||||
TESTQ $16, AX
|
||||
JNE highpartial
|
||||
CMPB AX, $0xf0
|
||||
JA highpartial
|
||||
|
||||
// address ends in 0xxxx. 16 bytes loaded
|
||||
// at this address won't cross a page boundary, so
|
||||
// we can load it directly.
|
||||
// 16 bytes loaded at this address won't cross
|
||||
// a page boundary, so we can load it directly.
|
||||
MOVOU (AX), X1
|
||||
ADDQ CX, CX
|
||||
PAND masks(SB)(CX*8), X1
|
||||
JMP partial
|
||||
highpartial:
|
||||
// address ends in 1xxxx. Might be up against
|
||||
// address ends in 1111xxxx. Might be up against
|
||||
// a page boundary, so load ending at last byte.
|
||||
// Then shift bytes down using pshufb.
|
||||
MOVOU -16(AX)(CX*1), X1
|
||||
|
@ -32,6 +32,33 @@ func BenchmarkHashStringSpeed(b *testing.B) {
|
||||
}
|
||||
}
|
||||
|
||||
type chunk [17]byte
|
||||
|
||||
func BenchmarkHashBytesSpeed(b *testing.B) {
|
||||
// a bunch of chunks, each with a different alignment mod 16
|
||||
var chunks [size]chunk
|
||||
// initialize each to a different value
|
||||
for i := 0; i < size; i++ {
|
||||
chunks[i][0] = byte(i)
|
||||
}
|
||||
// put into a map
|
||||
m := make(map[chunk]int, size)
|
||||
for i, c := range chunks {
|
||||
m[c] = i
|
||||
}
|
||||
idx := 0
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
if m[chunks[idx]] != idx {
|
||||
b.Error("bad map entry for chunk")
|
||||
}
|
||||
idx++
|
||||
if idx == size {
|
||||
idx = 0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkHashInt32Speed(b *testing.B) {
|
||||
ints := make([]int32, size)
|
||||
for i := 0; i < size; i++ {
|
||||
|
Loading…
Reference in New Issue
Block a user