mirror of
https://github.com/golang/go
synced 2024-11-12 07:40:23 -07:00
runtime: Optimize aeshash a bit.

Use a better predicted branch for checking for page boundary.
Also avoid boundary check when >=16 bytes are hashed.

benchmark                        old ns/op    new ns/op    delta
BenchmarkHashStringSpeed                23           22   -0.43%
BenchmarkHashBytesSpeed                 44           42   -3.61%
BenchmarkHashStringArraySpeed           71           68   -4.05%

R=iant, khr
CC=gobot, golang-dev, google
https://golang.org/cl/9123046
This commit is contained in:
parent
23ad563119
commit
ee66972dce
@ -755,31 +755,39 @@ TEXT runtime·aeshashbody(SB),7,$0
|
|||||||
PINSRD $1, CX, X0 // size to next 32 bits of xmm0
|
PINSRD $1, CX, X0 // size to next 32 bits of xmm0
|
||||||
MOVO runtime·aeskeysched+0(SB), X2
|
MOVO runtime·aeskeysched+0(SB), X2
|
||||||
MOVO runtime·aeskeysched+16(SB), X3
|
MOVO runtime·aeskeysched+16(SB), X3
|
||||||
|
CMPL CX, $16
|
||||||
|
JB aessmall
|
||||||
aesloop:
|
aesloop:
|
||||||
CMPL CX, $16
|
CMPL CX, $16
|
||||||
JB aesloopend
|
JBE aesloopend
|
||||||
MOVOU (AX), X1
|
MOVOU (AX), X1
|
||||||
AESENC X2, X0
|
AESENC X2, X0
|
||||||
AESENC X1, X0
|
AESENC X1, X0
|
||||||
SUBL $16, CX
|
SUBL $16, CX
|
||||||
ADDL $16, AX
|
ADDL $16, AX
|
||||||
JMP aesloop
|
JMP aesloop
|
||||||
|
// 1-16 bytes remaining
|
||||||
aesloopend:
|
aesloopend:
|
||||||
|
// This load may overlap with the previous load above.
|
||||||
|
// We'll hash some bytes twice, but that's ok.
|
||||||
|
MOVOU -16(AX)(CX*1), X1
|
||||||
|
JMP partial
|
||||||
|
// 0-15 bytes
|
||||||
|
aessmall:
|
||||||
TESTL CX, CX
|
TESTL CX, CX
|
||||||
JE finalize // no partial block
|
JE finalize // 0 bytes
|
||||||
|
|
||||||
TESTL $16, AX
|
CMPB AX, $0xf0
|
||||||
JNE highpartial
|
JA highpartial
|
||||||
|
|
||||||
// address ends in 0xxxx. 16 bytes loaded
|
// 16 bytes loaded at this address won't cross
|
||||||
// at this address won't cross a page boundary, so
|
// a page boundary, so we can load it directly.
|
||||||
// we can load it directly.
|
|
||||||
MOVOU (AX), X1
|
MOVOU (AX), X1
|
||||||
ADDL CX, CX
|
ADDL CX, CX
|
||||||
PAND masks(SB)(CX*8), X1
|
PAND masks(SB)(CX*8), X1
|
||||||
JMP partial
|
JMP partial
|
||||||
highpartial:
|
highpartial:
|
||||||
// address ends in 1xxxx. Might be up against
|
// address ends in 1111xxxx. Might be up against
|
||||||
// a page boundary, so load ending at last byte.
|
// a page boundary, so load ending at last byte.
|
||||||
// Then shift bytes down using pshufb.
|
// Then shift bytes down using pshufb.
|
||||||
MOVOU -16(AX)(CX*1), X1
|
MOVOU -16(AX)(CX*1), X1
|
||||||
|
@ -772,31 +772,39 @@ TEXT runtime·aeshashbody(SB),7,$0
|
|||||||
PINSRQ $1, CX, X0 // size to high 64 bits of xmm0
|
PINSRQ $1, CX, X0 // size to high 64 bits of xmm0
|
||||||
MOVO runtime·aeskeysched+0(SB), X2
|
MOVO runtime·aeskeysched+0(SB), X2
|
||||||
MOVO runtime·aeskeysched+16(SB), X3
|
MOVO runtime·aeskeysched+16(SB), X3
|
||||||
|
CMPQ CX, $16
|
||||||
|
JB aessmall
|
||||||
aesloop:
|
aesloop:
|
||||||
CMPQ CX, $16
|
CMPQ CX, $16
|
||||||
JB aesloopend
|
JBE aesloopend
|
||||||
MOVOU (AX), X1
|
MOVOU (AX), X1
|
||||||
AESENC X2, X0
|
AESENC X2, X0
|
||||||
AESENC X1, X0
|
AESENC X1, X0
|
||||||
SUBQ $16, CX
|
SUBQ $16, CX
|
||||||
ADDQ $16, AX
|
ADDQ $16, AX
|
||||||
JMP aesloop
|
JMP aesloop
|
||||||
|
// 1-16 bytes remaining
|
||||||
aesloopend:
|
aesloopend:
|
||||||
|
// This load may overlap with the previous load above.
|
||||||
|
// We'll hash some bytes twice, but that's ok.
|
||||||
|
MOVOU -16(AX)(CX*1), X1
|
||||||
|
JMP partial
|
||||||
|
// 0-15 bytes
|
||||||
|
aessmall:
|
||||||
TESTQ CX, CX
|
TESTQ CX, CX
|
||||||
JE finalize // no partial block
|
JE finalize // 0 bytes
|
||||||
|
|
||||||
TESTQ $16, AX
|
CMPB AX, $0xf0
|
||||||
JNE highpartial
|
JA highpartial
|
||||||
|
|
||||||
// address ends in 0xxxx. 16 bytes loaded
|
// 16 bytes loaded at this address won't cross
|
||||||
// at this address won't cross a page boundary, so
|
// a page boundary, so we can load it directly.
|
||||||
// we can load it directly.
|
|
||||||
MOVOU (AX), X1
|
MOVOU (AX), X1
|
||||||
ADDQ CX, CX
|
ADDQ CX, CX
|
||||||
PAND masks(SB)(CX*8), X1
|
PAND masks(SB)(CX*8), X1
|
||||||
JMP partial
|
JMP partial
|
||||||
highpartial:
|
highpartial:
|
||||||
// address ends in 1xxxx. Might be up against
|
// address ends in 1111xxxx. Might be up against
|
||||||
// a page boundary, so load ending at last byte.
|
// a page boundary, so load ending at last byte.
|
||||||
// Then shift bytes down using pshufb.
|
// Then shift bytes down using pshufb.
|
||||||
MOVOU -16(AX)(CX*1), X1
|
MOVOU -16(AX)(CX*1), X1
|
||||||
|
@ -32,6 +32,33 @@ func BenchmarkHashStringSpeed(b *testing.B) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type chunk [17]byte
|
||||||
|
|
||||||
|
func BenchmarkHashBytesSpeed(b *testing.B) {
|
||||||
|
// a bunch of chunks, each with a different alignment mod 16
|
||||||
|
var chunks [size]chunk
|
||||||
|
// initialize each to a different value
|
||||||
|
for i := 0; i < size; i++ {
|
||||||
|
chunks[i][0] = byte(i)
|
||||||
|
}
|
||||||
|
// put into a map
|
||||||
|
m := make(map[chunk]int, size)
|
||||||
|
for i, c := range chunks {
|
||||||
|
m[c] = i
|
||||||
|
}
|
||||||
|
idx := 0
|
||||||
|
b.ResetTimer()
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
if m[chunks[idx]] != idx {
|
||||||
|
b.Error("bad map entry for chunk")
|
||||||
|
}
|
||||||
|
idx++
|
||||||
|
if idx == size {
|
||||||
|
idx = 0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func BenchmarkHashInt32Speed(b *testing.B) {
|
func BenchmarkHashInt32Speed(b *testing.B) {
|
||||||
ints := make([]int32, size)
|
ints := make([]int32, size)
|
||||||
for i := 0; i < size; i++ {
|
for i := 0; i < size; i++ {
|
||||||
|
Loading…
Reference in New Issue
Block a user