mirror of
https://github.com/golang/go
synced 2024-11-23 06:40:05 -07:00
hash/crc32: fix optimized s390x implementation
The code wasn't checking to see if the data was still >= 64 bytes long after aligning it.

Aligning the data is an optimization and we don't actually need to do it. In fact for smaller sizes it slows things down due to the overhead of calling the generic function. Therefore for now I have simply removed the alignment stage. I have also added a check into the assembly to deliberately trigger a segmentation fault if the data is too short.

Fixes #16779.

Change-Id: Ic01636d775efc5ec97689f050991cee04ce8fe73
Reviewed-on: https://go-review.googlesource.com/27409
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
This commit is contained in:
parent
94d9cc7741
commit
4b17b152a3
@ -4,14 +4,9 @@
|
|||||||
|
|
||||||
package crc32
|
package crc32
|
||||||
|
|
||||||
import (
|
|
||||||
"unsafe"
|
|
||||||
)
|
|
||||||
|
|
||||||
const (
|
const (
|
||||||
vxMinLen = 64
|
vxMinLen = 64
|
||||||
vxAlignment = 16
|
vxAlignMask = 15 // align to 16 bytes
|
||||||
vxAlignMask = vxAlignment - 1
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// hasVectorFacility reports whether the machine has the z/Architecture
|
// hasVectorFacility reports whether the machine has the z/Architecture
|
||||||
@ -49,20 +44,13 @@ func genericIEEE(crc uint32, p []byte) uint32 {
|
|||||||
return update(crc, IEEETable, p)
|
return update(crc, IEEETable, p)
|
||||||
}
|
}
|
||||||
|
|
||||||
// updateCastagnoli calculates the checksum of p using genericCastagnoli to
|
// updateCastagnoli calculates the checksum of p using
|
||||||
// align the data appropriately for vectorCastagnoli. It avoids using
|
// vectorizedCastagnoli if possible and falling back onto
|
||||||
// vectorCastagnoli entirely if the length of p is less than or equal to
|
// genericCastagnoli as needed.
|
||||||
// vxMinLen.
|
|
||||||
func updateCastagnoli(crc uint32, p []byte) uint32 {
|
func updateCastagnoli(crc uint32, p []byte) uint32 {
|
||||||
// Use vectorized function if vector facility is available and
|
// Use vectorized function if vector facility is available and
|
||||||
// data length is above threshold.
|
// data length is above threshold.
|
||||||
if hasVX && len(p) > vxMinLen {
|
if hasVX && len(p) >= vxMinLen {
|
||||||
pAddr := uintptr(unsafe.Pointer(&p[0]))
|
|
||||||
if pAddr&vxAlignMask != 0 {
|
|
||||||
prealign := vxAlignment - int(pAddr&vxAlignMask)
|
|
||||||
crc = genericCastagnoli(crc, p[:prealign])
|
|
||||||
p = p[prealign:]
|
|
||||||
}
|
|
||||||
aligned := len(p) & ^vxAlignMask
|
aligned := len(p) & ^vxAlignMask
|
||||||
crc = vectorizedCastagnoli(crc, p[:aligned])
|
crc = vectorizedCastagnoli(crc, p[:aligned])
|
||||||
p = p[aligned:]
|
p = p[aligned:]
|
||||||
@ -75,19 +63,12 @@ func updateCastagnoli(crc uint32, p []byte) uint32 {
|
|||||||
return genericCastagnoli(crc, p)
|
return genericCastagnoli(crc, p)
|
||||||
}
|
}
|
||||||
|
|
||||||
// updateIEEE calculates the checksum of p using genericIEEE to align the data
|
// updateIEEE calculates the checksum of p using vectorizedIEEE if
|
||||||
// appropriately for vectorIEEE. It avoids using vectorIEEE entirely if the length
|
// possible and falling back onto genericIEEE as needed.
|
||||||
// of p is less than or equal to vxMinLen.
|
|
||||||
func updateIEEE(crc uint32, p []byte) uint32 {
|
func updateIEEE(crc uint32, p []byte) uint32 {
|
||||||
// Use vectorized function if vector facility is available and
|
// Use vectorized function if vector facility is available and
|
||||||
// data length is above threshold.
|
// data length is above threshold.
|
||||||
if hasVX && len(p) > vxMinLen {
|
if hasVX && len(p) >= vxMinLen {
|
||||||
pAddr := uintptr(unsafe.Pointer(&p[0]))
|
|
||||||
if pAddr&vxAlignMask != 0 {
|
|
||||||
prealign := vxAlignment - int(pAddr&vxAlignMask)
|
|
||||||
crc = genericIEEE(crc, p[:prealign])
|
|
||||||
p = p[prealign:]
|
|
||||||
}
|
|
||||||
aligned := len(p) & ^vxAlignMask
|
aligned := len(p) & ^vxAlignMask
|
||||||
crc = vectorizedIEEE(crc, p[:aligned])
|
crc = vectorizedIEEE(crc, p[:aligned])
|
||||||
p = p[aligned:]
|
p = p[aligned:]
|
||||||
|
@ -128,6 +128,10 @@ TEXT vectorizedBody<>(SB),NOSPLIT,$0
|
|||||||
VZERO V0
|
VZERO V0
|
||||||
VLVGF $3, R2, V0
|
VLVGF $3, R2, V0
|
||||||
|
|
||||||
|
// Crash if the input size is less than 64-bytes.
|
||||||
|
CMP R4, $64
|
||||||
|
BLT crash
|
||||||
|
|
||||||
// Load a 64-byte data chunk and XOR with CRC
|
// Load a 64-byte data chunk and XOR with CRC
|
||||||
VLM 0(R3), V1, V4 // 64-bytes into V1..V4
|
VLM 0(R3), V1, V4 // 64-bytes into V1..V4
|
||||||
|
|
||||||
@ -243,3 +247,6 @@ done:
|
|||||||
XOR $0xffffffff, R2 // NOTW R2
|
XOR $0xffffffff, R2 // NOTW R2
|
||||||
MOVWZ R2, ret + 32(FP)
|
MOVWZ R2, ret + 32(FP)
|
||||||
RET
|
RET
|
||||||
|
|
||||||
|
crash:
|
||||||
|
MOVD $0, (R0) // input size is less than 64-bytes
|
||||||
|
Loading…
Reference in New Issue
Block a user