mirror of
https://github.com/golang/go
synced 2024-11-23 06:20:07 -07:00
crypto/cipher: add VSX implementation of xorBytes for ppc64x
This change adds asm implementations of xorBytes for ppc64x that takes advantage of VSX registers and instructions. name old time/op new time/op delta XORBytes/8Bytes-8 16.4ns ± 0% 11.1ns ± 0% -32.32% (p=0.000 n=5+4) XORBytes/128Bytes-8 45.6ns ± 0% 16.2ns ± 0% -64.50% (p=0.008 n=5+5) XORBytes/2048Bytes-8 433ns ±13% 129ns ± 1% -70.29% (p=0.000 n=5+4) XORBytes/32768Bytes-8 7.16µs ± 0% 1.83µs ± 0% -74.39% (p=0.008 n=5+5) name old speed new speed delta XORBytes/8Bytes-8 488MB/s ± 0% 721MB/s ± 0% +47.75% (p=0.016 n=5+4) XORBytes/128Bytes-8 2.80GB/s ± 0% 7.89GB/s ± 0% +181.33% (p=0.008 n=5+5) XORBytes/2048Bytes-8 4.77GB/s ±13% 15.87GB/s ± 0% +232.68% (p=0.016 n=5+4) XORBytes/32768Bytes-8 4.58GB/s ± 0% 17.88GB/s ± 0% +290.47% (p=0.008 n=5+5) Change-Id: Ic27d9b858f8ec2d597fdabc68a288d6844eba701 Reviewed-on: https://go-review.googlesource.com/c/145997 Run-TryBot: Carlos Eduardo Seo <cseo@linux.vnet.ibm.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Lynn Boger <laboger@linux.vnet.ibm.com>
This commit is contained in:
parent
f5b695030b
commit
0ff6e5f1b4
@ -2,7 +2,7 @@
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build !amd64
|
||||
// +build !amd64,!ppc64,!ppc64le
|
||||
|
||||
package cipher
|
||||
|
||||
|
29
src/crypto/cipher/xor_ppc64x.go
Normal file
29
src/crypto/cipher/xor_ppc64x.go
Normal file
@ -0,0 +1,29 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ppc64 ppc64le
|
||||
|
||||
package cipher
|
||||
|
||||
// xorBytes xors the bytes in a and b. The destination should have enough
|
||||
// space, otherwise xorBytes will panic. Returns the number of bytes xor'd.
|
||||
func xorBytes(dst, a, b []byte) int {
|
||||
n := len(a)
|
||||
if len(b) < n {
|
||||
n = len(b)
|
||||
}
|
||||
if n == 0 {
|
||||
return 0
|
||||
}
|
||||
_ = dst[n-1]
|
||||
xorBytesVSX(&dst[0], &a[0], &b[0], n)
|
||||
return n
|
||||
}
|
||||
|
||||
func xorWords(dst, a, b []byte) {
|
||||
xorBytes(dst, a, b)
|
||||
}
|
||||
|
||||
//go:noescape
|
||||
func xorBytesVSX(dst, a, b *byte, n int)
|
66
src/crypto/cipher/xor_ppc64x.s
Normal file
66
src/crypto/cipher/xor_ppc64x.s
Normal file
@ -0,0 +1,66 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ppc64 ppc64le
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// func xorBytesVSX(dst, a, b *byte, n int)
|
||||
TEXT ·xorBytesVSX(SB), NOSPLIT, $0
|
||||
MOVD dst+0(FP), R3 // R3 = dst
|
||||
MOVD a+8(FP), R4 // R4 = a
|
||||
MOVD b+16(FP), R5 // R5 = b
|
||||
MOVD n+24(FP), R6 // R6 = n
|
||||
|
||||
CMPU R6, $16, CR7 // Check if n ≥ 16 bytes
|
||||
MOVD R0, R8 // R8 = index
|
||||
CMPU R6, $8, CR6 // Check if 8 ≤ n < 16 bytes
|
||||
BGE CR7, preloop16
|
||||
BLT CR6, small
|
||||
|
||||
// Case for 8 ≤ n < 16 bytes
|
||||
MOVD (R4)(R8), R14 // R14 = a[i,...,i+7]
|
||||
MOVD (R5)(R8), R15 // R15 = b[i,...,i+7]
|
||||
XOR R14, R15, R16 // R16 = a[] ^ b[]
|
||||
SUB $8, R6 // n = n - 8
|
||||
MOVD R16, (R3)(R8) // Store to dst
|
||||
ADD $8, R8
|
||||
|
||||
// Check if we're finished
|
||||
CMP R6, R0
|
||||
BGT small
|
||||
JMP done
|
||||
|
||||
// Case for n ≥ 16 bytes
|
||||
preloop16:
|
||||
SRD $4, R6, R7 // Setup loop counter
|
||||
MOVD R7, CTR
|
||||
ANDCC $15, R6, R9 // Check for tailing bytes for later
|
||||
loop16:
|
||||
LXVD2X (R4)(R8), VS32 // VS32 = a[i,...,i+15]
|
||||
LXVD2X (R5)(R8), VS33 // VS33 = b[i,...,i+15]
|
||||
XXLXOR VS32, VS33, VS34 // VS34 = a[] ^ b[]
|
||||
STXVD2X VS34, (R3)(R8) // Store to dst
|
||||
ADD $16, R8 // Update index
|
||||
BC 16, 0, loop16 // bdnz loop16
|
||||
|
||||
BEQ CR0, done
|
||||
SLD $4, R7
|
||||
SUB R7, R6 // R6 = n - (R7 * 16)
|
||||
|
||||
// Case for n < 8 bytes and tailing bytes from the
|
||||
// previous cases.
|
||||
small:
|
||||
MOVD R6, CTR // Setup loop counter
|
||||
|
||||
loop:
|
||||
MOVBZ (R4)(R8), R14 // R14 = a[i]
|
||||
MOVBZ (R5)(R8), R15 // R15 = b[i]
|
||||
XOR R14, R15, R16 // R16 = a[i] ^ b[i]
|
||||
MOVB R16, (R3)(R8) // Store to dst
|
||||
ADD $1, R8
|
||||
BC 16, 0, loop // bdnz loop
|
||||
|
||||
done:
|
||||
RET
|
Loading…
Reference in New Issue
Block a user