mirror of
https://github.com/golang/go
synced 2024-11-17 09:54:46 -07:00
crypto/aes: On ppc64le, use better instructions when available
Several operations emulate instructions available on power9. Use the GOPPC64_power9 macro provided by the compiler to select the native instructions if the minimum cpu requirements are met. Likewise rework the LXSDX_BE to simplify usage when overriding it. It is only used in one place. All three configurations are tested via CI. On POWER9: pkg:crypto/cipher goos:linux goarch:ppc64le AESCBCEncrypt1K 949MB/s ± 0% 957MB/s ± 0% +0.83% AESCBCDecrypt1K 1.82GB/s ± 0% 1.99GB/s ± 0% +8.93% pkg:crypto/aes goos:linux goarch:ppc64le Encrypt 1.01GB/s ± 0% 1.05GB/s ± 0% +4.36% Decrypt 987MB/s ± 0% 1024MB/s ± 0% +3.77% Change-Id: I56d0eb845647dd3c43bcad71eb281b499e1d1789 Reviewed-on: https://go-review.googlesource.com/c/go/+/449116 Reviewed-by: Lynn Boger <laboger@linux.vnet.ibm.com> Auto-Submit: Paul Murphy <murp@ibm.com> TryBot-Result: Gopher Robot <gobot@golang.org> Reviewed-by: Joedian Reid <joedian@golang.org> Reviewed-by: Bryan Mills <bcmills@google.com> Run-TryBot: Paul Murphy <murp@ibm.com>
This commit is contained in:
parent
cf93b25366
commit
8614c525b3
@ -48,7 +48,7 @@
|
||||
|
||||
// For P9 instruction emulation
|
||||
#define ESPERM V21 // Endian swapping permute into BE
|
||||
#define TMP2 V22 // Temporary for P8_STXVB16X/P8_STXV
|
||||
#define TMP2 V22 // Temporary for P8_STXVB16X/P8_STXVB16X
|
||||
|
||||
// For {en,de}cryptBlockAsm
|
||||
#define BLK_INP R3
|
||||
@ -69,8 +69,15 @@ DATA ·rcon+0x40(SB)/8, $0x0000000000000000
|
||||
DATA ·rcon+0x48(SB)/8, $0x0000000000000000
|
||||
GLOBL ·rcon(SB), RODATA, $80
|
||||
|
||||
// Emulate unaligned BE vector load/stores on LE targets
|
||||
#ifdef GOARCH_ppc64le
|
||||
# ifdef GOPPC64_power9
|
||||
#define P8_LXVB16X(RA,RB,VT) LXVB16X (RA+RB), VT
|
||||
#define P8_STXVB16X(VS,RA,RB) STXVB16X VS, (RA+RB)
|
||||
#define XXBRD_ON_LE(VA,VT) XXBRD VA, VT
|
||||
# else
|
||||
// On POWER8/ppc64le, emulate the POWER9 instructions by loading unaligned
|
||||
// doublewords and byte-swapping each doubleword to emulate BE load/stores.
|
||||
#define NEEDS_ESPERM
|
||||
#define P8_LXVB16X(RA,RB,VT) \
|
||||
LXVD2X (RA+RB), VT \
|
||||
VPERM VT, VT, ESPERM, VT
|
||||
@ -79,19 +86,15 @@ GLOBL ·rcon(SB), RODATA, $80
|
||||
VPERM VS, VS, ESPERM, TMP2 \
|
||||
STXVD2X TMP2, (RA+RB)
|
||||
|
||||
#define LXSDX_BE(RA,RB,VT) \
|
||||
LXSDX (RA+RB), VT \
|
||||
VPERM VT, VT, ESPERM, VT
|
||||
#define XXBRD_ON_LE(VA,VT) \
|
||||
VPERM VA, VA, ESPERM, VT
|
||||
|
||||
# endif // defined(GOPPC64_power9)
|
||||
#else
|
||||
#define P8_LXVB16X(RA,RB,VT) \
|
||||
LXVD2X (RA+RB), VT
|
||||
|
||||
#define P8_STXVB16X(VS,RA,RB) \
|
||||
STXVD2X VS, (RA+RB)
|
||||
|
||||
#define LXSDX_BE(RA,RB,VT) \
|
||||
LXSDX (RA+RB), VT
|
||||
#endif
|
||||
#define P8_LXVB16X(RA,RB,VT) LXVD2X (RA+RB), VT
|
||||
#define P8_STXVB16X(VS,RA,RB) STXVD2X VS, (RA+RB)
|
||||
#define XXBRD_ON_LE(VA, VT)
|
||||
#endif // defined(GOARCH_ppc64le)
|
||||
|
||||
// func setEncryptKeyAsm(nr int, key *byte, enc *uint32, dec *uint32)
|
||||
TEXT ·expandKeyAsm(SB), NOSPLIT|NOFRAME, $0
|
||||
@ -101,7 +104,7 @@ TEXT ·expandKeyAsm(SB), NOSPLIT|NOFRAME, $0
|
||||
MOVD enc+16(FP), OUTENC
|
||||
MOVD dec+24(FP), OUTDEC
|
||||
|
||||
#ifdef GOARCH_ppc64le
|
||||
#ifdef NEEDS_ESPERM
|
||||
MOVD $·rcon(SB), PTR // PTR points to rcon addr
|
||||
LVX (PTR), ESPERM
|
||||
ADD $0x10, PTR
|
||||
@ -191,7 +194,8 @@ loop128:
|
||||
RET
|
||||
|
||||
l192:
|
||||
LXSDX_BE(INP, R0, IN1) // Load next 8 bytes into upper half of VSR in BE order.
|
||||
LXSDX (INP+R0), IN1 // Load next 8 bytes into upper half of VSR.
|
||||
XXBRD_ON_LE(IN1, IN1) // and convert to BE ordering on LE hosts.
|
||||
MOVD $4, CNT // li 7,4
|
||||
STXVD2X IN0, (R0+OUTENC)
|
||||
STXVD2X IN0, (R0+OUTDEC)
|
||||
@ -309,7 +313,7 @@ TEXT ·encryptBlockAsm(SB), NOSPLIT|NOFRAME, $0
|
||||
MOVD xk+8(FP), R5 // Key pointer
|
||||
MOVD dst+16(FP), R3 // Dest pointer
|
||||
MOVD src+24(FP), R4 // Src pointer
|
||||
#ifdef GOARCH_ppc64le
|
||||
#ifdef NEEDS_ESPERM
|
||||
MOVD $·rcon(SB), R7
|
||||
LVX (R7), ESPERM // Permute value for P8_ macros.
|
||||
#endif
|
||||
@ -404,7 +408,7 @@ TEXT ·decryptBlockAsm(SB), NOSPLIT|NOFRAME, $0
|
||||
MOVD xk+8(FP), R5 // Key pointer
|
||||
MOVD dst+16(FP), R3 // Dest pointer
|
||||
MOVD src+24(FP), R4 // Src pointer
|
||||
#ifdef GOARCH_ppc64le
|
||||
#ifdef NEEDS_ESPERM
|
||||
MOVD $·rcon(SB), R7
|
||||
LVX (R7), ESPERM // Permute value for P8_ macros.
|
||||
#endif
|
||||
@ -622,7 +626,7 @@ TEXT ·cryptBlocksChain(SB), NOSPLIT|NOFRAME, $0
|
||||
MOVD enc+40(FP), ENC
|
||||
MOVD nr+48(FP), ROUNDS
|
||||
|
||||
#ifdef GOARCH_ppc64le
|
||||
#ifdef NEEDS_ESPERM
|
||||
MOVD $·rcon(SB), R11
|
||||
LVX (R11), ESPERM // Permute value for P8_ macros.
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user