mirror of
https://github.com/golang/go
synced 2024-11-23 11:20:05 -07:00
crypto/cipher: speed up xor operations in CBC, CFB, OBF, CTR
and GCM on 386 and amd64 Intel(R) Core(TM) i5-2540M CPU @ 2.60GHz: benchmark old MB/s new MB/s speedup BenchmarkAESGCMSeal1K 82.39 92.05 1.12x BenchmarkAESGCMOpen1K 82.28 91.88 1.12x BenchmarkAESCFBEncrypt1K 141.54 277.59 1.96x BenchmarkAESCFBDecrypt1K 133.06 278.07 2.09x BenchmarkAESOFB1K 160.51 380.24 2.37x BenchmarkAESCTR1K 164.07 429.25 2.62x BenchmarkAESCBCEncrypt1K 170.99 263.74 1.54x BenchmarkAESCBCDecrypt1K 124.96 249.14 1.99x Fixes #6741. R=agl, dave, agl CC=golang-dev https://golang.org/cl/24250044
This commit is contained in:
parent
426b48a775
commit
5744be9fe4
139
src/pkg/crypto/cipher/benchmark_test.go
Normal file
139
src/pkg/crypto/cipher/benchmark_test.go
Normal file
@ -0,0 +1,139 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package cipher_test
|
||||
|
||||
import (
|
||||
"crypto/aes"
|
||||
"crypto/cipher"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func BenchmarkAESGCMSeal1K(b *testing.B) {
|
||||
buf := make([]byte, 1024)
|
||||
b.SetBytes(int64(len(buf)))
|
||||
|
||||
var key [16]byte
|
||||
var nonce [12]byte
|
||||
aes, _ := aes.NewCipher(key[:])
|
||||
aesgcm, _ := cipher.NewGCM(aes)
|
||||
var out []byte
|
||||
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
out = aesgcm.Seal(out[:0], nonce[:], buf, nonce[:])
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkAESGCMOpen1K(b *testing.B) {
|
||||
buf := make([]byte, 1024)
|
||||
b.SetBytes(int64(len(buf)))
|
||||
|
||||
var key [16]byte
|
||||
var nonce [12]byte
|
||||
aes, _ := aes.NewCipher(key[:])
|
||||
aesgcm, _ := cipher.NewGCM(aes)
|
||||
var out []byte
|
||||
out = aesgcm.Seal(out[:0], nonce[:], buf, nonce[:])
|
||||
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
_, err := aesgcm.Open(buf[:0], nonce[:], out, nonce[:])
|
||||
if err != nil {
|
||||
b.Errorf("Open: %v", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If we test exactly 1K blocks, we would generate exact multiples of
|
||||
// the cipher's block size, and and the cipher stream fragments would
|
||||
// always be wordsize aligned, whereas non-aligned is a more typical
|
||||
// use-case.
|
||||
const almost1K = 1024 - 5
|
||||
|
||||
func BenchmarkAESCFBEncrypt1K(b *testing.B) {
|
||||
buf := make([]byte, almost1K)
|
||||
b.SetBytes(int64(len(buf)))
|
||||
|
||||
var key [16]byte
|
||||
var iv [16]byte
|
||||
aes, _ := aes.NewCipher(key[:])
|
||||
ctr := cipher.NewCFBEncrypter(aes, iv[:])
|
||||
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
ctr.XORKeyStream(buf, buf)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkAESCFBDecrypt1K(b *testing.B) {
|
||||
buf := make([]byte, almost1K)
|
||||
b.SetBytes(int64(len(buf)))
|
||||
|
||||
var key [16]byte
|
||||
var iv [16]byte
|
||||
aes, _ := aes.NewCipher(key[:])
|
||||
ctr := cipher.NewCFBDecrypter(aes, iv[:])
|
||||
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
ctr.XORKeyStream(buf, buf)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkAESOFB1K(b *testing.B) {
|
||||
buf := make([]byte, almost1K)
|
||||
b.SetBytes(int64(len(buf)))
|
||||
|
||||
var key [16]byte
|
||||
var iv [16]byte
|
||||
aes, _ := aes.NewCipher(key[:])
|
||||
ctr := cipher.NewOFB(aes, iv[:])
|
||||
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
ctr.XORKeyStream(buf, buf)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkAESCTR1K(b *testing.B) {
|
||||
buf := make([]byte, almost1K)
|
||||
b.SetBytes(int64(len(buf)))
|
||||
|
||||
var key [16]byte
|
||||
var iv [16]byte
|
||||
aes, _ := aes.NewCipher(key[:])
|
||||
ctr := cipher.NewCTR(aes, iv[:])
|
||||
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
ctr.XORKeyStream(buf, buf)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkAESCBCEncrypt1K(b *testing.B) {
|
||||
buf := make([]byte, 1024)
|
||||
b.SetBytes(int64(len(buf)))
|
||||
|
||||
var key [16]byte
|
||||
var iv [16]byte
|
||||
aes, _ := aes.NewCipher(key[:])
|
||||
cbc := cipher.NewCBCEncrypter(aes, iv[:])
|
||||
for i := 0; i < b.N; i++ {
|
||||
cbc.CryptBlocks(buf, buf)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkAESCBCDecrypt1K(b *testing.B) {
|
||||
buf := make([]byte, 1024)
|
||||
b.SetBytes(int64(len(buf)))
|
||||
|
||||
var key [16]byte
|
||||
var iv [16]byte
|
||||
aes, _ := aes.NewCipher(key[:])
|
||||
cbc := cipher.NewCBCDecrypter(aes, iv[:])
|
||||
for i := 0; i < b.N; i++ {
|
||||
cbc.CryptBlocks(buf, buf)
|
||||
}
|
||||
}
|
@ -49,13 +49,9 @@ func (x *cbcEncrypter) CryptBlocks(dst, src []byte) {
|
||||
panic("crypto/cipher: output smaller than input")
|
||||
}
|
||||
for len(src) > 0 {
|
||||
for i := 0; i < x.blockSize; i++ {
|
||||
x.iv[i] ^= src[i]
|
||||
}
|
||||
xorBytes(x.iv, x.iv, src[:x.blockSize])
|
||||
x.b.Encrypt(x.iv, x.iv)
|
||||
for i := 0; i < x.blockSize; i++ {
|
||||
dst[i] = x.iv[i]
|
||||
}
|
||||
copy(dst, x.iv)
|
||||
src = src[x.blockSize:]
|
||||
dst = dst[x.blockSize:]
|
||||
}
|
||||
@ -91,12 +87,9 @@ func (x *cbcDecrypter) CryptBlocks(dst, src []byte) {
|
||||
}
|
||||
for len(src) > 0 {
|
||||
x.b.Decrypt(x.tmp, src[:x.blockSize])
|
||||
for i := 0; i < x.blockSize; i++ {
|
||||
x.tmp[i] ^= x.iv[i]
|
||||
x.iv[i] = src[i]
|
||||
dst[i] = x.tmp[i]
|
||||
}
|
||||
|
||||
xorBytes(x.tmp, x.tmp, x.iv)
|
||||
copy(x.iv, src)
|
||||
copy(dst, x.tmp)
|
||||
src = src[x.blockSize:]
|
||||
dst = dst[x.blockSize:]
|
||||
}
|
||||
|
@ -8,18 +8,40 @@ package cipher
|
||||
|
||||
type cfb struct {
|
||||
b Block
|
||||
next []byte
|
||||
out []byte
|
||||
outUsed int
|
||||
|
||||
decrypt bool
|
||||
}
|
||||
|
||||
func (x *cfb) XORKeyStream(dst, src []byte) {
|
||||
for i := 0; i < len(src); i++ {
|
||||
if x.outUsed == len(x.out) {
|
||||
x.b.Encrypt(x.out, x.next)
|
||||
x.outUsed = 0
|
||||
}
|
||||
|
||||
n := xorBytes(dst, src, x.out[x.outUsed:])
|
||||
if x.decrypt {
|
||||
// We can precompute a larger segment of the
|
||||
// keystream on decryption. This will allow
|
||||
// larger batches for xor, and we should be
|
||||
// able to match CTR/OFB performance.
|
||||
copy(x.next[x.outUsed:], src[:n])
|
||||
} else {
|
||||
copy(x.next[x.outUsed:], dst[:n])
|
||||
}
|
||||
dst = dst[n:]
|
||||
src = src[n:]
|
||||
x.outUsed += n
|
||||
}
|
||||
}
|
||||
|
||||
// NewCFBEncrypter returns a Stream which encrypts with cipher feedback mode,
|
||||
// using the given Block. The iv must be the same length as the Block's block
|
||||
// size.
|
||||
func NewCFBEncrypter(block Block, iv []byte) Stream {
|
||||
if len(iv) != block.BlockSize() {
|
||||
panic("cipher.NewCBFEncrypter: IV length must equal block size")
|
||||
}
|
||||
return newCFB(block, iv, false)
|
||||
}
|
||||
|
||||
@ -27,44 +49,23 @@ func NewCFBEncrypter(block Block, iv []byte) Stream {
|
||||
// using the given Block. The iv must be the same length as the Block's block
|
||||
// size.
|
||||
func NewCFBDecrypter(block Block, iv []byte) Stream {
|
||||
if len(iv) != block.BlockSize() {
|
||||
panic("cipher.NewCBFEncrypter: IV length must equal block size")
|
||||
}
|
||||
return newCFB(block, iv, true)
|
||||
}
|
||||
|
||||
func newCFB(block Block, iv []byte, decrypt bool) Stream {
|
||||
blockSize := block.BlockSize()
|
||||
if len(iv) != blockSize {
|
||||
return nil
|
||||
// stack trace will indicate whether it was de or encryption
|
||||
panic("cipher.newCFB: IV length must equal block size")
|
||||
}
|
||||
|
||||
x := &cfb{
|
||||
b: block,
|
||||
out: make([]byte, blockSize),
|
||||
outUsed: 0,
|
||||
next: make([]byte, blockSize),
|
||||
outUsed: blockSize,
|
||||
decrypt: decrypt,
|
||||
}
|
||||
block.Encrypt(x.out, iv)
|
||||
copy(x.next, iv)
|
||||
|
||||
return x
|
||||
}
|
||||
|
||||
func (x *cfb) XORKeyStream(dst, src []byte) {
|
||||
for i := 0; i < len(src); i++ {
|
||||
if x.outUsed == len(x.out) {
|
||||
x.b.Encrypt(x.out, x.out)
|
||||
x.outUsed = 0
|
||||
}
|
||||
|
||||
if x.decrypt {
|
||||
t := src[i]
|
||||
dst[i] = src[i] ^ x.out[x.outUsed]
|
||||
x.out[x.outUsed] = t
|
||||
} else {
|
||||
x.out[x.outUsed] ^= src[i]
|
||||
dst[i] = x.out[x.outUsed]
|
||||
}
|
||||
x.outUsed++
|
||||
}
|
||||
}
|
||||
|
@ -19,37 +19,58 @@ type ctr struct {
|
||||
outUsed int
|
||||
}
|
||||
|
||||
const streamBufferSize = 512
|
||||
|
||||
// NewCTR returns a Stream which encrypts/decrypts using the given Block in
|
||||
// counter mode. The length of iv must be the same as the Block's block size.
|
||||
func NewCTR(block Block, iv []byte) Stream {
|
||||
if len(iv) != block.BlockSize() {
|
||||
panic("cipher.NewCTR: IV length must equal block size")
|
||||
}
|
||||
|
||||
bufSize := streamBufferSize
|
||||
if bufSize < block.BlockSize() {
|
||||
bufSize = block.BlockSize()
|
||||
}
|
||||
return &ctr{
|
||||
b: block,
|
||||
ctr: dup(iv),
|
||||
out: make([]byte, len(iv)),
|
||||
outUsed: len(iv),
|
||||
out: make([]byte, 0, bufSize),
|
||||
outUsed: 0,
|
||||
}
|
||||
}
|
||||
|
||||
func (x *ctr) refill() {
|
||||
remain := len(x.out) - x.outUsed
|
||||
if remain > x.outUsed {
|
||||
return
|
||||
}
|
||||
copy(x.out, x.out[x.outUsed:])
|
||||
x.out = x.out[:cap(x.out)]
|
||||
bs := x.b.BlockSize()
|
||||
for remain < len(x.out)-bs {
|
||||
x.b.Encrypt(x.out[remain:], x.ctr)
|
||||
remain += bs
|
||||
|
||||
// Increment counter
|
||||
for i := len(x.ctr) - 1; i >= 0; i-- {
|
||||
x.ctr[i]++
|
||||
if x.ctr[i] != 0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
x.out = x.out[:remain]
|
||||
x.outUsed = 0
|
||||
}
|
||||
|
||||
func (x *ctr) XORKeyStream(dst, src []byte) {
|
||||
for i := 0; i < len(src); i++ {
|
||||
if x.outUsed == len(x.ctr) {
|
||||
x.b.Encrypt(x.out, x.ctr)
|
||||
x.outUsed = 0
|
||||
|
||||
// Increment counter
|
||||
for i := len(x.ctr) - 1; i >= 0; i-- {
|
||||
x.ctr[i]++
|
||||
if x.ctr[i] != 0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
for len(src) > 0 {
|
||||
if x.outUsed >= len(x.out)-x.b.BlockSize() {
|
||||
x.refill()
|
||||
}
|
||||
|
||||
dst[i] = src[i] ^ x.out[x.outUsed]
|
||||
x.outUsed++
|
||||
n := xorBytes(dst, src, x.out[x.outUsed:])
|
||||
dst = dst[n:]
|
||||
src = src[n:]
|
||||
x.outUsed += n
|
||||
}
|
||||
}
|
||||
|
@ -289,9 +289,7 @@ func (g *gcm) counterCrypt(out, in []byte, counter *[gcmBlockSize]byte) {
|
||||
g.cipher.Encrypt(mask[:], counter[:])
|
||||
gcmInc32(counter)
|
||||
|
||||
for i := range mask {
|
||||
out[i] = in[i] ^ mask[i]
|
||||
}
|
||||
xorWords(out, in, mask[:])
|
||||
out = out[gcmBlockSize:]
|
||||
in = in[gcmBlockSize:]
|
||||
}
|
||||
@ -299,10 +297,7 @@ func (g *gcm) counterCrypt(out, in []byte, counter *[gcmBlockSize]byte) {
|
||||
if len(in) > 0 {
|
||||
g.cipher.Encrypt(mask[:], counter[:])
|
||||
gcmInc32(counter)
|
||||
|
||||
for i := range in {
|
||||
out[i] = in[i] ^ mask[i]
|
||||
}
|
||||
xorBytes(out, in, mask[:])
|
||||
}
|
||||
}
|
||||
|
||||
@ -321,9 +316,7 @@ func (g *gcm) auth(out, ciphertext, additionalData []byte, tagMask *[gcmTagSize]
|
||||
putUint64(out, y.low)
|
||||
putUint64(out[8:], y.high)
|
||||
|
||||
for i := range tagMask {
|
||||
out[i] ^= tagMask[i]
|
||||
}
|
||||
xorWords(out, out, tagMask[:])
|
||||
}
|
||||
|
||||
func getUint64(data []byte) uint64 {
|
||||
|
@ -157,19 +157,3 @@ func TestAESGCM(t *testing.T) {
|
||||
ct[0] ^= 0x80
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkAESGCM(b *testing.B) {
|
||||
buf := make([]byte, 1024)
|
||||
b.SetBytes(int64(len(buf)))
|
||||
|
||||
var key [16]byte
|
||||
var nonce [12]byte
|
||||
aes, _ := aes.NewCipher(key[:])
|
||||
aesgcm, _ := cipher.NewGCM(aes)
|
||||
var out []byte
|
||||
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
out = aesgcm.Seal(out[:0], nonce[:], buf, nonce[:])
|
||||
}
|
||||
}
|
||||
|
@ -8,6 +8,7 @@ package cipher
|
||||
|
||||
type ofb struct {
|
||||
b Block
|
||||
cipher []byte
|
||||
out []byte
|
||||
outUsed int
|
||||
}
|
||||
@ -20,25 +21,46 @@ func NewOFB(b Block, iv []byte) Stream {
|
||||
if len(iv) != blockSize {
|
||||
return nil
|
||||
}
|
||||
|
||||
bufSize := streamBufferSize
|
||||
if bufSize < blockSize {
|
||||
bufSize = blockSize
|
||||
}
|
||||
x := &ofb{
|
||||
b: b,
|
||||
out: make([]byte, blockSize),
|
||||
cipher: make([]byte, blockSize),
|
||||
out: make([]byte, 0, bufSize),
|
||||
outUsed: 0,
|
||||
}
|
||||
b.Encrypt(x.out, iv)
|
||||
|
||||
copy(x.cipher, iv)
|
||||
return x
|
||||
}
|
||||
|
||||
func (x *ofb) XORKeyStream(dst, src []byte) {
|
||||
for i, s := range src {
|
||||
if x.outUsed == len(x.out) {
|
||||
x.b.Encrypt(x.out, x.out)
|
||||
x.outUsed = 0
|
||||
}
|
||||
func (x *ofb) refill() {
|
||||
bs := x.b.BlockSize()
|
||||
remain := len(x.out) - x.outUsed
|
||||
if remain > x.outUsed {
|
||||
return
|
||||
}
|
||||
copy(x.out, x.out[x.outUsed:])
|
||||
x.out = x.out[:cap(x.out)]
|
||||
for remain < len(x.out)-bs {
|
||||
x.b.Encrypt(x.cipher, x.cipher)
|
||||
copy(x.out[remain:], x.cipher)
|
||||
remain += bs
|
||||
}
|
||||
x.out = x.out[:remain]
|
||||
x.outUsed = 0
|
||||
}
|
||||
|
||||
dst[i] = s ^ x.out[x.outUsed]
|
||||
x.outUsed++
|
||||
func (x *ofb) XORKeyStream(dst, src []byte) {
|
||||
for len(src) > 0 {
|
||||
if x.outUsed >= len(x.out)-x.b.BlockSize() {
|
||||
x.refill()
|
||||
}
|
||||
n := xorBytes(dst, src, x.out[x.outUsed:])
|
||||
dst = dst[n:]
|
||||
src = src[n:]
|
||||
x.outUsed += n
|
||||
}
|
||||
}
|
||||
|
84
src/pkg/crypto/cipher/xor.go
Normal file
84
src/pkg/crypto/cipher/xor.go
Normal file
@ -0,0 +1,84 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package cipher
|
||||
|
||||
import (
|
||||
"runtime"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
const wordSize = int(unsafe.Sizeof(uintptr(0)))
|
||||
const supportsUnaligned = runtime.GOARCH == "386" || runtime.GOARCH == "amd64"
|
||||
|
||||
// fastXORBytes xors in bulk. It only works on architectures that
|
||||
// support unaligned read/writes.
|
||||
func fastXORBytes(dst, a, b []byte) int {
|
||||
n := len(a)
|
||||
if len(b) < n {
|
||||
n = len(b)
|
||||
}
|
||||
|
||||
w := n / wordSize
|
||||
if w > 0 {
|
||||
dw := *(*[]uintptr)(unsafe.Pointer(&dst))
|
||||
aw := *(*[]uintptr)(unsafe.Pointer(&a))
|
||||
bw := *(*[]uintptr)(unsafe.Pointer(&b))
|
||||
for i := 0; i < w; i++ {
|
||||
dw[i] = aw[i] ^ bw[i]
|
||||
}
|
||||
}
|
||||
|
||||
for i := (n - n%wordSize); i < n; i++ {
|
||||
dst[i] = a[i] ^ b[i]
|
||||
}
|
||||
|
||||
return n
|
||||
}
|
||||
|
||||
func safeXORBytes(dst, a, b []byte) int {
|
||||
n := len(a)
|
||||
if len(b) < n {
|
||||
n = len(b)
|
||||
}
|
||||
for i := 0; i < n; i++ {
|
||||
dst[i] = a[i] ^ b[i]
|
||||
}
|
||||
return n
|
||||
}
|
||||
|
||||
// xorBytes xors the bytes in a and b. The destination is assumed to have enough
|
||||
// space. Returns the number of bytes xor'd.
|
||||
func xorBytes(dst, a, b []byte) int {
|
||||
if supportsUnaligned {
|
||||
return fastXORBytes(dst, a, b)
|
||||
} else {
|
||||
// TODO(hanwen): if (dst, a, b) have common alignment
|
||||
// we could still try fastXORBytes. It is not clear
|
||||
// how often this happens, and it's only worth it if
|
||||
// the block encryption itself is hardware
|
||||
// accelerated.
|
||||
return safeXORBytes(dst, a, b)
|
||||
}
|
||||
}
|
||||
|
||||
// fastXORWords XORs multiples of 4 or 8 bytes (depending on architecture.)
|
||||
// The arguments are assumed to be of equal length.
|
||||
func fastXORWords(dst, a, b []byte) {
|
||||
dw := *(*[]uintptr)(unsafe.Pointer(&dst))
|
||||
aw := *(*[]uintptr)(unsafe.Pointer(&a))
|
||||
bw := *(*[]uintptr)(unsafe.Pointer(&b))
|
||||
n := len(b) / wordSize
|
||||
for i := 0; i < n; i++ {
|
||||
dw[i] = aw[i] ^ bw[i]
|
||||
}
|
||||
}
|
||||
|
||||
func xorWords(dst, a, b []byte) {
|
||||
if supportsUnaligned {
|
||||
fastXORWords(dst, a, b)
|
||||
} else {
|
||||
safeXORBytes(dst, a, b)
|
||||
}
|
||||
}
|
28
src/pkg/crypto/cipher/xor_test.go
Normal file
28
src/pkg/crypto/cipher/xor_test.go
Normal file
@ -0,0 +1,28 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package cipher
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestXOR(t *testing.T) {
|
||||
for alignP := 0; alignP < 2; alignP++ {
|
||||
for alignQ := 0; alignQ < 2; alignQ++ {
|
||||
for alignD := 0; alignD < 2; alignD++ {
|
||||
p := make([]byte, 1024)[alignP:]
|
||||
q := make([]byte, 1024)[alignQ:]
|
||||
d1 := make([]byte, 1024+alignD)[alignD:]
|
||||
d2 := make([]byte, 1024+alignD)[alignD:]
|
||||
xorBytes(d1, p, q)
|
||||
safeXORBytes(d2, p, q)
|
||||
if bytes.Compare(d1, d2) != 0 {
|
||||
t.Error("not equal")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user