1
0
mirror of https://github.com/golang/go synced 2024-11-23 13:20:09 -07:00

crypto/cipher: speed up xor operations in CBC, CFB, OBF, CTR

and GCM on 386 and amd64

Intel(R) Core(TM) i5-2540M CPU @ 2.60GHz:

benchmark                    old MB/s     new MB/s  speedup
BenchmarkAESGCMSeal1K           82.39        92.05    1.12x
BenchmarkAESGCMOpen1K           82.28        91.88    1.12x
BenchmarkAESCFBEncrypt1K       141.54       277.59    1.96x
BenchmarkAESCFBDecrypt1K       133.06       278.07    2.09x
BenchmarkAESOFB1K              160.51       380.24    2.37x
BenchmarkAESCTR1K              164.07       429.25    2.62x
BenchmarkAESCBCEncrypt1K       170.99       263.74    1.54x
BenchmarkAESCBCDecrypt1K       124.96       249.14    1.99x

Fixes #6741.

R=agl, dave, agl
CC=golang-dev
https://golang.org/cl/24250044
This commit is contained in:
Han-Wen Nienhuys 2013-12-11 16:05:02 -05:00 committed by Adam Langley
parent 426b48a775
commit 5744be9fe4
9 changed files with 361 additions and 96 deletions

View File

@ -0,0 +1,139 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cipher_test
import (
"crypto/aes"
"crypto/cipher"
"testing"
)
func BenchmarkAESGCMSeal1K(b *testing.B) {
buf := make([]byte, 1024)
b.SetBytes(int64(len(buf)))
var key [16]byte
var nonce [12]byte
aes, _ := aes.NewCipher(key[:])
aesgcm, _ := cipher.NewGCM(aes)
var out []byte
b.ResetTimer()
for i := 0; i < b.N; i++ {
out = aesgcm.Seal(out[:0], nonce[:], buf, nonce[:])
}
}
func BenchmarkAESGCMOpen1K(b *testing.B) {
buf := make([]byte, 1024)
b.SetBytes(int64(len(buf)))
var key [16]byte
var nonce [12]byte
aes, _ := aes.NewCipher(key[:])
aesgcm, _ := cipher.NewGCM(aes)
var out []byte
out = aesgcm.Seal(out[:0], nonce[:], buf, nonce[:])
b.ResetTimer()
for i := 0; i < b.N; i++ {
_, err := aesgcm.Open(buf[:0], nonce[:], out, nonce[:])
if err != nil {
b.Errorf("Open: %v", err)
}
}
}
// If we test exactly 1K blocks, we would generate exact multiples of
// the cipher's block size, and and the cipher stream fragments would
// always be wordsize aligned, whereas non-aligned is a more typical
// use-case.
const almost1K = 1024 - 5
func BenchmarkAESCFBEncrypt1K(b *testing.B) {
buf := make([]byte, almost1K)
b.SetBytes(int64(len(buf)))
var key [16]byte
var iv [16]byte
aes, _ := aes.NewCipher(key[:])
ctr := cipher.NewCFBEncrypter(aes, iv[:])
b.ResetTimer()
for i := 0; i < b.N; i++ {
ctr.XORKeyStream(buf, buf)
}
}
func BenchmarkAESCFBDecrypt1K(b *testing.B) {
buf := make([]byte, almost1K)
b.SetBytes(int64(len(buf)))
var key [16]byte
var iv [16]byte
aes, _ := aes.NewCipher(key[:])
ctr := cipher.NewCFBDecrypter(aes, iv[:])
b.ResetTimer()
for i := 0; i < b.N; i++ {
ctr.XORKeyStream(buf, buf)
}
}
func BenchmarkAESOFB1K(b *testing.B) {
buf := make([]byte, almost1K)
b.SetBytes(int64(len(buf)))
var key [16]byte
var iv [16]byte
aes, _ := aes.NewCipher(key[:])
ctr := cipher.NewOFB(aes, iv[:])
b.ResetTimer()
for i := 0; i < b.N; i++ {
ctr.XORKeyStream(buf, buf)
}
}
func BenchmarkAESCTR1K(b *testing.B) {
buf := make([]byte, almost1K)
b.SetBytes(int64(len(buf)))
var key [16]byte
var iv [16]byte
aes, _ := aes.NewCipher(key[:])
ctr := cipher.NewCTR(aes, iv[:])
b.ResetTimer()
for i := 0; i < b.N; i++ {
ctr.XORKeyStream(buf, buf)
}
}
func BenchmarkAESCBCEncrypt1K(b *testing.B) {
buf := make([]byte, 1024)
b.SetBytes(int64(len(buf)))
var key [16]byte
var iv [16]byte
aes, _ := aes.NewCipher(key[:])
cbc := cipher.NewCBCEncrypter(aes, iv[:])
for i := 0; i < b.N; i++ {
cbc.CryptBlocks(buf, buf)
}
}
func BenchmarkAESCBCDecrypt1K(b *testing.B) {
buf := make([]byte, 1024)
b.SetBytes(int64(len(buf)))
var key [16]byte
var iv [16]byte
aes, _ := aes.NewCipher(key[:])
cbc := cipher.NewCBCDecrypter(aes, iv[:])
for i := 0; i < b.N; i++ {
cbc.CryptBlocks(buf, buf)
}
}

View File

@ -49,13 +49,9 @@ func (x *cbcEncrypter) CryptBlocks(dst, src []byte) {
panic("crypto/cipher: output smaller than input") panic("crypto/cipher: output smaller than input")
} }
for len(src) > 0 { for len(src) > 0 {
for i := 0; i < x.blockSize; i++ { xorBytes(x.iv, x.iv, src[:x.blockSize])
x.iv[i] ^= src[i]
}
x.b.Encrypt(x.iv, x.iv) x.b.Encrypt(x.iv, x.iv)
for i := 0; i < x.blockSize; i++ { copy(dst, x.iv)
dst[i] = x.iv[i]
}
src = src[x.blockSize:] src = src[x.blockSize:]
dst = dst[x.blockSize:] dst = dst[x.blockSize:]
} }
@ -91,12 +87,9 @@ func (x *cbcDecrypter) CryptBlocks(dst, src []byte) {
} }
for len(src) > 0 { for len(src) > 0 {
x.b.Decrypt(x.tmp, src[:x.blockSize]) x.b.Decrypt(x.tmp, src[:x.blockSize])
for i := 0; i < x.blockSize; i++ { xorBytes(x.tmp, x.tmp, x.iv)
x.tmp[i] ^= x.iv[i] copy(x.iv, src)
x.iv[i] = src[i] copy(dst, x.tmp)
dst[i] = x.tmp[i]
}
src = src[x.blockSize:] src = src[x.blockSize:]
dst = dst[x.blockSize:] dst = dst[x.blockSize:]
} }

View File

@ -8,18 +8,40 @@ package cipher
type cfb struct { type cfb struct {
b Block b Block
next []byte
out []byte out []byte
outUsed int outUsed int
decrypt bool decrypt bool
} }
func (x *cfb) XORKeyStream(dst, src []byte) {
for i := 0; i < len(src); i++ {
if x.outUsed == len(x.out) {
x.b.Encrypt(x.out, x.next)
x.outUsed = 0
}
n := xorBytes(dst, src, x.out[x.outUsed:])
if x.decrypt {
// We can precompute a larger segment of the
// keystream on decryption. This will allow
// larger batches for xor, and we should be
// able to match CTR/OFB performance.
copy(x.next[x.outUsed:], src[:n])
} else {
copy(x.next[x.outUsed:], dst[:n])
}
dst = dst[n:]
src = src[n:]
x.outUsed += n
}
}
// NewCFBEncrypter returns a Stream which encrypts with cipher feedback mode, // NewCFBEncrypter returns a Stream which encrypts with cipher feedback mode,
// using the given Block. The iv must be the same length as the Block's block // using the given Block. The iv must be the same length as the Block's block
// size. // size.
func NewCFBEncrypter(block Block, iv []byte) Stream { func NewCFBEncrypter(block Block, iv []byte) Stream {
if len(iv) != block.BlockSize() {
panic("cipher.NewCBFEncrypter: IV length must equal block size")
}
return newCFB(block, iv, false) return newCFB(block, iv, false)
} }
@ -27,44 +49,23 @@ func NewCFBEncrypter(block Block, iv []byte) Stream {
// using the given Block. The iv must be the same length as the Block's block // using the given Block. The iv must be the same length as the Block's block
// size. // size.
func NewCFBDecrypter(block Block, iv []byte) Stream { func NewCFBDecrypter(block Block, iv []byte) Stream {
if len(iv) != block.BlockSize() {
panic("cipher.NewCBFEncrypter: IV length must equal block size")
}
return newCFB(block, iv, true) return newCFB(block, iv, true)
} }
func newCFB(block Block, iv []byte, decrypt bool) Stream { func newCFB(block Block, iv []byte, decrypt bool) Stream {
blockSize := block.BlockSize() blockSize := block.BlockSize()
if len(iv) != blockSize { if len(iv) != blockSize {
return nil // stack trace will indicate whether it was de or encryption
panic("cipher.newCFB: IV length must equal block size")
} }
x := &cfb{ x := &cfb{
b: block, b: block,
out: make([]byte, blockSize), out: make([]byte, blockSize),
outUsed: 0, next: make([]byte, blockSize),
outUsed: blockSize,
decrypt: decrypt, decrypt: decrypt,
} }
block.Encrypt(x.out, iv) copy(x.next, iv)
return x return x
} }
func (x *cfb) XORKeyStream(dst, src []byte) {
for i := 0; i < len(src); i++ {
if x.outUsed == len(x.out) {
x.b.Encrypt(x.out, x.out)
x.outUsed = 0
}
if x.decrypt {
t := src[i]
dst[i] = src[i] ^ x.out[x.outUsed]
x.out[x.outUsed] = t
} else {
x.out[x.outUsed] ^= src[i]
dst[i] = x.out[x.outUsed]
}
x.outUsed++
}
}

View File

@ -19,37 +19,58 @@ type ctr struct {
outUsed int outUsed int
} }
const streamBufferSize = 512
// NewCTR returns a Stream which encrypts/decrypts using the given Block in // NewCTR returns a Stream which encrypts/decrypts using the given Block in
// counter mode. The length of iv must be the same as the Block's block size. // counter mode. The length of iv must be the same as the Block's block size.
func NewCTR(block Block, iv []byte) Stream { func NewCTR(block Block, iv []byte) Stream {
if len(iv) != block.BlockSize() { if len(iv) != block.BlockSize() {
panic("cipher.NewCTR: IV length must equal block size") panic("cipher.NewCTR: IV length must equal block size")
} }
bufSize := streamBufferSize
if bufSize < block.BlockSize() {
bufSize = block.BlockSize()
}
return &ctr{ return &ctr{
b: block, b: block,
ctr: dup(iv), ctr: dup(iv),
out: make([]byte, len(iv)), out: make([]byte, 0, bufSize),
outUsed: len(iv), outUsed: 0,
} }
} }
func (x *ctr) refill() {
remain := len(x.out) - x.outUsed
if remain > x.outUsed {
return
}
copy(x.out, x.out[x.outUsed:])
x.out = x.out[:cap(x.out)]
bs := x.b.BlockSize()
for remain < len(x.out)-bs {
x.b.Encrypt(x.out[remain:], x.ctr)
remain += bs
// Increment counter
for i := len(x.ctr) - 1; i >= 0; i-- {
x.ctr[i]++
if x.ctr[i] != 0 {
break
}
}
}
x.out = x.out[:remain]
x.outUsed = 0
}
func (x *ctr) XORKeyStream(dst, src []byte) { func (x *ctr) XORKeyStream(dst, src []byte) {
for i := 0; i < len(src); i++ { for len(src) > 0 {
if x.outUsed == len(x.ctr) { if x.outUsed >= len(x.out)-x.b.BlockSize() {
x.b.Encrypt(x.out, x.ctr) x.refill()
x.outUsed = 0
// Increment counter
for i := len(x.ctr) - 1; i >= 0; i-- {
x.ctr[i]++
if x.ctr[i] != 0 {
break
}
}
} }
n := xorBytes(dst, src, x.out[x.outUsed:])
dst[i] = src[i] ^ x.out[x.outUsed] dst = dst[n:]
x.outUsed++ src = src[n:]
x.outUsed += n
} }
} }

View File

@ -289,9 +289,7 @@ func (g *gcm) counterCrypt(out, in []byte, counter *[gcmBlockSize]byte) {
g.cipher.Encrypt(mask[:], counter[:]) g.cipher.Encrypt(mask[:], counter[:])
gcmInc32(counter) gcmInc32(counter)
for i := range mask { xorWords(out, in, mask[:])
out[i] = in[i] ^ mask[i]
}
out = out[gcmBlockSize:] out = out[gcmBlockSize:]
in = in[gcmBlockSize:] in = in[gcmBlockSize:]
} }
@ -299,10 +297,7 @@ func (g *gcm) counterCrypt(out, in []byte, counter *[gcmBlockSize]byte) {
if len(in) > 0 { if len(in) > 0 {
g.cipher.Encrypt(mask[:], counter[:]) g.cipher.Encrypt(mask[:], counter[:])
gcmInc32(counter) gcmInc32(counter)
xorBytes(out, in, mask[:])
for i := range in {
out[i] = in[i] ^ mask[i]
}
} }
} }
@ -321,9 +316,7 @@ func (g *gcm) auth(out, ciphertext, additionalData []byte, tagMask *[gcmTagSize]
putUint64(out, y.low) putUint64(out, y.low)
putUint64(out[8:], y.high) putUint64(out[8:], y.high)
for i := range tagMask { xorWords(out, out, tagMask[:])
out[i] ^= tagMask[i]
}
} }
func getUint64(data []byte) uint64 { func getUint64(data []byte) uint64 {

View File

@ -157,19 +157,3 @@ func TestAESGCM(t *testing.T) {
ct[0] ^= 0x80 ct[0] ^= 0x80
} }
} }
func BenchmarkAESGCM(b *testing.B) {
buf := make([]byte, 1024)
b.SetBytes(int64(len(buf)))
var key [16]byte
var nonce [12]byte
aes, _ := aes.NewCipher(key[:])
aesgcm, _ := cipher.NewGCM(aes)
var out []byte
b.ResetTimer()
for i := 0; i < b.N; i++ {
out = aesgcm.Seal(out[:0], nonce[:], buf, nonce[:])
}
}

View File

@ -8,6 +8,7 @@ package cipher
type ofb struct { type ofb struct {
b Block b Block
cipher []byte
out []byte out []byte
outUsed int outUsed int
} }
@ -20,25 +21,46 @@ func NewOFB(b Block, iv []byte) Stream {
if len(iv) != blockSize { if len(iv) != blockSize {
return nil return nil
} }
bufSize := streamBufferSize
if bufSize < blockSize {
bufSize = blockSize
}
x := &ofb{ x := &ofb{
b: b, b: b,
out: make([]byte, blockSize), cipher: make([]byte, blockSize),
out: make([]byte, 0, bufSize),
outUsed: 0, outUsed: 0,
} }
b.Encrypt(x.out, iv)
copy(x.cipher, iv)
return x return x
} }
func (x *ofb) XORKeyStream(dst, src []byte) { func (x *ofb) refill() {
for i, s := range src { bs := x.b.BlockSize()
if x.outUsed == len(x.out) { remain := len(x.out) - x.outUsed
x.b.Encrypt(x.out, x.out) if remain > x.outUsed {
x.outUsed = 0 return
} }
copy(x.out, x.out[x.outUsed:])
x.out = x.out[:cap(x.out)]
for remain < len(x.out)-bs {
x.b.Encrypt(x.cipher, x.cipher)
copy(x.out[remain:], x.cipher)
remain += bs
}
x.out = x.out[:remain]
x.outUsed = 0
}
dst[i] = s ^ x.out[x.outUsed] func (x *ofb) XORKeyStream(dst, src []byte) {
x.outUsed++ for len(src) > 0 {
if x.outUsed >= len(x.out)-x.b.BlockSize() {
x.refill()
}
n := xorBytes(dst, src, x.out[x.outUsed:])
dst = dst[n:]
src = src[n:]
x.outUsed += n
} }
} }

View File

@ -0,0 +1,84 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cipher
import (
"runtime"
"unsafe"
)
const wordSize = int(unsafe.Sizeof(uintptr(0)))
const supportsUnaligned = runtime.GOARCH == "386" || runtime.GOARCH == "amd64"
// fastXORBytes xors in bulk. It only works on architectures that
// support unaligned read/writes.
func fastXORBytes(dst, a, b []byte) int {
n := len(a)
if len(b) < n {
n = len(b)
}
w := n / wordSize
if w > 0 {
dw := *(*[]uintptr)(unsafe.Pointer(&dst))
aw := *(*[]uintptr)(unsafe.Pointer(&a))
bw := *(*[]uintptr)(unsafe.Pointer(&b))
for i := 0; i < w; i++ {
dw[i] = aw[i] ^ bw[i]
}
}
for i := (n - n%wordSize); i < n; i++ {
dst[i] = a[i] ^ b[i]
}
return n
}
func safeXORBytes(dst, a, b []byte) int {
n := len(a)
if len(b) < n {
n = len(b)
}
for i := 0; i < n; i++ {
dst[i] = a[i] ^ b[i]
}
return n
}
// xorBytes xors the bytes in a and b. The destination is assumed to have enough
// space. Returns the number of bytes xor'd.
func xorBytes(dst, a, b []byte) int {
if supportsUnaligned {
return fastXORBytes(dst, a, b)
} else {
// TODO(hanwen): if (dst, a, b) have common alignment
// we could still try fastXORBytes. It is not clear
// how often this happens, and it's only worth it if
// the block encryption itself is hardware
// accelerated.
return safeXORBytes(dst, a, b)
}
}
// fastXORWords XORs multiples of 4 or 8 bytes (depending on architecture.)
// The arguments are assumed to be of equal length.
func fastXORWords(dst, a, b []byte) {
dw := *(*[]uintptr)(unsafe.Pointer(&dst))
aw := *(*[]uintptr)(unsafe.Pointer(&a))
bw := *(*[]uintptr)(unsafe.Pointer(&b))
n := len(b) / wordSize
for i := 0; i < n; i++ {
dw[i] = aw[i] ^ bw[i]
}
}
func xorWords(dst, a, b []byte) {
if supportsUnaligned {
fastXORWords(dst, a, b)
} else {
safeXORBytes(dst, a, b)
}
}

View File

@ -0,0 +1,28 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cipher
import (
"bytes"
"testing"
)
func TestXOR(t *testing.T) {
for alignP := 0; alignP < 2; alignP++ {
for alignQ := 0; alignQ < 2; alignQ++ {
for alignD := 0; alignD < 2; alignD++ {
p := make([]byte, 1024)[alignP:]
q := make([]byte, 1024)[alignQ:]
d1 := make([]byte, 1024+alignD)[alignD:]
d2 := make([]byte, 1024+alignD)[alignD:]
xorBytes(d1, p, q)
safeXORBytes(d2, p, q)
if bytes.Compare(d1, d2) != 0 {
t.Error("not equal")
}
}
}
}
}