From 80e1cf73eb673b352a7888141c42ab9ab16488df Mon Sep 17 00:00:00 2001 From: Shenghou Ma Date: Thu, 7 Feb 2013 18:54:21 +0800 Subject: [PATCH] =?UTF-8?q?crypto/rc4:=20na=C3=AFve=20ARM=20assembly=20imp?= =?UTF-8?q?lementation?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On 800MHz Cortex-A8: benchmark old ns/op new ns/op delta BenchmarkRC4_128 9395 2838 -69.79% BenchmarkRC4_1K 74497 22120 -70.31% BenchmarkRC4_8K 587243 171435 -70.81% benchmark old MB/s new MB/s speedup BenchmarkRC4_128 13.62 45.09 3.31x BenchmarkRC4_1K 13.75 46.29 3.37x BenchmarkRC4_8K 13.79 47.22 3.42x Result for "OpenSSL 1.0.1c 10 May 2012" from Debian/armhf sid: type 16 bytes 64 bytes 256 bytes 1024 bytes 8192 bytes rc4 39553.81k 46522.39k 49336.11k 50085.63k 50258.06k R=golang-dev, agl, dave CC=golang-dev https://golang.org/cl/7310051 --- src/pkg/crypto/rc4/rc4_arm.s | 58 +++++++++++++++++++++++++++++++++++ src/pkg/crypto/rc4/rc4_asm.go | 2 +- src/pkg/crypto/rc4/rc4_ref.go | 2 +- 3 files changed, 60 insertions(+), 2 deletions(-) create mode 100644 src/pkg/crypto/rc4/rc4_arm.s diff --git a/src/pkg/crypto/rc4/rc4_arm.s b/src/pkg/crypto/rc4/rc4_arm.s new file mode 100644 index 0000000000..51a332f624 --- /dev/null +++ b/src/pkg/crypto/rc4/rc4_arm.s @@ -0,0 +1,58 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Registers +dst = 0 +src = 1 +n = 2 +state = 3 +pi = 4 +pj = 5 +i = 6 +j = 7 +k = 8 +t = 11 +t2 = 12 + +// func xorKeyStream(dst, src *byte, n int, state *[256]byte, i, j *uint8) +TEXT ·xorKeyStream(SB),7,$0 + MOVW 0(FP), R(dst) + MOVW 4(FP), R(src) + MOVW 8(FP), R(n) + MOVW 12(FP), R(state) + MOVW 16(FP), R(pi) + MOVW 20(FP), R(pj) + MOVBU (R(pi)), R(i) + MOVBU (R(pj)), R(j) + MOVW $0, R(k) + +loop: + // i += 1; j += state[i] + ADD $1, R(i) + AND $0xff, R(i) + MOVBU R(i)<<0(R(state)), R(t) + ADD R(t), R(j) + AND $0xff, R(j) + + // swap state[i] <-> state[j] + MOVBU R(j)<<0(R(state)), R(t2) + MOVB R(t2), R(i)<<0(R(state)) + MOVB R(t), R(j)<<0(R(state)) + + // dst[k] = src[k] ^ state[state[i] + state[j]] + ADD R(t2), R(t) + AND $0xff, R(t) + MOVBU R(t)<<0(R(state)), R(t) + MOVBU R(k)<<0(R(src)), R(t2) + EOR R(t), R(t2) + MOVB R(t2), R(k)<<0(R(dst)) + + ADD $1, R(k) + CMP R(k), R(n) + BNE loop + +done: + MOVB R(i), (R(pi)) + MOVB R(j), (R(pj)) + RET diff --git a/src/pkg/crypto/rc4/rc4_asm.go b/src/pkg/crypto/rc4/rc4_asm.go index 6bb24398e2..0b66e4a9e2 100644 --- a/src/pkg/crypto/rc4/rc4_asm.go +++ b/src/pkg/crypto/rc4/rc4_asm.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build amd64 +// +build amd64 arm package rc4 diff --git a/src/pkg/crypto/rc4/rc4_ref.go b/src/pkg/crypto/rc4/rc4_ref.go index 4d47299cbb..1018548c24 100644 --- a/src/pkg/crypto/rc4/rc4_ref.go +++ b/src/pkg/crypto/rc4/rc4_ref.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build !amd64 +// +build !amd64,!arm package rc4