mirror of
https://github.com/golang/go
synced 2024-11-17 22:54:48 -07:00
crypto/rc4: optimize generic implementation slightly
The compiler can't currently figure out that it can eliminate both c.s loads (using store to load forwarding) in the second line of the following code: ... c.s[i], c.s[j] = c.s[j], c.s[i] x := c.s[j] + c.s[i] ... The compiler eliminates the second load of c.s[j] (using the original value of c.s[i]), however the load of c.s[i] remains because the compiler doesn't know that c.s[i] and c.s[j] either overlap completely or not at all. Introducing temporaries to make this explicit improves the performance of the generic code slightly, the goal being to remove the assembly in this package in the future. This change also hoists a bounds check out of the main loop which gives a slight performance boost and also makes the behaviour identical to the assembly implementation when len(dst) < len(src). name old speed new speed delta RC4_128-4 491MB/s ± 3% 596MB/s ± 5% +21.51% (p=0.000 n=9+9) RC4_1K-4 504MB/s ± 2% 616MB/s ± 1% +22.33% (p=0.000 n=10+10) RC4_8K-4 509MB/s ± 1% 630MB/s ± 2% +23.85% (p=0.000 n=8+9) Change-Id: I27adc775713b2e74a1a94e0c1de0909fb4379463 Reviewed-on: https://go-review.googlesource.com/102335 Run-TryBot: Michael Munday <mike.munday@ibm.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
This commit is contained in:
parent
58734039bd
commit
41402b59bd
@ -57,12 +57,19 @@ func (c *Cipher) Reset() {
|
||||
// This is the pure Go version. rc4_{amd64,386,arm}* contain assembly
|
||||
// implementations. This is here for tests and to prevent bitrot.
|
||||
func (c *Cipher) xorKeyStreamGeneric(dst, src []byte) {
|
||||
if len(src) == 0 {
|
||||
return
|
||||
}
|
||||
i, j := c.i, c.j
|
||||
_ = dst[len(src)-1]
|
||||
dst = dst[:len(src)] // eliminate bounds check from loop
|
||||
for k, v := range src {
|
||||
i += 1
|
||||
j += uint8(c.s[i])
|
||||
c.s[i], c.s[j] = c.s[j], c.s[i]
|
||||
dst[k] = v ^ uint8(c.s[uint8(c.s[i]+c.s[j])])
|
||||
x := c.s[i]
|
||||
j += uint8(x)
|
||||
y := c.s[j]
|
||||
c.s[i], c.s[j] = y, x
|
||||
dst[k] = v ^ uint8(c.s[uint8(x+y)])
|
||||
}
|
||||
c.i, c.j = i, j
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user