mirror of
https://github.com/golang/go
synced 2024-11-21 23:34:42 -07:00
Symmetric changes to md4.go as for md5.go.
Use uint index variables in some cases instead of int to enable strength reduction; this makes it possible for the compiler to reduce % into masks. Old code: 6g -S md4.go md4block.go | grep "md4block.go:44" 0471 (md4block.go:44) MOVL AX,BX 0472 (md4block.go:44) MOVL AX,BP 0473 (md4block.go:44) MOVL AX,R8 0474 (md4block.go:44) SARL $31,R8 0475 (md4block.go:44) SHRL $30,R8 0476 (md4block.go:44) ADDL R8,BP 0477 (md4block.go:44) SARL $2,BP 0478 (md4block.go:44) IMULL $4,BP 0479 (md4block.go:44) SUBL BP,BX 0480 (md4block.go:44) MOVLQSX BX,BX 0481 (md4block.go:44) LEAQ shift1+0(SB),BP 0482 (md4block.go:44) CMPL BX,8(BP) 0483 (md4block.go:44) JCS ,485 0484 (md4block.go:44) CALL ,runtime.throwindex+0(SB) 0485 (md4block.go:44) MOVQ (BP),BP 0486 (md4block.go:44) MOVL (BP)(BX*4),DI New code: 6g -S md4.go md4block.go | grep "md4block.go:44" 0471 (md4block.go:44) MOVL AX,BX 0472 (md4block.go:44) ANDL $3,BX 0473 (md4block.go:44) MOVLQZX BX,BX 0474 (md4block.go:44) LEAQ shift1+0(SB),BP 0475 (md4block.go:44) CMPL BX,8(BP) 0476 (md4block.go:44) JCS ,478 0477 (md4block.go:44) CALL ,runtime.throwindex+0(SB) 0478 (md4block.go:44) MOVQ (BP),BP 0479 (md4block.go:44) MOVL (BP)(BX*4),DI R=agl, agl1 CC=golang-dev https://golang.org/cl/181086
This commit is contained in:
parent
9d07d37f31
commit
f0fcb2d59f
@ -68,8 +68,8 @@ func (d *digest) Write(p []byte) (nn int, err os.Error) {
|
||||
n := _Block(d, p)
|
||||
p = p[n:]
|
||||
if len(p) > 0 {
|
||||
for i := 0; i < len(p); i++ {
|
||||
d.x[i] = p[i]
|
||||
for i, x := range p {
|
||||
d.x[i] = x
|
||||
}
|
||||
d.nx = len(p)
|
||||
}
|
||||
@ -100,16 +100,12 @@ func (d *digest) Sum() []byte {
|
||||
|
||||
p := make([]byte, 16)
|
||||
j := 0
|
||||
for i := 0; i < 4; i++ {
|
||||
s := d.s[i]
|
||||
p[j] = byte(s)
|
||||
j++
|
||||
p[j] = byte(s >> 8)
|
||||
j++
|
||||
p[j] = byte(s >> 16)
|
||||
j++
|
||||
p[j] = byte(s >> 24)
|
||||
j++
|
||||
for _, s := range d.s {
|
||||
p[j+0] = byte(s >> 0)
|
||||
p[j+1] = byte(s >> 8)
|
||||
p[j+2] = byte(s >> 16)
|
||||
p[j+3] = byte(s >> 24)
|
||||
j += 4
|
||||
}
|
||||
return p
|
||||
}
|
||||
|
@ -25,9 +25,10 @@ func _Block(dig *digest, p []byte) int {
|
||||
for len(p) >= _Chunk {
|
||||
aa, bb, cc, dd := a, b, c, d
|
||||
|
||||
j := 0
|
||||
for i := 0; i < 16; i++ {
|
||||
j := i * 4
|
||||
X[i] = uint32(p[j]) | uint32(p[j+1])<<8 | uint32(p[j+2])<<16 | uint32(p[j+3])<<24
|
||||
j += 4
|
||||
}
|
||||
|
||||
// If this needs to be made faster in the future,
|
||||
@ -37,9 +38,12 @@ func _Block(dig *digest, p []byte) int {
|
||||
// with suitable variable renaming in each
|
||||
// unrolled body, delete the a, b, c, d = d, a, b, c
|
||||
// (or you can let the optimizer do the renaming).
|
||||
//
|
||||
// The index variables are uint so that % by a power
|
||||
// of two can be optimized easily by a compiler.
|
||||
|
||||
// Round 1.
|
||||
for i := 0; i < 16; i++ {
|
||||
for i := uint(0); i < 16; i++ {
|
||||
x := i
|
||||
s := shift1[i%4]
|
||||
f := ((c ^ d) & b) ^ d
|
||||
@ -49,7 +53,7 @@ func _Block(dig *digest, p []byte) int {
|
||||
}
|
||||
|
||||
// Round 2.
|
||||
for i := 0; i < 16; i++ {
|
||||
for i := uint(0); i < 16; i++ {
|
||||
x := xIndex2[i]
|
||||
s := shift2[i%4]
|
||||
g := (b & c) | (b & d) | (c & d)
|
||||
@ -59,7 +63,7 @@ func _Block(dig *digest, p []byte) int {
|
||||
}
|
||||
|
||||
// Round 3.
|
||||
for i := 0; i < 16; i++ {
|
||||
for i := uint(0); i < 16; i++ {
|
||||
x := xIndex3[i]
|
||||
s := shift3[i%4]
|
||||
h := b ^ c ^ d
|
||||
|
@ -98,9 +98,10 @@ func _Block(dig *digest, p []byte) int {
|
||||
for len(p) >= _Chunk {
|
||||
aa, bb, cc, dd := a, b, c, d
|
||||
|
||||
j := 0
|
||||
for i := 0; i < 16; i++ {
|
||||
j := i * 4
|
||||
X[i] = uint32(p[j]) | uint32(p[j+1])<<8 | uint32(p[j+2])<<16 | uint32(p[j+3])<<24
|
||||
j += 4
|
||||
}
|
||||
|
||||
// If this needs to be made faster in the future,
|
||||
@ -110,52 +111,47 @@ func _Block(dig *digest, p []byte) int {
|
||||
// with suitable variable renaming in each
|
||||
// unrolled body, delete the a, b, c, d = d, a, b, c
|
||||
// (or you can let the optimizer do the renaming).
|
||||
//
|
||||
// The index variables are uint so that % by a power
|
||||
// of two can be optimized easily by a compiler.
|
||||
|
||||
// Round 1.
|
||||
for i := 0; i < 16; i++ {
|
||||
for i := uint(0); i < 16; i++ {
|
||||
x := i
|
||||
t := i
|
||||
s := shift1[i%4]
|
||||
f := ((c ^ d) & b) ^ d
|
||||
a += f + X[x] + table[t]
|
||||
a = a<<s | a>>(32-s)
|
||||
a += b
|
||||
a += f + X[x] + table[i]
|
||||
a = a<<s | a>>(32-s) + b
|
||||
a, b, c, d = d, a, b, c
|
||||
}
|
||||
|
||||
// Round 2.
|
||||
for i := 0; i < 16; i++ {
|
||||
for i := uint(0); i < 16; i++ {
|
||||
x := (1 + 5*i) % 16
|
||||
t := 16 + i
|
||||
s := shift2[i%4]
|
||||
g := ((b ^ c) & d) ^ c
|
||||
a += g + X[x] + table[t]
|
||||
a = a<<s | a>>(32-s)
|
||||
a += b
|
||||
a += g + X[x] + table[i+16]
|
||||
a = a<<s | a>>(32-s) + b
|
||||
a, b, c, d = d, a, b, c
|
||||
}
|
||||
|
||||
// Round 3.
|
||||
for i := 0; i < 16; i++ {
|
||||
for i := uint(0); i < 16; i++ {
|
||||
x := (5 + 3*i) % 16
|
||||
t := 32 + i
|
||||
s := shift3[i%4]
|
||||
h := b ^ c ^ d
|
||||
a += h + X[x] + table[t]
|
||||
a = a<<s | a>>(32-s)
|
||||
a += b
|
||||
a += h + X[x] + table[i+32]
|
||||
a = a<<s | a>>(32-s) + b
|
||||
a, b, c, d = d, a, b, c
|
||||
}
|
||||
|
||||
// Round 4.
|
||||
for i := 0; i < 16; i++ {
|
||||
for i := uint(0); i < 16; i++ {
|
||||
x := (7 * i) % 16
|
||||
s := shift4[i%4]
|
||||
t := 48 + i
|
||||
ii := c ^ (b | ^d)
|
||||
a += ii + X[x] + table[t]
|
||||
a = a<<s | a>>(32-s)
|
||||
a += b
|
||||
j := c ^ (b | ^d)
|
||||
a += j + X[x] + table[i+48]
|
||||
a = a<<s | a>>(32-s) + b
|
||||
a, b, c, d = d, a, b, c
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user