mirror of
https://github.com/golang/go
synced 2024-11-25 10:07:56 -07:00
math/big: assembly versions of bitLen for x86-64, 386, and ARM.
Roughly 2x speedup for the internal bitLen function in arith.go. Added TestWordBitLen test. Performance differences against the new version of bitLen generic: x86-64 Macbook pro (current tip): benchmark old ns/op new ns/op delta big.BenchmarkBitLen0 6 4 -37.40% big.BenchmarkBitLen1 6 2 -51.79% big.BenchmarkBitLen2 6 2 -65.04% big.BenchmarkBitLen3 6 2 -66.10% big.BenchmarkBitLen4 6 2 -60.96% big.BenchmarkBitLen5 6 2 -55.80% big.BenchmarkBitLen8 6 2 -56.19% big.BenchmarkBitLen9 6 2 -64.73% big.BenchmarkBitLen16 7 2 -68.84% big.BenchmarkBitLen17 6 2 -67.11% big.BenchmarkBitLen31 7 2 -61.57% 386 Intel Atom (current tip): benchmark old ns/op new ns/op delta big.BenchmarkBitLen0 23 20 -13.04% big.BenchmarkBitLen1 23 20 -14.77% big.BenchmarkBitLen2 24 20 -19.28% big.BenchmarkBitLen3 25 20 -21.57% big.BenchmarkBitLen4 24 20 -16.94% big.BenchmarkBitLen5 25 20 -20.78% big.BenchmarkBitLen8 24 20 -19.28% big.BenchmarkBitLen9 25 20 -20.47% big.BenchmarkBitLen16 26 20 -23.37% big.BenchmarkBitLen17 26 20 -25.09% big.BenchmarkBitLen31 32 20 -35.51% ARM v5 SheevaPlug, previous weekly patched with bitLen: benchmark old ns/op new ns/op delta big.BenchmarkBitLen0 50 29 -41.73% big.BenchmarkBitLen1 51 29 -42.75% big.BenchmarkBitLen2 59 29 -50.08% big.BenchmarkBitLen3 60 29 -50.75% big.BenchmarkBitLen4 59 29 -50.08% big.BenchmarkBitLen5 60 29 -50.75% big.BenchmarkBitLen8 59 29 -50.08% big.BenchmarkBitLen9 60 29 -50.75% big.BenchmarkBitLen16 69 29 -57.35% big.BenchmarkBitLen17 70 29 -57.89% big.BenchmarkBitLen31 95 29 -69.07% R=golang-dev, minux.ma, gri CC=golang-dev https://golang.org/cl/5574054
This commit is contained in:
parent
d645adc3d0
commit
316f81bb1d
@ -79,7 +79,7 @@ func mulAddWWW_g(x, y, c Word) (z1, z0 Word) {
|
||||
}
|
||||
|
||||
// Length of x in bits.
|
||||
func bitLen(x Word) (n int) {
|
||||
func bitLen_g(x Word) (n int) {
|
||||
for ; x >= 0x8000; x >>= 16 {
|
||||
n += 16
|
||||
}
|
||||
|
@ -245,7 +245,7 @@ E6: CMPL BX, $0 // i < 0
|
||||
RET
|
||||
|
||||
|
||||
// divWVW(z* Word, xn Word, x []Word, y Word) (r Word)
|
||||
// func divWVW(z* Word, xn Word, x []Word, y Word) (r Word)
|
||||
TEXT ·divWVW(SB),7,$0
|
||||
MOVL z+0(FP), DI
|
||||
MOVL xn+12(FP), DX // r = xn
|
||||
@ -263,3 +263,14 @@ E7: SUBL $1, BX // i--
|
||||
|
||||
MOVL DX, r+32(FP)
|
||||
RET
|
||||
|
||||
// func bitLen(x Word) (n int)
|
||||
TEXT ·bitLen(SB),7,$0
|
||||
BSRL x+0(FP), AX
|
||||
JZ Z1
|
||||
INCL AX
|
||||
MOVL AX, n+4(FP)
|
||||
RET
|
||||
|
||||
Z1: MOVL $0, n+4(FP)
|
||||
RET
|
||||
|
@ -243,7 +243,7 @@ E6: CMPQ BX, R11 // i < n
|
||||
RET
|
||||
|
||||
|
||||
// divWVW(z []Word, xn Word, x []Word, y Word) (r Word)
|
||||
// func divWVW(z []Word, xn Word, x []Word, y Word) (r Word)
|
||||
TEXT ·divWVW(SB),7,$0
|
||||
MOVQ z+0(FP), R10
|
||||
MOVQ xn+16(FP), DX // r = xn
|
||||
@ -261,3 +261,14 @@ E7: SUBL $1, BX // i--
|
||||
|
||||
MOVQ DX, r+48(FP)
|
||||
RET
|
||||
|
||||
// func bitLen(x Word) (n int)
|
||||
TEXT ·bitLen(SB),7,$0
|
||||
BSRQ x+0(FP), AX
|
||||
JZ Z1
|
||||
INCQ AX
|
||||
MOVQ AX, n+8(FP)
|
||||
RET
|
||||
|
||||
Z1: MOVQ $0, n+8(FP)
|
||||
RET
|
||||
|
@ -290,7 +290,7 @@ E9:
|
||||
RET
|
||||
|
||||
|
||||
// divWVW(z* Word, xn Word, x []Word, y Word) (r Word)
|
||||
// func divWVW(z* Word, xn Word, x []Word, y Word) (r Word)
|
||||
TEXT ·divWVW(SB),7,$0
|
||||
// ARM has no multiword division, so use portable code.
|
||||
B ·divWVW_g(SB)
|
||||
@ -310,3 +310,12 @@ TEXT ·mulWW(SB),7,$0
|
||||
MOVW R4, z1+8(FP)
|
||||
MOVW R3, z0+12(FP)
|
||||
RET
|
||||
|
||||
// func bitLen(x Word) (n int)
|
||||
TEXT ·bitLen(SB),7,$0
|
||||
MOVW x+0(FP), R0
|
||||
WORD $0xe16f0f10 // CLZ R0, R0 (count leading zeros)
|
||||
MOVW $32, R1
|
||||
SUB.S R0, R1
|
||||
MOVW R1, n+4(FP)
|
||||
RET
|
||||
|
@ -16,3 +16,4 @@ func shrVU(z, x []Word, s uint) (c Word)
|
||||
func mulAddVWW(z, x []Word, y, r Word) (c Word)
|
||||
func addMulVVW(z, x []Word, y Word) (c Word)
|
||||
func divWVW(z []Word, xn Word, x []Word, y Word) (r Word)
|
||||
func bitLen(x Word) (n int)
|
||||
|
@ -334,6 +334,29 @@ func TestMulAddWWW(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestWordBitLen(t *testing.T) {
|
||||
// Test every possible output of bitLen with the high bit set
|
||||
// and then with all bits below max set
|
||||
z := bitLen(0)
|
||||
if z != 0 {
|
||||
t.Errorf("0 got %d want 0", z)
|
||||
}
|
||||
x := Word(1) // Will be ...00010000...
|
||||
y := Word(1) // Will be ...00011111...
|
||||
for i := 1; i <= _W; i++ {
|
||||
z = bitLen(x)
|
||||
if z != i {
|
||||
t.Errorf("%x got %d want %d", x, z, i)
|
||||
}
|
||||
z = bitLen(y)
|
||||
if z != i {
|
||||
t.Errorf("%x got %d want %d", y, z, i)
|
||||
}
|
||||
x <<= 1
|
||||
y = (y << 1) | 0x1
|
||||
}
|
||||
}
|
||||
|
||||
// runs b.N iterations of bitLen called on a Word containing (1 << nbits)-1.
|
||||
func benchmarkBitLenN(b *testing.B, nbits uint) {
|
||||
testword := Word((uint64(1) << nbits) - 1)
|
||||
|
Loading…
Reference in New Issue
Block a user