mirror of
https://github.com/golang/go
synced 2024-11-19 03:54:42 -07:00
math/big: faster "pure Go" addition/subtraction for long vectors
(platforms w/o corresponding assembly kernels) For short vector adds there's some erradic slow-down, but overall these routines have become significantly faster. This only matters for platforms w/o native (assembly) versions of these kernels, so we are not concerned about the minor slow-down for short vectors. This code was already reviewed under Mercurial (golang.org/cl/172810043) but wasn't submitted before the switch to git. Benchmarks run on 2.3GHz Intel Core i7, running OS X 10.9.5, with the respective AddVV and AddVW assembly routines disabled. benchmark old ns/op new ns/op delta BenchmarkAddVV_1 6.59 7.09 +7.59% BenchmarkAddVV_2 10.3 10.1 -1.94% BenchmarkAddVV_3 10.9 12.6 +15.60% BenchmarkAddVV_4 13.9 15.6 +12.23% BenchmarkAddVV_5 16.8 17.3 +2.98% BenchmarkAddVV_1e1 29.5 29.9 +1.36% BenchmarkAddVV_1e2 246 232 -5.69% BenchmarkAddVV_1e3 2374 2185 -7.96% BenchmarkAddVV_1e4 58942 22292 -62.18% BenchmarkAddVV_1e5 668622 225279 -66.31% BenchmarkAddVW_1 6.81 5.58 -18.06% BenchmarkAddVW_2 7.69 6.86 -10.79% BenchmarkAddVW_3 9.56 8.32 -12.97% BenchmarkAddVW_4 12.1 9.53 -21.24% BenchmarkAddVW_5 13.2 10.9 -17.42% BenchmarkAddVW_1e1 23.4 18.0 -23.08% BenchmarkAddVW_1e2 175 141 -19.43% BenchmarkAddVW_1e3 1568 1266 -19.26% BenchmarkAddVW_1e4 15425 12596 -18.34% BenchmarkAddVW_1e5 156737 133539 -14.80% BenchmarkFibo 381678466 132958666 -65.16% benchmark old MB/s new MB/s speedup BenchmarkAddVV_1 9715.25 9028.30 0.93x BenchmarkAddVV_2 12461.72 12622.60 1.01x BenchmarkAddVV_3 17549.64 15243.82 0.87x BenchmarkAddVV_4 18392.54 16398.29 0.89x BenchmarkAddVV_5 18995.23 18496.57 0.97x BenchmarkAddVV_1e1 21708.98 21438.28 0.99x BenchmarkAddVV_1e2 25956.53 27506.88 1.06x BenchmarkAddVV_1e3 26947.93 29286.66 1.09x BenchmarkAddVV_1e4 10857.96 28709.46 2.64x BenchmarkAddVV_1e5 9571.91 28409.21 2.97x BenchmarkAddVW_1 1175.28 1433.98 1.22x BenchmarkAddVW_2 2080.01 2332.54 1.12x BenchmarkAddVW_3 2509.28 2883.97 1.15x BenchmarkAddVW_4 2646.09 3356.83 1.27x BenchmarkAddVW_5 3020.69 3671.07 1.22x BenchmarkAddVW_1e1 3425.76 4441.40 1.30x BenchmarkAddVW_1e2 4553.17 5642.96 1.24x BenchmarkAddVW_1e3 5100.14 6318.72 1.24x BenchmarkAddVW_1e4 5186.15 6350.96 1.22x BenchmarkAddVW_1e5 5104.07 5990.74 1.17x Change-Id: I7a62023b1105248a0e85e5b9819d3fd4266123d4 Reviewed-on: https://go-review.googlesource.com/2480 Reviewed-by: Russ Cox <rsc@golang.org> Reviewed-by: Alan Donovan <adonovan@google.com>
This commit is contained in:
parent
80b3ff9f82
commit
067acd51b0
@ -70,7 +70,7 @@ func mulWW_g(x, y Word) (z1, z0 Word) {
|
||||
|
||||
// z1<<_W + z0 = x*y + c
|
||||
func mulAddWWW_g(x, y, c Word) (z1, z0 Word) {
|
||||
z1, zz0 := mulWW(x, y)
|
||||
z1, zz0 := mulWW_g(x, y)
|
||||
if z0 = zz0 + c; z0 < zz0 {
|
||||
z1++
|
||||
}
|
||||
@ -154,21 +154,52 @@ func divWW_g(u1, u0, v Word) (q, r Word) {
|
||||
return q1*_B2 + q0, (un21*_B2 + un0 - q0*v) >> s
|
||||
}
|
||||
|
||||
// Keep for performance debugging.
|
||||
// Using addWW_g is likely slower.
|
||||
const use_addWW_g = false
|
||||
|
||||
// The resulting carry c is either 0 or 1.
|
||||
func addVV_g(z, x, y []Word) (c Word) {
|
||||
if use_addWW_g {
|
||||
for i := range z {
|
||||
c, z[i] = addWW_g(x[i], y[i], c)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
for i, xi := range x[:len(z)] {
|
||||
yi := y[i]
|
||||
zi := xi + yi + c
|
||||
z[i] = zi
|
||||
// see "Hacker's Delight", section 2-12 (overflow detection)
|
||||
c = (xi&yi | (xi|yi)&^zi) >> (_W - 1)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// The resulting carry c is either 0 or 1.
|
||||
func subVV_g(z, x, y []Word) (c Word) {
|
||||
if use_addWW_g {
|
||||
for i := range z {
|
||||
c, z[i] = subWW_g(x[i], y[i], c)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
for i, xi := range x[:len(z)] {
|
||||
yi := y[i]
|
||||
zi := xi - yi - c
|
||||
z[i] = zi
|
||||
// see "Hacker's Delight", section 2-12 (overflow detection)
|
||||
c = (yi&^xi | (yi|^xi)&zi) >> (_W - 1)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Argument y must be either 0 or 1.
|
||||
// The resulting carry c is either 0 or 1.
|
||||
func addVW_g(z, x []Word, y Word) (c Word) {
|
||||
if use_addWW_g {
|
||||
c = y
|
||||
for i := range z {
|
||||
c, z[i] = addWW_g(x[i], c, 0)
|
||||
@ -176,7 +207,17 @@ func addVW_g(z, x []Word, y Word) (c Word) {
|
||||
return
|
||||
}
|
||||
|
||||
c = y
|
||||
for i, xi := range x[:len(z)] {
|
||||
zi := xi + c
|
||||
z[i] = zi
|
||||
c = xi &^ zi >> (_W - 1)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func subVW_g(z, x []Word, y Word) (c Word) {
|
||||
if use_addWW_g {
|
||||
c = y
|
||||
for i := range z {
|
||||
c, z[i] = subWW_g(x[i], c, 0)
|
||||
@ -184,6 +225,15 @@ func subVW_g(z, x []Word, y Word) (c Word) {
|
||||
return
|
||||
}
|
||||
|
||||
c = y
|
||||
for i, xi := range x[:len(z)] {
|
||||
zi := xi - c
|
||||
z[i] = zi
|
||||
c = (zi &^ xi) >> (_W - 1)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func shlVU_g(z, x []Word, s uint) (c Word) {
|
||||
if n := len(z); n > 0 {
|
||||
ŝ := _W - s
|
||||
@ -222,6 +272,7 @@ func mulAddVWW_g(z, x []Word, y, r Word) (c Word) {
|
||||
return
|
||||
}
|
||||
|
||||
// TODO(gri) Remove use of addWW_g here and then we can remove addWW_g and subWW_g.
|
||||
func addMulVVW_g(z, x []Word, y Word) (c Word) {
|
||||
for i := range z {
|
||||
z1, z0 := mulAddWWW_g(x[i], y, z[i])
|
||||
|
@ -254,7 +254,7 @@ func benchmarkFunVW(b *testing.B, f funVW, n int) {
|
||||
x := rndV(n)
|
||||
y := rndW()
|
||||
z := make([]Word, n)
|
||||
b.SetBytes(int64(n * _W))
|
||||
b.SetBytes(int64(n * _S))
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
f(z, x, y)
|
||||
|
@ -769,3 +769,55 @@ func BenchmarkExp3Power0x10000(b *testing.B) { ExpHelper(b, 3, 0x10000) }
|
||||
func BenchmarkExp3Power0x40000(b *testing.B) { ExpHelper(b, 3, 0x40000) }
|
||||
func BenchmarkExp3Power0x100000(b *testing.B) { ExpHelper(b, 3, 0x100000) }
|
||||
func BenchmarkExp3Power0x400000(b *testing.B) { ExpHelper(b, 3, 0x400000) }
|
||||
|
||||
func fibo(n int) nat {
|
||||
switch n {
|
||||
case 0:
|
||||
return nil
|
||||
case 1:
|
||||
return nat{1}
|
||||
}
|
||||
f0 := fibo(0)
|
||||
f1 := fibo(1)
|
||||
var f2 nat
|
||||
for i := 1; i < n; i++ {
|
||||
f2 = f2.add(f0, f1)
|
||||
f0, f1, f2 = f1, f2, f0
|
||||
}
|
||||
return f1
|
||||
}
|
||||
|
||||
var fiboNums = []string{
|
||||
"0",
|
||||
"55",
|
||||
"6765",
|
||||
"832040",
|
||||
"102334155",
|
||||
"12586269025",
|
||||
"1548008755920",
|
||||
"190392490709135",
|
||||
"23416728348467685",
|
||||
"2880067194370816120",
|
||||
"354224848179261915075",
|
||||
}
|
||||
|
||||
func TestFibo(t *testing.T) {
|
||||
for i, want := range fiboNums {
|
||||
n := i * 10
|
||||
got := fibo(n).decimalString()
|
||||
if got != want {
|
||||
t.Errorf("fibo(%d) failed: got %s want %s", n, got, want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkFibo(b *testing.B) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
fibo(1e0)
|
||||
fibo(1e1)
|
||||
fibo(1e2)
|
||||
fibo(1e3)
|
||||
fibo(1e4)
|
||||
fibo(1e5)
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user