From 187afdebef7953295189d4531e7dccdc0cb64500 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Mon, 4 Apr 2016 19:28:15 +0300 Subject: [PATCH] math/big: re-use memory in Int.GCD This improves TLS handshake performance. benchmark old ns/op new ns/op delta BenchmarkGCD10x10/WithoutXY-4 965 968 +0.31% BenchmarkGCD10x10/WithXY-4 1813 1391 -23.28% BenchmarkGCD10x100/WithoutXY-4 1093 1075 -1.65% BenchmarkGCD10x100/WithXY-4 2348 1676 -28.62% BenchmarkGCD10x1000/WithoutXY-4 1569 1565 -0.25% BenchmarkGCD10x1000/WithXY-4 4262 3242 -23.93% BenchmarkGCD10x10000/WithoutXY-4 6069 6066 -0.05% BenchmarkGCD10x10000/WithXY-4 12123 11331 -6.53% BenchmarkGCD10x100000/WithoutXY-4 52664 52610 -0.10% BenchmarkGCD10x100000/WithXY-4 97494 95649 -1.89% BenchmarkGCD100x100/WithoutXY-4 5244 5228 -0.31% BenchmarkGCD100x100/WithXY-4 22572 18630 -17.46% BenchmarkGCD100x1000/WithoutXY-4 6143 6233 +1.47% BenchmarkGCD100x1000/WithXY-4 24652 19357 -21.48% BenchmarkGCD100x10000/WithoutXY-4 15725 15804 +0.50% BenchmarkGCD100x10000/WithXY-4 60552 55973 -7.56% BenchmarkGCD100x100000/WithoutXY-4 107008 107853 +0.79% BenchmarkGCD100x100000/WithXY-4 349597 340994 -2.46% BenchmarkGCD1000x1000/WithoutXY-4 63785 64434 +1.02% BenchmarkGCD1000x1000/WithXY-4 373186 334035 -10.49% BenchmarkGCD1000x10000/WithoutXY-4 78038 78241 +0.26% BenchmarkGCD1000x10000/WithXY-4 543692 507034 -6.74% BenchmarkGCD1000x100000/WithoutXY-4 205607 207727 +1.03% BenchmarkGCD1000x100000/WithXY-4 2488113 2415323 -2.93% BenchmarkGCD10000x10000/WithoutXY-4 1731340 1714992 -0.94% BenchmarkGCD10000x10000/WithXY-4 10601046 7111329 -32.92% BenchmarkGCD10000x100000/WithoutXY-4 2239155 2212173 -1.21% BenchmarkGCD10000x100000/WithXY-4 30097040 26538887 -11.82% BenchmarkGCD100000x100000/WithoutXY-4 119845326 119863916 +0.02% BenchmarkGCD100000x100000/WithXY-4 768006543 426795966 -44.43% benchmark old allocs new allocs delta BenchmarkGCD10x10/WithoutXY-4 5 5 +0.00% BenchmarkGCD10x10/WithXY-4 17 9 -47.06% BenchmarkGCD10x100/WithoutXY-4 6 6 +0.00% BenchmarkGCD10x100/WithXY-4 21 9 -57.14% BenchmarkGCD10x1000/WithoutXY-4 6 6 +0.00% BenchmarkGCD10x1000/WithXY-4 30 12 -60.00% BenchmarkGCD10x10000/WithoutXY-4 6 6 +0.00% BenchmarkGCD10x10000/WithXY-4 26 12 -53.85% BenchmarkGCD10x100000/WithoutXY-4 6 6 +0.00% BenchmarkGCD10x100000/WithXY-4 28 12 -57.14% BenchmarkGCD100x100/WithoutXY-4 5 5 +0.00% BenchmarkGCD100x100/WithXY-4 183 61 -66.67% BenchmarkGCD100x1000/WithoutXY-4 8 8 +0.00% BenchmarkGCD100x1000/WithXY-4 170 47 -72.35% BenchmarkGCD100x10000/WithoutXY-4 8 8 +0.00% BenchmarkGCD100x10000/WithXY-4 200 67 -66.50% BenchmarkGCD100x100000/WithoutXY-4 8 8 +0.00% BenchmarkGCD100x100000/WithXY-4 188 65 -65.43% BenchmarkGCD1000x1000/WithoutXY-4 5 5 +0.00% BenchmarkGCD1000x1000/WithXY-4 2435 1193 -51.01% BenchmarkGCD1000x10000/WithoutXY-4 8 8 +0.00% BenchmarkGCD1000x10000/WithXY-4 2211 1076 -51.33% BenchmarkGCD1000x100000/WithoutXY-4 8 8 +0.00% BenchmarkGCD1000x100000/WithXY-4 2271 1108 -51.21% BenchmarkGCD10000x10000/WithoutXY-4 5 5 +0.00% BenchmarkGCD10000x10000/WithXY-4 23183 11605 -49.94% BenchmarkGCD10000x100000/WithoutXY-4 8 8 +0.00% BenchmarkGCD10000x100000/WithXY-4 23421 11717 -49.97% BenchmarkGCD100000x100000/WithoutXY-4 5 5 +0.00% BenchmarkGCD100000x100000/WithXY-4 232976 116815 -49.86% benchmark old bytes new bytes delta BenchmarkGCD10x10/WithoutXY-4 208 208 +0.00% BenchmarkGCD10x10/WithXY-4 736 432 -41.30% BenchmarkGCD10x100/WithoutXY-4 256 256 +0.00% BenchmarkGCD10x100/WithXY-4 896 432 -51.79% BenchmarkGCD10x1000/WithoutXY-4 368 368 +0.00% BenchmarkGCD10x1000/WithXY-4 1856 1152 -37.93% BenchmarkGCD10x10000/WithoutXY-4 1616 1616 +0.00% BenchmarkGCD10x10000/WithXY-4 7920 7376 -6.87% BenchmarkGCD10x100000/WithoutXY-4 13776 13776 +0.00% BenchmarkGCD10x100000/WithXY-4 68800 68176 -0.91% BenchmarkGCD100x100/WithoutXY-4 208 208 +0.00% BenchmarkGCD100x100/WithXY-4 6960 2112 -69.66% BenchmarkGCD100x1000/WithoutXY-4 544 560 +2.94% BenchmarkGCD100x1000/WithXY-4 7280 2400 -67.03% BenchmarkGCD100x10000/WithoutXY-4 2896 2912 +0.55% BenchmarkGCD100x10000/WithXY-4 15280 10002 -34.54% BenchmarkGCD100x100000/WithoutXY-4 27344 27365 +0.08% BenchmarkGCD100x100000/WithXY-4 88288 83427 -5.51% BenchmarkGCD1000x1000/WithoutXY-4 544 544 +0.00% BenchmarkGCD1000x1000/WithXY-4 178288 40043 -77.54% BenchmarkGCD1000x10000/WithoutXY-4 3344 3136 -6.22% BenchmarkGCD1000x10000/WithXY-4 188720 54432 -71.16% BenchmarkGCD1000x100000/WithoutXY-4 27792 27592 -0.72% BenchmarkGCD1000x100000/WithXY-4 373872 239447 -35.95% BenchmarkGCD10000x10000/WithoutXY-4 4288 4288 +0.00% BenchmarkGCD10000x10000/WithXY-4 11935584 481875 -95.96% BenchmarkGCD10000x100000/WithoutXY-4 31296 28834 -7.87% BenchmarkGCD10000x100000/WithXY-4 13237088 1662620 -87.44% BenchmarkGCD100000x100000/WithoutXY-4 40768 40768 +0.00% BenchmarkGCD100000x100000/WithXY-4 1165518864 14256010 -98.78% Change-Id: I652b3244bd074a03f3bc9a87c282330f9e5f1507 Reviewed-on: https://go-review.googlesource.com/21506 Reviewed-by: Brad Fitzpatrick Run-TryBot: Brad Fitzpatrick TryBot-Result: Gobot Gobot --- src/math/big/gcd_test.go | 16 +++++++++++++++- src/math/big/int.go | 4 ++-- src/math/big/nat.go | 29 ++++++++++++++++++++++++++--- 3 files changed, 43 insertions(+), 6 deletions(-) diff --git a/src/math/big/gcd_test.go b/src/math/big/gcd_test.go index c0b9f583000..a929bf597f4 100644 --- a/src/math/big/gcd_test.go +++ b/src/math/big/gcd_test.go @@ -20,13 +20,27 @@ func randInt(r *rand.Rand, size uint) *Int { } func runGCD(b *testing.B, aSize, bSize uint) { + b.Run("WithoutXY", func(b *testing.B) { + runGCDExt(b, aSize, bSize, false) + }) + b.Run("WithXY", func(b *testing.B) { + runGCDExt(b, aSize, bSize, true) + }) +} + +func runGCDExt(b *testing.B, aSize, bSize uint, calcXY bool) { b.StopTimer() var r = rand.New(rand.NewSource(1234)) aa := randInt(r, aSize) bb := randInt(r, bSize) + var x, y *Int + if calcXY { + x = new(Int) + y = new(Int) + } b.StartTimer() for i := 0; i < b.N; i++ { - new(Int).GCD(nil, nil, aa, bb) + new(Int).GCD(x, y, aa, bb) } } diff --git a/src/math/big/int.go b/src/math/big/int.go index 67ab7042ffe..f2a75d1cd50 100644 --- a/src/math/big/int.go +++ b/src/math/big/int.go @@ -459,11 +459,11 @@ func (z *Int) GCD(x, y, a, b *Int) *Int { q := new(Int) temp := new(Int) + r := new(Int) for len(B.abs) > 0 { - r := new(Int) q, r = q.QuoRem(A, B, r) - A, B = B, r + A, B, r = B, r, A temp.Set(X) X.Mul(X, q) diff --git a/src/math/big/nat.go b/src/math/big/nat.go index 7668b6481b3..2e65d2a7ef7 100644 --- a/src/math/big/nat.go +++ b/src/math/big/nat.go @@ -8,7 +8,10 @@ package big -import "math/rand" +import ( + "math/rand" + "sync" +) // An unsigned integer x of the form // @@ -539,6 +542,21 @@ func (z nat) div(z2, u, v nat) (q, r nat) { return } +// getNat returns a nat of len n. The contents may not be zero. +func getNat(n int) nat { + var z nat + if v := natPool.Get(); v != nil { + z = v.(nat) + } + return z.make(n) +} + +func putNat(x nat) { + natPool.Put(x) +} + +var natPool sync.Pool + // q = (uIn-r)/v, with 0 <= r < y // Uses z as storage for q, and u as storage for r if possible. // See Knuth, Volume 2, section 4.3.1, Algorithm D. @@ -557,7 +575,7 @@ func (z nat) divLarge(u, uIn, v nat) (q, r nat) { } q = z.make(m + 1) - qhatv := make(nat, n+1) + qhatv := getNat(n + 1) if alias(u, uIn) || alias(u, v) { u = nil // u is an alias for uIn or v - cannot reuse } @@ -565,10 +583,11 @@ func (z nat) divLarge(u, uIn, v nat) (q, r nat) { u.clear() // TODO(gri) no need to clear if we allocated a new u // D1. + var v1 nat shift := nlz(v[n-1]) if shift > 0 { // do not modify v, it may be used by another goroutine simultaneously - v1 := make(nat, n) + v1 = getNat(n) shlVU(v1, v, shift) v = v1 } @@ -609,6 +628,10 @@ func (z nat) divLarge(u, uIn, v nat) (q, r nat) { q[j] = qhat } + if v1 != nil { + putNat(v1) + } + putNat(qhatv) q = q.norm() shrVU(u, u, shift)