From 87cc56718adb43340b24fcf9e5f14d87c028ce88 Mon Sep 17 00:00:00 2001 From: Josh Bleecher Snyder Date: Sun, 3 Mar 2019 13:08:40 -0800 Subject: [PATCH] math/big: optimize shlVU_g and shrVU_g MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Special case shifts by zero. Provide hints to the compiler that shifts are bounded. There are no existing benchmarks for shifts, but the Float implementation uses shifts, so we can use those. Benchmarks on amd64 with -tags=math_big_pure_go. name old time/op new time/op delta FloatString/100-8 869ns ± 3% 872ns ± 4% +0.40% (p=0.001 n=94+83) FloatString/1000-8 26.5µs ± 1% 26.4µs ± 1% -0.46% (p=0.000 n=87+96) FloatString/10000-8 2.18ms ± 2% 2.18ms ± 2% ~ (p=0.687 n=90+89) FloatString/100000-8 200ms ± 7% 197ms ± 5% -1.47% (p=0.000 n=100+90) FloatAdd/10-8 65.9ns ± 4% 64.0ns ± 4% -2.94% (p=0.000 n=92+93) FloatAdd/100-8 71.3ns ± 4% 67.4ns ± 4% -5.51% (p=0.000 n=96+93) FloatAdd/1000-8 128ns ± 1% 121ns ± 0% -5.69% (p=0.000 n=91+80) FloatAdd/10000-8 718ns ± 4% 626ns ± 4% -12.83% (p=0.000 n=99+99) FloatAdd/100000-8 6.43µs ± 3% 5.50µs ± 1% -14.50% (p=0.000 n=98+83) FloatSub/10-8 57.7ns ± 2% 57.0ns ± 4% -1.20% (p=0.000 n=89+96) FloatSub/100-8 59.9ns ± 3% 58.7ns ± 4% -2.10% (p=0.000 n=100+98) FloatSub/1000-8 94.5ns ± 1% 88.6ns ± 0% -6.16% (p=0.000 n=74+70) FloatSub/10000-8 456ns ± 1% 416ns ± 5% -8.83% (p=0.000 n=87+95) FloatSub/100000-8 4.00µs ± 1% 3.57µs ± 1% -10.87% (p=0.000 n=68+85) FloatSqrt/64-8 585ns ± 1% 579ns ± 1% -0.99% (p=0.000 n=92+90) FloatSqrt/128-8 1.26µs ± 1% 1.23µs ± 2% -2.42% (p=0.000 n=91+81) FloatSqrt/256-8 1.45µs ± 3% 1.40µs ± 1% -3.61% (p=0.000 n=96+90) FloatSqrt/1000-8 4.03µs ± 1% 3.91µs ± 1% -3.05% (p=0.000 n=90+93) FloatSqrt/10000-8 48.0µs ± 0% 47.3µs ± 1% -1.55% (p=0.000 n=90+90) FloatSqrt/100000-8 1.23ms ± 3% 1.22ms ± 4% -1.00% (p=0.000 n=99+99) FloatSqrt/1000000-8 96.7ms ± 4% 98.0ms ±10% ~ (p=0.322 n=89+99) Change-Id: I0f941c05b7c324256d7f0674559b6ba906e92ba8 Reviewed-on: https://go-review.googlesource.com/c/go/+/164967 Run-TryBot: Josh Bleecher Snyder TryBot-Result: Gobot Gobot Reviewed-by: Robert Griesemer --- src/math/big/arith.go | 54 +++++++++++++++++++++++++++---------------- 1 file changed, 34 insertions(+), 20 deletions(-) diff --git a/src/math/big/arith.go b/src/math/big/arith.go index f9db9118eb..611193ef18 100644 --- a/src/math/big/arith.go +++ b/src/math/big/arith.go @@ -204,32 +204,46 @@ func subVW_g(z, x []Word, y Word) (c Word) { } func shlVU_g(z, x []Word, s uint) (c Word) { - if n := len(z); n > 0 { - ŝ := _W - s - w1 := x[n-1] - c = w1 >> ŝ - for i := n - 1; i > 0; i-- { - w := w1 - w1 = x[i-1] - z[i] = w<>ŝ - } - z[0] = w1 << s + if s == 0 { + copy(z, x) + return } + if len(z) == 0 { + return + } + s &= _W - 1 // hint to the compiler that shifts by s don't need guard code + ŝ := _W - s + ŝ &= _W - 1 // ditto + w1 := x[len(z)-1] + c = w1 >> ŝ + for i := len(z) - 1; i > 0; i-- { + w := w1 + w1 = x[i-1] + z[i] = w<>ŝ + } + z[0] = w1 << s return } func shrVU_g(z, x []Word, s uint) (c Word) { - if n := len(z); n > 0 { - ŝ := _W - s - w1 := x[0] - c = w1 << ŝ - for i := 0; i < n-1; i++ { - w := w1 - w1 = x[i+1] - z[i] = w>>s | w1<<ŝ - } - z[n-1] = w1 >> s + if s == 0 { + copy(z, x) + return } + if len(z) == 0 { + return + } + s &= _W - 1 // hint to the compiler that shifts by s don't need guard code + ŝ := _W - s + ŝ &= _W - 1 // ditto + w1 := x[0] + c = w1 << ŝ + for i := 0; i < len(z)-1; i++ { + w := w1 + w1 = x[i+1] + z[i] = w>>s | w1<<ŝ + } + z[len(z)-1] = w1 >> s return }