mirror of
https://github.com/golang/go
synced 2024-09-30 05:34:35 -06:00
math/big: optimize shlVU_g and shrVU_g
Special case shifts by zero. Provide hints to the compiler that shifts are bounded. There are no existing benchmarks for shifts, but the Float implementation uses shifts, so we can use those. Benchmarks on amd64 with -tags=math_big_pure_go. name old time/op new time/op delta FloatString/100-8 869ns ± 3% 872ns ± 4% +0.40% (p=0.001 n=94+83) FloatString/1000-8 26.5µs ± 1% 26.4µs ± 1% -0.46% (p=0.000 n=87+96) FloatString/10000-8 2.18ms ± 2% 2.18ms ± 2% ~ (p=0.687 n=90+89) FloatString/100000-8 200ms ± 7% 197ms ± 5% -1.47% (p=0.000 n=100+90) FloatAdd/10-8 65.9ns ± 4% 64.0ns ± 4% -2.94% (p=0.000 n=92+93) FloatAdd/100-8 71.3ns ± 4% 67.4ns ± 4% -5.51% (p=0.000 n=96+93) FloatAdd/1000-8 128ns ± 1% 121ns ± 0% -5.69% (p=0.000 n=91+80) FloatAdd/10000-8 718ns ± 4% 626ns ± 4% -12.83% (p=0.000 n=99+99) FloatAdd/100000-8 6.43µs ± 3% 5.50µs ± 1% -14.50% (p=0.000 n=98+83) FloatSub/10-8 57.7ns ± 2% 57.0ns ± 4% -1.20% (p=0.000 n=89+96) FloatSub/100-8 59.9ns ± 3% 58.7ns ± 4% -2.10% (p=0.000 n=100+98) FloatSub/1000-8 94.5ns ± 1% 88.6ns ± 0% -6.16% (p=0.000 n=74+70) FloatSub/10000-8 456ns ± 1% 416ns ± 5% -8.83% (p=0.000 n=87+95) FloatSub/100000-8 4.00µs ± 1% 3.57µs ± 1% -10.87% (p=0.000 n=68+85) FloatSqrt/64-8 585ns ± 1% 579ns ± 1% -0.99% (p=0.000 n=92+90) FloatSqrt/128-8 1.26µs ± 1% 1.23µs ± 2% -2.42% (p=0.000 n=91+81) FloatSqrt/256-8 1.45µs ± 3% 1.40µs ± 1% -3.61% (p=0.000 n=96+90) FloatSqrt/1000-8 4.03µs ± 1% 3.91µs ± 1% -3.05% (p=0.000 n=90+93) FloatSqrt/10000-8 48.0µs ± 0% 47.3µs ± 1% -1.55% (p=0.000 n=90+90) FloatSqrt/100000-8 1.23ms ± 3% 1.22ms ± 4% -1.00% (p=0.000 n=99+99) FloatSqrt/1000000-8 96.7ms ± 4% 98.0ms ±10% ~ (p=0.322 n=89+99) Change-Id: I0f941c05b7c324256d7f0674559b6ba906e92ba8 Reviewed-on: https://go-review.googlesource.com/c/go/+/164967 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Robert Griesemer <gri@golang.org>
This commit is contained in:
parent
abf8e355a8
commit
87cc56718a
@ -204,32 +204,46 @@ func subVW_g(z, x []Word, y Word) (c Word) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func shlVU_g(z, x []Word, s uint) (c Word) {
|
func shlVU_g(z, x []Word, s uint) (c Word) {
|
||||||
if n := len(z); n > 0 {
|
if s == 0 {
|
||||||
ŝ := _W - s
|
copy(z, x)
|
||||||
w1 := x[n-1]
|
return
|
||||||
c = w1 >> ŝ
|
|
||||||
for i := n - 1; i > 0; i-- {
|
|
||||||
w := w1
|
|
||||||
w1 = x[i-1]
|
|
||||||
z[i] = w<<s | w1>>ŝ
|
|
||||||
}
|
|
||||||
z[0] = w1 << s
|
|
||||||
}
|
}
|
||||||
|
if len(z) == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
s &= _W - 1 // hint to the compiler that shifts by s don't need guard code
|
||||||
|
ŝ := _W - s
|
||||||
|
ŝ &= _W - 1 // ditto
|
||||||
|
w1 := x[len(z)-1]
|
||||||
|
c = w1 >> ŝ
|
||||||
|
for i := len(z) - 1; i > 0; i-- {
|
||||||
|
w := w1
|
||||||
|
w1 = x[i-1]
|
||||||
|
z[i] = w<<s | w1>>ŝ
|
||||||
|
}
|
||||||
|
z[0] = w1 << s
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
func shrVU_g(z, x []Word, s uint) (c Word) {
|
func shrVU_g(z, x []Word, s uint) (c Word) {
|
||||||
if n := len(z); n > 0 {
|
if s == 0 {
|
||||||
ŝ := _W - s
|
copy(z, x)
|
||||||
w1 := x[0]
|
return
|
||||||
c = w1 << ŝ
|
|
||||||
for i := 0; i < n-1; i++ {
|
|
||||||
w := w1
|
|
||||||
w1 = x[i+1]
|
|
||||||
z[i] = w>>s | w1<<ŝ
|
|
||||||
}
|
|
||||||
z[n-1] = w1 >> s
|
|
||||||
}
|
}
|
||||||
|
if len(z) == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
s &= _W - 1 // hint to the compiler that shifts by s don't need guard code
|
||||||
|
ŝ := _W - s
|
||||||
|
ŝ &= _W - 1 // ditto
|
||||||
|
w1 := x[0]
|
||||||
|
c = w1 << ŝ
|
||||||
|
for i := 0; i < len(z)-1; i++ {
|
||||||
|
w := w1
|
||||||
|
w1 = x[i+1]
|
||||||
|
z[i] = w>>s | w1<<ŝ
|
||||||
|
}
|
||||||
|
z[len(z)-1] = w1 >> s
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user