From d092f597d7f0bef88620d7340df9079477df727f Mon Sep 17 00:00:00 2001 From: ruinan Date: Tue, 8 Nov 2022 16:49:53 +0800 Subject: [PATCH] math/bits: directly calculate quo/rem when hi is zero in Div64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit func Div64(hi, lo, y uint64) (quo, rem uint64) {...} math/bits.Div64 returns the quotient and remainder of (hi, lo) divided by y. When hi is zero, we can directly return lo/y, lo%y, which can save a lot of unnecessary calculations. The performance is measured on arm64 and the changes will only affect the arch that doesn't have the intrinsic. name old time/op new time/op delta DivWVW/1-10 4.62ns ± 1% 2.45ns ± 1% -46.97% DivWVW/2-10 12.4ns ± 0% 12.2ns ± 0% -1.38% DivWVW/3-10 17.4ns ± 1% 17.2ns ± 0% -0.88% DivWVW/4-10 21.4ns ± 1% 21.6ns ± 0% +0.75% DivWVW/5-10 22.1ns ± 1% 21.9ns ± 0% -0.69% DivWVW/10-10 53.4ns ± 1% 53.0ns ± 1% -0.69% DivWVW/100-10 641ns ± 1% 633ns ± 0% -1.26% DivWVW/1000-10 5.52µs ± 1% 5.44µs ± 0% -1.39% DivWVW/10000-10 54.9µs ± 1% 54.7µs ± 1% -0.54% DivWVW/100000-10 646µs ± 1% 643µs ± 1% ~ name old speed new speed delta DivWVW/1-10 13.8GB/s ± 1% 26.1GB/s ± 1% +88.57% DivWVW/2-10 10.3GB/s ± 0% 10.5GB/s ± 0% +1.39% DivWVW/3-10 11.1GB/s ± 1% 11.2GB/s ± 0% +0.90% DivWVW/4-10 12.0GB/s ± 1% 11.9GB/s ± 0% -0.74% DivWVW/5-10 14.5GB/s ± 1% 14.6GB/s ± 0% +0.69% DivWVW/10-10 12.0GB/s ± 1% 12.1GB/s ± 1% +0.69% DivWVW/100-10 10.0GB/s ± 1% 10.1GB/s ± 0% +1.28% DivWVW/1000-10 11.6GB/s ± 1% 11.8GB/s ± 0% +1.41% DivWVW/10000-10 11.6GB/s ± 1% 11.7GB/s ± 1% +0.54% DivWVW/100000-10 9.91GB/s ± 1% 9.95GB/s ± 1% ~ Change-Id: I12014c2e2cdb2c91608079f7502592307af9e525 Reviewed-on: https://go-review.googlesource.com/c/go/+/449776 Reviewed-by: Keith Randall Run-TryBot: Eric Fang Reviewed-by: Robert Griesemer TryBot-Result: Gopher Robot Reviewed-by: Keith Randall --- src/math/bits/bits.go | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/math/bits/bits.go b/src/math/bits/bits.go index 6ca4bfe9a60..c1c7b7978ac 100644 --- a/src/math/bits/bits.go +++ b/src/math/bits/bits.go @@ -516,10 +516,6 @@ func Div32(hi, lo, y uint32) (quo, rem uint32) { // half in parameter hi and the lower half in parameter lo. // Div64 panics for y == 0 (division by zero) or y <= hi (quotient overflow). func Div64(hi, lo, y uint64) (quo, rem uint64) { - const ( - two32 = 1 << 32 - mask32 = two32 - 1 - ) if y == 0 { panic(divideError) } @@ -527,9 +523,18 @@ func Div64(hi, lo, y uint64) (quo, rem uint64) { panic(overflowError) } + // If high part is zero, we can directly return the results. + if hi == 0 { + return lo / y, lo % y + } + s := uint(LeadingZeros64(y)) y <<= s + const ( + two32 = 1 << 32 + mask32 = two32 - 1 + ) yn1 := y >> 32 yn0 := y & mask32 un32 := hi<>(64-s)