From 7e01b3b3879593828b89f4ff4a04667a547b22d9 Mon Sep 17 00:00:00 2001 From: Nigel Tao Date: Fri, 23 Oct 2020 10:41:50 +1100 Subject: [PATCH] strconv: add eiselLemire32 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This does for ParseFloat(etc, 32) what commit a2eb53c571 did for ParseFloat(etc, 64). name old time/op new time/op delta Atof32Decimal-4 48.3ns ± 4% 48.8ns ± 2% ~ (p=0.548 n=5+5) Atof32Float-4 56.2ns ± 5% 54.7ns ± 3% ~ (p=0.246 n=5+5) Atof32FloatExp-4 104ns ± 0% 76ns ± 2% -27.19% (p=0.008 n=5+5) Atof32Random-4 142ns ± 2% 109ns ± 1% -23.07% (p=0.008 n=5+5) Change-Id: I6ee5a2f2d791d4fe3028f1d40aca96400120fda0 Reviewed-on: https://go-review.googlesource.com/c/go/+/264517 Trust: Nigel Tao Trust: Robert Griesemer Reviewed-by: Robert Griesemer --- src/strconv/atof.go | 4 +- src/strconv/eisel_lemire.go | 86 ++++++++++++++++++++++++++++++++++--- 2 files changed, 84 insertions(+), 6 deletions(-) diff --git a/src/strconv/atof.go b/src/strconv/atof.go index c0b9c1f1e0a..e61eeab1c3a 100644 --- a/src/strconv/atof.go +++ b/src/strconv/atof.go @@ -581,6 +581,8 @@ func atof32(s string) (f float32, n int, err error) { if !trunc { if f, ok := atof32exact(mantissa, exp, neg); ok { return f, n, nil + } else if f, ok = eiselLemire32(mantissa, exp, neg); ok { + return f, n, nil } } // Try another fast path. @@ -629,7 +631,7 @@ func atof64(s string) (f float64, n int, err error) { if !trunc { if f, ok := atof64exact(mantissa, exp, neg); ok { return f, n, nil - } else if f, ok = eiselLemire(mantissa, exp, neg); ok { + } else if f, ok = eiselLemire64(mantissa, exp, neg); ok { return f, n, nil } } diff --git a/src/strconv/eisel_lemire.go b/src/strconv/eisel_lemire.go index e5482706887..6c7f852eba8 100644 --- a/src/strconv/eisel_lemire.go +++ b/src/strconv/eisel_lemire.go @@ -15,14 +15,14 @@ package strconv // https://github.com/google/wuffs/blob/ba3818cb6b473a2ed0b38ecfc07dbbd3a97e8ae7/internal/cgen/base/floatconv-submodule-code.c#L990 // // Additional testing (on over several million test strings) is done by -// https://github.com/nigeltao/parse-number-f64-test-data/blob/d085ef805be7f0e8f61066619364b2f529ea75f2/script/test-go-strconv.go +// https://github.com/nigeltao/parse-number-fxx-test-data/blob/5280dcfccf6d0b02a65ae282dad0b6d9de50e039/script/test-go-strconv.go import ( "math" "math/bits" ) -func eiselLemire(man uint64, exp10 int, neg bool) (f float64, ok bool) { +func eiselLemire64(man uint64, exp10 int, neg bool) (f float64, ok bool) { // The terse comments in this function body refer to sections of the // https://nigeltao.github.io/blog/2020/eisel-lemire.html blog post. @@ -40,7 +40,8 @@ func eiselLemire(man uint64, exp10 int, neg bool) (f float64, ok bool) { // Normalization. clz := bits.LeadingZeros64(man) man <<= clz - retExp2 := uint64(217706*exp10>>16+1087) - uint64(clz) + const float64ExponentBias = 1023 + retExp2 := uint64(217706*exp10>>16+64+float64ExponentBias) - uint64(clz) // Multiplication. xHi, xLo := bits.Mul64(man, detailedPowersOfTen[exp10-detailedPowersOfTenMinExp10][1]) @@ -78,8 +79,8 @@ func eiselLemire(man uint64, exp10 int, neg bool) (f float64, ok bool) { // retExp2 is a uint64. Zero or underflow means that we're in subnormal // float64 space. 0x7FF or above means that we're in Inf/NaN float64 space. // - // The if condition is equivalent to (but has fewer branches than): - // if retExp2 <= 0 || retExp2 >= 0x7FF { + // The if block is equivalent to (but has fewer branches than): + // if retExp2 <= 0 || retExp2 >= 0x7FF { etc } if retExp2-1 >= 0x7FF-1 { return 0, false } @@ -90,6 +91,81 @@ func eiselLemire(man uint64, exp10 int, neg bool) (f float64, ok bool) { return math.Float64frombits(retBits), true } +func eiselLemire32(man uint64, exp10 int, neg bool) (f float32, ok bool) { + // The terse comments in this function body refer to sections of the + // https://nigeltao.github.io/blog/2020/eisel-lemire.html blog post. + // + // That blog post discusses the float64 flavor (11 exponent bits with a + // -1023 bias, 52 mantissa bits) of the algorithm, but the same approach + // applies to the float32 flavor (8 exponent bits with a -127 bias, 23 + // mantissa bits). The computation here happens with 64-bit values (e.g. + // man, xHi, retMantissa) before finally converting to a 32-bit float. + + // Exp10 Range. + if man == 0 { + if neg { + f = math.Float32frombits(0x80000000) // Negative zero. + } + return f, true + } + if exp10 < detailedPowersOfTenMinExp10 || detailedPowersOfTenMaxExp10 < exp10 { + return 0, false + } + + // Normalization. + clz := bits.LeadingZeros64(man) + man <<= clz + const float32ExponentBias = 127 + retExp2 := uint64(217706*exp10>>16+64+float32ExponentBias) - uint64(clz) + + // Multiplication. + xHi, xLo := bits.Mul64(man, detailedPowersOfTen[exp10-detailedPowersOfTenMinExp10][1]) + + // Wider Approximation. + if xHi&0x3F_FFFFFFFF == 0x3F_FFFFFFFF && xLo+man < man { + yHi, yLo := bits.Mul64(man, detailedPowersOfTen[exp10-detailedPowersOfTenMinExp10][0]) + mergedHi, mergedLo := xHi, xLo+yHi + if mergedLo < xLo { + mergedHi++ + } + if mergedHi&0x3F_FFFFFFFF == 0x3F_FFFFFFFF && mergedLo+1 == 0 && yLo+man < man { + return 0, false + } + xHi, xLo = mergedHi, mergedLo + } + + // Shifting to 54 Bits (and for float32, it's shifting to 25 bits). + msb := xHi >> 63 + retMantissa := xHi >> (msb + 38) + retExp2 -= 1 ^ msb + + // Half-way Ambiguity. + if xLo == 0 && xHi&0x3F_FFFFFFFF == 0 && retMantissa&3 == 1 { + return 0, false + } + + // From 54 to 53 Bits (and for float32, it's from 25 to 24 bits). + retMantissa += retMantissa & 1 + retMantissa >>= 1 + if retMantissa>>24 > 0 { + retMantissa >>= 1 + retExp2 += 1 + } + // retExp2 is a uint64. Zero or underflow means that we're in subnormal + // float32 space. 0xFF or above means that we're in Inf/NaN float32 space. + // + // The if block is equivalent to (but has fewer branches than): + // if retExp2 <= 0 || retExp2 >= 0xFF { etc } + if retExp2-1 >= 0xFF-1 { + return 0, false + } + retBits := retExp2<<23 | retMantissa&0x007FFFFF + if neg { + retBits |= 0x80000000 + } + return math.Float32frombits(uint32(retBits)), true +} + // detailedPowersOfTen{Min,Max}Exp10 is the power of 10 represented by the // first and last rows of detailedPowersOfTen. Both bounds are inclusive. const (