From 0575cd9de45215c069ffb15afe11599dcb409f62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Oudompheng?= Date: Fri, 13 Jan 2012 23:24:33 +0100 Subject: [PATCH] strconv: faster FormatFloat(x, *, -1, 64) using Grisu3 algorithm. The implementation is similar to the one from the double-conversion library used in the Chrome V8 engine. old ns/op new ns/op speedup BenchmarkAppendFloatDecimal 591 480 1.2x BenchmarkAppendFloat 2956 486 6.1x BenchmarkAppendFloatExp 10622 503 21.1x BenchmarkAppendFloatNegExp 40343 483 83.5x BenchmarkAppendFloatBig 2798 664 4.2x See F. Loitsch, ``Printing Floating-Point Numbers Quickly and Accurately with Integers'', Proceedings of the ACM, 2010. R=rsc CC=golang-dev, remy https://golang.org/cl/5502079 --- src/pkg/strconv/extfloat.go | 190 +++++++++++++++++++++++++++++++++++ src/pkg/strconv/ftoa.go | 54 ++++++---- src/pkg/strconv/ftoa_test.go | 33 ++++++ 3 files changed, 257 insertions(+), 20 deletions(-) diff --git a/src/pkg/strconv/extfloat.go b/src/pkg/strconv/extfloat.go index 980052a778b..64ab84f4554 100644 --- a/src/pkg/strconv/extfloat.go +++ b/src/pkg/strconv/extfloat.go @@ -191,6 +191,36 @@ func (f *extFloat) Assign(x float64) { f.exp -= 64 } +// AssignComputeBounds sets f to the value of x and returns +// lower, upper such that any number in the closed interval +// [lower, upper] is converted back to x. +func (f *extFloat) AssignComputeBounds(x float64) (lower, upper extFloat) { + // Special cases. + bits := math.Float64bits(x) + flt := &float64info + neg := bits>>(flt.expbits+flt.mantbits) != 0 + expBiased := int(bits>>flt.mantbits) & (1< expMax: + i-- + default: + break Loop + } + } + // Apply the desired decimal shift on f. It will have exponent + // in the desired range. This is multiplication by 10^-exp10. + f.Multiply(powersOfTen[i]) + + return -(firstPowerOfTen + i*stepPowerOfTen), i +} + +// frexp10Many applies a common shift by a power of ten to a, b, c. +func frexp10Many(expMin, expMax int, a, b, c *extFloat) (exp10 int) { + exp10, i := c.frexp10(expMin, expMax) + a.Multiply(powersOfTen[i]) + b.Multiply(powersOfTen[i]) + return +} + +// ShortestDecimal stores in d the shortest decimal representation of f +// which belongs to the open interval (lower, upper), where f is supposed +// to lie. It returns false whenever the result is unsure. The implementation +// uses the Grisu3 algorithm. +func (f *extFloat) ShortestDecimal(d *decimal, lower, upper *extFloat) bool { + if f.mant == 0 { + d.d[0] = '0' + d.nd = 1 + d.dp = 0 + d.neg = f.neg + } + const minExp = -60 + const maxExp = -32 + upper.Normalize() + // Uniformize exponents. + if f.exp > upper.exp { + f.mant <<= uint(f.exp - upper.exp) + f.exp = upper.exp + } + if lower.exp > upper.exp { + lower.mant <<= uint(lower.exp - upper.exp) + lower.exp = upper.exp + } + + exp10 := frexp10Many(minExp, maxExp, lower, f, upper) + // Take a safety margin due to rounding in frexp10Many, but we lose precision. + upper.mant++ + lower.mant-- + + // The shortest representation of f is either rounded up or down, but + // in any case, it is a truncation of upper. + shift := uint(-upper.exp) + integer := uint32(upper.mant >> shift) + fraction := upper.mant - (uint64(integer) << shift) + + // How far we can go down from upper until the result is wrong. + allowance := upper.mant - lower.mant + // How far we should go to get a very precise result. + targetDiff := upper.mant - f.mant + + // Count integral digits: there are at most 10. + var integerDigits int + for i, pow := range uint64pow10 { + if uint64(integer) >= pow { + integerDigits = i + 1 + } + } + for i := 0; i < integerDigits; i++ { + pow := uint64pow10[integerDigits-i-1] + digit := integer / uint32(pow) + d.d[i] = byte(digit + '0') + integer -= digit * uint32(pow) + // evaluate whether we should stop. + if currentDiff := uint64(integer)<> shift) + d.d[d.nd] = byte(digit + '0') + d.nd++ + fraction -= uint64(digit) << shift + if fraction < allowance*multiplier { + // We are in the admissible range. Note that if allowance is about to + // overflow, that is, allowance > 2^64/10, the condition is automatically + // true due to the limited range of fraction. + return adjustLastDigit(d, + fraction, targetDiff*multiplier, allowance*multiplier, + 1< maxDiff-ulpBinary { + // we went too far + return false + } + if d.nd == 1 && d.d[0] == '0' { + // the number has actually reached zero. + d.nd = 0 + d.dp = 0 + } + return true +} diff --git a/src/pkg/strconv/ftoa.go b/src/pkg/strconv/ftoa.go index ab8dd2bf952..8eefbee79f2 100644 --- a/src/pkg/strconv/ftoa.go +++ b/src/pkg/strconv/ftoa.go @@ -98,29 +98,43 @@ func genericFtoa(dst []byte, val float64, fmt byte, prec, bitSize int) []byte { return fmtB(dst, neg, mant, exp, flt) } - // Create exact decimal representation. - // The shift is exp - flt.mantbits because mant is a 1-bit integer - // followed by a flt.mantbits fraction, and we are treating it as - // a 1+flt.mantbits-bit integer. - d := new(decimal) - d.Assign(mant) - d.Shift(exp - int(flt.mantbits)) - - // Round appropriately. // Negative precision means "only as much as needed to be exact." - shortest := false - if prec < 0 { - shortest = true - roundShortest(d, mant, exp, flt) - switch fmt { - case 'e', 'E': - prec = d.nd - 1 - case 'f': - prec = max(d.nd-d.dp, 0) - case 'g', 'G': - prec = d.nd + shortest := prec < 0 + + d := new(decimal) + if shortest { + ok := false + if optimize && bitSize == 64 { + // Try Grisu3 algorithm. + f := new(extFloat) + lower, upper := f.AssignComputeBounds(val) + ok = f.ShortestDecimal(d, &lower, &upper) + } + if !ok { + // Create exact decimal representation. + // The shift is exp - flt.mantbits because mant is a 1-bit integer + // followed by a flt.mantbits fraction, and we are treating it as + // a 1+flt.mantbits-bit integer. + d.Assign(mant) + d.Shift(exp - int(flt.mantbits)) + roundShortest(d, mant, exp, flt) + } + // Precision for shortest representation mode. + if prec < 0 { + switch fmt { + case 'e', 'E': + prec = d.nd - 1 + case 'f': + prec = max(d.nd-d.dp, 0) + case 'g', 'G': + prec = d.nd + } } } else { + // Create exact decimal representation. + d.Assign(mant) + d.Shift(exp - int(flt.mantbits)) + // Round appropriately. switch fmt { case 'e', 'E': d.Round(prec + 1) diff --git a/src/pkg/strconv/ftoa_test.go b/src/pkg/strconv/ftoa_test.go index 1d90a67f528..7d8617a854c 100644 --- a/src/pkg/strconv/ftoa_test.go +++ b/src/pkg/strconv/ftoa_test.go @@ -6,6 +6,7 @@ package strconv_test import ( "math" + "math/rand" . "strconv" "testing" ) @@ -153,6 +154,25 @@ func TestFtoa(t *testing.T) { } } +func TestFtoaRandom(t *testing.T) { + N := int(1e4) + if testing.Short() { + N = 100 + } + t.Logf("testing %d random numbers with fast and slow FormatFloat", N) + for i := 0; i < N; i++ { + bits := uint64(rand.Uint32())<<32 | uint64(rand.Uint32()) + x := math.Float64frombits(bits) + shortFast := FormatFloat(x, 'g', -1, 64) + SetOptimize(false) + shortSlow := FormatFloat(x, 'g', -1, 64) + SetOptimize(true) + if shortSlow != shortFast { + t.Errorf("%b printed as %s, want %s", x, shortFast, shortSlow) + } + } +} + func TestAppendFloatDoesntAllocate(t *testing.T) { n := numAllocations(func() { var buf [64]byte @@ -188,6 +208,12 @@ func BenchmarkFormatFloatExp(b *testing.B) { } } +func BenchmarkFormatFloatNegExp(b *testing.B) { + for i := 0; i < b.N; i++ { + FormatFloat(-5.11e-95, 'g', -1, 64) + } +} + func BenchmarkFormatFloatBig(b *testing.B) { for i := 0; i < b.N; i++ { FormatFloat(123456789123456789123456789, 'g', -1, 64) @@ -215,6 +241,13 @@ func BenchmarkAppendFloatExp(b *testing.B) { } } +func BenchmarkAppendFloatNegExp(b *testing.B) { + dst := make([]byte, 0, 30) + for i := 0; i < b.N; i++ { + AppendFloat(dst, -5.11e-95, 'g', -1, 64) + } +} + func BenchmarkAppendFloatBig(b *testing.B) { dst := make([]byte, 0, 30) for i := 0; i < b.N; i++ {