From c1c027964e17d9c3f8acb4a9136698ae696a14e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Oudompheng?= Date: Sat, 1 Sep 2012 16:31:46 +0200 Subject: [PATCH] strconv: faster FormatFloat for fixed number of digits. The performance improvement applies to the case where prec >= 0 and fmt is 'e' or 'g'. Additional minor optimisations are included. A small performance impact happens in some cases due to code refactoring. benchmark old ns/op new ns/op delta BenchmarkAppendFloat64Fixed1 623 235 -62.28% BenchmarkAppendFloat64Fixed2 1050 272 -74.10% BenchmarkAppendFloat64Fixed3 3723 243 -93.47% BenchmarkAppendFloat64Fixed4 10285 274 -97.34% BenchmarkAppendFloatDecimal 190 206 +8.42% BenchmarkAppendFloat 387 377 -2.58% BenchmarkAppendFloatExp 397 339 -14.61% BenchmarkAppendFloatNegExp 377 336 -10.88% BenchmarkAppendFloatBig 546 482 -11.72% BenchmarkAppendFloat32Integer 188 204 +8.51% BenchmarkAppendFloat32ExactFraction 329 298 -9.42% BenchmarkAppendFloat32Point 400 372 -7.00% BenchmarkAppendFloat32Exp 369 306 -17.07% BenchmarkAppendFloat32NegExp 372 305 -18.01% R=golang-dev, rsc CC=golang-dev, remy https://golang.org/cl/6462049 --- src/pkg/strconv/extfloat.go | 205 +++++++++++++++++++++++++++++------ src/pkg/strconv/ftoa.go | 129 ++++++++++++++-------- src/pkg/strconv/ftoa_test.go | 15 +++ 3 files changed, 273 insertions(+), 76 deletions(-) diff --git a/src/pkg/strconv/extfloat.go b/src/pkg/strconv/extfloat.go index 78bb2ba943d..6c352019401 100644 --- a/src/pkg/strconv/extfloat.go +++ b/src/pkg/strconv/extfloat.go @@ -4,8 +4,6 @@ package strconv -import "math" - // An extFloat represents an extended floating-point number, with more // precision than a float64. It does not try to save bits: the // number represented by the structure is mant*(2^exp), with a negative @@ -179,17 +177,6 @@ out: return } -// Assign sets f to the value of x. -func (f *extFloat) Assign(x float64) { - if x < 0 { - x = -x - f.neg = true - } - x, f.exp = math.Frexp(x) - f.mant = uint64(x * float64(1<<64)) - f.exp -= 64 -} - // AssignComputeBounds sets f to the floating point value // defined by mant, exp and precision given by flt. It returns // lower, upper such that any number in the closed interval @@ -354,16 +341,17 @@ func (f *extFloat) AssignDecimal(mantissa uint64, exp10 int, neg bool, trunc boo // f by an approximate power of ten 10^-exp, and returns exp10, so // that f*10^exp10 has the same value as the old f, up to an ulp, // as well as the index of 10^-exp in the powersOfTen table. -// The arguments expMin and expMax constrain the final value of the -// binary exponent of f. -func (f *extFloat) frexp10(expMin, expMax int) (exp10, index int) { - // it is illegal to call this function with a too restrictive exponent range. - if expMax-expMin <= 25 { - panic("strconv: invalid exponent range") - } +func (f *extFloat) frexp10() (exp10, index int) { + // The constants expMin and expMax constrain the final value of the + // binary exponent of f. We want a small integral part in the result + // because finding digits of an integer requires divisions, whereas + // digits of the fractional part can be found by repeatedly multiplying + // by 10. + const expMin = -60 + const expMax = -32 // Find power of ten such that x * 10^n has a binary exponent - // between expMin and expMax - approxExp10 := -(f.exp + 100) * 28 / 93 // log(10)/log(2) is close to 93/28. + // between expMin and expMax. + approxExp10 := ((expMin+expMax)/2 - f.exp) * 28 / 93 // log(10)/log(2) is close to 93/28. i := (approxExp10 - firstPowerOfTen) / stepPowerOfTen Loop: for { @@ -385,23 +373,176 @@ Loop: } // frexp10Many applies a common shift by a power of ten to a, b, c. -func frexp10Many(expMin, expMax int, a, b, c *extFloat) (exp10 int) { - exp10, i := c.frexp10(expMin, expMax) +func frexp10Many(a, b, c *extFloat) (exp10 int) { + exp10, i := c.frexp10() a.Multiply(powersOfTen[i]) b.Multiply(powersOfTen[i]) return } +// FixedDecimal stores in d the first n significant digits +// of the decimal representation of f. It returns false +// if it cannot be sure of the answer. +func (f *extFloat) FixedDecimal(d *decimalSlice, n int) bool { + if f.mant == 0 { + d.nd = 0 + d.dp = 0 + d.neg = f.neg + return true + } + if n == 0 { + panic("strconv: internal error: extFloat.FixedDecimal called with n == 0") + } + // Multiply by an appropriate power of ten to have a reasonable + // number to process. + f.Normalize() + exp10, _ := f.frexp10() + + shift := uint(-f.exp) + integer := uint32(f.mant >> shift) + fraction := f.mant - (uint64(integer) << shift) + ε := uint64(1) // ε is the uncertainty we have on the mantissa of f. + + // Write exactly n digits to d. + needed := n // how many digits are left to write. + integerDigits := 0 // the number of decimal digits of integer. + pow10 := uint64(1) // the power of ten by which f was scaled. + for i, pow := 0, uint64(1); i < 20; i++ { + if pow > uint64(integer) { + integerDigits = i + break + } + pow *= 10 + } + rest := integer + if integerDigits > needed { + // the integral part is already large, trim the last digits. + pow10 = uint64pow10[integerDigits-needed] + integer /= uint32(pow10) + rest -= integer * uint32(pow10) + } else { + rest = 0 + } + + // Write the digits of integer: the digits of rest are omitted. + var buf [32]byte + pos := len(buf) + for v := integer; v > 0; { + v1 := v / 10 + v -= 10 * v1 + pos-- + buf[pos] = byte(v + '0') + v = v1 + } + for i := pos; i < len(buf); i++ { + d.d[i-pos] = buf[i] + } + nd := len(buf) - pos + d.nd = nd + d.dp = integerDigits + exp10 + needed -= nd + + if needed > 0 { + if rest != 0 || pow10 != 1 { + panic("strconv: internal error, rest != 0 but needed > 0") + } + // Emit digits for the fractional part. Each time, 10*fraction + // fits in a uint64 without overflow. + for needed > 0 { + fraction *= 10 + ε *= 10 // the uncertainty scales as we multiply by ten. + if 2*ε > 1<> shift + d.d[nd] = byte(digit + '0') + fraction -= digit << shift + nd++ + needed-- + } + d.nd = nd + } + + // We have written a truncation of f (a numerator / 10^d.dp). The remaining part + // can be interpreted as a small number (< 1) to be added to the last digit of the + // numerator. + // + // If rest > 0, the amount is: + // (rest< 0 guarantees that pow10 << shift does not overflow a uint64. + // + // If rest = 0, pow10 == 1 and the amount is + // fraction / (1 << shift) + // fraction being known with a ±ε uncertainty. + // + // We pass this information to the rounding routine for adjustment. + + ok := adjustLastDigitFixed(d, uint64(rest)<= 0; i-- { + if d.d[i] != '0' { + d.nd = i + 1 + break + } + } + return true +} + +// adjustLastDigitFixed assumes d contains the representation of the integral part +// of some number, whose fractional part is num / (den << shift). The numerator +// num is only known up to an uncertainty of size ε, assumed to be less than +// (den << shift)/2. +// +// It will increase the last digit by one to account for correct rounding, typically +// when the fractional part is greater than 1/2, and will return false if ε is such +// that no correct answer can be given. +func adjustLastDigitFixed(d *decimalSlice, num, den uint64, shift uint, ε uint64) bool { + if num > den< den< den< (den< den<= 0; i-- { + if d.d[i] == '9' { + d.nd-- + } else { + break + } + } + if i < 0 { + d.d[0] = '1' + d.nd = 1 + d.dp++ + } else { + d.d[i]++ + } + return true + } + return false +} + // ShortestDecimal stores in d the shortest decimal representation of f // which belongs to the open interval (lower, upper), where f is supposed // to lie. It returns false whenever the result is unsure. The implementation // uses the Grisu3 algorithm. func (f *extFloat) ShortestDecimal(d *decimalSlice, lower, upper *extFloat) bool { if f.mant == 0 { - d.d[0] = '0' - d.nd = 1 + d.nd = 0 d.dp = 0 d.neg = f.neg + return true } if f.exp == 0 && *lower == *f && *lower == *upper { // an exact integer. @@ -428,8 +569,6 @@ func (f *extFloat) ShortestDecimal(d *decimalSlice, lower, upper *extFloat) bool d.neg = f.neg return true } - const minExp = -60 - const maxExp = -32 upper.Normalize() // Uniformize exponents. if f.exp > upper.exp { @@ -441,7 +580,7 @@ func (f *extFloat) ShortestDecimal(d *decimalSlice, lower, upper *extFloat) bool lower.exp = upper.exp } - exp10 := frexp10Many(minExp, maxExp, lower, f, upper) + exp10 := frexp10Many(lower, f, upper) // Take a safety margin due to rounding in frexp10Many, but we lose precision. upper.mant++ lower.mant-- @@ -459,10 +598,12 @@ func (f *extFloat) ShortestDecimal(d *decimalSlice, lower, upper *extFloat) bool // Count integral digits: there are at most 10. var integerDigits int - for i, pow := range uint64pow10 { - if uint64(integer) >= pow { - integerDigits = i + 1 + for i, pow := 0, uint64(1); i < 20; i++ { + if pow > uint64(integer) { + integerDigits = i + break } + pow *= 10 } for i := 0; i < integerDigits; i++ { pow := uint64pow10[integerDigits-i-1] diff --git a/src/pkg/strconv/ftoa.go b/src/pkg/strconv/ftoa.go index f6eb5391642..8067881e0d1 100644 --- a/src/pkg/strconv/ftoa.go +++ b/src/pkg/strconv/ftoa.go @@ -98,47 +98,79 @@ func genericFtoa(dst []byte, val float64, fmt byte, prec, bitSize int) []byte { return fmtB(dst, neg, mant, exp, flt) } - // Negative precision means "only as much as needed to be exact." - shortest := prec < 0 + if !optimize { + return bigFtoa(dst, prec, fmt, neg, mant, exp, flt) + } var digs decimalSlice + ok := false + // Negative precision means "only as much as needed to be exact." + shortest := prec < 0 if shortest { - ok := false - if optimize { - // Try Grisu3 algorithm. - f := new(extFloat) - lower, upper := f.AssignComputeBounds(mant, exp, neg, flt) - var buf [32]byte - digs.d = buf[:] - ok = f.ShortestDecimal(&digs, &lower, &upper) - } + // Try Grisu3 algorithm. + f := new(extFloat) + lower, upper := f.AssignComputeBounds(mant, exp, neg, flt) + var buf [32]byte + digs.d = buf[:] + ok = f.ShortestDecimal(&digs, &lower, &upper) if !ok { - // Create exact decimal representation. - // The shift is exp - flt.mantbits because mant is a 1-bit integer - // followed by a flt.mantbits fraction, and we are treating it as - // a 1+flt.mantbits-bit integer. - d := new(decimal) - d.Assign(mant) - d.Shift(exp - int(flt.mantbits)) - roundShortest(d, mant, exp, flt) - digs = decimalSlice{d: d.d[:], nd: d.nd, dp: d.dp} + return bigFtoa(dst, prec, fmt, neg, mant, exp, flt) } // Precision for shortest representation mode. - if prec < 0 { - switch fmt { - case 'e', 'E': - prec = digs.nd - 1 - case 'f': - prec = max(digs.nd-digs.dp, 0) - case 'g', 'G': - prec = digs.nd + switch fmt { + case 'e', 'E': + prec = digs.nd - 1 + case 'f': + prec = max(digs.nd-digs.dp, 0) + case 'g', 'G': + prec = digs.nd + } + } else if fmt != 'f' { + // Fixed number of digits. + digits := prec + switch fmt { + case 'e', 'E': + digits++ + case 'g', 'G': + if prec == 0 { + prec = 1 } + digits = prec + } + if digits <= 15 { + // try fast algorithm when the number of digits is reasonable. + var buf [24]byte + digs.d = buf[:] + f := extFloat{mant, exp - int(flt.mantbits), neg} + ok = f.FixedDecimal(&digs, digits) + } + } + if !ok { + return bigFtoa(dst, prec, fmt, neg, mant, exp, flt) + } + return formatDigits(dst, shortest, neg, digs, prec, fmt) +} + +// bigFtoa uses multiprecision computations to format a float. +func bigFtoa(dst []byte, prec int, fmt byte, neg bool, mant uint64, exp int, flt *floatInfo) []byte { + d := new(decimal) + d.Assign(mant) + d.Shift(exp - int(flt.mantbits)) + var digs decimalSlice + shortest := prec < 0 + if shortest { + roundShortest(d, mant, exp, flt) + digs = decimalSlice{d: d.d[:], nd: d.nd, dp: d.dp} + // Precision for shortest representation mode. + switch fmt { + case 'e', 'E': + prec = digs.nd - 1 + case 'f': + prec = max(digs.nd-digs.dp, 0) + case 'g', 'G': + prec = digs.nd } } else { - // Create exact decimal representation. - d := new(decimal) - d.Assign(mant) - d.Shift(exp - int(flt.mantbits)) // Round appropriately. switch fmt { case 'e', 'E': @@ -153,7 +185,10 @@ func genericFtoa(dst []byte, val float64, fmt byte, prec, bitSize int) []byte { } digs = decimalSlice{d: d.d[:], nd: d.nd, dp: d.dp} } + return formatDigits(dst, shortest, neg, digs, prec, fmt) +} +func formatDigits(dst []byte, shortest bool, neg bool, digs decimalSlice, prec int, fmt byte) []byte { switch fmt { case 'e', 'E': return fmtE(dst, neg, digs, prec, fmt) @@ -312,12 +347,15 @@ func fmtE(dst []byte, neg bool, d decimalSlice, prec int, fmt byte) []byte { // .moredigits if prec > 0 { dst = append(dst, '.') - for i := 1; i <= prec; i++ { - ch = '0' - if i < d.nd { - ch = d.d[i] - } - dst = append(dst, ch) + i := 1 + m := d.nd + prec + 1 - max(d.nd, prec+1) + for i < m { + dst = append(dst, d.d[i]) + i++ + } + for i <= prec { + dst = append(dst, '0') + i++ } } @@ -347,13 +385,16 @@ func fmtE(dst []byte, neg bool, d decimalSlice, prec int, fmt byte) []byte { i-- buf[i] = byte(exp + '0') - // leading zeroes - if i > len(buf)-2 { - i-- - buf[i] = '0' + switch i { + case 0: + dst = append(dst, buf[0], buf[1], buf[2]) + case 1: + dst = append(dst, buf[1], buf[2]) + case 2: + // leading zeroes + dst = append(dst, '0', buf[2]) } - - return append(dst, buf[i:]...) + return dst } // %f: -ddddddd.ddddd diff --git a/src/pkg/strconv/ftoa_test.go b/src/pkg/strconv/ftoa_test.go index 7b06235a40e..39b861547ea 100644 --- a/src/pkg/strconv/ftoa_test.go +++ b/src/pkg/strconv/ftoa_test.go @@ -163,6 +163,7 @@ func TestFtoaRandom(t *testing.T) { for i := 0; i < N; i++ { bits := uint64(rand.Uint32())<<32 | uint64(rand.Uint32()) x := math.Float64frombits(bits) + shortFast := FormatFloat(x, 'g', -1, 64) SetOptimize(false) shortSlow := FormatFloat(x, 'g', -1, 64) @@ -170,6 +171,15 @@ func TestFtoaRandom(t *testing.T) { if shortSlow != shortFast { t.Errorf("%b printed as %s, want %s", x, shortFast, shortSlow) } + + prec := rand.Intn(12) + 5 + shortFast = FormatFloat(x, 'e', prec, 64) + SetOptimize(false) + shortSlow = FormatFloat(x, 'e', prec, 64) + SetOptimize(true) + if shortSlow != shortFast { + t.Errorf("%b printed as %s, want %s", x, shortFast, shortSlow) + } } } @@ -223,3 +233,8 @@ func BenchmarkAppendFloat32ExactFraction(b *testing.B) { benchmarkAppendFloat(b, func BenchmarkAppendFloat32Point(b *testing.B) { benchmarkAppendFloat(b, 339.7784, 'g', -1, 32) } func BenchmarkAppendFloat32Exp(b *testing.B) { benchmarkAppendFloat(b, -5.09e25, 'g', -1, 32) } func BenchmarkAppendFloat32NegExp(b *testing.B) { benchmarkAppendFloat(b, -5.11e-25, 'g', -1, 32) } + +func BenchmarkAppendFloat64Fixed1(b *testing.B) { benchmarkAppendFloat(b, 123456, 'e', 3, 64) } +func BenchmarkAppendFloat64Fixed2(b *testing.B) { benchmarkAppendFloat(b, 123.456, 'e', 3, 64) } +func BenchmarkAppendFloat64Fixed3(b *testing.B) { benchmarkAppendFloat(b, 1.23456e+78, 'e', 3, 64) } +func BenchmarkAppendFloat64Fixed4(b *testing.B) { benchmarkAppendFloat(b, 1.23456e-78, 'e', 3, 64) }