strconv: faster FormatFloat(x, *, -1, 64) using Grisu3 algorithm.

The implementation is similar to the one from the double-conversion library used in the Chrome V8 engine. old ns/op new ns/op speedup BenchmarkAppendFloatDecimal 591 480 1.2x BenchmarkAppendFloat 2956 486 6.1x BenchmarkAppendFloatExp 10622 503 21.1x BenchmarkAppendFloatNegExp 40343 483 83.5x BenchmarkAppendFloatBig 2798 664 4.2x See F. Loitsch, ``Printing Floating-Point Numbers Quickly and Accurately with Integers'', Proceedings of the ACM, 2010. R=rsc CC=golang-dev, remy https://golang.org/cl/5502079
2024-11-25 03:47:57 -07:00 · 2012-01-13 23:24:33 +01:00 · 2012-01-13 23:24:33 +01:00 · 0575cd9de4
commit 0575cd9de4
parent a5950df89e
3 changed files with 257 additions and 20 deletions
--- a/src/pkg/strconv/extfloat.go
+++ b/src/pkg/strconv/extfloat.go
@ -191,6 +191,36 @@ func (f *extFloat) Assign(x float64) {
 	f.exp -= 64
 }
 // AssignComputeBounds sets f to the value of x and returns
 // lower, upper such that any number in the closed interval
 // [lower, upper] is converted back to x.
 func (f *extFloat) AssignComputeBounds(x float64) (lower, upper extFloat) {
 	// Special cases.
 	bits := math.Float64bits(x)
 	flt := &float64info
 	neg := bits>>(flt.expbits+flt.mantbits) != 0
 	expBiased := int(bits>>flt.mantbits) & (1<<flt.expbits - 1)
 	mant := bits & (uint64(1)<<flt.mantbits - 1)
 	if expBiased == 0 {
 		// denormalized.
 		f.mant = mant
 		f.exp = 1 + flt.bias - int(flt.mantbits)
 	} else {
 		f.mant = mant | 1<<flt.mantbits
 		f.exp = expBiased + flt.bias - int(flt.mantbits)
 	}
 	f.neg = neg
 	upper = extFloat{mant: 2*f.mant + 1, exp: f.exp - 1, neg: f.neg}
 	if mant != 0 || expBiased == 1 {
 		lower = extFloat{mant: 2*f.mant - 1, exp: f.exp - 1, neg: f.neg}
 	} else {
 		lower = extFloat{mant: 4*f.mant - 1, exp: f.exp - 2, neg: f.neg}
 	}
 	return
 }
 // Normalize normalizes f so that the highest bit of the mantissa is
 // set, and returns the number by which the mantissa was left-shifted.
 func (f *extFloat) Normalize() uint {
@ -309,3 +339,163 @@ func (f *extFloat) AssignDecimal(d *decimal) (ok bool) {
 	}
 	return true
 }
 // Frexp10 is an analogue of math.Frexp for decimal powers. It scales
 // f by an approximate power of ten 10^-exp, and returns exp10, so
 // that f*10^exp10 has the same value as the old f, up to an ulp,
 // as well as the index of 10^-exp in the powersOfTen table.
 // The arguments expMin and expMax constrain the final value of the
 // binary exponent of f.
 func (f *extFloat) frexp10(expMin, expMax int) (exp10, index int) {
 	// it is illegal to call this function with a too restrictive exponent range.
 	if expMax-expMin <= 25 {
 		panic("strconv: invalid exponent range")
 	}
 	// Find power of ten such that x * 10^n has a binary exponent
 	// between expMin and expMax
 	approxExp10 := -(f.exp + 100) * 28 / 93 // log(10)/log(2) is close to 93/28.
 	i := (approxExp10 - firstPowerOfTen) / stepPowerOfTen
 Loop:
 	for {
 		exp := f.exp + powersOfTen[i].exp + 64
 		switch {
 		case exp < expMin:
 			i++
 		case exp > expMax:
 			i--
 		default:
 			break Loop
 		}
 	}
 	// Apply the desired decimal shift on f. It will have exponent
 	// in the desired range. This is multiplication by 10^-exp10.
 	f.Multiply(powersOfTen[i])
 	return -(firstPowerOfTen + i*stepPowerOfTen), i
 }
 // frexp10Many applies a common shift by a power of ten to a, b, c.
 func frexp10Many(expMin, expMax int, a, b, c *extFloat) (exp10 int) {
 	exp10, i := c.frexp10(expMin, expMax)
 	a.Multiply(powersOfTen[i])
 	b.Multiply(powersOfTen[i])
 	return
 }
 // ShortestDecimal stores in d the shortest decimal representation of f
 // which belongs to the open interval (lower, upper), where f is supposed
 // to lie. It returns false whenever the result is unsure. The implementation
 // uses the Grisu3 algorithm.
 func (f *extFloat) ShortestDecimal(d *decimal, lower, upper *extFloat) bool {
 	if f.mant == 0 {
 		d.d[0] = '0'
 		d.nd = 1
 		d.dp = 0
 		d.neg = f.neg
 	}
 	const minExp = -60
 	const maxExp = -32
 	upper.Normalize()
 	// Uniformize exponents.
 	if f.exp > upper.exp {
 		f.mant <<= uint(f.exp - upper.exp)
 		f.exp = upper.exp
 	}
 	if lower.exp > upper.exp {
 		lower.mant <<= uint(lower.exp - upper.exp)
 		lower.exp = upper.exp
 	}
 	exp10 := frexp10Many(minExp, maxExp, lower, f, upper)
 	// Take a safety margin due to rounding in frexp10Many, but we lose precision.
 	upper.mant++
 	lower.mant--
 	// The shortest representation of f is either rounded up or down, but
 	// in any case, it is a truncation of upper.
 	shift := uint(-upper.exp)
 	integer := uint32(upper.mant >> shift)
 	fraction := upper.mant - (uint64(integer) << shift)
 	// How far we can go down from upper until the result is wrong.
 	allowance := upper.mant - lower.mant
 	// How far we should go to get a very precise result.
 	targetDiff := upper.mant - f.mant
 	// Count integral digits: there are at most 10.
 	var integerDigits int
 	for i, pow := range uint64pow10 {
 		if uint64(integer) >= pow {
 			integerDigits = i + 1
 		}
 	}
 	for i := 0; i < integerDigits; i++ {
 		pow := uint64pow10[integerDigits-i-1]
 		digit := integer / uint32(pow)
 		d.d[i] = byte(digit + '0')
 		integer -= digit * uint32(pow)
 		// evaluate whether we should stop.
 		if currentDiff := uint64(integer)<<shift + fraction; currentDiff < allowance {
 			d.nd = i + 1
 			d.dp = integerDigits + exp10
 			d.neg = f.neg
 			// Sometimes allowance is so large the last digit might need to be
 			// decremented to get closer to f.
 			return adjustLastDigit(d, currentDiff, targetDiff, allowance, pow<<shift, 2)
 		}
 	}
 	d.nd = integerDigits
 	d.dp = d.nd + exp10
 	d.neg = f.neg
 	// Compute digits of the fractional part. At each step fraction does not
 	// overflow. The choice of minExp implies that fraction is less than 2^60.
 	var digit int
 	multiplier := uint64(1)
 	for {
 		fraction *= 10
 		multiplier *= 10
 		digit = int(fraction >> shift)
 		d.d[d.nd] = byte(digit + '0')
 		d.nd++
 		fraction -= uint64(digit) << shift
 		if fraction < allowance*multiplier {
 			// We are in the admissible range. Note that if allowance is about to
 			// overflow, that is, allowance > 2^64/10, the condition is automatically
 			// true due to the limited range of fraction.
 			return adjustLastDigit(d,
 				fraction, targetDiff*multiplier, allowance*multiplier,
 				1<<shift, multiplier*2)
 		}
 	}
 	return false
 }
 // adjustLastDigit modifies d = x-currentDiff*ε, to get closest to 
 // d = x-targetDiff*ε, without becoming smaller than x-maxDiff*ε.
 // It assumes that a decimal digit is worth ulpDecimal*ε, and that
 // all data is known with a error estimate of ulpBinary*ε.
 func adjustLastDigit(d *decimal, currentDiff, targetDiff, maxDiff, ulpDecimal, ulpBinary uint64) bool {
 	if ulpDecimal < 2*ulpBinary {
 		// Appromixation is too wide.
 		return false
 	}
 	for currentDiff+ulpDecimal/2+ulpBinary < targetDiff {
 		d.d[d.nd-1]--
 		currentDiff += ulpDecimal
 	}
 	if currentDiff+ulpDecimal <= targetDiff+ulpDecimal/2+ulpBinary {
 		// we have two choices, and don't know what to do.
 		return false
 	}
 	if currentDiff < ulpBinary || currentDiff > maxDiff-ulpBinary {
 		// we went too far
 		return false
 	}
 	if d.nd == 1 && d.d[0] == '0' {
 		// the number has actually reached zero.
 		d.nd = 0
 		d.dp = 0
 	}
 	return true
 }
--- a/src/pkg/strconv/ftoa.go
+++ b/src/pkg/strconv/ftoa.go
@ -98,20 +98,29 @@ func genericFtoa(dst []byte, val float64, fmt byte, prec, bitSize int) []byte {
 		return fmtB(dst, neg, mant, exp, flt)
 	}
 	// Negative precision means "only as much as needed to be exact."
 	shortest := prec < 0
 	d := new(decimal)
 	if shortest {
 		ok := false
 		if optimize && bitSize == 64 {
 			// Try Grisu3 algorithm.
 			f := new(extFloat)
 			lower, upper := f.AssignComputeBounds(val)
 			ok = f.ShortestDecimal(d, &lower, &upper)
 		}
 		if !ok {
 			// Create exact decimal representation.
 			// The shift is exp - flt.mantbits because mant is a 1-bit integer
 			// followed by a flt.mantbits fraction, and we are treating it as
 			// a 1+flt.mantbits-bit integer.
 	d := new(decimal)
 			d.Assign(mant)
 			d.Shift(exp - int(flt.mantbits))
 	// Round appropriately.
 	// Negative precision means "only as much as needed to be exact."
 	shortest := false
 	if prec < 0 {
 		shortest = true
 			roundShortest(d, mant, exp, flt)
 		}
 		// Precision for shortest representation mode.
 		if prec < 0 {
 			switch fmt {
 			case 'e', 'E':
 				prec = d.nd - 1
@ -120,7 +129,12 @@ func genericFtoa(dst []byte, val float64, fmt byte, prec, bitSize int) []byte {
 			case 'g', 'G':
 				prec = d.nd
 			}
 		}
 	} else {
 		// Create exact decimal representation.
 		d.Assign(mant)
 		d.Shift(exp - int(flt.mantbits))
 		// Round appropriately.
 		switch fmt {
 		case 'e', 'E':
 			d.Round(prec + 1)
--- a/src/pkg/strconv/ftoa_test.go
+++ b/src/pkg/strconv/ftoa_test.go
@ -6,6 +6,7 @@ package strconv_test
 import (
 	"math"
 	"math/rand"
 	. "strconv"
 	"testing"
 )
@ -153,6 +154,25 @@ func TestFtoa(t *testing.T) {
 	}
 }
 func TestFtoaRandom(t *testing.T) {
 	N := int(1e4)
 	if testing.Short() {
 		N = 100
 	}
 	t.Logf("testing %d random numbers with fast and slow FormatFloat", N)
 	for i := 0; i < N; i++ {
 		bits := uint64(rand.Uint32())<<32 | uint64(rand.Uint32())
 		x := math.Float64frombits(bits)
 		shortFast := FormatFloat(x, 'g', -1, 64)
 		SetOptimize(false)
 		shortSlow := FormatFloat(x, 'g', -1, 64)
 		SetOptimize(true)
 		if shortSlow != shortFast {
 			t.Errorf("%b printed as %s, want %s", x, shortFast, shortSlow)
 		}
 	}
 }
 func TestAppendFloatDoesntAllocate(t *testing.T) {
 	n := numAllocations(func() {
 		var buf [64]byte
@ -188,6 +208,12 @@ func BenchmarkFormatFloatExp(b *testing.B) {
 	}
 }
 func BenchmarkFormatFloatNegExp(b *testing.B) {
 	for i := 0; i < b.N; i++ {
 		FormatFloat(-5.11e-95, 'g', -1, 64)
 	}
 }
 func BenchmarkFormatFloatBig(b *testing.B) {
 	for i := 0; i < b.N; i++ {
 		FormatFloat(123456789123456789123456789, 'g', -1, 64)
@ -215,6 +241,13 @@ func BenchmarkAppendFloatExp(b *testing.B) {
 	}
 }
 func BenchmarkAppendFloatNegExp(b *testing.B) {
 	dst := make([]byte, 0, 30)
 	for i := 0; i < b.N; i++ {
 		AppendFloat(dst, -5.11e-95, 'g', -1, 64)
 	}
 }
 func BenchmarkAppendFloatBig(b *testing.B) {
 	dst := make([]byte, 0, 30)
 	for i := 0; i < b.N; i++ {