From c1c027964e17d9c3f8acb4a9136698ae696a14e8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9my=20Oudompheng?= <oudomphe@phare.normalesup.org>
Date: Sat, 1 Sep 2012 16:31:46 +0200
Subject: [PATCH] strconv: faster FormatFloat for fixed number of digits.

The performance improvement applies to the case where
prec >= 0 and fmt is 'e' or 'g'.

Additional minor optimisations are included. A small
performance impact happens in some cases due to code
refactoring.

benchmark                              old ns/op    new ns/op    delta
BenchmarkAppendFloat64Fixed1                 623          235  -62.28%
BenchmarkAppendFloat64Fixed2                1050          272  -74.10%
BenchmarkAppendFloat64Fixed3                3723          243  -93.47%
BenchmarkAppendFloat64Fixed4               10285          274  -97.34%

BenchmarkAppendFloatDecimal                  190          206   +8.42%
BenchmarkAppendFloat                         387          377   -2.58%
BenchmarkAppendFloatExp                      397          339  -14.61%
BenchmarkAppendFloatNegExp                   377          336  -10.88%
BenchmarkAppendFloatBig                      546          482  -11.72%

BenchmarkAppendFloat32Integer                188          204   +8.51%
BenchmarkAppendFloat32ExactFraction          329          298   -9.42%
BenchmarkAppendFloat32Point                  400          372   -7.00%
BenchmarkAppendFloat32Exp                    369          306  -17.07%
BenchmarkAppendFloat32NegExp                 372          305  -18.01%

R=golang-dev, rsc
CC=golang-dev, remy
https://golang.org/cl/6462049
---
 src/pkg/strconv/extfloat.go  | 205 +++++++++++++++++++++++++++++------
 src/pkg/strconv/ftoa.go      | 129 ++++++++++++++--------
 src/pkg/strconv/ftoa_test.go |  15 +++
 3 files changed, 273 insertions(+), 76 deletions(-)

diff --git a/src/pkg/strconv/extfloat.go b/src/pkg/strconv/extfloat.go
index 78bb2ba943d..6c352019401 100644
--- a/src/pkg/strconv/extfloat.go
+++ b/src/pkg/strconv/extfloat.go
@@ -4,8 +4,6 @@
 
 package strconv
 
-import "math"
-
 // An extFloat represents an extended floating-point number, with more
 // precision than a float64. It does not try to save bits: the
 // number represented by the structure is mant*(2^exp), with a negative
@@ -179,17 +177,6 @@ out:
 	return
 }
 
-// Assign sets f to the value of x.
-func (f *extFloat) Assign(x float64) {
-	if x < 0 {
-		x = -x
-		f.neg = true
-	}
-	x, f.exp = math.Frexp(x)
-	f.mant = uint64(x * float64(1<<64))
-	f.exp -= 64
-}
-
 // AssignComputeBounds sets f to the floating point value
 // defined by mant, exp and precision given by flt. It returns
 // lower, upper such that any number in the closed interval
@@ -354,16 +341,17 @@ func (f *extFloat) AssignDecimal(mantissa uint64, exp10 int, neg bool, trunc boo
 // f by an approximate power of ten 10^-exp, and returns exp10, so
 // that f*10^exp10 has the same value as the old f, up to an ulp,
 // as well as the index of 10^-exp in the powersOfTen table.
-// The arguments expMin and expMax constrain the final value of the
-// binary exponent of f.
-func (f *extFloat) frexp10(expMin, expMax int) (exp10, index int) {
-	// it is illegal to call this function with a too restrictive exponent range.
-	if expMax-expMin <= 25 {
-		panic("strconv: invalid exponent range")
-	}
+func (f *extFloat) frexp10() (exp10, index int) {
+	// The constants expMin and expMax constrain the final value of the
+	// binary exponent of f. We want a small integral part in the result
+	// because finding digits of an integer requires divisions, whereas
+	// digits of the fractional part can be found by repeatedly multiplying
+	// by 10.
+	const expMin = -60
+	const expMax = -32
 	// Find power of ten such that x * 10^n has a binary exponent
-	// between expMin and expMax
-	approxExp10 := -(f.exp + 100) * 28 / 93 // log(10)/log(2) is close to 93/28.
+	// between expMin and expMax.
+	approxExp10 := ((expMin+expMax)/2 - f.exp) * 28 / 93 // log(10)/log(2) is close to 93/28.
 	i := (approxExp10 - firstPowerOfTen) / stepPowerOfTen
 Loop:
 	for {
@@ -385,23 +373,176 @@ Loop:
 }
 
 // frexp10Many applies a common shift by a power of ten to a, b, c.
-func frexp10Many(expMin, expMax int, a, b, c *extFloat) (exp10 int) {
-	exp10, i := c.frexp10(expMin, expMax)
+func frexp10Many(a, b, c *extFloat) (exp10 int) {
+	exp10, i := c.frexp10()
 	a.Multiply(powersOfTen[i])
 	b.Multiply(powersOfTen[i])
 	return
 }
 
+// FixedDecimal stores in d the first n significant digits
+// of the decimal representation of f. It returns false
+// if it cannot be sure of the answer.
+func (f *extFloat) FixedDecimal(d *decimalSlice, n int) bool {
+	if f.mant == 0 {
+		d.nd = 0
+		d.dp = 0
+		d.neg = f.neg
+		return true
+	}
+	if n == 0 {
+		panic("strconv: internal error: extFloat.FixedDecimal called with n == 0")
+	}
+	// Multiply by an appropriate power of ten to have a reasonable
+	// number to process. 
+	f.Normalize()
+	exp10, _ := f.frexp10()
+
+	shift := uint(-f.exp)
+	integer := uint32(f.mant >> shift)
+	fraction := f.mant - (uint64(integer) << shift)
+	ε := uint64(1) // ε is the uncertainty we have on the mantissa of f.
+
+	// Write exactly n digits to d.
+	needed := n        // how many digits are left to write.
+	integerDigits := 0 // the number of decimal digits of integer.
+	pow10 := uint64(1) // the power of ten by which f was scaled.
+	for i, pow := 0, uint64(1); i < 20; i++ {
+		if pow > uint64(integer) {
+			integerDigits = i
+			break
+		}
+		pow *= 10
+	}
+	rest := integer
+	if integerDigits > needed {
+		// the integral part is already large, trim the last digits.
+		pow10 = uint64pow10[integerDigits-needed]
+		integer /= uint32(pow10)
+		rest -= integer * uint32(pow10)
+	} else {
+		rest = 0
+	}
+
+	// Write the digits of integer: the digits of rest are omitted.
+	var buf [32]byte
+	pos := len(buf)
+	for v := integer; v > 0; {
+		v1 := v / 10
+		v -= 10 * v1
+		pos--
+		buf[pos] = byte(v + '0')
+		v = v1
+	}
+	for i := pos; i < len(buf); i++ {
+		d.d[i-pos] = buf[i]
+	}
+	nd := len(buf) - pos
+	d.nd = nd
+	d.dp = integerDigits + exp10
+	needed -= nd
+
+	if needed > 0 {
+		if rest != 0 || pow10 != 1 {
+			panic("strconv: internal error, rest != 0 but needed > 0")
+		}
+		// Emit digits for the fractional part. Each time, 10*fraction
+		// fits in a uint64 without overflow.
+		for needed > 0 {
+			fraction *= 10
+			ε *= 10 // the uncertainty scales as we multiply by ten.
+			if 2*ε > 1<<shift {
+				// the error is so large it could modify which digit to write, abort.
+				return false
+			}
+			digit := fraction >> shift
+			d.d[nd] = byte(digit + '0')
+			fraction -= digit << shift
+			nd++
+			needed--
+		}
+		d.nd = nd
+	}
+
+	// We have written a truncation of f (a numerator / 10^d.dp). The remaining part
+	// can be interpreted as a small number (< 1) to be added to the last digit of the
+	// numerator.
+	//
+	// If rest > 0, the amount is:
+	//    (rest<<shift | fraction) / (pow10 << shift)
+	//    fraction being known with a ±ε uncertainty.
+	//    The fact that n > 0 guarantees that pow10 << shift does not overflow a uint64.
+	//
+	// If rest = 0, pow10 == 1 and the amount is
+	//    fraction / (1 << shift)
+	//    fraction being known with a ±ε uncertainty.
+	//
+	// We pass this information to the rounding routine for adjustment.
+
+	ok := adjustLastDigitFixed(d, uint64(rest)<<shift|fraction, pow10, shift, ε)
+	if !ok {
+		return false
+	}
+	// Trim trailing zeros.
+	for i := d.nd - 1; i >= 0; i-- {
+		if d.d[i] != '0' {
+			d.nd = i + 1
+			break
+		}
+	}
+	return true
+}
+
+// adjustLastDigitFixed assumes d contains the representation of the integral part
+// of some number, whose fractional part is num / (den << shift). The numerator
+// num is only known up to an uncertainty of size ε, assumed to be less than
+// (den << shift)/2.
+//
+// It will increase the last digit by one to account for correct rounding, typically
+// when the fractional part is greater than 1/2, and will return false if ε is such
+// that no correct answer can be given.
+func adjustLastDigitFixed(d *decimalSlice, num, den uint64, shift uint, ε uint64) bool {
+	if num > den<<shift {
+		panic("strconv: num > den<<shift in adjustLastDigitFixed")
+	}
+	if 2*ε > den<<shift {
+		panic("strconv: ε > (den<<shift)/2")
+	}
+	if 2*(num+ε) < den<<shift {
+		return true
+	}
+	if 2*(num-ε) > den<<shift {
+		// increment d by 1.
+		i := d.nd - 1
+		for ; i >= 0; i-- {
+			if d.d[i] == '9' {
+				d.nd--
+			} else {
+				break
+			}
+		}
+		if i < 0 {
+			d.d[0] = '1'
+			d.nd = 1
+			d.dp++
+		} else {
+			d.d[i]++
+		}
+		return true
+	}
+	return false
+}
+
 // ShortestDecimal stores in d the shortest decimal representation of f
 // which belongs to the open interval (lower, upper), where f is supposed
 // to lie. It returns false whenever the result is unsure. The implementation
 // uses the Grisu3 algorithm.
 func (f *extFloat) ShortestDecimal(d *decimalSlice, lower, upper *extFloat) bool {
 	if f.mant == 0 {
-		d.d[0] = '0'
-		d.nd = 1
+		d.nd = 0
 		d.dp = 0
 		d.neg = f.neg
+		return true
 	}
 	if f.exp == 0 && *lower == *f && *lower == *upper {
 		// an exact integer.
@@ -428,8 +569,6 @@ func (f *extFloat) ShortestDecimal(d *decimalSlice, lower, upper *extFloat) bool
 		d.neg = f.neg
 		return true
 	}
-	const minExp = -60
-	const maxExp = -32
 	upper.Normalize()
 	// Uniformize exponents.
 	if f.exp > upper.exp {
@@ -441,7 +580,7 @@ func (f *extFloat) ShortestDecimal(d *decimalSlice, lower, upper *extFloat) bool
 		lower.exp = upper.exp
 	}
 
-	exp10 := frexp10Many(minExp, maxExp, lower, f, upper)
+	exp10 := frexp10Many(lower, f, upper)
 	// Take a safety margin due to rounding in frexp10Many, but we lose precision.
 	upper.mant++
 	lower.mant--
@@ -459,10 +598,12 @@ func (f *extFloat) ShortestDecimal(d *decimalSlice, lower, upper *extFloat) bool
 
 	// Count integral digits: there are at most 10.
 	var integerDigits int
-	for i, pow := range uint64pow10 {
-		if uint64(integer) >= pow {
-			integerDigits = i + 1
+	for i, pow := 0, uint64(1); i < 20; i++ {
+		if pow > uint64(integer) {
+			integerDigits = i
+			break
 		}
+		pow *= 10
 	}
 	for i := 0; i < integerDigits; i++ {
 		pow := uint64pow10[integerDigits-i-1]
diff --git a/src/pkg/strconv/ftoa.go b/src/pkg/strconv/ftoa.go
index f6eb5391642..8067881e0d1 100644
--- a/src/pkg/strconv/ftoa.go
+++ b/src/pkg/strconv/ftoa.go
@@ -98,47 +98,79 @@ func genericFtoa(dst []byte, val float64, fmt byte, prec, bitSize int) []byte {
 		return fmtB(dst, neg, mant, exp, flt)
 	}
 
-	// Negative precision means "only as much as needed to be exact."
-	shortest := prec < 0
+	if !optimize {
+		return bigFtoa(dst, prec, fmt, neg, mant, exp, flt)
+	}
 
 	var digs decimalSlice
+	ok := false
+	// Negative precision means "only as much as needed to be exact."
+	shortest := prec < 0
 	if shortest {
-		ok := false
-		if optimize {
-			// Try Grisu3 algorithm.
-			f := new(extFloat)
-			lower, upper := f.AssignComputeBounds(mant, exp, neg, flt)
-			var buf [32]byte
-			digs.d = buf[:]
-			ok = f.ShortestDecimal(&digs, &lower, &upper)
-		}
+		// Try Grisu3 algorithm.
+		f := new(extFloat)
+		lower, upper := f.AssignComputeBounds(mant, exp, neg, flt)
+		var buf [32]byte
+		digs.d = buf[:]
+		ok = f.ShortestDecimal(&digs, &lower, &upper)
 		if !ok {
-			// Create exact decimal representation.
-			// The shift is exp - flt.mantbits because mant is a 1-bit integer
-			// followed by a flt.mantbits fraction, and we are treating it as
-			// a 1+flt.mantbits-bit integer.
-			d := new(decimal)
-			d.Assign(mant)
-			d.Shift(exp - int(flt.mantbits))
-			roundShortest(d, mant, exp, flt)
-			digs = decimalSlice{d: d.d[:], nd: d.nd, dp: d.dp}
+			return bigFtoa(dst, prec, fmt, neg, mant, exp, flt)
 		}
 		// Precision for shortest representation mode.
-		if prec < 0 {
-			switch fmt {
-			case 'e', 'E':
-				prec = digs.nd - 1
-			case 'f':
-				prec = max(digs.nd-digs.dp, 0)
-			case 'g', 'G':
-				prec = digs.nd
+		switch fmt {
+		case 'e', 'E':
+			prec = digs.nd - 1
+		case 'f':
+			prec = max(digs.nd-digs.dp, 0)
+		case 'g', 'G':
+			prec = digs.nd
+		}
+	} else if fmt != 'f' {
+		// Fixed number of digits.
+		digits := prec
+		switch fmt {
+		case 'e', 'E':
+			digits++
+		case 'g', 'G':
+			if prec == 0 {
+				prec = 1
 			}
+			digits = prec
+		}
+		if digits <= 15 {
+			// try fast algorithm when the number of digits is reasonable.
+			var buf [24]byte
+			digs.d = buf[:]
+			f := extFloat{mant, exp - int(flt.mantbits), neg}
+			ok = f.FixedDecimal(&digs, digits)
+		}
+	}
+	if !ok {
+		return bigFtoa(dst, prec, fmt, neg, mant, exp, flt)
+	}
+	return formatDigits(dst, shortest, neg, digs, prec, fmt)
+}
+
+// bigFtoa uses multiprecision computations to format a float.
+func bigFtoa(dst []byte, prec int, fmt byte, neg bool, mant uint64, exp int, flt *floatInfo) []byte {
+	d := new(decimal)
+	d.Assign(mant)
+	d.Shift(exp - int(flt.mantbits))
+	var digs decimalSlice
+	shortest := prec < 0
+	if shortest {
+		roundShortest(d, mant, exp, flt)
+		digs = decimalSlice{d: d.d[:], nd: d.nd, dp: d.dp}
+		// Precision for shortest representation mode.
+		switch fmt {
+		case 'e', 'E':
+			prec = digs.nd - 1
+		case 'f':
+			prec = max(digs.nd-digs.dp, 0)
+		case 'g', 'G':
+			prec = digs.nd
 		}
 	} else {
-		// Create exact decimal representation.
-		d := new(decimal)
-		d.Assign(mant)
-		d.Shift(exp - int(flt.mantbits))
 		// Round appropriately.
 		switch fmt {
 		case 'e', 'E':
@@ -153,7 +185,10 @@ func genericFtoa(dst []byte, val float64, fmt byte, prec, bitSize int) []byte {
 		}
 		digs = decimalSlice{d: d.d[:], nd: d.nd, dp: d.dp}
 	}
+	return formatDigits(dst, shortest, neg, digs, prec, fmt)
+}
 
+func formatDigits(dst []byte, shortest bool, neg bool, digs decimalSlice, prec int, fmt byte) []byte {
 	switch fmt {
 	case 'e', 'E':
 		return fmtE(dst, neg, digs, prec, fmt)
@@ -312,12 +347,15 @@ func fmtE(dst []byte, neg bool, d decimalSlice, prec int, fmt byte) []byte {
 	// .moredigits
 	if prec > 0 {
 		dst = append(dst, '.')
-		for i := 1; i <= prec; i++ {
-			ch = '0'
-			if i < d.nd {
-				ch = d.d[i]
-			}
-			dst = append(dst, ch)
+		i := 1
+		m := d.nd + prec + 1 - max(d.nd, prec+1)
+		for i < m {
+			dst = append(dst, d.d[i])
+			i++
+		}
+		for i <= prec {
+			dst = append(dst, '0')
+			i++
 		}
 	}
 
@@ -347,13 +385,16 @@ func fmtE(dst []byte, neg bool, d decimalSlice, prec int, fmt byte) []byte {
 	i--
 	buf[i] = byte(exp + '0')
 
-	// leading zeroes
-	if i > len(buf)-2 {
-		i--
-		buf[i] = '0'
+	switch i {
+	case 0:
+		dst = append(dst, buf[0], buf[1], buf[2])
+	case 1:
+		dst = append(dst, buf[1], buf[2])
+	case 2:
+		// leading zeroes
+		dst = append(dst, '0', buf[2])
 	}
-
-	return append(dst, buf[i:]...)
+	return dst
 }
 
 // %f: -ddddddd.ddddd
diff --git a/src/pkg/strconv/ftoa_test.go b/src/pkg/strconv/ftoa_test.go
index 7b06235a40e..39b861547ea 100644
--- a/src/pkg/strconv/ftoa_test.go
+++ b/src/pkg/strconv/ftoa_test.go
@@ -163,6 +163,7 @@ func TestFtoaRandom(t *testing.T) {
 	for i := 0; i < N; i++ {
 		bits := uint64(rand.Uint32())<<32 | uint64(rand.Uint32())
 		x := math.Float64frombits(bits)
+
 		shortFast := FormatFloat(x, 'g', -1, 64)
 		SetOptimize(false)
 		shortSlow := FormatFloat(x, 'g', -1, 64)
@@ -170,6 +171,15 @@ func TestFtoaRandom(t *testing.T) {
 		if shortSlow != shortFast {
 			t.Errorf("%b printed as %s, want %s", x, shortFast, shortSlow)
 		}
+
+		prec := rand.Intn(12) + 5
+		shortFast = FormatFloat(x, 'e', prec, 64)
+		SetOptimize(false)
+		shortSlow = FormatFloat(x, 'e', prec, 64)
+		SetOptimize(true)
+		if shortSlow != shortFast {
+			t.Errorf("%b printed as %s, want %s", x, shortFast, shortSlow)
+		}
 	}
 }
 
@@ -223,3 +233,8 @@ func BenchmarkAppendFloat32ExactFraction(b *testing.B) { benchmarkAppendFloat(b,
 func BenchmarkAppendFloat32Point(b *testing.B)         { benchmarkAppendFloat(b, 339.7784, 'g', -1, 32) }
 func BenchmarkAppendFloat32Exp(b *testing.B)           { benchmarkAppendFloat(b, -5.09e25, 'g', -1, 32) }
 func BenchmarkAppendFloat32NegExp(b *testing.B)        { benchmarkAppendFloat(b, -5.11e-25, 'g', -1, 32) }
+
+func BenchmarkAppendFloat64Fixed1(b *testing.B) { benchmarkAppendFloat(b, 123456, 'e', 3, 64) }
+func BenchmarkAppendFloat64Fixed2(b *testing.B) { benchmarkAppendFloat(b, 123.456, 'e', 3, 64) }
+func BenchmarkAppendFloat64Fixed3(b *testing.B) { benchmarkAppendFloat(b, 1.23456e+78, 'e', 3, 64) }
+func BenchmarkAppendFloat64Fixed4(b *testing.B) { benchmarkAppendFloat(b, 1.23456e-78, 'e', 3, 64) }