mirror of
https://github.com/golang/go
synced 2024-11-26 05:07:59 -07:00
strconv: Implement Ryū algorithm for ftoa shortest mode
This patch implements the algorithm from Ulf Adams, "Ryū: Fast Float-to-String Conversion" (doi:10.1145/3192366.3192369) for formatting floating-point numbers with a fixed number of decimal digits. It is not a direct translation of the reference C implementation but still follows the original paper. In particular, it uses full 128-bit powers of 10, which allows for more precision in the other modes (fixed ftoa, atof). name old time/op new time/op delta AppendFloat/Decimal-4 49.6ns ± 3% 59.3ns ± 0% +19.59% (p=0.008 n=5+5) AppendFloat/Float-4 122ns ± 1% 91ns ± 1% -25.92% (p=0.008 n=5+5) AppendFloat/Exp-4 89.3ns ± 1% 100.0ns ± 1% +11.98% (p=0.008 n=5+5) AppendFloat/NegExp-4 88.3ns ± 2% 97.1ns ± 1% +9.87% (p=0.008 n=5+5) AppendFloat/LongExp-4 143ns ± 2% 103ns ± 0% -28.17% (p=0.016 n=5+4) AppendFloat/Big-4 144ns ± 1% 110ns ± 1% -23.26% (p=0.008 n=5+5) AppendFloat/BinaryExp-4 46.2ns ± 2% 46.0ns ± 1% ~ (p=0.603 n=5+5) AppendFloat/32Integer-4 49.1ns ± 1% 58.7ns ± 1% +19.57% (p=0.008 n=5+5) AppendFloat/32ExactFraction-4 95.6ns ± 1% 88.6ns ± 1% -7.30% (p=0.008 n=5+5) AppendFloat/32Point-4 122ns ± 1% 87ns ± 1% -28.63% (p=0.008 n=5+5) AppendFloat/32Exp-4 88.6ns ± 2% 95.0ns ± 1% +7.29% (p=0.008 n=5+5) AppendFloat/32NegExp-4 87.2ns ± 1% 91.3ns ± 1% +4.63% (p=0.008 n=5+5) AppendFloat/32Shortest-4 107ns ± 1% 82ns ± 0% -24.08% (p=0.008 n=5+5) AppendFloat/Slowpath64-4 1.00µs ± 1% 0.10µs ± 0% -89.92% (p=0.016 n=5+4) AppendFloat/SlowpathDenormal64-4 34.1µs ± 3% 0.1µs ± 1% -99.72% (p=0.008 n=5+5) Fixes #15672 Change-Id: Ib90dfa245f62490a6666671896013cf3f9a1fb22 Reviewed-on: https://go-review.googlesource.com/c/go/+/170080 Trust: Emmanuel Odeke <emmanuel@orijtech.com> Trust: Nigel Tao <nigeltao@golang.org> Run-TryBot: Emmanuel Odeke <emmanuel@orijtech.com> TryBot-Result: Go Bot <gobot@golang.org> Reviewed-by: Nigel Tao <nigeltao@golang.org>
This commit is contained in:
parent
0184b445c0
commit
61a08fc6ce
@ -113,15 +113,11 @@ func genericFtoa(dst []byte, val float64, fmt byte, prec, bitSize int) []byte {
|
||||
// Negative precision means "only as much as needed to be exact."
|
||||
shortest := prec < 0
|
||||
if shortest {
|
||||
// Try Grisu3 algorithm.
|
||||
f := new(extFloat)
|
||||
lower, upper := f.AssignComputeBounds(mant, exp, neg, flt)
|
||||
// Use Ryu algorithm.
|
||||
var buf [32]byte
|
||||
digs.d = buf[:]
|
||||
ok = f.ShortestDecimal(&digs, &lower, &upper)
|
||||
if !ok {
|
||||
return bigFtoa(dst, prec, fmt, neg, mant, exp, flt)
|
||||
}
|
||||
ryuFtoaShortest(&digs, mant, exp-int(flt.mantbits), flt)
|
||||
ok = true
|
||||
// Precision for shortest representation mode.
|
||||
switch fmt {
|
||||
case 'e', 'E':
|
||||
|
@ -40,6 +40,7 @@ var ftoatests = []ftoaTest{
|
||||
{200000, 'x', -1, "0x1.86ap+17"},
|
||||
{200000, 'X', -1, "0X1.86AP+17"},
|
||||
{2000000, 'g', -1, "2e+06"},
|
||||
{1e10, 'g', -1, "1e+10"},
|
||||
|
||||
// g conversion and zero suppression
|
||||
{400, 'g', 2, "4e+02"},
|
||||
@ -84,6 +85,7 @@ var ftoatests = []ftoaTest{
|
||||
{1.2355, 'f', 3, "1.236"},
|
||||
{1234567890123456.5, 'e', 15, "1.234567890123456e+15"},
|
||||
{1234567890123457.5, 'e', 15, "1.234567890123458e+15"},
|
||||
{108678236358137.625, 'g', -1, "1.0867823635813762e+14"},
|
||||
|
||||
{1e23, 'e', 17, "9.99999999999999916e+22"},
|
||||
{1e23, 'f', 17, "99999999999999991611392.00000000000000000"},
|
||||
@ -191,6 +193,25 @@ func TestFtoa(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestFtoaPowersOfTwo(t *testing.T) {
|
||||
for exp := -2048; exp <= 2048; exp++ {
|
||||
f := math.Ldexp(1, exp)
|
||||
if !math.IsInf(f, 0) {
|
||||
s := FormatFloat(f, 'e', -1, 64)
|
||||
if x, _ := ParseFloat(s, 64); x != f {
|
||||
t.Errorf("failed roundtrip %v => %s => %v", f, s, x)
|
||||
}
|
||||
}
|
||||
f32 := float32(f)
|
||||
if !math.IsInf(float64(f32), 0) {
|
||||
s := FormatFloat(float64(f32), 'e', -1, 32)
|
||||
if x, _ := ParseFloat(s, 32); float32(x) != f32 {
|
||||
t.Errorf("failed roundtrip %v => %s => %v", f32, s, float32(x))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestFtoaRandom(t *testing.T) {
|
||||
N := int(1e4)
|
||||
if testing.Short() {
|
||||
@ -240,6 +261,7 @@ var ftoaBenches = []struct {
|
||||
{"Float", 339.7784, 'g', -1, 64},
|
||||
{"Exp", -5.09e75, 'g', -1, 64},
|
||||
{"NegExp", -5.11e-95, 'g', -1, 64},
|
||||
{"LongExp", 1.234567890123456e-78, 'g', -1, 64},
|
||||
|
||||
{"Big", 123456789123456789123456789, 'g', -1, 64},
|
||||
{"BinaryExp", -1, 'b', -1, 64},
|
||||
@ -249,6 +271,7 @@ var ftoaBenches = []struct {
|
||||
{"32Point", 339.7784, 'g', -1, 32},
|
||||
{"32Exp", -5.09e25, 'g', -1, 32},
|
||||
{"32NegExp", -5.11e-25, 'g', -1, 32},
|
||||
{"32Shortest", 1.234567e-8, 'g', -1, 32},
|
||||
{"32Fixed8Hard", math.Ldexp(15961084, -125), 'e', 8, 32},
|
||||
{"32Fixed9Hard", math.Ldexp(14855922, -83), 'e', 9, 32},
|
||||
|
||||
@ -264,7 +287,14 @@ var ftoaBenches = []struct {
|
||||
{"64Fixed18Hard", math.Ldexp(6994187472632449, 690), 'e', 18, 64},
|
||||
|
||||
// Trigger slow path (see issue #15672).
|
||||
{"Slowpath64", 622666234635.3213e-320, 'e', -1, 64},
|
||||
// The shortest is: 8.034137530808823e+43
|
||||
{"Slowpath64", 8.03413753080882349e+43, 'e', -1, 64},
|
||||
// This denormal is pathological because the lower/upper
|
||||
// halfways to neighboring floats are:
|
||||
// 622666234635.321003e-320 ~= 622666234635.321e-320
|
||||
// 622666234635.321497e-320 ~= 622666234635.3215e-320
|
||||
// making it hard to find the 3rd digit
|
||||
{"SlowpathDenormal64", 622666234635.3213e-320, 'e', -1, 64},
|
||||
}
|
||||
|
||||
func BenchmarkFormatFloat(b *testing.B) {
|
||||
|
@ -218,6 +218,109 @@ func formatDecimal(d *decimalSlice, m uint64, trunc bool, roundUp bool, prec int
|
||||
d.dp = d.nd + trimmed
|
||||
}
|
||||
|
||||
// ryuFtoaShortest formats mant*2^exp with prec decimal digits.
|
||||
func ryuFtoaShortest(d *decimalSlice, mant uint64, exp int, flt *floatInfo) {
|
||||
if mant == 0 {
|
||||
d.nd, d.dp = 0, 0
|
||||
return
|
||||
}
|
||||
// If input is an exact integer with fewer bits than the mantissa,
|
||||
// the previous and next integer are not admissible representations.
|
||||
if exp <= 0 && bits.TrailingZeros64(mant) >= -exp {
|
||||
mant >>= uint(-exp)
|
||||
ryuDigits(d, mant, mant, mant, true, false)
|
||||
return
|
||||
}
|
||||
ml, mc, mu, e2 := computeBounds(mant, exp, flt)
|
||||
if e2 == 0 {
|
||||
ryuDigits(d, ml, mc, mu, true, false)
|
||||
return
|
||||
}
|
||||
// Find 10^q *larger* than 2^-e2
|
||||
q := mulByLog2Log10(-e2) + 1
|
||||
|
||||
// We are going to multiply by 10^q using 128-bit arithmetic.
|
||||
// The exponent is the same for all 3 numbers.
|
||||
var dl, dc, du uint64
|
||||
var dl0, dc0, du0 bool
|
||||
if flt == &float32info {
|
||||
var dl32, dc32, du32 uint32
|
||||
dl32, _, dl0 = mult64bitPow10(uint32(ml), e2, q)
|
||||
dc32, _, dc0 = mult64bitPow10(uint32(mc), e2, q)
|
||||
du32, e2, du0 = mult64bitPow10(uint32(mu), e2, q)
|
||||
dl, dc, du = uint64(dl32), uint64(dc32), uint64(du32)
|
||||
} else {
|
||||
dl, _, dl0 = mult128bitPow10(ml, e2, q)
|
||||
dc, _, dc0 = mult128bitPow10(mc, e2, q)
|
||||
du, e2, du0 = mult128bitPow10(mu, e2, q)
|
||||
}
|
||||
if e2 >= 0 {
|
||||
panic("not enough significant bits after mult128bitPow10")
|
||||
}
|
||||
// Is it an exact computation?
|
||||
if q > 55 {
|
||||
// Large positive powers of ten are not exact
|
||||
dl0, dc0, du0 = false, false, false
|
||||
}
|
||||
if q < 0 && q >= -24 {
|
||||
// Division by a power of ten may be exact.
|
||||
// (note that 5^25 is a 59-bit number so division by 5^25 is never exact).
|
||||
if divisibleByPower5(ml, -q) {
|
||||
dl0 = true
|
||||
}
|
||||
if divisibleByPower5(mc, -q) {
|
||||
dc0 = true
|
||||
}
|
||||
if divisibleByPower5(mu, -q) {
|
||||
du0 = true
|
||||
}
|
||||
}
|
||||
// Express the results (dl, dc, du)*2^e2 as integers.
|
||||
// Extra bits must be removed and rounding hints computed.
|
||||
extra := uint(-e2)
|
||||
extraMask := uint64(1<<extra - 1)
|
||||
// Now compute the floored, integral base 10 mantissas.
|
||||
dl, fracl := dl>>extra, dl&extraMask
|
||||
dc, fracc := dc>>extra, dc&extraMask
|
||||
du, fracu := du>>extra, du&extraMask
|
||||
// Is it allowed to use 'du' as a result?
|
||||
// It is always allowed when it is truncated, but also
|
||||
// if it is exact and the original binary mantissa is even
|
||||
// When disallowed, we can substract 1.
|
||||
uok := !du0 || fracu > 0
|
||||
if du0 && fracu == 0 {
|
||||
uok = mant&1 == 0
|
||||
}
|
||||
if !uok {
|
||||
du--
|
||||
}
|
||||
// Is 'dc' the correctly rounded base 10 mantissa?
|
||||
// The correct rounding might be dc+1
|
||||
cup := false // don't round up.
|
||||
if dc0 {
|
||||
// If we computed an exact product, the half integer
|
||||
// should round to next (even) integer if 'dc' is odd.
|
||||
cup = fracc > 1<<(extra-1) ||
|
||||
(fracc == 1<<(extra-1) && dc&1 == 1)
|
||||
} else {
|
||||
// otherwise, the result is a lower truncation of the ideal
|
||||
// result.
|
||||
cup = fracc>>(extra-1) == 1
|
||||
}
|
||||
// Is 'dl' an allowed representation?
|
||||
// Only if it is an exact value, and if the original binary mantissa
|
||||
// was even.
|
||||
lok := dl0 && fracl == 0 && (mant&1 == 0)
|
||||
if !lok {
|
||||
dl++
|
||||
}
|
||||
// We need to remember whether the trimmed digits of 'dc' are zero.
|
||||
c0 := dc0 && fracc == 0
|
||||
// render digits
|
||||
ryuDigits(d, dl, dc, du, c0, cup)
|
||||
d.dp -= q
|
||||
}
|
||||
|
||||
// mulByLog2Log10 returns math.Floor(x * log(2)/log(10)) for an integer x in
|
||||
// the range -1600 <= x && x <= +1600.
|
||||
//
|
||||
@ -238,6 +341,140 @@ func mulByLog10Log2(x int) int {
|
||||
return (x * 108853) >> 15
|
||||
}
|
||||
|
||||
// computeBounds returns a floating-point vector (l, c, u)×2^e2
|
||||
// where the mantissas are 55-bit (or 26-bit) integers, describing the interval
|
||||
// represented by the input float64 or float32.
|
||||
func computeBounds(mant uint64, exp int, flt *floatInfo) (lower, central, upper uint64, e2 int) {
|
||||
if mant != 1<<flt.mantbits || exp == flt.bias+1-int(flt.mantbits) {
|
||||
// regular case (or denormals)
|
||||
lower, central, upper = 2*mant-1, 2*mant, 2*mant+1
|
||||
e2 = exp - 1
|
||||
return
|
||||
} else {
|
||||
// border of an exponent
|
||||
lower, central, upper = 4*mant-1, 4*mant, 4*mant+2
|
||||
e2 = exp - 2
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
func ryuDigits(d *decimalSlice, lower, central, upper uint64,
|
||||
c0, cup bool) {
|
||||
lhi, llo := divmod1e9(lower)
|
||||
chi, clo := divmod1e9(central)
|
||||
uhi, ulo := divmod1e9(upper)
|
||||
if uhi == 0 {
|
||||
// only low digits (for denormals)
|
||||
ryuDigits32(d, llo, clo, ulo, c0, cup, 8)
|
||||
} else if lhi < uhi {
|
||||
// truncate 9 digits at once.
|
||||
if llo != 0 {
|
||||
lhi++
|
||||
}
|
||||
c0 = c0 && clo == 0
|
||||
cup = (clo > 5e8) || (clo == 5e8 && cup)
|
||||
ryuDigits32(d, lhi, chi, uhi, c0, cup, 8)
|
||||
d.dp += 9
|
||||
} else {
|
||||
d.nd = 0
|
||||
// emit high part
|
||||
n := uint(9)
|
||||
for v := chi; v > 0; {
|
||||
v1, v2 := v/10, v%10
|
||||
v = v1
|
||||
n--
|
||||
d.d[n] = byte(v2 + '0')
|
||||
}
|
||||
d.d = d.d[n:]
|
||||
d.nd = int(9 - n)
|
||||
// emit low part
|
||||
ryuDigits32(d, llo, clo, ulo,
|
||||
c0, cup, d.nd+8)
|
||||
}
|
||||
// trim trailing zeros
|
||||
for d.nd > 0 && d.d[d.nd-1] == '0' {
|
||||
d.nd--
|
||||
}
|
||||
// trim initial zeros
|
||||
for d.nd > 0 && d.d[0] == '0' {
|
||||
d.nd--
|
||||
d.dp--
|
||||
d.d = d.d[1:]
|
||||
}
|
||||
}
|
||||
|
||||
// ryuDigits32 emits decimal digits for a number less than 1e9.
|
||||
func ryuDigits32(d *decimalSlice, lower, central, upper uint32,
|
||||
c0, cup bool, endindex int) {
|
||||
if upper == 0 {
|
||||
d.dp = endindex + 1
|
||||
return
|
||||
}
|
||||
trimmed := 0
|
||||
// Remember last trimmed digit to check for round-up.
|
||||
// c0 will be used to remember zeroness of following digits.
|
||||
cNextDigit := 0
|
||||
for upper > 0 {
|
||||
// Repeatedly compute:
|
||||
// l = Ceil(lower / 10^k)
|
||||
// c = Round(central / 10^k)
|
||||
// u = Floor(upper / 10^k)
|
||||
// and stop when c goes out of the (l, u) interval.
|
||||
l := (lower + 9) / 10
|
||||
c, cdigit := central/10, central%10
|
||||
u := upper / 10
|
||||
if l > u {
|
||||
// don't trim the last digit as it is forbidden to go below l
|
||||
// other, trim and exit now.
|
||||
break
|
||||
}
|
||||
// Check that we didn't cross the lower boundary.
|
||||
// The case where l < u but c == l-1 is essentially impossible,
|
||||
// but may happen if:
|
||||
// lower = ..11
|
||||
// central = ..19
|
||||
// upper = ..31
|
||||
// and means that 'central' is very close but less than
|
||||
// an integer ending with many zeros, and usually
|
||||
// the "round-up" logic hides the problem.
|
||||
if l == c+1 && c < u {
|
||||
c++
|
||||
cdigit = 0
|
||||
cup = false
|
||||
}
|
||||
trimmed++
|
||||
// Remember trimmed digits of c
|
||||
c0 = c0 && cNextDigit == 0
|
||||
cNextDigit = int(cdigit)
|
||||
lower, central, upper = l, c, u
|
||||
}
|
||||
// should we round up?
|
||||
if trimmed > 0 {
|
||||
cup = cNextDigit > 5 ||
|
||||
(cNextDigit == 5 && !c0) ||
|
||||
(cNextDigit == 5 && c0 && central&1 == 1)
|
||||
}
|
||||
if central < upper && cup {
|
||||
central++
|
||||
}
|
||||
// We know where the number ends, fill directly
|
||||
endindex -= trimmed
|
||||
v := central
|
||||
n := endindex
|
||||
for n > d.nd {
|
||||
v1, v2 := v/100, v%100
|
||||
d.d[n] = smallsString[2*v2+1]
|
||||
d.d[n-1] = smallsString[2*v2+0]
|
||||
n -= 2
|
||||
v = v1
|
||||
}
|
||||
if n == d.nd {
|
||||
d.d[n] = byte(v + '0')
|
||||
}
|
||||
d.nd = endindex + 1
|
||||
d.dp = d.nd + trimmed
|
||||
}
|
||||
|
||||
// mult64bitPow10 takes a floating-point input with a 25-bit
|
||||
// mantissa and multiplies it with 10^q. The resulting mantissa
|
||||
// is m*P >> 57 where P is a 64-bit element of the detailedPowersOfTen tables.
|
||||
@ -249,7 +486,8 @@ func mulByLog10Log2(x int) int {
|
||||
// exact = ε == 0
|
||||
func mult64bitPow10(m uint32, e2, q int) (resM uint32, resE int, exact bool) {
|
||||
if q == 0 {
|
||||
return m << 7, e2 - 7, true
|
||||
// P == 1<<63
|
||||
return m << 6, e2 - 6, true
|
||||
}
|
||||
if q < detailedPowersOfTenMinExp10 || detailedPowersOfTenMaxExp10 < q {
|
||||
// This never happens due to the range of float32/float64 exponent
|
||||
@ -276,7 +514,8 @@ func mult64bitPow10(m uint32, e2, q int) (resM uint32, resE int, exact bool) {
|
||||
// exact = ε == 0
|
||||
func mult128bitPow10(m uint64, e2, q int) (resM uint64, resE int, exact bool) {
|
||||
if q == 0 {
|
||||
return m << 9, e2 - 9, true
|
||||
// P == 1<<127
|
||||
return m << 8, e2 - 8, true
|
||||
}
|
||||
if q < detailedPowersOfTenMinExp10 || detailedPowersOfTenMaxExp10 < q {
|
||||
// This never happens due to the range of float32/float64 exponent
|
||||
@ -309,3 +548,15 @@ func divisibleByPower5(m uint64, k int) bool {
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// divmod1e9 computes quotient and remainder of division by 1e9,
|
||||
// avoiding runtime uint64 division on 32-bit platforms.
|
||||
func divmod1e9(x uint64) (uint32, uint32) {
|
||||
if !host32bit {
|
||||
return uint32(x / 1e9), uint32(x % 1e9)
|
||||
}
|
||||
// Use the same sequence of operations as the amd64 compiler.
|
||||
hi, _ := bits.Mul64(x>>1, 0x89705f4136b4a598) // binary digits of 1e-9
|
||||
q := hi >> 28
|
||||
return uint32(q), uint32(x - q*1e9)
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user