math/big: fix known bug in Float.Float64

- handle exponent over- and underflow - handle denormalized numbers - added test cases Change-Id: I1bbb9904b0c104f54696944e1f57559881f6eeeb Reviewed-on: https://go-review.googlesource.com/7982 Reviewed-by: Alan Donovan <adonovan@google.com>
2024-11-19 05:54:44 -07:00 · 2015-03-23 17:31:25 -07:00 · 2015-03-23 17:31:25 -07:00 · 2c4cf2f6f2
commit 2c4cf2f6f2
parent 0e55f201d6
2 changed files with 118 additions and 19 deletions
--- a/src/math/big/float.go
+++ b/src/math/big/float.go
@ -872,9 +872,14 @@ func (x *Float) Int64() (int64, Accuracy) {
 	panic("unreachable")
 }
-// Float64 returns the closest float64 value of x
+// Float64 returns the float64 value nearest to x by rounding ToNearestEven
-// by rounding to nearest with 53 bits precision.
+// with 53 bits of precision.
-// BUG(gri) Float.Float64 doesn't handle exponent overflow.
+// If x is too small to be represented by a float64
 // (|x| < math.SmallestNonzeroFloat64), the result is (0, Below) or
 // (-0, Above), respectively, depending on the sign of x.
 // If x is too large to be represented by a float64 (|x| > math.MaxFloat64),
 // the result is (+Inf, Above) or (-Inf, Below), depending on the sign of x.
 // The result is (NaN, Undef) for NaNs.
 func (x *Float) Float64() (float64, Accuracy) {
 	if debugFloat {
 		x.validate()
@ -886,27 +891,67 @@ func (x *Float) Float64() (float64, Accuracy) {
 		var r Float
 		r.prec = 53
 		r.Set(x)
-		var s uint64
+
 		// Rounding via Set may have caused r to overflow
 		// to ±Inf (rounding never causes underflows to 0).
 		if r.form == inf {
 			r.exp = 10000 // cause overflow below
 		}
 		// see also implementation of math.Ldexp
 		e := int64(r.exp) + 1022
 		if e <= -52 {
 			// underflow
 			if x.neg {
 				z := 0.0
 				return -z, Above
 			}
 			return 0.0, Below
 		}
 		// e > -52
 		if e >= 2047 {
 			// overflow
 			if x.neg {
 				return math.Inf(-1), Below
 			}
 			return math.Inf(+1), Above
 		}
 		// -52 < e < 2047
 		denormal := false
 		if e < 0 {
 			denormal = true
 			e += 52
 		}
 		// 0 < e < 2047
 		s := uint64(0)
 		if r.neg {
 			s = 1 << 63
 		}
-		e := uint64(1022+r.exp) & 0x7ff // TODO(gri) check for overflow
+		m := high64(r.mant) >> 11 & (1<<52 - 1) // cut off msb (implicit 1 bit)
-		m := high64(r.mant) >> 11 & (1<<52 - 1)
+		z := math.Float64frombits(s | uint64(e)<<52 | m)
-		return math.Float64frombits(s | e<<52 | m), r.acc
+		if denormal {
 			// adjust for denormal
 			// TODO(gri) does this change accuracy?
 			z /= 1 << 52
 		}
 		return z, r.acc
 	case zero:
 		z := 0.0
 		if x.neg {
-			z = -z
+			z := 0.0
 			return -z, Exact
 		}
-		return z, Exact
+		return 0.0, Exact
 	case inf:
 		sign := +1
 		if x.neg {
-			sign = -1
+			return math.Inf(-1), Exact
 		}
-		return math.Inf(sign), Exact
+		return math.Inf(+1), Exact
 	case nan:
 		return math.NaN(), Undef
--- a/src/math/big/float_test.go
+++ b/src/math/big/float_test.go
@ -627,6 +627,10 @@ func TestFloatSetFloat64(t *testing.T) {
 		3.14159265e10,
 		2.718281828e-123,
 		1.0 / 3,
 		math.MaxFloat32,
 		math.MaxFloat64,
 		math.SmallestNonzeroFloat32,
 		math.SmallestNonzeroFloat64,
 		math.Inf(-1),
 		math.Inf(0),
 		-math.Inf(1),
@ -637,8 +641,8 @@ func TestFloatSetFloat64(t *testing.T) {
 			}
 			var f Float
 			f.SetFloat64(want)
-			if got, _ := f.Float64(); got != want {
+			if got, acc := f.Float64(); got != want || acc != Exact {
-				t.Errorf("got %g (%s); want %g", got, f.Format('p', 0), want)
+				t.Errorf("got %g (%s, %s); want %g (Exact)", got, f.Format('p', 0), acc, want)
 			}
 		}
 	}
@ -833,6 +837,56 @@ func TestFloatInt64(t *testing.T) {
 	}
 }
 func TestFloatFloat64(t *testing.T) {
 	for _, test := range []struct {
 		x   string
 		out float64
 		acc Accuracy
 	}{
 		{"-Inf", math.Inf(-1), Exact},
 		{"-0x1.fffffffffffff8p2147483646", -math.Inf(+1), Below}, // overflow in rounding
 		{"-1e10000", math.Inf(-1), Below},                        // overflow
 		{"-0x1p1024", math.Inf(-1), Below},                       // overflow
 		{"-0x1.fffffffffffff8p1023", -math.Inf(+1), Below},       // overflow
 		{"-0x1.fffffffffffff4p1023", -math.MaxFloat64, Above},
 		{"-0x1.fffffffffffffp1023", -math.MaxFloat64, Exact},
 		{"-12345.000000000000000000001", -12345, Above},
 		{"-12345.0", -12345, Exact},
 		{"-1.000000000000000000001", -1, Above},
 		{"-1", -1, Exact},
 		{"-0x0.0000000000001p-1022", -math.SmallestNonzeroFloat64, Exact},
 		{"-0x0.0000000000001p-1023", -0, Above}, // underflow
 		{"-1e-1000", -0, Above},                 // underflow
 		{"0", 0, Exact},
 		{"1e-1000", 0, Below},                 // underflow
 		{"0x0.0000000000001p-1023", 0, Below}, // underflow
 		{"0x0.0000000000001p-1022", math.SmallestNonzeroFloat64, Exact},
 		{"1", 1, Exact},
 		{"1.000000000000000000001", 1, Below},
 		{"12345.0", 12345, Exact},
 		{"12345.000000000000000000001", 12345, Below},
 		{"0x1.fffffffffffffp1023", math.MaxFloat64, Exact},
 		{"0x1.fffffffffffff4p1023", math.MaxFloat64, Below},
 		{"0x1.fffffffffffff8p1023", math.Inf(+1), Above},       // overflow
 		{"0x1p1024", math.Inf(+1), Above},                      // overflow
 		{"1e10000", math.Inf(+1), Above},                       // overflow
 		{"0x1.fffffffffffff8p2147483646", math.Inf(+1), Above}, // overflow in rounding
 		{"+Inf", math.Inf(+1), Exact},
 	} {
 		x := makeFloat(test.x)
 		out, acc := x.Float64()
 		if out != test.out || acc != test.acc {
 			t.Errorf("%s: got %g (%s); want %g (%s)", test.x, out, acc, test.out, test.acc)
 		}
 	}
 	// test NaN
 	x := makeFloat("NaN")
 	if out, acc := x.Float64(); out == out || acc != Undef {
 		t.Errorf("NaN: got %g (%s); want NaN (Undef)", out, acc)
 	}
 }
 func TestFloatInt(t *testing.T) {
 	for _, test := range []struct {
 		x    string
@ -1073,14 +1127,14 @@ func TestFloatAdd32(t *testing.T) {
 			got, acc := z.Float64()
 			want := float64(float32(y0) + float32(x0))
 			if got != want || acc != Exact {
-				t.Errorf("d = %d: %g + %g = %g (%s); want %g exactly", d, x0, y0, got, acc, want)
+				t.Errorf("d = %d: %g + %g = %g (%s); want %g (Exact)", d, x0, y0, got, acc, want)
 			}
 			z.Sub(z, y)
 			got, acc = z.Float64()
 			want = float64(float32(want) - float32(y0))
 			if got != want || acc != Exact {
-				t.Errorf("d = %d: %g - %g = %g (%s); want %g exactly", d, x0+y0, y0, got, acc, want)
+				t.Errorf("d = %d: %g - %g = %g (%s); want %g (Exact)", d, x0+y0, y0, got, acc, want)
 			}
 		}
 	}
@ -1106,14 +1160,14 @@ func TestFloatAdd64(t *testing.T) {
 			got, acc := z.Float64()
 			want := x0 + y0
 			if got != want || acc != Exact {
-				t.Errorf("d = %d: %g + %g = %g (%s); want %g exactly", d, x0, y0, got, acc, want)
+				t.Errorf("d = %d: %g + %g = %g (%s); want %g (Exact)", d, x0, y0, got, acc, want)
 			}
 			z.Sub(z, y)
 			got, acc = z.Float64()
 			want -= y0
 			if got != want || acc != Exact {
-				t.Errorf("d = %d: %g - %g = %g (%s); want %g exactly", d, x0+y0, y0, got, acc, want)
+				t.Errorf("d = %d: %g - %g = %g (%s); want %g (Exact)", d, x0+y0, y0, got, acc, want)
 			}
 		}
 	}