mirror of
https://github.com/golang/go
synced 2024-11-19 05:54:44 -07:00
math/big: fix known bug in Float.Float64
- handle exponent over- and underflow - handle denormalized numbers - added test cases Change-Id: I1bbb9904b0c104f54696944e1f57559881f6eeeb Reviewed-on: https://go-review.googlesource.com/7982 Reviewed-by: Alan Donovan <adonovan@google.com>
This commit is contained in:
parent
0e55f201d6
commit
2c4cf2f6f2
@ -872,9 +872,14 @@ func (x *Float) Int64() (int64, Accuracy) {
|
|||||||
panic("unreachable")
|
panic("unreachable")
|
||||||
}
|
}
|
||||||
|
|
||||||
// Float64 returns the closest float64 value of x
|
// Float64 returns the float64 value nearest to x by rounding ToNearestEven
|
||||||
// by rounding to nearest with 53 bits precision.
|
// with 53 bits of precision.
|
||||||
// BUG(gri) Float.Float64 doesn't handle exponent overflow.
|
// If x is too small to be represented by a float64
|
||||||
|
// (|x| < math.SmallestNonzeroFloat64), the result is (0, Below) or
|
||||||
|
// (-0, Above), respectively, depending on the sign of x.
|
||||||
|
// If x is too large to be represented by a float64 (|x| > math.MaxFloat64),
|
||||||
|
// the result is (+Inf, Above) or (-Inf, Below), depending on the sign of x.
|
||||||
|
// The result is (NaN, Undef) for NaNs.
|
||||||
func (x *Float) Float64() (float64, Accuracy) {
|
func (x *Float) Float64() (float64, Accuracy) {
|
||||||
if debugFloat {
|
if debugFloat {
|
||||||
x.validate()
|
x.validate()
|
||||||
@ -886,27 +891,67 @@ func (x *Float) Float64() (float64, Accuracy) {
|
|||||||
var r Float
|
var r Float
|
||||||
r.prec = 53
|
r.prec = 53
|
||||||
r.Set(x)
|
r.Set(x)
|
||||||
var s uint64
|
|
||||||
|
// Rounding via Set may have caused r to overflow
|
||||||
|
// to ±Inf (rounding never causes underflows to 0).
|
||||||
|
if r.form == inf {
|
||||||
|
r.exp = 10000 // cause overflow below
|
||||||
|
}
|
||||||
|
|
||||||
|
// see also implementation of math.Ldexp
|
||||||
|
|
||||||
|
e := int64(r.exp) + 1022
|
||||||
|
if e <= -52 {
|
||||||
|
// underflow
|
||||||
|
if x.neg {
|
||||||
|
z := 0.0
|
||||||
|
return -z, Above
|
||||||
|
}
|
||||||
|
return 0.0, Below
|
||||||
|
}
|
||||||
|
// e > -52
|
||||||
|
|
||||||
|
if e >= 2047 {
|
||||||
|
// overflow
|
||||||
|
if x.neg {
|
||||||
|
return math.Inf(-1), Below
|
||||||
|
}
|
||||||
|
return math.Inf(+1), Above
|
||||||
|
}
|
||||||
|
// -52 < e < 2047
|
||||||
|
|
||||||
|
denormal := false
|
||||||
|
if e < 0 {
|
||||||
|
denormal = true
|
||||||
|
e += 52
|
||||||
|
}
|
||||||
|
// 0 < e < 2047
|
||||||
|
|
||||||
|
s := uint64(0)
|
||||||
if r.neg {
|
if r.neg {
|
||||||
s = 1 << 63
|
s = 1 << 63
|
||||||
}
|
}
|
||||||
e := uint64(1022+r.exp) & 0x7ff // TODO(gri) check for overflow
|
m := high64(r.mant) >> 11 & (1<<52 - 1) // cut off msb (implicit 1 bit)
|
||||||
m := high64(r.mant) >> 11 & (1<<52 - 1)
|
z := math.Float64frombits(s | uint64(e)<<52 | m)
|
||||||
return math.Float64frombits(s | e<<52 | m), r.acc
|
if denormal {
|
||||||
|
// adjust for denormal
|
||||||
|
// TODO(gri) does this change accuracy?
|
||||||
|
z /= 1 << 52
|
||||||
|
}
|
||||||
|
return z, r.acc
|
||||||
|
|
||||||
case zero:
|
case zero:
|
||||||
z := 0.0
|
|
||||||
if x.neg {
|
if x.neg {
|
||||||
z = -z
|
z := 0.0
|
||||||
|
return -z, Exact
|
||||||
}
|
}
|
||||||
return z, Exact
|
return 0.0, Exact
|
||||||
|
|
||||||
case inf:
|
case inf:
|
||||||
sign := +1
|
|
||||||
if x.neg {
|
if x.neg {
|
||||||
sign = -1
|
return math.Inf(-1), Exact
|
||||||
}
|
}
|
||||||
return math.Inf(sign), Exact
|
return math.Inf(+1), Exact
|
||||||
|
|
||||||
case nan:
|
case nan:
|
||||||
return math.NaN(), Undef
|
return math.NaN(), Undef
|
||||||
|
@ -627,6 +627,10 @@ func TestFloatSetFloat64(t *testing.T) {
|
|||||||
3.14159265e10,
|
3.14159265e10,
|
||||||
2.718281828e-123,
|
2.718281828e-123,
|
||||||
1.0 / 3,
|
1.0 / 3,
|
||||||
|
math.MaxFloat32,
|
||||||
|
math.MaxFloat64,
|
||||||
|
math.SmallestNonzeroFloat32,
|
||||||
|
math.SmallestNonzeroFloat64,
|
||||||
math.Inf(-1),
|
math.Inf(-1),
|
||||||
math.Inf(0),
|
math.Inf(0),
|
||||||
-math.Inf(1),
|
-math.Inf(1),
|
||||||
@ -637,8 +641,8 @@ func TestFloatSetFloat64(t *testing.T) {
|
|||||||
}
|
}
|
||||||
var f Float
|
var f Float
|
||||||
f.SetFloat64(want)
|
f.SetFloat64(want)
|
||||||
if got, _ := f.Float64(); got != want {
|
if got, acc := f.Float64(); got != want || acc != Exact {
|
||||||
t.Errorf("got %g (%s); want %g", got, f.Format('p', 0), want)
|
t.Errorf("got %g (%s, %s); want %g (Exact)", got, f.Format('p', 0), acc, want)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -833,6 +837,56 @@ func TestFloatInt64(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestFloatFloat64(t *testing.T) {
|
||||||
|
for _, test := range []struct {
|
||||||
|
x string
|
||||||
|
out float64
|
||||||
|
acc Accuracy
|
||||||
|
}{
|
||||||
|
{"-Inf", math.Inf(-1), Exact},
|
||||||
|
{"-0x1.fffffffffffff8p2147483646", -math.Inf(+1), Below}, // overflow in rounding
|
||||||
|
{"-1e10000", math.Inf(-1), Below}, // overflow
|
||||||
|
{"-0x1p1024", math.Inf(-1), Below}, // overflow
|
||||||
|
{"-0x1.fffffffffffff8p1023", -math.Inf(+1), Below}, // overflow
|
||||||
|
{"-0x1.fffffffffffff4p1023", -math.MaxFloat64, Above},
|
||||||
|
{"-0x1.fffffffffffffp1023", -math.MaxFloat64, Exact},
|
||||||
|
{"-12345.000000000000000000001", -12345, Above},
|
||||||
|
{"-12345.0", -12345, Exact},
|
||||||
|
{"-1.000000000000000000001", -1, Above},
|
||||||
|
{"-1", -1, Exact},
|
||||||
|
{"-0x0.0000000000001p-1022", -math.SmallestNonzeroFloat64, Exact},
|
||||||
|
{"-0x0.0000000000001p-1023", -0, Above}, // underflow
|
||||||
|
{"-1e-1000", -0, Above}, // underflow
|
||||||
|
{"0", 0, Exact},
|
||||||
|
{"1e-1000", 0, Below}, // underflow
|
||||||
|
{"0x0.0000000000001p-1023", 0, Below}, // underflow
|
||||||
|
{"0x0.0000000000001p-1022", math.SmallestNonzeroFloat64, Exact},
|
||||||
|
{"1", 1, Exact},
|
||||||
|
{"1.000000000000000000001", 1, Below},
|
||||||
|
{"12345.0", 12345, Exact},
|
||||||
|
{"12345.000000000000000000001", 12345, Below},
|
||||||
|
{"0x1.fffffffffffffp1023", math.MaxFloat64, Exact},
|
||||||
|
{"0x1.fffffffffffff4p1023", math.MaxFloat64, Below},
|
||||||
|
{"0x1.fffffffffffff8p1023", math.Inf(+1), Above}, // overflow
|
||||||
|
{"0x1p1024", math.Inf(+1), Above}, // overflow
|
||||||
|
{"1e10000", math.Inf(+1), Above}, // overflow
|
||||||
|
{"0x1.fffffffffffff8p2147483646", math.Inf(+1), Above}, // overflow in rounding
|
||||||
|
{"+Inf", math.Inf(+1), Exact},
|
||||||
|
} {
|
||||||
|
x := makeFloat(test.x)
|
||||||
|
out, acc := x.Float64()
|
||||||
|
if out != test.out || acc != test.acc {
|
||||||
|
t.Errorf("%s: got %g (%s); want %g (%s)", test.x, out, acc, test.out, test.acc)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// test NaN
|
||||||
|
x := makeFloat("NaN")
|
||||||
|
if out, acc := x.Float64(); out == out || acc != Undef {
|
||||||
|
t.Errorf("NaN: got %g (%s); want NaN (Undef)", out, acc)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestFloatInt(t *testing.T) {
|
func TestFloatInt(t *testing.T) {
|
||||||
for _, test := range []struct {
|
for _, test := range []struct {
|
||||||
x string
|
x string
|
||||||
@ -1073,14 +1127,14 @@ func TestFloatAdd32(t *testing.T) {
|
|||||||
got, acc := z.Float64()
|
got, acc := z.Float64()
|
||||||
want := float64(float32(y0) + float32(x0))
|
want := float64(float32(y0) + float32(x0))
|
||||||
if got != want || acc != Exact {
|
if got != want || acc != Exact {
|
||||||
t.Errorf("d = %d: %g + %g = %g (%s); want %g exactly", d, x0, y0, got, acc, want)
|
t.Errorf("d = %d: %g + %g = %g (%s); want %g (Exact)", d, x0, y0, got, acc, want)
|
||||||
}
|
}
|
||||||
|
|
||||||
z.Sub(z, y)
|
z.Sub(z, y)
|
||||||
got, acc = z.Float64()
|
got, acc = z.Float64()
|
||||||
want = float64(float32(want) - float32(y0))
|
want = float64(float32(want) - float32(y0))
|
||||||
if got != want || acc != Exact {
|
if got != want || acc != Exact {
|
||||||
t.Errorf("d = %d: %g - %g = %g (%s); want %g exactly", d, x0+y0, y0, got, acc, want)
|
t.Errorf("d = %d: %g - %g = %g (%s); want %g (Exact)", d, x0+y0, y0, got, acc, want)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1106,14 +1160,14 @@ func TestFloatAdd64(t *testing.T) {
|
|||||||
got, acc := z.Float64()
|
got, acc := z.Float64()
|
||||||
want := x0 + y0
|
want := x0 + y0
|
||||||
if got != want || acc != Exact {
|
if got != want || acc != Exact {
|
||||||
t.Errorf("d = %d: %g + %g = %g (%s); want %g exactly", d, x0, y0, got, acc, want)
|
t.Errorf("d = %d: %g + %g = %g (%s); want %g (Exact)", d, x0, y0, got, acc, want)
|
||||||
}
|
}
|
||||||
|
|
||||||
z.Sub(z, y)
|
z.Sub(z, y)
|
||||||
got, acc = z.Float64()
|
got, acc = z.Float64()
|
||||||
want -= y0
|
want -= y0
|
||||||
if got != want || acc != Exact {
|
if got != want || acc != Exact {
|
||||||
t.Errorf("d = %d: %g - %g = %g (%s); want %g exactly", d, x0+y0, y0, got, acc, want)
|
t.Errorf("d = %d: %g - %g = %g (%s); want %g (Exact)", d, x0+y0, y0, got, acc, want)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user