diff --git a/src/math/big/float.go b/src/math/big/float.go index ea42a9166e..44e75cbf39 100644 --- a/src/math/big/float.go +++ b/src/math/big/float.go @@ -18,10 +18,6 @@ import ( "math" ) -// TODO(gri): Determine if there's a more natural way to set the precision. -// Should there be a special meaning for prec 0? Such as "full precision"? -// (would be possible for all ops except quotient). - const debugFloat = true // enable for debugging // Internal representation: A floating-point value x != 0 consists @@ -45,14 +41,15 @@ const debugFloat = true // enable for debugging // // sign * mantissa * 2**exponent // -// Each value also has a precision, rounding mode, and accuracy value: -// The precision is the number of mantissa bits used to represent a -// value, and the result of operations is rounded to that many bits -// according to the value's rounding mode (unless specified othewise). +// Each value also has a precision, rounding mode, and accuracy value. +// The precision is the number of mantissa bits used to represent the +// value, and the result of an operation is rounded to that many bits +// according to the value's rounding mode (unless specified otherwise). // The accuracy value indicates the rounding error with respect to the // exact (not rounded) value. // -// The zero value for a Float represents the number 0. +// The zero (uninitialized) value for a Float is ready to use and +// represents the number 0.0 of 0 bit precision. // // By setting the desired precision to 24 (or 53) and using ToNearestEven // rounding, Float arithmetic operations emulate the corresponding float32 @@ -71,14 +68,26 @@ type Float struct { // NewFloat returns a new Float with value x rounded // to prec bits according to the given rounding mode. +// If prec == 0, the result has value 0.0 independent +// of the value of x. +// BUG(gri) For prec == 0 and x == Inf, the result +// should be Inf as well. func NewFloat(x float64, prec uint, mode RoundingMode) *Float { - // TODO(gri) should make this more efficient - z := new(Float).SetFloat64(x) - return z.Round(z, prec, mode) + var z Float + if prec > 0 { + // TODO(gri) should make this more efficient + z.SetFloat64(x) + return z.Round(&z, prec, mode) + } + z.mode = mode // TODO(gri) don't do this twice for prec > 0 + return &z } -// infExp is the exponent value for infinity. -const infExp = 1<<31 - 1 +// Special exponent values. +const ( + maxExp = math.MaxInt32 + infExp = -maxExp - 1 // exponent value for Inf values +) // NewInf returns a new Float with value positive infinity (sign >= 0), // or negative infinity (sign < 0). @@ -86,12 +95,16 @@ func NewInf(sign int) *Float { return &Float{neg: sign < 0, exp: infExp} } +// setExp sets the exponent for z. +// If the exponent is too small or too large, z becomes +/-Inf. func (z *Float) setExp(e int64) { - e32 := int32(e) - if int64(e32) != e { - panic("exponent overflow") // TODO(gri) handle this gracefully + if -maxExp <= e && e <= maxExp { + z.exp = int32(e) + return } - z.exp = e32 + // Inf + z.mant = z.mant[:0] + z.exp = infExp } // Accuracy describes the rounding error produced by the most recent @@ -155,7 +168,7 @@ func (mode RoundingMode) String() string { } // Precision returns the mantissa precision of x in bits. -// The precision may be 0 if x == 0. // TODO(gri) Determine a better approach. +// The precision may be 0 for |x| == 0 or |x| == Inf. func (x *Float) Precision() uint { return uint(x.prec) } @@ -170,9 +183,17 @@ func (x *Float) Mode() RoundingMode { return x.mode } +// IsInf reports whether x is an infinity, according to sign. +// If sign > 0, IsInf reports whether x is positive infinity. +// If sign < 0, IsInf reports whether x is negative infinity. +// If sign == 0, IsInf reports whether x is either infinity. +func (x *Float) IsInf(sign int) bool { + return x.exp == infExp && (sign == 0 || x.neg == (sign < 0)) +} + // debugging support func (x *Float) validate() { - // assumes x != 0 + // assumes x != 0 && x != Inf const msb = 1 << (_W - 1) m := len(x.mant) if x.mant[m-1]&msb == 0 { @@ -196,6 +217,9 @@ func (z *Float) round(sbit uint) { return } + // handle Inf + // TODO(gri) handle Inf + if debugFloat { z.validate() } @@ -399,10 +423,15 @@ func (z *Float) SetInt64(x int64) *Float { // SetFloat64 sets z to x and returns z. // Precision is set to 53 bits. -// TODO(gri) test denormals, +/-Inf, disallow NaN. +// TODO(gri) test denormals, disallow NaN. func (z *Float) SetFloat64(x float64) *Float { - z.prec = 53 z.neg = math.Signbit(x) // handle -0 correctly (-0 == 0) + z.prec = 53 + if math.IsInf(x, 0) { + z.mant = z.mant[:0] + z.exp = infExp + return z + } if x == 0 { z.mant = z.mant[:0] z.exp = 0 @@ -484,7 +513,7 @@ func high64(x nat) uint64 { return v } -// TODO(gri) FIX THIS (rounding mode, errors, accuracy, etc.) +// TODO(gri) FIX THIS (Inf, rounding mode, errors, accuracy, etc.) func (x *Float) Uint64() uint64 { m := high64(x.mant) s := x.exp @@ -494,7 +523,7 @@ func (x *Float) Uint64() uint64 { return 0 // imprecise } -// TODO(gri) FIX THIS (rounding mode, errors, etc.) +// TODO(gri) FIX THIS (inf, rounding mode, errors, etc.) func (x *Float) Int64() int64 { v := int64(x.Uint64()) if x.neg { @@ -507,6 +536,15 @@ func (x *Float) Int64() int64 { // by rounding to nearest with 53 bits precision. // TODO(gri) implement/document error scenarios. func (x *Float) Float64() (float64, Accuracy) { + // x == +/-Inf + if x.exp == infExp { + var sign int + if x.neg { + sign = -1 + } + return math.Inf(sign), Exact + } + // x == 0 if len(x.mant) == 0 { return 0, Exact } @@ -561,7 +599,7 @@ func (z *Float) Neg(x *Float) *Float { } // z = x + y, ignoring signs of x and y. -// x and y must not be 0. +// x and y must not be 0 or an Inf. func (z *Float) uadd(x, y *Float) { // Note: This implementation requires 2 shifts most of the // time. It is also inefficient if exponents or precisions @@ -603,7 +641,7 @@ func (z *Float) uadd(x, y *Float) { } // z = x - y for x >= y, ignoring signs of x and y. -// x and y must not be zero. +// x and y must not be 0 or an Inf. func (z *Float) usub(x, y *Float) { // This code is symmetric to uadd. // We have not factored the common code out because @@ -643,7 +681,7 @@ func (z *Float) usub(x, y *Float) { } // z = x * y, ignoring signs of x and y. -// x and y must not be zero. +// x and y must not be 0 or an Inf. func (z *Float) umul(x, y *Float) { if debugFloat && (len(x.mant) == 0 || len(y.mant) == 0) { panic("umul called with 0 argument") @@ -664,7 +702,7 @@ func (z *Float) umul(x, y *Float) { } // z = x / y, ignoring signs of x and y. -// x and y must not be zero. +// x and y must not be 0 or an Inf. func (z *Float) uquo(x, y *Float) { if debugFloat && (len(x.mant) == 0 || len(y.mant) == 0) { panic("uquo called with 0 argument") @@ -708,7 +746,7 @@ func (z *Float) uquo(x, y *Float) { } // ucmp returns -1, 0, or 1, depending on whether x < y, x == y, or x > y, -// while ignoring the signs of x and y. x and y must not be zero. +// while ignoring the signs of x and y. x and y must not be 0 or an Inf. func (x *Float) ucmp(y *Float) int { if debugFloat && (len(x.mant) == 0 || len(y.mant) == 0) { panic("ucmp called with 0 argument") @@ -765,16 +803,24 @@ func (x *Float) ucmp(y *Float) int { // sign as x even when x is zero. // Add sets z to the rounded sum x+y and returns z. +// If z's precision is 0, it is set to the larger of +// x's or y's precision before the operation. // Rounding is performed according to z's precision // and rounding mode; and z's accuracy reports the // result error relative to the exact (not rounded) // result. func (z *Float) Add(x, y *Float) *Float { + if z.prec == 0 { + z.prec = umax(x.prec, y.prec) + } + // TODO(gri) what about -0? if len(y.mant) == 0 { + // TODO(gri) handle Inf return z.Round(x, z.prec, z.mode) } if len(x.mant) == 0 { + // TODO(gri) handle Inf return z.Round(y, z.prec, z.mode) } @@ -799,13 +845,15 @@ func (z *Float) Add(x, y *Float) *Float { } // Sub sets z to the rounded difference x-y and returns z. -// Rounding is performed according to z's precision -// and rounding mode; and z's accuracy reports the -// result error relative to the exact (not rounded) -// result. +// Precision, rounding, and accuracy reporting are as for Add. func (z *Float) Sub(x, y *Float) *Float { + if z.prec == 0 { + z.prec = umax(x.prec, y.prec) + } + // TODO(gri) what about -0? if len(y.mant) == 0 { + // TODO(gri) handle Inf return z.Round(x, z.prec, z.mode) } if len(x.mant) == 0 { @@ -836,11 +884,14 @@ func (z *Float) Sub(x, y *Float) *Float { } // Mul sets z to the rounded product x*y and returns z. -// Rounding is performed according to z's precision -// and rounding mode; and z's accuracy reports the -// result error relative to the exact (not rounded) -// result. +// Precision, rounding, and accuracy reporting are as for Add. func (z *Float) Mul(x, y *Float) *Float { + if z.prec == 0 { + z.prec = umax(x.prec, y.prec) + } + + // TODO(gri) handle Inf + // TODO(gri) what about -0? if len(x.mant) == 0 || len(y.mant) == 0 { z.neg = false @@ -858,46 +909,61 @@ func (z *Float) Mul(x, y *Float) *Float { // Quo sets z to the rounded quotient x/y and returns z. // If y == 0, a division-by-zero run-time panic occurs. TODO(gri) this should become Inf -// Rounding is performed according to z's precision -// and rounding mode; and z's accuracy reports the -// result error relative to the exact (not rounded) -// result. +// Precision, rounding, and accuracy reporting are as for Add. func (z *Float) Quo(x, y *Float) *Float { - // TODO(gri) what about -0? + if z.prec == 0 { + z.prec = umax(x.prec, y.prec) + } + + // TODO(gri) handle Inf + + // TODO(gri) check that this is correct + z.neg = x.neg != y.neg + + if len(y.mant) == 0 { + z.setExp(infExp) + return z + } + if len(x.mant) == 0 { - z.neg = false z.mant = z.mant[:0] z.exp = 0 z.acc = Exact return z } - if len(y.mant) == 0 { - panic("division-by-zero") // TODO(gri) handle this better - } // x, y != 0 z.uquo(x, y) - z.neg = x.neg != y.neg return z } // Lsh sets z to the rounded x * (1< y // func (x *Float) Cmp(y *Float) int { + // TODO(gri) handle Inf + // special cases switch { case len(x.mant) == 0: @@ -943,7 +1011,7 @@ func (x *Float) Cmp(y *Float) int { // Sign returns: // // -1 if x < 0 -// 0 if x == 0 (incl. x == -0) +// 0 if x == 0 (incl. x == -0) // TODO(gri) is this correct? // +1 if x > 0 // func (x *Float) Sign() int { @@ -955,3 +1023,10 @@ func (x *Float) Sign() int { } return 1 } + +func umax(x, y uint) uint { + if x < y { + return x + } + return y +} diff --git a/src/math/big/float_test.go b/src/math/big/float_test.go index e37d2ed365..979b739b08 100644 --- a/src/math/big/float_test.go +++ b/src/math/big/float_test.go @@ -6,11 +6,70 @@ package big import ( "fmt" + "math" "sort" "strconv" "testing" ) +func TestFloatZeroValue(t *testing.T) { + // zero (uninitialized) value is a ready-to-use 0.0 + var x Float + if s := x.Format('f', 1); s != "0.0" { + t.Errorf("zero value = %s; want 0.0", s) + } + + // zero value has precision 0 + if prec := x.Precision(); prec != 0 { + t.Errorf("prec = %d; want 0", prec) + } + + // zero value can be used in any and all positions of binary operations + make := func(x int) *Float { + if x == 0 { + return new(Float) // 0 translates into the zero value + } + return NewFloat(float64(x), 10, 0) + } + for _, test := range []struct { + z, x, y, want int + opname rune + op func(z, x, y *Float) *Float + }{ + {0, 0, 0, 0, '+', (*Float).Add}, + {0, 1, 2, 3, '+', (*Float).Add}, + {1, 2, 0, 2, '+', (*Float).Add}, + {2, 0, 1, 1, '+', (*Float).Add}, + + {0, 0, 0, 0, '-', (*Float).Sub}, + {0, 1, 2, -1, '-', (*Float).Sub}, + {1, 2, 0, 2, '-', (*Float).Sub}, + {2, 0, 1, -1, '-', (*Float).Sub}, + + {0, 0, 0, 0, '*', (*Float).Mul}, + {0, 1, 2, 2, '*', (*Float).Mul}, + {1, 2, 0, 0, '*', (*Float).Mul}, + {2, 0, 1, 0, '*', (*Float).Mul}, + + {0, 0, 0, 0, '/', (*Float).Quo}, + {0, 2, 1, 2, '/', (*Float).Quo}, + {1, 2, 0, 0, '/', (*Float).Quo}, + {2, 0, 1, 0, '/', (*Float).Quo}, + } { + z := make(test.z) + test.op(z, make(test.x), make(test.y)) + if got := int(z.Int64()); got != test.want { + t.Errorf("%d %c %d = %d; want %d", test.x, test.opname, test.y, got, test.want) + } + } + + // TODO(gri) test how precision is set for zero value results +} + +func TestFloatInf(t *testing.T) { + // TODO(gri) implement this +} + func fromBinary(s string) int64 { x, err := strconv.ParseInt(s, 2, 64) if err != nil { @@ -244,6 +303,9 @@ func TestFloatSetFloat64(t *testing.T) { 3.14159265e10, 2.718281828e-123, 1.0 / 3, + math.Inf(-1), + math.Inf(0), + -math.Inf(1), } { for i := range [2]int{} { if i&1 != 0 { diff --git a/src/math/big/floatconv.go b/src/math/big/floatconv.go index 7628e77d9a..e3611b234b 100644 --- a/src/math/big/floatconv.go +++ b/src/math/big/floatconv.go @@ -191,13 +191,26 @@ func (x *Float) Format(format byte, prec int) string { // Append appends the string form of the floating-point number x, // as generated by x.Format, to buf and returns the extended buffer. func (x *Float) Append(buf []byte, format byte, prec int) []byte { - // pick off simple cases + // TODO(gri) factor out handling of sign? + + // Inf + if x.IsInf(0) { + var ch byte = '+' + if x.neg { + ch = '-' + } + buf = append(buf, ch) + return append(buf, "Inf"...) + } + + // easy formats switch format { case 'b': return x.bstring(buf) case 'p': return x.pstring(buf) } + return x.bigFtoa(buf, format, prec) } @@ -212,7 +225,6 @@ func (x *Float) String() string { // The mantissa is normalized such that is uses x.Precision() bits in binary // representation. func (x *Float) bstring(buf []byte) []byte { - // TODO(gri) handle Inf if x.neg { buf = append(buf, '-') } @@ -240,7 +252,6 @@ func (x *Float) bstring(buf []byte) []byte { // ad returns the extended buffer. // The mantissa is normalized such that 0.5 <= 0.mantissa < 1.0. func (x *Float) pstring(buf []byte) []byte { - // TODO(gri) handle Inf if x.neg { buf = append(buf, '-') } diff --git a/src/math/big/floatconv_test.go b/src/math/big/floatconv_test.go index d3290dddd0..11e5df448a 100644 --- a/src/math/big/floatconv_test.go +++ b/src/math/big/floatconv_test.go @@ -192,9 +192,9 @@ func TestFloat64Format(t *testing.T) { // {math.NaN(), 'g', -1, "NaN"}, // {-math.NaN(), 'g', -1, "NaN"}, - // {math.Inf(0), 'g', -1, "+Inf"}, - // {math.Inf(-1), 'g', -1, "-Inf"}, - // {-math.Inf(0), 'g', -1, "-Inf"}, + {math.Inf(0), 'g', -1, "+Inf"}, + {math.Inf(-1), 'g', -1, "-Inf"}, + {-math.Inf(0), 'g', -1, "-Inf"}, {-1, 'b', -1, "-4503599627370496p-52"},