mirror of
https://github.com/golang/go
synced 2024-11-21 21:54:40 -07:00
math: handle denormals in Frexp, Ilogb, Ldexp, and Logb
Also: * document special cases for Frexp and Ldexp * handle ±Inf in Ldexp * correctly return -0 on underflow in Ldexp * test special cases for Ldexp * test boundary cases for Frexp, Ilogb, Ldexp, and Logb R=rsc CC=golang-dev https://golang.org/cl/3676041
This commit is contained in:
parent
01fad6a6b0
commit
13c2e62966
@ -1112,6 +1112,33 @@ var jM3SC = []float64{
|
|||||||
NaN(),
|
NaN(),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var vfldexpSC = []fi{
|
||||||
|
{0, 0},
|
||||||
|
{0, -1075},
|
||||||
|
{0, 1024},
|
||||||
|
{Copysign(0, -1), 0},
|
||||||
|
{Copysign(0, -1), -1075},
|
||||||
|
{Copysign(0, -1), 1024},
|
||||||
|
{Inf(1), 0},
|
||||||
|
{Inf(1), -1024},
|
||||||
|
{Inf(-1), 0},
|
||||||
|
{Inf(-1), -1024},
|
||||||
|
{NaN(), -1024},
|
||||||
|
}
|
||||||
|
var ldexpSC = []float64{
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
Copysign(0, -1),
|
||||||
|
Copysign(0, -1),
|
||||||
|
Copysign(0, -1),
|
||||||
|
Inf(1),
|
||||||
|
Inf(1),
|
||||||
|
Inf(-1),
|
||||||
|
Inf(-1),
|
||||||
|
NaN(),
|
||||||
|
}
|
||||||
|
|
||||||
var vflgammaSC = []float64{
|
var vflgammaSC = []float64{
|
||||||
Inf(-1),
|
Inf(-1),
|
||||||
-3,
|
-3,
|
||||||
@ -1440,6 +1467,65 @@ var yM3SC = []float64{
|
|||||||
NaN(),
|
NaN(),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// arguments and expected results for boundary cases
|
||||||
|
const (
|
||||||
|
SmallestNormalFloat64 = 2.2250738585072014e-308 // 2**-1022
|
||||||
|
LargestSubnormalFloat64 = SmallestNormalFloat64 - SmallestNonzeroFloat64
|
||||||
|
)
|
||||||
|
|
||||||
|
var vffrexpBC = []float64{
|
||||||
|
SmallestNormalFloat64,
|
||||||
|
LargestSubnormalFloat64,
|
||||||
|
SmallestNonzeroFloat64,
|
||||||
|
MaxFloat64,
|
||||||
|
-SmallestNormalFloat64,
|
||||||
|
-LargestSubnormalFloat64,
|
||||||
|
-SmallestNonzeroFloat64,
|
||||||
|
-MaxFloat64,
|
||||||
|
}
|
||||||
|
var frexpBC = []fi{
|
||||||
|
{0.5, -1021},
|
||||||
|
{0.99999999999999978, -1022},
|
||||||
|
{0.5, -1073},
|
||||||
|
{0.99999999999999989, 1024},
|
||||||
|
{-0.5, -1021},
|
||||||
|
{-0.99999999999999978, -1022},
|
||||||
|
{-0.5, -1073},
|
||||||
|
{-0.99999999999999989, 1024},
|
||||||
|
}
|
||||||
|
|
||||||
|
var vfldexpBC = []fi{
|
||||||
|
{SmallestNormalFloat64, -52},
|
||||||
|
{LargestSubnormalFloat64, -51},
|
||||||
|
{SmallestNonzeroFloat64, 1074},
|
||||||
|
{MaxFloat64, -(1023 + 1074)},
|
||||||
|
{1, -1075},
|
||||||
|
{-1, -1075},
|
||||||
|
{1, 1024},
|
||||||
|
{-1, 1024},
|
||||||
|
}
|
||||||
|
var ldexpBC = []float64{
|
||||||
|
SmallestNonzeroFloat64,
|
||||||
|
1e-323, // 2**-1073
|
||||||
|
1,
|
||||||
|
1e-323, // 2**-1073
|
||||||
|
0,
|
||||||
|
Copysign(0, -1),
|
||||||
|
Inf(1),
|
||||||
|
Inf(-1),
|
||||||
|
}
|
||||||
|
|
||||||
|
var logbBC = []float64{
|
||||||
|
-1022,
|
||||||
|
-1023,
|
||||||
|
-1074,
|
||||||
|
1023,
|
||||||
|
-1022,
|
||||||
|
-1023,
|
||||||
|
-1074,
|
||||||
|
1023,
|
||||||
|
}
|
||||||
|
|
||||||
func tolerance(a, b, e float64) bool {
|
func tolerance(a, b, e float64) bool {
|
||||||
d := a - b
|
d := a - b
|
||||||
if d < 0 {
|
if d < 0 {
|
||||||
@ -1792,6 +1878,11 @@ func TestFrexp(t *testing.T) {
|
|||||||
t.Errorf("Frexp(%g) = %g, %d, want %g, %d", vffrexpSC[i], f, j, frexpSC[i].f, frexpSC[i].i)
|
t.Errorf("Frexp(%g) = %g, %d, want %g, %d", vffrexpSC[i], f, j, frexpSC[i].f, frexpSC[i].i)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
for i := 0; i < len(vffrexpBC); i++ {
|
||||||
|
if f, j := Frexp(vffrexpBC[i]); !alike(frexpBC[i].f, f) || frexpBC[i].i != j {
|
||||||
|
t.Errorf("Frexp(%g) = %g, %d, want %g, %d", vffrexpBC[i], f, j, frexpBC[i].f, frexpBC[i].i)
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestGamma(t *testing.T) {
|
func TestGamma(t *testing.T) {
|
||||||
@ -1833,6 +1924,11 @@ func TestIlogb(t *testing.T) {
|
|||||||
t.Errorf("Ilogb(%g) = %d, want %d", vflogbSC[i], e, ilogbSC[i])
|
t.Errorf("Ilogb(%g) = %d, want %d", vflogbSC[i], e, ilogbSC[i])
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
for i := 0; i < len(vffrexpBC); i++ {
|
||||||
|
if e := Ilogb(vffrexpBC[i]); int(logbBC[i]) != e {
|
||||||
|
t.Errorf("Ilogb(%g) = %d, want %d", vffrexpBC[i], e, int(logbBC[i]))
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestJ0(t *testing.T) {
|
func TestJ0(t *testing.T) {
|
||||||
@ -1891,6 +1987,21 @@ func TestLdexp(t *testing.T) {
|
|||||||
t.Errorf("Ldexp(%g, %d) = %g, want %g", frexpSC[i].f, frexpSC[i].i, f, vffrexpSC[i])
|
t.Errorf("Ldexp(%g, %d) = %g, want %g", frexpSC[i].f, frexpSC[i].i, f, vffrexpSC[i])
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
for i := 0; i < len(vfldexpSC); i++ {
|
||||||
|
if f := Ldexp(vfldexpSC[i].f, vfldexpSC[i].i); !alike(ldexpSC[i], f) {
|
||||||
|
t.Errorf("Ldexp(%g, %d) = %g, want %g", vfldexpSC[i].f, vfldexpSC[i].i, f, ldexpSC[i])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for i := 0; i < len(vffrexpBC); i++ {
|
||||||
|
if f := Ldexp(frexpBC[i].f, frexpBC[i].i); !alike(vffrexpBC[i], f) {
|
||||||
|
t.Errorf("Ldexp(%g, %d) = %g, want %g", frexpBC[i].f, frexpBC[i].i, f, vffrexpBC[i])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for i := 0; i < len(vfldexpBC); i++ {
|
||||||
|
if f := Ldexp(vfldexpBC[i].f, vfldexpBC[i].i); !alike(ldexpBC[i], f) {
|
||||||
|
t.Errorf("Ldexp(%g, %d) = %g, want %g", vfldexpBC[i].f, vfldexpBC[i].i, f, ldexpBC[i])
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestLgamma(t *testing.T) {
|
func TestLgamma(t *testing.T) {
|
||||||
@ -1934,6 +2045,11 @@ func TestLogb(t *testing.T) {
|
|||||||
t.Errorf("Logb(%g) = %g, want %g", vflogbSC[i], f, logbSC[i])
|
t.Errorf("Logb(%g) = %g, want %g", vflogbSC[i], f, logbSC[i])
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
for i := 0; i < len(vffrexpBC); i++ {
|
||||||
|
if e := Logb(vffrexpBC[i]); !alike(logbBC[i], e) {
|
||||||
|
t.Errorf("Ilogb(%g) = %g, want %g", vffrexpBC[i], e, logbBC[i])
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestLog10(t *testing.T) {
|
func TestLog10(t *testing.T) {
|
||||||
|
@ -47,3 +47,13 @@ func IsInf(f float64, sign int) bool {
|
|||||||
// return sign >= 0 && x == uvinf || sign <= 0 && x == uvneginf;
|
// return sign >= 0 && x == uvinf || sign <= 0 && x == uvneginf;
|
||||||
return sign >= 0 && f > MaxFloat64 || sign <= 0 && f < -MaxFloat64
|
return sign >= 0 && f > MaxFloat64 || sign <= 0 && f < -MaxFloat64
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// normalize returns a normal number y and exponent exp
|
||||||
|
// satisfying x == y × 2**exp. It assumes x is finite and non-zero.
|
||||||
|
func normalize(x float64) (y float64, exp int) {
|
||||||
|
const SmallestNormal = 2.2250738585072014e-308 // 2**-1022
|
||||||
|
if Fabs(x) < SmallestNormal {
|
||||||
|
return x * (1 << 52), -52
|
||||||
|
}
|
||||||
|
return x, 0
|
||||||
|
}
|
||||||
|
@ -8,6 +8,11 @@ package math
|
|||||||
// and an integral power of two.
|
// and an integral power of two.
|
||||||
// It returns frac and exp satisfying f == frac × 2**exp,
|
// It returns frac and exp satisfying f == frac × 2**exp,
|
||||||
// with the absolute value of frac in the interval [½, 1).
|
// with the absolute value of frac in the interval [½, 1).
|
||||||
|
//
|
||||||
|
// Special cases are:
|
||||||
|
// Frexp(±0) = ±0, 0
|
||||||
|
// Frexp(±Inf) = ±Inf, 0
|
||||||
|
// Frexp(NaN) = NaN, 0
|
||||||
func Frexp(f float64) (frac float64, exp int) {
|
func Frexp(f float64) (frac float64, exp int) {
|
||||||
// TODO(rsc): Remove manual inlining of IsNaN, IsInf
|
// TODO(rsc): Remove manual inlining of IsNaN, IsInf
|
||||||
// when compiler does it for us
|
// when compiler does it for us
|
||||||
@ -18,8 +23,9 @@ func Frexp(f float64) (frac float64, exp int) {
|
|||||||
case f < -MaxFloat64 || f > MaxFloat64 || f != f: // IsInf(f, 0) || IsNaN(f):
|
case f < -MaxFloat64 || f > MaxFloat64 || f != f: // IsInf(f, 0) || IsNaN(f):
|
||||||
return f, 0
|
return f, 0
|
||||||
}
|
}
|
||||||
|
f, exp = normalize(f)
|
||||||
x := Float64bits(f)
|
x := Float64bits(f)
|
||||||
exp = int((x>>shift)&mask) - bias + 1
|
exp += int((x>>shift)&mask) - bias + 1
|
||||||
x &^= mask << shift
|
x &^= mask << shift
|
||||||
x |= (-1 + bias) << shift
|
x |= (-1 + bias) << shift
|
||||||
frac = Float64frombits(x)
|
frac = Float64frombits(x)
|
||||||
|
@ -6,6 +6,11 @@ package math
|
|||||||
|
|
||||||
// Ldexp is the inverse of Frexp.
|
// Ldexp is the inverse of Frexp.
|
||||||
// It returns frac × 2**exp.
|
// It returns frac × 2**exp.
|
||||||
|
//
|
||||||
|
// Special cases are:
|
||||||
|
// Ldexp(±0, exp) = ±0
|
||||||
|
// Ldexp(±Inf, exp) = ±Inf
|
||||||
|
// Ldexp(NaN, exp) = NaN
|
||||||
func Ldexp(frac float64, exp int) float64 {
|
func Ldexp(frac float64, exp int) float64 {
|
||||||
// TODO(rsc): Remove manual inlining of IsNaN, IsInf
|
// TODO(rsc): Remove manual inlining of IsNaN, IsInf
|
||||||
// when compiler does it for us
|
// when compiler does it for us
|
||||||
@ -13,21 +18,28 @@ func Ldexp(frac float64, exp int) float64 {
|
|||||||
switch {
|
switch {
|
||||||
case frac == 0:
|
case frac == 0:
|
||||||
return frac // correctly return -0
|
return frac // correctly return -0
|
||||||
case frac != frac: // IsNaN(frac):
|
case frac < -MaxFloat64 || frac > MaxFloat64 || frac != frac: // IsInf(frac, 0) || IsNaN(frac):
|
||||||
return NaN()
|
return frac
|
||||||
}
|
}
|
||||||
|
frac, e := normalize(frac)
|
||||||
|
exp += e
|
||||||
x := Float64bits(frac)
|
x := Float64bits(frac)
|
||||||
exp += int(x>>shift) & mask
|
exp += int(x>>shift)&mask - bias
|
||||||
if exp <= 0 {
|
if exp < -1074 {
|
||||||
return 0 // underflow
|
return Copysign(0, frac) // underflow
|
||||||
}
|
}
|
||||||
if exp >= mask { // overflow
|
if exp > 1023 { // overflow
|
||||||
if frac < 0 {
|
if frac < 0 {
|
||||||
return Inf(-1)
|
return Inf(-1)
|
||||||
}
|
}
|
||||||
return Inf(1)
|
return Inf(1)
|
||||||
}
|
}
|
||||||
|
var m float64 = 1
|
||||||
|
if exp < -1022 { // denormal
|
||||||
|
exp += 52
|
||||||
|
m = 1.0 / (1 << 52) // 2**-52
|
||||||
|
}
|
||||||
x &^= mask << shift
|
x &^= mask << shift
|
||||||
x |= uint64(exp) << shift
|
x |= uint64(exp+bias) << shift
|
||||||
return Float64frombits(x)
|
return m * Float64frombits(x)
|
||||||
}
|
}
|
||||||
|
@ -4,7 +4,7 @@
|
|||||||
|
|
||||||
package math
|
package math
|
||||||
|
|
||||||
// Logb(x) returns the binary exponent of non-zero x.
|
// Logb(x) returns the binary exponent of x.
|
||||||
//
|
//
|
||||||
// Special cases are:
|
// Special cases are:
|
||||||
// Logb(±Inf) = +Inf
|
// Logb(±Inf) = +Inf
|
||||||
@ -22,10 +22,10 @@ func Logb(x float64) float64 {
|
|||||||
case x != x: // IsNaN(x):
|
case x != x: // IsNaN(x):
|
||||||
return x
|
return x
|
||||||
}
|
}
|
||||||
return float64(int((Float64bits(x)>>shift)&mask) - bias)
|
return float64(ilogb(x))
|
||||||
}
|
}
|
||||||
|
|
||||||
// Ilogb(x) returns the binary exponent of non-zero x as an integer.
|
// Ilogb(x) returns the binary exponent of x as an integer.
|
||||||
//
|
//
|
||||||
// Special cases are:
|
// Special cases are:
|
||||||
// Ilogb(±Inf) = MaxInt32
|
// Ilogb(±Inf) = MaxInt32
|
||||||
@ -43,5 +43,12 @@ func Ilogb(x float64) int {
|
|||||||
case x < -MaxFloat64 || x > MaxFloat64: // IsInf(x, 0):
|
case x < -MaxFloat64 || x > MaxFloat64: // IsInf(x, 0):
|
||||||
return MaxInt32
|
return MaxInt32
|
||||||
}
|
}
|
||||||
return int((Float64bits(x)>>shift)&mask) - bias
|
return ilogb(x)
|
||||||
|
}
|
||||||
|
|
||||||
|
// logb returns the binary exponent of x. It assumes x is finite and
|
||||||
|
// non-zero.
|
||||||
|
func ilogb(x float64) int {
|
||||||
|
x, exp := normalize(x)
|
||||||
|
return int((Float64bits(x)>>shift)&mask) - bias + exp
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user