From fd1db67e87852eb9f9721a7c31c00b8aa5883a62 Mon Sep 17 00:00:00 2001 From: "Charles L. Dorian" Date: Fri, 8 Jan 2010 14:12:10 -0800 Subject: [PATCH] math: special cases for Atan, Asin and Acos Added tests for NaN and out-of-range values. Combined asin.go and atan.go into atan.go. R=rsc CC=golang-dev https://golang.org/cl/180065 --- src/pkg/math/all_test.go | 107 +++++++++++++++++-------- src/pkg/math/asin.go | 11 +-- src/pkg/math/atan.go | 3 +- src/pkg/math/sqrt.go | 163 ++++++++++++++++++++++++++++----------- 4 files changed, 198 insertions(+), 86 deletions(-) diff --git a/src/pkg/math/all_test.go b/src/pkg/math/all_test.go index dc6177dad61..58728801b48 100644 --- a/src/pkg/math/all_test.go +++ b/src/pkg/math/all_test.go @@ -22,6 +22,18 @@ var vf = []float64{ 1.8253080916808550e+00, -8.6859247685756013e+00, } +var acos = []float64{ + 1.0496193546107222e+00, + 6.858401281366443e-01, + 1.598487871457716e+00, + 2.095619936147586e+00, + 2.7053008467824158e-01, + 1.2738121680361776e+00, + 1.0205369421140630e+00, + 1.2945003481781246e+00, + 1.3872364345374451e+00, + 2.6231510803970464e+00, +} var asin = []float64{ 5.2117697218417440e-01, 8.8495619865825236e-01, @@ -154,39 +166,26 @@ var tanh = []float64{ 9.4936501296239700e-01, -9.9999994291374019e-01, } -var vfsin = []float64{ - NaN(), - Inf(-1), - 0, - Inf(1), -} -var vfasin = []float64{ + +// arguments and expected results for special cases +var vfasinSC = []float64{ NaN(), -Pi, - 0, Pi, } -var vf1 = []float64{ +var asinSC = []float64{ + NaN(), + NaN(), NaN(), - Inf(-1), - -Pi, - -1, - 0, - 1, - Pi, - Inf(1), } -var vfhypot = [][2]float64{ - [2]float64{Inf(-1), 1}, - [2]float64{Inf(1), 1}, - [2]float64{1, Inf(-1)}, - [2]float64{1, Inf(1)}, - [2]float64{NaN(), Inf(-1)}, - [2]float64{NaN(), Inf(1)}, - [2]float64{1, NaN()}, - [2]float64{NaN(), 1}, + +var vfatanSC = []float64{ + NaN(), } -var vf2 = [][2]float64{ +var atanSC = []float64{ + NaN(), +} +var vfpowSC = [][2]float64{ [2]float64{-Pi, Pi}, [2]float64{-Pi, -Pi}, [2]float64{Inf(-1), 3}, @@ -230,7 +229,7 @@ var vf2 = [][2]float64{ [2]float64{Inf(1), 0}, [2]float64{NaN(), 0}, } -var pow2 = []float64{ +var powSC = []float64{ NaN(), NaN(), Inf(-1), @@ -306,12 +305,31 @@ func alike(a, b float64) bool { return false } +func TestAcos(t *testing.T) { + for i := 0; i < len(vf); i++ { + // if f := Acos(vf[i] / 10); !veryclose(acos[i], f) { + if f := Acos(vf[i] / 10); !close(acos[i], f) { + t.Errorf("Acos(%g) = %g, want %g\n", vf[i]/10, f, acos[i]) + } + } + for i := 0; i < len(vfasinSC); i++ { + if f := Acos(vfasinSC[i]); !alike(asinSC[i], f) { + t.Errorf("Acos(%g) = %g, want %g\n", vfasinSC[i], f, asinSC[i]) + } + } +} + func TestAsin(t *testing.T) { for i := 0; i < len(vf); i++ { if f := Asin(vf[i] / 10); !veryclose(asin[i], f) { t.Errorf("Asin(%g) = %g, want %g\n", vf[i]/10, f, asin[i]) } } + for i := 0; i < len(vfasinSC); i++ { + if f := Asin(vfasinSC[i]); !alike(asinSC[i], f) { + t.Errorf("Asin(%g) = %g, want %g\n", vfasinSC[i], f, asinSC[i]) + } + } } func TestAtan(t *testing.T) { @@ -320,6 +338,11 @@ func TestAtan(t *testing.T) { t.Errorf("Atan(%g) = %g, want %g\n", vf[i], f, atan[i]) } } + for i := 0; i < len(vfatanSC); i++ { + if f := Atan(vfatanSC[i]); !alike(atanSC[i], f) { + t.Errorf("Atan(%g) = %g, want %g\n", vfatanSC[i], f, atanSC[i]) + } + } } func TestExp(t *testing.T) { @@ -356,9 +379,9 @@ func TestPow(t *testing.T) { t.Errorf("Pow(10, %.17g) = %.17g, want %.17g\n", vf[i], f, pow[i]) } } - for i := 0; i < len(vf2); i++ { - if f := Pow(vf2[i][0], vf2[i][1]); !alike(pow2[i], f) { - t.Errorf("Pow(%.17g, %.17g) = %.17g, want %.17g\n", vf2[i][0], vf2[i][1], f, pow2[i]) + for i := 0; i < len(vfpowSC); i++ { + if f := Pow(vfpowSC[i][0], vfpowSC[i][1]); !alike(powSC[i], f) { + t.Errorf("Pow(%.17g, %.17g) = %.17g, want %.17g\n", vfpowSC[i][0], vfpowSC[i][1], f, powSC[i]) } } } @@ -421,7 +444,7 @@ func TestLargeSin(t *testing.T) { f1 := Sin(vf[i]) f2 := Sin(vf[i] + large) if !kindaclose(f1, f2) { - t.Errorf("Sin(%g) = %g, want %g\n", vf[i]+large, f1, f2) + t.Errorf("Sin(%g) = %g, want %g\n", vf[i]+large, f2, f1) } } } @@ -432,7 +455,7 @@ func TestLargeCos(t *testing.T) { f1 := Cos(vf[i]) f2 := Cos(vf[i] + large) if !kindaclose(f1, f2) { - t.Errorf("Cos(%g) = %g, want %g\n", vf[i]+large, f1, f2) + t.Errorf("Cos(%g) = %g, want %g\n", vf[i]+large, f2, f1) } } } @@ -444,7 +467,7 @@ func TestLargeTan(t *testing.T) { f1 := Tan(vf[i]) f2 := Tan(vf[i] + large) if !kindaclose(f1, f2) { - t.Errorf("Tan(%g) = %g, want %g\n", vf[i]+large, f1, f2) + t.Errorf("Tan(%g) = %g, want %g\n", vf[i]+large, f2, f1) } } } @@ -488,3 +511,21 @@ func BenchmarkPowFrac(b *testing.B) { Pow(2.5, 1.5) } } + +func BenchmarkAtan(b *testing.B) { + for i := 0; i < b.N; i++ { + Atan(.5) + } +} + +func BenchmarkAsin(b *testing.B) { + for i := 0; i < b.N; i++ { + Asin(.5) + } +} + +func BenchmarkAcos(b *testing.B) { + for i := 0; i < b.N; i++ { + Acos(.5) + } +} diff --git a/src/pkg/math/asin.go b/src/pkg/math/asin.go index a138aac06f7..439673a3a7e 100644 --- a/src/pkg/math/asin.go +++ b/src/pkg/math/asin.go @@ -25,9 +25,9 @@ func Asin(x float64) float64 { temp := Sqrt(1 - x*x) if x > 0.7 { - temp = Pi/2 - Atan(temp/x) + temp = Pi/2 - satan(temp/x) } else { - temp = Atan(x / temp) + temp = satan(x / temp) } if sign { @@ -37,9 +37,4 @@ func Asin(x float64) float64 { } // Acos returns the arc cosine of x. -func Acos(x float64) float64 { - if x > 1 || x < -1 { - return NaN() - } - return Pi/2 - Asin(x) -} +func Acos(x float64) float64 { return Pi/2 - Asin(x) } diff --git a/src/pkg/math/atan.go b/src/pkg/math/atan.go index c811a39d949..99a986ac77a 100644 --- a/src/pkg/math/atan.go +++ b/src/pkg/math/atan.go @@ -4,7 +4,6 @@ package math - /* * floating-point arctangent * @@ -52,7 +51,7 @@ func satan(arg float64) float64 { } /* - * atan makes its argument positive and + * Atan makes its argument positive and * calls the inner routine satan. */ diff --git a/src/pkg/math/sqrt.go b/src/pkg/math/sqrt.go index 1e2209f2a88..a3a3119fedf 100644 --- a/src/pkg/math/sqrt.go +++ b/src/pkg/math/sqrt.go @@ -4,13 +4,83 @@ package math - -/* - * sqrt returns the square root of its floating - * point argument. Newton's method. - * - * calls frexp - */ +// The original C code and the long comment below are +// from FreeBSD's /usr/src/lib/msun/src/e_sqrt.c and +// came with this notice. The go code is a simplified +// version of the original C. +// +// ==================================================== +// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. +// +// Developed at SunPro, a Sun Microsystems, Inc. business. +// Permission to use, copy, modify, and distribute this +// software is freely granted, provided that this notice +// is preserved. +// ==================================================== +// +// __ieee754_sqrt(x) +// Return correctly rounded sqrt. +// ----------------------------------------- +// | Use the hardware sqrt if you have one | +// ----------------------------------------- +// Method: +// Bit by bit method using integer arithmetic. (Slow, but portable) +// 1. Normalization +// Scale x to y in [1,4) with even powers of 2: +// find an integer k such that 1 <= (y=x*2^(2k)) < 4, then +// sqrt(x) = 2^k * sqrt(y) +// 2. Bit by bit computation +// Let q = sqrt(y) truncated to i bit after binary point (q = 1), +// i 0 +// i+1 2 +// s = 2*q , and y = 2 * ( y - q ). (1) +// i i i i +// +// To compute q from q , one checks whether +// i+1 i +// +// -(i+1) 2 +// (q + 2 ) <= y. (2) +// i +// -(i+1) +// If (2) is false, then q = q ; otherwise q = q + 2 . +// i+1 i i+1 i +// +// With some algebric manipulation, it is not difficult to see +// that (2) is equivalent to +// -(i+1) +// s + 2 <= y (3) +// i i +// +// The advantage of (3) is that s and y can be computed by +// i i +// the following recurrence formula: +// if (3) is false +// +// s = s , y = y ; (4) +// i+1 i i+1 i +// +// otherwise, +// -i -(i+1) +// s = s + 2 , y = y - s - 2 (5) +// i+1 i i+1 i i +// +// One may easily use induction to prove (4) and (5). +// Note. Since the left hand side of (3) contain only i+2 bits, +// it does not necessary to do a full (53-bit) comparison +// in (3). +// 3. Final rounding +// After generating the 53 bits result, we compute one more bit. +// Together with the remainder, we can decide whether the +// result is exact, bigger than 1/2ulp, or less than 1/2ulp +// (it will never equal to 1/2ulp). +// The rounding mode can be detected by checking whether +// huge + tiny is equal to huge, and whether huge - tiny is +// equal to huge for some floating point number "huge" and "tiny". +// +// +// Notes: Rounding mode detection omitted. The constants "mask", "shift", +// and "bias" are found in src/pkg/math/bits.go // Sqrt returns the square root of x. // @@ -18,48 +88,55 @@ package math // Sqrt(+Inf) = +Inf // Sqrt(0) = 0 // Sqrt(x < 0) = NaN +// Sqrt(NaN) = NaN func Sqrt(x float64) float64 { - if IsInf(x, 1) { + // special cases + // TODO(rsc): Remove manual inlining of IsNaN, IsInf + // when compiler does it for us + switch { + case x != x || x > MaxFloat64: // IsNaN(x) || IsInf(x, 1): return x - } - - if x <= 0 { - if x < 0 { - return NaN() - } + case x == 0: return 0 + case x < 0: + return NaN() } - - y, exp := Frexp(x) - for y < 0.5 { - y = y * 2 - exp = exp - 1 + ix := Float64bits(x) + // normalize x + exp := int((ix >> shift) & mask) + if exp == 0 { // subnormal x + for ix&1< 60 { - temp = temp * float64(1<<30) - exp = exp - 60 + exp >>= 1 // exp = exp/2, exponent of square root + // generate sqrt(x) bit by bit + ix <<= 1 + var q, s uint64 // q = sqrt(x) + r := uint64(1 << (shift + 1)) // r = moving bit from MSB to LSB + for r != 0 { + t := s + r + if t <= ix { + s = t + r + ix -= t + q += r + } + ix <<= 1 + r >>= 1 } - for exp < -60 { - temp = temp / float64(1<<30) - exp = exp + 60 + // final rounding + if ix != 0 { // remainder, result not exact + q += q & 1 // round according to extra bit } - if exp >= 0 { - exp = 1 << uint(exp/2) - temp = temp * float64(exp) - } else { - exp = 1 << uint(-exp/2) - temp = temp / float64(exp) - } - - for i := 0; i <= 4; i++ { - temp = 0.5 * (temp + x/temp) - } - return temp + ix = q>>1 + 0x3fe0000000000000 // q/2 + 0.5 + ix += uint64(exp) << shift + return Float64frombits(ix) }