mirror of
https://github.com/golang/go
synced 2024-11-18 12:14:42 -07:00
math: faster Cbrt
Old 45.3 ns/op, new 19.9 ns/op. Change-Id: If2a201981dcc259846631ecbc694c401e0a80287 Reviewed-on: https://go-review.googlesource.com/5260 Reviewed-by: Russ Cox <rsc@golang.org>
This commit is contained in:
parent
6a10f720f2
commit
b48d2a5f25
@ -4,13 +4,17 @@
|
|||||||
|
|
||||||
package math
|
package math
|
||||||
|
|
||||||
/*
|
// The go code is a modified version of the original C code from
|
||||||
The algorithm is based in part on "Optimal Partitioning of
|
// http://www.netlib.org/fdlibm/s_cbrt.c and came with this notice.
|
||||||
Newton's Method for Calculating Roots", by Gunter Meinardus
|
//
|
||||||
and G. D. Taylor, Mathematics of Computation © 1980 American
|
// ====================================================
|
||||||
Mathematical Society.
|
// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
||||||
(http://www.jstor.org/stable/2006387?seq=9, accessed 11-Feb-2010)
|
//
|
||||||
*/
|
// Developed at SunSoft, a Sun Microsystems, Inc. business.
|
||||||
|
// Permission to use, copy, modify, and distribute this
|
||||||
|
// software is freely granted, provided that this notice
|
||||||
|
// is preserved.
|
||||||
|
// ====================================================
|
||||||
|
|
||||||
// Cbrt returns the cube root of x.
|
// Cbrt returns the cube root of x.
|
||||||
//
|
//
|
||||||
@ -20,57 +24,54 @@ package math
|
|||||||
// Cbrt(NaN) = NaN
|
// Cbrt(NaN) = NaN
|
||||||
func Cbrt(x float64) float64 {
|
func Cbrt(x float64) float64 {
|
||||||
const (
|
const (
|
||||||
A1 = 1.662848358e-01
|
B1 = 715094163 // (682-0.03306235651)*2**20
|
||||||
A2 = 1.096040958e+00
|
B2 = 696219795 // (664-0.03306235651)*2**20
|
||||||
A3 = 4.105032829e-01
|
C = 5.42857142857142815906e-01 // 19/35 = 0x3FE15F15F15F15F1
|
||||||
A4 = 5.649335816e-01
|
D = -7.05306122448979611050e-01 // -864/1225 = 0xBFE691DE2532C834
|
||||||
B1 = 2.639607233e-01
|
E = 1.41428571428571436819e+00 // 99/70 = 0x3FF6A0EA0EA0EA0F
|
||||||
B2 = 8.699282849e-01
|
F = 1.60714285714285720630e+00 // 45/28 = 0x3FF9B6DB6DB6DB6E
|
||||||
B3 = 1.629083358e-01
|
G = 3.57142857142857150787e-01 // 5/14 = 0x3FD6DB6DB6DB6DB7
|
||||||
B4 = 2.824667908e-01
|
SmallestNormal = 2.22507385850720138309e-308 // 2**-1022 = 0x0010000000000000
|
||||||
C1 = 4.190115298e-01
|
|
||||||
C2 = 6.904625373e-01
|
|
||||||
C3 = 6.46502159e-02
|
|
||||||
C4 = 1.412333954e-01
|
|
||||||
)
|
)
|
||||||
// special cases
|
// special cases
|
||||||
switch {
|
switch {
|
||||||
case x == 0 || IsNaN(x) || IsInf(x, 0):
|
case x == 0 || IsNaN(x) || IsInf(x, 0):
|
||||||
return x
|
return x
|
||||||
}
|
}
|
||||||
|
|
||||||
sign := false
|
sign := false
|
||||||
if x < 0 {
|
if x < 0 {
|
||||||
x = -x
|
x = -x
|
||||||
sign = true
|
sign = true
|
||||||
}
|
}
|
||||||
// Reduce argument and estimate cube root
|
|
||||||
f, e := Frexp(x) // 0.5 <= f < 1.0
|
|
||||||
m := e % 3
|
|
||||||
if m > 0 {
|
|
||||||
m -= 3
|
|
||||||
e -= m // e is multiple of 3
|
|
||||||
}
|
|
||||||
switch m {
|
|
||||||
case 0: // 0.5 <= f < 1.0
|
|
||||||
f = A1*f + A2 - A3/(A4+f)
|
|
||||||
case -1:
|
|
||||||
f *= 0.5 // 0.25 <= f < 0.5
|
|
||||||
f = B1*f + B2 - B3/(B4+f)
|
|
||||||
default: // m == -2
|
|
||||||
f *= 0.25 // 0.125 <= f < 0.25
|
|
||||||
f = C1*f + C2 - C3/(C4+f)
|
|
||||||
}
|
|
||||||
y := Ldexp(f, e/3) // e/3 = exponent of cube root
|
|
||||||
|
|
||||||
// Iterate
|
// rough cbrt to 5 bits
|
||||||
s := y * y * y
|
t := Float64frombits(Float64bits(x)/3 + B1<<32)
|
||||||
t := s + x
|
if x < SmallestNormal {
|
||||||
y *= (t + x) / (s + t)
|
// subnormal number
|
||||||
// Reiterate
|
t = float64(1 << 54) // set t= 2**54
|
||||||
s = (y*y*y - x) / x
|
t *= x
|
||||||
y -= y * (((14.0/81.0)*s-(2.0/9.0))*s + (1.0 / 3.0)) * s
|
t = Float64frombits(Float64bits(t)/3 + B2<<32)
|
||||||
|
}
|
||||||
|
|
||||||
|
// new cbrt to 23 bits
|
||||||
|
r := t * t / x
|
||||||
|
s := C + r*t
|
||||||
|
t *= G + F/(s+E+D/s)
|
||||||
|
|
||||||
|
// chop to 22 bits, make larger than cbrt(x)
|
||||||
|
t = Float64frombits(Float64bits(t)&(0xFFFFFFFFC<<28) + 1<<30)
|
||||||
|
|
||||||
|
// one step newton iteration to 53 bits with error less than 0.667ulps
|
||||||
|
s = t * t // t*t is exact
|
||||||
|
r = x / s
|
||||||
|
w := t + t
|
||||||
|
r = (r - t) / (w + r) // r-s is exact
|
||||||
|
t = t + t*r
|
||||||
|
|
||||||
|
// restore the sign bit
|
||||||
if sign {
|
if sign {
|
||||||
y = -y
|
t = -t
|
||||||
}
|
}
|
||||||
return y
|
return t
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user