mirror of
https://github.com/golang/go
synced 2024-11-21 14:14:40 -07:00
math: amd64 versions of exp and fabs
The Exp benchmark improves from 64 ns/op to 28 ns/op on a 2.53GHz Intel Core 2 Duo. R=rsc CC=golang-dev https://golang.org/cl/1594041
This commit is contained in:
parent
0e25775518
commit
a0117bafa0
@ -7,6 +7,8 @@ include ../../Make.$(GOARCH)
|
||||
TARG=math
|
||||
|
||||
OFILES_amd64=\
|
||||
exp_amd64.$O\
|
||||
fabs_amd64.$O\
|
||||
fdim_amd64.$O\
|
||||
sqrt_amd64.$O\
|
||||
|
||||
@ -48,6 +50,7 @@ ALLGOFILES=\
|
||||
copysign.go\
|
||||
erf.go\
|
||||
exp.go\
|
||||
exp2.go\
|
||||
expm1.go\
|
||||
fabs.go\
|
||||
fdim.go\
|
||||
|
@ -2343,9 +2343,9 @@ func BenchmarkExp2(b *testing.B) {
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkFloor(b *testing.B) {
|
||||
func BenchmarkFabs(b *testing.B) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
Floor(.5)
|
||||
Fabs(.5)
|
||||
}
|
||||
}
|
||||
|
||||
@ -2355,6 +2355,12 @@ func BenchmarkFdim(b *testing.B) {
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkFloor(b *testing.B) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
Floor(.5)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkFmax(b *testing.B) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
Fmax(10, 3)
|
||||
@ -2445,18 +2451,18 @@ func BenchmarkLogb(b *testing.B) {
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkLog10(b *testing.B) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
Log10(.5)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkLog1p(b *testing.B) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
Log1p(.5)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkLog10(b *testing.B) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
Log10(.5)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkLog2(b *testing.B) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
Log2(.5)
|
||||
|
@ -139,8 +139,3 @@ func Exp(x float64) float64 {
|
||||
// TODO(rsc): make sure Ldexp can handle boundary k
|
||||
return Ldexp(y, k)
|
||||
}
|
||||
|
||||
// Exp2 returns 2**x, the base-2 exponential of x.
|
||||
//
|
||||
// Special cases are the same as Exp.
|
||||
func Exp2(x float64) float64 { return Exp(x * Ln2) }
|
||||
|
10
src/pkg/math/exp2.go
Normal file
10
src/pkg/math/exp2.go
Normal file
@ -0,0 +1,10 @@
|
||||
// Copyright 2010 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package math
|
||||
|
||||
// Exp2 returns 2**x, the base-2 exponential of x.
|
||||
//
|
||||
// Special cases are the same as Exp.
//
// The result is obtained by delegating to Exp with the argument x * Ln2,
// i.e. via the identity 2**x = e**(x * ln 2). (Ln2 is declared elsewhere
// in the package; presumably the natural logarithm of 2 -- confirm.)
|
||||
func Exp2(x float64) float64 { return Exp(x * Ln2) }
|
104
src/pkg/math/exp_amd64.s
Normal file
104
src/pkg/math/exp_amd64.s
Normal file
@ -0,0 +1,104 @@
|
||||
// Copyright 2010 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// The method is based on a paper by Naoki Shibata: "Efficient evaluation
|
||||
// methods of elementary functions suitable for SIMD computation", Proc.
|
||||
// of International Supercomputing Conference 2010 (ISC'10), pp. 25 -- 32
|
||||
// (May 2010). The paper is available at
|
||||
// http://www.springerlink.com/content/340228x165742104/
|
||||
//
|
||||
// The original code and the constants below are from the author's
|
||||
// implementation available at http://freshmeat.net/projects/sleef.
|
||||
// The README file says, "The software is in public domain.
|
||||
// You can use the software without any obligation."
|
||||
//
|
||||
// This code is a simplified version of the original.
|
||||
|
||||
#define LN2 0.6931471805599453094172321214581766 // log_e(2)
|
||||
#define LOG2E 1.4426950408889634073599246810018920 // 1/LN2
|
||||
#define LN2U 0.69314718055966295651160180568695068359375 // upper half LN2
|
||||
#define LN2L 0.28235290563031577122588448175013436025525412068e-12 // lower half LN2
|
||||
|
||||
// func Exp(x float64) float64
//
// Exp computes e**x for finite x as 2**k * e**r, where
// k = trunc(x*LOG2E) and r = x - k*LN2 (LN2 applied in two parts,
// LN2U then LN2L). e**r is obtained by scaling r down by 16,
// evaluating a degree-8 Taylor polynomial for e**t - 1, and then
// doubling the argument four times with the identity
// e**(2t) - 1 = y*(y+2), where y = e**t - 1.
//
// Non-finite inputs (exponent bits all ones) are handled separately:
// -Inf returns 0; any other such input (+Inf, NaN) is returned unchanged.
//
// NOTE(review): no range check on k is visible before it is shifted
// into the exponent field at the end, so inputs large enough in
// magnitude to overflow or underflow float64 look unhandled here --
// confirm against the tests and the pure-Go Exp.
|
||||
TEXT ·Exp(SB),7,$0
|
||||
// test bits for not-finite
|
||||
MOVQ x+0(FP), AX // AX = raw bits of x
|
||||
MOVQ $0x7ff0000000000000, BX // BX = exponent-field mask
|
||||
ANDQ BX, AX // AX = exponent bits of x only
|
||||
CMPQ BX, AX
|
||||
JEQ not_finite // exponent all ones: x is Inf or NaN
|
||||
MOVSD x+0(FP), X0 // X0 = x
|
||||
MOVSD $LOG2E, X1
|
||||
MULSD X0, X1 // X1 = x * LOG2E
|
||||
CVTTSD2SQ X1, BX // BX = exponent k = x*LOG2E truncated toward zero
|
||||
CVTSQ2SD BX, X1 // X1 = k as a float64
|
||||
MOVSD $LN2U, X2
|
||||
MULSD X1, X2 // X2 = k * LN2U
|
||||
SUBSD X2, X0 // X0 = x - k*LN2U
|
||||
MOVSD $LN2L, X2
|
||||
MULSD X1, X2 // X2 = k * LN2L
|
||||
SUBSD X2, X0 // X0 = r = x - k*LN2U - k*LN2L
|
||||
// reduce argument
|
||||
MOVSD $0.0625, X1
|
||||
MULSD X1, X0 // X0 = t = r/16
|
||||
// Taylor series evaluation
// Horner form in X1; the constants are 1/8!, 1/7!, ..., 1/2!, 1.
|
||||
MOVSD $2.4801587301587301587e-5, X1 // 1/8!
|
||||
MULSD X0, X1
|
||||
MOVSD $1.9841269841269841270e-4, X2 // 1/7!
|
||||
ADDSD X2, X1
|
||||
MULSD X0, X1
|
||||
MOVSD $1.3888888888888888889e-3, X2 // 1/6!
|
||||
ADDSD X2, X1
|
||||
MULSD X0, X1
|
||||
MOVSD $8.3333333333333333333e-3, X2 // 1/5!
|
||||
ADDSD X2, X1
|
||||
MULSD X0, X1
|
||||
MOVSD $4.1666666666666666667e-2, X2 // 1/4!
|
||||
ADDSD X2, X1
|
||||
MULSD X0, X1
|
||||
MOVSD $1.6666666666666666667e-1, X2 // 1/3!
|
||||
ADDSD X2, X1
|
||||
MULSD X0, X1
|
||||
MOVSD $0.5, X2 // 1/2!
|
||||
ADDSD X2, X1
|
||||
MULSD X0, X1
|
||||
MOVSD $1.0, X2
|
||||
ADDSD X2, X1 // X1 = 1 + t/2! + ... + t**7/8!
|
||||
MULSD X1, X0 // X0 = y = t + t**2/2! + ... + t**8/8! ~ e**t - 1
|
||||
// Undo the divide-by-16: each step maps y to y*(y+2) = (y+1)**2 - 1,
// which doubles the argument; four steps turn e**t - 1 into e**r - 1.
MOVSD $2.0, X1
|
||||
ADDSD X0, X1
|
||||
MULSD X1, X0 // doubling step 1
|
||||
MOVSD $2.0, X1
|
||||
ADDSD X0, X1
|
||||
MULSD X1, X0 // doubling step 2
|
||||
MOVSD $2.0, X1
|
||||
ADDSD X0, X1
|
||||
MULSD X1, X0 // doubling step 3
|
||||
MOVSD $2.0, X1
|
||||
ADDSD X0, X1
|
||||
MULSD X1, X0 // doubling step 4: X0 ~ e**r - 1
|
||||
MOVSD $1.0, X1
|
||||
ADDSD X1, X0 // X0 = fr ~ e**r
|
||||
// return ldexp(fr, exp)
// Done by building the float64 bit pattern (k + bias) << 52 and multiplying.
|
||||
MOVQ $0x3ff, AX // bias (0x3ff = 1023, the float64 exponent bias)
|
||||
ADDQ AX, BX // BX = biased exponent k + 0x3ff
|
||||
MOVQ BX, X1
|
||||
MOVQ $52, AX // shift: position of the exponent field above the mantissa bits
|
||||
MOVQ AX, X2
|
||||
PSLLQ X2, X1 // X1 = (k + 0x3ff) << 52, the scale factor as float64 bits
|
||||
MULSD X1, X0 // X0 = fr * scale
|
||||
MOVSD X0, r+8(FP) // store the result
|
||||
RET
|
||||
not_finite:
|
||||
// test bits for -Inf
|
||||
MOVQ x+0(FP), AX // reload the full bit pattern of x
|
||||
MOVQ $0xfff0000000000000, BX // bit pattern of -Inf
|
||||
CMPQ BX, AX
|
||||
JNE not_neginf
|
||||
XORQ AX, AX // x is -Inf: result bits are all zero (+0)
|
||||
MOVQ AX, r+8(FP)
|
||||
RET
|
||||
not_neginf:
|
||||
MOVQ AX, r+8(FP) // any other non-finite x is returned unchanged
|
||||
RET
|
12
src/pkg/math/fabs_amd64.s
Normal file
12
src/pkg/math/fabs_amd64.s
Normal file
@ -0,0 +1,12 @@
|
||||
// Copyright 2010 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// func Fabs(x float64) float64
//
// Fabs returns x with bit 63 (the float64 sign bit) cleared; all other
// bits of the argument are passed through unchanged.
|
||||
TEXT ·Fabs(SB),7,$0
|
||||
MOVQ $(1<<63), BX // BX = mask with only the sign bit set
|
||||
MOVQ BX, X0 // movsd $(-0.0), x0 (same bit pattern as the sign mask)
|
||||
MOVSD x+0(FP), X1 // X1 = x
|
||||
ANDNPD X1, X0 // X0 = (NOT X0) AND X1: x with the sign bit masked off
|
||||
MOVSD X0, r+8(FP) // store the result
|
||||
RET
|
Loading…
Reference in New Issue
Block a user