1
0
mirror of https://github.com/golang/go synced 2024-11-12 07:10:22 -07:00

math: amd64 versions of exp and fabs

Benchmark of Exp improves to 28 ns/op from 64 ns/op
on a 2.53GHz Intel Core 2 Duo.

R=rsc
CC=golang-dev
https://golang.org/cl/1594041
This commit is contained in:
Charles L. Dorian 2010-06-30 14:44:27 -07:00 committed by Russ Cox
parent 0e25775518
commit a0117bafa0
6 changed files with 143 additions and 13 deletions

View File

@ -7,6 +7,8 @@ include ../../Make.$(GOARCH)
TARG=math TARG=math
OFILES_amd64=\ OFILES_amd64=\
exp_amd64.$O\
fabs_amd64.$O\
fdim_amd64.$O\ fdim_amd64.$O\
sqrt_amd64.$O\ sqrt_amd64.$O\
@ -48,6 +50,7 @@ ALLGOFILES=\
copysign.go\ copysign.go\
erf.go\ erf.go\
exp.go\ exp.go\
exp2.go\
expm1.go\ expm1.go\
fabs.go\ fabs.go\
fdim.go\ fdim.go\

View File

@ -2343,9 +2343,9 @@ func BenchmarkExp2(b *testing.B) {
} }
} }
func BenchmarkFloor(b *testing.B) { func BenchmarkFabs(b *testing.B) {
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
Floor(.5) Fabs(.5)
} }
} }
@ -2355,6 +2355,12 @@ func BenchmarkFdim(b *testing.B) {
} }
} }
// BenchmarkFloor times b.N back-to-back calls of Floor on the constant 0.5.
func BenchmarkFloor(b *testing.B) {
	for n := 0; n < b.N; n++ {
		Floor(.5)
	}
}
func BenchmarkFmax(b *testing.B) { func BenchmarkFmax(b *testing.B) {
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
Fmax(10, 3) Fmax(10, 3)
@ -2445,18 +2451,18 @@ func BenchmarkLogb(b *testing.B) {
} }
} }
// BenchmarkLog10 times b.N back-to-back calls of Log10 on the constant 0.5.
func BenchmarkLog10(b *testing.B) {
	for n := 0; n < b.N; n++ {
		Log10(.5)
	}
}
func BenchmarkLog1p(b *testing.B) { func BenchmarkLog1p(b *testing.B) {
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
Log1p(.5) Log1p(.5)
} }
} }
// BenchmarkLog10 times b.N back-to-back calls of Log10 on the constant 0.5.
func BenchmarkLog10(b *testing.B) {
	for n := 0; n < b.N; n++ {
		Log10(.5)
	}
}
func BenchmarkLog2(b *testing.B) { func BenchmarkLog2(b *testing.B) {
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
Log2(.5) Log2(.5)

View File

@ -139,8 +139,3 @@ func Exp(x float64) float64 {
// TODO(rsc): make sure Ldexp can handle boundary k // TODO(rsc): make sure Ldexp can handle boundary k
return Ldexp(y, k) return Ldexp(y, k)
} }
// Exp2 returns 2**x, the base-2 exponential of x.
//
// Special cases are the same as Exp.
func Exp2(x float64) float64 { return Exp(x * Ln2) }

10
src/pkg/math/exp2.go Normal file
View File

@ -0,0 +1,10 @@
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package math
// Exp2 returns 2**x, the base-2 exponential of x.
//
// Special cases are the same as Exp.
func Exp2(x float64) float64 { return Exp(x * Ln2) }

104
src/pkg/math/exp_amd64.s Normal file
View File

@ -0,0 +1,104 @@
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// The method is based on a paper by Naoki Shibata: "Efficient evaluation
// methods of elementary functions suitable for SIMD computation", Proc.
// of International Supercomputing Conference 2010 (ISC'10), pp. 25 -- 32
// (May 2010). The paper is available at
// http://www.springerlink.com/content/340228x165742104/
//
// The original code and the constants below are from the author's
// implementation available at http://freshmeat.net/projects/sleef.
// The README file says, "The software is in public domain.
// You can use the software without any obligation."
//
// This code is a simplified version of the original.
#define LN2 0.6931471805599453094172321214581766 // log_e(2)
#define LOG2E 1.4426950408889634073599246810018920 // 1/LN2
#define LN2U 0.69314718055966295651160180568695068359375 // upper half LN2
#define LN2L 0.28235290563031577122588448175013436025525412068e-12 // lower half LN2
#define OVERFLOW 7.09782712893383973096e+02 // approx. log_e(MaxFloat64); larger x gives +Inf

// func Exp(x float64) float64
//
// Exp returns e**x, the base-e exponential of x.
//
// Special cases:
//	Exp(+Inf) = +Inf
//	Exp(NaN) = NaN
//	Exp(-Inf) = 0
// A finite x greater than OVERFLOW, or one whose biased result exponent
// would reach 0x7ff, returns +Inf.  A finite x whose biased result exponent
// would be <= 0 returns 0 (such results are flushed to zero rather than
// being produced as subnormals).
TEXT ·Exp(SB),7,$0
	// test bits for not-finite: all exponent bits set means Inf or NaN
	MOVQ    x+0(FP), AX
	MOVQ    $0x7ff0000000000000, BX
	ANDQ    BX, AX
	CMPQ    BX, AX
	JEQ     not_finite
	MOVSD   x+0(FP), X0
	// Large positive arguments overflow.  Reject them before the integer
	// conversion below, which cannot hold an arbitrarily large exponent.
	MOVSD   $OVERFLOW, X1
	COMISD  X1, X0
	JA      overflow
	// k = trunc(x * LOG2E); x -= k*LN2U; x -= k*LN2L
	// (ln 2 is subtracted in two pieces, upper then lower half)
	MOVSD   $LOG2E, X1
	MULSD   X0, X1
	CVTTSD2SQ X1, BX // BX = exponent
	CVTSQ2SD BX, X1
	MOVSD   $LN2U, X2
	MULSD   X1, X2
	SUBSD   X2, X0
	MOVSD   $LN2L, X2
	MULSD   X1, X2
	SUBSD   X2, X0
	// reduce argument: scale by 1/16; undone by the four squarings below
	MOVSD   $0.0625, X1
	MULSD   X1, X0
	// Taylor series evaluation (Horner form), coefficients 1/8! down to 1/1!:
	// X1 = 1 + x/2! + x**2/3! + ... + x**7/8!
	MOVSD   $2.4801587301587301587e-5, X1
	MULSD   X0, X1
	MOVSD   $1.9841269841269841270e-4, X2
	ADDSD   X2, X1
	MULSD   X0, X1
	MOVSD   $1.3888888888888888889e-3, X2
	ADDSD   X2, X1
	MULSD   X0, X1
	MOVSD   $8.3333333333333333333e-3, X2
	ADDSD   X2, X1
	MULSD   X0, X1
	MOVSD   $4.1666666666666666667e-2, X2
	ADDSD   X2, X1
	MULSD   X0, X1
	MOVSD   $1.6666666666666666667e-1, X2
	ADDSD   X2, X1
	MULSD   X0, X1
	MOVSD   $0.5, X2
	ADDSD   X2, X1
	MULSD   X0, X1
	MOVSD   $1.0, X2
	ADDSD   X2, X1
	// X0 = x * X1, the series for e**x - 1
	MULSD   X1, X0
	// Four rounds of y = y*(y+2), i.e. (1+y)**2 - 1, raise the value to
	// the 16th power while keeping it in "minus one" form.
	MOVSD   $2.0, X1
	ADDSD   X0, X1
	MULSD   X1, X0
	MOVSD   $2.0, X1
	ADDSD   X0, X1
	MULSD   X1, X0
	MOVSD   $2.0, X1
	ADDSD   X0, X1
	MULSD   X1, X0
	MOVSD   $2.0, X1
	ADDSD   X0, X1
	MULSD   X1, X0
	// add the 1 back: X0 = fr
	MOVSD   $1.0, X1
	ADDSD   X1, X0
	// return ldexp(fr, exp): build 2**exp by placing the biased exponent
	// in the exponent field, then multiply.
	MOVQ    $0x3ff, AX // exponent bias
	ADDQ    AX, BX
	// The biased exponent must lie in [1, 0x7fe]; anything else would
	// spill outside the 11-bit exponent field when shifted.
	JLE     underflow
	CMPQ    BX, $0x7ff
	JGE     overflow
	MOVQ    BX, X1
	MOVQ    $52, AX // shift
	MOVQ    AX, X2
	PSLLQ   X2, X1
	MULSD   X1, X0
	MOVSD   X0, r+8(FP)
	RET
not_finite:
	// test bits for -Inf
	MOVQ    x+0(FP), AX
	MOVQ    $0xfff0000000000000, BX
	CMPQ    BX, AX
	JNE     not_neginf
underflow:
	// x is -Inf, or the result is too small: return 0
	XORQ    AX, AX
	MOVQ    AX, r+8(FP)
	RET
overflow:
	// result is too large: return +Inf
	MOVQ    $0x7ff0000000000000, AX
not_neginf:
	// +Inf or NaN arrive here with AX = x; overflow arrives with AX = +Inf
	MOVQ    AX, r+8(FP)
	RET

12
src/pkg/math/fabs_amd64.s Normal file
View File

@ -0,0 +1,12 @@
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// func Fabs(x float64) float64
//
// Fabs returns x with its sign bit cleared.
TEXT ·Fabs(SB),7,$0
	MOVSD   x+0(FP), X1  // X1 = x
	MOVQ    $(1<<63), AX // sign-bit mask
	MOVQ    AX, X0       // X0 = bit pattern of -0.0
	ANDNPD  X1, X0       // X0 = ^X0 & X1
	MOVSD   X0, r+8(FP)
	RET
RET