mirror of
https://github.com/golang/go
synced 2024-11-12 07:10:22 -07:00
math: amd64 versions of exp and fabs
Benchmark of exp to 28 ns/op from 64 ns/op, on 2.53GHz Intel Core 2 Duo. R=rsc CC=golang-dev https://golang.org/cl/1594041
This commit is contained in:
parent
0e25775518
commit
a0117bafa0
@ -7,6 +7,8 @@ include ../../Make.$(GOARCH)
|
|||||||
TARG=math
|
TARG=math
|
||||||
|
|
||||||
OFILES_amd64=\
|
OFILES_amd64=\
|
||||||
|
exp_amd64.$O\
|
||||||
|
fabs_amd64.$O\
|
||||||
fdim_amd64.$O\
|
fdim_amd64.$O\
|
||||||
sqrt_amd64.$O\
|
sqrt_amd64.$O\
|
||||||
|
|
||||||
@ -48,6 +50,7 @@ ALLGOFILES=\
|
|||||||
copysign.go\
|
copysign.go\
|
||||||
erf.go\
|
erf.go\
|
||||||
exp.go\
|
exp.go\
|
||||||
|
exp2.go\
|
||||||
expm1.go\
|
expm1.go\
|
||||||
fabs.go\
|
fabs.go\
|
||||||
fdim.go\
|
fdim.go\
|
||||||
|
@ -2343,9 +2343,9 @@ func BenchmarkExp2(b *testing.B) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func BenchmarkFloor(b *testing.B) {
|
func BenchmarkFabs(b *testing.B) {
|
||||||
for i := 0; i < b.N; i++ {
|
for i := 0; i < b.N; i++ {
|
||||||
Floor(.5)
|
Fabs(.5)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2355,6 +2355,12 @@ func BenchmarkFdim(b *testing.B) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func BenchmarkFloor(b *testing.B) {
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
Floor(.5)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func BenchmarkFmax(b *testing.B) {
|
func BenchmarkFmax(b *testing.B) {
|
||||||
for i := 0; i < b.N; i++ {
|
for i := 0; i < b.N; i++ {
|
||||||
Fmax(10, 3)
|
Fmax(10, 3)
|
||||||
@ -2445,18 +2451,18 @@ func BenchmarkLogb(b *testing.B) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func BenchmarkLog10(b *testing.B) {
|
|
||||||
for i := 0; i < b.N; i++ {
|
|
||||||
Log10(.5)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func BenchmarkLog1p(b *testing.B) {
|
func BenchmarkLog1p(b *testing.B) {
|
||||||
for i := 0; i < b.N; i++ {
|
for i := 0; i < b.N; i++ {
|
||||||
Log1p(.5)
|
Log1p(.5)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func BenchmarkLog10(b *testing.B) {
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
Log10(.5)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func BenchmarkLog2(b *testing.B) {
|
func BenchmarkLog2(b *testing.B) {
|
||||||
for i := 0; i < b.N; i++ {
|
for i := 0; i < b.N; i++ {
|
||||||
Log2(.5)
|
Log2(.5)
|
||||||
|
@ -139,8 +139,3 @@ func Exp(x float64) float64 {
|
|||||||
// TODO(rsc): make sure Ldexp can handle boundary k
|
// TODO(rsc): make sure Ldexp can handle boundary k
|
||||||
return Ldexp(y, k)
|
return Ldexp(y, k)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Exp2 returns 2**x, the base-2 exponential of x.
|
|
||||||
//
|
|
||||||
// Special cases are the same as Exp.
|
|
||||||
func Exp2(x float64) float64 { return Exp(x * Ln2) }
|
|
||||||
|
10
src/pkg/math/exp2.go
Normal file
10
src/pkg/math/exp2.go
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
// Copyright 2010 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package math
|
||||||
|
|
||||||
|
// Exp2 returns 2**x, the base-2 exponential of x.
|
||||||
|
//
|
||||||
|
// Special cases are the same as Exp.
|
||||||
|
func Exp2(x float64) float64 { return Exp(x * Ln2) }
|
104
src/pkg/math/exp_amd64.s
Normal file
104
src/pkg/math/exp_amd64.s
Normal file
@ -0,0 +1,104 @@
|
|||||||
|
// Copyright 2010 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
// The method is based on a paper by Naoki Shibata: "Efficient evaluation
|
||||||
|
// methods of elementary functions suitable for SIMD computation", Proc.
|
||||||
|
// of International Supercomputing Conference 2010 (ISC'10), pp. 25 -- 32
|
||||||
|
// (May 2010). The paper is available at
|
||||||
|
// http://www.springerlink.com/content/340228x165742104/
|
||||||
|
//
|
||||||
|
// The original code and the constants below are from the author's
|
||||||
|
// implementation available at http://freshmeat.net/projects/sleef.
|
||||||
|
// The README file says, "The software is in public domain.
|
||||||
|
// You can use the software without any obligation."
|
||||||
|
//
|
||||||
|
// This code is a simplified version of the original.
|
||||||
|
|
||||||
|
#define LN2 0.6931471805599453094172321214581766 // log_e(2)
|
||||||
|
#define LOG2E 1.4426950408889634073599246810018920 // 1/LN2
|
||||||
|
#define LN2U 0.69314718055966295651160180568695068359375 // upper half LN2
|
||||||
|
#define LN2L 0.28235290563031577122588448175013436025525412068e-12 // lower half LN2
|
||||||
|
|
||||||
|
// func Exp(x float64) float64
|
||||||
|
TEXT ·Exp(SB),7,$0
|
||||||
|
// test bits for not-finite
|
||||||
|
MOVQ x+0(FP), AX
|
||||||
|
MOVQ $0x7ff0000000000000, BX
|
||||||
|
ANDQ BX, AX
|
||||||
|
CMPQ BX, AX
|
||||||
|
JEQ not_finite
|
||||||
|
MOVSD x+0(FP), X0
|
||||||
|
MOVSD $LOG2E, X1
|
||||||
|
MULSD X0, X1
|
||||||
|
CVTTSD2SQ X1, BX // BX = exponent
|
||||||
|
CVTSQ2SD BX, X1
|
||||||
|
MOVSD $LN2U, X2
|
||||||
|
MULSD X1, X2
|
||||||
|
SUBSD X2, X0
|
||||||
|
MOVSD $LN2L, X2
|
||||||
|
MULSD X1, X2
|
||||||
|
SUBSD X2, X0
|
||||||
|
// reduce argument
|
||||||
|
MOVSD $0.0625, X1
|
||||||
|
MULSD X1, X0
|
||||||
|
// Taylor series evaluation
|
||||||
|
MOVSD $2.4801587301587301587e-5, X1
|
||||||
|
MULSD X0, X1
|
||||||
|
MOVSD $1.9841269841269841270e-4, X2
|
||||||
|
ADDSD X2, X1
|
||||||
|
MULSD X0, X1
|
||||||
|
MOVSD $1.3888888888888888889e-3, X2
|
||||||
|
ADDSD X2, X1
|
||||||
|
MULSD X0, X1
|
||||||
|
MOVSD $8.3333333333333333333e-3, X2
|
||||||
|
ADDSD X2, X1
|
||||||
|
MULSD X0, X1
|
||||||
|
MOVSD $4.1666666666666666667e-2, X2
|
||||||
|
ADDSD X2, X1
|
||||||
|
MULSD X0, X1
|
||||||
|
MOVSD $1.6666666666666666667e-1, X2
|
||||||
|
ADDSD X2, X1
|
||||||
|
MULSD X0, X1
|
||||||
|
MOVSD $0.5, X2
|
||||||
|
ADDSD X2, X1
|
||||||
|
MULSD X0, X1
|
||||||
|
MOVSD $1.0, X2
|
||||||
|
ADDSD X2, X1
|
||||||
|
MULSD X1, X0
|
||||||
|
MOVSD $2.0, X1
|
||||||
|
ADDSD X0, X1
|
||||||
|
MULSD X1, X0
|
||||||
|
MOVSD $2.0, X1
|
||||||
|
ADDSD X0, X1
|
||||||
|
MULSD X1, X0
|
||||||
|
MOVSD $2.0, X1
|
||||||
|
ADDSD X0, X1
|
||||||
|
MULSD X1, X0
|
||||||
|
MOVSD $2.0, X1
|
||||||
|
ADDSD X0, X1
|
||||||
|
MULSD X1, X0
|
||||||
|
MOVSD $1.0, X1
|
||||||
|
ADDSD X1, X0
|
||||||
|
// return ldexp(fr, exp)
|
||||||
|
MOVQ $0x3ff, AX // bias + 1
|
||||||
|
ADDQ AX, BX
|
||||||
|
MOVQ BX, X1
|
||||||
|
MOVQ $52, AX // shift
|
||||||
|
MOVQ AX, X2
|
||||||
|
PSLLQ X2, X1
|
||||||
|
MULSD X1, X0
|
||||||
|
MOVSD X0, r+8(FP)
|
||||||
|
RET
|
||||||
|
not_finite:
|
||||||
|
// test bits for -Inf
|
||||||
|
MOVQ x+0(FP), AX
|
||||||
|
MOVQ $0xfff0000000000000, BX
|
||||||
|
CMPQ BX, AX
|
||||||
|
JNE not_neginf
|
||||||
|
XORQ AX, AX
|
||||||
|
MOVQ AX, r+8(FP)
|
||||||
|
RET
|
||||||
|
not_neginf:
|
||||||
|
MOVQ AX, r+8(FP)
|
||||||
|
RET
|
12
src/pkg/math/fabs_amd64.s
Normal file
12
src/pkg/math/fabs_amd64.s
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
// Copyright 2010 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
// func Fabs(x float64) float64
|
||||||
|
TEXT ·Fabs(SB),7,$0
|
||||||
|
MOVQ $(1<<63), BX
|
||||||
|
MOVQ BX, X0 // movsd $(-0.0), x0
|
||||||
|
MOVSD x+0(FP), X1
|
||||||
|
ANDNPD X1, X0
|
||||||
|
MOVSD X0, r+8(FP)
|
||||||
|
RET
|
Loading…
Reference in New Issue
Block a user