1
0
mirror of https://github.com/golang/go synced 2024-09-30 20:28:32 -06:00

math: use portable Exp instead of 387 instructions on 386

The 387 implementation is less accurate and slower.

name     old time/op  new time/op  delta
Exp-8    29.7ns ± 2%  24.0ns ± 2%  -19.08%  (p=0.000 n=10+10)

This makes Gamma more accurate too.

Change-Id: Iad33b9cce0b087ccbce3e08ba7a6d285c4999d02
Reviewed-on: https://go-review.googlesource.com/30230
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Quentin Smith <quentin@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
This commit is contained in:
Russ Cox 2016-10-03 22:33:49 -04:00
parent 84743c348b
commit aab849e429
2 changed files with 4 additions and 34 deletions

View File

@@ -1974,7 +1974,7 @@ func TestExp(t *testing.T) {
func testExp(t *testing.T, Exp func(float64) float64, name string) {
for i := 0; i < len(vf); i++ {
-if f := Exp(vf[i]); !close(exp[i], f) {
+if f := Exp(vf[i]); !veryclose(exp[i], f) {
t.Errorf("%s(%g) = %g, want %g", name, vf[i], f, exp[i])
}
}

View File

@@ -6,36 +6,6 @@
// func Exp(x float64) float64
TEXT ·Exp(SB),NOSPLIT,$0
// test bits for not-finite
MOVL x_hi+4(FP), AX
ANDL $0x7ff00000, AX
CMPL AX, $0x7ff00000
JEQ not_finite
FLDL2E // F0=log2(e)
FMULD x+0(FP), F0 // F0=x*log2(e)
FMOVD F0, F1 // F0=x*log2(e), F1=x*log2(e)
FRNDINT // F0=int(x*log2(e)), F1=x*log2(e)
FSUBD F0, F1 // F0=int(x*log2(e)), F1=x*log2(e)-int(x*log2(e))
FXCHD F0, F1 // F0=x*log2(e)-int(x*log2(e)), F1=int(x*log2(e))
F2XM1 // F0=2**(x*log2(e)-int(x*log2(e)))-1, F1=int(x*log2(e))
FLD1 // F0=1, F1=2**(x*log2(e)-int(x*log2(e)))-1, F2=int(x*log2(e))
FADDDP F0, F1 // F0=2**(x*log2(e)-int(x*log2(e))), F1=int(x*log2(e))
FSCALE // F0=e**x, F1=int(x*log2(e))
FMOVDP F0, F1 // F0=e**x
FMOVDP F0, ret+8(FP)
RET
not_finite:
// test bits for -Inf
MOVL x_hi+4(FP), BX
MOVL x_lo+0(FP), CX
CMPL BX, $0xfff00000
JNE not_neginf
CMPL CX, $0
JNE not_neginf
FLDZ // F0=0
FMOVDP F0, ret+8(FP)
RET
not_neginf:
MOVL CX, ret_lo+8(FP)
MOVL BX, ret_hi+12(FP)
RET
// Used to use 387 assembly (FLDL2E+F2XM1) here,
// but it was both slower and less accurate than the portable Go code.
JMP ·exp(SB)