math: use portable Exp instead of 387 instructions on 386

The 387 implementation is less accurate and slower. name old time/op new time/op delta Exp-8 29.7ns ± 2% 24.0ns ± 2% -19.08% (p=0.000 n=10+10) This makes Gamma more accurate too. Change-Id: Iad33b9cce0b087ccbce3e08ba7a6d285c4999d02 Reviewed-on: https://go-review.googlesource.com/30230 Run-TryBot: Russ Cox <rsc@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Quentin Smith <quentin@golang.org> Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
2024-11-18 15:04:44 -07:00 · 2016-10-03 22:33:49 -04:00 · 2016-10-03 22:33:49 -04:00 · aab849e429
commit aab849e429
parent 84743c348b
2 changed files with 4 additions and 34 deletions
--- a/src/math/all_test.go
+++ b/src/math/all_test.go
@ -1974,7 +1974,7 @@ func TestExp(t *testing.T) {
 func testExp(t *testing.T, Exp func(float64) float64, name string) {
 	for i := 0; i < len(vf); i++ {
-		if f := Exp(vf[i]); !close(exp[i], f) {
+		if f := Exp(vf[i]); !veryclose(exp[i], f) {
 			t.Errorf("%s(%g) = %g, want %g", name, vf[i], f, exp[i])
 		}
 	}
--- a/src/math/exp_386.s
+++ b/src/math/exp_386.s
@ -6,36 +6,6 @@
 // func Exp(x float64) float64
 TEXT ·Exp(SB),NOSPLIT,$0
-// test bits for not-finite
+	// Used to use 387 assembly (FLDL2E+F2XM1) here,
-	MOVL    x_hi+4(FP), AX
+	// but it was both slower and less accurate than the portable Go code.
-	ANDL    $0x7ff00000, AX
+	JMP ·exp(SB)
 	CMPL    AX, $0x7ff00000
 	JEQ     not_finite
 	FLDL2E                // F0=log2(e)
 	FMULD   x+0(FP), F0   // F0=x*log2(e)
 	FMOVD   F0, F1        // F0=x*log2(e), F1=x*log2(e)
 	FRNDINT               // F0=int(x*log2(e)), F1=x*log2(e)
 	FSUBD   F0, F1        // F0=int(x*log2(e)), F1=x*log2(e)-int(x*log2(e))
 	FXCHD   F0, F1        // F0=x*log2(e)-int(x*log2(e)), F1=int(x*log2(e))
 	F2XM1                 // F0=2**(x*log2(e)-int(x*log2(e)))-1, F1=int(x*log2(e))
 	FLD1                  // F0=1, F1=2**(x*log2(e)-int(x*log2(e)))-1, F2=int(x*log2(e))
 	FADDDP  F0, F1        // F0=2**(x*log2(e)-int(x*log2(e))), F1=int(x*log2(e))
 	FSCALE                // F0=e**x, F1=int(x*log2(e))
 	FMOVDP  F0, F1        // F0=e**x
 	FMOVDP  F0, ret+8(FP)
 	RET
 not_finite:
 // test bits for -Inf
 	MOVL    x_hi+4(FP), BX
 	MOVL    x_lo+0(FP), CX
 	CMPL    BX, $0xfff00000
 	JNE     not_neginf
 	CMPL    CX, $0
 	JNE     not_neginf
 	FLDZ                  // F0=0
 	FMOVDP  F0, ret+8(FP)
 	RET
 not_neginf:
 	MOVL    CX, ret_lo+8(FP)
 	MOVL    BX, ret_hi+12(FP)
 	RET