diff --git a/src/math/arith_s390x.go b/src/math/arith_s390x.go new file mode 100644 index 00000000000..892935a1eff --- /dev/null +++ b/src/math/arith_s390x.go @@ -0,0 +1,29 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +func log10TrampolineSetup(x float64) float64 +func log10Asm(x float64) float64 + +func cosTrampolineSetup(x float64) float64 +func cosAsm(x float64) float64 + +func coshTrampolineSetup(x float64) float64 +func coshAsm(x float64) float64 + +func sinTrampolineSetup(x float64) float64 +func sinAsm(x float64) float64 + +func sinhTrampolineSetup(x float64) float64 +func sinhAsm(x float64) float64 + +func tanhTrampolineSetup(x float64) float64 +func tanhAsm(x float64) float64 + +// hasVectorFacility reports whether the machine has the z/Architecture +// vector facility installed and enabled. +func hasVectorFacility() bool + +var hasVX = hasVectorFacility() diff --git a/src/math/arith_s390x_test.go b/src/math/arith_s390x_test.go new file mode 100644 index 00000000000..b4f3070c6ec --- /dev/null +++ b/src/math/arith_s390x_test.go @@ -0,0 +1,144 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Tests whether the non vector routines are working, even when the tests are run on a +// vector-capable machine. +package math_test + +import ( + . 
"math" + "testing" +) + +func TestCosNovec(t *testing.T) { + if !HasVX { + t.Skipf("no vector support") + } + for i := 0; i < len(vf); i++ { + if f := CosNoVec(vf[i]); !veryclose(cos[i], f) { + t.Errorf("Cos(%g) = %g, want %g", vf[i], f, cos[i]) + } + } + for i := 0; i < len(vfcosSC); i++ { + if f := CosNoVec(vfcosSC[i]); !alike(cosSC[i], f) { + t.Errorf("Cos(%g) = %g, want %g", vfcosSC[i], f, cosSC[i]) + } + } +} + +func TestCoshNovec(t *testing.T) { + if !HasVX { + t.Skipf("no vector support") + } + for i := 0; i < len(vf); i++ { + if f := CoshNoVec(vf[i]); !close(cosh[i], f) { + t.Errorf("Cosh(%g) = %g, want %g", vf[i], f, cosh[i]) + } + } + for i := 0; i < len(vfcoshSC); i++ { + if f := CoshNoVec(vfcoshSC[i]); !alike(coshSC[i], f) { + t.Errorf("Cosh(%g) = %g, want %g", vfcoshSC[i], f, coshSC[i]) + } + } +} +func TestSinNovec(t *testing.T) { + if !HasVX { + t.Skipf("no vector support") + } + for i := 0; i < len(vf); i++ { + if f := SinNoVec(vf[i]); !veryclose(sin[i], f) { + t.Errorf("Sin(%g) = %g, want %g", vf[i], f, sin[i]) + } + } + for i := 0; i < len(vfsinSC); i++ { + if f := SinNoVec(vfsinSC[i]); !alike(sinSC[i], f) { + t.Errorf("Sin(%g) = %g, want %g", vfsinSC[i], f, sinSC[i]) + } + } +} + +func TestSinhNovec(t *testing.T) { + if !HasVX { + t.Skipf("no vector support") + } + for i := 0; i < len(vf); i++ { + if f := SinhNoVec(vf[i]); !close(sinh[i], f) { + t.Errorf("Sinh(%g) = %g, want %g", vf[i], f, sinh[i]) + } + } + for i := 0; i < len(vfsinhSC); i++ { + if f := SinhNoVec(vfsinhSC[i]); !alike(sinhSC[i], f) { + t.Errorf("Sinh(%g) = %g, want %g", vfsinhSC[i], f, sinhSC[i]) + } + } +} + +// Check that math functions of high angle values +// return accurate results. [Since (vf[i] + large) - large != vf[i], +// testing for Trig(vf[i] + large) == Trig(vf[i]), where large is +// a multiple of 2*Pi, is misleading.] 
+func TestLargeCosNovec(t *testing.T) { + if !HasVX { + t.Skipf("no vector support") + } + large := float64(100000 * Pi) + for i := 0; i < len(vf); i++ { + f1 := cosLarge[i] + f2 := CosNoVec(vf[i] + large) + if !close(f1, f2) { + t.Errorf("Cos(%g) = %g, want %g", vf[i]+large, f2, f1) + } + } +} + +func TestLargeSinNovec(t *testing.T) { + if !HasVX { + t.Skipf("no vector support") + } + large := float64(100000 * Pi) + for i := 0; i < len(vf); i++ { + f1 := sinLarge[i] + f2 := SinNoVec(vf[i] + large) + if !close(f1, f2) { + t.Errorf("Sin(%g) = %g, want %g", vf[i]+large, f2, f1) + } + } +} + +func TestTanhNovec(t *testing.T) { + if !HasVX { + t.Skipf("no vector support") + } + for i := 0; i < len(vf); i++ { + if f := TanhNoVec(vf[i]); !veryclose(tanh[i], f) { + t.Errorf("Tanh(%g) = %g, want %g", vf[i], f, tanh[i]) + } + } + for i := 0; i < len(vftanhSC); i++ { + if f := TanhNoVec(vftanhSC[i]); !alike(tanhSC[i], f) { + t.Errorf("Tanh(%g) = %g, want %g", vftanhSC[i], f, tanhSC[i]) + } + } + +} + +func TestLog10Novec(t *testing.T) { + if !HasVX { + t.Skipf("no vector support") + } + for i := 0; i < len(vf); i++ { + a := Abs(vf[i]) + if f := Log10NoVec(a); !veryclose(log10[i], f) { + t.Errorf("Log10(%g) = %g, want %g", a, f, log10[i]) + } + } + if f := Log10NoVec(E); f != Log10E { + t.Errorf("Log10(%g) = %g, want %g", E, f, Log10E) + } + for i := 0; i < len(vflogSC); i++ { + if f := Log10NoVec(vflogSC[i]); !alike(logSC[i], f) { + t.Errorf("Log10(%g) = %g, want %g", vflogSC[i], f, logSC[i]) + } + } +} diff --git a/src/math/cosh_s390x.s b/src/math/cosh_s390x.s new file mode 100644 index 00000000000..d061bd06804 --- /dev/null +++ b/src/math/cosh_s390x.s @@ -0,0 +1,227 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "textflag.h" + +// Constants +DATA coshrodataL23<>+0(SB)/8, $0.231904681384629956E-16 +DATA coshrodataL23<>+8(SB)/8, $0.693147180559945286E+00 +DATA coshrodataL23<>+16(SB)/8, $0.144269504088896339E+01 +DATA coshrodataL23<>+24(SB)/8, $704.E0 +GLOBL coshrodataL23<>+0(SB), RODATA, $32 +DATA coshxinf<>+0(SB)/8, $0x7FF0000000000000 +GLOBL coshxinf<>+0(SB), RODATA, $8 +DATA coshxlim1<>+0(SB)/8, $800.E0 +GLOBL coshxlim1<>+0(SB), RODATA, $8 +DATA coshxaddhy<>+0(SB)/8, $0xc2f0000100003fdf +GLOBL coshxaddhy<>+0(SB), RODATA, $8 +DATA coshx4ff<>+0(SB)/8, $0x4ff0000000000000 +GLOBL coshx4ff<>+0(SB), RODATA, $8 +DATA coshe1<>+0(SB)/8, $0x3ff000000000000a +GLOBL coshe1<>+0(SB), RODATA, $8 + +// Log multiplier table +DATA coshtab<>+0(SB)/8, $0.442737824274138381E-01 +DATA coshtab<>+8(SB)/8, $0.263602189790660309E-01 +DATA coshtab<>+16(SB)/8, $0.122565642281703586E-01 +DATA coshtab<>+24(SB)/8, $0.143757052860721398E-02 +DATA coshtab<>+32(SB)/8, $-.651375034121276075E-02 +DATA coshtab<>+40(SB)/8, $-.119317678849450159E-01 +DATA coshtab<>+48(SB)/8, $-.150868749549871069E-01 +DATA coshtab<>+56(SB)/8, $-.161992609578469234E-01 +DATA coshtab<>+64(SB)/8, $-.154492360403337917E-01 +DATA coshtab<>+72(SB)/8, $-.129850717389178721E-01 +DATA coshtab<>+80(SB)/8, $-.892902649276657891E-02 +DATA coshtab<>+88(SB)/8, $-.338202636596794887E-02 +DATA coshtab<>+96(SB)/8, $0.357266307045684762E-02 +DATA coshtab<>+104(SB)/8, $0.118665304327406698E-01 +DATA coshtab<>+112(SB)/8, $0.214434994118118914E-01 +DATA coshtab<>+120(SB)/8, $0.322580645161290314E-01 +GLOBL coshtab<>+0(SB), RODATA, $128 + +// Minimax polynomial approximations +DATA coshe2<>+0(SB)/8, $0.500000000000004237e+00 +GLOBL coshe2<>+0(SB), RODATA, $8 +DATA coshe3<>+0(SB)/8, $0.166666666630345592e+00 +GLOBL coshe3<>+0(SB), RODATA, $8 +DATA coshe4<>+0(SB)/8, $0.416666664838056960e-01 +GLOBL coshe4<>+0(SB), RODATA, $8 +DATA coshe5<>+0(SB)/8, $0.833349307718286047e-02 +GLOBL coshe5<>+0(SB), RODATA, $8 +DATA coshe6<>+0(SB)/8, 
$0.138926439368309441e-02 +GLOBL coshe6<>+0(SB), RODATA, $8 + +// Cosh returns the hyperbolic cosine of x. +// +// Special cases are: +// Cosh(±0) = 1 +// Cosh(±Inf) = +Inf +// Cosh(NaN) = NaN +// The algorithm used is minimax polynomial approximation +// with coefficients determined with a Remez exchange algorithm. + +TEXT ·coshAsm(SB),NOSPLIT,$0-16 + FMOVD x+0(FP), F0 + MOVD $coshrodataL23<>+0(SB), R9 + WORD $0xB3120000 //ltdbr %f0,%f0 + MOVD $0x4086000000000000, R2 + MOVD $0x4086000000000000, R3 + BLTU L19 + FMOVD F0, F4 +L2: + WORD $0xED409018 //cdb %f4,.L24-.L23(%r9) + BYTE $0x00 + BYTE $0x19 + BGE L14 //jnl .L14 + BVS L14 + WFCEDBS V4, V4, V2 + BEQ L20 +L1: + FMOVD F0, ret+8(FP) + RET + +L14: + WFCEDBS V4, V4, V2 + BVS L1 + MOVD $coshxlim1<>+0(SB), R1 + FMOVD 0(R1), F2 + WFCHEDBS V4, V2, V2 + BEQ L21 + MOVD $coshxaddhy<>+0(SB), R1 + FMOVD coshrodataL23<>+16(SB), F5 + FMOVD 0(R1), F2 + WFMSDB V0, V5, V2, V5 + FMOVD coshrodataL23<>+8(SB), F3 + FADD F5, F2 + MOVD $coshe6<>+0(SB), R1 + WFMSDB V2, V3, V0, V3 + FMOVD 0(R1), F6 + WFMDB V3, V3, V1 + MOVD $coshe4<>+0(SB), R1 + FMOVD coshrodataL23<>+0(SB), F7 + WFMADB V2, V7, V3, V2 + FMOVD 0(R1), F3 + MOVD $coshe5<>+0(SB), R1 + WFMADB V1, V6, V3, V6 + FMOVD 0(R1), F7 + MOVD $coshe3<>+0(SB), R1 + FMOVD 0(R1), F3 + WFMADB V1, V7, V3, V7 + FNEG F2, F3 + WORD $0xB3CD0015 //lgdr %r1,%f5 + MOVD $coshe2<>+0(SB), R3 + WFCEDBS V4, V0, V0 + FMOVD 0(R3), F5 + MOVD $coshe1<>+0(SB), R3 + WFMADB V1, V6, V5, V6 + FMOVD 0(R3), F5 + WORD $0xEC21000F //risbgn %r2,%r1,64-64+0,64-64+0+16-1,64-0-16 + BYTE $0x30 + BYTE $0x59 + WFMADB V1, V7, V5, V1 + BVS L22 + WORD $0xEC4139BC //risbg %r4,%r1,57,128+60,3 + BYTE $0x03 + BYTE $0x55 + MOVD $coshtab<>+0(SB), R3 + WFMADB V3, V6, V1, V6 + WORD $0x68043000 //ld %f0,0(%r4,%r3) + FMSUB F0, F3, F2, F2 + WORD $0xA71AF000 //ahi %r1,-4096 + WFMADB V2, V6, V0, V6 +L17: + WORD $0xEC21000F //risbgn %r2,%r1,64-64+0,64-64+0+16-1,64-0-16 + BYTE $0x30 + BYTE $0x59 + WORD $0xB3C10022 //ldgr %f2,%r2 + FMADD F2, 
F6, F2, F2 + MOVD $coshx4ff<>+0(SB), R1 + FMOVD 0(R1), F0 + FMUL F2, F0 + FMOVD F0, ret+8(FP) + RET + +L19: + FNEG F0, F4 + BR L2 +L20: + MOVD $coshxaddhy<>+0(SB), R1 + FMOVD coshrodataL23<>+16(SB), F3 + FMOVD 0(R1), F2 + WFMSDB V0, V3, V2, V3 + FMOVD coshrodataL23<>+8(SB), F4 + FADD F3, F2 + MOVD $coshe6<>+0(SB), R1 + FMSUB F4, F2, F0, F0 + FMOVD 0(R1), F6 + WFMDB V0, V0, V1 + MOVD $coshe4<>+0(SB), R1 + FMOVD 0(R1), F4 + MOVD $coshe5<>+0(SB), R1 + FMOVD coshrodataL23<>+0(SB), F5 + WFMADB V1, V6, V4, V6 + FMADD F5, F2, F0, F0 + FMOVD 0(R1), F2 + MOVD $coshe3<>+0(SB), R1 + FMOVD 0(R1), F4 + WFMADB V1, V2, V4, V2 + MOVD $coshe2<>+0(SB), R1 + FMOVD 0(R1), F5 + FNEG F0, F4 + WFMADB V1, V6, V5, V6 + MOVD $coshe1<>+0(SB), R1 + FMOVD 0(R1), F5 + WFMADB V1, V2, V5, V1 + WORD $0xB3CD0013 //lgdr %r1,%f3 + MOVD $coshtab<>+0(SB), R5 + WFMADB V4, V6, V1, V3 + WORD $0xEC4139BC //risbg %r4,%r1,57,128+60,3 + BYTE $0x03 + BYTE $0x55 + WFMSDB V4, V6, V1, V6 + WORD $0x68145000 //ld %f1,0(%r4,%r5) + WFMSDB V4, V1, V0, V2 + WORD $0xA7487FBE //lhi %r4,32702 + FMADD F3, F2, F1, F1 + SUBW R1, R4 + WORD $0xECC439BC //risbg %r12,%r4,57,128+60,3 + BYTE $0x03 + BYTE $0x55 + WORD $0x682C5000 //ld %f2,0(%r12,%r5) + FMSUB F2, F4, F0, F0 + WORD $0xEC21000F //risbgn %r2,%r1,64-64+0,64-64+0+16-1,64-0-16 + BYTE $0x30 + BYTE $0x59 + WFMADB V0, V6, V2, V6 + WORD $0xEC34000F //risbgn %r3,%r4,64-64+0,64-64+0+16-1,64-0-16 + BYTE $0x30 + BYTE $0x59 + WORD $0xB3C10022 //ldgr %f2,%r2 + WORD $0xB3C10003 //ldgr %f0,%r3 + FMADD F2, F1, F2, F2 + FMADD F0, F6, F0, F0 + FADD F2, F0 + FMOVD F0, ret+8(FP) + RET + +L22: + WORD $0xA7387FBE //lhi %r3,32702 + MOVD $coshtab<>+0(SB), R4 + SUBW R1, R3 + WFMSDB V3, V6, V1, V6 + WORD $0xEC3339BC //risbg %r3,%r3,57,128+60,3 + BYTE $0x03 + BYTE $0x55 + WORD $0x68034000 //ld %f0,0(%r3,%r4) + FMSUB F0, F3, F2, F2 + WORD $0xA7386FBE //lhi %r3,28606 + WFMADB V2, V6, V0, V6 + SUBW R1, R3, R1 + BR L17 +L21: + MOVD $coshxinf<>+0(SB), R1 + FMOVD 0(R1), F0 + FMOVD F0, ret+8(FP) + RET 
+ diff --git a/src/math/export_s390x_test.go b/src/math/export_s390x_test.go new file mode 100644 index 00000000000..3fdbd8640fe --- /dev/null +++ b/src/math/export_s390x_test.go @@ -0,0 +1,14 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +// Export internal functions and variable for testing. +var Log10NoVec = log10 +var CosNoVec = cos +var CoshNoVec = cosh +var SinNoVec = sin +var SinhNoVec = sinh +var TanhNoVec = tanh +var HasVX = hasVX diff --git a/src/math/log10_s390x.s b/src/math/log10_s390x.s new file mode 100644 index 00000000000..460bcd95bc8 --- /dev/null +++ b/src/math/log10_s390x.s @@ -0,0 +1,170 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +// Minimax polynomial coefficients and other constants +DATA log10rodataL19<>+0(SB)/8, $0.000000000000000000E+00 +DATA log10rodataL19<>+8(SB)/8, $-1.0 +DATA log10rodataL19<>+16(SB)/8, $0x7FF8000000000000 //+NaN +DATA log10rodataL19<>+24(SB)/8, $.15375570329280596749 +DATA log10rodataL19<>+32(SB)/8, $.60171950900703668594E+04 +DATA log10rodataL19<>+40(SB)/8, $-1.9578460454940795898 +DATA log10rodataL19<>+48(SB)/8, $0.78962633073318517310E-01 +DATA log10rodataL19<>+56(SB)/8, $-.71784211884836937993E-02 +DATA log10rodataL19<>+64(SB)/8, $0.87011165920689940661E-03 +DATA log10rodataL19<>+72(SB)/8, $-.11865158981621437541E-03 +DATA log10rodataL19<>+80(SB)/8, $0.17258413403018680410E-04 +DATA log10rodataL19<>+88(SB)/8, $0.40752932047883484315E-06 +DATA log10rodataL19<>+96(SB)/8, $-.26149194688832680410E-05 +DATA log10rodataL19<>+104(SB)/8, $0.92453396963875026759E-08 +DATA log10rodataL19<>+112(SB)/8, $-.64572084905921579630E-07 +DATA log10rodataL19<>+120(SB)/8, $-5.5 +DATA log10rodataL19<>+128(SB)/8, $18446744073709551616. 
+GLOBL log10rodataL19<>+0(SB), RODATA, $136 + +// Table of log10 correction terms +DATA log10tab2074<>+0(SB)/8, $0.254164497922885069E-01 +DATA log10tab2074<>+8(SB)/8, $0.179018857989381839E-01 +DATA log10tab2074<>+16(SB)/8, $0.118926768029048674E-01 +DATA log10tab2074<>+24(SB)/8, $0.722595568238080033E-02 +DATA log10tab2074<>+32(SB)/8, $0.376393570022739135E-02 +DATA log10tab2074<>+40(SB)/8, $0.138901135928814326E-02 +DATA log10tab2074<>+48(SB)/8, $0 +DATA log10tab2074<>+56(SB)/8, $-0.490780466387818203E-03 +DATA log10tab2074<>+64(SB)/8, $-0.159811431402137571E-03 +DATA log10tab2074<>+72(SB)/8, $0.925796337165100494E-03 +DATA log10tab2074<>+80(SB)/8, $0.270683176738357035E-02 +DATA log10tab2074<>+88(SB)/8, $0.513079030821304758E-02 +DATA log10tab2074<>+96(SB)/8, $0.815089785397996303E-02 +DATA log10tab2074<>+104(SB)/8, $0.117253060262419215E-01 +DATA log10tab2074<>+112(SB)/8, $0.158164239345343963E-01 +DATA log10tab2074<>+120(SB)/8, $0.203903595489229786E-01 +GLOBL log10tab2074<>+0(SB), RODATA, $128 + +// Log10 returns the decimal logarithm of the argument. +// +// Special cases are: +// Log10(+Inf) = +Inf +// Log10(0) = -Inf +// Log10(x < 0) = NaN +// Log10(NaN) = NaN +// The algorithm used is minimax polynomial approximation +// with coefficients determined with a Remez exchange algorithm. 
+ +TEXT ·log10Asm(SB),NOSPLIT,$8-16 + FMOVD x+0(FP), F0 + MOVD $log10rodataL19<>+0(SB), R9 + FMOVD F0, x-8(SP) + WORD $0xC0298006 //iilf %r2,2147909631 + BYTE $0x7F + BYTE $0xFF + WORD $0x5840F008 //l %r4, 8(%r15) + SUBW R4, R2, R3 + WORD $0xEC5320AF //risbg %r5,%r3,32,128+47,0 + BYTE $0x00 + BYTE $0x55 + MOVH $0x0, R1 + WORD $0xEC15001F //risbgn %r1,%r5,64-64+0,64-64+0+32-1,64-0-32 + BYTE $0x20 + BYTE $0x59 + WORD $0xC0590016 //iilf %r5,1507327 + BYTE $0xFF + BYTE $0xFF + MOVW R4, R10 + MOVW R5, R11 + CMPBLE R10, R11, L2 + WORD $0xC0297FEF //iilf %r2,2146435071 + BYTE $0xFF + BYTE $0xFF + MOVW R4, R10 + MOVW R2, R11 + CMPBLE R10, R11, L16 +L3: +L1: + FMOVD F0, ret+8(FP) + RET + +L2: + WORD $0xB3120000 //ltdbr %f0,%f0 + BLEU L13 + WORD $0xED009080 //mdb %f0,.L20-.L19(%r9) + BYTE $0x00 + BYTE $0x1C + FMOVD F0, x-8(SP) + WORD $0x5B20F008 //s %r2, 8(%r15) + WORD $0xEC3239BC //risbg %r3,%r2,57,128+60,64-13 + BYTE $0x33 + BYTE $0x55 + ANDW $0xFFFF0000, R2 + WORD $0xEC12001F //risbgn %r1,%r2,64-64+0,64-64+0+32-1,64-0-32 + BYTE $0x20 + BYTE $0x59 + ADDW $0x4000000, R2 + BLEU L17 +L8: + SRW $8, R2, R2 + ORW $0x45000000, R2 +L4: + FMOVD log10rodataL19<>+120(SB), F2 + WORD $0xB3C10041 //ldgr %f4,%r1 + WFMADB V4, V0, V2, V0 + FMOVD log10rodataL19<>+112(SB), F4 + FMOVD log10rodataL19<>+104(SB), F6 + WFMADB V0, V6, V4, V6 + FMOVD log10rodataL19<>+96(SB), F4 + FMOVD log10rodataL19<>+88(SB), F1 + WFMADB V0, V1, V4, V1 + WFMDB V0, V0, V4 + FMOVD log10rodataL19<>+80(SB), F2 + WFMADB V6, V4, V1, V6 + FMOVD log10rodataL19<>+72(SB), F1 + WFMADB V0, V2, V1, V2 + FMOVD log10rodataL19<>+64(SB), F1 + WORD $0xEC3339BC //risbg %r3,%r3,57,128+60,0 + BYTE $0x00 + BYTE $0x55 + WFMADB V4, V6, V2, V6 + FMOVD log10rodataL19<>+56(SB), F2 + WFMADB V0, V1, V2, V1 + VLVGF $0, R2, V2 + WFMADB V4, V6, V1, V4 + LDEBR F2, F2 + FMOVD log10rodataL19<>+48(SB), F6 + WFMADB V0, V4, V6, V4 + FMOVD log10rodataL19<>+40(SB), F1 + FMOVD log10rodataL19<>+32(SB), F6 + MOVD $log10tab2074<>+0(SB), R1 + WFMADB V2, V1, 
V6, V2 + WORD $0x68331000 //ld %f3,0(%r3,%r1) + WFMADB V0, V4, V3, V0 + FMOVD log10rodataL19<>+24(SB), F4 + FMADD F4, F2, F0, F0 + FMOVD F0, ret+8(FP) + RET + +L16: + WORD $0xEC2328B7 //risbg %r2,%r3,40,128+55,64-8 + BYTE $0x38 + BYTE $0x55 + WORD $0xEC3339BC //risbg %r3,%r3,57,128+60,64-13 + BYTE $0x33 + BYTE $0x55 + ORW $0x45000000, R2 + BR L4 +L13: + BGE L18 //jnl .L18 + BVS L18 + FMOVD log10rodataL19<>+16(SB), F0 + BR L1 +L17: + SRAW $1, R2, R2 + SUBW $0x40000000, R2 + BR L8 +L18: + FMOVD log10rodataL19<>+8(SB), F0 + WORD $0xED009000 //ddb %f0,.L36-.L19(%r9) + BYTE $0x00 + BYTE $0x1D + BR L1 diff --git a/src/math/sin_s390x.s b/src/math/sin_s390x.s new file mode 100644 index 00000000000..5dc823c07f5 --- /dev/null +++ b/src/math/sin_s390x.s @@ -0,0 +1,356 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +// Various constants +DATA sincosxnan<>+0(SB)/8, $0x7ff8000000000000 +GLOBL sincosxnan<>+0(SB), RODATA, $8 +DATA sincosxlim<>+0(SB)/8, $0x432921fb54442d19 +GLOBL sincosxlim<>+0(SB), RODATA, $8 +DATA sincosxadd<>+0(SB)/8, $0xc338000000000000 +GLOBL sincosxadd<>+0(SB), RODATA, $8 +DATA sincosxpi2l<>+0(SB)/8, $0.108285667392191389e-31 +GLOBL sincosxpi2l<>+0(SB), RODATA, $8 +DATA sincosxpi2m<>+0(SB)/8, $0.612323399573676480e-16 +GLOBL sincosxpi2m<>+0(SB), RODATA, $8 +DATA sincosxpi2h<>+0(SB)/8, $0.157079632679489656e+01 +GLOBL sincosxpi2h<>+0(SB), RODATA, $8 +DATA sincosrpi2<>+0(SB)/8, $0.636619772367581341e+00 +GLOBL sincosrpi2<>+0(SB), RODATA, $8 + +// Minimax polynomial approximations +DATA sincosc0<>+0(SB)/8, $0.100000000000000000E+01 +GLOBL sincosc0<>+0(SB), RODATA, $8 +DATA sincosc1<>+0(SB)/8, $-.499999999999999833E+00 +GLOBL sincosc1<>+0(SB), RODATA, $8 +DATA sincosc2<>+0(SB)/8, $0.416666666666625843E-01 +GLOBL sincosc2<>+0(SB), RODATA, $8 +DATA sincosc3<>+0(SB)/8, $-.138888888885498984E-02 +GLOBL sincosc3<>+0(SB), 
RODATA, $8 +DATA sincosc4<>+0(SB)/8, $0.248015871681607202E-04 +GLOBL sincosc4<>+0(SB), RODATA, $8 +DATA sincosc5<>+0(SB)/8, $-.275572911309937875E-06 +GLOBL sincosc5<>+0(SB), RODATA, $8 +DATA sincosc6<>+0(SB)/8, $0.208735047247632818E-08 +GLOBL sincosc6<>+0(SB), RODATA, $8 +DATA sincosc7<>+0(SB)/8, $-.112753632738365317E-10 +GLOBL sincosc7<>+0(SB), RODATA, $8 +DATA sincoss0<>+0(SB)/8, $0.100000000000000000E+01 +GLOBL sincoss0<>+0(SB), RODATA, $8 +DATA sincoss1<>+0(SB)/8, $-.166666666666666657E+00 +GLOBL sincoss1<>+0(SB), RODATA, $8 +DATA sincoss2<>+0(SB)/8, $0.833333333333309209E-02 +GLOBL sincoss2<>+0(SB), RODATA, $8 +DATA sincoss3<>+0(SB)/8, $-.198412698410701448E-03 +GLOBL sincoss3<>+0(SB), RODATA, $8 +DATA sincoss4<>+0(SB)/8, $0.275573191453906794E-05 +GLOBL sincoss4<>+0(SB), RODATA, $8 +DATA sincoss5<>+0(SB)/8, $-.250520918387633290E-07 +GLOBL sincoss5<>+0(SB), RODATA, $8 +DATA sincoss6<>+0(SB)/8, $0.160571285514715856E-09 +GLOBL sincoss6<>+0(SB), RODATA, $8 +DATA sincoss7<>+0(SB)/8, $-.753213484933210972E-12 +GLOBL sincoss7<>+0(SB), RODATA, $8 + +// Sin returns the sine of the radian argument x. +// +// Special cases are: +// Sin(±0) = ±0 +// Sin(±Inf) = NaN +// Sin(NaN) = NaN +// The algorithm used is minimax polynomial approximation +// with coefficients determined with a Remez exchange algorithm. 
+ +TEXT ·sinAsm(SB),NOSPLIT,$0-16 + FMOVD x+0(FP), F0 + //special case Sin(±0) = ±0 + FMOVD $(0.0), F1 + FCMPU F0, F1 + BEQ sinIsZero + WORD $0xB3120000 //ltdbr %f0,%f0 + BLTU L17 + FMOVD F0, F5 +L2: + MOVD $sincoss7<>+0(SB), R1 + FMOVD 0(R1), F4 + MOVD $sincoss6<>+0(SB), R1 + FMOVD 0(R1), F1 + MOVD $sincoss5<>+0(SB), R1 + VLEG $0, 0(R1), V18 + MOVD $sincoss4<>+0(SB), R1 + FMOVD 0(R1), F6 + MOVD $sincoss2<>+0(SB), R1 + VLEG $0, 0(R1), V16 + MOVD $sincoss3<>+0(SB), R1 + FMOVD 0(R1), F7 + MOVD $sincoss1<>+0(SB), R1 + FMOVD 0(R1), F3 + MOVD $sincoss0<>+0(SB), R1 + FMOVD 0(R1), F2 + WFCHDBS V2, V5, V2 + BEQ L18 + MOVD $sincosrpi2<>+0(SB), R1 + FMOVD 0(R1), F3 + MOVD $sincosxadd<>+0(SB), R1 + FMOVD 0(R1), F2 + WFMSDB V0, V3, V2, V3 + FMOVD 0(R1), F6 + FADD F3, F6 + MOVD $sincosxpi2h<>+0(SB), R1 + FMOVD 0(R1), F2 + FMSUB F2, F6, F0, F0 + MOVD $sincosxpi2m<>+0(SB), R1 + FMOVD 0(R1), F4 + FMADD F4, F6, F0, F0 + MOVD $sincosxpi2l<>+0(SB), R1 + WFMDB V0, V0, V1 + FMOVD 0(R1), F7 + WFMDB V1, V1, V2 + WORD $0xB3CD0013 //lgdr %r1,%f3 + MOVD $sincosxlim<>+0(SB), R2 + WORD $0xA7110001 //tmll %r1,1 + BEQ L6 + FMOVD 0(R2), F0 + WFCHDBS V0, V5, V0 + BNE L14 + MOVD $sincosc7<>+0(SB), R2 + FMOVD 0(R2), F0 + MOVD $sincosc6<>+0(SB), R2 + FMOVD 0(R2), F4 + MOVD $sincosc5<>+0(SB), R2 + WFMADB V1, V0, V4, V0 + FMOVD 0(R2), F6 + MOVD $sincosc4<>+0(SB), R2 + WFMADB V1, V0, V6, V0 + FMOVD 0(R2), F4 + MOVD $sincosc2<>+0(SB), R2 + FMOVD 0(R2), F6 + WFMADB V2, V4, V6, V4 + MOVD $sincosc3<>+0(SB), R2 + FMOVD 0(R2), F3 + MOVD $sincosc1<>+0(SB), R2 + WFMADB V2, V0, V3, V0 + FMOVD 0(R2), F6 + WFMADB V1, V4, V6, V4 + WORD $0xA7110002 //tmll %r1,2 + WFMADB V2, V0, V4, V0 + MOVD $sincosc0<>+0(SB), R1 + FMOVD 0(R1), F2 + WFMADB V1, V0, V2, V0 + BNE L15 + FMOVD F0, ret+8(FP) + RET + +L6: + FMOVD 0(R2), F4 + WFCHDBS V4, V5, V4 + BNE L14 + MOVD $sincoss7<>+0(SB), R2 + FMOVD 0(R2), F4 + MOVD $sincoss6<>+0(SB), R2 + FMOVD 0(R2), F3 + MOVD $sincoss5<>+0(SB), R2 + WFMADB V1, V4, V3, V4 + WFMADB V6, V7, V0, V6 
+ FMOVD 0(R2), F0 + MOVD $sincoss4<>+0(SB), R2 + FMADD F4, F1, F0, F0 + FMOVD 0(R2), F3 + MOVD $sincoss2<>+0(SB), R2 + FMOVD 0(R2), F4 + MOVD $sincoss3<>+0(SB), R2 + WFMADB V2, V3, V4, V3 + FMOVD 0(R2), F4 + MOVD $sincoss1<>+0(SB), R2 + WFMADB V2, V0, V4, V0 + FMOVD 0(R2), F4 + WFMADB V1, V3, V4, V3 + FNEG F6, F4 + WFMADB V2, V0, V3, V2 + WFMDB V4, V1, V0 + WORD $0xA7110002 //tmll %r1,2 + WFMSDB V0, V2, V6, V0 + BNE L15 + FMOVD F0, ret+8(FP) + RET + +L14: + MOVD $sincosxnan<>+0(SB), R1 + FMOVD 0(R1), F0 + FMOVD F0, ret+8(FP) + RET + +L18: + WFMDB V0, V0, V2 + WFMADB V2, V4, V1, V4 + WFMDB V2, V2, V1 + WFMADB V2, V4, V18, V4 + WFMADB V1, V6, V16, V6 + WFMADB V1, V4, V7, V4 + WFMADB V2, V6, V3, V6 + FMUL F0, F2 + WFMADB V1, V4, V6, V4 + FMADD F4, F2, F0, F0 + FMOVD F0, ret+8(FP) + RET + +L17: + FNEG F0, F5 + BR L2 +L15: + FNEG F0, F0 + FMOVD F0, ret+8(FP) + RET + + +sinIsZero: + FMOVD F0, ret+8(FP) + RET + +// Cos returns the cosine of the radian argument. +// +// Special cases are: +// Cos(±Inf) = NaN +// Cos(NaN) = NaN +// The algorithm used is minimax polynomial approximation +// with coefficients determined with a Remez exchange algorithm. 
+ +TEXT ·cosAsm(SB),NOSPLIT,$0-16 + FMOVD x+0(FP), F0 + WORD $0xB3120000 //ltdbr %f0,%f0 + BLTU L35 + FMOVD F0, F1 +L21: + MOVD $sincosc7<>+0(SB), R1 + FMOVD 0(R1), F4 + MOVD $sincosc6<>+0(SB), R1 + VLEG $0, 0(R1), V20 + MOVD $sincosc5<>+0(SB), R1 + VLEG $0, 0(R1), V18 + MOVD $sincosc4<>+0(SB), R1 + FMOVD 0(R1), F6 + MOVD $sincosc2<>+0(SB), R1 + VLEG $0, 0(R1), V16 + MOVD $sincosc3<>+0(SB), R1 + FMOVD 0(R1), F7 + MOVD $sincosc1<>+0(SB), R1 + FMOVD 0(R1), F5 + MOVD $sincosrpi2<>+0(SB), R1 + FMOVD 0(R1), F2 + MOVD $sincosxadd<>+0(SB), R1 + FMOVD 0(R1), F3 + MOVD $sincoss0<>+0(SB), R1 + WFMSDB V0, V2, V3, V2 + FMOVD 0(R1), F3 + WFCHDBS V3, V1, V3 + WORD $0xB3CD0012 //lgdr %r1,%f2 + BEQ L36 + MOVD $sincosxadd<>+0(SB), R2 + FMOVD 0(R2), F4 + FADD F2, F4 + MOVD $sincosxpi2h<>+0(SB), R2 + FMOVD 0(R2), F2 + WFMSDB V4, V2, V0, V2 + MOVD $sincosxpi2m<>+0(SB), R2 + FMOVD 0(R2), F0 + WFMADB V4, V0, V2, V0 + MOVD $sincosxpi2l<>+0(SB), R2 + WFMDB V0, V0, V2 + FMOVD 0(R2), F5 + WFMDB V2, V2, V6 + MOVD $sincosxlim<>+0(SB), R2 + WORD $0xA7110001 //tmll %r1,1 + BNE L25 + FMOVD 0(R2), F0 + WFCHDBS V0, V1, V0 + BNE L33 + MOVD $sincosc7<>+0(SB), R2 + FMOVD 0(R2), F0 + MOVD $sincosc6<>+0(SB), R2 + FMOVD 0(R2), F4 + MOVD $sincosc5<>+0(SB), R2 + WFMADB V2, V0, V4, V0 + FMOVD 0(R2), F1 + MOVD $sincosc4<>+0(SB), R2 + WFMADB V2, V0, V1, V0 + FMOVD 0(R2), F4 + MOVD $sincosc2<>+0(SB), R2 + FMOVD 0(R2), F1 + WFMADB V6, V4, V1, V4 + MOVD $sincosc3<>+0(SB), R2 + FMOVD 0(R2), F3 + MOVD $sincosc1<>+0(SB), R2 + WFMADB V6, V0, V3, V0 + FMOVD 0(R2), F1 + WFMADB V2, V4, V1, V4 + WORD $0xA7110002 //tmll %r1,2 + WFMADB V6, V0, V4, V0 + MOVD $sincosc0<>+0(SB), R1 + FMOVD 0(R1), F4 + WFMADB V2, V0, V4, V0 + BNE L34 + FMOVD F0, ret+8(FP) + RET + +L25: + FMOVD 0(R2), F3 + WFCHDBS V3, V1, V1 + BNE L33 + MOVD $sincoss7<>+0(SB), R2 + FMOVD 0(R2), F1 + MOVD $sincoss6<>+0(SB), R2 + FMOVD 0(R2), F3 + MOVD $sincoss5<>+0(SB), R2 + WFMADB V2, V1, V3, V1 + FMOVD 0(R2), F3 + MOVD $sincoss4<>+0(SB), R2 + WFMADB V2, V1, 
V3, V1 + FMOVD 0(R2), F3 + MOVD $sincoss2<>+0(SB), R2 + FMOVD 0(R2), F7 + WFMADB V6, V3, V7, V3 + MOVD $sincoss3<>+0(SB), R2 + FMADD F5, F4, F0, F0 + FMOVD 0(R2), F4 + MOVD $sincoss1<>+0(SB), R2 + FMADD F1, F6, F4, F4 + FMOVD 0(R2), F1 + FMADD F3, F2, F1, F1 + FMUL F0, F2 + WFMADB V6, V4, V1, V6 + WORD $0xA7110002 //tmll %r1,2 + FMADD F6, F2, F0, F0 + BNE L34 + FMOVD F0, ret+8(FP) + RET + +L33: + MOVD $sincosxnan<>+0(SB), R1 + FMOVD 0(R1), F0 + FMOVD F0, ret+8(FP) + RET + +L36: + FMUL F0, F0 + MOVD $sincosc0<>+0(SB), R1 + WFMDB V0, V0, V1 + WFMADB V0, V4, V20, V4 + WFMADB V1, V6, V16, V6 + WFMADB V0, V4, V18, V4 + WFMADB V0, V6, V5, V6 + WFMADB V1, V4, V7, V4 + FMOVD 0(R1), F2 + WFMADB V1, V4, V6, V4 + WFMADB V0, V4, V2, V0 + FMOVD F0, ret+8(FP) + RET + +L35: + FNEG F0, F1 + BR L21 +L34: + FNEG F0, F0 + FMOVD F0, ret+8(FP) + RET diff --git a/src/math/sinh.go b/src/math/sinh.go index 139b911fe65..2bdd7b12057 100644 --- a/src/math/sinh.go +++ b/src/math/sinh.go @@ -22,7 +22,9 @@ package math // Sinh(±0) = ±0 // Sinh(±Inf) = ±Inf // Sinh(NaN) = NaN -func Sinh(x float64) float64 { +func Sinh(x float64) float64 + +func sinh(x float64) float64 { // The coefficients are #2029 from Hart & Cheney. (20.36D) const ( P0 = -0.6307673640497716991184787251e+6 @@ -66,7 +68,9 @@ func Sinh(x float64) float64 { // Cosh(±0) = 1 // Cosh(±Inf) = +Inf // Cosh(NaN) = NaN -func Cosh(x float64) float64 { +func Cosh(x float64) float64 + +func cosh(x float64) float64 { if x < 0 { x = -x } diff --git a/src/math/sinh_s390x.s b/src/math/sinh_s390x.s new file mode 100644 index 00000000000..e492415e84a --- /dev/null +++ b/src/math/sinh_s390x.s @@ -0,0 +1,261 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ + +#include "textflag.h" + +// Constants +DATA sinhrodataL21<>+0(SB)/8, $0.231904681384629956E-16 +DATA sinhrodataL21<>+8(SB)/8, $0.693147180559945286E+00 +DATA sinhrodataL21<>+16(SB)/8, $704.E0 +GLOBL sinhrodataL21<>+0(SB), RODATA, $24 +DATA sinhrlog2<>+0(SB)/8, $0x3ff7154760000000 +GLOBL sinhrlog2<>+0(SB), RODATA, $8 +DATA sinhxinf<>+0(SB)/8, $0x7ff0000000000000 +GLOBL sinhxinf<>+0(SB), RODATA, $8 +DATA sinhxinit<>+0(SB)/8, $0x3ffb504f333f9de6 +GLOBL sinhxinit<>+0(SB), RODATA, $8 +DATA sinhxlim1<>+0(SB)/8, $800.E0 +GLOBL sinhxlim1<>+0(SB), RODATA, $8 +DATA sinhxadd<>+0(SB)/8, $0xc3200001610007fb +GLOBL sinhxadd<>+0(SB), RODATA, $8 +DATA sinhx4ff<>+0(SB)/8, $0x4ff0000000000000 +GLOBL sinhx4ff<>+0(SB), RODATA, $8 + +// Minimax polynomial approximations +DATA sinhe0<>+0(SB)/8, $0.11715728752538099300E+01 +GLOBL sinhe0<>+0(SB), RODATA, $8 +DATA sinhe1<>+0(SB)/8, $0.11715728752538099300E+01 +GLOBL sinhe1<>+0(SB), RODATA, $8 +DATA sinhe2<>+0(SB)/8, $0.58578643762688526692E+00 +GLOBL sinhe2<>+0(SB), RODATA, $8 +DATA sinhe3<>+0(SB)/8, $0.19526214587563004497E+00 +GLOBL sinhe3<>+0(SB), RODATA, $8 +DATA sinhe4<>+0(SB)/8, $0.48815536475176217404E-01 +GLOBL sinhe4<>+0(SB), RODATA, $8 +DATA sinhe5<>+0(SB)/8, $0.97631072948627397816E-02 +GLOBL sinhe5<>+0(SB), RODATA, $8 +DATA sinhe6<>+0(SB)/8, $0.16271839297756073153E-02 +GLOBL sinhe6<>+0(SB), RODATA, $8 +DATA sinhe7<>+0(SB)/8, $0.23245485387271142509E-03 +GLOBL sinhe7<>+0(SB), RODATA, $8 +DATA sinhe8<>+0(SB)/8, $0.29080955860869629131E-04 +GLOBL sinhe8<>+0(SB), RODATA, $8 +DATA sinhe9<>+0(SB)/8, $0.32311267157667725278E-05 +GLOBL sinhe9<>+0(SB), RODATA, $8 + +// Sinh returns the hyperbolic sine of the argument. +// +// Special cases are: +// Sinh(±0) = ±0 +// Sinh(±Inf) = ±Inf +// Sinh(NaN) = NaN +// The algorithm used is minimax polynomial approximation +// with coefficients determined with a Remez exchange algorithm. 
+ +TEXT ·sinhAsm(SB),NOSPLIT,$0-16 + FMOVD x+0(FP), F0 + //special case Sinh(±0) = ±0 + FMOVD $(0.0), F1 + FCMPU F0, F1 + BEQ sinhIsZero + //special case Sinh(±Inf) = ±Inf + FMOVD $1.797693134862315708145274237317043567981e+308, F1 + FCMPU F1, F0 + BLEU sinhIsInf + FMOVD $-1.797693134862315708145274237317043567981e+308, F1 + FCMPU F1, F0 + BGT sinhIsInf + + MOVD $sinhrodataL21<>+0(SB), R5 + WORD $0xB3120000 //ltdbr %f0,%f0 + MOVD sinhxinit<>+0(SB), R1 + FMOVD F0, F4 + MOVD R1, R3 + BLTU L19 + FMOVD F0, F2 +L2: + WORD $0xED205010 //cdb %f2,.L22-.L21(%r5) + BYTE $0x00 + BYTE $0x19 + BGE L15 //jnl .L15 + BVS L15 + WFCEDBS V2, V2, V0 + BEQ L20 +L12: + FMOVD F4, F0 + FMOVD F0, ret+8(FP) + RET + +L15: + WFCEDBS V2, V2, V0 + BVS L12 + MOVD $sinhxlim1<>+0(SB), R2 + FMOVD 0(R2), F0 + WFCHDBS V0, V2, V0 + BEQ L6 + WFCHEDBS V4, V2, V6 + MOVD $sinhxinf<>+0(SB), R1 + FMOVD 0(R1), F0 + BNE LEXITTAGsinh + WFCHDBS V2, V4, V2 + BNE L16 + FNEG F0, F0 + FMOVD F0, ret+8(FP) + RET + +L19: + FNEG F0, F2 + BR L2 +L6: + MOVD $sinhxadd<>+0(SB), R2 + FMOVD 0(R2), F0 + MOVD sinhrlog2<>+0(SB), R2 + WORD $0xB3C10062 //ldgr %f6,%r2 + WFMSDB V4, V6, V0, V16 + FMOVD sinhrodataL21<>+8(SB), F6 + WFADB V0, V16, V0 + FMOVD sinhrodataL21<>+0(SB), F3 + WFMSDB V0, V6, V4, V6 + MOVD $sinhe9<>+0(SB), R2 + WFMADB V0, V3, V6, V0 + FMOVD 0(R2), F1 + MOVD $sinhe7<>+0(SB), R2 + WFMDB V0, V0, V6 + FMOVD 0(R2), F5 + MOVD $sinhe8<>+0(SB), R2 + FMOVD 0(R2), F3 + MOVD $sinhe6<>+0(SB), R2 + WFMADB V6, V1, V5, V1 + FMOVD 0(R2), F5 + MOVD $sinhe5<>+0(SB), R2 + FMOVD 0(R2), F7 + MOVD $sinhe3<>+0(SB), R2 + WFMADB V6, V3, V5, V3 + FMOVD 0(R2), F5 + MOVD $sinhe4<>+0(SB), R2 + WFMADB V6, V7, V5, V7 + FMOVD 0(R2), F5 + MOVD $sinhe2<>+0(SB), R2 + VLEG $0, 0(R2), V20 + WFMDB V6, V6, V18 + WFMADB V6, V5, V20, V5 + WFMADB V1, V18, V7, V1 + FNEG F0, F0 + WFMADB V3, V18, V5, V3 + MOVD $sinhe1<>+0(SB), R3 + WFCEDBS V2, V4, V2 + FMOVD 0(R3), F5 + MOVD $sinhe0<>+0(SB), R3 + WFMADB V6, V1, V5, V1 + FMOVD 0(R3), F5 + VLGVG $0, V16, R2 
+ WFMADB V6, V3, V5, V6 + RLL $3, R2, R2 + WORD $0xEC12000F //risbgn %r1,%r2,64-64+0,64-64+0+16-1,64-0-16 + BYTE $0x30 + BYTE $0x59 + BEQ L9 + WFMSDB V0, V1, V6, V0 + MOVD $sinhx4ff<>+0(SB), R3 + FNEG F0, F0 + FMOVD 0(R3), F2 + FMUL F2, F0 + ANDW $0xFFFF, R2 + WORD $0xA53FEFB6 //llill %r3,61366 + SUBW R2, R3, R2 + WORD $0xEC12000F //risbgn %r1,%r2,64-64+0,64-64+0+16-1,64-0-16 + BYTE $0x30 + BYTE $0x59 + WORD $0xB3C10021 //ldgr %f2,%r1 + FMUL F2, F0 + FMOVD F0, ret+8(FP) + RET + +L20: + MOVD $sinhxadd<>+0(SB), R2 + FMOVD 0(R2), F2 + MOVD sinhrlog2<>+0(SB), R2 + WORD $0xB3C10002 //ldgr %f0,%r2 + WFMSDB V4, V0, V2, V6 + FMOVD sinhrodataL21<>+8(SB), F0 + FADD F6, F2 + MOVD $sinhe9<>+0(SB), R2 + FMSUB F0, F2, F4, F4 + FMOVD 0(R2), F1 + FMOVD sinhrodataL21<>+0(SB), F3 + MOVD $sinhe7<>+0(SB), R2 + FMADD F3, F2, F4, F4 + FMOVD 0(R2), F0 + MOVD $sinhe8<>+0(SB), R2 + WFMDB V4, V4, V2 + FMOVD 0(R2), F3 + MOVD $sinhe6<>+0(SB), R2 + FMOVD 0(R2), F5 + WORD $0xB3CD0026 //lgdr %r2,%f6 + RLL $3, R2, R2 + WORD $0xEC12000F //risbgn %r1,%r2,64-64+0,64-64+0+16-1,64-0-16 + BYTE $0x30 + BYTE $0x59 + WFMADB V2, V1, V0, V1 + WORD $0xB3C10001 //ldgr %f0,%r1 + MOVD $sinhe5<>+0(SB), R1 + WFMADB V2, V3, V5, V3 + FMOVD 0(R1), F5 + MOVD $sinhe3<>+0(SB), R1 + FMOVD 0(R1), F6 + WFMDB V2, V2, V7 + WFMADB V2, V5, V6, V5 + WORD $0xA7487FB6 //lhi %r4,32694 + FNEG F4, F4 + ANDW $0xFFFF, R2 + SUBW R2, R4, R2 + WORD $0xEC32000F //risbgn %r3,%r2,64-64+0,64-64+0+16-1,64-0-16 + BYTE $0x30 + BYTE $0x59 + WORD $0xB3C10063 //ldgr %f6,%r3 + WFADB V0, V6, V16 + MOVD $sinhe4<>+0(SB), R1 + WFMADB V1, V7, V5, V1 + WFMDB V4, V16, V4 + FMOVD 0(R1), F5 + MOVD $sinhe2<>+0(SB), R1 + VLEG $0, 0(R1), V16 + MOVD $sinhe1<>+0(SB), R1 + WFMADB V2, V5, V16, V5 + VLEG $0, 0(R1), V16 + WFMADB V3, V7, V5, V3 + WFMADB V2, V1, V16, V1 + FSUB F6, F0 + FMUL F1, F4 + MOVD $sinhe0<>+0(SB), R1 + FMOVD 0(R1), F6 + WFMADB V2, V3, V6, V2 + WFMADB V0, V2, V4, V0 + FMOVD F0, ret+8(FP) + RET + +L9: + WFMADB V0, V1, V6, V0 + MOVD 
$sinhx4ff<>+0(SB), R3 + FMOVD 0(R3), F2 + FMUL F2, F0 + WORD $0xA72AF000 //ahi %r2,-4096 + WORD $0xEC12000F //risbgn %r1,%r2,64-64+0,64-64+0+16-1,64-0-16 + BYTE $0x30 + BYTE $0x59 + WORD $0xB3C10021 //ldgr %f2,%r1 + FMUL F2, F0 + FMOVD F0, ret+8(FP) + RET + +L16: + FMOVD F0, ret+8(FP) + RET + +LEXITTAGsinh: +sinhIsInf: +sinhIsZero: + FMOVD F0, ret+8(FP) + RET diff --git a/src/math/sinh_stub.s b/src/math/sinh_stub.s new file mode 100644 index 00000000000..4caaa0c034f --- /dev/null +++ b/src/math/sinh_stub.s @@ -0,0 +1,17 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build 386 amd64 amd64p32 arm + +#include "textflag.h" + +TEXT ·Sinh(SB),NOSPLIT,$0 + JMP ·sinh(SB) + +TEXT ·Cosh(SB),NOSPLIT,$0 + JMP ·cosh(SB) + +TEXT ·Tanh(SB),NOSPLIT,$0 + JMP ·tanh(SB) + diff --git a/src/math/stubs_arm64.s b/src/math/stubs_arm64.s index bbd0d175e0d..d8c9aa8c01f 100644 --- a/src/math/stubs_arm64.s +++ b/src/math/stubs_arm64.s @@ -21,6 +21,9 @@ TEXT ·Atan(SB),NOSPLIT,$0 TEXT ·Exp2(SB),NOSPLIT,$0 B ·exp2(SB) +TEXT ·Cosh(SB),NOSPLIT,$0 + B ·cosh(SB) + TEXT ·Expm1(SB),NOSPLIT,$0 B ·expm1(SB) @@ -60,8 +63,14 @@ TEXT ·Sincos(SB),NOSPLIT,$0 TEXT ·Sin(SB),NOSPLIT,$0 B ·sin(SB) +TEXT ·Sinh(SB),NOSPLIT,$0 + B ·sinh(SB) + TEXT ·Cos(SB),NOSPLIT,$0 B ·cos(SB) TEXT ·Tan(SB),NOSPLIT,$0 B ·tan(SB) + +TEXT ·Tanh(SB),NOSPLIT,$0 + B ·tanh(SB) diff --git a/src/math/stubs_mips64x.s b/src/math/stubs_mips64x.s index 97e6e4ccb10..21df5cc9a6e 100644 --- a/src/math/stubs_mips64x.s +++ b/src/math/stubs_mips64x.s @@ -81,11 +81,20 @@ TEXT ·Sincos(SB),NOSPLIT,$0 TEXT ·Sin(SB),NOSPLIT,$0 JMP ·sin(SB) +TEXT ·Sinh(SB),NOSPLIT,$0 + JMP ·sinh(SB) + TEXT ·Cos(SB),NOSPLIT,$0 JMP ·cos(SB) +TEXT ·Cosh(SB),NOSPLIT,$0 + JMP ·cosh(SB) + TEXT ·Sqrt(SB),NOSPLIT,$0 JMP ·sqrt(SB) TEXT ·Tan(SB),NOSPLIT,$0 JMP ·tan(SB) + +TEXT ·Tanh(SB),NOSPLIT,$0 + JMP ·tanh(SB) diff --git a/src/math/stubs_mipsx.s 
b/src/math/stubs_mipsx.s index 48df75aa90b..b8697681731 100644 --- a/src/math/stubs_mipsx.s +++ b/src/math/stubs_mipsx.s @@ -81,8 +81,18 @@ TEXT ·Sincos(SB),NOSPLIT,$0 TEXT ·Sin(SB),NOSPLIT,$0 JMP ·sin(SB) +TEXT ·Sinh(SB),NOSPLIT,$0 + JMP ·sinh(SB) + TEXT ·Cos(SB),NOSPLIT,$0 JMP ·cos(SB) +TEXT ·Cosh(SB),NOSPLIT,$0 + JMP ·cosh(SB) + TEXT ·Tan(SB),NOSPLIT,$0 JMP ·tan(SB) + +TEXT ·Tanh(SB),NOSPLIT,$0 + JMP ·tanh(SB) + diff --git a/src/math/stubs_ppc64x.s b/src/math/stubs_ppc64x.s index de8a5ff8bf8..b6220167209 100644 --- a/src/math/stubs_ppc64x.s +++ b/src/math/stubs_ppc64x.s @@ -72,8 +72,17 @@ TEXT ·Sincos(SB),NOSPLIT,$0 TEXT ·Sin(SB),NOSPLIT,$0 BR ·sin(SB) +TEXT ·Sinh(SB),NOSPLIT,$0 + BR ·sinh(SB) + TEXT ·Cos(SB),NOSPLIT,$0 BR ·cos(SB) +TEXT ·Cosh(SB),NOSPLIT,$0 + BR ·cosh(SB) + TEXT ·Tan(SB),NOSPLIT,$0 BR ·tan(SB) + +TEXT ·Tanh(SB),NOSPLIT,$0 + BR ·tanh(SB) diff --git a/src/math/stubs_s390x.s b/src/math/stubs_s390x.s index c3aed13e879..8da55c54ab1 100644 --- a/src/math/stubs_s390x.s +++ b/src/math/stubs_s390x.s @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. 
-#include "../runtime/textflag.h" +#include "textflag.h" TEXT ·Asin(SB),NOSPLIT,$0 BR ·asin(SB) @@ -34,9 +34,6 @@ TEXT ·Hypot(SB),NOSPLIT,$0 TEXT ·Ldexp(SB),NOSPLIT,$0 BR ·ldexp(SB) -TEXT ·Log10(SB),NOSPLIT,$0 - BR ·log10(SB) - TEXT ·Log2(SB),NOSPLIT,$0 BR ·log2(SB) @@ -58,11 +55,154 @@ TEXT ·Remainder(SB),NOSPLIT,$0 TEXT ·Sincos(SB),NOSPLIT,$0 BR ·sincos(SB) -TEXT ·Sin(SB),NOSPLIT,$0 - BR ·sin(SB) - -TEXT ·Cos(SB),NOSPLIT,$0 - BR ·cos(SB) - TEXT ·Tan(SB),NOSPLIT,$0 BR ·tan(SB) + +// hasVectorFacility reports whether the vector facility is installed and enabled, i.e. whether the vector assembly routines can be used +TEXT ·hasVectorFacility(SB),NOSPLIT,$24-1 + MOVD $x-24(SP), R1 + XC $24, 0(R1), 0(R1) // clear the storage + MOVD $2, R0 // R0 = number of doublewords STFLE may store, minus 1 (3 doublewords fill the 24-byte buffer) + WORD $0xB2B01000 // STFLE 0(R1): store the facility list into the buffer + XOR R0, R0 // reset the value of R0 + MOVBZ z-8(SP), R1 // byte 16 of the buffer holds facility bits 128-135 + AND $0x40, R1 // mask 0x40 selects bit 129, the vector facility + BEQ novector +vectorinstalled: + // check if the vector instructions have been enabled + VLEIB $0, $0xF, V16 + VLGVB $0, V16, R1 + CMPBNE R1, $0xF, novector + MOVB $1, ret+0(FP) // have vx + RET +novector: + MOVB $0, ret+0(FP) // no vx + RET + +TEXT ·Log10(SB),NOSPLIT,$0 + MOVD log10vectorfacility+0x00(SB),R1 // load the current target; it starts as log10TrampolineSetup, which overwrites it with the chosen implementation on the first call. The Cos, Cosh, Sin, Sinh and Tanh entries below use the same scheme. 
+ BR (R1) + +TEXT ·log10TrampolineSetup(SB),NOSPLIT, $0 + MOVB ·hasVX(SB), R1 + CMPBEQ R1, $1, vectorimpl // hasVX == 1: vector facility available, use the Asm version + MOVD $log10vectorfacility+0x00(SB), R1 + MOVD $·log10(SB), R2 + MOVD R2, 0(R1) + BR ·log10(SB) +vectorimpl: + MOVD $log10vectorfacility+0x00(SB), R1 + MOVD $·log10Asm(SB), R2 + MOVD R2, 0(R1) + BR ·log10Asm(SB) + +GLOBL log10vectorfacility+0x00(SB), NOPTR, $8 +DATA log10vectorfacility+0x00(SB)/8, $·log10TrampolineSetup(SB) + + +TEXT ·Cos(SB),NOSPLIT,$0 + MOVD cosvectorfacility+0x00(SB),R1 + BR (R1) + +TEXT ·cosTrampolineSetup(SB),NOSPLIT, $0 + MOVB ·hasVX(SB), R1 + CMPBEQ R1, $1, vectorimpl // hasVX == 1: vector facility available, use the Asm version + MOVD $cosvectorfacility+0x00(SB), R1 + MOVD $·cos(SB), R2 + MOVD R2, 0(R1) + BR ·cos(SB) +vectorimpl: + MOVD $cosvectorfacility+0x00(SB), R1 + MOVD $·cosAsm(SB), R2 + MOVD R2, 0(R1) + BR ·cosAsm(SB) + 
+GLOBL cosvectorfacility+0x00(SB), NOPTR, $8 +DATA cosvectorfacility+0x00(SB)/8, $·cosTrampolineSetup(SB) + + +TEXT ·Cosh(SB),NOSPLIT,$0 + MOVD coshvectorfacility+0x00(SB),R1 + BR (R1) + +TEXT ·coshTrampolineSetup(SB),NOSPLIT, $0 + MOVB ·hasVX(SB), R1 + CMPBEQ R1, $1, vectorimpl // hasVX == 1: vector facility available, use the Asm version + MOVD $coshvectorfacility+0x00(SB), R1 + MOVD $·cosh(SB), R2 + MOVD R2, 0(R1) + BR ·cosh(SB) +vectorimpl: + MOVD $coshvectorfacility+0x00(SB), R1 + MOVD $·coshAsm(SB), R2 + MOVD R2, 0(R1) + BR ·coshAsm(SB) + +GLOBL coshvectorfacility+0x00(SB), NOPTR, $8 +DATA coshvectorfacility+0x00(SB)/8, $·coshTrampolineSetup(SB) + + +TEXT ·Sin(SB),NOSPLIT,$0 + MOVD sinvectorfacility+0x00(SB),R1 + BR (R1) + +TEXT ·sinTrampolineSetup(SB),NOSPLIT, $0 + MOVB ·hasVX(SB), R1 + CMPBEQ R1, $1, vectorimpl // hasVX == 1: vector facility available, use the Asm version + MOVD $sinvectorfacility+0x00(SB), R1 + MOVD $·sin(SB), R2 + MOVD R2, 0(R1) + BR ·sin(SB) +vectorimpl: + MOVD $sinvectorfacility+0x00(SB), R1 + MOVD $·sinAsm(SB), R2 + MOVD R2, 0(R1) + BR ·sinAsm(SB) + +GLOBL sinvectorfacility+0x00(SB), NOPTR, $8 +DATA sinvectorfacility+0x00(SB)/8, $·sinTrampolineSetup(SB) + + +TEXT ·Sinh(SB),NOSPLIT,$0 + MOVD sinhvectorfacility+0x00(SB),R1 + BR (R1) + +TEXT ·sinhTrampolineSetup(SB),NOSPLIT, $0 + MOVB ·hasVX(SB), R1 + CMPBEQ R1, $1, vectorimpl // hasVX == 1: vector facility available, use the Asm version + MOVD $sinhvectorfacility+0x00(SB), R1 + MOVD $·sinh(SB), R2 + MOVD R2, 0(R1) + BR ·sinh(SB) +vectorimpl: + MOVD $sinhvectorfacility+0x00(SB), R1 + MOVD $·sinhAsm(SB), R2 + MOVD R2, 0(R1) + BR ·sinhAsm(SB) + +GLOBL sinhvectorfacility+0x00(SB), NOPTR, $8 +DATA sinhvectorfacility+0x00(SB)/8, $·sinhTrampolineSetup(SB) + + + +TEXT ·Tanh(SB),NOSPLIT,$0 + MOVD tanhvectorfacility+0x00(SB),R1 + BR (R1) + +TEXT ·tanhTrampolineSetup(SB),NOSPLIT, $0 + MOVB ·hasVX(SB), R1 + CMPBEQ R1, $1, vectorimpl // hasVX == 1: vector facility available, use the Asm version + MOVD $tanhvectorfacility+0x00(SB), R1 + MOVD $·tanh(SB), R2 + MOVD R2, 0(R1) + BR ·tanh(SB) 
+vectorimpl: + MOVD $tanhvectorfacility+0x00(SB), R1 + MOVD $·tanhAsm(SB), R2 + MOVD R2, 0(R1) + BR ·tanhAsm(SB) + +GLOBL tanhvectorfacility+0x00(SB), NOPTR, $8 +DATA tanhvectorfacility+0x00(SB)/8, $·tanhTrampolineSetup(SB) + + diff --git a/src/math/tanh.go b/src/math/tanh.go index cf0ffa1923f..eaa0e4cc52a 100644 --- a/src/math/tanh.go +++ b/src/math/tanh.go @@ -71,7 +71,9 @@ var tanhQ = [...]float64{ // Tanh(±0) = ±0 // Tanh(±Inf) = ±1 // Tanh(NaN) = NaN -func Tanh(x float64) float64 { +func Tanh(x float64) float64 + +func tanh(x float64) float64 { const MAXLOG = 8.8029691931113054295988e+01 // log(2**127) z := Abs(x) switch { diff --git a/src/math/tanh_s390x.s b/src/math/tanh_s390x.s new file mode 100644 index 00000000000..1b76c14486e --- /dev/null +++ b/src/math/tanh_s390x.s @@ -0,0 +1,173 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +// Constants and minimax polynomial coefficients used by tanhAsm +DATA tanhrodataL18<>+0(SB)/8, $-1.0 +DATA tanhrodataL18<>+8(SB)/8, $-2.0 +DATA tanhrodataL18<>+16(SB)/8, $1.0 +DATA tanhrodataL18<>+24(SB)/8, $2.0 +DATA tanhrodataL18<>+32(SB)/8, $0.20000000000000011868E+01 +DATA tanhrodataL18<>+40(SB)/8, $0.13333333333333341256E+01 +DATA tanhrodataL18<>+48(SB)/8, $0.26666666663549111502E+00 +DATA tanhrodataL18<>+56(SB)/8, $0.66666666658721844678E+00 +DATA tanhrodataL18<>+64(SB)/8, $0.88890217768964374821E-01 +DATA tanhrodataL18<>+72(SB)/8, $0.25397199429103821138E-01 +DATA tanhrodataL18<>+80(SB)/8, $-.346573590279972643E+00 +DATA tanhrodataL18<>+88(SB)/8, $20.E0 +GLOBL tanhrodataL18<>+0(SB), RODATA, $96 + +// Constants +DATA tanhrlog2<>+0(SB)/8, $0x4007154760000000 +GLOBL tanhrlog2<>+0(SB), RODATA, $8 +DATA tanhxadd<>+0(SB)/8, $0xc2f0000100003ff0 +GLOBL tanhxadd<>+0(SB), RODATA, $8 +DATA tanhxmone<>+0(SB)/8, $-1.0 +GLOBL tanhxmone<>+0(SB), RODATA, $8 +DATA tanhxzero<>+0(SB)/8, $0 +GLOBL tanhxzero<>+0(SB), RODATA, $8 
+ +// Table of 16 polynomial coefficients; tanhAsm loads one entry per call (ld %f5,0(%r4,%r3) with R3 = tanhtab) +DATA tanhtab<>+0(SB)/8, $0.000000000000000000E+00 +DATA tanhtab<>+8(SB)/8, $-.171540871271399150E-01 +DATA tanhtab<>+16(SB)/8, $-.306597931864376363E-01 +DATA tanhtab<>+24(SB)/8, $-.410200970469965021E-01 +DATA tanhtab<>+32(SB)/8, $-.486343079978231466E-01 +DATA tanhtab<>+40(SB)/8, $-.538226193725835820E-01 +DATA tanhtab<>+48(SB)/8, $-.568439602538111520E-01 +DATA tanhtab<>+56(SB)/8, $-.579091847395528847E-01 +DATA tanhtab<>+64(SB)/8, $-.571909584179366341E-01 +DATA tanhtab<>+72(SB)/8, $-.548312665987204407E-01 +DATA tanhtab<>+80(SB)/8, $-.509471843643441085E-01 +DATA tanhtab<>+88(SB)/8, $-.456353588448863359E-01 +DATA tanhtab<>+96(SB)/8, $-.389755254243262365E-01 +DATA tanhtab<>+104(SB)/8, $-.310332908285244231E-01 +DATA tanhtab<>+112(SB)/8, $-.218623539150173528E-01 +DATA tanhtab<>+120(SB)/8, $-.115062908917949451E-01 +GLOBL tanhtab<>+0(SB), RODATA, $128 + +// Tanh returns the hyperbolic tangent of the argument. +// +// Special cases are: +// Tanh(±0) = ±0 +// Tanh(±Inf) = ±1 +// Tanh(NaN) = NaN +// The algorithm used is minimax polynomial approximation using a table of +// polynomial coefficients determined with a Remez exchange algorithm. 
+ +TEXT ·tanhAsm(SB),NOSPLIT,$0-16 + FMOVD x+0(FP), F0 + // special case: Tanh(±0) = ±0 + FMOVD $(0.0), F1 + FCMPU F0, F1 + BEQ tanhIsZero + MOVD $tanhrodataL18<>+0(SB), R5 + WORD $0xB3120000 //ltdbr %f0,%f0 (load and test: sets the condition code from x for the BLTU below; negative x goes to L15 so F1 holds x with the sign flipped) + MOVD $0x4034000000000000, R1 + BLTU L15 + FMOVD F0, F1 +L2: + MOVD $tanhxadd<>+0(SB), R2 + FMOVD 0(R2), F2 + MOVD tanhrlog2<>+0(SB), R2 + WORD $0xB3C10042 //ldgr %f4,%r2 + WFMSDB V0, V4, V2, V4 + MOVD $tanhtab<>+0(SB), R3 + WORD $0xB3CD0024 //lgdr %r2,%f4 + WORD $0xEC4239BC //risbg %r4,%r2,57,128+60,3 + BYTE $0x03 + BYTE $0x55 + WORD $0xED105058 //cdb %f1,.L19-.L18(%r5) (compare F1 with tanhrodataL18<>+88, which is 20.0; the BLT L3 below tests this result) + BYTE $0x00 + BYTE $0x19 + WORD $0xEC12000F //risbgn %r1,%r2,64-64+0,64-64+0+16-1,64-0-16 + BYTE $0x30 + BYTE $0x59 + WORD $0x68543000 //ld %f5,0(%r4,%r3) + WORD $0xB3C10061 //ldgr %f6,%r1 + BLT L3 + MOVD $tanhxzero<>+0(SB), R1 + FMOVD 0(R1), F2 + WFCHDBS V0, V2, V4 + BEQ L9 + WFCHDBS V2, V0, V2 + BNE L1 + MOVD $tanhxmone<>+0(SB), R1 + FMOVD 0(R1), F0 + FMOVD F0, ret+8(FP) + RET + +L3: + FADD F4, F2 + FMOVD tanhrodataL18<>+80(SB), F4 + FMADD F4, F2, F0, F0 + FMOVD tanhrodataL18<>+72(SB), F1 + WFMDB V0, V0, V3 + FMOVD tanhrodataL18<>+64(SB), F2 + WFMADB V0, V1, V2, V1 + FMOVD tanhrodataL18<>+56(SB), F4 + FMOVD tanhrodataL18<>+48(SB), F2 + WFMADB V1, V3, V4, V1 + FMOVD tanhrodataL18<>+40(SB), F4 + WFMADB V3, V2, V4, V2 + FMOVD tanhrodataL18<>+32(SB), F4 + WORD $0xB9270022 //lhr %r2,%r2 + WFMADB V3, V1, V4, V1 + FMOVD tanhrodataL18<>+24(SB), F4 + WFMADB V3, V2, V4, V3 + WFMADB V0, V5, V0, V2 + WFMADB V0, V1, V3, V0 + WORD $0xA7183ECF //lhi %r1,16079 + WFMADB V0, V2, V5, V2 + FMUL F6, F2 + MOVW R2, R10 + MOVW R1, R11 + CMPBLE R10, R11, L16 + FMOVD F6, F0 + WORD $0xED005010 //adb %f0,.L28-.L18(%r5) (add tanhrodataL18<>+16, which is 1.0) + BYTE $0x00 + BYTE $0x1A + WORD $0xA7184330 //lhi %r1,17200 + FADD F2, F0 + MOVW R2, R10 + MOVW R1, R11 + CMPBGT R10, R11, L17 + WORD $0xED605010 //sdb %f6,.L28-.L18(%r5) (subtract tanhrodataL18<>+16, which is 1.0) + BYTE $0x00 + BYTE $0x1B + FADD F6, F2 + WFDDB V0, V2, V0 + FMOVD F0, ret+8(FP) + RET + +L9: + FMOVD tanhrodataL18<>+16(SB), F0 +L1: 
+ FMOVD F0, ret+8(FP) + RET + +L15: + FNEG F0, F1 + BR L2 +L16: + FADD F6, F2 + FMOVD tanhrodataL18<>+8(SB), F0 + FMADD F4, F2, F0, F0 + FMOVD tanhrodataL18<>+0(SB), F4 + FNEG F0, F0 + WFMADB V0, V2, V4, V0 + FMOVD F0, ret+8(FP) + RET + +L17: + WFDDB V0, V4, V0 + FMOVD tanhrodataL18<>+16(SB), F2 + WFSDB V0, V2, V0 + FMOVD F0, ret+8(FP) + RET + +tanhIsZero: // return ±0: F0 still holds x, so the sign of zero is preserved + FMOVD F0, ret+8(FP) + RET