mirror of
https://github.com/golang/go
synced 2024-11-23 17:40:03 -07:00
math: use SIMD to accelerate some scalar math functions on s390x
Note, most math functions are structured to use stubs, so that they can be accelerated with assembly on any platform. Sinh, cosh, and tanh were not structured with stubs, so this CL does that. This set of routines was chosen as likely to produce good speedups with assembly on any platform. Technique used was minimax polynomial approximation using tables of polynomial coefficients, with argument range reduction. A table of scaling factors was also used for cosh and log10. before after speedup BenchmarkCos 22.1 ns/op 6.79 ns/op 3.25x BenchmarkCosh 125 ns/op 11.7 ns/op 10.68x BenchmarkLog10 48.4 ns/op 12.5 ns/op 3.87x BenchmarkSin 22.2 ns/op 6.55 ns/op 3.39x BenchmarkSinh 125 ns/op 14.2 ns/op 8.80x BenchmarkTanh 65.0 ns/op 15.1 ns/op 4.30x Accuracy was tested against a high-precision reference function to determine maximum error. Approximately 4,000,000 points were tested for each function, producing the following result. Note: ulperr is error in "units in the last place" max ulperr sin 1.43 (returns NaN beyond +-2^50) cos 1.79 (returns NaN beyond +-2^50) cosh 1.05 sinh 3.02 tanh 3.69 log10 1.75 Also includes a set of tests to test non-vector functions even when SIMD is enabled. Change-Id: Icb45f14d00864ee19ed973d209c3af21e4df4edc Reviewed-on: https://go-review.googlesource.com/32352 Run-TryBot: Michael Munday <munday@ca.ibm.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Michael Munday <munday@ca.ibm.com>
This commit is contained in:
parent
9f9d83404f
commit
b6a15683f0
29
src/math/arith_s390x.go
Normal file
29
src/math/arith_s390x.go
Normal file
@ -0,0 +1,29 @@
|
||||
// Copyright 2016 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package math
|
||||
|
||||
// The functions below are declared without Go bodies; their implementations
// live outside this file.
// NOTE(review): each xxxTrampolineSetup presumably chooses between xxxAsm and
// the portable routine based on hasVX — confirm against the s390x stubs.
func log10TrampolineSetup(x float64) float64
|
||||
func log10Asm(x float64) float64
|
||||
|
||||
func cosTrampolineSetup(x float64) float64
|
||||
func cosAsm(x float64) float64
|
||||
|
||||
func coshTrampolineSetup(x float64) float64
|
||||
func coshAsm(x float64) float64
|
||||
|
||||
func sinTrampolineSetup(x float64) float64
|
||||
func sinAsm(x float64) float64
|
||||
|
||||
func sinhTrampolineSetup(x float64) float64
|
||||
func sinhAsm(x float64) float64
|
||||
|
||||
func tanhTrampolineSetup(x float64) float64
|
||||
func tanhAsm(x float64) float64
|
||||
|
||||
// hasVectorFacility reports whether the machine has the z/Architecture
|
||||
// vector facility installed and enabled.
|
||||
func hasVectorFacility() bool
|
||||
|
||||
// hasVX holds the result of hasVectorFacility, evaluated once when the
// package's variables are initialized.
var hasVX = hasVectorFacility()
|
144
src/math/arith_s390x_test.go
Normal file
144
src/math/arith_s390x_test.go
Normal file
@ -0,0 +1,144 @@
|
||||
// Copyright 2016 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Tests whether the non vector routines are working, even when the tests are run on a
|
||||
// vector-capable machine.
|
||||
package math_test
|
||||
|
||||
import (
|
||||
. "math"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestCosNovec(t *testing.T) {
|
||||
if !HasVX {
|
||||
t.Skipf("no vector support")
|
||||
}
|
||||
for i := 0; i < len(vf); i++ {
|
||||
if f := CosNoVec(vf[i]); !veryclose(cos[i], f) {
|
||||
t.Errorf("Cos(%g) = %g, want %g", vf[i], f, cos[i])
|
||||
}
|
||||
}
|
||||
for i := 0; i < len(vfcosSC); i++ {
|
||||
if f := CosNoVec(vfcosSC[i]); !alike(cosSC[i], f) {
|
||||
t.Errorf("Cos(%g) = %g, want %g", vfcosSC[i], f, cosSC[i])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestCoshNovec(t *testing.T) {
|
||||
if !HasVX {
|
||||
t.Skipf("no vector support")
|
||||
}
|
||||
for i := 0; i < len(vf); i++ {
|
||||
if f := CoshNoVec(vf[i]); !close(cosh[i], f) {
|
||||
t.Errorf("Cosh(%g) = %g, want %g", vf[i], f, cosh[i])
|
||||
}
|
||||
}
|
||||
for i := 0; i < len(vfcoshSC); i++ {
|
||||
if f := CoshNoVec(vfcoshSC[i]); !alike(coshSC[i], f) {
|
||||
t.Errorf("Cosh(%g) = %g, want %g", vfcoshSC[i], f, coshSC[i])
|
||||
}
|
||||
}
|
||||
}
|
||||
func TestSinNovec(t *testing.T) {
|
||||
if !HasVX {
|
||||
t.Skipf("no vector support")
|
||||
}
|
||||
for i := 0; i < len(vf); i++ {
|
||||
if f := SinNoVec(vf[i]); !veryclose(sin[i], f) {
|
||||
t.Errorf("Sin(%g) = %g, want %g", vf[i], f, sin[i])
|
||||
}
|
||||
}
|
||||
for i := 0; i < len(vfsinSC); i++ {
|
||||
if f := SinNoVec(vfsinSC[i]); !alike(sinSC[i], f) {
|
||||
t.Errorf("Sin(%g) = %g, want %g", vfsinSC[i], f, sinSC[i])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestSinhNovec(t *testing.T) {
|
||||
if !HasVX {
|
||||
t.Skipf("no vector support")
|
||||
}
|
||||
for i := 0; i < len(vf); i++ {
|
||||
if f := SinhNoVec(vf[i]); !close(sinh[i], f) {
|
||||
t.Errorf("Sinh(%g) = %g, want %g", vf[i], f, sinh[i])
|
||||
}
|
||||
}
|
||||
for i := 0; i < len(vfsinhSC); i++ {
|
||||
if f := SinhNoVec(vfsinhSC[i]); !alike(sinhSC[i], f) {
|
||||
t.Errorf("Sinh(%g) = %g, want %g", vfsinhSC[i], f, sinhSC[i])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check that math functions of high angle values
|
||||
// return accurate results. [Since (vf[i] + large) - large != vf[i],
|
||||
// testing for Trig(vf[i] + large) == Trig(vf[i]), where large is
|
||||
// a multiple of 2*Pi, is misleading.]
|
||||
func TestLargeCosNovec(t *testing.T) {
|
||||
if !HasVX {
|
||||
t.Skipf("no vector support")
|
||||
}
|
||||
large := float64(100000 * Pi)
|
||||
for i := 0; i < len(vf); i++ {
|
||||
f1 := cosLarge[i]
|
||||
f2 := CosNoVec(vf[i] + large)
|
||||
if !close(f1, f2) {
|
||||
t.Errorf("Cos(%g) = %g, want %g", vf[i]+large, f2, f1)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestLargeSinNovec(t *testing.T) {
|
||||
if !HasVX {
|
||||
t.Skipf("no vector support")
|
||||
}
|
||||
large := float64(100000 * Pi)
|
||||
for i := 0; i < len(vf); i++ {
|
||||
f1 := sinLarge[i]
|
||||
f2 := SinNoVec(vf[i] + large)
|
||||
if !close(f1, f2) {
|
||||
t.Errorf("Sin(%g) = %g, want %g", vf[i]+large, f2, f1)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestTanhNovec(t *testing.T) {
|
||||
if !HasVX {
|
||||
t.Skipf("no vector support")
|
||||
}
|
||||
for i := 0; i < len(vf); i++ {
|
||||
if f := TanhNoVec(vf[i]); !veryclose(tanh[i], f) {
|
||||
t.Errorf("Tanh(%g) = %g, want %g", vf[i], f, tanh[i])
|
||||
}
|
||||
}
|
||||
for i := 0; i < len(vftanhSC); i++ {
|
||||
if f := TanhNoVec(vftanhSC[i]); !alike(tanhSC[i], f) {
|
||||
t.Errorf("Tanh(%g) = %g, want %g", vftanhSC[i], f, tanhSC[i])
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func TestLog10Novec(t *testing.T) {
|
||||
if !HasVX {
|
||||
t.Skipf("no vector support")
|
||||
}
|
||||
for i := 0; i < len(vf); i++ {
|
||||
a := Abs(vf[i])
|
||||
if f := Log10NoVec(a); !veryclose(log10[i], f) {
|
||||
t.Errorf("Log10(%g) = %g, want %g", a, f, log10[i])
|
||||
}
|
||||
}
|
||||
if f := Log10NoVec(E); f != Log10E {
|
||||
t.Errorf("Log10(%g) = %g, want %g", E, f, Log10E)
|
||||
}
|
||||
for i := 0; i < len(vflogSC); i++ {
|
||||
if f := Log10NoVec(vflogSC[i]); !alike(logSC[i], f) {
|
||||
t.Errorf("Log10(%g) = %g, want %g", vflogSC[i], f, logSC[i])
|
||||
}
|
||||
}
|
||||
}
|
227
src/math/cosh_s390x.s
Normal file
227
src/math/cosh_s390x.s
Normal file
@ -0,0 +1,227 @@
|
||||
// Copyright 2016 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// Constants
// coshrodataL23 is addressed through R9 in coshAsm; the remaining symbols
// are loaded individually by name.
|
||||
DATA coshrodataL23<>+0(SB)/8, $0.231904681384629956E-16 // presumably the low-order remainder of ln 2 beyond the +8 entry — TODO confirm
|
||||
DATA coshrodataL23<>+8(SB)/8, $0.693147180559945286E+00 // approximately ln 2
|
||||
DATA coshrodataL23<>+16(SB)/8, $0.144269504088896339E+01 // approximately 1/ln 2
|
||||
DATA coshrodataL23<>+24(SB)/8, $704.E0 // compared against F4 by the cdb at label L2 of coshAsm
|
||||
GLOBL coshrodataL23<>+0(SB), RODATA, $32
|
||||
DATA coshxinf<>+0(SB)/8, $0x7FF0000000000000 // IEEE-754 bit pattern of +Inf; returned at label L21
|
||||
GLOBL coshxinf<>+0(SB), RODATA, $8
|
||||
DATA coshxlim1<>+0(SB)/8, $800.E0 // limit compared against F4 at label L14 before branching to L21
|
||||
GLOBL coshxlim1<>+0(SB), RODATA, $8
|
||||
DATA coshxaddhy<>+0(SB)/8, $0xc2f0000100003fdf
|
||||
GLOBL coshxaddhy<>+0(SB), RODATA, $8
|
||||
DATA coshx4ff<>+0(SB)/8, $0x4ff0000000000000 // bit pattern of 2^256; multiplied into the result at label L17
|
||||
GLOBL coshx4ff<>+0(SB), RODATA, $8
|
||||
DATA coshe1<>+0(SB)/8, $0x3ff000000000000a // bit pattern of 1.0 with the low mantissa bits set to 0xa
|
||||
GLOBL coshe1<>+0(SB), RODATA, $8
|
||||
|
||||
// Log multiplier table
|
||||
DATA coshtab<>+0(SB)/8, $0.442737824274138381E-01
|
||||
DATA coshtab<>+8(SB)/8, $0.263602189790660309E-01
|
||||
DATA coshtab<>+16(SB)/8, $0.122565642281703586E-01
|
||||
DATA coshtab<>+24(SB)/8, $0.143757052860721398E-02
|
||||
DATA coshtab<>+32(SB)/8, $-.651375034121276075E-02
|
||||
DATA coshtab<>+40(SB)/8, $-.119317678849450159E-01
|
||||
DATA coshtab<>+48(SB)/8, $-.150868749549871069E-01
|
||||
DATA coshtab<>+56(SB)/8, $-.161992609578469234E-01
|
||||
DATA coshtab<>+64(SB)/8, $-.154492360403337917E-01
|
||||
DATA coshtab<>+72(SB)/8, $-.129850717389178721E-01
|
||||
DATA coshtab<>+80(SB)/8, $-.892902649276657891E-02
|
||||
DATA coshtab<>+88(SB)/8, $-.338202636596794887E-02
|
||||
DATA coshtab<>+96(SB)/8, $0.357266307045684762E-02
|
||||
DATA coshtab<>+104(SB)/8, $0.118665304327406698E-01
|
||||
DATA coshtab<>+112(SB)/8, $0.214434994118118914E-01
|
||||
DATA coshtab<>+120(SB)/8, $0.322580645161290314E-01
|
||||
GLOBL coshtab<>+0(SB), RODATA, $128
|
||||
|
||||
// Minimax polynomial approximations
|
||||
DATA coshe2<>+0(SB)/8, $0.500000000000004237e+00
|
||||
GLOBL coshe2<>+0(SB), RODATA, $8
|
||||
DATA coshe3<>+0(SB)/8, $0.166666666630345592e+00
|
||||
GLOBL coshe3<>+0(SB), RODATA, $8
|
||||
DATA coshe4<>+0(SB)/8, $0.416666664838056960e-01
|
||||
GLOBL coshe4<>+0(SB), RODATA, $8
|
||||
DATA coshe5<>+0(SB)/8, $0.833349307718286047e-02
|
||||
GLOBL coshe5<>+0(SB), RODATA, $8
|
||||
DATA coshe6<>+0(SB)/8, $0.138926439368309441e-02
|
||||
GLOBL coshe6<>+0(SB), RODATA, $8
|
||||
|
||||
// Cosh returns the hyperbolic cosine of x.
|
||||
//
|
||||
// Special cases are:
|
||||
// Cosh(±0) = 1
|
||||
// Cosh(±Inf) = +Inf
|
||||
// Cosh(NaN) = NaN
|
||||
// The algorithm used is minimax polynomial approximation
|
||||
// with coefficients determined with a Remez exchange algorithm.
|
||||
|
||||
TEXT ·coshAsm(SB),NOSPLIT,$0-16
|
||||
FMOVD x+0(FP), F0
|
||||
MOVD $coshrodataL23<>+0(SB), R9
|
||||
WORD $0xB3120000 //ltdbr %f0,%f0
|
||||
MOVD $0x4086000000000000, R2
|
||||
MOVD $0x4086000000000000, R3
|
||||
BLTU L19
|
||||
FMOVD F0, F4
|
||||
L2:
|
||||
WORD $0xED409018 //cdb %f4,.L24-.L23(%r9)
|
||||
BYTE $0x00
|
||||
BYTE $0x19
|
||||
BGE L14 //jnl .L14
|
||||
BVS L14
|
||||
WFCEDBS V4, V4, V2
|
||||
BEQ L20
|
||||
L1:
|
||||
FMOVD F0, ret+8(FP)
|
||||
RET
|
||||
|
||||
L14:
|
||||
WFCEDBS V4, V4, V2
|
||||
BVS L1
|
||||
MOVD $coshxlim1<>+0(SB), R1
|
||||
FMOVD 0(R1), F2
|
||||
WFCHEDBS V4, V2, V2
|
||||
BEQ L21
|
||||
MOVD $coshxaddhy<>+0(SB), R1
|
||||
FMOVD coshrodataL23<>+16(SB), F5
|
||||
FMOVD 0(R1), F2
|
||||
WFMSDB V0, V5, V2, V5
|
||||
FMOVD coshrodataL23<>+8(SB), F3
|
||||
FADD F5, F2
|
||||
MOVD $coshe6<>+0(SB), R1
|
||||
WFMSDB V2, V3, V0, V3
|
||||
FMOVD 0(R1), F6
|
||||
WFMDB V3, V3, V1
|
||||
MOVD $coshe4<>+0(SB), R1
|
||||
FMOVD coshrodataL23<>+0(SB), F7
|
||||
WFMADB V2, V7, V3, V2
|
||||
FMOVD 0(R1), F3
|
||||
MOVD $coshe5<>+0(SB), R1
|
||||
WFMADB V1, V6, V3, V6
|
||||
FMOVD 0(R1), F7
|
||||
MOVD $coshe3<>+0(SB), R1
|
||||
FMOVD 0(R1), F3
|
||||
WFMADB V1, V7, V3, V7
|
||||
FNEG F2, F3
|
||||
WORD $0xB3CD0015 //lgdr %r1,%f5
|
||||
MOVD $coshe2<>+0(SB), R3
|
||||
WFCEDBS V4, V0, V0
|
||||
FMOVD 0(R3), F5
|
||||
MOVD $coshe1<>+0(SB), R3
|
||||
WFMADB V1, V6, V5, V6
|
||||
FMOVD 0(R3), F5
|
||||
WORD $0xEC21000F //risbgn %r2,%r1,64-64+0,64-64+0+16-1,64-0-16
|
||||
BYTE $0x30
|
||||
BYTE $0x59
|
||||
WFMADB V1, V7, V5, V1
|
||||
BVS L22
|
||||
WORD $0xEC4139BC //risbg %r4,%r1,57,128+60,3
|
||||
BYTE $0x03
|
||||
BYTE $0x55
|
||||
MOVD $coshtab<>+0(SB), R3
|
||||
WFMADB V3, V6, V1, V6
|
||||
WORD $0x68043000 //ld %f0,0(%r4,%r3)
|
||||
FMSUB F0, F3, F2, F2
|
||||
WORD $0xA71AF000 //ahi %r1,-4096
|
||||
WFMADB V2, V6, V0, V6
|
||||
L17:
|
||||
WORD $0xEC21000F //risbgn %r2,%r1,64-64+0,64-64+0+16-1,64-0-16
|
||||
BYTE $0x30
|
||||
BYTE $0x59
|
||||
WORD $0xB3C10022 //ldgr %f2,%r2
|
||||
FMADD F2, F6, F2, F2
|
||||
MOVD $coshx4ff<>+0(SB), R1
|
||||
FMOVD 0(R1), F0
|
||||
FMUL F2, F0
|
||||
FMOVD F0, ret+8(FP)
|
||||
RET
|
||||
|
||||
L19:
|
||||
FNEG F0, F4
|
||||
BR L2
|
||||
L20:
|
||||
MOVD $coshxaddhy<>+0(SB), R1
|
||||
FMOVD coshrodataL23<>+16(SB), F3
|
||||
FMOVD 0(R1), F2
|
||||
WFMSDB V0, V3, V2, V3
|
||||
FMOVD coshrodataL23<>+8(SB), F4
|
||||
FADD F3, F2
|
||||
MOVD $coshe6<>+0(SB), R1
|
||||
FMSUB F4, F2, F0, F0
|
||||
FMOVD 0(R1), F6
|
||||
WFMDB V0, V0, V1
|
||||
MOVD $coshe4<>+0(SB), R1
|
||||
FMOVD 0(R1), F4
|
||||
MOVD $coshe5<>+0(SB), R1
|
||||
FMOVD coshrodataL23<>+0(SB), F5
|
||||
WFMADB V1, V6, V4, V6
|
||||
FMADD F5, F2, F0, F0
|
||||
FMOVD 0(R1), F2
|
||||
MOVD $coshe3<>+0(SB), R1
|
||||
FMOVD 0(R1), F4
|
||||
WFMADB V1, V2, V4, V2
|
||||
MOVD $coshe2<>+0(SB), R1
|
||||
FMOVD 0(R1), F5
|
||||
FNEG F0, F4
|
||||
WFMADB V1, V6, V5, V6
|
||||
MOVD $coshe1<>+0(SB), R1
|
||||
FMOVD 0(R1), F5
|
||||
WFMADB V1, V2, V5, V1
|
||||
WORD $0xB3CD0013 //lgdr %r1,%f3
|
||||
MOVD $coshtab<>+0(SB), R5
|
||||
WFMADB V4, V6, V1, V3
|
||||
WORD $0xEC4139BC //risbg %r4,%r1,57,128+60,3
|
||||
BYTE $0x03
|
||||
BYTE $0x55
|
||||
WFMSDB V4, V6, V1, V6
|
||||
WORD $0x68145000 //ld %f1,0(%r4,%r5)
|
||||
WFMSDB V4, V1, V0, V2
|
||||
WORD $0xA7487FBE //lhi %r4,32702
|
||||
FMADD F3, F2, F1, F1
|
||||
SUBW R1, R4
|
||||
WORD $0xECC439BC //risbg %r12,%r4,57,128+60,3
|
||||
BYTE $0x03
|
||||
BYTE $0x55
|
||||
WORD $0x682C5000 //ld %f2,0(%r12,%r5)
|
||||
FMSUB F2, F4, F0, F0
|
||||
WORD $0xEC21000F //risbgn %r2,%r1,64-64+0,64-64+0+16-1,64-0-16
|
||||
BYTE $0x30
|
||||
BYTE $0x59
|
||||
WFMADB V0, V6, V2, V6
|
||||
WORD $0xEC34000F //risbgn %r3,%r4,64-64+0,64-64+0+16-1,64-0-16
|
||||
BYTE $0x30
|
||||
BYTE $0x59
|
||||
WORD $0xB3C10022 //ldgr %f2,%r2
|
||||
WORD $0xB3C10003 //ldgr %f0,%r3
|
||||
FMADD F2, F1, F2, F2
|
||||
FMADD F0, F6, F0, F0
|
||||
FADD F2, F0
|
||||
FMOVD F0, ret+8(FP)
|
||||
RET
|
||||
|
||||
L22:
|
||||
WORD $0xA7387FBE //lhi %r3,32702
|
||||
MOVD $coshtab<>+0(SB), R4
|
||||
SUBW R1, R3
|
||||
WFMSDB V3, V6, V1, V6
|
||||
WORD $0xEC3339BC //risbg %r3,%r3,57,128+60,3
|
||||
BYTE $0x03
|
||||
BYTE $0x55
|
||||
WORD $0x68034000 //ld %f0,0(%r3,%r4)
|
||||
FMSUB F0, F3, F2, F2
|
||||
WORD $0xA7386FBE //lhi %r3,28606
|
||||
WFMADB V2, V6, V0, V6
|
||||
SUBW R1, R3, R1
|
||||
BR L17
|
||||
L21:
|
||||
MOVD $coshxinf<>+0(SB), R1
|
||||
FMOVD 0(R1), F0
|
||||
FMOVD F0, ret+8(FP)
|
||||
RET
|
||||
|
14
src/math/export_s390x_test.go
Normal file
14
src/math/export_s390x_test.go
Normal file
@ -0,0 +1,14 @@
|
||||
// Copyright 2016 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package math
|
||||
|
||||
// Export internal functions and variable for testing.
|
||||
var Log10NoVec = log10
|
||||
var CosNoVec = cos
|
||||
var CoshNoVec = cosh
|
||||
var SinNoVec = sin
|
||||
var SinhNoVec = sinh
|
||||
var TanhNoVec = tanh
|
||||
var HasVX = hasVX
|
170
src/math/log10_s390x.s
Normal file
170
src/math/log10_s390x.s
Normal file
@ -0,0 +1,170 @@
|
||||
// Copyright 2016 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// Minimax polynomial coefficients and other constants
// The table is addressed through R9 in log10Asm, either by symbol+offset
// loads or by raw displacements encoded in WORD/BYTE instructions.
|
||||
DATA log10rodataL19<>+0(SB)/8, $0.000000000000000000E+00 // divisor for the ddb at label L18 (displacement 0)
|
||||
DATA log10rodataL19<>+8(SB)/8, $-1.0 // dividend loaded at label L18, so that path yields -1.0/0.0
|
||||
DATA log10rodataL19<>+16(SB)/8, $0x7FF8000000000000 // +NaN bit pattern, loaded at label L13
|
||||
DATA log10rodataL19<>+24(SB)/8, $.15375570329280596749
|
||||
DATA log10rodataL19<>+32(SB)/8, $.60171950900703668594E+04
|
||||
DATA log10rodataL19<>+40(SB)/8, $-1.9578460454940795898
|
||||
DATA log10rodataL19<>+48(SB)/8, $0.78962633073318517310E-01
|
||||
DATA log10rodataL19<>+56(SB)/8, $-.71784211884836937993E-02
|
||||
DATA log10rodataL19<>+64(SB)/8, $0.87011165920689940661E-03
|
||||
DATA log10rodataL19<>+72(SB)/8, $-.11865158981621437541E-03
|
||||
DATA log10rodataL19<>+80(SB)/8, $0.17258413403018680410E-04
|
||||
DATA log10rodataL19<>+88(SB)/8, $0.40752932047883484315E-06
|
||||
DATA log10rodataL19<>+96(SB)/8, $-.26149194688832680410E-05
|
||||
DATA log10rodataL19<>+104(SB)/8, $0.92453396963875026759E-08
|
||||
DATA log10rodataL19<>+112(SB)/8, $-.64572084905921579630E-07
|
||||
DATA log10rodataL19<>+120(SB)/8, $-5.5
|
||||
DATA log10rodataL19<>+128(SB)/8, $18446744073709551616. // 2^64; multiplier for the mdb at label L2 (displacement 0x80)
|
||||
GLOBL log10rodataL19<>+0(SB), RODATA, $136
|
||||
|
||||
// Table of log10 correction terms
|
||||
DATA log10tab2074<>+0(SB)/8, $0.254164497922885069E-01
|
||||
DATA log10tab2074<>+8(SB)/8, $0.179018857989381839E-01
|
||||
DATA log10tab2074<>+16(SB)/8, $0.118926768029048674E-01
|
||||
DATA log10tab2074<>+24(SB)/8, $0.722595568238080033E-02
|
||||
DATA log10tab2074<>+32(SB)/8, $0.376393570022739135E-02
|
||||
DATA log10tab2074<>+40(SB)/8, $0.138901135928814326E-02
|
||||
DATA log10tab2074<>+48(SB)/8, $0
|
||||
DATA log10tab2074<>+56(SB)/8, $-0.490780466387818203E-03
|
||||
DATA log10tab2074<>+64(SB)/8, $-0.159811431402137571E-03
|
||||
DATA log10tab2074<>+72(SB)/8, $0.925796337165100494E-03
|
||||
DATA log10tab2074<>+80(SB)/8, $0.270683176738357035E-02
|
||||
DATA log10tab2074<>+88(SB)/8, $0.513079030821304758E-02
|
||||
DATA log10tab2074<>+96(SB)/8, $0.815089785397996303E-02
|
||||
DATA log10tab2074<>+104(SB)/8, $0.117253060262419215E-01
|
||||
DATA log10tab2074<>+112(SB)/8, $0.158164239345343963E-01
|
||||
DATA log10tab2074<>+120(SB)/8, $0.203903595489229786E-01
|
||||
GLOBL log10tab2074<>+0(SB), RODATA, $128
|
||||
|
||||
// Log10 returns the decimal logarithm of the argument.
|
||||
//
|
||||
// Special cases are:
|
||||
// Log10(+Inf) = +Inf
|
||||
// Log10(0) = -Inf
|
||||
// Log10(x < 0) = NaN
|
||||
// Log10(NaN) = NaN
|
||||
// The algorithm used is minimax polynomial approximation
|
||||
// with coefficients determined with a Remez exchange algorithm.
|
||||
|
||||
TEXT ·log10Asm(SB),NOSPLIT,$8-16
|
||||
FMOVD x+0(FP), F0
|
||||
MOVD $log10rodataL19<>+0(SB), R9
|
||||
FMOVD F0, x-8(SP)
|
||||
WORD $0xC0298006 //iilf %r2,2147909631
|
||||
BYTE $0x7F
|
||||
BYTE $0xFF
|
||||
WORD $0x5840F008 //l %r4, 8(%r15)
|
||||
SUBW R4, R2, R3
|
||||
WORD $0xEC5320AF //risbg %r5,%r3,32,128+47,0
|
||||
BYTE $0x00
|
||||
BYTE $0x55
|
||||
MOVH $0x0, R1
|
||||
WORD $0xEC15001F //risbgn %r1,%r5,64-64+0,64-64+0+32-1,64-0-32
|
||||
BYTE $0x20
|
||||
BYTE $0x59
|
||||
WORD $0xC0590016 //iilf %r5,1507327
|
||||
BYTE $0xFF
|
||||
BYTE $0xFF
|
||||
MOVW R4, R10
|
||||
MOVW R5, R11
|
||||
CMPBLE R10, R11, L2
|
||||
WORD $0xC0297FEF //iilf %r2,2146435071
|
||||
BYTE $0xFF
|
||||
BYTE $0xFF
|
||||
MOVW R4, R10
|
||||
MOVW R2, R11
|
||||
CMPBLE R10, R11, L16
|
||||
L3:
|
||||
L1:
|
||||
FMOVD F0, ret+8(FP)
|
||||
RET
|
||||
|
||||
L2:
|
||||
WORD $0xB3120000 //ltdbr %f0,%f0
|
||||
BLEU L13
|
||||
WORD $0xED009080 //mdb %f0,.L20-.L19(%r9)
|
||||
BYTE $0x00
|
||||
BYTE $0x1C
|
||||
FMOVD F0, x-8(SP)
|
||||
WORD $0x5B20F008 //s %r2, 8(%r15)
|
||||
WORD $0xEC3239BC //risbg %r3,%r2,57,128+60,64-13
|
||||
BYTE $0x33
|
||||
BYTE $0x55
|
||||
ANDW $0xFFFF0000, R2
|
||||
WORD $0xEC12001F //risbgn %r1,%r2,64-64+0,64-64+0+32-1,64-0-32
|
||||
BYTE $0x20
|
||||
BYTE $0x59
|
||||
ADDW $0x4000000, R2
|
||||
BLEU L17
|
||||
L8:
|
||||
SRW $8, R2, R2
|
||||
ORW $0x45000000, R2
|
||||
L4:
|
||||
FMOVD log10rodataL19<>+120(SB), F2
|
||||
WORD $0xB3C10041 //ldgr %f4,%r1
|
||||
WFMADB V4, V0, V2, V0
|
||||
FMOVD log10rodataL19<>+112(SB), F4
|
||||
FMOVD log10rodataL19<>+104(SB), F6
|
||||
WFMADB V0, V6, V4, V6
|
||||
FMOVD log10rodataL19<>+96(SB), F4
|
||||
FMOVD log10rodataL19<>+88(SB), F1
|
||||
WFMADB V0, V1, V4, V1
|
||||
WFMDB V0, V0, V4
|
||||
FMOVD log10rodataL19<>+80(SB), F2
|
||||
WFMADB V6, V4, V1, V6
|
||||
FMOVD log10rodataL19<>+72(SB), F1
|
||||
WFMADB V0, V2, V1, V2
|
||||
FMOVD log10rodataL19<>+64(SB), F1
|
||||
WORD $0xEC3339BC //risbg %r3,%r3,57,128+60,0
|
||||
BYTE $0x00
|
||||
BYTE $0x55
|
||||
WFMADB V4, V6, V2, V6
|
||||
FMOVD log10rodataL19<>+56(SB), F2
|
||||
WFMADB V0, V1, V2, V1
|
||||
VLVGF $0, R2, V2
|
||||
WFMADB V4, V6, V1, V4
|
||||
LDEBR F2, F2
|
||||
FMOVD log10rodataL19<>+48(SB), F6
|
||||
WFMADB V0, V4, V6, V4
|
||||
FMOVD log10rodataL19<>+40(SB), F1
|
||||
FMOVD log10rodataL19<>+32(SB), F6
|
||||
MOVD $log10tab2074<>+0(SB), R1
|
||||
WFMADB V2, V1, V6, V2
|
||||
WORD $0x68331000 //ld %f3,0(%r3,%r1)
|
||||
WFMADB V0, V4, V3, V0
|
||||
FMOVD log10rodataL19<>+24(SB), F4
|
||||
FMADD F4, F2, F0, F0
|
||||
FMOVD F0, ret+8(FP)
|
||||
RET
|
||||
|
||||
L16:
|
||||
WORD $0xEC2328B7 //risbg %r2,%r3,40,128+55,64-8
|
||||
BYTE $0x38
|
||||
BYTE $0x55
|
||||
WORD $0xEC3339BC //risbg %r3,%r3,57,128+60,64-13
|
||||
BYTE $0x33
|
||||
BYTE $0x55
|
||||
ORW $0x45000000, R2
|
||||
BR L4
|
||||
L13:
|
||||
BGE L18 //jnl .L18
|
||||
BVS L18
|
||||
FMOVD log10rodataL19<>+16(SB), F0
|
||||
BR L1
|
||||
L17:
|
||||
SRAW $1, R2, R2
|
||||
SUBW $0x40000000, R2
|
||||
BR L8
|
||||
L18:
|
||||
FMOVD log10rodataL19<>+8(SB), F0
|
||||
WORD $0xED009000 //ddb %f0,.L36-.L19(%r9)
|
||||
BYTE $0x00
|
||||
BYTE $0x1D
|
||||
BR L1
|
356
src/math/sin_s390x.s
Normal file
356
src/math/sin_s390x.s
Normal file
@ -0,0 +1,356 @@
|
||||
// Copyright 2016 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// Various constants
// Shared by sinAsm and cosAsm; each symbol is loaded by name.
|
||||
DATA sincosxnan<>+0(SB)/8, $0x7ff8000000000000 // quiet NaN bit pattern; returned at labels L14 (sin) and L33 (cos)
|
||||
GLOBL sincosxnan<>+0(SB), RODATA, $8
|
||||
DATA sincosxlim<>+0(SB)/8, $0x432921fb54442d19 // bit pattern of roughly Pi*2^50; compared with the sign-stripped argument before the NaN branch
|
||||
GLOBL sincosxlim<>+0(SB), RODATA, $8
|
||||
DATA sincosxadd<>+0(SB)/8, $0xc338000000000000 // bit pattern of -1.5*2^52
|
||||
GLOBL sincosxadd<>+0(SB), RODATA, $8
|
||||
DATA sincosxpi2l<>+0(SB)/8, $0.108285667392191389e-31 // presumably the lowest-order part of a three-piece Pi/2 — TODO confirm
|
||||
GLOBL sincosxpi2l<>+0(SB), RODATA, $8
|
||||
DATA sincosxpi2m<>+0(SB)/8, $0.612323399573676480e-16 // presumably the middle part of Pi/2 (Pi/2 minus sincosxpi2h) — TODO confirm
|
||||
GLOBL sincosxpi2m<>+0(SB), RODATA, $8
|
||||
DATA sincosxpi2h<>+0(SB)/8, $0.157079632679489656e+01 // approximately Pi/2
|
||||
GLOBL sincosxpi2h<>+0(SB), RODATA, $8
|
||||
DATA sincosrpi2<>+0(SB)/8, $0.636619772367581341e+00 // approximately 2/Pi
|
||||
GLOBL sincosrpi2<>+0(SB), RODATA, $8
|
||||
|
||||
// Minimax polynomial approximations
|
||||
DATA sincosc0<>+0(SB)/8, $0.100000000000000000E+01
|
||||
GLOBL sincosc0<>+0(SB), RODATA, $8
|
||||
DATA sincosc1<>+0(SB)/8, $-.499999999999999833E+00
|
||||
GLOBL sincosc1<>+0(SB), RODATA, $8
|
||||
DATA sincosc2<>+0(SB)/8, $0.416666666666625843E-01
|
||||
GLOBL sincosc2<>+0(SB), RODATA, $8
|
||||
DATA sincosc3<>+0(SB)/8, $-.138888888885498984E-02
|
||||
GLOBL sincosc3<>+0(SB), RODATA, $8
|
||||
DATA sincosc4<>+0(SB)/8, $0.248015871681607202E-04
|
||||
GLOBL sincosc4<>+0(SB), RODATA, $8
|
||||
DATA sincosc5<>+0(SB)/8, $-.275572911309937875E-06
|
||||
GLOBL sincosc5<>+0(SB), RODATA, $8
|
||||
DATA sincosc6<>+0(SB)/8, $0.208735047247632818E-08
|
||||
GLOBL sincosc6<>+0(SB), RODATA, $8
|
||||
DATA sincosc7<>+0(SB)/8, $-.112753632738365317E-10
|
||||
GLOBL sincosc7<>+0(SB), RODATA, $8
|
||||
DATA sincoss0<>+0(SB)/8, $0.100000000000000000E+01
|
||||
GLOBL sincoss0<>+0(SB), RODATA, $8
|
||||
DATA sincoss1<>+0(SB)/8, $-.166666666666666657E+00
|
||||
GLOBL sincoss1<>+0(SB), RODATA, $8
|
||||
DATA sincoss2<>+0(SB)/8, $0.833333333333309209E-02
|
||||
GLOBL sincoss2<>+0(SB), RODATA, $8
|
||||
DATA sincoss3<>+0(SB)/8, $-.198412698410701448E-03
|
||||
GLOBL sincoss3<>+0(SB), RODATA, $8
|
||||
DATA sincoss4<>+0(SB)/8, $0.275573191453906794E-05
|
||||
GLOBL sincoss4<>+0(SB), RODATA, $8
|
||||
DATA sincoss5<>+0(SB)/8, $-.250520918387633290E-07
|
||||
GLOBL sincoss5<>+0(SB), RODATA, $8
|
||||
DATA sincoss6<>+0(SB)/8, $0.160571285514715856E-09
|
||||
GLOBL sincoss6<>+0(SB), RODATA, $8
|
||||
DATA sincoss7<>+0(SB)/8, $-.753213484933210972E-12
|
||||
GLOBL sincoss7<>+0(SB), RODATA, $8
|
||||
|
||||
// Sin returns the sine of the radian argument x.
|
||||
//
|
||||
// Special cases are:
|
||||
// Sin(±0) = ±0
|
||||
// Sin(±Inf) = NaN
|
||||
// Sin(NaN) = NaN
|
||||
// The algorithm used is minimax polynomial approximation
|
||||
// with coefficients determined with a Remez exchange algorithm.
|
||||
|
||||
TEXT ·sinAsm(SB),NOSPLIT,$0-16
|
||||
FMOVD x+0(FP), F0
|
||||
//special case Sin(±0) = ±0
|
||||
FMOVD $(0.0), F1
|
||||
FCMPU F0, F1
|
||||
BEQ sinIsZero
|
||||
WORD $0xB3120000 //ltdbr %f0,%f0
|
||||
BLTU L17
|
||||
FMOVD F0, F5
|
||||
L2:
|
||||
MOVD $sincoss7<>+0(SB), R1
|
||||
FMOVD 0(R1), F4
|
||||
MOVD $sincoss6<>+0(SB), R1
|
||||
FMOVD 0(R1), F1
|
||||
MOVD $sincoss5<>+0(SB), R1
|
||||
VLEG $0, 0(R1), V18
|
||||
MOVD $sincoss4<>+0(SB), R1
|
||||
FMOVD 0(R1), F6
|
||||
MOVD $sincoss2<>+0(SB), R1
|
||||
VLEG $0, 0(R1), V16
|
||||
MOVD $sincoss3<>+0(SB), R1
|
||||
FMOVD 0(R1), F7
|
||||
MOVD $sincoss1<>+0(SB), R1
|
||||
FMOVD 0(R1), F3
|
||||
MOVD $sincoss0<>+0(SB), R1
|
||||
FMOVD 0(R1), F2
|
||||
WFCHDBS V2, V5, V2
|
||||
BEQ L18
|
||||
MOVD $sincosrpi2<>+0(SB), R1
|
||||
FMOVD 0(R1), F3
|
||||
MOVD $sincosxadd<>+0(SB), R1
|
||||
FMOVD 0(R1), F2
|
||||
WFMSDB V0, V3, V2, V3
|
||||
FMOVD 0(R1), F6
|
||||
FADD F3, F6
|
||||
MOVD $sincosxpi2h<>+0(SB), R1
|
||||
FMOVD 0(R1), F2
|
||||
FMSUB F2, F6, F0, F0
|
||||
MOVD $sincosxpi2m<>+0(SB), R1
|
||||
FMOVD 0(R1), F4
|
||||
FMADD F4, F6, F0, F0
|
||||
MOVD $sincosxpi2l<>+0(SB), R1
|
||||
WFMDB V0, V0, V1
|
||||
FMOVD 0(R1), F7
|
||||
WFMDB V1, V1, V2
|
||||
WORD $0xB3CD0013 //lgdr %r1,%f3
|
||||
MOVD $sincosxlim<>+0(SB), R2
|
||||
WORD $0xA7110001 //tmll %r1,1
|
||||
BEQ L6
|
||||
FMOVD 0(R2), F0
|
||||
WFCHDBS V0, V5, V0
|
||||
BNE L14
|
||||
MOVD $sincosc7<>+0(SB), R2
|
||||
FMOVD 0(R2), F0
|
||||
MOVD $sincosc6<>+0(SB), R2
|
||||
FMOVD 0(R2), F4
|
||||
MOVD $sincosc5<>+0(SB), R2
|
||||
WFMADB V1, V0, V4, V0
|
||||
FMOVD 0(R2), F6
|
||||
MOVD $sincosc4<>+0(SB), R2
|
||||
WFMADB V1, V0, V6, V0
|
||||
FMOVD 0(R2), F4
|
||||
MOVD $sincosc2<>+0(SB), R2
|
||||
FMOVD 0(R2), F6
|
||||
WFMADB V2, V4, V6, V4
|
||||
MOVD $sincosc3<>+0(SB), R2
|
||||
FMOVD 0(R2), F3
|
||||
MOVD $sincosc1<>+0(SB), R2
|
||||
WFMADB V2, V0, V3, V0
|
||||
FMOVD 0(R2), F6
|
||||
WFMADB V1, V4, V6, V4
|
||||
WORD $0xA7110002 //tmll %r1,2
|
||||
WFMADB V2, V0, V4, V0
|
||||
MOVD $sincosc0<>+0(SB), R1
|
||||
FMOVD 0(R1), F2
|
||||
WFMADB V1, V0, V2, V0
|
||||
BNE L15
|
||||
FMOVD F0, ret+8(FP)
|
||||
RET
|
||||
|
||||
L6:
|
||||
FMOVD 0(R2), F4
|
||||
WFCHDBS V4, V5, V4
|
||||
BNE L14
|
||||
MOVD $sincoss7<>+0(SB), R2
|
||||
FMOVD 0(R2), F4
|
||||
MOVD $sincoss6<>+0(SB), R2
|
||||
FMOVD 0(R2), F3
|
||||
MOVD $sincoss5<>+0(SB), R2
|
||||
WFMADB V1, V4, V3, V4
|
||||
WFMADB V6, V7, V0, V6
|
||||
FMOVD 0(R2), F0
|
||||
MOVD $sincoss4<>+0(SB), R2
|
||||
FMADD F4, F1, F0, F0
|
||||
FMOVD 0(R2), F3
|
||||
MOVD $sincoss2<>+0(SB), R2
|
||||
FMOVD 0(R2), F4
|
||||
MOVD $sincoss3<>+0(SB), R2
|
||||
WFMADB V2, V3, V4, V3
|
||||
FMOVD 0(R2), F4
|
||||
MOVD $sincoss1<>+0(SB), R2
|
||||
WFMADB V2, V0, V4, V0
|
||||
FMOVD 0(R2), F4
|
||||
WFMADB V1, V3, V4, V3
|
||||
FNEG F6, F4
|
||||
WFMADB V2, V0, V3, V2
|
||||
WFMDB V4, V1, V0
|
||||
WORD $0xA7110002 //tmll %r1,2
|
||||
WFMSDB V0, V2, V6, V0
|
||||
BNE L15
|
||||
FMOVD F0, ret+8(FP)
|
||||
RET
|
||||
|
||||
L14:
|
||||
MOVD $sincosxnan<>+0(SB), R1
|
||||
FMOVD 0(R1), F0
|
||||
FMOVD F0, ret+8(FP)
|
||||
RET
|
||||
|
||||
L18:
|
||||
WFMDB V0, V0, V2
|
||||
WFMADB V2, V4, V1, V4
|
||||
WFMDB V2, V2, V1
|
||||
WFMADB V2, V4, V18, V4
|
||||
WFMADB V1, V6, V16, V6
|
||||
WFMADB V1, V4, V7, V4
|
||||
WFMADB V2, V6, V3, V6
|
||||
FMUL F0, F2
|
||||
WFMADB V1, V4, V6, V4
|
||||
FMADD F4, F2, F0, F0
|
||||
FMOVD F0, ret+8(FP)
|
||||
RET
|
||||
|
||||
L17:
|
||||
FNEG F0, F5
|
||||
BR L2
|
||||
L15:
|
||||
FNEG F0, F0
|
||||
FMOVD F0, ret+8(FP)
|
||||
RET
|
||||
|
||||
|
||||
sinIsZero:
|
||||
FMOVD F0, ret+8(FP)
|
||||
RET
|
||||
|
||||
// Cos returns the cosine of the radian argument.
|
||||
//
|
||||
// Special cases are:
|
||||
// Cos(±Inf) = NaN
|
||||
// Cos(NaN) = NaN
|
||||
// The algorithm used is minimax polynomial approximation
|
||||
// with coefficients determined with a Remez exchange algorithm.
|
||||
|
||||
TEXT ·cosAsm(SB),NOSPLIT,$0-16
|
||||
FMOVD x+0(FP), F0
|
||||
WORD $0xB3120000 //ltdbr %f0,%f0
|
||||
BLTU L35
|
||||
FMOVD F0, F1
|
||||
L21:
|
||||
MOVD $sincosc7<>+0(SB), R1
|
||||
FMOVD 0(R1), F4
|
||||
MOVD $sincosc6<>+0(SB), R1
|
||||
VLEG $0, 0(R1), V20
|
||||
MOVD $sincosc5<>+0(SB), R1
|
||||
VLEG $0, 0(R1), V18
|
||||
MOVD $sincosc4<>+0(SB), R1
|
||||
FMOVD 0(R1), F6
|
||||
MOVD $sincosc2<>+0(SB), R1
|
||||
VLEG $0, 0(R1), V16
|
||||
MOVD $sincosc3<>+0(SB), R1
|
||||
FMOVD 0(R1), F7
|
||||
MOVD $sincosc1<>+0(SB), R1
|
||||
FMOVD 0(R1), F5
|
||||
MOVD $sincosrpi2<>+0(SB), R1
|
||||
FMOVD 0(R1), F2
|
||||
MOVD $sincosxadd<>+0(SB), R1
|
||||
FMOVD 0(R1), F3
|
||||
MOVD $sincoss0<>+0(SB), R1
|
||||
WFMSDB V0, V2, V3, V2
|
||||
FMOVD 0(R1), F3
|
||||
WFCHDBS V3, V1, V3
|
||||
WORD $0xB3CD0012 //lgdr %r1,%f2
|
||||
BEQ L36
|
||||
MOVD $sincosxadd<>+0(SB), R2
|
||||
FMOVD 0(R2), F4
|
||||
FADD F2, F4
|
||||
MOVD $sincosxpi2h<>+0(SB), R2
|
||||
FMOVD 0(R2), F2
|
||||
WFMSDB V4, V2, V0, V2
|
||||
MOVD $sincosxpi2m<>+0(SB), R2
|
||||
FMOVD 0(R2), F0
|
||||
WFMADB V4, V0, V2, V0
|
||||
MOVD $sincosxpi2l<>+0(SB), R2
|
||||
WFMDB V0, V0, V2
|
||||
FMOVD 0(R2), F5
|
||||
WFMDB V2, V2, V6
|
||||
MOVD $sincosxlim<>+0(SB), R2
|
||||
WORD $0xA7110001 //tmll %r1,1
|
||||
BNE L25
|
||||
FMOVD 0(R2), F0
|
||||
WFCHDBS V0, V1, V0
|
||||
BNE L33
|
||||
MOVD $sincosc7<>+0(SB), R2
|
||||
FMOVD 0(R2), F0
|
||||
MOVD $sincosc6<>+0(SB), R2
|
||||
FMOVD 0(R2), F4
|
||||
MOVD $sincosc5<>+0(SB), R2
|
||||
WFMADB V2, V0, V4, V0
|
||||
FMOVD 0(R2), F1
|
||||
MOVD $sincosc4<>+0(SB), R2
|
||||
WFMADB V2, V0, V1, V0
|
||||
FMOVD 0(R2), F4
|
||||
MOVD $sincosc2<>+0(SB), R2
|
||||
FMOVD 0(R2), F1
|
||||
WFMADB V6, V4, V1, V4
|
||||
MOVD $sincosc3<>+0(SB), R2
|
||||
FMOVD 0(R2), F3
|
||||
MOVD $sincosc1<>+0(SB), R2
|
||||
WFMADB V6, V0, V3, V0
|
||||
FMOVD 0(R2), F1
|
||||
WFMADB V2, V4, V1, V4
|
||||
WORD $0xA7110002 //tmll %r1,2
|
||||
WFMADB V6, V0, V4, V0
|
||||
MOVD $sincosc0<>+0(SB), R1
|
||||
FMOVD 0(R1), F4
|
||||
WFMADB V2, V0, V4, V0
|
||||
BNE L34
|
||||
FMOVD F0, ret+8(FP)
|
||||
RET
|
||||
|
||||
L25:
|
||||
FMOVD 0(R2), F3
|
||||
WFCHDBS V3, V1, V1
|
||||
BNE L33
|
||||
MOVD $sincoss7<>+0(SB), R2
|
||||
FMOVD 0(R2), F1
|
||||
MOVD $sincoss6<>+0(SB), R2
|
||||
FMOVD 0(R2), F3
|
||||
MOVD $sincoss5<>+0(SB), R2
|
||||
WFMADB V2, V1, V3, V1
|
||||
FMOVD 0(R2), F3
|
||||
MOVD $sincoss4<>+0(SB), R2
|
||||
WFMADB V2, V1, V3, V1
|
||||
FMOVD 0(R2), F3
|
||||
MOVD $sincoss2<>+0(SB), R2
|
||||
FMOVD 0(R2), F7
|
||||
WFMADB V6, V3, V7, V3
|
||||
MOVD $sincoss3<>+0(SB), R2
|
||||
FMADD F5, F4, F0, F0
|
||||
FMOVD 0(R2), F4
|
||||
MOVD $sincoss1<>+0(SB), R2
|
||||
FMADD F1, F6, F4, F4
|
||||
FMOVD 0(R2), F1
|
||||
FMADD F3, F2, F1, F1
|
||||
FMUL F0, F2
|
||||
WFMADB V6, V4, V1, V6
|
||||
WORD $0xA7110002 //tmll %r1,2
|
||||
FMADD F6, F2, F0, F0
|
||||
BNE L34
|
||||
FMOVD F0, ret+8(FP)
|
||||
RET
|
||||
|
||||
L33:
|
||||
MOVD $sincosxnan<>+0(SB), R1
|
||||
FMOVD 0(R1), F0
|
||||
FMOVD F0, ret+8(FP)
|
||||
RET
|
||||
|
||||
L36:
|
||||
FMUL F0, F0
|
||||
MOVD $sincosc0<>+0(SB), R1
|
||||
WFMDB V0, V0, V1
|
||||
WFMADB V0, V4, V20, V4
|
||||
WFMADB V1, V6, V16, V6
|
||||
WFMADB V0, V4, V18, V4
|
||||
WFMADB V0, V6, V5, V6
|
||||
WFMADB V1, V4, V7, V4
|
||||
FMOVD 0(R1), F2
|
||||
WFMADB V1, V4, V6, V4
|
||||
WFMADB V0, V4, V2, V0
|
||||
FMOVD F0, ret+8(FP)
|
||||
RET
|
||||
|
||||
L35:
|
||||
FNEG F0, F1
|
||||
BR L21
|
||||
L34:
|
||||
FNEG F0, F0
|
||||
FMOVD F0, ret+8(FP)
|
||||
RET
|
@ -22,7 +22,9 @@ package math
|
||||
// Sinh(±0) = ±0
|
||||
// Sinh(±Inf) = ±Inf
|
||||
// Sinh(NaN) = NaN
|
||||
func Sinh(x float64) float64 {
|
||||
func Sinh(x float64) float64
|
||||
|
||||
func sinh(x float64) float64 {
|
||||
// The coefficients are #2029 from Hart & Cheney. (20.36D)
|
||||
const (
|
||||
P0 = -0.6307673640497716991184787251e+6
|
||||
@ -66,7 +68,9 @@ func Sinh(x float64) float64 {
|
||||
// Cosh(±0) = 1
|
||||
// Cosh(±Inf) = +Inf
|
||||
// Cosh(NaN) = NaN
|
||||
func Cosh(x float64) float64 {
|
||||
func Cosh(x float64) float64
|
||||
|
||||
func cosh(x float64) float64 {
|
||||
if x < 0 {
|
||||
x = -x
|
||||
}
|
||||
|
261
src/math/sinh_s390x.s
Normal file
261
src/math/sinh_s390x.s
Normal file
@ -0,0 +1,261 @@
|
||||
// Copyright 2016 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// Constants
|
||||
DATA sinhrodataL21<>+0(SB)/8, $0.231904681384629956E-16
|
||||
DATA sinhrodataL21<>+8(SB)/8, $0.693147180559945286E+00
|
||||
DATA sinhrodataL21<>+16(SB)/8, $704.E0
|
||||
GLOBL sinhrodataL21<>+0(SB), RODATA, $24
|
||||
DATA sinhrlog2<>+0(SB)/8, $0x3ff7154760000000
|
||||
GLOBL sinhrlog2<>+0(SB), RODATA, $8
|
||||
DATA sinhxinf<>+0(SB)/8, $0x7ff0000000000000
|
||||
GLOBL sinhxinf<>+0(SB), RODATA, $8
|
||||
DATA sinhxinit<>+0(SB)/8, $0x3ffb504f333f9de6
|
||||
GLOBL sinhxinit<>+0(SB), RODATA, $8
|
||||
DATA sinhxlim1<>+0(SB)/8, $800.E0
|
||||
GLOBL sinhxlim1<>+0(SB), RODATA, $8
|
||||
DATA sinhxadd<>+0(SB)/8, $0xc3200001610007fb
|
||||
GLOBL sinhxadd<>+0(SB), RODATA, $8
|
||||
DATA sinhx4ff<>+0(SB)/8, $0x4ff0000000000000
|
||||
GLOBL sinhx4ff<>+0(SB), RODATA, $8
|
||||
|
||||
// Minimax polynomial approximations
|
||||
DATA sinhe0<>+0(SB)/8, $0.11715728752538099300E+01
|
||||
GLOBL sinhe0<>+0(SB), RODATA, $8
|
||||
DATA sinhe1<>+0(SB)/8, $0.11715728752538099300E+01
|
||||
GLOBL sinhe1<>+0(SB), RODATA, $8
|
||||
DATA sinhe2<>+0(SB)/8, $0.58578643762688526692E+00
|
||||
GLOBL sinhe2<>+0(SB), RODATA, $8
|
||||
DATA sinhe3<>+0(SB)/8, $0.19526214587563004497E+00
|
||||
GLOBL sinhe3<>+0(SB), RODATA, $8
|
||||
DATA sinhe4<>+0(SB)/8, $0.48815536475176217404E-01
|
||||
GLOBL sinhe4<>+0(SB), RODATA, $8
|
||||
DATA sinhe5<>+0(SB)/8, $0.97631072948627397816E-02
|
||||
GLOBL sinhe5<>+0(SB), RODATA, $8
|
||||
DATA sinhe6<>+0(SB)/8, $0.16271839297756073153E-02
|
||||
GLOBL sinhe6<>+0(SB), RODATA, $8
|
||||
DATA sinhe7<>+0(SB)/8, $0.23245485387271142509E-03
|
||||
GLOBL sinhe7<>+0(SB), RODATA, $8
|
||||
DATA sinhe8<>+0(SB)/8, $0.29080955860869629131E-04
|
||||
GLOBL sinhe8<>+0(SB), RODATA, $8
|
||||
DATA sinhe9<>+0(SB)/8, $0.32311267157667725278E-05
|
||||
GLOBL sinhe9<>+0(SB), RODATA, $8
|
||||
|
||||
// Sinh returns the hyperbolic sine of the argument.
|
||||
//
|
||||
// Special cases are:
|
||||
// Sinh(±0) = ±0
|
||||
// Sinh(±Inf) = ±Inf
|
||||
// Sinh(NaN) = NaN
|
||||
// The algorithm used is minimax polynomial approximation
|
||||
// with coefficients determined with a Remez exchange algorithm.
|
||||
|
||||
TEXT ·sinhAsm(SB),NOSPLIT,$0-16
|
||||
FMOVD x+0(FP), F0
|
||||
// special case: Sinh(±0) = ±0
|
||||
FMOVD $(0.0), F1
|
||||
FCMPU F0, F1
|
||||
BEQ sinhIsZero
|
||||
// special case: Sinh(±Inf) = ±Inf
|
||||
FMOVD $1.797693134862315708145274237317043567981e+308, F1
|
||||
FCMPU F1, F0
|
||||
BLEU sinhIsInf
|
||||
FMOVD $-1.797693134862315708145274237317043567981e+308, F1
|
||||
FCMPU F1, F0
|
||||
BGT sinhIsInf
|
||||
|
||||
MOVD $sinhrodataL21<>+0(SB), R5
|
||||
WORD $0xB3120000 //ltdbr %f0,%f0
|
||||
MOVD sinhxinit<>+0(SB), R1
|
||||
FMOVD F0, F4
|
||||
MOVD R1, R3
|
||||
BLTU L19
|
||||
FMOVD F0, F2
|
||||
L2:
|
||||
WORD $0xED205010 //cdb %f2,.L22-.L21(%r5)
|
||||
BYTE $0x00
|
||||
BYTE $0x19
|
||||
BGE L15 //jnl .L15
|
||||
BVS L15
|
||||
WFCEDBS V2, V2, V0
|
||||
BEQ L20
|
||||
L12:
|
||||
FMOVD F4, F0
|
||||
FMOVD F0, ret+8(FP)
|
||||
RET
|
||||
|
||||
L15:
|
||||
WFCEDBS V2, V2, V0
|
||||
BVS L12
|
||||
MOVD $sinhxlim1<>+0(SB), R2
|
||||
FMOVD 0(R2), F0
|
||||
WFCHDBS V0, V2, V0
|
||||
BEQ L6
|
||||
WFCHEDBS V4, V2, V6
|
||||
MOVD $sinhxinf<>+0(SB), R1
|
||||
FMOVD 0(R1), F0
|
||||
BNE LEXITTAGsinh
|
||||
WFCHDBS V2, V4, V2
|
||||
BNE L16
|
||||
FNEG F0, F0
|
||||
FMOVD F0, ret+8(FP)
|
||||
RET
|
||||
|
||||
L19:
|
||||
FNEG F0, F2
|
||||
BR L2
|
||||
L6:
|
||||
MOVD $sinhxadd<>+0(SB), R2
|
||||
FMOVD 0(R2), F0
|
||||
MOVD sinhrlog2<>+0(SB), R2
|
||||
WORD $0xB3C10062 //ldgr %f6,%r2
|
||||
WFMSDB V4, V6, V0, V16
|
||||
FMOVD sinhrodataL21<>+8(SB), F6
|
||||
WFADB V0, V16, V0
|
||||
FMOVD sinhrodataL21<>+0(SB), F3
|
||||
WFMSDB V0, V6, V4, V6
|
||||
MOVD $sinhe9<>+0(SB), R2
|
||||
WFMADB V0, V3, V6, V0
|
||||
FMOVD 0(R2), F1
|
||||
MOVD $sinhe7<>+0(SB), R2
|
||||
WFMDB V0, V0, V6
|
||||
FMOVD 0(R2), F5
|
||||
MOVD $sinhe8<>+0(SB), R2
|
||||
FMOVD 0(R2), F3
|
||||
MOVD $sinhe6<>+0(SB), R2
|
||||
WFMADB V6, V1, V5, V1
|
||||
FMOVD 0(R2), F5
|
||||
MOVD $sinhe5<>+0(SB), R2
|
||||
FMOVD 0(R2), F7
|
||||
MOVD $sinhe3<>+0(SB), R2
|
||||
WFMADB V6, V3, V5, V3
|
||||
FMOVD 0(R2), F5
|
||||
MOVD $sinhe4<>+0(SB), R2
|
||||
WFMADB V6, V7, V5, V7
|
||||
FMOVD 0(R2), F5
|
||||
MOVD $sinhe2<>+0(SB), R2
|
||||
VLEG $0, 0(R2), V20
|
||||
WFMDB V6, V6, V18
|
||||
WFMADB V6, V5, V20, V5
|
||||
WFMADB V1, V18, V7, V1
|
||||
FNEG F0, F0
|
||||
WFMADB V3, V18, V5, V3
|
||||
MOVD $sinhe1<>+0(SB), R3
|
||||
WFCEDBS V2, V4, V2
|
||||
FMOVD 0(R3), F5
|
||||
MOVD $sinhe0<>+0(SB), R3
|
||||
WFMADB V6, V1, V5, V1
|
||||
FMOVD 0(R3), F5
|
||||
VLGVG $0, V16, R2
|
||||
WFMADB V6, V3, V5, V6
|
||||
RLL $3, R2, R2
|
||||
WORD $0xEC12000F //risbgn %r1,%r2,64-64+0,64-64+0+16-1,64-0-16
|
||||
BYTE $0x30
|
||||
BYTE $0x59
|
||||
BEQ L9
|
||||
WFMSDB V0, V1, V6, V0
|
||||
MOVD $sinhx4ff<>+0(SB), R3
|
||||
FNEG F0, F0
|
||||
FMOVD 0(R3), F2
|
||||
FMUL F2, F0
|
||||
ANDW $0xFFFF, R2
|
||||
WORD $0xA53FEFB6 //llill %r3,61366
|
||||
SUBW R2, R3, R2
|
||||
WORD $0xEC12000F //risbgn %r1,%r2,64-64+0,64-64+0+16-1,64-0-16
|
||||
BYTE $0x30
|
||||
BYTE $0x59
|
||||
WORD $0xB3C10021 //ldgr %f2,%r1
|
||||
FMUL F2, F0
|
||||
FMOVD F0, ret+8(FP)
|
||||
RET
|
||||
|
||||
L20:
|
||||
MOVD $sinhxadd<>+0(SB), R2
|
||||
FMOVD 0(R2), F2
|
||||
MOVD sinhrlog2<>+0(SB), R2
|
||||
WORD $0xB3C10002 //ldgr %f0,%r2
|
||||
WFMSDB V4, V0, V2, V6
|
||||
FMOVD sinhrodataL21<>+8(SB), F0
|
||||
FADD F6, F2
|
||||
MOVD $sinhe9<>+0(SB), R2
|
||||
FMSUB F0, F2, F4, F4
|
||||
FMOVD 0(R2), F1
|
||||
FMOVD sinhrodataL21<>+0(SB), F3
|
||||
MOVD $sinhe7<>+0(SB), R2
|
||||
FMADD F3, F2, F4, F4
|
||||
FMOVD 0(R2), F0
|
||||
MOVD $sinhe8<>+0(SB), R2
|
||||
WFMDB V4, V4, V2
|
||||
FMOVD 0(R2), F3
|
||||
MOVD $sinhe6<>+0(SB), R2
|
||||
FMOVD 0(R2), F5
|
||||
WORD $0xB3CD0026 //lgdr %r2,%f6
|
||||
RLL $3, R2, R2
|
||||
WORD $0xEC12000F //risbgn %r1,%r2,64-64+0,64-64+0+16-1,64-0-16
|
||||
BYTE $0x30
|
||||
BYTE $0x59
|
||||
WFMADB V2, V1, V0, V1
|
||||
WORD $0xB3C10001 //ldgr %f0,%r1
|
||||
MOVD $sinhe5<>+0(SB), R1
|
||||
WFMADB V2, V3, V5, V3
|
||||
FMOVD 0(R1), F5
|
||||
MOVD $sinhe3<>+0(SB), R1
|
||||
FMOVD 0(R1), F6
|
||||
WFMDB V2, V2, V7
|
||||
WFMADB V2, V5, V6, V5
|
||||
WORD $0xA7487FB6 //lhi %r4,32694
|
||||
FNEG F4, F4
|
||||
ANDW $0xFFFF, R2
|
||||
SUBW R2, R4, R2
|
||||
WORD $0xEC32000F //risbgn %r3,%r2,64-64+0,64-64+0+16-1,64-0-16
|
||||
BYTE $0x30
|
||||
BYTE $0x59
|
||||
WORD $0xB3C10063 //ldgr %f6,%r3
|
||||
WFADB V0, V6, V16
|
||||
MOVD $sinhe4<>+0(SB), R1
|
||||
WFMADB V1, V7, V5, V1
|
||||
WFMDB V4, V16, V4
|
||||
FMOVD 0(R1), F5
|
||||
MOVD $sinhe2<>+0(SB), R1
|
||||
VLEG $0, 0(R1), V16
|
||||
MOVD $sinhe1<>+0(SB), R1
|
||||
WFMADB V2, V5, V16, V5
|
||||
VLEG $0, 0(R1), V16
|
||||
WFMADB V3, V7, V5, V3
|
||||
WFMADB V2, V1, V16, V1
|
||||
FSUB F6, F0
|
||||
FMUL F1, F4
|
||||
MOVD $sinhe0<>+0(SB), R1
|
||||
FMOVD 0(R1), F6
|
||||
WFMADB V2, V3, V6, V2
|
||||
WFMADB V0, V2, V4, V0
|
||||
FMOVD F0, ret+8(FP)
|
||||
RET
|
||||
|
||||
L9:
|
||||
WFMADB V0, V1, V6, V0
|
||||
MOVD $sinhx4ff<>+0(SB), R3
|
||||
FMOVD 0(R3), F2
|
||||
FMUL F2, F0
|
||||
WORD $0xA72AF000 //ahi %r2,-4096
|
||||
WORD $0xEC12000F //risbgn %r1,%r2,64-64+0,64-64+0+16-1,64-0-16
|
||||
BYTE $0x30
|
||||
BYTE $0x59
|
||||
WORD $0xB3C10021 //ldgr %f2,%r1
|
||||
FMUL F2, F0
|
||||
FMOVD F0, ret+8(FP)
|
||||
RET
|
||||
|
||||
L16:
|
||||
FMOVD F0, ret+8(FP)
|
||||
RET
|
||||
|
||||
LEXITTAGsinh:
|
||||
sinhIsInf:
|
||||
sinhIsZero:
|
||||
FMOVD F0, ret+8(FP)
|
||||
RET
|
17
src/math/sinh_stub.s
Normal file
17
src/math/sinh_stub.s
Normal file
@ -0,0 +1,17 @@
|
||||
// Copyright 2016 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build 386 amd64 amd64p32 arm
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// Sinh is the exported entry point on the architectures selected by this
// file's build constraint; it simply jumps to the Go implementation sinh.
TEXT ·Sinh(SB),NOSPLIT,$0
|
||||
JMP ·sinh(SB)
|
||||
|
||||
// Cosh is the exported entry point on the architectures selected by this
// file's build constraint; it simply jumps to the Go implementation cosh.
TEXT ·Cosh(SB),NOSPLIT,$0
|
||||
JMP ·cosh(SB)
|
||||
|
||||
// Tanh is the exported entry point on the architectures selected by this
// file's build constraint; it simply jumps to the Go implementation tanh.
TEXT ·Tanh(SB),NOSPLIT,$0
|
||||
JMP ·tanh(SB)
|
||||
|
@ -21,6 +21,9 @@ TEXT ·Atan(SB),NOSPLIT,$0
|
||||
TEXT ·Exp2(SB),NOSPLIT,$0
|
||||
B ·exp2(SB)
|
||||
|
||||
TEXT ·Cosh(SB),NOSPLIT,$0
|
||||
B ·cosh(SB)
|
||||
|
||||
TEXT ·Expm1(SB),NOSPLIT,$0
|
||||
B ·expm1(SB)
|
||||
|
||||
@ -60,8 +63,14 @@ TEXT ·Sincos(SB),NOSPLIT,$0
|
||||
TEXT ·Sin(SB),NOSPLIT,$0
|
||||
B ·sin(SB)
|
||||
|
||||
TEXT ·Sinh(SB),NOSPLIT,$0
|
||||
B ·sinh(SB)
|
||||
|
||||
TEXT ·Cos(SB),NOSPLIT,$0
|
||||
B ·cos(SB)
|
||||
|
||||
TEXT ·Tan(SB),NOSPLIT,$0
|
||||
B ·tan(SB)
|
||||
|
||||
TEXT ·Tanh(SB),NOSPLIT,$0
|
||||
B ·tanh(SB)
|
||||
|
@ -81,11 +81,20 @@ TEXT ·Sincos(SB),NOSPLIT,$0
|
||||
TEXT ·Sin(SB),NOSPLIT,$0
|
||||
JMP ·sin(SB)
|
||||
|
||||
TEXT ·Sinh(SB),NOSPLIT,$0
|
||||
JMP ·sinh(SB)
|
||||
|
||||
TEXT ·Cos(SB),NOSPLIT,$0
|
||||
JMP ·cos(SB)
|
||||
|
||||
TEXT ·Cosh(SB),NOSPLIT,$0
|
||||
JMP ·cosh(SB)
|
||||
|
||||
TEXT ·Sqrt(SB),NOSPLIT,$0
|
||||
JMP ·sqrt(SB)
|
||||
|
||||
TEXT ·Tan(SB),NOSPLIT,$0
|
||||
JMP ·tan(SB)
|
||||
|
||||
TEXT ·Tanh(SB),NOSPLIT,$0
|
||||
JMP ·tanh(SB)
|
||||
|
@ -81,8 +81,18 @@ TEXT ·Sincos(SB),NOSPLIT,$0
|
||||
TEXT ·Sin(SB),NOSPLIT,$0
|
||||
JMP ·sin(SB)
|
||||
|
||||
TEXT ·Sinh(SB),NOSPLIT,$0
|
||||
JMP ·sinh(SB)
|
||||
|
||||
TEXT ·Cos(SB),NOSPLIT,$0
|
||||
JMP ·cos(SB)
|
||||
|
||||
TEXT ·Cosh(SB),NOSPLIT,$0
|
||||
JMP ·cosh(SB)
|
||||
|
||||
TEXT ·Tan(SB),NOSPLIT,$0
|
||||
JMP ·tan(SB)
|
||||
|
||||
TEXT ·Tanh(SB),NOSPLIT,$0
|
||||
JMP ·tanh(SB)
|
||||
|
||||
|
@ -72,8 +72,17 @@ TEXT ·Sincos(SB),NOSPLIT,$0
|
||||
TEXT ·Sin(SB),NOSPLIT,$0
|
||||
BR ·sin(SB)
|
||||
|
||||
TEXT ·Sinh(SB),NOSPLIT,$0
|
||||
BR ·sinh(SB)
|
||||
|
||||
TEXT ·Cos(SB),NOSPLIT,$0
|
||||
BR ·cos(SB)
|
||||
|
||||
TEXT ·Cosh(SB),NOSPLIT,$0
|
||||
BR ·cosh(SB)
|
||||
|
||||
TEXT ·Tan(SB),NOSPLIT,$0
|
||||
BR ·tan(SB)
|
||||
|
||||
TEXT ·Tanh(SB),NOSPLIT,$0
|
||||
BR ·tanh(SB)
|
||||
|
@ -2,7 +2,7 @@
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include "../runtime/textflag.h"
|
||||
#include "textflag.h"
|
||||
|
||||
TEXT ·Asin(SB),NOSPLIT,$0
|
||||
BR ·asin(SB)
|
||||
@ -34,9 +34,6 @@ TEXT ·Hypot(SB),NOSPLIT,$0
|
||||
TEXT ·Ldexp(SB),NOSPLIT,$0
|
||||
BR ·ldexp(SB)
|
||||
|
||||
TEXT ·Log10(SB),NOSPLIT,$0
|
||||
BR ·log10(SB)
|
||||
|
||||
TEXT ·Log2(SB),NOSPLIT,$0
|
||||
BR ·log2(SB)
|
||||
|
||||
@ -58,11 +55,154 @@ TEXT ·Remainder(SB),NOSPLIT,$0
|
||||
TEXT ·Sincos(SB),NOSPLIT,$0
|
||||
BR ·sincos(SB)
|
||||
|
||||
TEXT ·Sin(SB),NOSPLIT,$0
|
||||
BR ·sin(SB)
|
||||
|
||||
TEXT ·Cos(SB),NOSPLIT,$0
|
||||
BR ·cos(SB)
|
||||
|
||||
TEXT ·Tan(SB),NOSPLIT,$0
|
||||
BR ·tan(SB)
|
||||
|
||||
// hasVectorFacility reports, as a one-byte result, whether the assembly
// routines may use vector instructions. It stores the facility list into a
// 24-byte stack buffer with STFLE, tests mask 0x40 in the first byte of the
// third doubleword, and then round-trips the value 0xF through V16.
// The result is 1 when both checks pass and 0 otherwise.
// NOTE(review): byte 16, mask 0x40 should be facility bit 129 (the vector
// facility in z/Architecture) - confirm against the Principles of Operation.
|
||||
TEXT ·hasVectorFacility(SB),NOSPLIT,$24-1
|
||||
MOVD $x-24(SP), R1 // R1 = address of the 24-byte scratch buffer
|
||||
XC $24, 0(R1), 0(R1) // clear the storage
|
||||
MOVD $2, R0 // R0 is the number of double words stored -1
|
||||
WORD $0xB2B01000 // STFLE 0(R1)
|
||||
XOR R0, R0 // reset the value of R0
|
||||
MOVBZ z-8(SP), R1 // load byte 16 of the buffer (start of the third doubleword)
|
||||
AND $0x40, R1 // keep only the bit being tested
|
||||
BEQ novector // bit clear: report no vector support
|
||||
vectorinstalled:
|
||||
// check if the vector instruction has been enabled
|
||||
VLEIB $0, $0xF, V16 // write 0xF into byte element 0 of V16
|
||||
VLGVB $0, V16, R1 // read byte element 0 of V16 back into R1
|
||||
CMPBNE R1, $0xF, novector // value did not survive the round trip
|
||||
MOVB $1, ret+0(FP) // have vx
|
||||
RET
|
||||
novector:
|
||||
MOVB $0, ret+0(FP) // no vx
|
||||
RET
|
||||
|
||||
// Log10 dispatches through the function pointer held in log10vectorfacility.
TEXT ·Log10(SB),NOSPLIT,$0
|
||||
MOVD log10vectorfacility+0x00(SB),R1
|
||||
BR (R1)
|
||||
|
||||
// log10TrampolineSetup is the initial target of log10vectorfacility.
// It reads hasVX, overwrites the pointer with the address of either
// log10Asm (hasVX == 1) or log10, and then branches to the chosen
// implementation so the current call is still serviced.
TEXT ·log10TrampolineSetup(SB),NOSPLIT, $0
|
||||
MOVB ·hasVX(SB), R1
|
||||
CMPBEQ R1, $1, vectorimpl // vectorfacility = 1, vector supported
|
||||
// No vector support: install and run the Go implementation.
MOVD $log10vectorfacility+0x00(SB), R1
|
||||
MOVD $·log10(SB), R2
|
||||
MOVD R2, 0(R1)
|
||||
BR ·log10(SB)
|
||||
vectorimpl:
|
||||
// Vector support present: install and run the assembly implementation.
MOVD $log10vectorfacility+0x00(SB), R1
|
||||
MOVD $·log10Asm(SB), R2
|
||||
MOVD R2, 0(R1)
|
||||
BR ·log10Asm(SB)
|
||||
|
||||
// log10vectorfacility is an 8-byte function pointer that initially refers
// to log10TrampolineSetup.
GLOBL log10vectorfacility+0x00(SB), NOPTR, $8
|
||||
DATA log10vectorfacility+0x00(SB)/8, $·log10TrampolineSetup(SB)
|
||||
|
||||
|
||||
// Cos dispatches through the function pointer held in cosvectorfacility.
TEXT ·Cos(SB),NOSPLIT,$0
|
||||
MOVD cosvectorfacility+0x00(SB),R1
|
||||
BR (R1)
|
||||
|
||||
// cosTrampolineSetup is the initial target of cosvectorfacility.
// It reads hasVX, overwrites the pointer with the address of either
// cosAsm (hasVX == 1) or cos, and then branches to the chosen
// implementation so the current call is still serviced.
TEXT ·cosTrampolineSetup(SB),NOSPLIT, $0
|
||||
MOVB ·hasVX(SB), R1
|
||||
CMPBEQ R1, $1, vectorimpl // vectorfacility = 1, vector supported
|
||||
// No vector support: install and run the Go implementation.
MOVD $cosvectorfacility+0x00(SB), R1
|
||||
MOVD $·cos(SB), R2
|
||||
MOVD R2, 0(R1)
|
||||
BR ·cos(SB)
|
||||
vectorimpl:
|
||||
// Vector support present: install and run the assembly implementation.
MOVD $cosvectorfacility+0x00(SB), R1
|
||||
MOVD $·cosAsm(SB), R2
|
||||
MOVD R2, 0(R1)
|
||||
BR ·cosAsm(SB)
|
||||
|
||||
// cosvectorfacility is an 8-byte function pointer that initially refers
// to cosTrampolineSetup.
GLOBL cosvectorfacility+0x00(SB), NOPTR, $8
|
||||
DATA cosvectorfacility+0x00(SB)/8, $·cosTrampolineSetup(SB)
|
||||
|
||||
|
||||
// Cosh dispatches through the function pointer held in coshvectorfacility.
TEXT ·Cosh(SB),NOSPLIT,$0
|
||||
MOVD coshvectorfacility+0x00(SB),R1
|
||||
BR (R1)
|
||||
|
||||
// coshTrampolineSetup is the initial target of coshvectorfacility.
// It reads hasVX, overwrites the pointer with the address of either
// coshAsm (hasVX == 1) or cosh, and then branches to the chosen
// implementation so the current call is still serviced.
TEXT ·coshTrampolineSetup(SB),NOSPLIT, $0
|
||||
MOVB ·hasVX(SB), R1
|
||||
CMPBEQ R1, $1, vectorimpl // vectorfacility = 1, vector supported
|
||||
// No vector support: install and run the Go implementation.
MOVD $coshvectorfacility+0x00(SB), R1
|
||||
MOVD $·cosh(SB), R2
|
||||
MOVD R2, 0(R1)
|
||||
BR ·cosh(SB)
|
||||
vectorimpl:
|
||||
// Vector support present: install and run the assembly implementation.
MOVD $coshvectorfacility+0x00(SB), R1
|
||||
MOVD $·coshAsm(SB), R2
|
||||
MOVD R2, 0(R1)
|
||||
BR ·coshAsm(SB)
|
||||
|
||||
// coshvectorfacility is an 8-byte function pointer that initially refers
// to coshTrampolineSetup.
GLOBL coshvectorfacility+0x00(SB), NOPTR, $8
|
||||
DATA coshvectorfacility+0x00(SB)/8, $·coshTrampolineSetup(SB)
|
||||
|
||||
|
||||
// Sin dispatches through the function pointer held in sinvectorfacility.
TEXT ·Sin(SB),NOSPLIT,$0
|
||||
MOVD sinvectorfacility+0x00(SB),R1
|
||||
BR (R1)
|
||||
|
||||
// sinTrampolineSetup is the initial target of sinvectorfacility.
// It reads hasVX, overwrites the pointer with the address of either
// sinAsm (hasVX == 1) or sin, and then branches to the chosen
// implementation so the current call is still serviced.
TEXT ·sinTrampolineSetup(SB),NOSPLIT, $0
|
||||
MOVB ·hasVX(SB), R1
|
||||
CMPBEQ R1, $1, vectorimpl // vectorfacility = 1, vector supported
|
||||
// No vector support: install and run the Go implementation.
MOVD $sinvectorfacility+0x00(SB), R1
|
||||
MOVD $·sin(SB), R2
|
||||
MOVD R2, 0(R1)
|
||||
BR ·sin(SB)
|
||||
vectorimpl:
|
||||
// Vector support present: install and run the assembly implementation.
MOVD $sinvectorfacility+0x00(SB), R1
|
||||
MOVD $·sinAsm(SB), R2
|
||||
MOVD R2, 0(R1)
|
||||
BR ·sinAsm(SB)
|
||||
|
||||
// sinvectorfacility is an 8-byte function pointer that initially refers
// to sinTrampolineSetup.
GLOBL sinvectorfacility+0x00(SB), NOPTR, $8
|
||||
DATA sinvectorfacility+0x00(SB)/8, $·sinTrampolineSetup(SB)
|
||||
|
||||
|
||||
// Sinh dispatches through the function pointer held in sinhvectorfacility.
TEXT ·Sinh(SB),NOSPLIT,$0
|
||||
MOVD sinhvectorfacility+0x00(SB),R1
|
||||
BR (R1)
|
||||
|
||||
// sinhTrampolineSetup is the initial target of sinhvectorfacility.
// It reads hasVX, overwrites the pointer with the address of either
// sinhAsm (hasVX == 1) or sinh, and then branches to the chosen
// implementation so the current call is still serviced.
TEXT ·sinhTrampolineSetup(SB),NOSPLIT, $0
|
||||
MOVB ·hasVX(SB), R1
|
||||
CMPBEQ R1, $1, vectorimpl // vectorfacility = 1, vector supported
|
||||
// No vector support: install and run the Go implementation.
MOVD $sinhvectorfacility+0x00(SB), R1
|
||||
MOVD $·sinh(SB), R2
|
||||
MOVD R2, 0(R1)
|
||||
BR ·sinh(SB)
|
||||
vectorimpl:
|
||||
// Vector support present: install and run the assembly implementation.
MOVD $sinhvectorfacility+0x00(SB), R1
|
||||
MOVD $·sinhAsm(SB), R2
|
||||
MOVD R2, 0(R1)
|
||||
BR ·sinhAsm(SB)
|
||||
|
||||
// sinhvectorfacility is an 8-byte function pointer that initially refers
// to sinhTrampolineSetup.
GLOBL sinhvectorfacility+0x00(SB), NOPTR, $8
|
||||
DATA sinhvectorfacility+0x00(SB)/8, $·sinhTrampolineSetup(SB)
|
||||
|
||||
|
||||
|
||||
// Tanh dispatches through the function pointer held in tanhvectorfacility.
TEXT ·Tanh(SB),NOSPLIT,$0
|
||||
MOVD tanhvectorfacility+0x00(SB),R1
|
||||
BR (R1)
|
||||
|
||||
// tanhTrampolineSetup is the initial target of tanhvectorfacility.
// It reads hasVX, overwrites the pointer with the address of either
// tanhAsm (hasVX == 1) or tanh, and then branches to the chosen
// implementation so the current call is still serviced.
TEXT ·tanhTrampolineSetup(SB),NOSPLIT, $0
|
||||
MOVB ·hasVX(SB), R1
|
||||
CMPBEQ R1, $1, vectorimpl // vectorfacility = 1, vector supported
|
||||
// No vector support: install and run the Go implementation.
MOVD $tanhvectorfacility+0x00(SB), R1
|
||||
MOVD $·tanh(SB), R2
|
||||
MOVD R2, 0(R1)
|
||||
BR ·tanh(SB)
|
||||
vectorimpl:
|
||||
// Vector support present: install and run the assembly implementation.
MOVD $tanhvectorfacility+0x00(SB), R1
|
||||
MOVD $·tanhAsm(SB), R2
|
||||
MOVD R2, 0(R1)
|
||||
BR ·tanhAsm(SB)
|
||||
|
||||
// tanhvectorfacility is an 8-byte function pointer that initially refers
// to tanhTrampolineSetup.
GLOBL tanhvectorfacility+0x00(SB), NOPTR, $8
|
||||
DATA tanhvectorfacility+0x00(SB)/8, $·tanhTrampolineSetup(SB)
|
||||
|
||||
|
||||
|
@ -71,7 +71,9 @@ var tanhQ = [...]float64{
|
||||
// Tanh(±0) = ±0
|
||||
// Tanh(±Inf) = ±1
|
||||
// Tanh(NaN) = NaN
|
||||
func Tanh(x float64) float64 {
|
||||
func Tanh(x float64) float64
|
||||
|
||||
func tanh(x float64) float64 {
|
||||
const MAXLOG = 8.8029691931113054295988e+01 // log(2**127)
|
||||
z := Abs(x)
|
||||
switch {
|
||||
|
173
src/math/tanh_s390x.s
Normal file
173
src/math/tanh_s390x.s
Normal file
@ -0,0 +1,173 @@
|
||||
// Copyright 2016 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// Minimax polynomial approximations
|
||||
DATA tanhrodataL18<>+0(SB)/8, $-1.0
|
||||
DATA tanhrodataL18<>+8(SB)/8, $-2.0
|
||||
DATA tanhrodataL18<>+16(SB)/8, $1.0
|
||||
DATA tanhrodataL18<>+24(SB)/8, $2.0
|
||||
DATA tanhrodataL18<>+32(SB)/8, $0.20000000000000011868E+01
|
||||
DATA tanhrodataL18<>+40(SB)/8, $0.13333333333333341256E+01
|
||||
DATA tanhrodataL18<>+48(SB)/8, $0.26666666663549111502E+00
|
||||
DATA tanhrodataL18<>+56(SB)/8, $0.66666666658721844678E+00
|
||||
DATA tanhrodataL18<>+64(SB)/8, $0.88890217768964374821E-01
|
||||
DATA tanhrodataL18<>+72(SB)/8, $0.25397199429103821138E-01
|
||||
DATA tanhrodataL18<>+80(SB)/8, $-.346573590279972643E+00
|
||||
DATA tanhrodataL18<>+88(SB)/8, $20.E0
|
||||
GLOBL tanhrodataL18<>+0(SB), RODATA, $96
|
||||
|
||||
// Constants
|
||||
DATA tanhrlog2<>+0(SB)/8, $0x4007154760000000
|
||||
GLOBL tanhrlog2<>+0(SB), RODATA, $8
|
||||
DATA tanhxadd<>+0(SB)/8, $0xc2f0000100003ff0
|
||||
GLOBL tanhxadd<>+0(SB), RODATA, $8
|
||||
DATA tanhxmone<>+0(SB)/8, $-1.0
|
||||
GLOBL tanhxmone<>+0(SB), RODATA, $8
|
||||
DATA tanhxzero<>+0(SB)/8, $0
|
||||
GLOBL tanhxzero<>+0(SB), RODATA, $8
|
||||
|
||||
// Polynomial coefficients
|
||||
DATA tanhtab<>+0(SB)/8, $0.000000000000000000E+00
|
||||
DATA tanhtab<>+8(SB)/8, $-.171540871271399150E-01
|
||||
DATA tanhtab<>+16(SB)/8, $-.306597931864376363E-01
|
||||
DATA tanhtab<>+24(SB)/8, $-.410200970469965021E-01
|
||||
DATA tanhtab<>+32(SB)/8, $-.486343079978231466E-01
|
||||
DATA tanhtab<>+40(SB)/8, $-.538226193725835820E-01
|
||||
DATA tanhtab<>+48(SB)/8, $-.568439602538111520E-01
|
||||
DATA tanhtab<>+56(SB)/8, $-.579091847395528847E-01
|
||||
DATA tanhtab<>+64(SB)/8, $-.571909584179366341E-01
|
||||
DATA tanhtab<>+72(SB)/8, $-.548312665987204407E-01
|
||||
DATA tanhtab<>+80(SB)/8, $-.509471843643441085E-01
|
||||
DATA tanhtab<>+88(SB)/8, $-.456353588448863359E-01
|
||||
DATA tanhtab<>+96(SB)/8, $-.389755254243262365E-01
|
||||
DATA tanhtab<>+104(SB)/8, $-.310332908285244231E-01
|
||||
DATA tanhtab<>+112(SB)/8, $-.218623539150173528E-01
|
||||
DATA tanhtab<>+120(SB)/8, $-.115062908917949451E-01
|
||||
GLOBL tanhtab<>+0(SB), RODATA, $128
|
||||
|
||||
// Tanh returns the hyperbolic tangent of the argument.
|
||||
//
|
||||
// Special cases are:
|
||||
// Tanh(±0) = ±0
|
||||
// Tanh(±Inf) = ±1
|
||||
// Tanh(NaN) = NaN
|
||||
// The algorithm used is minimax polynomial approximation using a table of
|
||||
// polynomial coefficients determined with a Remez exchange algorithm.
|
||||
|
||||
TEXT ·tanhAsm(SB),NOSPLIT,$0-16
|
||||
FMOVD x+0(FP), F0
|
||||
// special case: Tanh(±0) = ±0
|
||||
FMOVD $(0.0), F1
|
||||
FCMPU F0, F1
|
||||
BEQ tanhIsZero
|
||||
MOVD $tanhrodataL18<>+0(SB), R5
|
||||
WORD $0xB3120000 //ltdbr %f0,%f0
|
||||
MOVD $0x4034000000000000, R1
|
||||
BLTU L15
|
||||
FMOVD F0, F1
|
||||
L2:
|
||||
MOVD $tanhxadd<>+0(SB), R2
|
||||
FMOVD 0(R2), F2
|
||||
MOVD tanhrlog2<>+0(SB), R2
|
||||
WORD $0xB3C10042 //ldgr %f4,%r2
|
||||
WFMSDB V0, V4, V2, V4
|
||||
MOVD $tanhtab<>+0(SB), R3
|
||||
WORD $0xB3CD0024 //lgdr %r2,%f4
|
||||
WORD $0xEC4239BC //risbg %r4,%r2,57,128+60,3
|
||||
BYTE $0x03
|
||||
BYTE $0x55
|
||||
WORD $0xED105058 //cdb %f1,.L19-.L18(%r5)
|
||||
BYTE $0x00
|
||||
BYTE $0x19
|
||||
WORD $0xEC12000F //risbgn %r1,%r2,64-64+0,64-64+0+16-1,64-0-16
|
||||
BYTE $0x30
|
||||
BYTE $0x59
|
||||
WORD $0x68543000 //ld %f5,0(%r4,%r3)
|
||||
WORD $0xB3C10061 //ldgr %f6,%r1
|
||||
BLT L3
|
||||
MOVD $tanhxzero<>+0(SB), R1
|
||||
FMOVD 0(R1), F2
|
||||
WFCHDBS V0, V2, V4
|
||||
BEQ L9
|
||||
WFCHDBS V2, V0, V2
|
||||
BNE L1
|
||||
MOVD $tanhxmone<>+0(SB), R1
|
||||
FMOVD 0(R1), F0
|
||||
FMOVD F0, ret+8(FP)
|
||||
RET
|
||||
|
||||
L3:
|
||||
FADD F4, F2
|
||||
FMOVD tanhrodataL18<>+80(SB), F4
|
||||
FMADD F4, F2, F0, F0
|
||||
FMOVD tanhrodataL18<>+72(SB), F1
|
||||
WFMDB V0, V0, V3
|
||||
FMOVD tanhrodataL18<>+64(SB), F2
|
||||
WFMADB V0, V1, V2, V1
|
||||
FMOVD tanhrodataL18<>+56(SB), F4
|
||||
FMOVD tanhrodataL18<>+48(SB), F2
|
||||
WFMADB V1, V3, V4, V1
|
||||
FMOVD tanhrodataL18<>+40(SB), F4
|
||||
WFMADB V3, V2, V4, V2
|
||||
FMOVD tanhrodataL18<>+32(SB), F4
|
||||
WORD $0xB9270022 //lhr %r2,%r2
|
||||
WFMADB V3, V1, V4, V1
|
||||
FMOVD tanhrodataL18<>+24(SB), F4
|
||||
WFMADB V3, V2, V4, V3
|
||||
WFMADB V0, V5, V0, V2
|
||||
WFMADB V0, V1, V3, V0
|
||||
WORD $0xA7183ECF //lhi %r1,16079
|
||||
WFMADB V0, V2, V5, V2
|
||||
FMUL F6, F2
|
||||
MOVW R2, R10
|
||||
MOVW R1, R11
|
||||
CMPBLE R10, R11, L16
|
||||
FMOVD F6, F0
|
||||
WORD $0xED005010 //adb %f0,.L28-.L18(%r5)
|
||||
BYTE $0x00
|
||||
BYTE $0x1A
|
||||
WORD $0xA7184330 //lhi %r1,17200
|
||||
FADD F2, F0
|
||||
MOVW R2, R10
|
||||
MOVW R1, R11
|
||||
CMPBGT R10, R11, L17
|
||||
WORD $0xED605010 //sdb %f6,.L28-.L18(%r5)
|
||||
BYTE $0x00
|
||||
BYTE $0x1B
|
||||
FADD F6, F2
|
||||
WFDDB V0, V2, V0
|
||||
FMOVD F0, ret+8(FP)
|
||||
RET
|
||||
|
||||
L9:
|
||||
FMOVD tanhrodataL18<>+16(SB), F0
|
||||
L1:
|
||||
FMOVD F0, ret+8(FP)
|
||||
RET
|
||||
|
||||
L15:
|
||||
FNEG F0, F1
|
||||
BR L2
|
||||
L16:
|
||||
FADD F6, F2
|
||||
FMOVD tanhrodataL18<>+8(SB), F0
|
||||
FMADD F4, F2, F0, F0
|
||||
FMOVD tanhrodataL18<>+0(SB), F4
|
||||
FNEG F0, F0
|
||||
WFMADB V0, V2, V4, V0
|
||||
FMOVD F0, ret+8(FP)
|
||||
RET
|
||||
|
||||
L17:
|
||||
WFDDB V0, V4, V0
|
||||
FMOVD tanhrodataL18<>+16(SB), F2
|
||||
WFSDB V0, V2, V0
|
||||
FMOVD F0, ret+8(FP)
|
||||
RET
|
||||
|
||||
tanhIsZero: //return ±0
|
||||
FMOVD F0, ret+8(FP)
|
||||
RET
|
Loading…
Reference in New Issue
Block a user