mirror of
https://github.com/golang/go
synced 2024-11-19 21:34:45 -07:00
9459c03b29
This change adds a better asm implementation of addVV for ppc64x, with speedups up to nearly 3x in the best cases. benchmark old ns/op new ns/op delta BenchmarkAddVV/1-8 7.33 5.81 -20.74% BenchmarkAddVV/2-8 8.72 6.49 -25.57% BenchmarkAddVV/3-8 10.5 7.08 -32.57% BenchmarkAddVV/4-8 12.7 7.57 -40.39% BenchmarkAddVV/5-8 14.3 8.06 -43.64% BenchmarkAddVV/10-8 27.6 11.1 -59.78% BenchmarkAddVV/100-8 218 82.4 -62.20% BenchmarkAddVV/1000-8 2064 718 -65.21% BenchmarkAddVV/10000-8 20536 7153 -65.17% BenchmarkAddVV/100000-8 211004 72403 -65.69% benchmark old MB/s new MB/s speedup BenchmarkAddVV/1-8 8729.74 11006.26 1.26x BenchmarkAddVV/2-8 14683.65 19707.55 1.34x BenchmarkAddVV/3-8 18226.96 27103.63 1.49x BenchmarkAddVV/4-8 20204.50 33805.81 1.67x BenchmarkAddVV/5-8 22348.64 39694.06 1.78x BenchmarkAddVV/10-8 23212.74 57631.08 2.48x BenchmarkAddVV/100-8 29300.07 77629.53 2.65x BenchmarkAddVV/1000-8 31000.56 89094.54 2.87x BenchmarkAddVV/10000-8 31163.61 89469.16 2.87x BenchmarkAddVV/100000-8 30331.16 88393.73 2.91x It also adds the use of CTR for the loop counter in subVV, instead of manually updating the loop counter. This is slightly faster. Change-Id: Ic4b05cad384fd057972d46a5618ed5c3039d7460 Reviewed-on: https://go-review.googlesource.com/41010 Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Lynn Boger <laboger@linux.vnet.ibm.com> |
||
---|---|---|
.. | ||
big | ||
bits | ||
cmplx | ||
rand | ||
abs.go | ||
acosh.go | ||
all_test.go | ||
arith_s390x_test.go | ||
arith_s390x.go | ||
asin_386.s | ||
asin_amd64.s | ||
asin_amd64p32.s | ||
asin_arm.s | ||
asin.go | ||
asinh.go | ||
atan2_386.s | ||
atan2_amd64.s | ||
atan2_amd64p32.s | ||
atan2_arm.s | ||
atan2.go | ||
atan_386.s | ||
atan_amd64.s | ||
atan_amd64p32.s | ||
atan_arm.s | ||
atan.go | ||
atanh.go | ||
bits.go | ||
cbrt.go | ||
const.go | ||
copysign.go | ||
cosh_s390x.s | ||
dim_386.s | ||
dim_amd64.s | ||
dim_amd64p32.s | ||
dim_arm64.s | ||
dim_arm.s | ||
dim_s390x.s | ||
dim.go | ||
erf.go | ||
exp2_386.s | ||
exp2_amd64.s | ||
exp2_amd64p32.s | ||
exp2_arm.s | ||
exp_386.s | ||
exp_amd64.s | ||
exp_amd64p32.s | ||
exp_arm.s | ||
exp.go | ||
expm1_386.s | ||
expm1_amd64.s | ||
expm1_amd64p32.s | ||
expm1_arm.s | ||
expm1.go | ||
export_s390x_test.go | ||
export_test.go | ||
floor_386.s | ||
floor_amd64.s | ||
floor_amd64p32.s | ||
floor_arm64.s | ||
floor_arm.s | ||
floor_asm.go | ||
floor_ppc64x.s | ||
floor_s390x.s | ||
floor.go | ||
frexp_386.s | ||
frexp_amd64.s | ||
frexp_amd64p32.s | ||
frexp_arm.s | ||
frexp.go | ||
gamma.go | ||
hypot_386.s | ||
hypot_amd64.s | ||
hypot_amd64p32.s | ||
hypot_arm.s | ||
hypot.go | ||
j0.go | ||
j1.go | ||
jn.go | ||
ldexp_386.s | ||
ldexp_amd64.s | ||
ldexp_amd64p32.s | ||
ldexp_arm.s | ||
ldexp.go | ||
lgamma.go | ||
log1p_386.s | ||
log1p_amd64.s | ||
log1p_amd64p32.s | ||
log1p_arm.s | ||
log1p.go | ||
log10_386.s | ||
log10_amd64.s | ||
log10_amd64p32.s | ||
log10_arm.s | ||
log10_s390x.s | ||
log10.go | ||
log_386.s | ||
log_amd64.s | ||
log_amd64p32.s | ||
log_arm.s | ||
log.go | ||
logb.go | ||
mod_386.s | ||
mod_amd64.s | ||
mod_amd64p32.s | ||
mod_arm.s | ||
mod.go | ||
modf_386.s | ||
modf_amd64.s | ||
modf_amd64p32.s | ||
modf_arm64.s | ||
modf_arm.s | ||
modf.go | ||
nextafter.go | ||
pow10.go | ||
pow.go | ||
remainder_386.s | ||
remainder_amd64.s | ||
remainder_amd64p32.s | ||
remainder_arm.s | ||
remainder.go | ||
signbit.go | ||
sin_386.s | ||
sin_amd64.s | ||
sin_amd64p32.s | ||
sin_arm.s | ||
sin_s390x.s | ||
sin.go | ||
sincos_386.go | ||
sincos_386.s | ||
sincos.go | ||
sinh_s390x.s | ||
sinh_stub.s | ||
sinh.go | ||
sqrt_386.s | ||
sqrt_amd64.s | ||
sqrt_amd64p32.s | ||
sqrt_arm64.s | ||
sqrt_arm.s | ||
sqrt_mipsx.s | ||
sqrt_ppc64x.s | ||
sqrt_s390x.s | ||
sqrt.go | ||
stubs_arm64.s | ||
stubs_mips64x.s | ||
stubs_mipsx.s | ||
stubs_ppc64x.s | ||
stubs_s390x.s | ||
tan_386.s | ||
tan_amd64.s | ||
tan_amd64p32.s | ||
tan_arm.s | ||
tan.go | ||
tanh_s390x.s | ||
tanh.go | ||
unsafe.go |