1
0
mirror of https://github.com/golang/go synced 2024-11-18 12:24:48 -07:00
go/test/codegen/floats.go

141 lines
3.0 KiB
Go
Raw Normal View History

// asmcheck
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package codegen
cmd/compile: optimize 386 code with FLDPI FLDPI pushes the constant pi to 387's register stack, which is more efficient than MOVSSconst/MOVSDconst. 1. This optimization reduces 0.3KB of the total size of pkg/linux_386 (exlcuding cmd/compile). 2. There is little regression in the go1 benchmark. name old time/op new time/op delta BinaryTree17-4 3.30s ± 3% 3.30s ± 2% ~ (p=0.759 n=40+39) Fannkuch11-4 3.53s ± 1% 3.54s ± 1% ~ (p=0.168 n=40+40) FmtFprintfEmpty-4 45.5ns ± 3% 45.6ns ± 3% ~ (p=0.553 n=40+40) FmtFprintfString-4 78.4ns ± 3% 78.3ns ± 3% ~ (p=0.593 n=40+40) FmtFprintfInt-4 88.8ns ± 2% 89.9ns ± 2% ~ (p=0.083 n=40+33) FmtFprintfIntInt-4 140ns ± 4% 140ns ± 4% ~ (p=0.656 n=40+40) FmtFprintfPrefixedInt-4 180ns ± 2% 181ns ± 3% +0.53% (p=0.050 n=40+40) FmtFprintfFloat-4 408ns ± 4% 411ns ± 3% ~ (p=0.112 n=40+40) FmtManyArgs-4 599ns ± 3% 602ns ± 3% ~ (p=0.784 n=40+40) GobDecode-4 7.24ms ± 6% 7.30ms ± 5% ~ (p=0.171 n=40+40) GobEncode-4 6.98ms ± 5% 6.89ms ± 8% ~ (p=0.107 n=40+40) Gzip-4 396ms ± 4% 396ms ± 3% ~ (p=0.852 n=40+40) Gunzip-4 41.3ms ± 3% 41.5ms ± 4% ~ (p=0.221 n=40+40) HTTPClientServer-4 63.4µs ± 3% 63.4µs ± 2% ~ (p=0.895 n=39+40) JSONEncode-4 17.5ms ± 2% 17.5ms ± 3% ~ (p=0.090 n=40+40) JSONDecode-4 60.6ms ± 3% 60.1ms ± 4% ~ (p=0.184 n=40+40) Mandelbrot200-4 7.80ms ± 3% 7.78ms ± 2% ~ (p=0.512 n=40+40) GoParse-4 3.30ms ± 3% 3.28ms ± 2% -0.61% (p=0.034 n=40+40) RegexpMatchEasy0_32-4 104ns ± 4% 103ns ± 4% ~ (p=0.118 n=40+40) RegexpMatchEasy0_1K-4 850ns ± 2% 848ns ± 2% ~ (p=0.370 n=40+40) RegexpMatchEasy1_32-4 112ns ± 4% 112ns ± 4% ~ (p=0.848 n=40+40) RegexpMatchEasy1_1K-4 1.04µs ± 4% 1.03µs ± 4% ~ (p=0.333 n=40+40) RegexpMatchMedium_32-4 132ns ± 4% 131ns ± 3% ~ (p=0.527 n=40+40) RegexpMatchMedium_1K-4 43.4µs ± 3% 43.5µs ± 3% ~ (p=0.111 n=40+40) RegexpMatchHard_32-4 2.24µs ± 4% 2.24µs ± 4% ~ (p=0.441 n=40+40) RegexpMatchHard_1K-4 67.9µs ± 3% 68.0µs ± 3% ~ (p=0.095 n=40+40) Revcomp-4 1.84s ± 2% 1.84s ± 2% ~ (p=0.677 n=40+40) Template-4 68.4ms ± 3% 68.6ms ± 3% ~ (p=0.345 n=40+40) TimeParse-4 433ns ± 3% 433ns ± 3% ~ (p=0.403 n=40+40) TimeFormat-4 407ns ± 3% 406ns ± 3% ~ (p=0.900 n=40+40) [Geo mean] 67.1µs 67.2µs +0.04% name old speed new speed delta GobDecode-4 106MB/s ± 5% 105MB/s ± 5% ~ (p=0.173 n=40+40) GobEncode-4 110MB/s ± 5% 112MB/s ± 9% ~ (p=0.104 n=40+40) Gzip-4 49.0MB/s ± 4% 49.1MB/s ± 4% ~ (p=0.836 n=40+40) Gunzip-4 471MB/s ± 3% 468MB/s ± 4% ~ (p=0.218 n=40+40) JSONEncode-4 111MB/s ± 2% 111MB/s ± 3% ~ (p=0.090 n=40+40) JSONDecode-4 32.0MB/s ± 3% 32.3MB/s ± 4% ~ (p=0.194 n=40+40) GoParse-4 17.6MB/s ± 3% 17.7MB/s ± 2% +0.62% (p=0.035 n=40+40) RegexpMatchEasy0_32-4 307MB/s ± 4% 309MB/s ± 4% +0.70% (p=0.041 n=40+40) RegexpMatchEasy0_1K-4 1.20GB/s ± 3% 1.21GB/s ± 2% ~ (p=0.353 n=40+40) RegexpMatchEasy1_32-4 285MB/s ± 3% 284MB/s ± 4% ~ (p=0.384 n=40+40) RegexpMatchEasy1_1K-4 988MB/s ± 4% 992MB/s ± 3% ~ (p=0.335 n=40+40) RegexpMatchMedium_32-4 7.56MB/s ± 4% 7.57MB/s ± 4% ~ (p=0.314 n=40+40) RegexpMatchMedium_1K-4 23.6MB/s ± 3% 23.6MB/s ± 3% ~ (p=0.107 n=40+40) RegexpMatchHard_32-4 14.3MB/s ± 4% 14.3MB/s ± 4% ~ (p=0.429 n=40+40) RegexpMatchHard_1K-4 15.1MB/s ± 3% 15.1MB/s ± 3% ~ (p=0.099 n=40+40) Revcomp-4 138MB/s ± 2% 138MB/s ± 2% ~ (p=0.658 n=40+40) Template-4 28.4MB/s ± 3% 28.3MB/s ± 3% ~ (p=0.331 n=40+40) [Geo mean] 80.8MB/s 80.8MB/s +0.09% Change-Id: I0cb715eead68ade097a302e7fb80ccbd1d1b511e Reviewed-on: https://go-review.googlesource.com/130975 Run-TryBot: Ben Shi <powerman1st@163.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2018-08-22 20:08:15 -06:00
import "math"
// This file contains codegen tests related to arithmetic
// simplifications and optimizations on float types.
// For codegen tests on integer types, see arithmetic.go.
// --------------------- //
// Strength-reduce //
// --------------------- //
func Mul2(f float64) float64 {
// 386/sse2:"ADDSD",-"MULSD"
// 386/387:"FADDDP",-"FMULDP"
// amd64:"ADDSD",-"MULSD"
// arm/7:"ADDD",-"MULD"
// arm64:"FADDD",-"FMULD"
// ppc64:"FADD",-"FMUL"
// ppc64le:"FADD",-"FMUL"
return f * 2.0
}
func DivPow2(f1, f2, f3 float64) (float64, float64, float64) {
// 386/sse2:"MULSD",-"DIVSD"
// 386/387:"FMULDP",-"FDIVDP"
// amd64:"MULSD",-"DIVSD"
// arm/7:"MULD",-"DIVD"
// arm64:"FMULD",-"FDIVD"
// ppc64:"FMUL",-"FDIV"
// ppc64le:"FMUL",-"FDIV"
x := f1 / 16.0
// 386/sse2:"MULSD",-"DIVSD"
// 386/387:"FMULDP",-"FDIVDP"
// amd64:"MULSD",-"DIVSD"
// arm/7:"MULD",-"DIVD"
// arm64:"FMULD",-"FDIVD"
// ppc64:"FMUL",-"FDIVD"
// ppc64le:"FMUL",-"FDIVD"
y := f2 / 0.125
// 386/sse2:"ADDSD",-"DIVSD",-"MULSD"
// 386/387:"FADDDP",-"FDIVDP",-"FMULDP"
// amd64:"ADDSD",-"DIVSD",-"MULSD"
// arm/7:"ADDD",-"MULD",-"DIVD"
// arm64:"FADDD",-"FMULD",-"FDIVD"
// ppc64:"FADD",-"FMUL",-"FDIV"
// ppc64le:"FADD",-"FMUL",-"FDIV"
z := f3 / 0.5
return x, y, z
}
cmd/compile: optimize 386 code with FLDPI FLDPI pushes the constant pi to 387's register stack, which is more efficient than MOVSSconst/MOVSDconst. 1. This optimization reduces 0.3KB of the total size of pkg/linux_386 (exlcuding cmd/compile). 2. There is little regression in the go1 benchmark. name old time/op new time/op delta BinaryTree17-4 3.30s ± 3% 3.30s ± 2% ~ (p=0.759 n=40+39) Fannkuch11-4 3.53s ± 1% 3.54s ± 1% ~ (p=0.168 n=40+40) FmtFprintfEmpty-4 45.5ns ± 3% 45.6ns ± 3% ~ (p=0.553 n=40+40) FmtFprintfString-4 78.4ns ± 3% 78.3ns ± 3% ~ (p=0.593 n=40+40) FmtFprintfInt-4 88.8ns ± 2% 89.9ns ± 2% ~ (p=0.083 n=40+33) FmtFprintfIntInt-4 140ns ± 4% 140ns ± 4% ~ (p=0.656 n=40+40) FmtFprintfPrefixedInt-4 180ns ± 2% 181ns ± 3% +0.53% (p=0.050 n=40+40) FmtFprintfFloat-4 408ns ± 4% 411ns ± 3% ~ (p=0.112 n=40+40) FmtManyArgs-4 599ns ± 3% 602ns ± 3% ~ (p=0.784 n=40+40) GobDecode-4 7.24ms ± 6% 7.30ms ± 5% ~ (p=0.171 n=40+40) GobEncode-4 6.98ms ± 5% 6.89ms ± 8% ~ (p=0.107 n=40+40) Gzip-4 396ms ± 4% 396ms ± 3% ~ (p=0.852 n=40+40) Gunzip-4 41.3ms ± 3% 41.5ms ± 4% ~ (p=0.221 n=40+40) HTTPClientServer-4 63.4µs ± 3% 63.4µs ± 2% ~ (p=0.895 n=39+40) JSONEncode-4 17.5ms ± 2% 17.5ms ± 3% ~ (p=0.090 n=40+40) JSONDecode-4 60.6ms ± 3% 60.1ms ± 4% ~ (p=0.184 n=40+40) Mandelbrot200-4 7.80ms ± 3% 7.78ms ± 2% ~ (p=0.512 n=40+40) GoParse-4 3.30ms ± 3% 3.28ms ± 2% -0.61% (p=0.034 n=40+40) RegexpMatchEasy0_32-4 104ns ± 4% 103ns ± 4% ~ (p=0.118 n=40+40) RegexpMatchEasy0_1K-4 850ns ± 2% 848ns ± 2% ~ (p=0.370 n=40+40) RegexpMatchEasy1_32-4 112ns ± 4% 112ns ± 4% ~ (p=0.848 n=40+40) RegexpMatchEasy1_1K-4 1.04µs ± 4% 1.03µs ± 4% ~ (p=0.333 n=40+40) RegexpMatchMedium_32-4 132ns ± 4% 131ns ± 3% ~ (p=0.527 n=40+40) RegexpMatchMedium_1K-4 43.4µs ± 3% 43.5µs ± 3% ~ (p=0.111 n=40+40) RegexpMatchHard_32-4 2.24µs ± 4% 2.24µs ± 4% ~ (p=0.441 n=40+40) RegexpMatchHard_1K-4 67.9µs ± 3% 68.0µs ± 3% ~ (p=0.095 n=40+40) Revcomp-4 1.84s ± 2% 1.84s ± 2% ~ (p=0.677 n=40+40) Template-4 68.4ms ± 3% 68.6ms ± 3% ~ (p=0.345 n=40+40) TimeParse-4 433ns ± 3% 433ns ± 3% ~ (p=0.403 n=40+40) TimeFormat-4 407ns ± 3% 406ns ± 3% ~ (p=0.900 n=40+40) [Geo mean] 67.1µs 67.2µs +0.04% name old speed new speed delta GobDecode-4 106MB/s ± 5% 105MB/s ± 5% ~ (p=0.173 n=40+40) GobEncode-4 110MB/s ± 5% 112MB/s ± 9% ~ (p=0.104 n=40+40) Gzip-4 49.0MB/s ± 4% 49.1MB/s ± 4% ~ (p=0.836 n=40+40) Gunzip-4 471MB/s ± 3% 468MB/s ± 4% ~ (p=0.218 n=40+40) JSONEncode-4 111MB/s ± 2% 111MB/s ± 3% ~ (p=0.090 n=40+40) JSONDecode-4 32.0MB/s ± 3% 32.3MB/s ± 4% ~ (p=0.194 n=40+40) GoParse-4 17.6MB/s ± 3% 17.7MB/s ± 2% +0.62% (p=0.035 n=40+40) RegexpMatchEasy0_32-4 307MB/s ± 4% 309MB/s ± 4% +0.70% (p=0.041 n=40+40) RegexpMatchEasy0_1K-4 1.20GB/s ± 3% 1.21GB/s ± 2% ~ (p=0.353 n=40+40) RegexpMatchEasy1_32-4 285MB/s ± 3% 284MB/s ± 4% ~ (p=0.384 n=40+40) RegexpMatchEasy1_1K-4 988MB/s ± 4% 992MB/s ± 3% ~ (p=0.335 n=40+40) RegexpMatchMedium_32-4 7.56MB/s ± 4% 7.57MB/s ± 4% ~ (p=0.314 n=40+40) RegexpMatchMedium_1K-4 23.6MB/s ± 3% 23.6MB/s ± 3% ~ (p=0.107 n=40+40) RegexpMatchHard_32-4 14.3MB/s ± 4% 14.3MB/s ± 4% ~ (p=0.429 n=40+40) RegexpMatchHard_1K-4 15.1MB/s ± 3% 15.1MB/s ± 3% ~ (p=0.099 n=40+40) Revcomp-4 138MB/s ± 2% 138MB/s ± 2% ~ (p=0.658 n=40+40) Template-4 28.4MB/s ± 3% 28.3MB/s ± 3% ~ (p=0.331 n=40+40) [Geo mean] 80.8MB/s 80.8MB/s +0.09% Change-Id: I0cb715eead68ade097a302e7fb80ccbd1d1b511e Reviewed-on: https://go-review.googlesource.com/130975 Run-TryBot: Ben Shi <powerman1st@163.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2018-08-22 20:08:15 -06:00
func getPi() float64 {
// 386/387:"FLDPI"
return math.Pi
}
cmd/compile: optimize arm64 with indexed FP load/store The FP load/store on arm64 have register indexed forms. And this CL implements this optimization. 1. The total size of pkg/android_arm64 (excluding cmd/compile) decreases about 400 bytes. 2. There is no regression in the go1 benchmark, the test case GobEncode even gets slight improvement, excluding noise. name old time/op new time/op delta BinaryTree17-4 19.0s ± 0% 19.0s ± 1% ~ (p=0.817 n=29+29) Fannkuch11-4 9.94s ± 0% 9.95s ± 0% +0.03% (p=0.010 n=24+30) FmtFprintfEmpty-4 233ns ± 0% 233ns ± 0% ~ (all equal) FmtFprintfString-4 427ns ± 0% 427ns ± 0% ~ (p=0.649 n=30+30) FmtFprintfInt-4 471ns ± 0% 471ns ± 0% ~ (all equal) FmtFprintfIntInt-4 730ns ± 0% 730ns ± 0% ~ (all equal) FmtFprintfPrefixedInt-4 889ns ± 0% 889ns ± 0% ~ (all equal) FmtFprintfFloat-4 1.21µs ± 0% 1.21µs ± 0% +0.04% (p=0.012 n=20+30) FmtManyArgs-4 2.99µs ± 0% 2.99µs ± 0% ~ (p=0.651 n=29+29) GobDecode-4 42.4ms ± 1% 42.3ms ± 1% -0.27% (p=0.001 n=29+28) GobEncode-4 37.8ms ±11% 36.0ms ± 0% -4.67% (p=0.000 n=30+26) Gzip-4 1.98s ± 1% 1.96s ± 1% -1.26% (p=0.000 n=30+30) Gunzip-4 175ms ± 0% 175ms ± 0% ~ (p=0.988 n=29+29) HTTPClientServer-4 854µs ± 5% 860µs ± 5% ~ (p=0.236 n=28+29) JSONEncode-4 88.8ms ± 0% 87.9ms ± 0% -1.00% (p=0.000 n=24+26) JSONDecode-4 390ms ± 1% 392ms ± 2% +0.48% (p=0.025 n=30+30) Mandelbrot200-4 19.5ms ± 0% 19.5ms ± 0% ~ (p=0.894 n=24+29) GoParse-4 20.3ms ± 0% 20.1ms ± 1% -0.94% (p=0.000 n=27+26) RegexpMatchEasy0_32-4 451ns ± 0% 451ns ± 0% ~ (p=0.578 n=30+30) RegexpMatchEasy0_1K-4 1.63µs ± 0% 1.63µs ± 0% ~ (p=0.298 n=30+28) RegexpMatchEasy1_32-4 431ns ± 0% 434ns ± 0% +0.67% (p=0.000 n=30+29) RegexpMatchEasy1_1K-4 2.60µs ± 0% 2.64µs ± 0% +1.36% (p=0.000 n=28+26) RegexpMatchMedium_32-4 744ns ± 0% 744ns ± 0% ~ (p=0.474 n=29+29) RegexpMatchMedium_1K-4 223µs ± 0% 223µs ± 0% -0.08% (p=0.038 n=26+30) RegexpMatchHard_32-4 12.2µs ± 0% 12.3µs ± 0% +0.27% (p=0.000 n=29+30) RegexpMatchHard_1K-4 373µs ± 0% 373µs ± 0% ~ (p=0.219 n=29+28) Revcomp-4 2.84s ± 0% 2.84s ± 0% ~ (p=0.130 n=28+28) Template-4 394ms ± 1% 392ms ± 1% -0.52% (p=0.001 n=30+30) TimeParse-4 1.93µs ± 0% 1.93µs ± 0% ~ (p=0.587 n=29+30) TimeFormat-4 2.00µs ± 0% 2.00µs ± 0% +0.07% (p=0.001 n=28+27) [Geo mean] 306µs 305µs -0.17% name old speed new speed delta GobDecode-4 18.1MB/s ± 1% 18.2MB/s ± 1% +0.27% (p=0.001 n=29+28) GobEncode-4 20.3MB/s ±10% 21.3MB/s ± 0% +4.64% (p=0.000 n=30+26) Gzip-4 9.79MB/s ± 1% 9.91MB/s ± 1% +1.28% (p=0.000 n=30+30) Gunzip-4 111MB/s ± 0% 111MB/s ± 0% ~ (p=0.988 n=29+29) JSONEncode-4 21.8MB/s ± 0% 22.1MB/s ± 0% +1.02% (p=0.000 n=24+26) JSONDecode-4 4.97MB/s ± 1% 4.95MB/s ± 2% -0.45% (p=0.031 n=30+30) GoParse-4 2.85MB/s ± 1% 2.88MB/s ± 1% +1.03% (p=0.000 n=30+26) RegexpMatchEasy0_32-4 70.9MB/s ± 0% 70.9MB/s ± 0% ~ (p=0.904 n=29+28) RegexpMatchEasy0_1K-4 627MB/s ± 0% 627MB/s ± 0% ~ (p=0.156 n=30+30) RegexpMatchEasy1_32-4 74.2MB/s ± 0% 73.7MB/s ± 0% -0.67% (p=0.000 n=30+29) RegexpMatchEasy1_1K-4 393MB/s ± 0% 388MB/s ± 0% -1.34% (p=0.000 n=28+26) RegexpMatchMedium_32-4 1.34MB/s ± 0% 1.34MB/s ± 0% ~ (all equal) RegexpMatchMedium_1K-4 4.59MB/s ± 0% 4.59MB/s ± 0% +0.07% (p=0.035 n=25+30) RegexpMatchHard_32-4 2.61MB/s ± 0% 2.61MB/s ± 0% -0.11% (p=0.002 n=28+30) RegexpMatchHard_1K-4 2.75MB/s ± 0% 2.75MB/s ± 0% +0.15% (p=0.001 n=30+24) Revcomp-4 89.4MB/s ± 0% 89.4MB/s ± 0% ~ (p=0.140 n=28+28) Template-4 4.93MB/s ± 1% 4.95MB/s ± 1% +0.51% (p=0.001 n=30+30) [Geo mean] 18.4MB/s 18.4MB/s +0.37% Change-Id: I9a6b521a971b21cfb51064e8e9b853cef8a1d071 Reviewed-on: https://go-review.googlesource.com/124636 Run-TryBot: Ben Shi <powerman1st@163.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Cherry Zhang <cherryyz@google.com>
2018-07-18 03:31:35 -06:00
func indexLoad(b0 []float32, b1 float32, idx int) float32 {
// arm64:`FMOVS\s\(R[0-9]+\)\(R[0-9]+\),\sF[0-9]+`
return b0[idx] * b1
}
func indexStore(b0 []float64, b1 float64, idx int) {
// arm64:`FMOVD\sF[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`
b0[idx] = b1
}
// ----------- //
// Fused //
// ----------- //
func FusedAdd32(x, y, z float32) float32 {
// s390x:"FMADDS\t"
// ppc64:"FMADDS\t"
// ppc64le:"FMADDS\t"
// arm64:"FMADDS"
return x*y + z
}
func FusedSub32_a(x, y, z float32) float32 {
// s390x:"FMSUBS\t"
// ppc64:"FMSUBS\t"
// ppc64le:"FMSUBS\t"
return x*y - z
}
func FusedSub32_b(x, y, z float32) float32 {
// arm64:"FMSUBS"
return z - x*y
}
func FusedAdd64(x, y, z float64) float64 {
// s390x:"FMADD\t"
// ppc64:"FMADD\t"
// ppc64le:"FMADD\t"
// arm64:"FMADDD"
return x*y + z
}
func FusedSub64_a(x, y, z float64) float64 {
// s390x:"FMSUB\t"
// ppc64:"FMSUB\t"
// ppc64le:"FMSUB\t"
return x*y - z
}
func FusedSub64_b(x, y, z float64) float64 {
// arm64:"FMSUBD"
return z - x*y
}
// ---------------- //
// Non-floats //
// ---------------- //
// We should make sure that the compiler doesn't generate floating point
// instructions for non-float operations on Plan 9, because floating point
// operations are not allowed in the note handler.
func ArrayZero() [16]byte {
// amd64:"MOVUPS"
// plan9/amd64/:-"MOVUPS"
var a [16]byte
return a
}
func ArrayCopy(a [16]byte) (b [16]byte) {
// amd64:"MOVUPS"
// plan9/amd64/:-"MOVUPS"
b = a
return
}