2018-03-03 11:17:20 -07:00
|
|
|
// asmcheck
|
|
|
|
|
|
|
|
// Copyright 2018 The Go Authors. All rights reserved.
|
|
|
|
// Use of this source code is governed by a BSD-style
|
|
|
|
// license that can be found in the LICENSE file.
|
|
|
|
|
|
|
|
package codegen
|
|
|
|
|
|
|
|
import "math"
|
|
|
|
|
|
|
|
var sink64 [8]float64
|
|
|
|
|
|
|
|
func approx(x float64) {
|
2021-09-15 01:31:05 -06:00
|
|
|
// amd64/v2:-".*x86HasSSE41" amd64/v3:-".*x86HasSSE41"
|
|
|
|
// amd64:"ROUNDSD\t[$]2"
|
2018-03-03 11:17:20 -07:00
|
|
|
// s390x:"FIDBR\t[$]6"
|
|
|
|
// arm64:"FRINTPD"
|
2023-01-25 10:53:10 -07:00
|
|
|
// ppc64x:"FRIP"
|
2019-03-04 17:56:17 -07:00
|
|
|
// wasm:"F64Ceil"
|
2018-03-03 11:17:20 -07:00
|
|
|
sink64[0] = math.Ceil(x)
|
|
|
|
|
2021-09-15 01:31:05 -06:00
|
|
|
// amd64/v2:-".*x86HasSSE41" amd64/v3:-".*x86HasSSE41"
|
|
|
|
// amd64:"ROUNDSD\t[$]1"
|
2018-03-03 11:17:20 -07:00
|
|
|
// s390x:"FIDBR\t[$]7"
|
|
|
|
// arm64:"FRINTMD"
|
2023-01-25 10:53:10 -07:00
|
|
|
// ppc64x:"FRIM"
|
2019-03-04 17:56:17 -07:00
|
|
|
// wasm:"F64Floor"
|
2018-03-03 11:17:20 -07:00
|
|
|
sink64[1] = math.Floor(x)
|
|
|
|
|
|
|
|
// s390x:"FIDBR\t[$]1"
|
|
|
|
// arm64:"FRINTAD"
|
2023-01-25 10:53:10 -07:00
|
|
|
// ppc64x:"FRIN"
|
2018-03-03 11:17:20 -07:00
|
|
|
sink64[2] = math.Round(x)
|
|
|
|
|
2021-09-15 01:31:05 -06:00
|
|
|
// amd64/v2:-".*x86HasSSE41" amd64/v3:-".*x86HasSSE41"
|
|
|
|
// amd64:"ROUNDSD\t[$]3"
|
2018-03-03 11:17:20 -07:00
|
|
|
// s390x:"FIDBR\t[$]5"
|
|
|
|
// arm64:"FRINTZD"
|
2023-01-25 10:53:10 -07:00
|
|
|
// ppc64x:"FRIZ"
|
2019-03-04 17:56:17 -07:00
|
|
|
// wasm:"F64Trunc"
|
2018-03-03 11:17:20 -07:00
|
|
|
sink64[3] = math.Trunc(x)
|
|
|
|
|
2021-09-15 01:31:05 -06:00
|
|
|
// amd64/v2:-".*x86HasSSE41" amd64/v3:-".*x86HasSSE41"
|
|
|
|
// amd64:"ROUNDSD\t[$]0"
|
2018-03-03 11:17:20 -07:00
|
|
|
// s390x:"FIDBR\t[$]4"
|
2018-05-22 00:58:32 -06:00
|
|
|
// arm64:"FRINTND"
|
2019-03-04 17:56:17 -07:00
|
|
|
// wasm:"F64Nearest"
|
2018-03-03 11:17:20 -07:00
|
|
|
sink64[4] = math.RoundToEven(x)
|
|
|
|
}
|
|
|
|
|
|
|
|
// sqrt returns math.Sqrt(x) for a float64 argument.
//
// The architecture-prefixed comments inside the body are asmcheck
// directives; they are reproduced verbatim, immediately above the
// statement they annotate.
func sqrt(x float64) float64 {
	// amd64:"SQRTSD"
	// 386/sse2:"SQRTSD" 386/softfloat:-"SQRTD"
	// arm64:"FSQRTD"
	// arm/7:"SQRTD"
	// mips/hardfloat:"SQRTD" mips/softfloat:-"SQRTD"
	// mips64/hardfloat:"SQRTD" mips64/softfloat:-"SQRTD"
	// wasm:"F64Sqrt"
	// ppc64x:"FSQRT"
	return math.Sqrt(x)
}
|
|
|
|
|
2020-12-07 04:15:15 -07:00
|
|
|
// sqrt32 returns the square root of a float32 by widening x to float64,
// calling math.Sqrt, and converting the result back to float32.
//
// The architecture-prefixed comments inside the body are asmcheck
// directives; they are reproduced verbatim, immediately above the
// statement they annotate.
func sqrt32(x float32) float32 {
	// amd64:"SQRTSS"
	// 386/sse2:"SQRTSS" 386/softfloat:-"SQRTS"
	// arm64:"FSQRTS"
	// arm/7:"SQRTF"
	// mips/hardfloat:"SQRTF" mips/softfloat:-"SQRTF"
	// mips64/hardfloat:"SQRTF" mips64/softfloat:-"SQRTF"
	// wasm:"F32Sqrt"
	// ppc64x:"FSQRTS"
	return float32(math.Sqrt(float64(x)))
}
|
|
|
|
|
2018-03-03 11:17:20 -07:00
|
|
|
// Check that it's using integer registers
|
|
|
|
func abs(x, y float64) {
|
cmd/compile: add patterns for bit set/clear/complement on amd64
This patch completes implementation of BT(Q|L), and adds support
for BT(S|R|C)(Q|L).
Example of code changes from time.(*Time).addSec:
if t.wall&hasMonotonic != 0 {
0x1073465 488b08 MOVQ 0(AX), CX
0x1073468 4889ca MOVQ CX, DX
0x107346b 48c1e93f SHRQ $0x3f, CX
0x107346f 48c1e13f SHLQ $0x3f, CX
0x1073473 48f7c1ffffffff TESTQ $-0x1, CX
0x107347a 746b JE 0x10734e7
if t.wall&hasMonotonic != 0 {
0x1073435 488b08 MOVQ 0(AX), CX
0x1073438 480fbae13f BTQ $0x3f, CX
0x107343d 7363 JAE 0x10734a2
Another example:
t.wall = t.wall&nsecMask | uint64(dsec)<<nsecShift | hasMonotonic
0x10734c8 4881e1ffffff3f ANDQ $0x3fffffff, CX
0x10734cf 48c1e61e SHLQ $0x1e, SI
0x10734d3 4809ce ORQ CX, SI
0x10734d6 48b90000000000000080 MOVQ $0x8000000000000000, CX
0x10734e0 4809f1 ORQ SI, CX
0x10734e3 488908 MOVQ CX, 0(AX)
t.wall = t.wall&nsecMask | uint64(dsec)<<nsecShift | hasMonotonic
0x107348b 4881e2ffffff3f ANDQ $0x3fffffff, DX
0x1073492 48c1e61e SHLQ $0x1e, SI
0x1073496 4809f2 ORQ SI, DX
0x1073499 480fbaea3f BTSQ $0x3f, DX
0x107349e 488910 MOVQ DX, 0(AX)
Go1 benchmarks seem unaffected, and I would be surprised
otherwise:
name old time/op new time/op delta
BinaryTree17-4 2.64s ± 4% 2.56s ± 9% -2.92% (p=0.008 n=9+9)
Fannkuch11-4 2.90s ± 1% 2.95s ± 3% +1.76% (p=0.010 n=10+9)
FmtFprintfEmpty-4 35.3ns ± 1% 34.5ns ± 2% -2.34% (p=0.004 n=9+8)
FmtFprintfString-4 57.0ns ± 1% 58.4ns ± 5% +2.52% (p=0.029 n=9+10)
FmtFprintfInt-4 59.8ns ± 3% 59.8ns ± 6% ~ (p=0.565 n=10+10)
FmtFprintfIntInt-4 93.9ns ± 3% 91.2ns ± 5% -2.94% (p=0.014 n=10+9)
FmtFprintfPrefixedInt-4 107ns ± 6% 104ns ± 6% ~ (p=0.099 n=10+10)
FmtFprintfFloat-4 187ns ± 3% 188ns ± 3% ~ (p=0.505 n=10+9)
FmtManyArgs-4 410ns ± 1% 415ns ± 6% ~ (p=0.649 n=8+10)
GobDecode-4 5.30ms ± 3% 5.27ms ± 3% ~ (p=0.436 n=10+10)
GobEncode-4 4.62ms ± 5% 4.47ms ± 2% -3.24% (p=0.001 n=9+10)
Gzip-4 197ms ± 4% 193ms ± 3% ~ (p=0.123 n=10+10)
Gunzip-4 30.4ms ± 3% 30.1ms ± 3% ~ (p=0.481 n=10+10)
HTTPClientServer-4 76.3µs ± 1% 76.0µs ± 1% ~ (p=0.236 n=8+9)
JSONEncode-4 10.5ms ± 9% 10.3ms ± 3% ~ (p=0.280 n=10+10)
JSONDecode-4 42.3ms ±10% 41.3ms ± 2% ~ (p=0.053 n=9+10)
Mandelbrot200-4 3.80ms ± 2% 3.72ms ± 2% -2.15% (p=0.001 n=9+10)
GoParse-4 2.88ms ±10% 2.81ms ± 2% ~ (p=0.247 n=10+10)
RegexpMatchEasy0_32-4 69.5ns ± 4% 68.6ns ± 2% ~ (p=0.171 n=10+10)
RegexpMatchEasy0_1K-4 165ns ± 3% 162ns ± 3% ~ (p=0.137 n=10+10)
RegexpMatchEasy1_32-4 65.7ns ± 6% 64.4ns ± 2% -2.02% (p=0.037 n=10+10)
RegexpMatchEasy1_1K-4 278ns ± 2% 279ns ± 3% ~ (p=0.991 n=8+9)
RegexpMatchMedium_32-4 99.3ns ± 3% 98.5ns ± 4% ~ (p=0.457 n=10+9)
RegexpMatchMedium_1K-4 30.1µs ± 1% 30.4µs ± 2% ~ (p=0.173 n=8+10)
RegexpMatchHard_32-4 1.40µs ± 2% 1.41µs ± 4% ~ (p=0.565 n=10+10)
RegexpMatchHard_1K-4 42.5µs ± 1% 41.5µs ± 3% -2.13% (p=0.002 n=8+9)
Revcomp-4 332ms ± 4% 328ms ± 5% ~ (p=0.720 n=9+10)
Template-4 48.3ms ± 2% 49.6ms ± 3% +2.56% (p=0.002 n=8+10)
TimeParse-4 252ns ± 2% 249ns ± 3% ~ (p=0.116 n=9+10)
TimeFormat-4 262ns ± 4% 252ns ± 3% -4.01% (p=0.000 n=9+10)
name old speed new speed delta
GobDecode-4 145MB/s ± 3% 146MB/s ± 3% ~ (p=0.436 n=10+10)
GobEncode-4 166MB/s ± 5% 172MB/s ± 2% +3.28% (p=0.001 n=9+10)
Gzip-4 98.6MB/s ± 4% 100.4MB/s ± 3% ~ (p=0.123 n=10+10)
Gunzip-4 639MB/s ± 3% 645MB/s ± 3% ~ (p=0.481 n=10+10)
JSONEncode-4 185MB/s ± 8% 189MB/s ± 3% ~ (p=0.280 n=10+10)
JSONDecode-4 46.0MB/s ± 9% 47.0MB/s ± 2% +2.21% (p=0.046 n=9+10)
GoParse-4 20.1MB/s ± 9% 20.6MB/s ± 2% ~ (p=0.239 n=10+10)
RegexpMatchEasy0_32-4 460MB/s ± 4% 467MB/s ± 2% ~ (p=0.165 n=10+10)
RegexpMatchEasy0_1K-4 6.19GB/s ± 3% 6.28GB/s ± 3% ~ (p=0.165 n=10+10)
RegexpMatchEasy1_32-4 487MB/s ± 5% 497MB/s ± 2% +2.00% (p=0.043 n=10+10)
RegexpMatchEasy1_1K-4 3.67GB/s ± 2% 3.67GB/s ± 3% ~ (p=0.963 n=8+9)
RegexpMatchMedium_32-4 10.1MB/s ± 3% 10.1MB/s ± 4% ~ (p=0.435 n=10+9)
RegexpMatchMedium_1K-4 34.0MB/s ± 1% 33.7MB/s ± 2% ~ (p=0.173 n=8+10)
RegexpMatchHard_32-4 22.9MB/s ± 2% 22.7MB/s ± 4% ~ (p=0.565 n=10+10)
RegexpMatchHard_1K-4 24.0MB/s ± 3% 24.7MB/s ± 3% +2.64% (p=0.001 n=9+9)
Revcomp-4 766MB/s ± 4% 775MB/s ± 5% ~ (p=0.720 n=9+10)
Template-4 40.2MB/s ± 2% 39.2MB/s ± 3% -2.47% (p=0.002 n=8+10)
The rules match ~1800 times during all.bash.
Fixes #18943
Change-Id: I64be1ada34e89c486dfd935bf429b35652117ed4
Reviewed-on: https://go-review.googlesource.com/94766
Run-TryBot: Giovanni Bajo <rasky@develer.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2018-02-17 05:54:03 -07:00
|
|
|
// amd64:"BTRQ\t[$]63"
|
2018-05-22 00:58:32 -06:00
|
|
|
// arm64:"FABSD\t"
|
2018-03-03 11:17:20 -07:00
|
|
|
// s390x:"LPDFR\t",-"MOVD\t" (no integer load/store)
|
2023-01-25 10:53:10 -07:00
|
|
|
// ppc64x:"FABS\t"
|
2021-09-09 16:47:14 -06:00
|
|
|
// riscv64:"FABSD\t"
|
2019-03-04 17:56:17 -07:00
|
|
|
// wasm:"F64Abs"
|
2019-08-01 20:41:59 -06:00
|
|
|
// arm/6:"ABSD\t"
|
2018-03-03 11:17:20 -07:00
|
|
|
sink64[0] = math.Abs(x)
|
|
|
|
|
cmd/compile: add patterns for bit set/clear/complement on amd64
This patch completes implementation of BT(Q|L), and adds support
for BT(S|R|C)(Q|L).
Example of code changes from time.(*Time).addSec:
if t.wall&hasMonotonic != 0 {
0x1073465 488b08 MOVQ 0(AX), CX
0x1073468 4889ca MOVQ CX, DX
0x107346b 48c1e93f SHRQ $0x3f, CX
0x107346f 48c1e13f SHLQ $0x3f, CX
0x1073473 48f7c1ffffffff TESTQ $-0x1, CX
0x107347a 746b JE 0x10734e7
if t.wall&hasMonotonic != 0 {
0x1073435 488b08 MOVQ 0(AX), CX
0x1073438 480fbae13f BTQ $0x3f, CX
0x107343d 7363 JAE 0x10734a2
Another example:
t.wall = t.wall&nsecMask | uint64(dsec)<<nsecShift | hasMonotonic
0x10734c8 4881e1ffffff3f ANDQ $0x3fffffff, CX
0x10734cf 48c1e61e SHLQ $0x1e, SI
0x10734d3 4809ce ORQ CX, SI
0x10734d6 48b90000000000000080 MOVQ $0x8000000000000000, CX
0x10734e0 4809f1 ORQ SI, CX
0x10734e3 488908 MOVQ CX, 0(AX)
t.wall = t.wall&nsecMask | uint64(dsec)<<nsecShift | hasMonotonic
0x107348b 4881e2ffffff3f ANDQ $0x3fffffff, DX
0x1073492 48c1e61e SHLQ $0x1e, SI
0x1073496 4809f2 ORQ SI, DX
0x1073499 480fbaea3f BTSQ $0x3f, DX
0x107349e 488910 MOVQ DX, 0(AX)
Go1 benchmarks seem unaffected, and I would be surprised
otherwise:
name old time/op new time/op delta
BinaryTree17-4 2.64s ± 4% 2.56s ± 9% -2.92% (p=0.008 n=9+9)
Fannkuch11-4 2.90s ± 1% 2.95s ± 3% +1.76% (p=0.010 n=10+9)
FmtFprintfEmpty-4 35.3ns ± 1% 34.5ns ± 2% -2.34% (p=0.004 n=9+8)
FmtFprintfString-4 57.0ns ± 1% 58.4ns ± 5% +2.52% (p=0.029 n=9+10)
FmtFprintfInt-4 59.8ns ± 3% 59.8ns ± 6% ~ (p=0.565 n=10+10)
FmtFprintfIntInt-4 93.9ns ± 3% 91.2ns ± 5% -2.94% (p=0.014 n=10+9)
FmtFprintfPrefixedInt-4 107ns ± 6% 104ns ± 6% ~ (p=0.099 n=10+10)
FmtFprintfFloat-4 187ns ± 3% 188ns ± 3% ~ (p=0.505 n=10+9)
FmtManyArgs-4 410ns ± 1% 415ns ± 6% ~ (p=0.649 n=8+10)
GobDecode-4 5.30ms ± 3% 5.27ms ± 3% ~ (p=0.436 n=10+10)
GobEncode-4 4.62ms ± 5% 4.47ms ± 2% -3.24% (p=0.001 n=9+10)
Gzip-4 197ms ± 4% 193ms ± 3% ~ (p=0.123 n=10+10)
Gunzip-4 30.4ms ± 3% 30.1ms ± 3% ~ (p=0.481 n=10+10)
HTTPClientServer-4 76.3µs ± 1% 76.0µs ± 1% ~ (p=0.236 n=8+9)
JSONEncode-4 10.5ms ± 9% 10.3ms ± 3% ~ (p=0.280 n=10+10)
JSONDecode-4 42.3ms ±10% 41.3ms ± 2% ~ (p=0.053 n=9+10)
Mandelbrot200-4 3.80ms ± 2% 3.72ms ± 2% -2.15% (p=0.001 n=9+10)
GoParse-4 2.88ms ±10% 2.81ms ± 2% ~ (p=0.247 n=10+10)
RegexpMatchEasy0_32-4 69.5ns ± 4% 68.6ns ± 2% ~ (p=0.171 n=10+10)
RegexpMatchEasy0_1K-4 165ns ± 3% 162ns ± 3% ~ (p=0.137 n=10+10)
RegexpMatchEasy1_32-4 65.7ns ± 6% 64.4ns ± 2% -2.02% (p=0.037 n=10+10)
RegexpMatchEasy1_1K-4 278ns ± 2% 279ns ± 3% ~ (p=0.991 n=8+9)
RegexpMatchMedium_32-4 99.3ns ± 3% 98.5ns ± 4% ~ (p=0.457 n=10+9)
RegexpMatchMedium_1K-4 30.1µs ± 1% 30.4µs ± 2% ~ (p=0.173 n=8+10)
RegexpMatchHard_32-4 1.40µs ± 2% 1.41µs ± 4% ~ (p=0.565 n=10+10)
RegexpMatchHard_1K-4 42.5µs ± 1% 41.5µs ± 3% -2.13% (p=0.002 n=8+9)
Revcomp-4 332ms ± 4% 328ms ± 5% ~ (p=0.720 n=9+10)
Template-4 48.3ms ± 2% 49.6ms ± 3% +2.56% (p=0.002 n=8+10)
TimeParse-4 252ns ± 2% 249ns ± 3% ~ (p=0.116 n=9+10)
TimeFormat-4 262ns ± 4% 252ns ± 3% -4.01% (p=0.000 n=9+10)
name old speed new speed delta
GobDecode-4 145MB/s ± 3% 146MB/s ± 3% ~ (p=0.436 n=10+10)
GobEncode-4 166MB/s ± 5% 172MB/s ± 2% +3.28% (p=0.001 n=9+10)
Gzip-4 98.6MB/s ± 4% 100.4MB/s ± 3% ~ (p=0.123 n=10+10)
Gunzip-4 639MB/s ± 3% 645MB/s ± 3% ~ (p=0.481 n=10+10)
JSONEncode-4 185MB/s ± 8% 189MB/s ± 3% ~ (p=0.280 n=10+10)
JSONDecode-4 46.0MB/s ± 9% 47.0MB/s ± 2% +2.21% (p=0.046 n=9+10)
GoParse-4 20.1MB/s ± 9% 20.6MB/s ± 2% ~ (p=0.239 n=10+10)
RegexpMatchEasy0_32-4 460MB/s ± 4% 467MB/s ± 2% ~ (p=0.165 n=10+10)
RegexpMatchEasy0_1K-4 6.19GB/s ± 3% 6.28GB/s ± 3% ~ (p=0.165 n=10+10)
RegexpMatchEasy1_32-4 487MB/s ± 5% 497MB/s ± 2% +2.00% (p=0.043 n=10+10)
RegexpMatchEasy1_1K-4 3.67GB/s ± 2% 3.67GB/s ± 3% ~ (p=0.963 n=8+9)
RegexpMatchMedium_32-4 10.1MB/s ± 3% 10.1MB/s ± 4% ~ (p=0.435 n=10+9)
RegexpMatchMedium_1K-4 34.0MB/s ± 1% 33.7MB/s ± 2% ~ (p=0.173 n=8+10)
RegexpMatchHard_32-4 22.9MB/s ± 2% 22.7MB/s ± 4% ~ (p=0.565 n=10+10)
RegexpMatchHard_1K-4 24.0MB/s ± 3% 24.7MB/s ± 3% +2.64% (p=0.001 n=9+9)
Revcomp-4 766MB/s ± 4% 775MB/s ± 5% ~ (p=0.720 n=9+10)
Template-4 40.2MB/s ± 2% 39.2MB/s ± 3% -2.47% (p=0.002 n=8+10)
The rules match ~1800 times during all.bash.
Fixes #18943
Change-Id: I64be1ada34e89c486dfd935bf429b35652117ed4
Reviewed-on: https://go-review.googlesource.com/94766
Run-TryBot: Giovanni Bajo <rasky@develer.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2018-02-17 05:54:03 -07:00
|
|
|
// amd64:"BTRQ\t[$]63","PXOR" (TODO: this should be BTSQ)
|
2018-03-03 11:17:20 -07:00
|
|
|
// s390x:"LNDFR\t",-"MOVD\t" (no integer load/store)
|
2023-01-25 10:53:10 -07:00
|
|
|
// ppc64x:"FNABS\t"
|
2018-03-03 11:17:20 -07:00
|
|
|
sink64[1] = -math.Abs(y)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Check that it's using integer registers
|
|
|
|
// abs32 returns the absolute value of a float32 by widening x to float64,
// calling math.Abs, and converting the result back to float32.
//
// The architecture-prefixed comment inside the body is an asmcheck
// directive; it is reproduced verbatim, immediately above the statement
// it annotates.
func abs32(x float32) float32 {
	// s390x:"LPDFR",-"LDEBR",-"LEDBR" (no float64 conversion)
	return float32(math.Abs(float64(x)))
}
|
|
|
|
|
|
|
|
// Check that it's using integer registers
|
|
|
|
func copysign(a, b, c float64) {
|
2019-09-09 09:50:35 -06:00
|
|
|
// amd64:"BTRQ\t[$]63","ANDQ","ORQ"
|
2018-03-03 11:17:20 -07:00
|
|
|
// s390x:"CPSDR",-"MOVD" (no integer load/store)
|
2023-01-25 10:53:10 -07:00
|
|
|
// ppc64x:"FCPSGN"
|
2021-09-09 16:47:14 -06:00
|
|
|
// riscv64:"FSGNJD"
|
2019-03-04 17:56:17 -07:00
|
|
|
// wasm:"F64Copysign"
|
2018-03-03 11:17:20 -07:00
|
|
|
sink64[0] = math.Copysign(a, b)
|
|
|
|
|
cmd/compile: add patterns for bit set/clear/complement on amd64
This patch completes implementation of BT(Q|L), and adds support
for BT(S|R|C)(Q|L).
Example of code changes from time.(*Time).addSec:
if t.wall&hasMonotonic != 0 {
0x1073465 488b08 MOVQ 0(AX), CX
0x1073468 4889ca MOVQ CX, DX
0x107346b 48c1e93f SHRQ $0x3f, CX
0x107346f 48c1e13f SHLQ $0x3f, CX
0x1073473 48f7c1ffffffff TESTQ $-0x1, CX
0x107347a 746b JE 0x10734e7
if t.wall&hasMonotonic != 0 {
0x1073435 488b08 MOVQ 0(AX), CX
0x1073438 480fbae13f BTQ $0x3f, CX
0x107343d 7363 JAE 0x10734a2
Another example:
t.wall = t.wall&nsecMask | uint64(dsec)<<nsecShift | hasMonotonic
0x10734c8 4881e1ffffff3f ANDQ $0x3fffffff, CX
0x10734cf 48c1e61e SHLQ $0x1e, SI
0x10734d3 4809ce ORQ CX, SI
0x10734d6 48b90000000000000080 MOVQ $0x8000000000000000, CX
0x10734e0 4809f1 ORQ SI, CX
0x10734e3 488908 MOVQ CX, 0(AX)
t.wall = t.wall&nsecMask | uint64(dsec)<<nsecShift | hasMonotonic
0x107348b 4881e2ffffff3f ANDQ $0x3fffffff, DX
0x1073492 48c1e61e SHLQ $0x1e, SI
0x1073496 4809f2 ORQ SI, DX
0x1073499 480fbaea3f BTSQ $0x3f, DX
0x107349e 488910 MOVQ DX, 0(AX)
Go1 benchmarks seem unaffected, and I would be surprised
otherwise:
name old time/op new time/op delta
BinaryTree17-4 2.64s ± 4% 2.56s ± 9% -2.92% (p=0.008 n=9+9)
Fannkuch11-4 2.90s ± 1% 2.95s ± 3% +1.76% (p=0.010 n=10+9)
FmtFprintfEmpty-4 35.3ns ± 1% 34.5ns ± 2% -2.34% (p=0.004 n=9+8)
FmtFprintfString-4 57.0ns ± 1% 58.4ns ± 5% +2.52% (p=0.029 n=9+10)
FmtFprintfInt-4 59.8ns ± 3% 59.8ns ± 6% ~ (p=0.565 n=10+10)
FmtFprintfIntInt-4 93.9ns ± 3% 91.2ns ± 5% -2.94% (p=0.014 n=10+9)
FmtFprintfPrefixedInt-4 107ns ± 6% 104ns ± 6% ~ (p=0.099 n=10+10)
FmtFprintfFloat-4 187ns ± 3% 188ns ± 3% ~ (p=0.505 n=10+9)
FmtManyArgs-4 410ns ± 1% 415ns ± 6% ~ (p=0.649 n=8+10)
GobDecode-4 5.30ms ± 3% 5.27ms ± 3% ~ (p=0.436 n=10+10)
GobEncode-4 4.62ms ± 5% 4.47ms ± 2% -3.24% (p=0.001 n=9+10)
Gzip-4 197ms ± 4% 193ms ± 3% ~ (p=0.123 n=10+10)
Gunzip-4 30.4ms ± 3% 30.1ms ± 3% ~ (p=0.481 n=10+10)
HTTPClientServer-4 76.3µs ± 1% 76.0µs ± 1% ~ (p=0.236 n=8+9)
JSONEncode-4 10.5ms ± 9% 10.3ms ± 3% ~ (p=0.280 n=10+10)
JSONDecode-4 42.3ms ±10% 41.3ms ± 2% ~ (p=0.053 n=9+10)
Mandelbrot200-4 3.80ms ± 2% 3.72ms ± 2% -2.15% (p=0.001 n=9+10)
GoParse-4 2.88ms ±10% 2.81ms ± 2% ~ (p=0.247 n=10+10)
RegexpMatchEasy0_32-4 69.5ns ± 4% 68.6ns ± 2% ~ (p=0.171 n=10+10)
RegexpMatchEasy0_1K-4 165ns ± 3% 162ns ± 3% ~ (p=0.137 n=10+10)
RegexpMatchEasy1_32-4 65.7ns ± 6% 64.4ns ± 2% -2.02% (p=0.037 n=10+10)
RegexpMatchEasy1_1K-4 278ns ± 2% 279ns ± 3% ~ (p=0.991 n=8+9)
RegexpMatchMedium_32-4 99.3ns ± 3% 98.5ns ± 4% ~ (p=0.457 n=10+9)
RegexpMatchMedium_1K-4 30.1µs ± 1% 30.4µs ± 2% ~ (p=0.173 n=8+10)
RegexpMatchHard_32-4 1.40µs ± 2% 1.41µs ± 4% ~ (p=0.565 n=10+10)
RegexpMatchHard_1K-4 42.5µs ± 1% 41.5µs ± 3% -2.13% (p=0.002 n=8+9)
Revcomp-4 332ms ± 4% 328ms ± 5% ~ (p=0.720 n=9+10)
Template-4 48.3ms ± 2% 49.6ms ± 3% +2.56% (p=0.002 n=8+10)
TimeParse-4 252ns ± 2% 249ns ± 3% ~ (p=0.116 n=9+10)
TimeFormat-4 262ns ± 4% 252ns ± 3% -4.01% (p=0.000 n=9+10)
name old speed new speed delta
GobDecode-4 145MB/s ± 3% 146MB/s ± 3% ~ (p=0.436 n=10+10)
GobEncode-4 166MB/s ± 5% 172MB/s ± 2% +3.28% (p=0.001 n=9+10)
Gzip-4 98.6MB/s ± 4% 100.4MB/s ± 3% ~ (p=0.123 n=10+10)
Gunzip-4 639MB/s ± 3% 645MB/s ± 3% ~ (p=0.481 n=10+10)
JSONEncode-4 185MB/s ± 8% 189MB/s ± 3% ~ (p=0.280 n=10+10)
JSONDecode-4 46.0MB/s ± 9% 47.0MB/s ± 2% +2.21% (p=0.046 n=9+10)
GoParse-4 20.1MB/s ± 9% 20.6MB/s ± 2% ~ (p=0.239 n=10+10)
RegexpMatchEasy0_32-4 460MB/s ± 4% 467MB/s ± 2% ~ (p=0.165 n=10+10)
RegexpMatchEasy0_1K-4 6.19GB/s ± 3% 6.28GB/s ± 3% ~ (p=0.165 n=10+10)
RegexpMatchEasy1_32-4 487MB/s ± 5% 497MB/s ± 2% +2.00% (p=0.043 n=10+10)
RegexpMatchEasy1_1K-4 3.67GB/s ± 2% 3.67GB/s ± 3% ~ (p=0.963 n=8+9)
RegexpMatchMedium_32-4 10.1MB/s ± 3% 10.1MB/s ± 4% ~ (p=0.435 n=10+9)
RegexpMatchMedium_1K-4 34.0MB/s ± 1% 33.7MB/s ± 2% ~ (p=0.173 n=8+10)
RegexpMatchHard_32-4 22.9MB/s ± 2% 22.7MB/s ± 4% ~ (p=0.565 n=10+10)
RegexpMatchHard_1K-4 24.0MB/s ± 3% 24.7MB/s ± 3% +2.64% (p=0.001 n=9+9)
Revcomp-4 766MB/s ± 4% 775MB/s ± 5% ~ (p=0.720 n=9+10)
Template-4 40.2MB/s ± 2% 39.2MB/s ± 3% -2.47% (p=0.002 n=8+10)
The rules match ~1800 times during all.bash.
Fixes #18943
Change-Id: I64be1ada34e89c486dfd935bf429b35652117ed4
Reviewed-on: https://go-review.googlesource.com/94766
Run-TryBot: Giovanni Bajo <rasky@develer.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2018-02-17 05:54:03 -07:00
|
|
|
// amd64:"BTSQ\t[$]63"
|
2018-03-03 11:17:20 -07:00
|
|
|
// s390x:"LNDFR\t",-"MOVD\t" (no integer load/store)
|
2023-01-25 10:53:10 -07:00
|
|
|
// ppc64x:"FCPSGN"
|
2021-09-09 16:47:14 -06:00
|
|
|
// riscv64:"FSGNJD"
|
2018-09-11 19:43:09 -06:00
|
|
|
// arm64:"ORR", -"AND"
|
2018-03-03 11:17:20 -07:00
|
|
|
sink64[1] = math.Copysign(c, -1)
|
|
|
|
|
|
|
|
// Like math.Copysign(c, -1), but with integer operations. Useful
|
|
|
|
// for platforms that have a copysign opcode to see if it's detected.
|
|
|
|
// s390x:"LNDFR\t",-"MOVD\t" (no integer load/store)
|
|
|
|
sink64[2] = math.Float64frombits(math.Float64bits(a) | 1<<63)
|
|
|
|
|
2019-09-09 09:50:35 -06:00
|
|
|
// amd64:"ANDQ","ORQ"
|
2018-03-03 11:17:20 -07:00
|
|
|
// s390x:"CPSDR\t",-"MOVD\t" (no integer load/store)
|
2023-01-25 10:53:10 -07:00
|
|
|
// ppc64x:"FCPSGN"
|
2021-09-09 16:47:14 -06:00
|
|
|
// riscv64:"FSGNJD"
|
2018-03-03 11:17:20 -07:00
|
|
|
sink64[3] = math.Copysign(-1, c)
|
|
|
|
}
|
|
|
|
|
2018-08-29 18:57:33 -06:00
|
|
|
// fma returns math.FMA(x, y, z), the fused multiply-add x*y + z.
//
// The architecture-prefixed comments inside the body are asmcheck
// directives; they are reproduced verbatim, immediately above the
// statement they annotate.
func fma(x, y, z float64) float64 {
	// amd64/v3:-".*x86HasFMA"
	// amd64:"VFMADD231SD"
	// arm/6:"FMULAD"
	// arm64:"FMADDD"
	// s390x:"FMADD"
	// ppc64x:"FMADD"
	// riscv64:"FMADDD"
	return math.FMA(x, y, z)
}
|
|
|
|
|
2021-02-17 08:00:34 -07:00
|
|
|
// fms returns math.FMA(x, y, -z), i.e. the fused form of x*y - z.
//
// The architecture-prefixed comment inside the body is an asmcheck
// directive; it is reproduced verbatim, immediately above the statement
// it annotates.
func fms(x, y, z float64) float64 {
	// riscv64:"FMSUBD"
	return math.FMA(x, y, -z)
}
|
|
|
|
|
|
|
|
// fnma returns math.FMA(-x, y, z), i.e. the fused form of -(x*y) + z.
//
// The architecture-prefixed comment inside the body is an asmcheck
// directive; it is reproduced verbatim, immediately above the statement
// it annotates.
func fnma(x, y, z float64) float64 {
	// riscv64:"FNMADDD"
	return math.FMA(-x, y, z)
}
|
|
|
|
|
|
|
|
// fnms returns math.FMA(x, -y, -z), i.e. the fused form of -(x*y) - z.
//
// The architecture-prefixed comment inside the body is an asmcheck
// directive; it is reproduced verbatim, immediately above the statement
// it annotates.
func fnms(x, y, z float64) float64 {
	// riscv64:"FNMSUBD"
	return math.FMA(x, -y, -z)
}
|
|
|
|
|
2018-03-03 11:17:20 -07:00
|
|
|
// fromFloat64 adds 1 to f64 as a float, reinterprets the sum as its
// IEEE 754 bit pattern with math.Float64bits, and returns that pattern
// plus 1 as a uint64.
//
// The architecture-prefixed comments inside the body are asmcheck
// directives; they are reproduced verbatim, immediately above the
// statement they annotate.
func fromFloat64(f64 float64) uint64 {
	// amd64:"MOVQ\tX.*, [^X].*"
	// arm64:"FMOVD\tF.*, R.*"
	// ppc64x:"MFVSRD"
	return math.Float64bits(f64+1) + 1
}
|
|
|
|
|
|
|
|
// fromFloat32 adds 1 to f32 as a float, reinterprets the sum as its
// IEEE 754 bit pattern with math.Float32bits, and returns that pattern
// plus 1 as a uint32.
//
// The architecture-prefixed comments inside the body are asmcheck
// directives; they are reproduced verbatim, immediately above the
// statement they annotate.
func fromFloat32(f32 float32) uint32 {
	// amd64:"MOVL\tX.*, [^X].*"
	// arm64:"FMOVS\tF.*, R.*"
	return math.Float32bits(f32+1) + 1
}
|
|
|
|
|
|
|
|
// toFloat64 adds 1 to u64 as an integer, reinterprets the sum as a
// float64 with math.Float64frombits, and returns that value plus 1.
//
// The architecture-prefixed comments inside the body are asmcheck
// directives; they are reproduced verbatim, immediately above the
// statement they annotate.
func toFloat64(u64 uint64) float64 {
	// amd64:"MOVQ\t[^X].*, X.*"
	// arm64:"FMOVD\tR.*, F.*"
	// ppc64x:"MTVSRD"
	return math.Float64frombits(u64+1) + 1
}
|
|
|
|
|
|
|
|
// toFloat32 adds 1 to u32 as an integer, reinterprets the sum as a
// float32 with math.Float32frombits, and returns that value plus 1.
//
// The architecture-prefixed comments inside the body are asmcheck
// directives; they are reproduced verbatim, immediately above the
// statement they annotate.
func toFloat32(u32 uint32) float32 {
	// amd64:"MOVL\t[^X].*, X.*"
	// arm64:"FMOVS\tR.*, F.*"
	return math.Float32frombits(u32+1) + 1
}
|
|
|
|
|
|
|
|
// Test that comparisons with constants converted to float
|
|
|
|
// are evaluated at compile-time
|
|
|
|
|
|
|
|
// constantCheck64 returns the result of comparing float64 constants with
// integer constants converted to float64. Both comparisons are false
// (0.5 != 1 and 1.5 is not greater than 2^63), so the result is false.
//
// The architecture-prefixed comments inside the body are asmcheck
// directives; they are reproduced verbatim, immediately above the
// statement they annotate.
func constantCheck64() bool {
	// amd64:"(MOVB\t[$]0)|(XORL\t[A-Z][A-Z0-9]+, [A-Z][A-Z0-9]+)",-"FCMP",-"MOVB\t[$]1"
	// s390x:"MOV(B|BZ|D)\t[$]0,",-"FCMPU",-"MOV(B|BZ|D)\t[$]1,"
	return 0.5 == float64(uint32(1)) || 1.5 > float64(uint64(1<<63))
}
|
|
|
|
|
|
|
|
// constantCheck32 returns the result of comparing float32 constants with
// integer constants converted to float32. Both comparisons are true
// (0.5 <= 1 and 1.5 >= -2^31), so the result is true.
//
// The architecture-prefixed comments inside the body are asmcheck
// directives; they are reproduced verbatim, immediately above the
// statement they annotate.
func constantCheck32() bool {
	// amd64:"MOV(B|L)\t[$]1",-"FCMP",-"MOV(B|L)\t[$]0"
	// s390x:"MOV(B|BZ|D)\t[$]1,",-"FCMPU",-"MOV(B|BZ|D)\t[$]0,"
	return float32(0.5) <= float32(int64(1)) && float32(1.5) >= float32(int32(-1<<31))
}
|
|
|
|
|
|
|
|
// Test that integer constants are converted to floating point constants
|
|
|
|
// at compile-time
|
|
|
|
|
|
|
|
// constantConvert32 returns -x when x is greater than the float32 whose
// bit pattern is 0x3f800000 (that is, 1.0), and x otherwise.
//
// The architecture-prefixed comments inside the body are asmcheck
// directives; they are reproduced verbatim, immediately above the
// statement they annotate.
func constantConvert32(x float32) float32 {
	// amd64:"MOVSS\t[$]f32.3f800000\\(SB\\)"
	// s390x:"FMOVS\t[$]f32.3f800000\\(SB\\)"
	// ppc64x:"FMOVS\t[$]f32.3f800000\\(SB\\)"
	// arm64:"FMOVS\t[$]\\(1.0\\)"
	if x > math.Float32frombits(0x3f800000) {
		return -x
	}
	return x
}
|
|
|
|
|
|
|
|
// constantConvertInt32 returns -x (with uint32 wraparound) when x is
// greater than math.Float32bits(1), the bit pattern of float32 1.0
// (0x3f800000), and x otherwise.
//
// The architecture-prefixed comments inside the body are asmcheck
// directives; they are reproduced verbatim, immediately above the
// statement they annotate.
func constantConvertInt32(x uint32) uint32 {
	// amd64:-"MOVSS"
	// s390x:-"FMOVS"
	// ppc64x:-"FMOVS"
	// arm64:-"FMOVS"
	if x > math.Float32bits(1) {
		return -x
	}
	return x
}
|
2020-03-03 10:56:20 -07:00
|
|
|
|
|
|
|
// nanGenerate64 computes zero/zero, zero*inf and math.Sqrt(-1) from
// float64 locals and returns their sum, which is NaN.
//
// The architecture-prefixed comments inside the body are asmcheck
// directives; they are reproduced verbatim, immediately above the
// statement they annotate.
func nanGenerate64() float64 {
	// Test to make sure we don't generate a NaN while constant propagating.
	// See issue 36400.
	zero := 0.0
	// amd64:-"DIVSD"
	inf := 1 / zero // +inf. We can constant propagate this one.
	negone := -1.0

	// amd64:"DIVSD"
	z0 := zero / zero
	// amd64:"MULSD"
	z1 := zero * inf
	// amd64:"SQRTSD"
	z2 := math.Sqrt(negone)
	return z0 + z1 + z2
}
|
|
|
|
|
|
|
|
// nanGenerate32 computes zero/zero and zero*inf from float32 locals and
// returns their sum, which is NaN.
//
// The architecture-prefixed comments inside the body are asmcheck
// directives; they are reproduced verbatim, immediately above the
// statement they annotate.
func nanGenerate32() float32 {
	zero := float32(0.0)
	// amd64:-"DIVSS"
	inf := 1 / zero // +inf. We can constant propagate this one.

	// amd64:"DIVSS"
	z0 := zero / zero
	// amd64:"MULSS"
	z1 := zero * inf
	return z0 + z1
}
|