1
0
mirror of https://github.com/golang/go synced 2024-09-29 07:24:32 -06:00
go/test/codegen/math.go
Martin Möhrmann 9ec7074a94 compile: prefer an AND instead of SHR+SHL instructions
On modern 64bit CPUs a SHR, SHL or AND instruction take 1 cycle to execute.
A pair of shifts that operate on the same register will take 2 cycles
and needs to wait for the input register value to be available.

Large constants used to mask the high bits of a register with an AND
instruction can not be encoded as an immediate in the AND instruction
on amd64 and therefore need to be loaded into a register with a MOV
instruction.

However that MOV instruction is not dependent on the output register and
on many CPUs does not compete with the AND or shift instructions for
execution ports.

Using a pair of shifts to mask high bits instead of an AND to mask high
bits of a register has a shorter encoding and uses one less general
purpose register but is slower due to taking one clock cycle longer
if there is no register pressure that would make the AND variant need to
generate a spill.

For example the instructions emitted for (x & 1 << 63) before this CL are:
48c1ea3f                SHRQ $0x3f, DX
48c1e23f                SHLQ $0x3f, DX

after this CL the instructions are the same as GCC and LLVM use:
48b80000000000000080    MOVQ $0x8000000000000000, AX
4821d0                  ANDQ DX, AX

Some platforms such as arm64 already have SSA optimization rules to fuse
two shift instructions back into an AND.

Removing the general rule to rewrite AND to SHR+SHL speeds up this benchmark:

var GlobalU uint

func BenchmarkAndHighBits(b *testing.B) {
	x := uint(0)
	for i := 0; i < b.N; i++ {
		x &= 1 << 63
	}
	GlobalU = x
}

amd64/darwin on Intel(R) Core(TM) i7-3520M CPU @ 2.90GHz:
name           old time/op  new time/op  delta
AndHighBits-4  0.61ns ± 6%  0.42ns ± 6%  -31.42%  (p=0.000 n=25+25):

Updates #33826
Updates #32781

Change-Id: I862d3587446410c447b9a7265196b57f85358633
Reviewed-on: https://go-review.googlesource.com/c/go/+/191780
Run-TryBot: Martin Möhrmann <moehrmann@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2019-09-09 06:49:17 +00:00

179 lines
4.4 KiB
Go

// asmcheck
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package codegen
import "math"
var sink64 [8]float64
func approx(x float64) {
// s390x:"FIDBR\t[$]6"
// arm64:"FRINTPD"
// ppc64:"FRIP"
// ppc64le:"FRIP"
// wasm:"F64Ceil"
sink64[0] = math.Ceil(x)
// s390x:"FIDBR\t[$]7"
// arm64:"FRINTMD"
// ppc64:"FRIM"
// ppc64le:"FRIM"
// wasm:"F64Floor"
sink64[1] = math.Floor(x)
// s390x:"FIDBR\t[$]1"
// arm64:"FRINTAD"
// ppc64:"FRIN"
// ppc64le:"FRIN"
sink64[2] = math.Round(x)
// s390x:"FIDBR\t[$]5"
// arm64:"FRINTZD"
// ppc64:"FRIZ"
// ppc64le:"FRIZ"
// wasm:"F64Trunc"
sink64[3] = math.Trunc(x)
// s390x:"FIDBR\t[$]4"
// arm64:"FRINTND"
// wasm:"F64Nearest"
sink64[4] = math.RoundToEven(x)
}
func sqrt(x float64) float64 {
// amd64:"SQRTSD"
// 386/387:"FSQRT" 386/sse2:"SQRTSD"
// arm64:"FSQRTD"
// arm/7:"SQRTD"
// mips/hardfloat:"SQRTD" mips/softfloat:-"SQRTD"
// mips64/hardfloat:"SQRTD" mips64/softfloat:-"SQRTD"
// wasm:"F64Sqrt"
return math.Sqrt(x)
}
// Check that it's using integer registers
func abs(x, y float64) {
// amd64:"BTRQ\t[$]63"
// arm64:"FABSD\t"
// s390x:"LPDFR\t",-"MOVD\t" (no integer load/store)
// ppc64:"FABS\t"
// ppc64le:"FABS\t"
// wasm:"F64Abs"
// arm/6:"ABSD\t"
sink64[0] = math.Abs(x)
// amd64:"BTRQ\t[$]63","PXOR" (TODO: this should be BTSQ)
// s390x:"LNDFR\t",-"MOVD\t" (no integer load/store)
// ppc64:"FNABS\t"
// ppc64le:"FNABS\t"
sink64[1] = -math.Abs(y)
}
// Check that it's using integer registers
func abs32(x float32) float32 {
// s390x:"LPDFR",-"LDEBR",-"LEDBR" (no float64 conversion)
return float32(math.Abs(float64(x)))
}
// Check that it's using integer registers
func copysign(a, b, c float64) {
// amd64:"BTRQ\t[$]63","ANDQ","ORQ"
// s390x:"CPSDR",-"MOVD" (no integer load/store)
// ppc64:"FCPSGN"
// ppc64le:"FCPSGN"
// wasm:"F64Copysign"
sink64[0] = math.Copysign(a, b)
// amd64:"BTSQ\t[$]63"
// s390x:"LNDFR\t",-"MOVD\t" (no integer load/store)
// ppc64:"FCPSGN"
// ppc64le:"FCPSGN"
// arm64:"ORR", -"AND"
sink64[1] = math.Copysign(c, -1)
// Like math.Copysign(c, -1), but with integer operations. Useful
// for platforms that have a copysign opcode to see if it's detected.
// s390x:"LNDFR\t",-"MOVD\t" (no integer load/store)
sink64[2] = math.Float64frombits(math.Float64bits(a) | 1<<63)
// amd64:"ANDQ","ORQ"
// s390x:"CPSDR\t",-"MOVD\t" (no integer load/store)
// ppc64:"FCPSGN"
// ppc64le:"FCPSGN"
sink64[3] = math.Copysign(-1, c)
}
func fromFloat64(f64 float64) uint64 {
// amd64:"MOVQ\tX.*, [^X].*"
// arm64:"FMOVD\tF.*, R.*"
// ppc64:"MFVSRD"
// ppc64le:"MFVSRD"
return math.Float64bits(f64+1) + 1
}
func fromFloat32(f32 float32) uint32 {
// amd64:"MOVL\tX.*, [^X].*"
// arm64:"FMOVS\tF.*, R.*"
return math.Float32bits(f32+1) + 1
}
func toFloat64(u64 uint64) float64 {
// amd64:"MOVQ\t[^X].*, X.*"
// arm64:"FMOVD\tR.*, F.*"
// ppc64:"MTVSRD"
// ppc64le:"MTVSRD"
return math.Float64frombits(u64+1) + 1
}
func toFloat32(u32 uint32) float32 {
// amd64:"MOVL\t[^X].*, X.*"
// arm64:"FMOVS\tR.*, F.*"
return math.Float32frombits(u32+1) + 1
}
// Test that comparisons with constants converted to float
// are evaluated at compile-time
func constantCheck64() bool {
// amd64:"MOVB\t[$]0",-"FCMP",-"MOVB\t[$]1"
// s390x:"MOV(B|BZ|D)\t[$]0,",-"FCMPU",-"MOV(B|BZ|D)\t[$]1,"
return 0.5 == float64(uint32(1)) || 1.5 > float64(uint64(1<<63)) || math.NaN() == math.NaN()
}
func constantCheck32() bool {
// amd64:"MOVB\t[$]1",-"FCMP",-"MOVB\t[$]0"
// s390x:"MOV(B|BZ|D)\t[$]1,",-"FCMPU",-"MOV(B|BZ|D)\t[$]0,"
return float32(0.5) <= float32(int64(1)) && float32(1.5) >= float32(int32(-1<<31)) && float32(math.NaN()) != float32(math.NaN())
}
// Test that integer constants are converted to floating point constants
// at compile-time
func constantConvert32(x float32) float32 {
// amd64:"MOVSS\t[$]f32.3f800000\\(SB\\)"
// s390x:"FMOVS\t[$]f32.3f800000\\(SB\\)"
// ppc64:"FMOVS\t[$]f32.3f800000\\(SB\\)"
// ppc64le:"FMOVS\t[$]f32.3f800000\\(SB\\)"
// arm64:"FMOVS\t[$]\\(1.0\\)"
if x > math.Float32frombits(0x3f800000) {
return -x
}
return x
}
func constantConvertInt32(x uint32) uint32 {
// amd64:-"MOVSS"
// s390x:-"FMOVS"
// ppc64:-"FMOVS"
// ppc64le:-"FMOVS"
// arm64:-"FMOVS"
if x > math.Float32bits(1) {
return -x
}
return x
}