From 732f6fa9d552c643b6225dd56689eb653ad61473 Mon Sep 17 00:00:00 2001 From: Jake Ciolek Date: Sun, 10 Oct 2021 17:56:16 +0200 Subject: [PATCH] cmd/compile: use ANDL for small immediates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We can rewrite ANDQ with an immediate fitting in 32bit with an ANDL, which is shorter to encode. Looking at Go binary itself, before the change there was: ANDL: 2337 ANDQ: 4476 After the change: ANDL: 3790 ANDQ: 3024 So we got rid of 1452 ANDQs This makes the Linux x86_64 binary 0.03% smaller. There seems to be an impact on performance. Intel Cascade Lake benchmarks (with perflock): name old time/op new time/op delta BinaryTree17-8 1.91s ± 1% 1.89s ± 1% -1.22% (p=0.000 n=21+18) Fannkuch11-8 2.34s ± 0% 2.34s ± 0% ~ (p=0.052 n=20+20) FmtFprintfEmpty-8 27.7ns ± 1% 27.4ns ± 3% ~ (p=0.497 n=21+21) FmtFprintfString-8 53.2ns ± 0% 51.5ns ± 0% -3.21% (p=0.000 n=20+19) FmtFprintfInt-8 57.3ns ± 0% 55.7ns ± 0% -2.89% (p=0.000 n=19+19) FmtFprintfIntInt-8 92.3ns ± 0% 88.4ns ± 1% -4.23% (p=0.000 n=20+21) FmtFprintfPrefixedInt-8 103ns ± 0% 103ns ± 0% +0.23% (p=0.000 n=20+21) FmtFprintfFloat-8 147ns ± 0% 148ns ± 0% +0.75% (p=0.000 n=20+21) FmtManyArgs-8 384ns ± 0% 381ns ± 0% -0.63% (p=0.000 n=21+21) GobDecode-8 3.86ms ± 1% 3.88ms ± 1% +0.52% (p=0.000 n=20+21) GobEncode-8 2.77ms ± 1% 2.77ms ± 0% ~ (p=0.078 n=21+21) Gzip-8 168ms ± 1% 168ms ± 0% +0.24% (p=0.000 n=20+20) Gunzip-8 25.1ms ± 0% 24.3ms ± 0% -3.03% (p=0.000 n=21+21) HTTPClientServer-8 61.4µs ± 8% 59.1µs ±10% ~ (p=0.088 n=20+21) JSONEncode-8 6.86ms ± 0% 6.70ms ± 0% -2.29% (p=0.000 n=20+19) JSONDecode-8 30.8ms ± 1% 30.6ms ± 1% -0.82% (p=0.000 n=20+20) Mandelbrot200-8 3.85ms ± 0% 3.85ms ± 0% ~ (p=0.191 n=16+17) GoParse-8 2.61ms ± 2% 2.60ms ± 1% ~ (p=0.561 n=21+20) RegexpMatchEasy0_32-8 48.5ns ± 2% 45.9ns ± 3% -5.26% (p=0.000 n=20+21) RegexpMatchEasy0_1K-8 139ns ± 0% 139ns ± 0% +0.27% (p=0.000 n=18+20) RegexpMatchEasy1_32-8 41.3ns ± 0% 42.1ns ± 4% +1.95% (p=0.000 n=17+21) RegexpMatchEasy1_1K-8 216ns ± 2% 216ns ± 0% +0.17% (p=0.020 n=21+19) RegexpMatchMedium_32-8 790ns ± 7% 803ns ± 8% ~ (p=0.178 n=21+21) RegexpMatchMedium_1K-8 23.5µs ± 5% 23.7µs ± 5% ~ (p=0.421 n=21+21) RegexpMatchHard_32-8 1.09µs ± 1% 1.09µs ± 1% -0.53% (p=0.000 n=19+18) RegexpMatchHard_1K-8 33.0µs ± 0% 33.0µs ± 0% ~ (p=0.610 n=21+20) Revcomp-8 348ms ± 0% 353ms ± 0% +1.38% (p=0.000 n=17+18) Template-8 42.0ms ± 1% 41.9ms ± 1% -0.30% (p=0.049 n=20+20) TimeParse-8 185ns ± 0% 185ns ± 0% ~ (p=0.387 n=20+18) TimeFormat-8 237ns ± 1% 241ns ± 1% +1.57% (p=0.000 n=21+21) [Geo mean] 35.4µs 35.2µs -0.66% name old speed new speed delta GobDecode-8 199MB/s ± 1% 198MB/s ± 1% -0.52% (p=0.000 n=20+21) GobEncode-8 277MB/s ± 1% 277MB/s ± 0% ~ (p=0.075 n=21+21) Gzip-8 116MB/s ± 1% 115MB/s ± 0% -0.25% (p=0.000 n=20+20) Gunzip-8 773MB/s ± 0% 797MB/s ± 0% +3.12% (p=0.000 n=21+21) JSONEncode-8 283MB/s ± 0% 290MB/s ± 0% +2.35% (p=0.000 n=20+19) JSONDecode-8 63.0MB/s ± 1% 63.5MB/s ± 1% +0.82% (p=0.000 n=20+20) GoParse-8 22.2MB/s ± 2% 22.3MB/s ± 1% ~ (p=0.539 n=21+20) RegexpMatchEasy0_32-8 660MB/s ± 2% 697MB/s ± 3% +5.57% (p=0.000 n=20+21) RegexpMatchEasy0_1K-8 7.36GB/s ± 0% 7.34GB/s ± 0% -0.26% (p=0.000 n=18+20) RegexpMatchEasy1_32-8 775MB/s ± 0% 761MB/s ± 4% -1.88% (p=0.000 n=17+21) RegexpMatchEasy1_1K-8 4.74GB/s ± 2% 4.74GB/s ± 0% -0.18% (p=0.020 n=21+19) RegexpMatchMedium_32-8 40.6MB/s ± 7% 39.9MB/s ± 9% ~ (p=0.191 n=21+21) RegexpMatchMedium_1K-8 43.7MB/s ± 5% 43.2MB/s ± 5% ~ (p=0.435 n=21+21) RegexpMatchHard_32-8 29.3MB/s ± 1% 29.4MB/s ± 1% +0.53% (p=0.000 n=19+18) RegexpMatchHard_1K-8 31.0MB/s ± 0% 31.0MB/s ± 0% ~ (p=0.572 n=21+20) Revcomp-8 730MB/s ± 0% 720MB/s ± 0% -1.36% (p=0.000 n=17+18) Template-8 46.2MB/s ± 1% 46.3MB/s ± 1% +0.30% (p=0.041 n=20+20) [Geo mean] 204MB/s 205MB/s +0.30% Change-Id: Iac75d0ec184a515ce0e65e19559d5fe2e9840514 Reviewed-on: https://go-review.googlesource.com/c/go/+/354970 Reviewed-by: Josh Bleecher Snyder Trust: Josh Bleecher Snyder Trust: Keith Randall Run-TryBot: Josh Bleecher Snyder TryBot-Result: Go Bot --- src/cmd/compile/internal/amd64/ssa.go | 15 ++++++++++++++- test/codegen/arithmetic.go | 8 ++++---- test/codegen/bits.go | 2 +- test/codegen/bool.go | 2 +- 4 files changed, 20 insertions(+), 7 deletions(-) diff --git a/src/cmd/compile/internal/amd64/ssa.go b/src/cmd/compile/internal/amd64/ssa.go index 78822098a78..0e745744224 100644 --- a/src/cmd/compile/internal/amd64/ssa.go +++ b/src/cmd/compile/internal/amd64/ssa.go @@ -618,8 +618,21 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { p.To.Reg = r p.SetFrom3Reg(v.Args[0].Reg()) + case ssa.OpAMD64ANDQconst: + asm := v.Op.Asm() + // If the constant is positive and fits into 32 bits, use ANDL. + // This saves a few bytes of encoding. + if 0 <= v.AuxInt && v.AuxInt <= (1<<32-1) { + asm = x86.AANDL + } + p := s.Prog(asm) + p.From.Type = obj.TYPE_CONST + p.From.Offset = v.AuxInt + p.To.Type = obj.TYPE_REG + p.To.Reg = v.Reg() + case ssa.OpAMD64SUBQconst, ssa.OpAMD64SUBLconst, - ssa.OpAMD64ANDQconst, ssa.OpAMD64ANDLconst, + ssa.OpAMD64ANDLconst, ssa.OpAMD64ORQconst, ssa.OpAMD64ORLconst, ssa.OpAMD64XORQconst, ssa.OpAMD64XORLconst, ssa.OpAMD64SHLQconst, ssa.OpAMD64SHLLconst, diff --git a/test/codegen/arithmetic.go b/test/codegen/arithmetic.go index eb95416b6aa..8b8c8ed64b0 100644 --- a/test/codegen/arithmetic.go +++ b/test/codegen/arithmetic.go @@ -241,7 +241,7 @@ func FloatDivs(a []float32) float32 { func Pow2Mods(n1 uint, n2 int) (uint, int) { // 386:"ANDL\t[$]31",-"DIVL" - // amd64:"ANDQ\t[$]31",-"DIVQ" + // amd64:"ANDL\t[$]31",-"DIVQ" // arm:"AND\t[$]31",-".*udiv" // arm64:"AND\t[$]31",-"UDIV" // ppc64:"ANDCC\t[$]31" @@ -452,7 +452,7 @@ func LenDiv2(s string) int { func LenMod1(a []int) int { // 386:"ANDL\t[$]1023" - // amd64:"ANDQ\t[$]1023" + // amd64:"ANDL\t[$]1023" // arm64:"AND\t[$]1023",-"SDIV" // arm/6:"AND",-".*udiv" // arm/7:"BFC",-".*udiv",-"AND" @@ -463,7 +463,7 @@ func LenMod1(a []int) int { func LenMod2(s string) int { // 386:"ANDL\t[$]2047" - // amd64:"ANDQ\t[$]2047" + // amd64:"ANDL\t[$]2047" // arm64:"AND\t[$]2047",-"SDIV" // arm/6:"AND",-".*udiv" // arm/7:"BFC",-".*udiv",-"AND" @@ -484,7 +484,7 @@ func CapDiv(a []int) int { func CapMod(a []int) int { // 386:"ANDL\t[$]4095" - // amd64:"ANDQ\t[$]4095" + // amd64:"ANDL\t[$]4095" // arm64:"AND\t[$]4095",-"SDIV" // arm/6:"AND",-".*udiv" // arm/7:"BFC",-".*udiv",-"AND" diff --git a/test/codegen/bits.go b/test/codegen/bits.go index 8e973d5726e..e7826b8e658 100644 --- a/test/codegen/bits.go +++ b/test/codegen/bits.go @@ -332,7 +332,7 @@ func bitSetPowerOf2Test(x int) bool { } func bitSetTest(x int) bool { - // amd64:"ANDQ\t[$]9, AX" + // amd64:"ANDL\t[$]9, AX" // amd64:"CMPQ\tAX, [$]9" return x&9 == 9 } diff --git a/test/codegen/bool.go b/test/codegen/bool.go index 929b1b49b9b..a32423308e0 100644 --- a/test/codegen/bool.go +++ b/test/codegen/bool.go @@ -27,7 +27,7 @@ func convertNeq0L(x uint32, c bool) bool { } func convertNeq0Q(x uint64, c bool) bool { - // amd64:"ANDQ\t[$]1",-"SETB" + // amd64:"ANDL\t[$]1",-"SETB" b := x&1 != 0 return c && b }