1
0
mirror of https://github.com/golang/go synced 2024-09-25 05:20:13 -06:00

cmd/compile: use ANDL for small immediates

We can rewrite ANDQ with an immediate fitting in 32bit with an ANDL, which is shorter to encode.

Looking at Go binary itself, before the change there was:

ANDL: 2337
ANDQ: 4476

After the change:

ANDL: 3790
ANDQ: 3024

So we got rid of 1452 ANDQs

This makes the Linux x86_64 binary 0.03% smaller.

There seems to be an impact on performance.

Intel Cascade Lake benchmarks (with perflock):

name                     old time/op    new time/op    delta
BinaryTree17-8              1.91s ± 1%     1.89s ± 1%  -1.22%  (p=0.000 n=21+18)
Fannkuch11-8                2.34s ± 0%     2.34s ± 0%    ~     (p=0.052 n=20+20)
FmtFprintfEmpty-8          27.7ns ± 1%    27.4ns ± 3%    ~     (p=0.497 n=21+21)
FmtFprintfString-8         53.2ns ± 0%    51.5ns ± 0%  -3.21%  (p=0.000 n=20+19)
FmtFprintfInt-8            57.3ns ± 0%    55.7ns ± 0%  -2.89%  (p=0.000 n=19+19)
FmtFprintfIntInt-8         92.3ns ± 0%    88.4ns ± 1%  -4.23%  (p=0.000 n=20+21)
FmtFprintfPrefixedInt-8     103ns ± 0%     103ns ± 0%  +0.23%  (p=0.000 n=20+21)
FmtFprintfFloat-8           147ns ± 0%     148ns ± 0%  +0.75%  (p=0.000 n=20+21)
FmtManyArgs-8               384ns ± 0%     381ns ± 0%  -0.63%  (p=0.000 n=21+21)
GobDecode-8                3.86ms ± 1%    3.88ms ± 1%  +0.52%  (p=0.000 n=20+21)
GobEncode-8                2.77ms ± 1%    2.77ms ± 0%    ~     (p=0.078 n=21+21)
Gzip-8                      168ms ± 1%     168ms ± 0%  +0.24%  (p=0.000 n=20+20)
Gunzip-8                   25.1ms ± 0%    24.3ms ± 0%  -3.03%  (p=0.000 n=21+21)
HTTPClientServer-8         61.4µs ± 8%    59.1µs ±10%    ~     (p=0.088 n=20+21)
JSONEncode-8               6.86ms ± 0%    6.70ms ± 0%  -2.29%  (p=0.000 n=20+19)
JSONDecode-8               30.8ms ± 1%    30.6ms ± 1%  -0.82%  (p=0.000 n=20+20)
Mandelbrot200-8            3.85ms ± 0%    3.85ms ± 0%    ~     (p=0.191 n=16+17)
GoParse-8                  2.61ms ± 2%    2.60ms ± 1%    ~     (p=0.561 n=21+20)
RegexpMatchEasy0_32-8      48.5ns ± 2%    45.9ns ± 3%  -5.26%  (p=0.000 n=20+21)
RegexpMatchEasy0_1K-8       139ns ± 0%     139ns ± 0%  +0.27%  (p=0.000 n=18+20)
RegexpMatchEasy1_32-8      41.3ns ± 0%    42.1ns ± 4%  +1.95%  (p=0.000 n=17+21)
RegexpMatchEasy1_1K-8       216ns ± 2%     216ns ± 0%  +0.17%  (p=0.020 n=21+19)
RegexpMatchMedium_32-8      790ns ± 7%     803ns ± 8%    ~     (p=0.178 n=21+21)
RegexpMatchMedium_1K-8     23.5µs ± 5%    23.7µs ± 5%    ~     (p=0.421 n=21+21)
RegexpMatchHard_32-8       1.09µs ± 1%    1.09µs ± 1%  -0.53%  (p=0.000 n=19+18)
RegexpMatchHard_1K-8       33.0µs ± 0%    33.0µs ± 0%    ~     (p=0.610 n=21+20)
Revcomp-8                   348ms ± 0%     353ms ± 0%  +1.38%  (p=0.000 n=17+18)
Template-8                 42.0ms ± 1%    41.9ms ± 1%  -0.30%  (p=0.049 n=20+20)
TimeParse-8                 185ns ± 0%     185ns ± 0%    ~     (p=0.387 n=20+18)
TimeFormat-8                237ns ± 1%     241ns ± 1%  +1.57%  (p=0.000 n=21+21)
[Geo mean]                 35.4µs         35.2µs       -0.66%

name                     old speed      new speed      delta
GobDecode-8               199MB/s ± 1%   198MB/s ± 1%  -0.52%  (p=0.000 n=20+21)
GobEncode-8               277MB/s ± 1%   277MB/s ± 0%    ~     (p=0.075 n=21+21)
Gzip-8                    116MB/s ± 1%   115MB/s ± 0%  -0.25%  (p=0.000 n=20+20)
Gunzip-8                  773MB/s ± 0%   797MB/s ± 0%  +3.12%  (p=0.000 n=21+21)
JSONEncode-8              283MB/s ± 0%   290MB/s ± 0%  +2.35%  (p=0.000 n=20+19)
JSONDecode-8             63.0MB/s ± 1%  63.5MB/s ± 1%  +0.82%  (p=0.000 n=20+20)
GoParse-8                22.2MB/s ± 2%  22.3MB/s ± 1%    ~     (p=0.539 n=21+20)
RegexpMatchEasy0_32-8     660MB/s ± 2%   697MB/s ± 3%  +5.57%  (p=0.000 n=20+21)
RegexpMatchEasy0_1K-8    7.36GB/s ± 0%  7.34GB/s ± 0%  -0.26%  (p=0.000 n=18+20)
RegexpMatchEasy1_32-8     775MB/s ± 0%   761MB/s ± 4%  -1.88%  (p=0.000 n=17+21)
RegexpMatchEasy1_1K-8    4.74GB/s ± 2%  4.74GB/s ± 0%  -0.18%  (p=0.020 n=21+19)
RegexpMatchMedium_32-8   40.6MB/s ± 7%  39.9MB/s ± 9%    ~     (p=0.191 n=21+21)
RegexpMatchMedium_1K-8   43.7MB/s ± 5%  43.2MB/s ± 5%    ~     (p=0.435 n=21+21)
RegexpMatchHard_32-8     29.3MB/s ± 1%  29.4MB/s ± 1%  +0.53%  (p=0.000 n=19+18)
RegexpMatchHard_1K-8     31.0MB/s ± 0%  31.0MB/s ± 0%    ~     (p=0.572 n=21+20)
Revcomp-8                 730MB/s ± 0%   720MB/s ± 0%  -1.36%  (p=0.000 n=17+18)
Template-8               46.2MB/s ± 1%  46.3MB/s ± 1%  +0.30%  (p=0.041 n=20+20)
[Geo mean]                204MB/s        205MB/s       +0.30%

Change-Id: Iac75d0ec184a515ce0e65e19559d5fe2e9840514
Reviewed-on: https://go-review.googlesource.com/c/go/+/354970
Reviewed-by: Josh Bleecher Snyder <josharian@gmail.com>
Trust: Josh Bleecher Snyder <josharian@gmail.com>
Trust: Keith Randall <khr@golang.org>
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Go Bot <gobot@golang.org>
This commit is contained in:
Jake Ciolek 2021-10-10 17:56:16 +02:00 committed by Josh Bleecher Snyder
parent 3283d1a2f2
commit 732f6fa9d5
4 changed files with 20 additions and 7 deletions

View File

@ -618,8 +618,21 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
p.To.Reg = r p.To.Reg = r
p.SetFrom3Reg(v.Args[0].Reg()) p.SetFrom3Reg(v.Args[0].Reg())
case ssa.OpAMD64ANDQconst:
asm := v.Op.Asm()
// If the constant is positive and fits into 32 bits, use ANDL.
// This saves a few bytes of encoding.
if 0 <= v.AuxInt && v.AuxInt <= (1<<32-1) {
asm = x86.AANDL
}
p := s.Prog(asm)
p.From.Type = obj.TYPE_CONST
p.From.Offset = v.AuxInt
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()
case ssa.OpAMD64SUBQconst, ssa.OpAMD64SUBLconst, case ssa.OpAMD64SUBQconst, ssa.OpAMD64SUBLconst,
ssa.OpAMD64ANDQconst, ssa.OpAMD64ANDLconst, ssa.OpAMD64ANDLconst,
ssa.OpAMD64ORQconst, ssa.OpAMD64ORLconst, ssa.OpAMD64ORQconst, ssa.OpAMD64ORLconst,
ssa.OpAMD64XORQconst, ssa.OpAMD64XORLconst, ssa.OpAMD64XORQconst, ssa.OpAMD64XORLconst,
ssa.OpAMD64SHLQconst, ssa.OpAMD64SHLLconst, ssa.OpAMD64SHLQconst, ssa.OpAMD64SHLLconst,

View File

@ -241,7 +241,7 @@ func FloatDivs(a []float32) float32 {
func Pow2Mods(n1 uint, n2 int) (uint, int) { func Pow2Mods(n1 uint, n2 int) (uint, int) {
// 386:"ANDL\t[$]31",-"DIVL" // 386:"ANDL\t[$]31",-"DIVL"
// amd64:"ANDQ\t[$]31",-"DIVQ" // amd64:"ANDL\t[$]31",-"DIVQ"
// arm:"AND\t[$]31",-".*udiv" // arm:"AND\t[$]31",-".*udiv"
// arm64:"AND\t[$]31",-"UDIV" // arm64:"AND\t[$]31",-"UDIV"
// ppc64:"ANDCC\t[$]31" // ppc64:"ANDCC\t[$]31"
@ -452,7 +452,7 @@ func LenDiv2(s string) int {
func LenMod1(a []int) int { func LenMod1(a []int) int {
// 386:"ANDL\t[$]1023" // 386:"ANDL\t[$]1023"
// amd64:"ANDQ\t[$]1023" // amd64:"ANDL\t[$]1023"
// arm64:"AND\t[$]1023",-"SDIV" // arm64:"AND\t[$]1023",-"SDIV"
// arm/6:"AND",-".*udiv" // arm/6:"AND",-".*udiv"
// arm/7:"BFC",-".*udiv",-"AND" // arm/7:"BFC",-".*udiv",-"AND"
@ -463,7 +463,7 @@ func LenMod1(a []int) int {
func LenMod2(s string) int { func LenMod2(s string) int {
// 386:"ANDL\t[$]2047" // 386:"ANDL\t[$]2047"
// amd64:"ANDQ\t[$]2047" // amd64:"ANDL\t[$]2047"
// arm64:"AND\t[$]2047",-"SDIV" // arm64:"AND\t[$]2047",-"SDIV"
// arm/6:"AND",-".*udiv" // arm/6:"AND",-".*udiv"
// arm/7:"BFC",-".*udiv",-"AND" // arm/7:"BFC",-".*udiv",-"AND"
@ -484,7 +484,7 @@ func CapDiv(a []int) int {
func CapMod(a []int) int { func CapMod(a []int) int {
// 386:"ANDL\t[$]4095" // 386:"ANDL\t[$]4095"
// amd64:"ANDQ\t[$]4095" // amd64:"ANDL\t[$]4095"
// arm64:"AND\t[$]4095",-"SDIV" // arm64:"AND\t[$]4095",-"SDIV"
// arm/6:"AND",-".*udiv" // arm/6:"AND",-".*udiv"
// arm/7:"BFC",-".*udiv",-"AND" // arm/7:"BFC",-".*udiv",-"AND"

View File

@ -332,7 +332,7 @@ func bitSetPowerOf2Test(x int) bool {
} }
func bitSetTest(x int) bool { func bitSetTest(x int) bool {
// amd64:"ANDQ\t[$]9, AX" // amd64:"ANDL\t[$]9, AX"
// amd64:"CMPQ\tAX, [$]9" // amd64:"CMPQ\tAX, [$]9"
return x&9 == 9 return x&9 == 9
} }

View File

@ -27,7 +27,7 @@ func convertNeq0L(x uint32, c bool) bool {
} }
func convertNeq0Q(x uint64, c bool) bool { func convertNeq0Q(x uint64, c bool) bool {
// amd64:"ANDQ\t[$]1",-"SETB" // amd64:"ANDL\t[$]1",-"SETB"
b := x&1 != 0 b := x&1 != 0
return c && b return c && b
} }