1
0
mirror of https://github.com/golang/go synced 2024-09-30 15:18:32 -06:00
go/test/codegen/arithmetic.go
Ben Shi 27965c1436 cmd/compile: optimize 386's ADDLconstmodifyidx4
This CL optimize ADDLconstmodifyidx4 to INCL/DECL, when the
constant is +1/-1.

1. The total size of pkg/linux_386/ decreases 28 bytes, excluding
cmd/compile.

2. There is no regression in the go1 benchmark test, excluding noise.
name                     old time/op    new time/op    delta
BinaryTree17-4              3.25s ± 2%     3.23s ± 3%  -0.70%  (p=0.040 n=30+30)
Fannkuch11-4                3.50s ± 1%     3.47s ± 1%  -0.68%  (p=0.000 n=30+30)
FmtFprintfEmpty-4          44.6ns ± 3%    44.8ns ± 3%  +0.46%  (p=0.029 n=30+30)
FmtFprintfString-4         79.0ns ± 3%    78.7ns ± 3%    ~     (p=0.053 n=30+30)
FmtFprintfInt-4            89.2ns ± 2%    89.4ns ± 3%    ~     (p=0.665 n=30+29)
FmtFprintfIntInt-4          142ns ± 3%     142ns ± 3%    ~     (p=0.435 n=30+30)
FmtFprintfPrefixedInt-4     182ns ± 2%     182ns ± 2%    ~     (p=0.964 n=30+30)
FmtFprintfFloat-4           407ns ± 3%     411ns ± 4%    ~     (p=0.080 n=30+30)
FmtManyArgs-4               597ns ± 3%     593ns ± 4%    ~     (p=0.222 n=30+30)
GobDecode-4                7.09ms ± 6%    7.07ms ± 7%    ~     (p=0.633 n=30+30)
GobEncode-4                6.81ms ± 9%    6.81ms ± 8%    ~     (p=0.982 n=30+30)
Gzip-4                      398ms ± 4%     400ms ± 6%    ~     (p=0.177 n=30+30)
Gunzip-4                   41.3ms ± 3%    40.6ms ± 4%  -1.71%  (p=0.005 n=30+30)
HTTPClientServer-4         63.4µs ± 3%    63.4µs ± 4%    ~     (p=0.646 n=30+28)
JSONEncode-4               16.0ms ± 3%    16.1ms ± 3%    ~     (p=0.057 n=30+30)
JSONDecode-4               63.3ms ± 8%    63.1ms ± 7%    ~     (p=0.786 n=30+30)
Mandelbrot200-4            5.17ms ± 3%    5.15ms ± 8%    ~     (p=0.654 n=30+30)
GoParse-4                  3.24ms ± 3%    3.23ms ± 2%    ~     (p=0.091 n=30+30)
RegexpMatchEasy0_32-4       103ns ± 4%     103ns ± 4%    ~     (p=0.575 n=30+30)
RegexpMatchEasy0_1K-4       823ns ± 2%     821ns ± 3%    ~     (p=0.827 n=30+30)
RegexpMatchEasy1_32-4       113ns ± 3%     112ns ± 3%    ~     (p=0.076 n=30+30)
RegexpMatchEasy1_1K-4      1.02µs ± 4%    1.01µs ± 5%    ~     (p=0.087 n=30+30)
RegexpMatchMedium_32-4      129ns ± 3%     127ns ± 4%  -1.55%  (p=0.009 n=30+30)
RegexpMatchMedium_1K-4     39.3µs ± 4%    39.7µs ± 3%    ~     (p=0.054 n=30+30)
RegexpMatchHard_32-4       2.15µs ± 4%    2.15µs ± 4%    ~     (p=0.712 n=30+30)
RegexpMatchHard_1K-4       66.0µs ± 3%    65.1µs ± 3%  -1.32%  (p=0.002 n=30+30)
Revcomp-4                   1.85s ± 2%     1.85s ± 3%    ~     (p=0.168 n=30+30)
Template-4                 69.5ms ± 7%    68.9ms ± 6%    ~     (p=0.250 n=28+28)
TimeParse-4                 434ns ± 3%     432ns ± 4%    ~     (p=0.629 n=30+30)
TimeFormat-4                403ns ± 4%     408ns ± 3%  +1.23%  (p=0.019 n=30+29)
[Geo mean]                 65.5µs         65.3µs       -0.20%

name                     old speed      new speed      delta
GobDecode-4               108MB/s ± 6%   109MB/s ± 6%    ~     (p=0.636 n=30+30)
GobEncode-4               113MB/s ±10%   113MB/s ± 9%    ~     (p=0.982 n=30+30)
Gzip-4                   48.8MB/s ± 4%  48.6MB/s ± 5%    ~     (p=0.178 n=30+30)
Gunzip-4                  470MB/s ± 3%   479MB/s ± 4%  +1.72%  (p=0.006 n=30+30)
JSONEncode-4              121MB/s ± 3%   120MB/s ± 3%    ~     (p=0.057 n=30+30)
JSONDecode-4             30.7MB/s ± 8%  30.8MB/s ± 8%    ~     (p=0.784 n=30+30)
GoParse-4                17.9MB/s ± 3%  17.9MB/s ± 2%    ~     (p=0.090 n=30+30)
RegexpMatchEasy0_32-4     309MB/s ± 4%   309MB/s ± 3%    ~     (p=0.530 n=30+30)
RegexpMatchEasy0_1K-4    1.24GB/s ± 2%  1.25GB/s ± 3%    ~     (p=0.976 n=30+30)
RegexpMatchEasy1_32-4     282MB/s ± 3%   284MB/s ± 3%  +0.81%  (p=0.041 n=30+30)
RegexpMatchEasy1_1K-4    1.00GB/s ± 3%  1.01GB/s ± 4%    ~     (p=0.091 n=30+30)
RegexpMatchMedium_32-4   7.71MB/s ± 3%  7.84MB/s ± 4%  +1.71%  (p=0.000 n=30+30)
RegexpMatchMedium_1K-4   26.1MB/s ± 4%  25.8MB/s ± 3%    ~     (p=0.051 n=30+30)
RegexpMatchHard_32-4     14.9MB/s ± 4%  14.9MB/s ± 4%    ~     (p=0.712 n=30+30)
RegexpMatchHard_1K-4     15.5MB/s ± 3%  15.7MB/s ± 3%  +1.34%  (p=0.003 n=30+30)
Revcomp-4                 138MB/s ± 2%   137MB/s ± 3%    ~     (p=0.174 n=30+30)
Template-4               28.0MB/s ± 6%  28.2MB/s ± 6%    ~     (p=0.251 n=28+28)
[Geo mean]               82.3MB/s       82.6MB/s       +0.36%

Change-Id: I389829699ffe9500a013fcf31be58a97e98043e1
Reviewed-on: https://go-review.googlesource.com/c/140701
Run-TryBot: Ben Shi <powerman1st@163.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2018-10-11 01:20:34 +00:00

230 lines
4.9 KiB
Go

// asmcheck
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package codegen
// This file contains codegen tests related to arithmetic
// simplifications and optimizations on integer types.
// For codegen tests on float types, see floats.go.
// ----------------- //
// Subtraction //
// ----------------- //
var ef int
func SubMem(arr []int, b, c, d int) int {
// 386:`SUBL\s[A-Z]+,\s8\([A-Z]+\)`
// amd64:`SUBQ\s[A-Z]+,\s16\([A-Z]+\)`
arr[2] -= b
// 386:`SUBL\s[A-Z]+,\s12\([A-Z]+\)`
// amd64:`SUBQ\s[A-Z]+,\s24\([A-Z]+\)`
arr[3] -= b
// 386:`DECL\s16\([A-Z]+\)`
arr[4]--
// 386:`ADDL\s[$]-20,\s20\([A-Z]+\)`
arr[5] -= 20
// 386:`SUBL\s\([A-Z]+\)\([A-Z]+\*4\),\s[A-Z]+`
ef -= arr[b]
// 386:`SUBL\s[A-Z]+,\s\([A-Z]+\)\([A-Z]+\*4\)`
arr[c] -= b
// 386:`ADDL\s[$]-15,\s\([A-Z]+\)\([A-Z]+\*4\)`
arr[d] -= 15
// 386:`DECL\s\([A-Z]+\)\([A-Z]+\*4\)`
arr[b]--
// 386:"SUBL\t4"
// amd64:"SUBQ\t8"
return arr[0] - arr[1]
}
// -------------------- //
// Multiplication //
// -------------------- //
func Pow2Muls(n1, n2 int) (int, int) {
// amd64:"SHLQ\t[$]5",-"IMULQ"
// 386:"SHLL\t[$]5",-"IMULL"
// arm:"SLL\t[$]5",-"MUL"
// arm64:"LSL\t[$]5",-"MUL"
a := n1 * 32
// amd64:"SHLQ\t[$]6",-"IMULQ"
// 386:"SHLL\t[$]6",-"IMULL"
// arm:"SLL\t[$]6",-"MUL"
// arm64:`NEG\sR[0-9]+<<6,\sR[0-9]+`,-`LSL`,-`MUL`
b := -64 * n2
return a, b
}
func Mul_96(n int) int {
// amd64:`SHLQ\t[$]5`,`LEAQ\t\(.*\)\(.*\*2\),`
return n * 96
}
func MulMemSrc(a []uint32, b []float32) {
// 386:`IMULL\s4\([A-Z]+\),\s[A-Z]+`
a[0] *= a[1]
// 386/sse2:`MULSS\s4\([A-Z]+\),\sX[0-9]+`
b[0] *= b[1]
}
// Multiplications merging tests
func MergeMuls1(n int) int {
// amd64:"IMUL3Q\t[$]46"
// 386:"IMUL3L\t[$]46"
return 15*n + 31*n // 46n
}
func MergeMuls2(n int) int {
// amd64:"IMUL3Q\t[$]23","ADDQ\t[$]29"
// 386:"IMUL3L\t[$]23","ADDL\t[$]29"
return 5*n + 7*(n+1) + 11*(n+2) // 23n + 29
}
func MergeMuls3(a, n int) int {
// amd64:"ADDQ\t[$]19",-"IMULQ\t[$]19"
// 386:"ADDL\t[$]19",-"IMULL\t[$]19"
return a*n + 19*n // (a+19)n
}
func MergeMuls4(n int) int {
// amd64:"IMUL3Q\t[$]14"
// 386:"IMUL3L\t[$]14"
return 23*n - 9*n // 14n
}
func MergeMuls5(a, n int) int {
// amd64:"ADDQ\t[$]-19",-"IMULQ\t[$]19"
// 386:"ADDL\t[$]-19",-"IMULL\t[$]19"
return a*n - 19*n // (a-19)n
}
// -------------- //
// Division //
// -------------- //
func DivMemSrc(a []float64) {
// 386/sse2:`DIVSD\s8\([A-Z]+\),\sX[0-9]+`
a[0] /= a[1]
}
func Pow2Divs(n1 uint, n2 int) (uint, int) {
// 386:"SHRL\t[$]5",-"DIVL"
// amd64:"SHRQ\t[$]5",-"DIVQ"
// arm:"SRL\t[$]5",-".*udiv"
// arm64:"LSR\t[$]5",-"UDIV"
a := n1 / 32 // unsigned
// amd64:"SARQ\t[$]6",-"IDIVQ"
// 386:"SARL\t[$]6",-"IDIVL"
// arm:"SRA\t[$]6",-".*udiv"
// arm64:"ASR\t[$]6",-"SDIV"
b := n2 / 64 // signed
return a, b
}
// Check that constant divisions get turned into MULs
func ConstDivs(n1 uint, n2 int) (uint, int) {
// amd64:"MOVQ\t[$]-1085102592571150095","MULQ",-"DIVQ"
a := n1 / 17 // unsigned
// amd64:"MOVQ\t[$]-1085102592571150095","IMULQ",-"IDIVQ"
b := n2 / 17 // signed
return a, b
}
func FloatDivs(a []float32) float32 {
// amd64:`DIVSS\s8\([A-Z]+\),\sX[0-9]+`
return a[1] / a[2]
}
func Pow2Mods(n1 uint, n2 int) (uint, int) {
// 386:"ANDL\t[$]31",-"DIVL"
// amd64:"ANDQ\t[$]31",-"DIVQ"
// arm:"AND\t[$]31",-".*udiv"
// arm64:"AND\t[$]31",-"UDIV"
a := n1 % 32 // unsigned
// 386:-"IDIVL"
// amd64:-"IDIVQ"
// arm:-".*udiv"
// arm64:-"REM"
b := n2 % 64 // signed
return a, b
}
// Check that constant modulo divs get turned into MULs
func ConstMods(n1 uint, n2 int) (uint, int) {
// amd64:"MOVQ\t[$]-1085102592571150095","MULQ",-"DIVQ"
a := n1 % 17 // unsigned
// amd64:"MOVQ\t[$]-1085102592571150095","IMULQ",-"IDIVQ"
b := n2 % 17 // signed
return a, b
}
// Check that len() and cap() calls divided by powers of two are
// optimized into shifts and ands
func LenDiv1(a []int) int {
// 386:"SHRL\t[$]10"
// amd64:"SHRQ\t[$]10"
return len(a) / 1024
}
func LenDiv2(s string) int {
// 386:"SHRL\t[$]11"
// amd64:"SHRQ\t[$]11"
return len(s) / (4097 >> 1)
}
func LenMod1(a []int) int {
// 386:"ANDL\t[$]1023"
// amd64:"ANDQ\t[$]1023"
return len(a) % 1024
}
func LenMod2(s string) int {
// 386:"ANDL\t[$]2047"
// amd64:"ANDQ\t[$]2047"
return len(s) % (4097 >> 1)
}
func CapDiv(a []int) int {
// 386:"SHRL\t[$]12"
// amd64:"SHRQ\t[$]12"
return cap(a) / ((1 << 11) + 2048)
}
func CapMod(a []int) int {
// 386:"ANDL\t[$]4095"
// amd64:"ANDQ\t[$]4095"
return cap(a) % ((1 << 11) + 2048)
}
func AddMul(x int) int {
// amd64:"LEAQ\t1"
return 2*x + 1
}
func MULA(a, b, c uint32) (uint32, uint32, uint32) {
// arm:`MULA`,-`MUL\s`
// arm64:`MADDW`,-`MULW`
r0 := a*b + c
// arm:`MULA`-`MUL\s`
// arm64:`MADDW`,-`MULW`
r1 := c*79 + a
// arm:`ADD`,-`MULA`-`MUL\s`
// arm64:`ADD`,-`MADD`,-`MULW`
r2 := b*64 + c
return r0, r1, r2
}