1
0
mirror of https://github.com/golang/go synced 2024-11-13 17:50:23 -07:00
go/test/codegen/arithmetic.go
Ben Shi 3bc34385fa cmd/compile: introduce more read-modify-write operations for amd64
Add suport of read-modify-write for AND/SUB/AND/OR/XOR on amd64.

1. The total size of pkg/linux_amd64 decreases about 4KB, excluding
cmd/compile.

2. The go1 benchmark shows a little improvement, excluding noise.

name                     old time/op    new time/op    delta
BinaryTree17-4              2.63s ± 3%     2.65s ± 4%   +1.01%  (p=0.037 n=35+35)
Fannkuch11-4                2.33s ± 2%     2.39s ± 2%   +2.49%  (p=0.000 n=35+35)
FmtFprintfEmpty-4          45.4ns ± 5%    40.8ns ± 6%  -10.09%  (p=0.000 n=35+35)
FmtFprintfString-4         73.3ns ± 4%    70.9ns ± 3%   -3.23%  (p=0.000 n=30+35)
FmtFprintfInt-4            79.9ns ± 4%    79.5ns ± 3%     ~     (p=0.736 n=34+35)
FmtFprintfIntInt-4          126ns ± 4%     125ns ± 4%     ~     (p=0.083 n=35+35)
FmtFprintfPrefixedInt-4     152ns ± 6%     152ns ± 3%     ~     (p=0.855 n=34+35)
FmtFprintfFloat-4           215ns ± 4%     213ns ± 4%     ~     (p=0.066 n=35+35)
FmtManyArgs-4               522ns ± 3%     506ns ± 3%   -3.15%  (p=0.000 n=35+35)
GobDecode-4                6.45ms ± 8%    6.51ms ± 7%   +0.96%  (p=0.026 n=35+35)
GobEncode-4                6.10ms ± 6%    6.02ms ± 8%     ~     (p=0.160 n=35+35)
Gzip-4                      228ms ± 3%     221ms ± 3%   -2.92%  (p=0.000 n=35+35)
Gunzip-4                   37.5ms ± 4%    37.2ms ± 3%   -0.78%  (p=0.036 n=35+35)
HTTPClientServer-4         58.7µs ± 2%    59.2µs ± 1%   +0.80%  (p=0.000 n=33+33)
JSONEncode-4               12.0ms ± 3%    12.2ms ± 3%   +1.84%  (p=0.008 n=35+35)
JSONDecode-4               57.0ms ± 4%    56.6ms ± 3%     ~     (p=0.320 n=35+35)
Mandelbrot200-4            3.82ms ± 3%    3.79ms ± 3%     ~     (p=0.074 n=35+35)
GoParse-4                  3.21ms ± 5%    3.24ms ± 4%     ~     (p=0.119 n=35+35)
RegexpMatchEasy0_32-4      76.3ns ± 4%    75.4ns ± 4%   -1.14%  (p=0.014 n=34+33)
RegexpMatchEasy0_1K-4       251ns ± 4%     254ns ± 3%   +1.28%  (p=0.016 n=35+35)
RegexpMatchEasy1_32-4      69.6ns ± 3%    70.1ns ± 3%   +0.82%  (p=0.005 n=35+35)
RegexpMatchEasy1_1K-4       367ns ± 4%     376ns ± 4%   +2.47%  (p=0.000 n=35+35)
RegexpMatchMedium_32-4      108ns ± 5%     104ns ± 4%   -3.18%  (p=0.000 n=35+35)
RegexpMatchMedium_1K-4     33.8µs ± 3%    32.7µs ± 3%   -3.27%  (p=0.000 n=35+35)
RegexpMatchHard_32-4       1.55µs ± 3%    1.52µs ± 3%   -1.64%  (p=0.000 n=35+35)
RegexpMatchHard_1K-4       46.6µs ± 3%    46.6µs ± 4%     ~     (p=0.149 n=35+35)
Revcomp-4                   416ms ± 7%     412ms ± 6%   -0.95%  (p=0.033 n=33+35)
Template-4                 64.3ms ± 3%    62.4ms ± 7%   -2.94%  (p=0.000 n=35+35)
TimeParse-4                 320ns ± 2%     322ns ± 3%     ~     (p=0.589 n=35+35)
TimeFormat-4                300ns ± 3%     300ns ± 3%     ~     (p=0.597 n=35+35)
[Geo mean]                 47.4µs         47.0µs        -0.86%

name                     old speed      new speed      delta
GobDecode-4               119MB/s ± 7%   118MB/s ± 7%   -0.96%  (p=0.027 n=35+35)
GobEncode-4               126MB/s ± 7%   127MB/s ± 6%     ~     (p=0.157 n=34+34)
Gzip-4                   85.3MB/s ± 3%  87.9MB/s ± 3%   +3.02%  (p=0.000 n=35+35)
Gunzip-4                  518MB/s ± 4%   522MB/s ± 3%   +0.79%  (p=0.037 n=35+35)
JSONEncode-4              162MB/s ± 3%   159MB/s ± 3%   -1.81%  (p=0.009 n=35+35)
JSONDecode-4             34.1MB/s ± 4%  34.3MB/s ± 3%     ~     (p=0.318 n=35+35)
GoParse-4                18.0MB/s ± 5%  17.9MB/s ± 4%     ~     (p=0.117 n=35+35)
RegexpMatchEasy0_32-4     419MB/s ± 3%   425MB/s ± 4%   +1.46%  (p=0.003 n=32+33)
RegexpMatchEasy0_1K-4    4.07GB/s ± 4%  4.02GB/s ± 3%   -1.28%  (p=0.014 n=35+35)
RegexpMatchEasy1_32-4     460MB/s ± 3%   456MB/s ± 4%   -0.82%  (p=0.004 n=35+35)
RegexpMatchEasy1_1K-4    2.79GB/s ± 4%  2.72GB/s ± 4%   -2.39%  (p=0.000 n=35+35)
RegexpMatchMedium_32-4   9.23MB/s ± 4%  9.53MB/s ± 4%   +3.16%  (p=0.000 n=35+35)
RegexpMatchMedium_1K-4   30.3MB/s ± 3%  31.3MB/s ± 3%   +3.38%  (p=0.000 n=35+35)
RegexpMatchHard_32-4     20.7MB/s ± 3%  21.0MB/s ± 3%   +1.67%  (p=0.000 n=35+35)
RegexpMatchHard_1K-4     22.0MB/s ± 3%  21.9MB/s ± 4%     ~     (p=0.277 n=35+33)
Revcomp-4                 612MB/s ± 7%   618MB/s ± 6%   +0.96%  (p=0.034 n=33+35)
Template-4               30.2MB/s ± 3%  31.1MB/s ± 6%   +3.05%  (p=0.000 n=35+35)
[Geo mean]                123MB/s        124MB/s        +0.64%

Change-Id: Ia025da272e07d0069413824bfff3471b106d6280
Reviewed-on: https://go-review.googlesource.com/121535
Run-TryBot: Ben Shi <powerman1st@163.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ilya Tocar <ilya.tocar@intel.com>
Reviewed-by: Keith Randall <khr@golang.org>
2018-08-24 23:38:25 +00:00

208 lines
4.3 KiB
Go

// asmcheck
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package codegen
// This file contains codegen tests related to arithmetic
// simplifications and optimizations on integer types.
// For codegen tests on float types, see floats.go.
// ----------------- //
// Subtraction //
// ----------------- //
func SubMem(arr []int, b int) int {
// 386:`SUBL\s[A-Z]+,\s8\([A-Z]+\)`
// amd64:`SUBQ\s[A-Z]+,\s16\([A-Z]+\)`
arr[2] -= b
// 386:`SUBL\s[A-Z]+,\s12\([A-Z]+\)`
// amd64:`SUBQ\s[A-Z]+,\s24\([A-Z]+\)`
arr[3] -= b
// 386:`DECL\s16\([A-Z]+\)`
arr[4]--
// 386:`ADDL\s[$]-20,\s20\([A-Z]+\)`
arr[5] -= 20
// 386:"SUBL\t4"
// amd64:"SUBQ\t8"
return arr[0] - arr[1]
}
// -------------------- //
// Multiplication //
// -------------------- //
func Pow2Muls(n1, n2 int) (int, int) {
// amd64:"SHLQ\t[$]5",-"IMULQ"
// 386:"SHLL\t[$]5",-"IMULL"
// arm:"SLL\t[$]5",-"MUL"
// arm64:"LSL\t[$]5",-"MUL"
a := n1 * 32
// amd64:"SHLQ\t[$]6",-"IMULQ"
// 386:"SHLL\t[$]6",-"IMULL"
// arm:"SLL\t[$]6",-"MUL"
// arm64:"LSL\t[$]6",-"MUL"
b := -64 * n2
return a, b
}
func Mul_96(n int) int {
// amd64:`SHLQ\t[$]5`,`LEAQ\t\(.*\)\(.*\*2\),`
return n * 96
}
func MulMemSrc(a []uint32, b []float32) {
// 386:`IMULL\s4\([A-Z]+\),\s[A-Z]+`
a[0] *= a[1]
// 386/sse2:`MULSS\s4\([A-Z]+\),\sX[0-9]+`
b[0] *= b[1]
}
// Multiplications merging tests
func MergeMuls1(n int) int {
// amd64:"IMUL3Q\t[$]46"
// 386:"IMUL3L\t[$]46"
return 15*n + 31*n // 46n
}
func MergeMuls2(n int) int {
// amd64:"IMUL3Q\t[$]23","ADDQ\t[$]29"
// 386:"IMUL3L\t[$]23","ADDL\t[$]29"
return 5*n + 7*(n+1) + 11*(n+2) // 23n + 29
}
func MergeMuls3(a, n int) int {
// amd64:"ADDQ\t[$]19",-"IMULQ\t[$]19"
// 386:"ADDL\t[$]19",-"IMULL\t[$]19"
return a*n + 19*n // (a+19)n
}
func MergeMuls4(n int) int {
// amd64:"IMUL3Q\t[$]14"
// 386:"IMUL3L\t[$]14"
return 23*n - 9*n // 14n
}
func MergeMuls5(a, n int) int {
// amd64:"ADDQ\t[$]-19",-"IMULQ\t[$]19"
// 386:"ADDL\t[$]-19",-"IMULL\t[$]19"
return a*n - 19*n // (a-19)n
}
// -------------- //
// Division //
// -------------- //
func DivMemSrc(a []float64) {
// 386/sse2:`DIVSD\s8\([A-Z]+\),\sX[0-9]+`
a[0] /= a[1]
}
func Pow2Divs(n1 uint, n2 int) (uint, int) {
// 386:"SHRL\t[$]5",-"DIVL"
// amd64:"SHRQ\t[$]5",-"DIVQ"
// arm:"SRL\t[$]5",-".*udiv"
// arm64:"LSR\t[$]5",-"UDIV"
a := n1 / 32 // unsigned
// amd64:"SARQ\t[$]6",-"IDIVQ"
// 386:"SARL\t[$]6",-"IDIVL"
// arm:"SRA\t[$]6",-".*udiv"
// arm64:"ASR\t[$]6",-"SDIV"
b := n2 / 64 // signed
return a, b
}
// Check that constant divisions get turned into MULs
func ConstDivs(n1 uint, n2 int) (uint, int) {
// amd64:"MOVQ\t[$]-1085102592571150095","MULQ",-"DIVQ"
a := n1 / 17 // unsigned
// amd64:"MOVQ\t[$]-1085102592571150095","IMULQ",-"IDIVQ"
b := n2 / 17 // signed
return a, b
}
func FloatDivs(a []float32) float32 {
// amd64:`DIVSS\s8\([A-Z]+\),\sX[0-9]+`
return a[1] / a[2]
}
func Pow2Mods(n1 uint, n2 int) (uint, int) {
// 386:"ANDL\t[$]31",-"DIVL"
// amd64:"ANDQ\t[$]31",-"DIVQ"
// arm:"AND\t[$]31",-".*udiv"
// arm64:"AND\t[$]31",-"UDIV"
a := n1 % 32 // unsigned
// 386:-"IDIVL"
// amd64:-"IDIVQ"
// arm:-".*udiv"
// arm64:-"REM"
b := n2 % 64 // signed
return a, b
}
// Check that constant modulo divs get turned into MULs
func ConstMods(n1 uint, n2 int) (uint, int) {
// amd64:"MOVQ\t[$]-1085102592571150095","MULQ",-"DIVQ"
a := n1 % 17 // unsigned
// amd64:"MOVQ\t[$]-1085102592571150095","IMULQ",-"IDIVQ"
b := n2 % 17 // signed
return a, b
}
// Check that len() and cap() calls divided by powers of two are
// optimized into shifts and ands
func LenDiv1(a []int) int {
// 386:"SHRL\t[$]10"
// amd64:"SHRQ\t[$]10"
return len(a) / 1024
}
func LenDiv2(s string) int {
// 386:"SHRL\t[$]11"
// amd64:"SHRQ\t[$]11"
return len(s) / (4097 >> 1)
}
func LenMod1(a []int) int {
// 386:"ANDL\t[$]1023"
// amd64:"ANDQ\t[$]1023"
return len(a) % 1024
}
func LenMod2(s string) int {
// 386:"ANDL\t[$]2047"
// amd64:"ANDQ\t[$]2047"
return len(s) % (4097 >> 1)
}
func CapDiv(a []int) int {
// 386:"SHRL\t[$]12"
// amd64:"SHRQ\t[$]12"
return cap(a) / ((1 << 11) + 2048)
}
func CapMod(a []int) int {
// 386:"ANDL\t[$]4095"
// amd64:"ANDQ\t[$]4095"
return cap(a) % ((1 << 11) + 2048)
}
func AddMul(x int) int {
// amd64:"LEAQ\t1"
return 2*x + 1
}