mirror of
https://github.com/golang/go
synced 2024-11-12 07:00:21 -07:00
d60cf39f8e
This CL implements constant folding for MADD/MSUB on arm64. 1. The total size of pkg/android_arm64/ decreases about 4KB, excluding cmd/compile/ . 2. There is no regression in the go1 benchmark, excluding noise. name old time/op new time/op delta BinaryTree17-4 16.4s ± 1% 16.5s ± 1% +0.24% (p=0.008 n=29+29) Fannkuch11-4 8.73s ± 0% 8.71s ± 0% -0.15% (p=0.000 n=29+29) FmtFprintfEmpty-4 174ns ± 0% 174ns ± 0% ~ (all equal) FmtFprintfString-4 370ns ± 0% 372ns ± 2% +0.53% (p=0.007 n=24+30) FmtFprintfInt-4 419ns ± 0% 419ns ± 0% ~ (all equal) FmtFprintfIntInt-4 673ns ± 1% 661ns ± 1% -1.81% (p=0.000 n=30+27) FmtFprintfPrefixedInt-4 806ns ± 0% 805ns ± 0% ~ (p=0.957 n=28+27) FmtFprintfFloat-4 1.09µs ± 0% 1.09µs ± 0% -0.04% (p=0.001 n=22+30) FmtManyArgs-4 2.67µs ± 0% 2.68µs ± 0% +0.03% (p=0.045 n=29+28) GobDecode-4 33.2ms ± 1% 32.5ms ± 1% -2.11% (p=0.000 n=29+29) GobEncode-4 29.5ms ± 0% 29.2ms ± 0% -1.04% (p=0.000 n=28+28) Gzip-4 1.39s ± 2% 1.38s ± 1% -0.48% (p=0.023 n=30+30) Gunzip-4 139ms ± 0% 139ms ± 0% ~ (p=0.616 n=30+28) HTTPClientServer-4 766µs ± 4% 758µs ± 3% -1.03% (p=0.013 n=28+29) JSONEncode-4 49.7ms ± 0% 49.6ms ± 0% -0.24% (p=0.000 n=30+30) JSONDecode-4 266ms ± 0% 268ms ± 1% +1.07% (p=0.000 n=29+30) Mandelbrot200-4 16.6ms ± 0% 16.6ms ± 0% ~ (p=0.248 n=30+29) GoParse-4 15.9ms ± 0% 16.0ms ± 0% +0.76% (p=0.000 n=29+29) RegexpMatchEasy0_32-4 381ns ± 0% 380ns ± 0% -0.14% (p=0.000 n=30+30) RegexpMatchEasy0_1K-4 1.18µs ± 0% 1.19µs ± 1% +0.30% (p=0.000 n=29+30) RegexpMatchEasy1_32-4 357ns ± 0% 357ns ± 0% ~ (all equal) RegexpMatchEasy1_1K-4 2.04µs ± 0% 2.05µs ± 0% +0.50% (p=0.000 n=26+28) RegexpMatchMedium_32-4 590ns ± 0% 589ns ± 0% -0.12% (p=0.000 n=30+23) RegexpMatchMedium_1K-4 162µs ± 0% 162µs ± 0% ~ (p=0.318 n=28+25) RegexpMatchHard_32-4 9.56µs ± 0% 9.56µs ± 0% ~ (p=0.072 n=30+29) RegexpMatchHard_1K-4 287µs ± 0% 287µs ± 0% -0.02% (p=0.005 n=28+28) Revcomp-4 2.50s ± 0% 2.51s ± 0% ~ (p=0.246 n=29+29) Template-4 312ms ± 1% 313ms ± 1% +0.46% (p=0.002 n=30+30) TimeParse-4 1.68µs ± 0% 1.67µs ± 0% -0.31% (p=0.000 n=27+29) TimeFormat-4 1.66µs ± 0% 1.64µs ± 0% -0.92% (p=0.000 n=29+26) [Geo mean] 247µs 246µs -0.15% name old speed new speed delta GobDecode-4 23.1MB/s ± 1% 23.6MB/s ± 0% +2.17% (p=0.000 n=29+28) GobEncode-4 26.0MB/s ± 0% 26.3MB/s ± 0% +1.05% (p=0.000 n=28+28) Gzip-4 14.0MB/s ± 2% 14.1MB/s ± 1% +0.47% (p=0.026 n=30+30) Gunzip-4 139MB/s ± 0% 139MB/s ± 0% ~ (p=0.624 n=30+28) JSONEncode-4 39.1MB/s ± 0% 39.2MB/s ± 0% +0.24% (p=0.000 n=30+30) JSONDecode-4 7.31MB/s ± 0% 7.23MB/s ± 1% -1.07% (p=0.000 n=28+30) GoParse-4 3.65MB/s ± 0% 3.62MB/s ± 0% -0.77% (p=0.000 n=29+29) RegexpMatchEasy0_32-4 84.0MB/s ± 0% 84.1MB/s ± 0% +0.18% (p=0.000 n=28+30) RegexpMatchEasy0_1K-4 864MB/s ± 0% 861MB/s ± 1% -0.29% (p=0.000 n=29+30) RegexpMatchEasy1_32-4 89.5MB/s ± 0% 89.5MB/s ± 0% ~ (p=0.841 n=28+28) RegexpMatchEasy1_1K-4 502MB/s ± 0% 500MB/s ± 0% -0.51% (p=0.000 n=29+29) RegexpMatchMedium_32-4 1.69MB/s ± 0% 1.70MB/s ± 0% +0.41% (p=0.000 n=26+30) RegexpMatchMedium_1K-4 6.31MB/s ± 0% 6.30MB/s ± 0% ~ (p=0.129 n=30+25) RegexpMatchHard_32-4 3.35MB/s ± 0% 3.35MB/s ± 0% ~ (p=0.657 n=30+29) RegexpMatchHard_1K-4 3.57MB/s ± 0% 3.57MB/s ± 0% ~ (all equal) Revcomp-4 102MB/s ± 0% 101MB/s ± 0% ~ (p=0.213 n=29+29) Template-4 6.22MB/s ± 1% 6.19MB/s ± 1% -0.42% (p=0.005 n=30+29) [Geo mean] 24.1MB/s 24.2MB/s +0.08% Change-Id: I6c02d3c9975f6bd8bc215cb1fc14d29602b45649 Reviewed-on: https://go-review.googlesource.com/138095 Run-TryBot: Ben Shi <powerman1st@163.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Cherry Zhang <cherryyz@google.com>
221 lines
4.6 KiB
Go
221 lines
4.6 KiB
Go
// asmcheck
|
|
|
|
// Copyright 2018 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package codegen
|
|
|
|
// This file contains codegen tests related to arithmetic
|
|
// simplifications and optimizations on integer types.
|
|
// For codegen tests on float types, see floats.go.
|
|
|
|
// ----------------- //
|
|
// Subtraction //
|
|
// ----------------- //
|
|
|
|
func SubMem(arr []int, b int) int {
|
|
// 386:`SUBL\s[A-Z]+,\s8\([A-Z]+\)`
|
|
// amd64:`SUBQ\s[A-Z]+,\s16\([A-Z]+\)`
|
|
arr[2] -= b
|
|
// 386:`SUBL\s[A-Z]+,\s12\([A-Z]+\)`
|
|
// amd64:`SUBQ\s[A-Z]+,\s24\([A-Z]+\)`
|
|
arr[3] -= b
|
|
// 386:`DECL\s16\([A-Z]+\)`
|
|
arr[4]--
|
|
// 386:`ADDL\s[$]-20,\s20\([A-Z]+\)`
|
|
arr[5] -= 20
|
|
// 386:"SUBL\t4"
|
|
// amd64:"SUBQ\t8"
|
|
return arr[0] - arr[1]
|
|
}
|
|
|
|
// -------------------- //
|
|
// Multiplication //
|
|
// -------------------- //
|
|
|
|
func Pow2Muls(n1, n2 int) (int, int) {
|
|
// amd64:"SHLQ\t[$]5",-"IMULQ"
|
|
// 386:"SHLL\t[$]5",-"IMULL"
|
|
// arm:"SLL\t[$]5",-"MUL"
|
|
// arm64:"LSL\t[$]5",-"MUL"
|
|
a := n1 * 32
|
|
|
|
// amd64:"SHLQ\t[$]6",-"IMULQ"
|
|
// 386:"SHLL\t[$]6",-"IMULL"
|
|
// arm:"SLL\t[$]6",-"MUL"
|
|
// arm64:"LSL\t[$]6",-"MUL"
|
|
b := -64 * n2
|
|
|
|
return a, b
|
|
}
|
|
|
|
func Mul_96(n int) int {
|
|
// amd64:`SHLQ\t[$]5`,`LEAQ\t\(.*\)\(.*\*2\),`
|
|
return n * 96
|
|
}
|
|
|
|
func MulMemSrc(a []uint32, b []float32) {
|
|
// 386:`IMULL\s4\([A-Z]+\),\s[A-Z]+`
|
|
a[0] *= a[1]
|
|
// 386/sse2:`MULSS\s4\([A-Z]+\),\sX[0-9]+`
|
|
b[0] *= b[1]
|
|
}
|
|
|
|
// Multiplications merging tests
|
|
|
|
func MergeMuls1(n int) int {
|
|
// amd64:"IMUL3Q\t[$]46"
|
|
// 386:"IMUL3L\t[$]46"
|
|
return 15*n + 31*n // 46n
|
|
}
|
|
|
|
func MergeMuls2(n int) int {
|
|
// amd64:"IMUL3Q\t[$]23","ADDQ\t[$]29"
|
|
// 386:"IMUL3L\t[$]23","ADDL\t[$]29"
|
|
return 5*n + 7*(n+1) + 11*(n+2) // 23n + 29
|
|
}
|
|
|
|
func MergeMuls3(a, n int) int {
|
|
// amd64:"ADDQ\t[$]19",-"IMULQ\t[$]19"
|
|
// 386:"ADDL\t[$]19",-"IMULL\t[$]19"
|
|
return a*n + 19*n // (a+19)n
|
|
}
|
|
|
|
func MergeMuls4(n int) int {
|
|
// amd64:"IMUL3Q\t[$]14"
|
|
// 386:"IMUL3L\t[$]14"
|
|
return 23*n - 9*n // 14n
|
|
}
|
|
|
|
func MergeMuls5(a, n int) int {
|
|
// amd64:"ADDQ\t[$]-19",-"IMULQ\t[$]19"
|
|
// 386:"ADDL\t[$]-19",-"IMULL\t[$]19"
|
|
return a*n - 19*n // (a-19)n
|
|
}
|
|
|
|
// -------------- //
|
|
// Division //
|
|
// -------------- //
|
|
|
|
func DivMemSrc(a []float64) {
|
|
// 386/sse2:`DIVSD\s8\([A-Z]+\),\sX[0-9]+`
|
|
a[0] /= a[1]
|
|
}
|
|
|
|
func Pow2Divs(n1 uint, n2 int) (uint, int) {
|
|
// 386:"SHRL\t[$]5",-"DIVL"
|
|
// amd64:"SHRQ\t[$]5",-"DIVQ"
|
|
// arm:"SRL\t[$]5",-".*udiv"
|
|
// arm64:"LSR\t[$]5",-"UDIV"
|
|
a := n1 / 32 // unsigned
|
|
|
|
// amd64:"SARQ\t[$]6",-"IDIVQ"
|
|
// 386:"SARL\t[$]6",-"IDIVL"
|
|
// arm:"SRA\t[$]6",-".*udiv"
|
|
// arm64:"ASR\t[$]6",-"SDIV"
|
|
b := n2 / 64 // signed
|
|
|
|
return a, b
|
|
}
|
|
|
|
// Check that constant divisions get turned into MULs
|
|
func ConstDivs(n1 uint, n2 int) (uint, int) {
|
|
// amd64:"MOVQ\t[$]-1085102592571150095","MULQ",-"DIVQ"
|
|
a := n1 / 17 // unsigned
|
|
|
|
// amd64:"MOVQ\t[$]-1085102592571150095","IMULQ",-"IDIVQ"
|
|
b := n2 / 17 // signed
|
|
|
|
return a, b
|
|
}
|
|
|
|
func FloatDivs(a []float32) float32 {
|
|
// amd64:`DIVSS\s8\([A-Z]+\),\sX[0-9]+`
|
|
return a[1] / a[2]
|
|
}
|
|
|
|
func Pow2Mods(n1 uint, n2 int) (uint, int) {
|
|
// 386:"ANDL\t[$]31",-"DIVL"
|
|
// amd64:"ANDQ\t[$]31",-"DIVQ"
|
|
// arm:"AND\t[$]31",-".*udiv"
|
|
// arm64:"AND\t[$]31",-"UDIV"
|
|
a := n1 % 32 // unsigned
|
|
|
|
// 386:-"IDIVL"
|
|
// amd64:-"IDIVQ"
|
|
// arm:-".*udiv"
|
|
// arm64:-"REM"
|
|
b := n2 % 64 // signed
|
|
|
|
return a, b
|
|
}
|
|
|
|
// Check that constant modulo divs get turned into MULs
|
|
func ConstMods(n1 uint, n2 int) (uint, int) {
|
|
// amd64:"MOVQ\t[$]-1085102592571150095","MULQ",-"DIVQ"
|
|
a := n1 % 17 // unsigned
|
|
|
|
// amd64:"MOVQ\t[$]-1085102592571150095","IMULQ",-"IDIVQ"
|
|
b := n2 % 17 // signed
|
|
|
|
return a, b
|
|
}
|
|
|
|
// Check that len() and cap() calls divided by powers of two are
|
|
// optimized into shifts and ands
|
|
|
|
func LenDiv1(a []int) int {
|
|
// 386:"SHRL\t[$]10"
|
|
// amd64:"SHRQ\t[$]10"
|
|
return len(a) / 1024
|
|
}
|
|
|
|
func LenDiv2(s string) int {
|
|
// 386:"SHRL\t[$]11"
|
|
// amd64:"SHRQ\t[$]11"
|
|
return len(s) / (4097 >> 1)
|
|
}
|
|
|
|
func LenMod1(a []int) int {
|
|
// 386:"ANDL\t[$]1023"
|
|
// amd64:"ANDQ\t[$]1023"
|
|
return len(a) % 1024
|
|
}
|
|
|
|
func LenMod2(s string) int {
|
|
// 386:"ANDL\t[$]2047"
|
|
// amd64:"ANDQ\t[$]2047"
|
|
return len(s) % (4097 >> 1)
|
|
}
|
|
|
|
func CapDiv(a []int) int {
|
|
// 386:"SHRL\t[$]12"
|
|
// amd64:"SHRQ\t[$]12"
|
|
return cap(a) / ((1 << 11) + 2048)
|
|
}
|
|
|
|
func CapMod(a []int) int {
|
|
// 386:"ANDL\t[$]4095"
|
|
// amd64:"ANDQ\t[$]4095"
|
|
return cap(a) % ((1 << 11) + 2048)
|
|
}
|
|
|
|
func AddMul(x int) int {
|
|
// amd64:"LEAQ\t1"
|
|
return 2*x + 1
|
|
}
|
|
|
|
func MULA(a, b, c uint32) (uint32, uint32, uint32) {
|
|
// arm:`MULA`,-`MUL\s`
|
|
// arm64:`MADDW`,-`MULW`
|
|
r0 := a*b + c
|
|
// arm:`MULA`-`MUL\s`
|
|
// arm64:`MADDW`,-`MULW`
|
|
r1 := c*79 + a
|
|
// arm:`ADD`,-`MULA`-`MUL\s`
|
|
// arm64:`ADD`,-`MADD`,-`MULW`
|
|
r2 := b*64 + c
|
|
return r0, r1, r2
|
|
}
|