mirror of
https://github.com/golang/go
synced 2024-11-18 12:44:49 -07:00
cd65bbc01b
The SUBL instruction can take a memory operand, and this CL implements this optimization. The go1 benchmark shows a little improvement. name old time/op new time/op delta BinaryTree17-4 3.27s ± 2% 3.29s ± 3% ~ (p=0.322 n=37+40) Fannkuch11-4 3.49s ± 0% 3.53s ± 1% +1.21% (p=0.000 n=31+40) FmtFprintfEmpty-4 46.2ns ± 3% 46.3ns ± 2% ~ (p=0.351 n=40+28) FmtFprintfString-4 82.0ns ± 3% 81.5ns ± 2% -0.69% (p=0.002 n=40+30) FmtFprintfInt-4 94.6ns ± 3% 94.6ns ± 6% ~ (p=0.913 n=39+37) FmtFprintfIntInt-4 147ns ± 3% 150ns ± 2% +1.72% (p=0.000 n=40+25) FmtFprintfPrefixedInt-4 186ns ± 3% 186ns ± 0% -0.33% (p=0.006 n=40+25) FmtFprintfFloat-4 388ns ± 4% 388ns ± 4% ~ (p=0.162 n=40+40) FmtManyArgs-4 612ns ± 3% 616ns ± 4% ~ (p=0.223 n=40+40) GobDecode-4 7.35ms ± 5% 7.42ms ± 5% ~ (p=0.095 n=40+40) GobEncode-4 7.21ms ± 8% 7.23ms ± 4% ~ (p=0.294 n=40+40) Gzip-4 360ms ± 4% 359ms ± 4% ~ (p=0.097 n=40+40) Gunzip-4 46.1ms ± 3% 45.6ms ± 3% -1.20% (p=0.000 n=40+40) HTTPClientServer-4 64.0µs ± 2% 64.1µs ± 2% ~ (p=0.648 n=39+40) JSONEncode-4 21.9ms ± 4% 22.1ms ± 5% ~ (p=0.086 n=40+40) JSONDecode-4 67.9ms ± 4% 66.7ms ± 4% -1.63% (p=0.000 n=40+40) Mandelbrot200-4 5.19ms ± 3% 5.17ms ± 3% ~ (p=0.881 n=40+40) GoParse-4 3.34ms ± 3% 3.28ms ± 2% -1.78% (p=0.000 n=40+40) RegexpMatchEasy0_32-4 101ns ± 5% 99ns ± 3% -2.40% (p=0.000 n=40+40) RegexpMatchEasy0_1K-4 851ns ± 1% 848ns ± 3% -0.36% (p=0.004 n=33+40) RegexpMatchEasy1_32-4 109ns ± 5% 105ns ± 3% -3.53% (p=0.000 n=39+40) RegexpMatchEasy1_1K-4 1.03µs ± 4% 1.03µs ± 3% ~ (p=0.638 n=40+38) RegexpMatchMedium_32-4 131ns ± 5% 127ns ± 4% -3.36% (p=0.000 n=38+40) RegexpMatchMedium_1K-4 43.4µs ± 4% 43.2µs ± 3% -0.46% (p=0.008 n=40+40) RegexpMatchHard_32-4 2.21µs ± 4% 2.23µs ± 1% +0.77% (p=0.014 n=40+28) RegexpMatchHard_1K-4 67.6µs ± 4% 67.7µs ± 3% +0.11% (p=0.016 n=40+40) Revcomp-4 1.86s ± 3% 1.77s ± 2% -4.81% (p=0.000 n=40+40) Template-4 71.7ms ± 3% 71.6ms ± 4% ~ (p=0.200 n=40+40) TimeParse-4 436ns ± 4% 433ns ± 3% ~ (p=0.358 n=40+40) TimeFormat-4 413ns ± 4% 412ns ± 3% ~ (p=0.415 n=40+40) [Geo mean] 63.9µs 63.6µs -0.49% name old speed new speed delta GobDecode-4 105MB/s ± 5% 104MB/s ± 5% ~ (p=0.096 n=40+40) GobEncode-4 106MB/s ± 7% 106MB/s ± 3% ~ (p=0.385 n=39+40) Gzip-4 54.0MB/s ± 4% 54.0MB/s ± 4% ~ (p=0.100 n=40+40) Gunzip-4 421MB/s ± 3% 426MB/s ± 3% +1.21% (p=0.000 n=40+40) JSONEncode-4 88.5MB/s ± 5% 88.0MB/s ± 5% ~ (p=0.083 n=40+40) JSONDecode-4 28.6MB/s ± 4% 29.1MB/s ± 4% +1.65% (p=0.000 n=40+40) GoParse-4 17.3MB/s ± 3% 17.7MB/s ± 2% +1.82% (p=0.000 n=40+40) RegexpMatchEasy0_32-4 316MB/s ± 5% 323MB/s ± 4% +2.44% (p=0.000 n=40+40) RegexpMatchEasy0_1K-4 1.20GB/s ± 1% 1.21GB/s ± 3% +0.40% (p=0.004 n=33+40) RegexpMatchEasy1_32-4 291MB/s ± 7% 302MB/s ± 4% +3.82% (p=0.000 n=40+40) RegexpMatchEasy1_1K-4 993MB/s ± 4% 990MB/s ± 3% ~ (p=0.623 n=40+38) RegexpMatchMedium_32-4 7.61MB/s ± 5% 7.87MB/s ± 4% +3.36% (p=0.000 n=38+40) RegexpMatchMedium_1K-4 23.6MB/s ± 4% 23.7MB/s ± 4% +0.46% (p=0.007 n=40+40) RegexpMatchHard_32-4 14.5MB/s ± 4% 14.3MB/s ± 1% -0.79% (p=0.017 n=40+28) RegexpMatchHard_1K-4 15.1MB/s ± 4% 15.1MB/s ± 3% -0.11% (p=0.015 n=40+40) Revcomp-4 137MB/s ± 3% 144MB/s ± 3% +5.06% (p=0.000 n=40+40) Template-4 27.1MB/s ± 3% 27.1MB/s ± 4% ~ (p=0.211 n=40+40) [Geo mean] 78.9MB/s 79.7MB/s +1.01% Change-Id: I638fa4fef85833e8605919d693f9570cc3cf7334 Reviewed-on: https://go-review.googlesource.com/107275 Run-TryBot: Keith Randall <khr@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
176 lines
3.7 KiB
Go
176 lines
3.7 KiB
Go
// asmcheck
|
|
|
|
// Copyright 2018 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package codegen
|
|
|
|
// This file contains codegen tests related to arithmetic
|
|
// simplifications and optimizations on integer types.
|
|
// For codegen tests on float types, see floats.go.
|
|
|
|
// ----------------- //
|
|
// Subtraction //
|
|
// ----------------- //
|
|
|
|
func SubMem(arr []int) int {
|
|
// 386:"SUBL\t4"
|
|
// amd64:"SUBQ\t8"
|
|
return arr[0] - arr[1]
|
|
}
|
|
|
|
// -------------------- //
|
|
// Multiplication //
|
|
// -------------------- //
|
|
|
|
func Pow2Muls(n1, n2 int) (int, int) {
|
|
// amd64:"SHLQ\t[$]5",-"IMULQ"
|
|
// 386:"SHLL\t[$]5",-"IMULL"
|
|
// arm:"SLL\t[$]5",-"MUL"
|
|
// arm64:"LSL\t[$]5",-"MUL"
|
|
a := n1 * 32
|
|
|
|
// amd64:"SHLQ\t[$]6",-"IMULQ"
|
|
// 386:"SHLL\t[$]6",-"IMULL"
|
|
// arm:"SLL\t[$]6",-"MUL"
|
|
// arm64:"LSL\t[$]6",-"MUL"
|
|
b := -64 * n2
|
|
|
|
return a, b
|
|
}
|
|
|
|
func Mul_96(n int) int {
|
|
// amd64:`SHLQ\t[$]5`,`LEAQ\t\(.*\)\(.*\*2\),`
|
|
return n * 96
|
|
}
|
|
|
|
// Multiplications merging tests
|
|
|
|
func MergeMuls1(n int) int {
|
|
// amd64:"IMUL3Q\t[$]46"
|
|
// 386:"IMUL3L\t[$]46"
|
|
return 15*n + 31*n // 46n
|
|
}
|
|
|
|
func MergeMuls2(n int) int {
|
|
// amd64:"IMUL3Q\t[$]23","ADDQ\t[$]29"
|
|
// 386:"IMUL3L\t[$]23","ADDL\t[$]29"
|
|
return 5*n + 7*(n+1) + 11*(n+2) // 23n + 29
|
|
}
|
|
|
|
func MergeMuls3(a, n int) int {
|
|
// amd64:"ADDQ\t[$]19",-"IMULQ\t[$]19"
|
|
// 386:"ADDL\t[$]19",-"IMULL\t[$]19"
|
|
return a*n + 19*n // (a+19)n
|
|
}
|
|
|
|
func MergeMuls4(n int) int {
|
|
// amd64:"IMUL3Q\t[$]14"
|
|
// 386:"IMUL3L\t[$]14"
|
|
return 23*n - 9*n // 14n
|
|
}
|
|
|
|
func MergeMuls5(a, n int) int {
|
|
// amd64:"ADDQ\t[$]-19",-"IMULQ\t[$]19"
|
|
// 386:"ADDL\t[$]-19",-"IMULL\t[$]19"
|
|
return a*n - 19*n // (a-19)n
|
|
}
|
|
|
|
// -------------- //
|
|
// Division //
|
|
// -------------- //
|
|
|
|
func Pow2Divs(n1 uint, n2 int) (uint, int) {
|
|
// 386:"SHRL\t[$]5",-"DIVL"
|
|
// amd64:"SHRQ\t[$]5",-"DIVQ"
|
|
// arm:"SRL\t[$]5",-".*udiv"
|
|
// arm64:"LSR\t[$]5",-"UDIV"
|
|
a := n1 / 32 // unsigned
|
|
|
|
// amd64:"SARQ\t[$]6",-"IDIVQ"
|
|
// 386:"SARL\t[$]6",-"IDIVL"
|
|
// arm:"SRA\t[$]6",-".*udiv"
|
|
// arm64:"ASR\t[$]6",-"SDIV"
|
|
b := n2 / 64 // signed
|
|
|
|
return a, b
|
|
}
|
|
|
|
// Check that constant divisions get turned into MULs
|
|
func ConstDivs(n1 uint, n2 int) (uint, int) {
|
|
// amd64:"MOVQ\t[$]-1085102592571150095","MULQ",-"DIVQ"
|
|
a := n1 / 17 // unsigned
|
|
|
|
// amd64:"MOVQ\t[$]-1085102592571150095","IMULQ",-"IDIVQ"
|
|
b := n2 / 17 // signed
|
|
|
|
return a, b
|
|
}
|
|
|
|
func Pow2Mods(n1 uint, n2 int) (uint, int) {
|
|
// 386:"ANDL\t[$]31",-"DIVL"
|
|
// amd64:"ANDQ\t[$]31",-"DIVQ"
|
|
// arm:"AND\t[$]31",-".*udiv"
|
|
// arm64:"AND\t[$]31",-"UDIV"
|
|
a := n1 % 32 // unsigned
|
|
|
|
// 386:-"IDIVL"
|
|
// amd64:-"IDIVQ"
|
|
// arm:-".*udiv"
|
|
// arm64:-"REM"
|
|
b := n2 % 64 // signed
|
|
|
|
return a, b
|
|
}
|
|
|
|
// Check that constant modulo divs get turned into MULs
|
|
func ConstMods(n1 uint, n2 int) (uint, int) {
|
|
// amd64:"MOVQ\t[$]-1085102592571150095","MULQ",-"DIVQ"
|
|
a := n1 % 17 // unsigned
|
|
|
|
// amd64:"MOVQ\t[$]-1085102592571150095","IMULQ",-"IDIVQ"
|
|
b := n2 % 17 // signed
|
|
|
|
return a, b
|
|
}
|
|
|
|
// Check that len() and cap() calls divided by powers of two are
|
|
// optimized into shifts and ands
|
|
|
|
func LenDiv1(a []int) int {
|
|
// 386:"SHRL\t[$]10"
|
|
// amd64:"SHRQ\t[$]10"
|
|
return len(a) / 1024
|
|
}
|
|
|
|
func LenDiv2(s string) int {
|
|
// 386:"SHRL\t[$]11"
|
|
// amd64:"SHRQ\t[$]11"
|
|
return len(s) / (4097 >> 1)
|
|
}
|
|
|
|
func LenMod1(a []int) int {
|
|
// 386:"ANDL\t[$]1023"
|
|
// amd64:"ANDQ\t[$]1023"
|
|
return len(a) % 1024
|
|
}
|
|
|
|
func LenMod2(s string) int {
|
|
// 386:"ANDL\t[$]2047"
|
|
// amd64:"ANDQ\t[$]2047"
|
|
return len(s) % (4097 >> 1)
|
|
}
|
|
|
|
func CapDiv(a []int) int {
|
|
// 386:"SHRL\t[$]12"
|
|
// amd64:"SHRQ\t[$]12"
|
|
return cap(a) / ((1 << 11) + 2048)
|
|
}
|
|
|
|
func CapMod(a []int) int {
|
|
// 386:"ANDL\t[$]4095"
|
|
// amd64:"ANDQ\t[$]4095"
|
|
return cap(a) % ((1 << 11) + 2048)
|
|
}
|