mirror of
https://github.com/golang/go
synced 2024-11-17 20:24:46 -07:00
732f6fa9d5
We can rewrite ANDQ with an immediate fitting in 32bit with an ANDL, which is shorter to encode. Looking at Go binary itself, before the change there was: ANDL: 2337 ANDQ: 4476 After the change: ANDL: 3790 ANDQ: 3024 So we got rid of 1452 ANDQs This makes the Linux x86_64 binary 0.03% smaller. There seems to be an impact on performance. Intel Cascade Lake benchmarks (with perflock): name old time/op new time/op delta BinaryTree17-8 1.91s ± 1% 1.89s ± 1% -1.22% (p=0.000 n=21+18) Fannkuch11-8 2.34s ± 0% 2.34s ± 0% ~ (p=0.052 n=20+20) FmtFprintfEmpty-8 27.7ns ± 1% 27.4ns ± 3% ~ (p=0.497 n=21+21) FmtFprintfString-8 53.2ns ± 0% 51.5ns ± 0% -3.21% (p=0.000 n=20+19) FmtFprintfInt-8 57.3ns ± 0% 55.7ns ± 0% -2.89% (p=0.000 n=19+19) FmtFprintfIntInt-8 92.3ns ± 0% 88.4ns ± 1% -4.23% (p=0.000 n=20+21) FmtFprintfPrefixedInt-8 103ns ± 0% 103ns ± 0% +0.23% (p=0.000 n=20+21) FmtFprintfFloat-8 147ns ± 0% 148ns ± 0% +0.75% (p=0.000 n=20+21) FmtManyArgs-8 384ns ± 0% 381ns ± 0% -0.63% (p=0.000 n=21+21) GobDecode-8 3.86ms ± 1% 3.88ms ± 1% +0.52% (p=0.000 n=20+21) GobEncode-8 2.77ms ± 1% 2.77ms ± 0% ~ (p=0.078 n=21+21) Gzip-8 168ms ± 1% 168ms ± 0% +0.24% (p=0.000 n=20+20) Gunzip-8 25.1ms ± 0% 24.3ms ± 0% -3.03% (p=0.000 n=21+21) HTTPClientServer-8 61.4µs ± 8% 59.1µs ±10% ~ (p=0.088 n=20+21) JSONEncode-8 6.86ms ± 0% 6.70ms ± 0% -2.29% (p=0.000 n=20+19) JSONDecode-8 30.8ms ± 1% 30.6ms ± 1% -0.82% (p=0.000 n=20+20) Mandelbrot200-8 3.85ms ± 0% 3.85ms ± 0% ~ (p=0.191 n=16+17) GoParse-8 2.61ms ± 2% 2.60ms ± 1% ~ (p=0.561 n=21+20) RegexpMatchEasy0_32-8 48.5ns ± 2% 45.9ns ± 3% -5.26% (p=0.000 n=20+21) RegexpMatchEasy0_1K-8 139ns ± 0% 139ns ± 0% +0.27% (p=0.000 n=18+20) RegexpMatchEasy1_32-8 41.3ns ± 0% 42.1ns ± 4% +1.95% (p=0.000 n=17+21) RegexpMatchEasy1_1K-8 216ns ± 2% 216ns ± 0% +0.17% (p=0.020 n=21+19) RegexpMatchMedium_32-8 790ns ± 7% 803ns ± 8% ~ (p=0.178 n=21+21) RegexpMatchMedium_1K-8 23.5µs ± 5% 23.7µs ± 5% ~ (p=0.421 n=21+21) RegexpMatchHard_32-8 1.09µs ± 1% 1.09µs ± 1% -0.53% 
(p=0.000 n=19+18) RegexpMatchHard_1K-8 33.0µs ± 0% 33.0µs ± 0% ~ (p=0.610 n=21+20) Revcomp-8 348ms ± 0% 353ms ± 0% +1.38% (p=0.000 n=17+18) Template-8 42.0ms ± 1% 41.9ms ± 1% -0.30% (p=0.049 n=20+20) TimeParse-8 185ns ± 0% 185ns ± 0% ~ (p=0.387 n=20+18) TimeFormat-8 237ns ± 1% 241ns ± 1% +1.57% (p=0.000 n=21+21) [Geo mean] 35.4µs 35.2µs -0.66% name old speed new speed delta GobDecode-8 199MB/s ± 1% 198MB/s ± 1% -0.52% (p=0.000 n=20+21) GobEncode-8 277MB/s ± 1% 277MB/s ± 0% ~ (p=0.075 n=21+21) Gzip-8 116MB/s ± 1% 115MB/s ± 0% -0.25% (p=0.000 n=20+20) Gunzip-8 773MB/s ± 0% 797MB/s ± 0% +3.12% (p=0.000 n=21+21) JSONEncode-8 283MB/s ± 0% 290MB/s ± 0% +2.35% (p=0.000 n=20+19) JSONDecode-8 63.0MB/s ± 1% 63.5MB/s ± 1% +0.82% (p=0.000 n=20+20) GoParse-8 22.2MB/s ± 2% 22.3MB/s ± 1% ~ (p=0.539 n=21+20) RegexpMatchEasy0_32-8 660MB/s ± 2% 697MB/s ± 3% +5.57% (p=0.000 n=20+21) RegexpMatchEasy0_1K-8 7.36GB/s ± 0% 7.34GB/s ± 0% -0.26% (p=0.000 n=18+20) RegexpMatchEasy1_32-8 775MB/s ± 0% 761MB/s ± 4% -1.88% (p=0.000 n=17+21) RegexpMatchEasy1_1K-8 4.74GB/s ± 2% 4.74GB/s ± 0% -0.18% (p=0.020 n=21+19) RegexpMatchMedium_32-8 40.6MB/s ± 7% 39.9MB/s ± 9% ~ (p=0.191 n=21+21) RegexpMatchMedium_1K-8 43.7MB/s ± 5% 43.2MB/s ± 5% ~ (p=0.435 n=21+21) RegexpMatchHard_32-8 29.3MB/s ± 1% 29.4MB/s ± 1% +0.53% (p=0.000 n=19+18) RegexpMatchHard_1K-8 31.0MB/s ± 0% 31.0MB/s ± 0% ~ (p=0.572 n=21+20) Revcomp-8 730MB/s ± 0% 720MB/s ± 0% -1.36% (p=0.000 n=17+18) Template-8 46.2MB/s ± 1% 46.3MB/s ± 1% +0.30% (p=0.041 n=20+20) [Geo mean] 204MB/s 205MB/s +0.30% Change-Id: Iac75d0ec184a515ce0e65e19559d5fe2e9840514 Reviewed-on: https://go-review.googlesource.com/c/go/+/354970 Reviewed-by: Josh Bleecher Snyder <josharian@gmail.com> Trust: Josh Bleecher Snyder <josharian@gmail.com> Trust: Keith Randall <khr@golang.org> Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Go Bot <gobot@golang.org>
578 lines
14 KiB
Go
578 lines
14 KiB
Go
// asmcheck
|
|
|
|
// Copyright 2018 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package codegen
|
|
|
|
// This file contains codegen tests related to arithmetic
|
|
// simplifications and optimizations on integer types.
|
|
// For codegen tests on float types, see floats.go.
|
|
|
|
// ----------------- //
|
|
// Subtraction //
|
|
// ----------------- //
|
|
|
|
// ef is a package-level variable that SubMem updates by subtracting an
// element of its slice argument.
var ef int

// SubMem exercises subtraction where one operand lives in memory. Each
// statement is preceded by the asmcheck directives naming the instruction
// expected for it on 386 and/or amd64.
//
// arr must have at least 9 elements and b, c and d must be valid indexes
// into it, otherwise the indexing panics. The slice is updated in place and
// ef is decremented by arr[b]. The result is arr[0] - arr[1] as seen after
// those updates.
func SubMem(arr []int, b, c, d int) int {
	// 386:`SUBL\s[A-Z]+,\s8\([A-Z]+\)`
	// amd64:`SUBQ\s[A-Z]+,\s16\([A-Z]+\)`
	arr[2] -= b
	// 386:`SUBL\s[A-Z]+,\s12\([A-Z]+\)`
	// amd64:`SUBQ\s[A-Z]+,\s24\([A-Z]+\)`
	arr[3] -= b
	// 386:`DECL\s16\([A-Z]+\)`
	arr[4]--
	// 386:`ADDL\s[$]-20,\s20\([A-Z]+\)`
	arr[5] -= 20
	// 386:`SUBL\s\([A-Z]+\)\([A-Z]+\*4\),\s[A-Z]+`
	ef -= arr[b]
	// 386:`SUBL\s[A-Z]+,\s\([A-Z]+\)\([A-Z]+\*4\)`
	arr[c] -= b
	// 386:`ADDL\s[$]-15,\s\([A-Z]+\)\([A-Z]+\*4\)`
	arr[d] -= 15
	// 386:`DECL\s\([A-Z]+\)\([A-Z]+\*4\)`
	arr[b]--
	// amd64:`DECQ\s64\([A-Z]+\)`
	arr[8]--
	// 386:"SUBL\t4"
	// amd64:"SUBQ\t8"
	return arr[0] - arr[1]
}
|
|
|
|
// SubFromConst returns 40 - a. The directives require ppc64 and ppc64le to
// emit a single SUBC with the constant 40 as an immediate operand.
func SubFromConst(a int) int {
	// ppc64le: `SUBC\tR[0-9]+,\s[$]40,\sR`
	// ppc64: `SUBC\tR[0-9]+,\s[$]40,\sR`
	b := 40 - a
	return b
}
|
|
|
|
// SubFromConstNeg returns 40 - (-a), which equals 40 + a. The directives
// require ppc64 and ppc64le to fold the double negation into one ADD of the
// immediate 40.
func SubFromConstNeg(a int) int {
	// ppc64le: `ADD\t[$]40,\sR[0-9]+,\sR`
	// ppc64: `ADD\t[$]40,\sR[0-9]+,\sR`
	c := 40 - (-a)
	return c
}
|
|
|
|
// SubSubFromConst returns 40 - (20 - a), which equals a + 20. The directives
// require ppc64 and ppc64le to fold both constants into one ADD of the
// immediate 20.
func SubSubFromConst(a int) int {
	// ppc64le: `ADD\t[$]20,\sR[0-9]+,\sR`
	// ppc64: `ADD\t[$]20,\sR[0-9]+,\sR`
	c := 40 - (20 - a)
	return c
}
|
|
|
|
// AddSubFromConst returns 40 + (20 - a), which equals 60 - a. The directives
// require ppc64 and ppc64le to fold both constants into one SUBC from the
// immediate 60.
func AddSubFromConst(a int) int {
	// ppc64le: `SUBC\tR[0-9]+,\s[$]60,\sR`
	// ppc64: `SUBC\tR[0-9]+,\s[$]60,\sR`
	c := 40 + (20 - a)
	return c
}
|
|
|
|
// NegSubFromConst returns -(20 - a), which equals a - 20. The directives
// require ppc64 and ppc64le to emit one ADD of the immediate -20.
func NegSubFromConst(a int) int {
	// ppc64le: `ADD\t[$]-20,\sR[0-9]+,\sR`
	// ppc64: `ADD\t[$]-20,\sR[0-9]+,\sR`
	c := -(20 - a)
	return c
}
|
|
|
|
// NegAddFromConstNeg returns -(-40 + a), which equals 40 - a. The directives
// require ppc64 and ppc64le to emit one SUBC from the immediate 40.
func NegAddFromConstNeg(a int) int {
	// ppc64le: `SUBC\tR[0-9]+,\s[$]40,\sR`
	// ppc64: `SUBC\tR[0-9]+,\s[$]40,\sR`
	c := -(-40 + a)
	return c
}
|
|
|
|
// SubSubNegSimplify returns (a - b) - a, which equals -b. The directive
// requires amd64 to emit a NEGQ for it.
func SubSubNegSimplify(a, b int) int {
	// amd64:"NEGQ"
	r := (a - b) - a
	return r
}
|
|
|
|
// SubAddSimplify returns a + (b - a), which equals b. The directive forbids
// both SUBQ and ADDQ on amd64, i.e. the arithmetic must cancel entirely.
func SubAddSimplify(a, b int) int {
	// amd64:-"SUBQ",-"ADDQ"
	r := a + (b - a)
	return r
}
|
|
|
|
// SubAddNegSimplify returns a - (b + a), which equals -b. The directive
// requires amd64 to emit a NEGQ and forbids ADDQ and SUBQ.
func SubAddNegSimplify(a, b int) int {
	// amd64:"NEGQ",-"ADDQ",-"SUBQ"
	r := a - (b + a)
	return r
}
|
|
|
|
// AddAddSubSimplify returns a + (b + (c - a)), which equals b + c. The
// directive forbids SUBQ on amd64, i.e. the a terms must cancel.
func AddAddSubSimplify(a, b, c int) int {
	// amd64:-"SUBQ"
	r := a + (b + (c - a))
	return r
}
|
|
|
|
// -------------------- //
|
|
// Multiplication //
|
|
// -------------------- //
|
|
|
|
// Pow2Muls returns n1*32 and -64*n2. The directives require multiplication
// by a power of two to become a shift (plus a negate for the negative
// constant) and forbid a multiply instruction on each listed architecture.
func Pow2Muls(n1, n2 int) (int, int) {
	// amd64:"SHLQ\t[$]5",-"IMULQ"
	// 386:"SHLL\t[$]5",-"IMULL"
	// arm:"SLL\t[$]5",-"MUL"
	// arm64:"LSL\t[$]5",-"MUL"
	// ppc64:"SLD\t[$]5",-"MUL"
	// ppc64le:"SLD\t[$]5",-"MUL"
	a := n1 * 32

	// amd64:"SHLQ\t[$]6",-"IMULQ"
	// 386:"SHLL\t[$]6",-"IMULL"
	// arm:"SLL\t[$]6",-"MUL"
	// arm64:`NEG\sR[0-9]+<<6,\sR[0-9]+`,-`LSL`,-`MUL`
	// ppc64:"SLD\t[$]6","NEG\\sR[0-9]+,\\sR[0-9]+",-"MUL"
	// ppc64le:"SLD\t[$]6","NEG\\sR[0-9]+,\\sR[0-9]+",-"MUL"
	b := -64 * n2

	return a, b
}
|
|
|
|
// Mul_96 returns n * 96. The directives require the multiplication to be
// decomposed into shifts and adds (or LEA on x86) and forbid a multiply
// instruction on each listed architecture.
func Mul_96(n int) int {
	// amd64:`SHLQ\t[$]5`,`LEAQ\t\(.*\)\(.*\*2\),`,-`IMULQ`
	// 386:`SHLL\t[$]5`,`LEAL\t\(.*\)\(.*\*2\),`,-`IMULL`
	// arm64:`LSL\t[$]5`,`ADD\sR[0-9]+<<1,\sR[0-9]+`,-`MUL`
	// arm:`SLL\t[$]5`,`ADD\sR[0-9]+<<1,\sR[0-9]+`,-`MUL`
	// s390x:`SLD\t[$]5`,`SLD\t[$]6`,-`MULLD`
	return n * 96
}
|
|
|
|
// Mul_n120 returns n * -120. The directive requires s390x to build the
// product from shifts by 3 and 7 and forbids MULLD.
func Mul_n120(n int) int {
	// s390x:`SLD\t[$]3`,`SLD\t[$]7`,-`MULLD`
	return n * -120
}
|
|
|
|
// MulMemSrc multiplies the first element of each slice by its second
// element, in place. The directives require the multiply to take its source
// operand directly from memory (offset 4) on 386 and amd64.
//
// Both slices must have at least 2 elements, otherwise the indexing panics.
func MulMemSrc(a []uint32, b []float32) {
	// 386:`IMULL\s4\([A-Z]+\),\s[A-Z]+`
	a[0] *= a[1]
	// 386/sse2:`MULSS\s4\([A-Z]+\),\sX[0-9]+`
	// amd64:`MULSS\s4\([A-Z]+\),\sX[0-9]+`
	b[0] *= b[1]
}
|
|
|
|
// Multiplications merging tests
|
|
|
|
// MergeMuls1 returns 15*n + 31*n. The directives require x86 to merge the
// two products into a single multiply by 46.
func MergeMuls1(n int) int {
	// amd64:"IMUL3Q\t[$]46"
	// 386:"IMUL3L\t[$]46"
	return 15*n + 31*n // 46n
}
|
|
|
|
// MergeMuls2 returns 5*n + 7*(n+1) + 11*(n+2). The directives require x86 to
// merge this into one multiply by 23 plus an addition of 29.
func MergeMuls2(n int) int {
	// amd64:"IMUL3Q\t[$]23","(ADDQ\t[$]29)|(LEAQ\t29)"
	// 386:"IMUL3L\t[$]23","ADDL\t[$]29"
	return 5*n + 7*(n+1) + 11*(n+2) // 23n + 29
}
|
|
|
|
// MergeMuls3 returns a*n + 19*n. The directives require x86 to factor out n,
// adding 19 to a instead of multiplying by the constant 19.
func MergeMuls3(a, n int) int {
	// amd64:"ADDQ\t[$]19",-"IMULQ\t[$]19"
	// 386:"ADDL\t[$]19",-"IMULL\t[$]19"
	return a*n + 19*n // (a+19)n
}
|
|
|
|
// MergeMuls4 returns 23*n - 9*n. The directives require x86 to merge the two
// products into a single multiply by 14.
func MergeMuls4(n int) int {
	// amd64:"IMUL3Q\t[$]14"
	// 386:"IMUL3L\t[$]14"
	return 23*n - 9*n // 14n
}
|
|
|
|
// MergeMuls5 returns a*n - 19*n. The directives require x86 to factor out n,
// adding -19 to a instead of multiplying by the constant 19.
func MergeMuls5(a, n int) int {
	// amd64:"ADDQ\t[$]-19",-"IMULQ\t[$]19"
	// 386:"ADDL\t[$]-19",-"IMULL\t[$]19"
	return a*n - 19*n // (a-19)n
}
|
|
|
|
// -------------- //
|
|
// Division //
|
|
// -------------- //
|
|
|
|
// DivMemSrc divides a[0] by a[1] in place. The directives require the
// divide to take its divisor directly from memory (offset 8) on 386 with
// SSE2 and on amd64.
//
// a must have at least 2 elements, otherwise the indexing panics.
func DivMemSrc(a []float64) {
	// 386/sse2:`DIVSD\s8\([A-Z]+\),\sX[0-9]+`
	// amd64:`DIVSD\s8\([A-Z]+\),\sX[0-9]+`
	a[0] /= a[1]
}
|
|
|
|
// Pow2Divs returns n1/32 (unsigned) and n2/64 (signed). The directives
// require division by a power of two to become a shift and forbid a divide
// instruction (or the arm udiv helper) where listed.
func Pow2Divs(n1 uint, n2 int) (uint, int) {
	// 386:"SHRL\t[$]5",-"DIVL"
	// amd64:"SHRQ\t[$]5",-"DIVQ"
	// arm:"SRL\t[$]5",-".*udiv"
	// arm64:"LSR\t[$]5",-"UDIV"
	// ppc64:"SRD"
	// ppc64le:"SRD"
	a := n1 / 32 // unsigned

	// amd64:"SARQ\t[$]6",-"IDIVQ"
	// 386:"SARL\t[$]6",-"IDIVL"
	// arm:"SRA\t[$]6",-".*udiv"
	// arm64:"ASR\t[$]6",-"SDIV"
	// ppc64:"SRAD"
	// ppc64le:"SRAD"
	b := n2 / 64 // signed

	return a, b
}
|
|
|
|
// ConstDivs checks that constant divisions get turned into MULs.
// It returns n1/17 (unsigned) and n2/17 (signed). The directives require a
// multiply by a magic constant and forbid a divide instruction (or the arm
// udiv helper) on each listed architecture.
func ConstDivs(n1 uint, n2 int) (uint, int) {
	// amd64:"MOVQ\t[$]-1085102592571150095","MULQ",-"DIVQ"
	// 386:"MOVL\t[$]-252645135","MULL",-"DIVL"
	// arm64:`MOVD`,`UMULH`,-`DIV`
	// arm:`MOVW`,`MUL`,-`.*udiv`
	a := n1 / 17 // unsigned

	// amd64:"MOVQ\t[$]-1085102592571150095","IMULQ",-"IDIVQ"
	// 386:"MOVL\t[$]-252645135","IMULL",-"IDIVL"
	// arm64:`SMULH`,-`DIV`
	// arm:`MOVW`,`MUL`,-`.*udiv`
	b := n2 / 17 // signed

	return a, b
}
|
|
|
|
// FloatDivs returns a[1] / a[2]. The directives require the divide to take
// its divisor directly from memory (offset 8) on amd64 and on 386 with SSE2.
//
// a must have at least 3 elements, otherwise the indexing panics.
func FloatDivs(a []float32) float32 {
	// amd64:`DIVSS\s8\([A-Z]+\),\sX[0-9]+`
	// 386/sse2:`DIVSS\s8\([A-Z]+\),\sX[0-9]+`
	return a[1] / a[2]
}
|
|
|
|
// Pow2Mods returns n1%32 (unsigned) and n2%64 (signed). The directives
// require the unsigned remainder to become a mask and the signed remainder
// to use shifts, and forbid a divide instruction where listed.
func Pow2Mods(n1 uint, n2 int) (uint, int) {
	// 386:"ANDL\t[$]31",-"DIVL"
	// amd64:"ANDL\t[$]31",-"DIVQ"
	// arm:"AND\t[$]31",-".*udiv"
	// arm64:"AND\t[$]31",-"UDIV"
	// ppc64:"ANDCC\t[$]31"
	// ppc64le:"ANDCC\t[$]31"
	a := n1 % 32 // unsigned

	// 386:"SHRL",-"IDIVL"
	// amd64:"SHRQ",-"IDIVQ"
	// arm:"SRA",-".*udiv"
	// arm64:"ASR",-"REM"
	// ppc64:"SRAD"
	// ppc64le:"SRAD"
	b := n2 % 64 // signed

	return a, b
}
|
|
|
|
// Pow2DivisibleSigned checks that signed divisibility checks get converted
// to AND on low bits. It reports whether n1 is divisible by 64 and whether
// n2 is not divisible by 64. The directives require a test of the low six
// bits and forbid divide and shift instructions where listed.
func Pow2DivisibleSigned(n1, n2 int) (bool, bool) {
	// 386:"TESTL\t[$]63",-"DIVL",-"SHRL"
	// amd64:"TESTQ\t[$]63",-"DIVQ",-"SHRQ"
	// arm:"AND\t[$]63",-".*udiv",-"SRA"
	// arm64:"AND\t[$]63",-"UDIV",-"ASR"
	// ppc64:"ANDCC\t[$]63",-"SRAD"
	// ppc64le:"ANDCC\t[$]63",-"SRAD"
	a := n1%64 == 0 // signed divisible

	// 386:"TESTL\t[$]63",-"DIVL",-"SHRL"
	// amd64:"TESTQ\t[$]63",-"DIVQ",-"SHRQ"
	// arm:"AND\t[$]63",-".*udiv",-"SRA"
	// arm64:"AND\t[$]63",-"UDIV",-"ASR"
	// ppc64:"ANDCC\t[$]63",-"SRAD"
	// ppc64le:"ANDCC\t[$]63",-"SRAD"
	b := n2%64 != 0 // signed indivisible

	return a, b
}
|
|
|
|
// ConstMods checks that constant modulo divs get turned into MULs.
// It returns n1%17 (unsigned) and n2%17 (signed). The directives require a
// multiply by a magic constant and forbid a divide instruction (or the arm
// udiv helper) on each listed architecture.
func ConstMods(n1 uint, n2 int) (uint, int) {
	// amd64:"MOVQ\t[$]-1085102592571150095","MULQ",-"DIVQ"
	// 386:"MOVL\t[$]-252645135","MULL",-"DIVL"
	// arm64:`MOVD`,`UMULH`,-`DIV`
	// arm:`MOVW`,`MUL`,-`.*udiv`
	a := n1 % 17 // unsigned

	// amd64:"MOVQ\t[$]-1085102592571150095","IMULQ",-"IDIVQ"
	// 386:"MOVL\t[$]-252645135","IMULL",-"IDIVL"
	// arm64:`SMULH`,-`DIV`
	// arm:`MOVW`,`MUL`,-`.*udiv`
	b := n2 % 17 // signed

	return a, b
}
|
|
|
|
// Divisible checks that divisibility checks x%c==0 are converted to MULs and
// rotates. It reports, in order, whether n1 is divisible by 6, n1 by 19,
// n2 by 6 and n2 by 19. The even divisor (6) is expected to need a rotate
// and the odd divisor (19) is expected not to, per the directives.
//
// The 386 negative checks name DIVL, the 32-bit divide instruction; as a
// substring it also covers IDIVL.
func Divisible(n1 uint, n2 int) (bool, bool, bool, bool) {
	// amd64:"MOVQ\t[$]-6148914691236517205","IMULQ","ROLQ\t[$]63",-"DIVQ"
	// 386:"IMUL3L\t[$]-1431655765","ROLL\t[$]31",-"DIVL"
	// arm64:"MOVD\t[$]-6148914691236517205","MUL","ROR",-"DIV"
	// arm:"MUL","CMP\t[$]715827882",-".*udiv"
	// ppc64:"MULLD","ROTL\t[$]63"
	// ppc64le:"MULLD","ROTL\t[$]63"
	evenU := n1%6 == 0

	// amd64:"MOVQ\t[$]-8737931403336103397","IMULQ",-"ROLQ",-"DIVQ"
	// 386:"IMUL3L\t[$]678152731",-"ROLL",-"DIVL"
	// arm64:"MOVD\t[$]-8737931403336103397","MUL",-"ROR",-"DIV"
	// arm:"MUL","CMP\t[$]226050910",-".*udiv"
	// ppc64:"MULLD",-"ROTL"
	// ppc64le:"MULLD",-"ROTL"
	oddU := n1%19 == 0

	// amd64:"IMULQ","ADD","ROLQ\t[$]63",-"DIVQ"
	// 386:"IMUL3L\t[$]-1431655765","ADDL\t[$]715827882","ROLL\t[$]31",-"DIVL"
	// arm64:"MUL","ADD\t[$]3074457345618258602","ROR",-"DIV"
	// arm:"MUL","ADD\t[$]715827882",-".*udiv"
	// ppc64/power8:"MULLD","ADD","ROTL\t[$]63"
	// ppc64le/power8:"MULLD","ADD","ROTL\t[$]63"
	// ppc64/power9:"MADDLD","ROTL\t[$]63"
	// ppc64le/power9:"MADDLD","ROTL\t[$]63"
	evenS := n2%6 == 0

	// amd64:"IMULQ","ADD",-"ROLQ",-"DIVQ"
	// 386:"IMUL3L\t[$]678152731","ADDL\t[$]113025455",-"ROLL",-"DIVL"
	// arm64:"MUL","ADD\t[$]485440633518672410",-"ROR",-"DIV"
	// arm:"MUL","ADD\t[$]113025455",-".*udiv"
	// ppc64/power8:"MULLD","ADD",-"ROTL"
	// ppc64/power9:"MADDLD",-"ROTL"
	// ppc64le/power8:"MULLD","ADD",-"ROTL"
	// ppc64le/power9:"MADDLD",-"ROTL"
	oddS := n2%19 == 0

	return evenU, oddU, evenS, oddS
}
|
|
|
|
// NoFix64A checks that fix-up code is not generated for divisions where it
// has been proven that the divisor is not -1 or that the dividend is
// > MinIntNN. Here the divr > 5 guard rules out a divisor of -1, and the
// directives forbid a JMP on the division and remainder lines on amd64.
//
// It starts from d = 42 and e = 84; when divr > 5 it sets d to 42/divr,
// e to 84%divr and then adds e to d. It returns d and e.
func NoFix64A(divr int64) (int64, int64) {
	var d int64 = 42
	var e int64 = 84
	if divr > 5 {
		d /= divr // amd64:-"JMP"
		e %= divr // amd64:-"JMP"
		// The following statement is to avoid conflict between the above check
		// and the normal JMP generated at the end of the block.
		d += e
	}
	return d, e
}
|
|
|
|
// NoFix64B divides divd by the constant -1 inside a guard that excludes the
// minimum int64 value. The directives forbid a JMP on the division and
// remainder lines on amd64, i.e. no branch for the -1 divisor case.
//
// When the guard holds it returns -divd and 0; otherwise it returns 0 and 0.
func NoFix64B(divd int64) (int64, int64) {
	var d int64
	var e int64
	var divr int64 = -1
	if divd > -9223372036854775808 {
		d = divd / divr // amd64:-"JMP"
		e = divd % divr // amd64:-"JMP"
		// Keeps the checked lines separate from the end of the block.
		d += e
	}
	return d, e
}
|
|
|
|
// NoFix32A is the int32 variant of the no-fix-up check with a guarded
// divisor. The divr > 5 guard rules out a divisor of -1, and the directives
// forbid a JMP on the division and remainder lines on amd64 and 386.
//
// It starts from d = 42 and e = 84; when divr > 5 it sets d to 42/divr,
// e to 84%divr and then adds e to d. It returns d and e.
func NoFix32A(divr int32) (int32, int32) {
	var d int32 = 42
	var e int32 = 84
	if divr > 5 {
		// amd64:-"JMP"
		// 386:-"JMP"
		d /= divr
		// amd64:-"JMP"
		// 386:-"JMP"
		e %= divr
		d += e
	}
	return d, e
}
|
|
|
|
// NoFix32B divides divd by the constant -1 inside a guard that excludes the
// minimum int32 value. The directives forbid a JMP on the division and
// remainder lines on amd64 and 386.
//
// When the guard holds it returns -divd and 0; otherwise it returns 0 and 0.
func NoFix32B(divd int32) (int32, int32) {
	var d int32
	var e int32
	var divr int32 = -1
	if divd > -2147483648 {
		// amd64:-"JMP"
		// 386:-"JMP"
		d = divd / divr
		// amd64:-"JMP"
		// 386:-"JMP"
		e = divd % divr
		d += e
	}
	return d, e
}
|
|
|
|
// NoFix16A is the int16 variant of the no-fix-up check with a guarded
// divisor. The divr > 5 guard rules out a divisor of -1, and the directives
// forbid a JMP on the division and remainder lines on amd64 and 386.
//
// It starts from d = 42 and e = 84; when divr > 5 it sets d to 42/divr,
// e to 84%divr and then adds e to d. It returns d and e.
func NoFix16A(divr int16) (int16, int16) {
	var d int16 = 42
	var e int16 = 84
	if divr > 5 {
		// amd64:-"JMP"
		// 386:-"JMP"
		d /= divr
		// amd64:-"JMP"
		// 386:-"JMP"
		e %= divr
		d += e
	}
	return d, e
}
|
|
|
|
// NoFix16B divides divd by the constant -1 inside a guard that excludes the
// minimum int16 value. The directives forbid a JMP on the division and
// remainder lines on amd64 and 386.
//
// When the guard holds it returns -divd and 0; otherwise it returns 0 and 0.
func NoFix16B(divd int16) (int16, int16) {
	var d int16
	var e int16
	var divr int16 = -1
	if divd > -32768 {
		// amd64:-"JMP"
		// 386:-"JMP"
		d = divd / divr
		// amd64:-"JMP"
		// 386:-"JMP"
		e = divd % divr
		d += e
	}
	return d, e
}
|
|
|
|
// Check that len() and cap() calls divided by powers of two are
|
|
// optimized into shifts and ands
|
|
|
|
// LenDiv1 returns len(a) / 1024. The directives require the division of a
// slice length by a power of two to become a logical right shift by 10 and
// forbid a divide where listed.
func LenDiv1(a []int) int {
	// 386:"SHRL\t[$]10"
	// amd64:"SHRQ\t[$]10"
	// arm64:"LSR\t[$]10",-"SDIV"
	// arm:"SRL\t[$]10",-".*udiv"
	// ppc64:"SRD\t[$]10"
	// ppc64le:"SRD\t[$]10"
	return len(a) / 1024
}
|
|
|
|
// LenDiv2 returns len(s) / (4097 >> 1), i.e. len(s) / 2048. The directives
// require the division of a string length by a power of two to become a
// logical right shift by 11 and forbid a divide where listed.
func LenDiv2(s string) int {
	// 386:"SHRL\t[$]11"
	// amd64:"SHRQ\t[$]11"
	// arm64:"LSR\t[$]11",-"SDIV"
	// arm:"SRL\t[$]11",-".*udiv"
	// ppc64:"SRD\t[$]11"
	// ppc64le:"SRD\t[$]11"
	return len(s) / (4097 >> 1)
}
|
|
|
|
// LenMod1 returns len(a) % 1024. The directives require the remainder of a
// slice length by a power of two to become a mask (or a bit-field clear on
// arm/7) and forbid a divide where listed.
func LenMod1(a []int) int {
	// 386:"ANDL\t[$]1023"
	// amd64:"ANDL\t[$]1023"
	// arm64:"AND\t[$]1023",-"SDIV"
	// arm/6:"AND",-".*udiv"
	// arm/7:"BFC",-".*udiv",-"AND"
	// ppc64:"ANDCC\t[$]1023"
	// ppc64le:"ANDCC\t[$]1023"
	return len(a) % 1024
}
|
|
|
|
// LenMod2 returns len(s) % (4097 >> 1), i.e. len(s) % 2048. The directives
// require the remainder of a string length by a power of two to become a
// mask (or a bit-field clear on arm/7) and forbid a divide where listed.
func LenMod2(s string) int {
	// 386:"ANDL\t[$]2047"
	// amd64:"ANDL\t[$]2047"
	// arm64:"AND\t[$]2047",-"SDIV"
	// arm/6:"AND",-".*udiv"
	// arm/7:"BFC",-".*udiv",-"AND"
	// ppc64:"ANDCC\t[$]2047"
	// ppc64le:"ANDCC\t[$]2047"
	return len(s) % (4097 >> 1)
}
|
|
|
|
// CapDiv returns cap(a) / ((1 << 11) + 2048), i.e. cap(a) / 4096. The
// directives require the division of a slice capacity by a power of two to
// become a logical right shift by 12 and forbid a divide where listed.
func CapDiv(a []int) int {
	// 386:"SHRL\t[$]12"
	// amd64:"SHRQ\t[$]12"
	// arm64:"LSR\t[$]12",-"SDIV"
	// arm:"SRL\t[$]12",-".*udiv"
	// ppc64:"SRD\t[$]12"
	// ppc64le:"SRD\t[$]12"
	return cap(a) / ((1 << 11) + 2048)
}
|
|
|
|
// CapMod returns cap(a) % ((1 << 11) + 2048), i.e. cap(a) % 4096. The
// directives require the remainder of a slice capacity by a power of two to
// become a mask (or a bit-field clear on arm/7) and forbid a divide where
// listed.
func CapMod(a []int) int {
	// 386:"ANDL\t[$]4095"
	// amd64:"ANDL\t[$]4095"
	// arm64:"AND\t[$]4095",-"SDIV"
	// arm/6:"AND",-".*udiv"
	// arm/7:"BFC",-".*udiv",-"AND"
	// ppc64:"ANDCC\t[$]4095"
	// ppc64le:"ANDCC\t[$]4095"
	return cap(a) % ((1 << 11) + 2048)
}
|
|
|
|
// AddMul returns 2*x + 1. The directive requires amd64 to compute it with a
// single LEAQ that carries the displacement 1.
func AddMul(x int) int {
	// amd64:"LEAQ\t1"
	return 2*x + 1
}
|
|
|
|
// MULA returns a*b + c, c*79 + a and b*64 + c. The directives require a
// fused multiply-add on arm and arm64 for the first two results, and a
// plain ADD without any multiply for the power-of-two case.
func MULA(a, b, c uint32) (uint32, uint32, uint32) {
	// arm:`MULA`,-`MUL\s`
	// arm64:`MADDW`,-`MULW`
	r0 := a*b + c
	// arm:`MULA`,-`MUL\s`
	// arm64:`MADDW`,-`MULW`
	r1 := c*79 + a
	// arm:`ADD`,-`MULA`,-`MUL\s`
	// arm64:`ADD`,-`MADD`,-`MULW`
	r2 := b*64 + c
	return r0, r1, r2
}
|
|
|
|
// MULS returns c - a*b, a - c*79 and c - b*64, with uint32 wraparound. The
// directives require a fused multiply-subtract on arm/7 and arm64 for the
// first two results (separate SUB and MUL on arm/6), and a plain SUB
// without any multiply for the power-of-two case.
func MULS(a, b, c uint32) (uint32, uint32, uint32) {
	// arm/7:`MULS`,-`MUL\s`
	// arm/6:`SUB`,`MUL\s`,-`MULS`
	// arm64:`MSUBW`,-`MULW`
	r0 := c - a*b
	// arm/7:`MULS`,-`MUL\s`
	// arm/6:`SUB`,`MUL\s`,-`MULS`
	// arm64:`MSUBW`,-`MULW`
	r1 := a - c*79
	// arm/7:`SUB`,-`MULS`,-`MUL\s`
	// arm64:`SUB`,-`MSUBW`,-`MULW`
	r2 := c - b*64
	return r0, r1, r2
}
|
|
|
|
// addSpecial returns a+1, b-1 and c+128. The directives require amd64 to use
// INCL and DECL for the unit steps and to express the addition of 128 as a
// SUBL of -128.
func addSpecial(a, b, c uint32) (uint32, uint32, uint32) {
	// amd64:`INCL`
	a++
	// amd64:`DECL`
	b--
	// amd64:`SUBL.*-128`
	c += 128
	return a, b, c
}
|
|
|
|
// divInt returns 0 for negative v and v / 512 otherwise.
//
// Divide -> shift rules usually require fixup for negative inputs.
// If the input is non-negative, make sure the fixup is eliminated.
func divInt(v int64) int64 {
	if v < 0 {
		return 0
	}
	// amd64:-`.*SARQ.*63,`, -".*SHRQ", ".*SARQ.*[$]9,"
	return v / 512
}
|
|
|
|
// constantFold1 returns four differences whose operands each carry a
// constant offset; the directives require arm64 to fold the two constants
// of each expression into one immediate.
//
// The reassociate rules "x - (z + C) -> (x - z) - C" and
// "(z + C) - x -> C + (z - x)" can optimize the following cases.
func constantFold1(i0, j0, i1, j1, i2, j2, i3, j3 int) (int, int, int, int) {
	// arm64:"SUB","ADD\t[$]2"
	r0 := (i0 + 3) - (j0 + 1)
	// arm64:"SUB","SUB\t[$]4"
	r1 := (i1 - 3) - (j1 + 1)
	// arm64:"SUB","ADD\t[$]4"
	r2 := (i2 + 3) - (j2 - 1)
	// arm64:"SUB","SUB\t[$]2"
	r3 := (i3 - 3) - (j3 - 1)
	return r0, r1, r2, r3
}
|
|
|
|
// constantFold2 returns two differences whose left operand is a constant
// minus a variable; the directives require arm64 to fold the constants of
// each expression into one materialized value.
//
// The reassociate rules "x - (z + C) -> (x - z) - C" and
// "(C - z) - x -> C - (z + x)" can optimize the following cases.
func constantFold2(i0, j0, i1, j1 int) (int, int) {
	// arm64:"ADD","MOVD\t[$]2","SUB"
	r0 := (3 - i0) - (j0 + 1)
	// arm64:"ADD","MOVD\t[$]4","SUB"
	r1 := (3 - i1) - (j1 - 1)
	return r0, r1
}
|
|
|
|
// constantFold3 returns (5 * i) * (6 * j), which equals 30*i*j. The
// directive requires arm64 to combine the two constants into a single 30
// and forbids the ADD and LSL a separate multiply by 5 or 6 would use.
func constantFold3(i, j int) int {
	// arm64: "MOVD\t[$]30","MUL",-"ADD",-"LSL"
	r := (5 * i) * (6 * j)
	return r
}
|