1
0
mirror of https://github.com/golang/go synced 2024-11-24 12:40:12 -07:00
go/test/codegen/arithmetic.go
Jake Ciolek 732f6fa9d5 cmd/compile: use ANDL for small immediates
We can rewrite an ANDQ whose immediate fits in 32 bits as an ANDL, which is shorter to encode.

Looking at Go binary itself, before the change there was:

ANDL: 2337
ANDQ: 4476

After the change:

ANDL: 3790
ANDQ: 3024

So we got rid of 1452 ANDQs

This makes the Linux x86_64 binary 0.03% smaller.

There seems to be an impact on performance.

Intel Cascade Lake benchmarks (with perflock):

name                     old time/op    new time/op    delta
BinaryTree17-8              1.91s ± 1%     1.89s ± 1%  -1.22%  (p=0.000 n=21+18)
Fannkuch11-8                2.34s ± 0%     2.34s ± 0%    ~     (p=0.052 n=20+20)
FmtFprintfEmpty-8          27.7ns ± 1%    27.4ns ± 3%    ~     (p=0.497 n=21+21)
FmtFprintfString-8         53.2ns ± 0%    51.5ns ± 0%  -3.21%  (p=0.000 n=20+19)
FmtFprintfInt-8            57.3ns ± 0%    55.7ns ± 0%  -2.89%  (p=0.000 n=19+19)
FmtFprintfIntInt-8         92.3ns ± 0%    88.4ns ± 1%  -4.23%  (p=0.000 n=20+21)
FmtFprintfPrefixedInt-8     103ns ± 0%     103ns ± 0%  +0.23%  (p=0.000 n=20+21)
FmtFprintfFloat-8           147ns ± 0%     148ns ± 0%  +0.75%  (p=0.000 n=20+21)
FmtManyArgs-8               384ns ± 0%     381ns ± 0%  -0.63%  (p=0.000 n=21+21)
GobDecode-8                3.86ms ± 1%    3.88ms ± 1%  +0.52%  (p=0.000 n=20+21)
GobEncode-8                2.77ms ± 1%    2.77ms ± 0%    ~     (p=0.078 n=21+21)
Gzip-8                      168ms ± 1%     168ms ± 0%  +0.24%  (p=0.000 n=20+20)
Gunzip-8                   25.1ms ± 0%    24.3ms ± 0%  -3.03%  (p=0.000 n=21+21)
HTTPClientServer-8         61.4µs ± 8%    59.1µs ±10%    ~     (p=0.088 n=20+21)
JSONEncode-8               6.86ms ± 0%    6.70ms ± 0%  -2.29%  (p=0.000 n=20+19)
JSONDecode-8               30.8ms ± 1%    30.6ms ± 1%  -0.82%  (p=0.000 n=20+20)
Mandelbrot200-8            3.85ms ± 0%    3.85ms ± 0%    ~     (p=0.191 n=16+17)
GoParse-8                  2.61ms ± 2%    2.60ms ± 1%    ~     (p=0.561 n=21+20)
RegexpMatchEasy0_32-8      48.5ns ± 2%    45.9ns ± 3%  -5.26%  (p=0.000 n=20+21)
RegexpMatchEasy0_1K-8       139ns ± 0%     139ns ± 0%  +0.27%  (p=0.000 n=18+20)
RegexpMatchEasy1_32-8      41.3ns ± 0%    42.1ns ± 4%  +1.95%  (p=0.000 n=17+21)
RegexpMatchEasy1_1K-8       216ns ± 2%     216ns ± 0%  +0.17%  (p=0.020 n=21+19)
RegexpMatchMedium_32-8      790ns ± 7%     803ns ± 8%    ~     (p=0.178 n=21+21)
RegexpMatchMedium_1K-8     23.5µs ± 5%    23.7µs ± 5%    ~     (p=0.421 n=21+21)
RegexpMatchHard_32-8       1.09µs ± 1%    1.09µs ± 1%  -0.53%  (p=0.000 n=19+18)
RegexpMatchHard_1K-8       33.0µs ± 0%    33.0µs ± 0%    ~     (p=0.610 n=21+20)
Revcomp-8                   348ms ± 0%     353ms ± 0%  +1.38%  (p=0.000 n=17+18)
Template-8                 42.0ms ± 1%    41.9ms ± 1%  -0.30%  (p=0.049 n=20+20)
TimeParse-8                 185ns ± 0%     185ns ± 0%    ~     (p=0.387 n=20+18)
TimeFormat-8                237ns ± 1%     241ns ± 1%  +1.57%  (p=0.000 n=21+21)
[Geo mean]                 35.4µs         35.2µs       -0.66%

name                     old speed      new speed      delta
GobDecode-8               199MB/s ± 1%   198MB/s ± 1%  -0.52%  (p=0.000 n=20+21)
GobEncode-8               277MB/s ± 1%   277MB/s ± 0%    ~     (p=0.075 n=21+21)
Gzip-8                    116MB/s ± 1%   115MB/s ± 0%  -0.25%  (p=0.000 n=20+20)
Gunzip-8                  773MB/s ± 0%   797MB/s ± 0%  +3.12%  (p=0.000 n=21+21)
JSONEncode-8              283MB/s ± 0%   290MB/s ± 0%  +2.35%  (p=0.000 n=20+19)
JSONDecode-8             63.0MB/s ± 1%  63.5MB/s ± 1%  +0.82%  (p=0.000 n=20+20)
GoParse-8                22.2MB/s ± 2%  22.3MB/s ± 1%    ~     (p=0.539 n=21+20)
RegexpMatchEasy0_32-8     660MB/s ± 2%   697MB/s ± 3%  +5.57%  (p=0.000 n=20+21)
RegexpMatchEasy0_1K-8    7.36GB/s ± 0%  7.34GB/s ± 0%  -0.26%  (p=0.000 n=18+20)
RegexpMatchEasy1_32-8     775MB/s ± 0%   761MB/s ± 4%  -1.88%  (p=0.000 n=17+21)
RegexpMatchEasy1_1K-8    4.74GB/s ± 2%  4.74GB/s ± 0%  -0.18%  (p=0.020 n=21+19)
RegexpMatchMedium_32-8   40.6MB/s ± 7%  39.9MB/s ± 9%    ~     (p=0.191 n=21+21)
RegexpMatchMedium_1K-8   43.7MB/s ± 5%  43.2MB/s ± 5%    ~     (p=0.435 n=21+21)
RegexpMatchHard_32-8     29.3MB/s ± 1%  29.4MB/s ± 1%  +0.53%  (p=0.000 n=19+18)
RegexpMatchHard_1K-8     31.0MB/s ± 0%  31.0MB/s ± 0%    ~     (p=0.572 n=21+20)
Revcomp-8                 730MB/s ± 0%   720MB/s ± 0%  -1.36%  (p=0.000 n=17+18)
Template-8               46.2MB/s ± 1%  46.3MB/s ± 1%  +0.30%  (p=0.041 n=20+20)
[Geo mean]                204MB/s        205MB/s       +0.30%

Change-Id: Iac75d0ec184a515ce0e65e19559d5fe2e9840514
Reviewed-on: https://go-review.googlesource.com/c/go/+/354970
Reviewed-by: Josh Bleecher Snyder <josharian@gmail.com>
Trust: Josh Bleecher Snyder <josharian@gmail.com>
Trust: Keith Randall <khr@golang.org>
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Go Bot <gobot@golang.org>
2021-10-12 22:02:39 +00:00

578 lines
14 KiB
Go

// asmcheck
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package codegen
// This file contains codegen tests related to arithmetic
// simplifications and optimizations on integer types.
// For codegen tests on float types, see floats.go.
// ----------------- //
// Subtraction //
// ----------------- //
// ef is a package-level int that SubMem updates with ef -= arr[b].
var ef int
// SubMem performs a series of in-place subtractions and decrements on
// elements of arr, using both constant and variable indices, subtracts
// arr[b] from the package-level ef, and returns arr[0] - arr[1].
//
// arr is indexed up to element 8 and by b, c and d, so it must be long
// enough for all of those index expressions or the function panics.
//
// Every statement is preceded by the 386 and/or amd64 assembly expectation
// for that statement; keep each expectation directly above its statement.
func SubMem(arr []int, b, c, d int) int {
	// 386:`SUBL\s[A-Z]+,\s8\([A-Z]+\)`
	// amd64:`SUBQ\s[A-Z]+,\s16\([A-Z]+\)`
	arr[2] -= b
	// 386:`SUBL\s[A-Z]+,\s12\([A-Z]+\)`
	// amd64:`SUBQ\s[A-Z]+,\s24\([A-Z]+\)`
	arr[3] -= b
	// 386:`DECL\s16\([A-Z]+\)`
	arr[4]--
	// 386:`ADDL\s[$]-20,\s20\([A-Z]+\)`
	arr[5] -= 20
	// 386:`SUBL\s\([A-Z]+\)\([A-Z]+\*4\),\s[A-Z]+`
	ef -= arr[b]
	// 386:`SUBL\s[A-Z]+,\s\([A-Z]+\)\([A-Z]+\*4\)`
	arr[c] -= b
	// 386:`ADDL\s[$]-15,\s\([A-Z]+\)\([A-Z]+\*4\)`
	arr[d] -= 15
	// 386:`DECL\s\([A-Z]+\)\([A-Z]+\*4\)`
	arr[b]--
	// amd64:`DECQ\s64\([A-Z]+\)`
	arr[8]--
	// 386:"SUBL\t4"
	// amd64:"SUBQ\t8"
	return arr[0] - arr[1]
}
// SubFromConst returns 40 - a. The ppc64 and ppc64le expectations look for
// a SUBC instruction that takes the immediate 40.
func SubFromConst(a int) int {
	// ppc64le: `SUBC\tR[0-9]+,\s[$]40,\sR`
	// ppc64: `SUBC\tR[0-9]+,\s[$]40,\sR`
	b := 40 - a
	return b
}
// SubFromConstNeg returns 40 - (-a), which equals 40 + a. The ppc64 and
// ppc64le expectations look for an ADD with the immediate 40.
func SubFromConstNeg(a int) int {
	// ppc64le: `ADD\t[$]40,\sR[0-9]+,\sR`
	// ppc64: `ADD\t[$]40,\sR[0-9]+,\sR`
	c := 40 - (-a)
	return c
}
// SubSubFromConst returns 40 - (20 - a), which equals a + 20. The ppc64 and
// ppc64le expectations look for an ADD with the immediate 20.
func SubSubFromConst(a int) int {
	// ppc64le: `ADD\t[$]20,\sR[0-9]+,\sR`
	// ppc64: `ADD\t[$]20,\sR[0-9]+,\sR`
	c := 40 - (20 - a)
	return c
}
// AddSubFromConst returns 40 + (20 - a), which equals 60 - a. The ppc64 and
// ppc64le expectations look for a SUBC with the immediate 60.
func AddSubFromConst(a int) int {
	// ppc64le: `SUBC\tR[0-9]+,\s[$]60,\sR`
	// ppc64: `SUBC\tR[0-9]+,\s[$]60,\sR`
	c := 40 + (20 - a)
	return c
}
// NegSubFromConst returns -(20 - a), which equals a - 20. The ppc64 and
// ppc64le expectations look for an ADD with the immediate -20.
func NegSubFromConst(a int) int {
	// ppc64le: `ADD\t[$]-20,\sR[0-9]+,\sR`
	// ppc64: `ADD\t[$]-20,\sR[0-9]+,\sR`
	c := -(20 - a)
	return c
}
// NegAddFromConstNeg returns -(-40 + a), which equals 40 - a. The ppc64 and
// ppc64le expectations look for a SUBC with the immediate 40.
func NegAddFromConstNeg(a int) int {
	// ppc64le: `SUBC\tR[0-9]+,\s[$]40,\sR`
	// ppc64: `SUBC\tR[0-9]+,\s[$]40,\sR`
	c := -(-40 + a)
	return c
}
// SubSubNegSimplify returns (a - b) - a, which equals -b. The amd64
// expectation looks for a NEGQ.
func SubSubNegSimplify(a, b int) int {
	// amd64:"NEGQ"
	r := (a - b) - a
	return r
}
// SubAddSimplify returns a + (b - a), which equals b. The amd64 expectation
// rejects both SUBQ and ADDQ for the statement.
func SubAddSimplify(a, b int) int {
	// amd64:-"SUBQ",-"ADDQ"
	r := a + (b - a)
	return r
}
// SubAddNegSimplify returns a - (b + a), which equals -b. The amd64
// expectation looks for a NEGQ and rejects ADDQ and SUBQ.
func SubAddNegSimplify(a, b int) int {
	// amd64:"NEGQ",-"ADDQ",-"SUBQ"
	r := a - (b + a)
	return r
}
// AddAddSubSimplify returns a + (b + (c - a)), which equals b + c. The
// amd64 expectation rejects SUBQ for the statement.
func AddAddSubSimplify(a, b, c int) int {
	// amd64:-"SUBQ"
	r := a + (b + (c - a))
	return r
}
// -------------------- //
// Multiplication //
// -------------------- //
// Pow2Muls returns n1 * 32 and -64 * n2.
//
// The expectations on the first product look for a left shift by 5 and
// reject a multiply instruction. For the negative constant they look for a
// shift by 6 (arm64 expects a NEG of a shifted operand and rejects a
// separate LSL; ppc64 and ppc64le expect the shift plus a NEG) and again
// reject a multiply.
func Pow2Muls(n1, n2 int) (int, int) {
	// amd64:"SHLQ\t[$]5",-"IMULQ"
	// 386:"SHLL\t[$]5",-"IMULL"
	// arm:"SLL\t[$]5",-"MUL"
	// arm64:"LSL\t[$]5",-"MUL"
	// ppc64:"SLD\t[$]5",-"MUL"
	// ppc64le:"SLD\t[$]5",-"MUL"
	a := n1 * 32
	// amd64:"SHLQ\t[$]6",-"IMULQ"
	// 386:"SHLL\t[$]6",-"IMULL"
	// arm:"SLL\t[$]6",-"MUL"
	// arm64:`NEG\sR[0-9]+<<6,\sR[0-9]+`,-`LSL`,-`MUL`
	// ppc64:"SLD\t[$]6","NEG\\sR[0-9]+,\\sR[0-9]+",-"MUL"
	// ppc64le:"SLD\t[$]6","NEG\\sR[0-9]+,\\sR[0-9]+",-"MUL"
	b := -64 * n2
	return a, b
}
// Mul_96 returns n * 96.
//
// The expectations reject a multiply instruction on every listed
// architecture. amd64, 386, arm64 and arm look for a shift by 5 combined
// with a scaled-by-two LEA or shifted ADD (96 = 32 * 3); s390x looks for
// two shifts, by 5 and by 6 (96 = 32 + 64).
func Mul_96(n int) int {
	// amd64:`SHLQ\t[$]5`,`LEAQ\t\(.*\)\(.*\*2\),`,-`IMULQ`
	// 386:`SHLL\t[$]5`,`LEAL\t\(.*\)\(.*\*2\),`,-`IMULL`
	// arm64:`LSL\t[$]5`,`ADD\sR[0-9]+<<1,\sR[0-9]+`,-`MUL`
	// arm:`SLL\t[$]5`,`ADD\sR[0-9]+<<1,\sR[0-9]+`,-`MUL`
	// s390x:`SLD\t[$]5`,`SLD\t[$]6`,-`MULLD`
	return n * 96
}
// Mul_n120 returns n * -120. The s390x expectation looks for shifts by 3
// and by 7 and rejects MULLD (presumably computing 8n - 128n; confirm
// against the s390x rewrite rules).
func Mul_n120(n int) int {
	// s390x:`SLD\t[$]3`,`SLD\t[$]7`,-`MULLD`
	return n * -120
}
// MulMemSrc multiplies element 0 of each slice by element 1 in place and
// returns nothing. Both slices must have at least two elements.
//
// The expectations look for a multiply whose source operand is read from
// memory at offset 4, which is the second 4-byte element.
func MulMemSrc(a []uint32, b []float32) {
	// 386:`IMULL\s4\([A-Z]+\),\s[A-Z]+`
	a[0] *= a[1]
	// 386/sse2:`MULSS\s4\([A-Z]+\),\sX[0-9]+`
	// amd64:`MULSS\s4\([A-Z]+\),\sX[0-9]+`
	b[0] *= b[1]
}
// Multiplications merging tests
// MergeMuls1 returns 15*n + 31*n. The amd64 and 386 expectations look for a
// single three-operand multiply by 46, the sum of the two constants.
func MergeMuls1(n int) int {
	// amd64:"IMUL3Q\t[$]46"
	// 386:"IMUL3L\t[$]46"
	return 15*n + 31*n // 46n
}
// MergeMuls2 returns 5*n + 7*(n+1) + 11*(n+2), which expands to 23n + 29.
// The expectations look for one multiply by 23 and one addition of 29
// (amd64 accepts either an ADDQ or an LEAQ with displacement 29).
func MergeMuls2(n int) int {
	// amd64:"IMUL3Q\t[$]23","(ADDQ\t[$]29)|(LEAQ\t29)"
	// 386:"IMUL3L\t[$]23","ADDL\t[$]29"
	return 5*n + 7*(n+1) + 11*(n+2) // 23n + 29
}
// MergeMuls3 returns a*n + 19*n, which factors as (a+19)*n. The
// expectations look for an addition of 19 and reject a multiply by the
// constant 19.
func MergeMuls3(a, n int) int {
	// amd64:"ADDQ\t[$]19",-"IMULQ\t[$]19"
	// 386:"ADDL\t[$]19",-"IMULL\t[$]19"
	return a*n + 19*n // (a+19)n
}
// MergeMuls4 returns 23*n - 9*n. The amd64 and 386 expectations look for a
// single three-operand multiply by 14, the difference of the two constants.
func MergeMuls4(n int) int {
	// amd64:"IMUL3Q\t[$]14"
	// 386:"IMUL3L\t[$]14"
	return 23*n - 9*n // 14n
}
// MergeMuls5 returns a*n - 19*n, which factors as (a-19)*n. The
// expectations look for an addition of -19 and reject a multiply by the
// constant 19.
func MergeMuls5(a, n int) int {
	// amd64:"ADDQ\t[$]-19",-"IMULQ\t[$]19"
	// 386:"ADDL\t[$]-19",-"IMULL\t[$]19"
	return a*n - 19*n // (a-19)n
}
// -------------- //
// Division //
// -------------- //
// DivMemSrc divides a[0] by a[1] in place and returns nothing. a must have
// at least two elements.
//
// The expectations look for a DIVSD whose source operand is read from
// memory at offset 8, which is the second 8-byte element.
func DivMemSrc(a []float64) {
	// 386/sse2:`DIVSD\s8\([A-Z]+\),\sX[0-9]+`
	// amd64:`DIVSD\s8\([A-Z]+\),\sX[0-9]+`
	a[0] /= a[1]
}
// Pow2Divs returns n1 / 32 (unsigned) and n2 / 64 (signed).
//
// The unsigned quotient is expected to use a logical right shift and the
// signed quotient an arithmetic right shift; where a division mnemonic is
// listed with a leading minus it must not appear.
func Pow2Divs(n1 uint, n2 int) (uint, int) {
	// 386:"SHRL\t[$]5",-"DIVL"
	// amd64:"SHRQ\t[$]5",-"DIVQ"
	// arm:"SRL\t[$]5",-".*udiv"
	// arm64:"LSR\t[$]5",-"UDIV"
	// ppc64:"SRD"
	// ppc64le:"SRD"
	a := n1 / 32 // unsigned
	// amd64:"SARQ\t[$]6",-"IDIVQ"
	// 386:"SARL\t[$]6",-"IDIVL"
	// arm:"SRA\t[$]6",-".*udiv"
	// arm64:"ASR\t[$]6",-"SDIV"
	// ppc64:"SRAD"
	// ppc64le:"SRAD"
	b := n2 / 64 // signed
	return a, b
}
// ConstDivs returns n1 / 17 (unsigned) and n2 / 17 (signed).
//
// Check that constant divisions get turned into MULs. On amd64 and 386 the
// expectations also pin the constant that is loaded before the multiply,
// and every listed architecture rejects a divide instruction.
func ConstDivs(n1 uint, n2 int) (uint, int) {
	// amd64:"MOVQ\t[$]-1085102592571150095","MULQ",-"DIVQ"
	// 386:"MOVL\t[$]-252645135","MULL",-"DIVL"
	// arm64:`MOVD`,`UMULH`,-`DIV`
	// arm:`MOVW`,`MUL`,-`.*udiv`
	a := n1 / 17 // unsigned
	// amd64:"MOVQ\t[$]-1085102592571150095","IMULQ",-"IDIVQ"
	// 386:"MOVL\t[$]-252645135","IMULL",-"IDIVL"
	// arm64:`SMULH`,-`DIV`
	// arm:`MOVW`,`MUL`,-`.*udiv`
	b := n2 / 17 // signed
	return a, b
}
// FloatDivs returns a[1] / a[2]. a must have at least three elements.
//
// The expectations look for a DIVSS whose source operand is read from
// memory at offset 8, which is the third 4-byte element.
func FloatDivs(a []float32) float32 {
	// amd64:`DIVSS\s8\([A-Z]+\),\sX[0-9]+`
	// 386/sse2:`DIVSS\s8\([A-Z]+\),\sX[0-9]+`
	return a[1] / a[2]
}
// Pow2Mods returns n1 % 32 (unsigned) and n2 % 64 (signed).
//
// The unsigned remainder is expected to be a mask with 31. The signed
// remainder is expected to involve a shift; where a division or remainder
// mnemonic is listed with a leading minus it must not appear.
func Pow2Mods(n1 uint, n2 int) (uint, int) {
	// 386:"ANDL\t[$]31",-"DIVL"
	// amd64:"ANDL\t[$]31",-"DIVQ"
	// arm:"AND\t[$]31",-".*udiv"
	// arm64:"AND\t[$]31",-"UDIV"
	// ppc64:"ANDCC\t[$]31"
	// ppc64le:"ANDCC\t[$]31"
	a := n1 % 32 // unsigned
	// 386:"SHRL",-"IDIVL"
	// amd64:"SHRQ",-"IDIVQ"
	// arm:"SRA",-".*udiv"
	// arm64:"ASR",-"REM"
	// ppc64:"SRAD"
	// ppc64le:"SRAD"
	b := n2 % 64 // signed
	return a, b
}
// Pow2DivisibleSigned reports whether n1 is divisible by 64 and whether n2
// is not divisible by 64.
//
// Check that signed divisibility checks get converted to AND on low bits.
// Both statements expect a test or mask with 63 and reject the listed
// divide and shift mnemonics.
func Pow2DivisibleSigned(n1, n2 int) (bool, bool) {
	// 386:"TESTL\t[$]63",-"DIVL",-"SHRL"
	// amd64:"TESTQ\t[$]63",-"DIVQ",-"SHRQ"
	// arm:"AND\t[$]63",-".*udiv",-"SRA"
	// arm64:"AND\t[$]63",-"UDIV",-"ASR"
	// ppc64:"ANDCC\t[$]63",-"SRAD"
	// ppc64le:"ANDCC\t[$]63",-"SRAD"
	a := n1%64 == 0 // signed divisible
	// 386:"TESTL\t[$]63",-"DIVL",-"SHRL"
	// amd64:"TESTQ\t[$]63",-"DIVQ",-"SHRQ"
	// arm:"AND\t[$]63",-".*udiv",-"SRA"
	// arm64:"AND\t[$]63",-"UDIV",-"ASR"
	// ppc64:"ANDCC\t[$]63",-"SRAD"
	// ppc64le:"ANDCC\t[$]63",-"SRAD"
	b := n2%64 != 0 // signed indivisible
	return a, b
}
// ConstMods returns n1 % 17 (unsigned) and n2 % 17 (signed).
//
// Check that constant modulo divs get turned into MULs. The expectations
// mirror those of ConstDivs, using the same loaded constants on amd64 and
// 386, and reject a divide instruction on every listed architecture.
func ConstMods(n1 uint, n2 int) (uint, int) {
	// amd64:"MOVQ\t[$]-1085102592571150095","MULQ",-"DIVQ"
	// 386:"MOVL\t[$]-252645135","MULL",-"DIVL"
	// arm64:`MOVD`,`UMULH`,-`DIV`
	// arm:`MOVW`,`MUL`,-`.*udiv`
	a := n1 % 17 // unsigned
	// amd64:"MOVQ\t[$]-1085102592571150095","IMULQ",-"IDIVQ"
	// 386:"MOVL\t[$]-252645135","IMULL",-"IDIVL"
	// arm64:`SMULH`,-`DIV`
	// arm:`MOVW`,`MUL`,-`.*udiv`
	b := n2 % 17 // signed
	return a, b
}
// Divisible reports, in order, whether n1 is divisible by 6, whether n1 is
// divisible by 19, whether n2 is divisible by 6 and whether n2 is divisible
// by 19.
//
// Check that divisibility checks x%c==0 are converted to MULs and rotates.
// The even divisor (6) is expected to need a rotate, the odd divisor (19)
// is expected not to; the signed variants additionally expect an add. No
// divide instruction may appear. The 386 expectations reject the 32-bit
// divide mnemonic, matching the other 386 expectations in this file.
func Divisible(n1 uint, n2 int) (bool, bool, bool, bool) {
	// amd64:"MOVQ\t[$]-6148914691236517205","IMULQ","ROLQ\t[$]63",-"DIVQ"
	// 386:"IMUL3L\t[$]-1431655765","ROLL\t[$]31",-"DIVL"
	// arm64:"MOVD\t[$]-6148914691236517205","MUL","ROR",-"DIV"
	// arm:"MUL","CMP\t[$]715827882",-".*udiv"
	// ppc64:"MULLD","ROTL\t[$]63"
	// ppc64le:"MULLD","ROTL\t[$]63"
	evenU := n1%6 == 0
	// amd64:"MOVQ\t[$]-8737931403336103397","IMULQ",-"ROLQ",-"DIVQ"
	// 386:"IMUL3L\t[$]678152731",-"ROLL",-"DIVL"
	// arm64:"MOVD\t[$]-8737931403336103397","MUL",-"ROR",-"DIV"
	// arm:"MUL","CMP\t[$]226050910",-".*udiv"
	// ppc64:"MULLD",-"ROTL"
	// ppc64le:"MULLD",-"ROTL"
	oddU := n1%19 == 0
	// amd64:"IMULQ","ADD","ROLQ\t[$]63",-"DIVQ"
	// 386:"IMUL3L\t[$]-1431655765","ADDL\t[$]715827882","ROLL\t[$]31",-"DIVL"
	// arm64:"MUL","ADD\t[$]3074457345618258602","ROR",-"DIV"
	// arm:"MUL","ADD\t[$]715827882",-".*udiv"
	// ppc64/power8:"MULLD","ADD","ROTL\t[$]63"
	// ppc64le/power8:"MULLD","ADD","ROTL\t[$]63"
	// ppc64/power9:"MADDLD","ROTL\t[$]63"
	// ppc64le/power9:"MADDLD","ROTL\t[$]63"
	evenS := n2%6 == 0
	// amd64:"IMULQ","ADD",-"ROLQ",-"DIVQ"
	// 386:"IMUL3L\t[$]678152731","ADDL\t[$]113025455",-"ROLL",-"DIVL"
	// arm64:"MUL","ADD\t[$]485440633518672410",-"ROR",-"DIV"
	// arm:"MUL","ADD\t[$]113025455",-".*udiv"
	// ppc64/power8:"MULLD","ADD",-"ROTL"
	// ppc64/power9:"MADDLD",-"ROTL"
	// ppc64le/power8:"MULLD","ADD",-"ROTL"
	// ppc64le/power9:"MADDLD",-"ROTL"
	oddS := n2%19 == 0
	return evenU, oddU, evenS, oddS
}
// Check that fix-up code is not generated for divisions where it has been proven
// that the divisor is not -1 or that the dividend is > MinIntNN.
// NoFix64A starts from d = 42 and e = 84 and, only when divr > 5, replaces
// d with d / divr, e with e % divr, and then adds e to d. It returns d, e.
// The guard establishes that the divisor cannot be -1; the amd64
// expectations reject a JMP on both the division and the remainder.
func NoFix64A(divr int64) (int64, int64) {
	var d int64 = 42
	var e int64 = 84
	if divr > 5 {
		d /= divr // amd64:-"JMP"
		e %= divr // amd64:-"JMP"
		// The following statement is to avoid conflict between the above check
		// and the normal JMP generated at the end of the block.
		d += e
	}
	return d, e
}
// NoFix64B divides divd by the constant divisor -1 when divd is greater
// than the minimum int64 value: d receives the quotient, e the remainder,
// and e is then added to d. When the guard fails both results stay zero.
// The amd64 expectations reject a JMP on the division and the remainder.
func NoFix64B(divd int64) (int64, int64) {
	var d int64
	var e int64
	var divr int64 = -1
	if divd > -9223372036854775808 {
		d = divd / divr // amd64:-"JMP"
		e = divd % divr // amd64:-"JMP"
		// Trailing statement after the checked lines, as in NoFix64A.
		d += e
	}
	return d, e
}
// NoFix32A is the int32 counterpart of NoFix64A: starting from d = 42 and
// e = 84, when divr > 5 it computes d /= divr, e %= divr and d += e, then
// returns d, e. The amd64 and 386 expectations reject a JMP on the division
// and the remainder.
func NoFix32A(divr int32) (int32, int32) {
	var d int32 = 42
	var e int32 = 84
	if divr > 5 {
		// amd64:-"JMP"
		// 386:-"JMP"
		d /= divr
		// amd64:-"JMP"
		// 386:-"JMP"
		e %= divr
		// Trailing statement after the checked lines, as in NoFix64A.
		d += e
	}
	return d, e
}
// NoFix32B is the int32 counterpart of NoFix64B: when divd is greater than
// the minimum int32 value it divides divd by the constant -1, storing the
// quotient in d and the remainder in e, then adds e to d. When the guard
// fails both results stay zero. The amd64 and 386 expectations reject a JMP
// on the division and the remainder.
func NoFix32B(divd int32) (int32, int32) {
	var d int32
	var e int32
	var divr int32 = -1
	if divd > -2147483648 {
		// amd64:-"JMP"
		// 386:-"JMP"
		d = divd / divr
		// amd64:-"JMP"
		// 386:-"JMP"
		e = divd % divr
		// Trailing statement after the checked lines, as in NoFix64A.
		d += e
	}
	return d, e
}
// NoFix16A is the int16 counterpart of NoFix64A: starting from d = 42 and
// e = 84, when divr > 5 it computes d /= divr, e %= divr and d += e, then
// returns d, e. The amd64 and 386 expectations reject a JMP on the division
// and the remainder.
func NoFix16A(divr int16) (int16, int16) {
	var d int16 = 42
	var e int16 = 84
	if divr > 5 {
		// amd64:-"JMP"
		// 386:-"JMP"
		d /= divr
		// amd64:-"JMP"
		// 386:-"JMP"
		e %= divr
		// Trailing statement after the checked lines, as in NoFix64A.
		d += e
	}
	return d, e
}
// NoFix16B is the int16 counterpart of NoFix64B: when divd is greater than
// the minimum int16 value it divides divd by the constant -1, storing the
// quotient in d and the remainder in e, then adds e to d. When the guard
// fails both results stay zero. The amd64 and 386 expectations reject a JMP
// on the division and the remainder.
func NoFix16B(divd int16) (int16, int16) {
	var d int16
	var e int16
	var divr int16 = -1
	if divd > -32768 {
		// amd64:-"JMP"
		// 386:-"JMP"
		d = divd / divr
		// amd64:-"JMP"
		// 386:-"JMP"
		e = divd % divr
		// Trailing statement after the checked lines, as in NoFix64A.
		d += e
	}
	return d, e
}
// Check that len() and cap() calls divided by powers of two are
// optimized into shifts and ands
// LenDiv1 returns len(a) / 1024. Every listed architecture expects a
// logical right shift by 10; arm64 and arm additionally reject a divide.
func LenDiv1(a []int) int {
	// 386:"SHRL\t[$]10"
	// amd64:"SHRQ\t[$]10"
	// arm64:"LSR\t[$]10",-"SDIV"
	// arm:"SRL\t[$]10",-".*udiv"
	// ppc64:"SRD\t[$]10"
	// ppc64le:"SRD\t[$]10"
	return len(a) / 1024
}
// LenDiv2 returns len(s) divided by the constant expression 4097 >> 1,
// which is 2048. Every listed architecture expects a logical right shift by
// 11; arm64 and arm additionally reject a divide.
func LenDiv2(s string) int {
	// 386:"SHRL\t[$]11"
	// amd64:"SHRQ\t[$]11"
	// arm64:"LSR\t[$]11",-"SDIV"
	// arm:"SRL\t[$]11",-".*udiv"
	// ppc64:"SRD\t[$]11"
	// ppc64le:"SRD\t[$]11"
	return len(s) / (4097 >> 1)
}
// LenMod1 returns len(a) % 1024. Most listed architectures expect a mask
// with 1023; the arm/7 expectation looks for BFC and rejects AND. arm64 and
// arm reject a divide.
func LenMod1(a []int) int {
	// 386:"ANDL\t[$]1023"
	// amd64:"ANDL\t[$]1023"
	// arm64:"AND\t[$]1023",-"SDIV"
	// arm/6:"AND",-".*udiv"
	// arm/7:"BFC",-".*udiv",-"AND"
	// ppc64:"ANDCC\t[$]1023"
	// ppc64le:"ANDCC\t[$]1023"
	return len(a) % 1024
}
// LenMod2 returns len(s) modulo the constant expression 4097 >> 1, which is
// 2048. Most listed architectures expect a mask with 2047; the arm/7
// expectation looks for BFC and rejects AND. arm64 and arm reject a divide.
func LenMod2(s string) int {
	// 386:"ANDL\t[$]2047"
	// amd64:"ANDL\t[$]2047"
	// arm64:"AND\t[$]2047",-"SDIV"
	// arm/6:"AND",-".*udiv"
	// arm/7:"BFC",-".*udiv",-"AND"
	// ppc64:"ANDCC\t[$]2047"
	// ppc64le:"ANDCC\t[$]2047"
	return len(s) % (4097 >> 1)
}
// CapDiv returns cap(a) divided by the constant expression (1 << 11) + 2048,
// which is 4096. Every listed architecture expects a logical right shift by
// 12; arm64 and arm additionally reject a divide.
func CapDiv(a []int) int {
	// 386:"SHRL\t[$]12"
	// amd64:"SHRQ\t[$]12"
	// arm64:"LSR\t[$]12",-"SDIV"
	// arm:"SRL\t[$]12",-".*udiv"
	// ppc64:"SRD\t[$]12"
	// ppc64le:"SRD\t[$]12"
	return cap(a) / ((1 << 11) + 2048)
}
// CapMod returns cap(a) modulo the constant expression (1 << 11) + 2048,
// which is 4096. Most listed architectures expect a mask with 4095; the
// arm/7 expectation looks for BFC and rejects AND. arm64 and arm reject a
// divide.
func CapMod(a []int) int {
	// 386:"ANDL\t[$]4095"
	// amd64:"ANDL\t[$]4095"
	// arm64:"AND\t[$]4095",-"SDIV"
	// arm/6:"AND",-".*udiv"
	// arm/7:"BFC",-".*udiv",-"AND"
	// ppc64:"ANDCC\t[$]4095"
	// ppc64le:"ANDCC\t[$]4095"
	return cap(a) % ((1 << 11) + 2048)
}
// AddMul returns 2*x + 1. The amd64 expectation looks for an LEAQ whose
// displacement is 1.
func AddMul(x int) int {
	// amd64:"LEAQ\t1"
	return 2*x + 1
}
// MULA returns a*b + c, c*79 + a and b*64 + c.
//
// The first two results are expected to use a fused multiply-add (MULA on
// arm, MADDW on arm64) with no separate multiply. The third multiplies by
// 64 and is expected to use a plain ADD with neither a multiply nor a
// multiply-add.
func MULA(a, b, c uint32) (uint32, uint32, uint32) {
	// arm:`MULA`,-`MUL\s`
	// arm64:`MADDW`,-`MULW`
	r0 := a*b + c
	// arm:`MULA`,-`MUL\s`
	// arm64:`MADDW`,-`MULW`
	r1 := c*79 + a
	// arm:`ADD`,-`MULA`,-`MUL\s`
	// arm64:`ADD`,-`MADD`,-`MULW`
	r2 := b*64 + c
	return r0, r1, r2
}
// MULS returns c - a*b, a - c*79 and c - b*64.
//
// The first two results are expected to use a fused multiply-subtract on
// arm/7 (MULS) and arm64 (MSUBW); arm/6 is expected to use a separate MUL
// and SUB instead. The third multiplies by 64 and is expected to use a
// plain SUB with neither a multiply nor a multiply-subtract.
func MULS(a, b, c uint32) (uint32, uint32, uint32) {
	// arm/7:`MULS`,-`MUL\s`
	// arm/6:`SUB`,`MUL\s`,-`MULS`
	// arm64:`MSUBW`,-`MULW`
	r0 := c - a*b
	// arm/7:`MULS`,-`MUL\s`
	// arm/6:`SUB`,`MUL\s`,-`MULS`
	// arm64:`MSUBW`,-`MULW`
	r1 := a - c*79
	// arm/7:`SUB`,-`MULS`,-`MUL\s`
	// arm64:`SUB`,-`MSUBW`,-`MULW`
	r2 := c - b*64
	return r0, r1, r2
}
// addSpecial returns a + 1, b - 1 and c + 128. The amd64 expectations look
// for INCL, DECL and, for the addition of 128, a SUBL whose operand text
// contains -128.
func addSpecial(a, b, c uint32) (uint32, uint32, uint32) {
	// amd64:`INCL`
	a++
	// amd64:`DECL`
	b--
	// Adding 128 is expected to be emitted as subtracting -128 (presumably
	// because -128 fits a sign-extended 8-bit immediate and 128 does not;
	// TODO confirm against the amd64 rules).
	// amd64:`SUBL.*-128`
	c += 128
	return a, b, c
}
// divInt returns 0 for negative v and v / 512 otherwise.
//
// Divide -> shift rules usually require fixup for negative inputs.
// If the input is non-negative, make sure the fixup is eliminated.
// The amd64 expectation rejects a SARQ by 63 and any SHRQ, and looks for a
// single SARQ by 9.
func divInt(v int64) int64 {
	if v < 0 {
		return 0
	}
	// amd64:-`.*SARQ.*63,`, -".*SHRQ", ".*SARQ.*[$]9,"
	return v / 512
}
// constantFold1 returns four differences of the form (i +/- 3) - (j +/- 1),
// one per argument pair.
//
// The reassociate rules "x - (z + C) -> (x - z) - C" and
// "(z + C) - x -> C + (z - x)" can optimize the following cases.
// Each arm64 expectation looks for one register SUB plus a single ADD or
// SUB of the combined constant (2, 4, 4 and 2 respectively).
func constantFold1(i0, j0, i1, j1, i2, j2, i3, j3 int) (int, int, int, int) {
	// arm64:"SUB","ADD\t[$]2"
	r0 := (i0 + 3) - (j0 + 1)
	// arm64:"SUB","SUB\t[$]4"
	r1 := (i1 - 3) - (j1 + 1)
	// arm64:"SUB","ADD\t[$]4"
	r2 := (i2 + 3) - (j2 - 1)
	// arm64:"SUB","SUB\t[$]2"
	r3 := (i3 - 3) - (j3 - 1)
	return r0, r1, r2, r3
}
// constantFold2 returns (3 - i0) - (j0 + 1) and (3 - i1) - (j1 - 1).
//
// The reassociate rules "x - (z + C) -> (x - z) - C" and
// "(C - z) - x -> C - (z + x)" can optimize the following cases.
// Each arm64 expectation looks for an ADD of the two variables, a MOVD of
// the combined constant (2, then 4) and one SUB.
func constantFold2(i0, j0, i1, j1 int) (int, int) {
	// arm64:"ADD","MOVD\t[$]2","SUB"
	r0 := (3 - i0) - (j0 + 1)
	// arm64:"ADD","MOVD\t[$]4","SUB"
	r1 := (3 - i1) - (j1 - 1)
	return r0, r1
}
// constantFold3 returns (5 * i) * (6 * j). The arm64 expectation looks for
// the two constants folded into a single MOVD of 30 followed by MUL, with
// no ADD or LSL.
func constantFold3(i, j int) int {
	// arm64: "MOVD\t[$]30","MUL",-"ADD",-"LSL"
	r := (5 * i) * (6 * j)
	return r
}