cmd/compile/internal/ssa: add patterns for arm64 bitfield opcodes
Add patterns to match common idioms for EXTR, BFI, BFXIL, SBFIZ, SBFX,
UBFIZ and UBFX opcodes.
go1 benchmark results on Amberwing:
name old time/op new time/op delta
FmtManyArgs 786ns ± 2% 714ns ± 1% -9.20% (p=0.000 n=10+10)
Gzip 437ms ± 0% 402ms ± 0% -7.99% (p=0.000 n=10+10)
FmtFprintfIntInt 196ns ± 0% 182ns ± 0% -7.28% (p=0.000 n=10+9)
FmtFprintfPrefixedInt 207ns ± 0% 199ns ± 0% -3.86% (p=0.000 n=10+10)
FmtFprintfFloat 324ns ± 0% 316ns ± 0% -2.47% (p=0.000 n=10+8)
FmtFprintfInt 119ns ± 0% 117ns ± 0% -1.68% (p=0.000 n=10+9)
GobDecode 12.8ms ± 2% 12.6ms ± 1% -1.62% (p=0.002 n=10+10)
JSONDecode 94.4ms ± 1% 93.4ms ± 0% -1.10% (p=0.000 n=10+10)
RegexpMatchEasy0_32 247ns ± 0% 245ns ± 0% -0.65% (p=0.000 n=10+10)
RegexpMatchMedium_32 314ns ± 0% 312ns ± 0% -0.64% (p=0.000 n=10+10)
RegexpMatchEasy0_1K 541ns ± 0% 538ns ± 0% -0.55% (p=0.000 n=10+9)
TimeParse 450ns ± 1% 448ns ± 1% -0.42% (p=0.035 n=9+9)
RegexpMatchEasy1_32 244ns ± 0% 243ns ± 0% -0.41% (p=0.000 n=10+10)
GoParse 6.03ms ± 0% 6.00ms ± 0% -0.40% (p=0.002 n=10+10)
RegexpMatchEasy1_1K 779ns ± 0% 777ns ± 0% -0.26% (p=0.000 n=10+10)
RegexpMatchHard_32 2.75µs ± 0% 2.74µs ± 1% -0.06% (p=0.026 n=9+9)
BinaryTree17 11.7s ± 0% 11.6s ± 0% ~ (p=0.089 n=10+10)
HTTPClientServer 89.1µs ± 1% 89.5µs ± 2% ~ (p=0.436 n=10+10)
RegexpMatchHard_1K 78.9µs ± 0% 79.5µs ± 2% ~ (p=0.469 n=10+10)
FmtFprintfEmpty 58.5ns ± 0% 58.5ns ± 0% ~ (all equal)
GobEncode 12.0ms ± 1% 12.1ms ± 0% ~ (p=0.075 n=10+10)
Revcomp 669ms ± 0% 668ms ± 0% ~ (p=0.091 n=7+9)
Mandelbrot200 5.35ms ± 0% 5.36ms ± 0% +0.07% (p=0.000 n=9+9)
RegexpMatchMedium_1K 52.1µs ± 0% 52.1µs ± 0% +0.10% (p=0.000 n=9+9)
Fannkuch11 3.25s ± 0% 3.26s ± 0% +0.36% (p=0.000 n=9+10)
FmtFprintfString 114ns ± 1% 115ns ± 0% +0.52% (p=0.011 n=10+10)
JSONEncode 20.2ms ± 0% 20.3ms ± 0% +0.65% (p=0.000 n=10+10)
Template 91.3ms ± 0% 92.3ms ± 0% +1.08% (p=0.000 n=10+10)
TimeFormat 484ns ± 0% 495ns ± 1% +2.30% (p=0.000 n=9+10)
There are some opportunities to improve this change further by adding
patterns to match the "extended register" versions of ADD/SUB/CMP, but I
think that should be evaluated on its own. The regressions in Template
and TimeFormat would likely be recovered by those patterns, as they seem
to be due to generating:
    ubfiz x0, x0, #3, #8
    add x1, x2, x0

instead of

    add x1, x2, x0, lsl #3
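As a rough sketch (the function and names below are illustrative, not taken
from this CL or the benchmarks), the kind of Go code that can produce the
UBFIZ+ADD pair above is a narrow integer zero-extended and scaled before an
add:

    // Hypothetical example: idx is zero-extended from 8 bits and scaled by 8
    // (UBFIZ #3, #8), then added to base. An "ADD (extended register)"
    // pattern could fold this into a single ADD with a UXTB + left-shift
    // operand.
    func scaleIndex(base uint64, idx uint8) uint64 {
            return base + uint64(idx)<<3
    }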
Change-Id: I5644a8d70ac7a98e784a377a2b76ab47a3415a4b
Reviewed-on: https://go-review.googlesource.com/88355
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
// asmcheck

// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package codegen

// This file contains codegen tests related to bit field
// insertion/extraction simplifications/optimizations.

func extr1(x, x2 uint64) uint64 {
	return x<<7 + x2>>57 // arm64:"EXTR\t[$]57,"
}

func extr2(x, x2 uint64) uint64 {
	return x<<7 | x2>>57 // arm64:"EXTR\t[$]57,"
}

func extr3(x, x2 uint64) uint64 {
	return x<<7 ^ x2>>57 // arm64:"EXTR\t[$]57,"
}

func extr4(x, x2 uint32) uint32 {
	return x<<7 + x2>>25 // arm64:"EXTRW\t[$]25,"
}

func extr5(x, x2 uint32) uint32 {
	return x<<7 | x2>>25 // arm64:"EXTRW\t[$]25,"
}

func extr6(x, x2 uint32) uint32 {
	return x<<7 ^ x2>>25 // arm64:"EXTRW\t[$]25,"
}

// check 32-bit shift masking
func mask32(x uint32) uint32 {
	return (x << 29) >> 29 // arm64:"AND\t[$]7, R[0-9]+",-"LSR",-"LSL"
}

// check 16-bit shift masking
func mask16(x uint16) uint16 {
	return (x << 14) >> 14 // arm64:"AND\t[$]3, R[0-9]+",-"LSR",-"LSL"
}

// check 8-bit shift masking
func mask8(x uint8) uint8 {
	return (x << 7) >> 7 // arm64:"AND\t[$]1, R[0-9]+",-"LSR",-"LSL"
}

func maskshift(x uint64) uint64 {
	// arm64:"AND\t[$]4095, R[0-9]+",-"LSL",-"LSR",-"UBFIZ",-"UBFX"
	return ((x << 5) & (0xfff << 5)) >> 5
}

// bitfield ops
// bfi: bitfield insert
func bfi1(x, y uint64) uint64 {
	// arm64:"BFI\t[$]4, R[0-9]+, [$]12",-"LSL",-"LSR",-"AND"
	return ((x & 0xfff) << 4) | (y & 0xffffffffffff000f)
}

func bfi2(x, y uint64) uint64 {
	// arm64:"BFI\t[$]12, R[0-9]+, [$]40",-"LSL",-"LSR",-"AND"
	return (x << 24 >> 12) | (y & 0xfff0000000000fff)
}

// bfxil: bitfield extract and insert low
func bfxil1(x, y uint64) uint64 {
	// arm64:"BFXIL\t[$]5, R[0-9]+, [$]12",-"LSL",-"LSR",-"AND"
	return ((x >> 5) & 0xfff) | (y & 0xfffffffffffff000)
}

func bfxil2(x, y uint64) uint64 {
	// arm64:"BFXIL\t[$]12, R[0-9]+, [$]40",-"LSL",-"LSR",-"AND"
	return (x << 12 >> 24) | (y & 0xffffff0000000000)
}

// sbfiz: signed bitfield insert in zeros
func sbfiz1(x int64) int64 {
	// arm64:"SBFIZ\t[$]1, R[0-9]+, [$]60",-"LSL",-"ASR"
	return (x << 4) >> 3
}

func sbfiz2(x int32) int64 {
	return int64(x << 3) // arm64:"SBFIZ\t[$]3, R[0-9]+, [$]29",-"LSL"
}

func sbfiz3(x int16) int64 {
	return int64(x << 3) // arm64:"SBFIZ\t[$]3, R[0-9]+, [$]13",-"LSL"
}

func sbfiz4(x int8) int64 {
	return int64(x << 3) // arm64:"SBFIZ\t[$]3, R[0-9]+, [$]5",-"LSL"
}

func sbfiz5(x int32) int32 {
	// arm64:"SBFIZ\t[$]1, R[0-9]+, [$]28",-"LSL",-"ASR"
	return (x << 4) >> 3
}

// sbfx: signed bitfield extract
func sbfx1(x int64) int64 {
	return (x << 3) >> 4 // arm64:"SBFX\t[$]1, R[0-9]+, [$]60",-"LSL",-"ASR"
}

func sbfx2(x int64) int64 {
	return (x << 60) >> 60 // arm64:"SBFX\tZR, R[0-9]+, [$]4",-"LSL",-"ASR"
}

func sbfx3(x int32) int64 {
	return int64(x) >> 3 // arm64:"SBFX\t[$]3, R[0-9]+, [$]29",-"ASR"
}

func sbfx4(x int16) int64 {
	return int64(x) >> 3 // arm64:"SBFX\t[$]3, R[0-9]+, [$]13",-"ASR"
}

func sbfx5(x int8) int64 {
	return int64(x) >> 3 // arm64:"SBFX\t[$]3, R[0-9]+, [$]5",-"ASR"
}

func sbfx6(x int32) int32 {
	return (x << 3) >> 4 // arm64:"SBFX\t[$]1, R[0-9]+, [$]28",-"LSL",-"ASR"
}

// ubfiz: unsigned bitfield insert in zeros
func ubfiz1(x uint64) uint64 {
	// arm64:"UBFIZ\t[$]3, R[0-9]+, [$]12",-"LSL",-"AND"
	return (x & 0xfff) << 3
}

func ubfiz2(x uint64) uint64 {
	// arm64:"UBFIZ\t[$]4, R[0-9]+, [$]12",-"LSL",-"AND"
	return (x << 4) & 0xfff0
}

func ubfiz3(x uint32) uint64 {
	return uint64(x+1) << 3 // arm64:"UBFIZ\t[$]3, R[0-9]+, [$]32",-"LSL"
}

func ubfiz4(x uint16) uint64 {
	return uint64(x+1) << 3 // arm64:"UBFIZ\t[$]3, R[0-9]+, [$]16",-"LSL"
}

func ubfiz5(x uint8) uint64 {
	return uint64(x+1) << 3 // arm64:"UBFIZ\t[$]3, R[0-9]+, [$]8",-"LSL"
}

func ubfiz6(x uint64) uint64 {
	// arm64:"UBFIZ\t[$]1, R[0-9]+, [$]60",-"LSL",-"LSR"
	return (x << 4) >> 3
}

func ubfiz7(x uint32) uint32 {
	// arm64:"UBFIZ\t[$]1, R[0-9]+, [$]28",-"LSL",-"LSR"
	return (x << 4) >> 3
}

func ubfiz8(x uint64) uint64 {
	// arm64:"UBFIZ\t[$]1, R[0-9]+, [$]20",-"LSL",-"LSR"
	return ((x & 0xfffff) << 4) >> 3
}

func ubfiz9(x uint64) uint64 {
	// arm64:"UBFIZ\t[$]5, R[0-9]+, [$]13",-"LSL",-"LSR",-"AND"
	return ((x << 3) & 0xffff) << 2
}

func ubfiz10(x uint64) uint64 {
	// arm64:"UBFIZ\t[$]7, R[0-9]+, [$]12",-"LSL",-"LSR",-"AND"
	return ((x << 5) & (0xfff << 5)) << 2
}

// ubfx: unsigned bitfield extract
func ubfx1(x uint64) uint64 {
	// arm64:"UBFX\t[$]25, R[0-9]+, [$]10",-"LSR",-"AND"
	return (x >> 25) & 1023
}

func ubfx2(x uint64) uint64 {
	// arm64:"UBFX\t[$]4, R[0-9]+, [$]8",-"LSR",-"AND"
	return (x & 0x0ff0) >> 4
}

func ubfx3(x uint32) uint64 {
	return uint64(x >> 15) // arm64:"UBFX\t[$]15, R[0-9]+, [$]17",-"LSR"
}

func ubfx4(x uint16) uint64 {
	return uint64(x >> 9) // arm64:"UBFX\t[$]9, R[0-9]+, [$]7",-"LSR"
}

func ubfx5(x uint8) uint64 {
	return uint64(x >> 3) // arm64:"UBFX\t[$]3, R[0-9]+, [$]5",-"LSR"
}

func ubfx6(x uint64) uint64 {
	return (x << 1) >> 2 // arm64:"UBFX\t[$]1, R[0-9]+, [$]62",-"LSL",-"LSR"
}

func ubfx7(x uint32) uint32 {
	return (x << 1) >> 2 // arm64:"UBFX\t[$]1, R[0-9]+, [$]30",-"LSL",-"LSR"
}

func ubfx8(x uint64) uint64 {
	// arm64:"UBFX\t[$]1, R[0-9]+, [$]12",-"LSL",-"LSR",-"AND"
	return ((x << 1) >> 2) & 0xfff
}

func ubfx9(x uint64) uint64 {
	// arm64:"UBFX\t[$]4, R[0-9]+, [$]11",-"LSL",-"LSR",-"AND"
	return ((x >> 3) & 0xfff) >> 1
}

func ubfx10(x uint64) uint64 {
	// arm64:"UBFX\t[$]5, R[0-9]+, [$]56",-"LSL",-"LSR"
	return ((x >> 2) << 5) >> 8
}

func ubfx11(x uint64) uint64 {
	// arm64:"UBFX\t[$]1, R[0-9]+, [$]19",-"LSL",-"LSR"
	return ((x & 0xfffff) << 3) >> 4
}

// Check that we don't emit comparisons for constant shifts.
//go:nosplit
func shift_no_cmp(x int) int {
	// arm64:`LSL\t[$]17`,-`CMP`
	// mips64:`SLLV\t[$]17`,-`SGT`
	return x << 17
}