mirror of
https://github.com/golang/go
synced 2024-11-23 19:40:08 -07:00
732f6fa9d5
We can rewrite ANDQ with an immediate fitting in 32bit with an ANDL, which is shorter to encode. Looking at Go binary itself, before the change there was: ANDL: 2337 ANDQ: 4476 After the change: ANDL: 3790 ANDQ: 3024 So we got rid of 1452 ANDQs This makes the Linux x86_64 binary 0.03% smaller. There seems to be an impact on performance. Intel Cascade Lake benchmarks (with perflock): name old time/op new time/op delta BinaryTree17-8 1.91s ± 1% 1.89s ± 1% -1.22% (p=0.000 n=21+18) Fannkuch11-8 2.34s ± 0% 2.34s ± 0% ~ (p=0.052 n=20+20) FmtFprintfEmpty-8 27.7ns ± 1% 27.4ns ± 3% ~ (p=0.497 n=21+21) FmtFprintfString-8 53.2ns ± 0% 51.5ns ± 0% -3.21% (p=0.000 n=20+19) FmtFprintfInt-8 57.3ns ± 0% 55.7ns ± 0% -2.89% (p=0.000 n=19+19) FmtFprintfIntInt-8 92.3ns ± 0% 88.4ns ± 1% -4.23% (p=0.000 n=20+21) FmtFprintfPrefixedInt-8 103ns ± 0% 103ns ± 0% +0.23% (p=0.000 n=20+21) FmtFprintfFloat-8 147ns ± 0% 148ns ± 0% +0.75% (p=0.000 n=20+21) FmtManyArgs-8 384ns ± 0% 381ns ± 0% -0.63% (p=0.000 n=21+21) GobDecode-8 3.86ms ± 1% 3.88ms ± 1% +0.52% (p=0.000 n=20+21) GobEncode-8 2.77ms ± 1% 2.77ms ± 0% ~ (p=0.078 n=21+21) Gzip-8 168ms ± 1% 168ms ± 0% +0.24% (p=0.000 n=20+20) Gunzip-8 25.1ms ± 0% 24.3ms ± 0% -3.03% (p=0.000 n=21+21) HTTPClientServer-8 61.4µs ± 8% 59.1µs ±10% ~ (p=0.088 n=20+21) JSONEncode-8 6.86ms ± 0% 6.70ms ± 0% -2.29% (p=0.000 n=20+19) JSONDecode-8 30.8ms ± 1% 30.6ms ± 1% -0.82% (p=0.000 n=20+20) Mandelbrot200-8 3.85ms ± 0% 3.85ms ± 0% ~ (p=0.191 n=16+17) GoParse-8 2.61ms ± 2% 2.60ms ± 1% ~ (p=0.561 n=21+20) RegexpMatchEasy0_32-8 48.5ns ± 2% 45.9ns ± 3% -5.26% (p=0.000 n=20+21) RegexpMatchEasy0_1K-8 139ns ± 0% 139ns ± 0% +0.27% (p=0.000 n=18+20) RegexpMatchEasy1_32-8 41.3ns ± 0% 42.1ns ± 4% +1.95% (p=0.000 n=17+21) RegexpMatchEasy1_1K-8 216ns ± 2% 216ns ± 0% +0.17% (p=0.020 n=21+19) RegexpMatchMedium_32-8 790ns ± 7% 803ns ± 8% ~ (p=0.178 n=21+21) RegexpMatchMedium_1K-8 23.5µs ± 5% 23.7µs ± 5% ~ (p=0.421 n=21+21) RegexpMatchHard_32-8 1.09µs ± 1% 1.09µs ± 1% -0.53% 
(p=0.000 n=19+18) RegexpMatchHard_1K-8 33.0µs ± 0% 33.0µs ± 0% ~ (p=0.610 n=21+20) Revcomp-8 348ms ± 0% 353ms ± 0% +1.38% (p=0.000 n=17+18) Template-8 42.0ms ± 1% 41.9ms ± 1% -0.30% (p=0.049 n=20+20) TimeParse-8 185ns ± 0% 185ns ± 0% ~ (p=0.387 n=20+18) TimeFormat-8 237ns ± 1% 241ns ± 1% +1.57% (p=0.000 n=21+21) [Geo mean] 35.4µs 35.2µs -0.66% name old speed new speed delta GobDecode-8 199MB/s ± 1% 198MB/s ± 1% -0.52% (p=0.000 n=20+21) GobEncode-8 277MB/s ± 1% 277MB/s ± 0% ~ (p=0.075 n=21+21) Gzip-8 116MB/s ± 1% 115MB/s ± 0% -0.25% (p=0.000 n=20+20) Gunzip-8 773MB/s ± 0% 797MB/s ± 0% +3.12% (p=0.000 n=21+21) JSONEncode-8 283MB/s ± 0% 290MB/s ± 0% +2.35% (p=0.000 n=20+19) JSONDecode-8 63.0MB/s ± 1% 63.5MB/s ± 1% +0.82% (p=0.000 n=20+20) GoParse-8 22.2MB/s ± 2% 22.3MB/s ± 1% ~ (p=0.539 n=21+20) RegexpMatchEasy0_32-8 660MB/s ± 2% 697MB/s ± 3% +5.57% (p=0.000 n=20+21) RegexpMatchEasy0_1K-8 7.36GB/s ± 0% 7.34GB/s ± 0% -0.26% (p=0.000 n=18+20) RegexpMatchEasy1_32-8 775MB/s ± 0% 761MB/s ± 4% -1.88% (p=0.000 n=17+21) RegexpMatchEasy1_1K-8 4.74GB/s ± 2% 4.74GB/s ± 0% -0.18% (p=0.020 n=21+19) RegexpMatchMedium_32-8 40.6MB/s ± 7% 39.9MB/s ± 9% ~ (p=0.191 n=21+21) RegexpMatchMedium_1K-8 43.7MB/s ± 5% 43.2MB/s ± 5% ~ (p=0.435 n=21+21) RegexpMatchHard_32-8 29.3MB/s ± 1% 29.4MB/s ± 1% +0.53% (p=0.000 n=19+18) RegexpMatchHard_1K-8 31.0MB/s ± 0% 31.0MB/s ± 0% ~ (p=0.572 n=21+20) Revcomp-8 730MB/s ± 0% 720MB/s ± 0% -1.36% (p=0.000 n=17+18) Template-8 46.2MB/s ± 1% 46.3MB/s ± 1% +0.30% (p=0.041 n=20+20) [Geo mean] 204MB/s 205MB/s +0.30% Change-Id: Iac75d0ec184a515ce0e65e19559d5fe2e9840514 Reviewed-on: https://go-review.googlesource.com/c/go/+/354970 Reviewed-by: Josh Bleecher Snyder <josharian@gmail.com> Trust: Josh Bleecher Snyder <josharian@gmail.com> Trust: Keith Randall <khr@golang.org> Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Go Bot <gobot@golang.org>
366 lines
6.0 KiB
Go
366 lines
6.0 KiB
Go
// asmcheck
|
|
|
|
// Copyright 2018 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package codegen
|
|
|
|
import "math/bits"
|
|
|
|
/************************************
 * 64-bit instructions
 ************************************/
|
|
|
|
// bitcheck64_constleft returns 1 when bit 63, bit 60 or bit 0 of a is set,
// and 0 otherwise. Each mask is spelled as a constant left shift; the amd64
// directives require a bit-test instruction (BTQ or BTL) for every check.
func bitcheck64_constleft(a uint64) (n int) {
	// amd64:"BTQ\t[$]63"
	if a&(1<<63) != 0 {
		return 1
	}
	// amd64:"BTQ\t[$]60"
	if a&(1<<60) != 0 {
		return 1
	}
	// amd64:"BTL\t[$]0"
	if a&(1<<0) != 0 {
		return 1
	}
	return 0
}
|
|
|
|
// bitcheck64_constright tests single bits of the elements of a, selecting
// each bit with a constant right shift. It returns 1 as soon as one of the
// conditions holds: bit 63 of a[0] or a[1] is set, bit 63 of a[2] is clear,
// bit 60 of a[3] is clear, bit 1 of a[4] is clear, bit 0 of a[5] is clear,
// or bit 7 of a[6] is clear ((x>>5)&4 selects bit 7). Otherwise it returns 0.
// The amd64 directives require a BTQ/BTL with the matching bit index.
func bitcheck64_constright(a [8]uint64) (n int) {
	// amd64:"BTQ\t[$]63"
	if (a[0]>>63)&1 != 0 {
		return 1
	}
	// amd64:"BTQ\t[$]63"
	if a[1]>>63 != 0 {
		return 1
	}
	// amd64:"BTQ\t[$]63"
	if a[2]>>63 == 0 {
		return 1
	}
	// amd64:"BTQ\t[$]60"
	if (a[3]>>60)&1 == 0 {
		return 1
	}
	// amd64:"BTL\t[$]1"
	if (a[4]>>1)&1 == 0 {
		return 1
	}
	// amd64:"BTL\t[$]0"
	if (a[5]>>0)&1 == 0 {
		return 1
	}
	// amd64:"BTL\t[$]7"
	if (a[6]>>5)&4 == 0 {
		return 1
	}
	return 0
}
|
|
|
|
// bitcheck64_var returns 1 when bit (b&63) of a is set or bit (a&63) of b is
// set, and 0 otherwise. The bit index is a variable here; the amd64
// directives require BTQ, and the second one additionally rejects a bit test
// whose immediate index starts with 0.
func bitcheck64_var(a, b uint64) (n int) {
	// amd64:"BTQ"
	if a&(1<<(b&63)) != 0 {
		return 1
	}
	// amd64:"BTQ",-"BT.\t[$]0"
	if (b>>(a&63))&1 != 0 {
		return 1
	}
	return 0
}
|
|
|
|
// bitcheck64_mask returns 1 when bit 63, bit 59 or bit 0 of a is set, and 0
// otherwise. The masks are written as hexadecimal literals; the amd64
// directives require the corresponding BTQ/BTL bit tests.
func bitcheck64_mask(a uint64) (n int) {
	// amd64:"BTQ\t[$]63"
	if a&0x8000000000000000 != 0 {
		return 1
	}
	// amd64:"BTQ\t[$]59"
	if a&0x800000000000000 != 0 {
		return 1
	}
	// amd64:"BTL\t[$]0"
	if a&0x1 != 0 {
		return 1
	}
	return 0
}
|
|
|
|
// biton64 sums four values that each have one bit forced on: b with bit
// (a&63) set, and a with bit 63, bit 60 and bit 0 set. The amd64 directives
// require BTSQ for the variable, bit-63 and bit-60 cases and a plain ORQ $1
// for bit 0.
func biton64(a, b uint64) (n uint64) {
	// amd64:"BTSQ"
	n += b | (1 << (a & 63))

	// amd64:"BTSQ\t[$]63"
	n += a | (1 << 63)

	// amd64:"BTSQ\t[$]60"
	n += a | (1 << 60)

	// amd64:"ORQ\t[$]1"
	n += a | (1 << 0)

	return n
}
|
|
|
|
func bitoff64(a, b uint64) (n uint64) {
|
|
// amd64:"BTRQ"
|
|
n += b &^ (1 << (a & 63))
|
|
|
|
// amd64:"BTRQ\t[$]63"
|
|
n += a &^ (1 << 63)
|
|
|
|
// amd64:"BTRQ\t[$]60"
|
|
n += a &^ (1 << 60)
|
|
|
|
// amd64:"ANDQ\t[$]-2"
|
|
n += a &^ (1 << 0)
|
|
|
|
return n
|
|
}
|
|
|
|
// bitcompl64 sums four values that each have one bit flipped: b with bit
// (a&63) toggled, and a with bit 63, bit 60 and bit 0 toggled. The amd64
// directives require BTCQ for the variable, bit-63 and bit-60 cases and a
// plain XORQ $1 for bit 0.
func bitcompl64(a, b uint64) (n uint64) {
	// amd64:"BTCQ"
	n += b ^ (1 << (a & 63))

	// amd64:"BTCQ\t[$]63"
	n += a ^ (1 << 63)

	// amd64:"BTCQ\t[$]60"
	n += a ^ (1 << 60)

	// amd64:"XORQ\t[$]1"
	n += a ^ (1 << 0)

	return n
}
|
|
|
|
/************************************
 * 32-bit instructions
 ************************************/
|
|
|
|
// bitcheck32_constleft returns 1 when bit 31, bit 28 or bit 0 of a is set,
// and 0 otherwise. Each mask is spelled as a constant left shift; the amd64
// directives require a BTL bit test for every check.
func bitcheck32_constleft(a uint32) (n int) {
	// amd64:"BTL\t[$]31"
	if a&(1<<31) != 0 {
		return 1
	}
	// amd64:"BTL\t[$]28"
	if a&(1<<28) != 0 {
		return 1
	}
	// amd64:"BTL\t[$]0"
	if a&(1<<0) != 0 {
		return 1
	}
	return 0
}
|
|
|
|
// bitcheck32_constright tests single bits of the elements of a, selecting
// each bit with a constant right shift. It returns 1 as soon as one of the
// conditions holds: bit 31 of a[0] or a[1] is set, bit 31 of a[2] is clear,
// bit 28 of a[3] is clear, bit 1 of a[4] is clear, bit 0 of a[5] is clear,
// or bit 7 of a[6] is clear ((x>>5)&4 selects bit 7). Otherwise it returns 0.
// The amd64 directives require a BTL with the matching bit index.
func bitcheck32_constright(a [8]uint32) (n int) {
	// amd64:"BTL\t[$]31"
	if (a[0]>>31)&1 != 0 {
		return 1
	}
	// amd64:"BTL\t[$]31"
	if a[1]>>31 != 0 {
		return 1
	}
	// amd64:"BTL\t[$]31"
	if a[2]>>31 == 0 {
		return 1
	}
	// amd64:"BTL\t[$]28"
	if (a[3]>>28)&1 == 0 {
		return 1
	}
	// amd64:"BTL\t[$]1"
	if (a[4]>>1)&1 == 0 {
		return 1
	}
	// amd64:"BTL\t[$]0"
	if (a[5]>>0)&1 == 0 {
		return 1
	}
	// amd64:"BTL\t[$]7"
	if (a[6]>>5)&4 == 0 {
		return 1
	}
	return 0
}
|
|
|
|
// bitcheck32_var returns 1 when bit (b&31) of a is set or bit (a&31) of b is
// set, and 0 otherwise. The bit index is a variable here; the amd64
// directives require BTL, and the second one additionally rejects a bit test
// whose immediate index starts with 0.
func bitcheck32_var(a, b uint32) (n int) {
	// amd64:"BTL"
	if a&(1<<(b&31)) != 0 {
		return 1
	}
	// amd64:"BTL",-"BT.\t[$]0"
	if (b>>(a&31))&1 != 0 {
		return 1
	}
	return 0
}
|
|
|
|
// bitcheck32_mask returns 1 when bit 31, bit 27 or bit 0 of a is set, and 0
// otherwise. The masks are written as hexadecimal literals; the amd64
// directives require the corresponding BTL bit tests.
func bitcheck32_mask(a uint32) (n int) {
	// amd64:"BTL\t[$]31"
	if a&0x80000000 != 0 {
		return 1
	}
	// amd64:"BTL\t[$]27"
	if a&0x8000000 != 0 {
		return 1
	}
	// amd64:"BTL\t[$]0"
	if a&0x1 != 0 {
		return 1
	}
	return 0
}
|
|
|
|
// biton32 sums four values that each have one bit forced on: b with bit
// (a&31) set, and a with bit 31, bit 28 and bit 0 set. The amd64 directives
// require BTSL for the variable, bit-31 and bit-28 cases and a plain ORL $1
// for bit 0.
func biton32(a, b uint32) (n uint32) {
	// amd64:"BTSL"
	n += b | (1 << (a & 31))

	// amd64:"BTSL\t[$]31"
	n += a | (1 << 31)

	// amd64:"BTSL\t[$]28"
	n += a | (1 << 28)

	// amd64:"ORL\t[$]1"
	n += a | (1 << 0)

	return n
}
|
|
|
|
func bitoff32(a, b uint32) (n uint32) {
|
|
// amd64:"BTRL"
|
|
n += b &^ (1 << (a & 31))
|
|
|
|
// amd64:"BTRL\t[$]31"
|
|
n += a &^ (1 << 31)
|
|
|
|
// amd64:"BTRL\t[$]28"
|
|
n += a &^ (1 << 28)
|
|
|
|
// amd64:"ANDL\t[$]-2"
|
|
n += a &^ (1 << 0)
|
|
|
|
return n
|
|
}
|
|
|
|
// bitcompl32 sums four values that each have one bit flipped: b with bit
// (a&31) toggled, and a with bit 31, bit 28 and bit 0 toggled. The amd64
// directives require BTCL for the variable, bit-31 and bit-28 cases and a
// plain XORL $1 for bit 0.
func bitcompl32(a, b uint32) (n uint32) {
	// amd64:"BTCL"
	n += b ^ (1 << (a & 31))

	// amd64:"BTCL\t[$]31"
	n += a ^ (1 << 31)

	// amd64:"BTCL\t[$]28"
	n += a ^ (1 << 28)

	// amd64:"XORL\t[$]1"
	n += a ^ (1 << 0)

	return n
}
|
|
|
|
// check direct operation on memory with constant and shifted constant sources
|
|
// bitOpOnMem updates the first three elements of a in place: a[0] is ANDed
// with 200, a[1] is ORed with 220 and a[2] is XORed with 240. The amd64
// directives require each operation to be emitted as a single ANDL/ORL/XORL
// with an immediate source and a memory destination at offsets 0, 4 and 8.
// The parameters b, c and d are not used by the body. The slice must hold at
// least three elements, otherwise indexing panics.
func bitOpOnMem(a []uint32, b, c, d uint32) {
	// amd64:`ANDL\s[$]200,\s\([A-Z][A-Z0-9]+\)`
	a[0] &= 200
	// amd64:`ORL\s[$]220,\s4\([A-Z][A-Z0-9]+\)`
	a[1] |= 220
	// amd64:`XORL\s[$]240,\s8\([A-Z][A-Z0-9]+\)`
	a[2] ^= 240
}
|
|
|
|
// bitcheckMostNegative reports whether the top bit (0x80) of b is set. The
// amd64 directive requires the comparison to be emitted as a TESTB.
func bitcheckMostNegative(b uint8) bool {
	// amd64:"TESTB"
	return b&0x80 == 0x80
}
|
|
|
|
// Check AND masking on arm64 (Issue #19857)
|
|
|
|
// and_mask_1 clears bit 63 of a and keeps the low 63 bits. The arm64
// directive requires the mask to be applied with an AND instruction.
func and_mask_1(a uint64) uint64 {
	// arm64:`AND\t`
	return a & ((1 << 63) - 1)
}
|
|
|
|
// and_mask_2 keeps only bit 63 of a. The arm64 directive requires the mask
// to be applied with an AND instruction.
func and_mask_2(a uint64) uint64 {
	// arm64:`AND\t`
	return a & (1 << 63)
}
|
|
|
|
// and_mask_3 returns a masked with 0xffffaaaa and b masked with 0xffc003ff.
// On arm/7 the directives require the first mask to be emitted as BIC rather
// than AND, and the second (which clears one contiguous run of bits, 10
// through 21) as BFC rather than AND or BIC.
func and_mask_3(a, b uint32) (uint32, uint32) {
	// arm/7:`BIC`,-`AND`
	a &= 0xffffaaaa
	// arm/7:`BFC`,-`AND`,-`BIC`
	b &= 0xffc003ff
	return a, b
}
|
|
|
|
// Check generation of arm64 BIC/EON/ORN instructions
|
|
|
|
// op_bic returns x with every bit that is set in y cleared (x AND NOT y).
// The arm64 directive requires a single BIC and no separate AND.
func op_bic(x, y uint32) uint32 {
	// arm64:`BIC\t`,-`AND`
	return x &^ y
}
|
|
|
|
// op_eon exercises four spellings of "exclusive OR with a complemented
// operand". It stores x^^y (written as x ^ (y ^ 0xffffffff)) in a[0],
// ^(y^z) in a[1] and x^^z in a[2], then returns n ^ ^m (written with an
// all-ones 64-bit constant). The arm64 directives require each statement to
// be emitted as a single EON with no separate EOR or MVN. The slice must
// hold at least three elements, otherwise indexing panics.
func op_eon(x, y, z uint32, a []uint32, n, m uint64) uint64 {
	// arm64:`EON\t`,-`EOR`,-`MVN`
	a[0] = x ^ (y ^ 0xffffffff)

	// arm64:`EON\t`,-`EOR`,-`MVN`
	a[1] = ^(y ^ z)

	// The arm64 exclusive-OR mnemonic is EOR; a negative check for "XOR"
	// could never fail, so this uses the same checks as the statements above.
	// arm64:`EON\t`,-`EOR`,-`MVN`
	a[2] = x ^ ^z

	// arm64:`EON\t`,-`EOR`,-`MVN`
	return n ^ (m ^ 0xffffffffffffffff)
}
|
|
|
|
// op_orn returns x ORed with the bitwise complement of y. The arm64
// directive requires a single ORN and no separate ORR.
func op_orn(x, y uint32) uint32 {
	// arm64:`ORN\t`,-`ORR`
	return x | ^y
}
|
|
|
|
// check bitsets
|
|
// bitSetPowerOf2Test reports whether bit 3 (value 8) of x is set. Because
// the mask is a single bit, the amd64 directive requires a BTL $3 bit test.
func bitSetPowerOf2Test(x int) bool {
	// amd64:"BTL\t[$]3"
	return x&8 == 8
}
|
|
|
|
// bitSetTest reports whether both bit 0 and bit 3 of x are set (mask 9).
// The mask has more than one bit, so the amd64 directives require an ANDL $9
// followed by a CMPQ against 9.
func bitSetTest(x int) bool {
	// amd64:"ANDL\t[$]9, AX"
	// amd64:"CMPQ\tAX, [$]9"
	return x&9 == 9
}
|
|
|
|
// mask contiguous one bits
|
|
// cont1Mask64U keeps bits 16 through 47 of x (one contiguous run of ones in
// the mask) and clears the rest. The s390x directive requires a single
// RISBGZ with operands $16, $47, $0.
func cont1Mask64U(x uint64) uint64 {
	// s390x:"RISBGZ\t[$]16, [$]47, [$]0,"
	return x & 0x0000ffffffff0000
}
|
|
|
|
// mask contiguous zero bits
|
|
// cont0Mask64U clears bits 16 through 47 of x (one contiguous run of zeros
// in the mask) and keeps the top and bottom 16 bits. The s390x directive
// requires a single RISBGZ with operands $48, $15, $0.
func cont0Mask64U(x uint64) uint64 {
	// s390x:"RISBGZ\t[$]48, [$]15, [$]0,"
	return x & 0xffff00000000ffff
}
|
|
|
|
// issue44228a treats a as a bitmap of 64-bit words and reports whether bit i
// is set: word i>>6, bit i&63. The amd64 directive requires a BTQ and no SHL.
// The index i>>6 must be within a, otherwise indexing panics.
// The name presumably refers to Go issue 44228; confirm against the tracker.
func issue44228a(a []int64, i int) bool {
	// amd64: "BTQ", -"SHL"
	return a[i>>6]&(1<<(i&63)) != 0
}
|
|
// issue44228b treats a as a bitmap of 32-bit words and reports whether bit i
// is set: word i>>5, bit i&31. The amd64 directive requires a BTL and no SHL.
// The index i>>5 must be within a, otherwise indexing panics.
// The name presumably refers to Go issue 44228; confirm against the tracker.
func issue44228b(a []int32, i int) bool {
	// amd64: "BTL", -"SHL"
	return a[i>>5]&(1<<(i&31)) != 0
}
|
|
|
|
// issue48467 subtracts y from x with bits.Sub64 and uses the borrow as a
// mask: -borrow is all ones when x < y and zero otherwise, so the result is
// x - (x-y) = y when x < y and x when x >= y, i.e. the larger of the two.
// The arm64 directive requires that no NEG instruction is emitted for the
// negated borrow.
// The name presumably refers to Go issue 48467; confirm against the tracker.
func issue48467(x, y uint64) uint64 {
	// arm64: -"NEG"
	d, borrow := bits.Sub64(x, y, 0)
	return x - d&(-borrow)
}
|