mirror of
https://github.com/golang/go
synced 2024-11-16 19:24:49 -07:00
9f2411894b
BFC (Bit Field Clear) was introduced in ARMv7, which can simplify ANDconst and BICconst. And this CL implements that optimization. 1. The total size of pkg/android_arm decreases about 3KB, excluding cmd/compile/. 2. There is no regression in the go1 benchmark result, and some cases (FmtFprintfEmpty-4 and RegexpMatchMedium_32-4) even get slight improvement. name old time/op new time/op delta BinaryTree17-4 25.3s ± 1% 25.2s ± 1% ~ (p=0.072 n=30+29) Fannkuch11-4 13.3s ± 0% 13.3s ± 0% +0.13% (p=0.000 n=30+26) FmtFprintfEmpty-4 407ns ± 0% 394ns ± 0% -3.19% (p=0.000 n=26+28) FmtFprintfString-4 664ns ± 0% 662ns ± 0% -0.22% (p=0.000 n=30+30) FmtFprintfInt-4 712ns ± 0% 706ns ± 0% -0.79% (p=0.000 n=30+30) FmtFprintfIntInt-4 1.06µs ± 0% 1.05µs ± 0% -0.38% (p=0.000 n=30+30) FmtFprintfPrefixedInt-4 1.16µs ± 0% 1.16µs ± 0% -0.13% (p=0.000 n=30+29) FmtFprintfFloat-4 2.24µs ± 0% 2.23µs ± 0% -0.51% (p=0.000 n=29+21) FmtManyArgs-4 4.09µs ± 0% 4.06µs ± 0% -0.83% (p=0.000 n=28+30) GobDecode-4 55.0ms ± 5% 55.4ms ± 5% ~ (p=0.307 n=30+30) GobEncode-4 51.2ms ± 1% 51.9ms ± 1% +1.23% (p=0.000 n=29+30) Gzip-4 2.64s ± 0% 2.60s ± 0% -1.35% (p=0.000 n=30+29) Gunzip-4 309ms ± 0% 308ms ± 0% -0.27% (p=0.000 n=30+30) HTTPClientServer-4 1.03ms ± 5% 1.02ms ± 4% ~ (p=0.117 n=30+29) JSONEncode-4 101ms ± 2% 101ms ± 2% ~ (p=0.338 n=29+29) JSONDecode-4 383ms ± 2% 382ms ± 2% ~ (p=0.751 n=26+30) Mandelbrot200-4 18.4ms ± 0% 18.4ms ± 0% -0.10% (p=0.000 n=29+29) GoParse-4 22.6ms ± 0% 22.5ms ± 0% -0.39% (p=0.000 n=30+30) RegexpMatchEasy0_32-4 761ns ± 0% 750ns ± 0% -1.47% (p=0.000 n=26+29) RegexpMatchEasy0_1K-4 4.33µs ± 0% 4.34µs ± 0% +0.27% (p=0.000 n=25+28) RegexpMatchEasy1_32-4 809ns ± 0% 795ns ± 0% -1.74% (p=0.000 n=27+25) RegexpMatchEasy1_1K-4 5.54µs ± 0% 5.53µs ± 0% -0.18% (p=0.000 n=29+29) RegexpMatchMedium_32-4 1.11µs ± 0% 1.08µs ± 0% -2.78% (p=0.000 n=27+29) RegexpMatchMedium_1K-4 255µs ± 0% 255µs ± 0% -0.02% (p=0.029 n=30+30) RegexpMatchHard_32-4 14.7µs ± 0% 14.7µs ± 0% -0.28% (p=0.000 n=30+29) 
RegexpMatchHard_1K-4 439µs ± 0% 439µs ± 0% ~ (p=0.907 n=23+27) Revcomp-4 41.9ms ± 1% 41.9ms ± 1% ~ (p=0.230 n=28+30) Template-4 522ms ± 1% 528ms ± 1% +1.25% (p=0.000 n=30+30) TimeParse-4 3.34µs ± 0% 3.35µs ± 0% +0.23% (p=0.000 n=30+27) TimeFormat-4 6.06µs ± 0% 6.13µs ± 0% +1.08% (p=0.000 n=29+29) [Geo mean] 384µs 382µs -0.37% name old speed new speed delta GobDecode-4 14.0MB/s ± 5% 13.9MB/s ± 5% ~ (p=0.308 n=30+30) GobEncode-4 15.0MB/s ± 1% 14.8MB/s ± 1% -1.22% (p=0.000 n=29+30) Gzip-4 7.36MB/s ± 0% 7.46MB/s ± 0% +1.35% (p=0.000 n=30+30) Gunzip-4 62.8MB/s ± 0% 63.0MB/s ± 0% +0.27% (p=0.000 n=30+30) JSONEncode-4 19.2MB/s ± 2% 19.2MB/s ± 2% ~ (p=0.312 n=29+29) JSONDecode-4 5.05MB/s ± 3% 5.08MB/s ± 2% ~ (p=0.356 n=29+30) GoParse-4 2.56MB/s ± 0% 2.57MB/s ± 0% +0.39% (p=0.000 n=23+27) RegexpMatchEasy0_32-4 42.0MB/s ± 0% 42.6MB/s ± 0% +1.50% (p=0.000 n=26+28) RegexpMatchEasy0_1K-4 236MB/s ± 0% 236MB/s ± 0% -0.27% (p=0.000 n=25+28) RegexpMatchEasy1_32-4 39.6MB/s ± 0% 40.2MB/s ± 0% +1.73% (p=0.000 n=27+27) RegexpMatchEasy1_1K-4 185MB/s ± 0% 185MB/s ± 0% +0.18% (p=0.000 n=29+29) RegexpMatchMedium_32-4 900kB/s ± 0% 920kB/s ± 0% +2.22% (p=0.000 n=29+29) RegexpMatchMedium_1K-4 4.02MB/s ± 0% 4.02MB/s ± 0% +0.07% (p=0.004 n=30+27) RegexpMatchHard_32-4 2.17MB/s ± 0% 2.18MB/s ± 0% +0.46% (p=0.000 n=30+26) RegexpMatchHard_1K-4 2.33MB/s ± 0% 2.33MB/s ± 0% ~ (all equal) Revcomp-4 60.6MB/s ± 1% 60.7MB/s ± 1% ~ (p=0.207 n=28+30) Template-4 3.72MB/s ± 1% 3.67MB/s ± 1% -1.23% (p=0.000 n=30+30) [Geo mean] 12.9MB/s 12.9MB/s +0.29% Change-Id: I07f497f8bb476c950dc555491d00c9066fb64a4e Reviewed-on: https://go-review.googlesource.com/134232 Run-TryBot: Ben Shi <powerman1st@163.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Cherry Zhang <cherryyz@google.com>
311 lines
4.8 KiB
Go
311 lines
4.8 KiB
Go
// asmcheck
|
|
|
|
// Copyright 2018 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package codegen
|
|
|
|
/************************************
|
|
* 64-bit instructions
|
|
************************************/
|
|
|
|
// bitcheck64_constleft returns 1 when bit 63, bit 60 or bit 0 of a is set,
// and 0 otherwise. Every test masks a with a constant built by a left shift.
// Each amd64 comment is the asmcheck expectation for the statement directly
// below it: BTQ for bits 63 and 60, BTL for bit 0.
func bitcheck64_constleft(a uint64) (n int) {
	// amd64:"BTQ\t[$]63"
	if a&(1<<63) != 0 {
		return 1
	}
	// amd64:"BTQ\t[$]60"
	if a&(1<<60) != 0 {
		return 1
	}
	// amd64:"BTL\t[$]0"
	if a&(1<<0) != 0 {
		return 1
	}
	return 0
}
|
|
|
|
// bitcheck64_constright tests single bits of a[0]..a[6] using a constant
// right shift (a[7] is never read) and returns 1 at the first test that
// succeeds:
//
//	a[0] bit 63 set, a[1] bit 63 set, a[2] bit 63 clear, a[3] bit 60 clear,
//	a[4] bit 1 clear, a[5] bit 0 clear, a[6] bit 7 clear.
//
// It returns 0 when none of them succeeds. Each amd64 comment is the asmcheck
// expectation for the statement directly below it.
func bitcheck64_constright(a [8]uint64) (n int) {
	// amd64:"BTQ\t[$]63"
	if (a[0]>>63)&1 != 0 {
		return 1
	}
	// amd64:"BTQ\t[$]63"
	if a[1]>>63 != 0 {
		return 1
	}
	// amd64:"BTQ\t[$]63"
	if a[2]>>63 == 0 {
		return 1
	}
	// amd64:"BTQ\t[$]60"
	if (a[3]>>60)&1 == 0 {
		return 1
	}
	// amd64:"BTL\t[$]1"
	if (a[4]>>1)&1 == 0 {
		return 1
	}
	// amd64:"BTL\t[$]0"
	if (a[5]>>0)&1 == 0 {
		return 1
	}
	// (x>>5)&4 isolates bit 7 of x, hence the expected immediate 7.
	// amd64:"BTL\t[$]7"
	if (a[6]>>5)&4 == 0 {
		return 1
	}
	return 0
}
|
|
|
|
// bitcheck64_var returns 1 when bit (b&63) of a is set or when bit (a&63) of
// b is set, and 0 otherwise. The bit index is a run-time value in both tests.
// Each amd64 comment is the asmcheck expectation for the statement directly
// below it; the second one also forbids a bit test with constant index 0.
func bitcheck64_var(a, b uint64) (n int) {
	// amd64:"BTQ"
	if a&(1<<(b&63)) != 0 {
		return 1
	}
	// amd64:"BTQ",-"BT.\t[$]0"
	if (b>>(a&63))&1 != 0 {
		return 1
	}
	return 0
}
|
|
|
|
// bitcheck64_mask returns 1 when bit 63, bit 59 or bit 0 of a is set, and 0
// otherwise. The masks are written as hexadecimal literals instead of shifts.
// Each amd64 comment is the asmcheck expectation for the statement directly
// below it.
func bitcheck64_mask(a uint64) (n int) {
	// amd64:"BTQ\t[$]63"
	if a&0x8000000000000000 != 0 {
		return 1
	}
	// 0x800000000000000 has 15 hex digits, i.e. it is 1<<59.
	// amd64:"BTQ\t[$]59"
	if a&0x800000000000000 != 0 {
		return 1
	}
	// amd64:"BTL\t[$]0"
	if a&0x1 != 0 {
		return 1
	}
	return 0
}
|
|
|
|
// biton64 returns the wrapping sum of four bit-set results:
// b with bit (a&63) set, and a with bit 63, bit 60 and bit 0 set.
// Each amd64 comment is the asmcheck expectation for the statement directly
// below it: BTSQ for the variable and high constant bits, ORQ $1 for bit 0.
func biton64(a, b uint64) (n uint64) {
	// amd64:"BTSQ"
	n += b | (1 << (a & 63))

	// amd64:"BTSQ\t[$]63"
	n += a | (1 << 63)

	// amd64:"BTSQ\t[$]60"
	n += a | (1 << 60)

	// amd64:"ORQ\t[$]1"
	n += a | (1 << 0)

	return n
}
|
|
|
|
// bitoff64 returns the wrapping sum of four bit-clear results:
// b with bit (a&63) cleared, and a with bit 63, bit 60 and bit 0 cleared.
// Each amd64 comment is the asmcheck expectation for the statement directly
// below it: BTRQ for the variable and high constant bits, ANDQ $-2 for bit 0.
func bitoff64(a, b uint64) (n uint64) {
	// amd64:"BTRQ"
	n += b &^ (1 << (a & 63))

	// amd64:"BTRQ\t[$]63"
	n += a &^ (1 << 63)

	// amd64:"BTRQ\t[$]60"
	n += a &^ (1 << 60)

	// amd64:"ANDQ\t[$]-2"
	n += a &^ (1 << 0)

	return n
}
|
|
|
|
// bitcompl64 returns the wrapping sum of four bit-toggle results:
// b with bit (a&63) flipped, and a with bit 63, bit 60 and bit 0 flipped.
// Each amd64 comment is the asmcheck expectation for the statement directly
// below it: BTCQ for the variable and high constant bits, XORQ $1 for bit 0.
func bitcompl64(a, b uint64) (n uint64) {
	// amd64:"BTCQ"
	n += b ^ (1 << (a & 63))

	// amd64:"BTCQ\t[$]63"
	n += a ^ (1 << 63)

	// amd64:"BTCQ\t[$]60"
	n += a ^ (1 << 60)

	// amd64:"XORQ\t[$]1"
	n += a ^ (1 << 0)

	return n
}
|
|
|
|
/************************************
|
|
* 32-bit instructions
|
|
************************************/
|
|
|
|
// bitcheck32_constleft returns 1 when bit 31, bit 28 or bit 0 of a is set,
// and 0 otherwise. Every test masks a with a constant built by a left shift.
// Each amd64 comment is the asmcheck expectation (BTL with the given bit
// index) for the statement directly below it.
func bitcheck32_constleft(a uint32) (n int) {
	// amd64:"BTL\t[$]31"
	if a&(1<<31) != 0 {
		return 1
	}
	// amd64:"BTL\t[$]28"
	if a&(1<<28) != 0 {
		return 1
	}
	// amd64:"BTL\t[$]0"
	if a&(1<<0) != 0 {
		return 1
	}
	return 0
}
|
|
|
|
// bitcheck32_constright tests single bits of a[0]..a[6] using a constant
// right shift (a[7] is never read) and returns 1 at the first test that
// succeeds:
//
//	a[0] bit 31 set, a[1] bit 31 set, a[2] bit 31 clear, a[3] bit 28 clear,
//	a[4] bit 1 clear, a[5] bit 0 clear, a[6] bit 7 clear.
//
// It returns 0 when none of them succeeds. Each amd64 comment is the asmcheck
// expectation for the statement directly below it.
func bitcheck32_constright(a [8]uint32) (n int) {
	// amd64:"BTL\t[$]31"
	if (a[0]>>31)&1 != 0 {
		return 1
	}
	// amd64:"BTL\t[$]31"
	if a[1]>>31 != 0 {
		return 1
	}
	// amd64:"BTL\t[$]31"
	if a[2]>>31 == 0 {
		return 1
	}
	// amd64:"BTL\t[$]28"
	if (a[3]>>28)&1 == 0 {
		return 1
	}
	// amd64:"BTL\t[$]1"
	if (a[4]>>1)&1 == 0 {
		return 1
	}
	// amd64:"BTL\t[$]0"
	if (a[5]>>0)&1 == 0 {
		return 1
	}
	// (x>>5)&4 isolates bit 7 of x, hence the expected immediate 7.
	// amd64:"BTL\t[$]7"
	if (a[6]>>5)&4 == 0 {
		return 1
	}
	return 0
}
|
|
|
|
// bitcheck32_var returns 1 when bit (b&31) of a is set or when bit (a&31) of
// b is set, and 0 otherwise. The bit index is a run-time value in both tests.
// Each amd64 comment is the asmcheck expectation for the statement directly
// below it; the second one also forbids a bit test with constant index 0.
func bitcheck32_var(a, b uint32) (n int) {
	// amd64:"BTL"
	if a&(1<<(b&31)) != 0 {
		return 1
	}
	// amd64:"BTL",-"BT.\t[$]0"
	if (b>>(a&31))&1 != 0 {
		return 1
	}
	return 0
}
|
|
|
|
// bitcheck32_mask returns 1 when bit 31, bit 27 or bit 0 of a is set, and 0
// otherwise. The masks are written as hexadecimal literals instead of shifts.
// Each amd64 comment is the asmcheck expectation for the statement directly
// below it.
func bitcheck32_mask(a uint32) (n int) {
	// amd64:"BTL\t[$]31"
	if a&0x80000000 != 0 {
		return 1
	}
	// 0x8000000 has 7 hex digits, i.e. it is 1<<27.
	// amd64:"BTL\t[$]27"
	if a&0x8000000 != 0 {
		return 1
	}
	// amd64:"BTL\t[$]0"
	if a&0x1 != 0 {
		return 1
	}
	return 0
}
|
|
|
|
// biton32 returns the wrapping sum of four bit-set results:
// b with bit (a&31) set, and a with bit 31, bit 28 and bit 0 set.
// Each amd64 comment is the asmcheck expectation for the statement directly
// below it: BTSL for the variable and high constant bits, ORL $1 for bit 0.
func biton32(a, b uint32) (n uint32) {
	// amd64:"BTSL"
	n += b | (1 << (a & 31))

	// amd64:"BTSL\t[$]31"
	n += a | (1 << 31)

	// amd64:"BTSL\t[$]28"
	n += a | (1 << 28)

	// amd64:"ORL\t[$]1"
	n += a | (1 << 0)

	return n
}
|
|
|
|
// bitoff32 returns the wrapping sum of four bit-clear results:
// b with bit (a&31) cleared, and a with bit 31, bit 28 and bit 0 cleared.
// Each amd64 comment is the asmcheck expectation for the statement directly
// below it: BTRL for the variable and high constant bits, ANDL $-2 for bit 0.
func bitoff32(a, b uint32) (n uint32) {
	// amd64:"BTRL"
	n += b &^ (1 << (a & 31))

	// amd64:"BTRL\t[$]31"
	n += a &^ (1 << 31)

	// amd64:"BTRL\t[$]28"
	n += a &^ (1 << 28)

	// amd64:"ANDL\t[$]-2"
	n += a &^ (1 << 0)

	return n
}
|
|
|
|
// bitcompl32 returns the wrapping sum of four bit-toggle results:
// b with bit (a&31) flipped, and a with bit 31, bit 28 and bit 0 flipped.
// Each amd64 comment is the asmcheck expectation for the statement directly
// below it: BTCL for the variable and high constant bits, XORL $1 for bit 0.
func bitcompl32(a, b uint32) (n uint32) {
	// amd64:"BTCL"
	n += b ^ (1 << (a & 31))

	// amd64:"BTCL\t[$]31"
	n += a ^ (1 << 31)

	// amd64:"BTCL\t[$]28"
	n += a ^ (1 << 28)

	// amd64:"XORL\t[$]1"
	n += a ^ (1 << 0)

	return n
}
|
|
|
|
// bitOpOnMem checks direct operation on memory with constant source.
// It updates the first three elements of a in place: a[0] is ANDed with 200,
// a[1] is ORed with 220 and a[2] is XORed with 240. a must have at least
// three elements, otherwise the indexing panics. Each amd64 comment is the
// asmcheck expectation for the statement directly below it: the operation is
// expected to take an immediate and a memory operand at offsets 0, 4 and 8.
func bitOpOnMem(a []uint32) {
	// amd64:`ANDL\s[$]200,\s\([A-Z]+\)`
	a[0] &= 200
	// amd64:`ORL\s[$]220,\s4\([A-Z]+\)`
	a[1] |= 220
	// amd64:`XORL\s[$]240,\s8\([A-Z]+\)`
	a[2] ^= 240
}
|
|
|
|
// Check AND masking on arm64 (Issue #19857)
|
|
|
|
// and_mask_1 returns a with bit 63 cleared, i.e. a masked with (1<<63)-1.
// The arm64 comment is the asmcheck expectation (an AND instruction) for the
// return statement directly below it.
func and_mask_1(a uint64) uint64 {
	// arm64:`AND\t`
	return a & ((1 << 63) - 1)
}
|
|
|
|
// and_mask_2 returns only bit 63 of a, i.e. a masked with 1<<63.
// The arm64 comment is the asmcheck expectation (an AND instruction) for the
// return statement directly below it.
func and_mask_2(a uint64) uint64 {
	// arm64:`AND\t`
	return a & (1 << 63)
}
|
|
|
|
// and_mask_3 returns a masked with 0xffffaaaa and b masked with 0xffc003ff.
// The second mask clears the contiguous bit range 10..21 of b. Each arm/7
// comment is the asmcheck expectation for the statement directly below it:
// BIC without AND for the first mask, BFC without AND or BIC for the second.
func and_mask_3(a, b uint32) (uint32, uint32) {
	// arm/7:`BIC`,-`AND`
	a &= 0xffffaaaa
	// arm/7:`BFC`,-`AND`,-`BIC`
	b &= 0xffc003ff
	return a, b
}
|
|
|
|
// Check generation of arm64 BIC/EON/ORN instructions
|
|
|
|
// op_bic returns x with every bit that is set in y cleared (x AND NOT y).
// The arm64 comment is the asmcheck expectation for the return statement
// directly below it: a BIC instruction and no AND.
func op_bic(x, y uint32) uint32 {
	// arm64:`BIC\t`,-`AND`
	return x &^ y
}
|
|
|
|
// op_eon returns x XORed with the bitwise complement of y.
// The arm64 comment is the asmcheck expectation for the return statement
// directly below it: an EON instruction and no XOR.
func op_eon(x, y uint32) uint32 {
	// arm64:`EON\t`,-`XOR`
	return x ^ ^y
}
|
|
|
|
// op_orn returns x ORed with the bitwise complement of y.
// The arm64 comment is the asmcheck expectation for the return statement
// directly below it: an ORN instruction and no ORR.
func op_orn(x, y uint32) uint32 {
	// arm64:`ORN\t`,-`ORR`
	return x | ^y
}
|