1
0
mirror of https://github.com/golang/go synced 2024-11-16 19:24:49 -07:00
go/test/codegen/bits.go
Ben Shi 9f2411894b cmd/compile: optimize arm's bit operation
BFC (Bit Field Clear), introduced in ARMv7, can simplify ANDconst
and BICconst operations. This CL implements that optimization.

1. The total size of pkg/android_arm decreases about 3KB, excluding
cmd/compile/.

2. There is no regression in the go1 benchmark results, and some
cases (FmtFprintfEmpty-4 and RegexpMatchMedium_32-4) even improve
slightly.

name                     old time/op    new time/op    delta
BinaryTree17-4              25.3s ± 1%     25.2s ± 1%    ~     (p=0.072 n=30+29)
Fannkuch11-4                13.3s ± 0%     13.3s ± 0%  +0.13%  (p=0.000 n=30+26)
FmtFprintfEmpty-4           407ns ± 0%     394ns ± 0%  -3.19%  (p=0.000 n=26+28)
FmtFprintfString-4          664ns ± 0%     662ns ± 0%  -0.22%  (p=0.000 n=30+30)
FmtFprintfInt-4             712ns ± 0%     706ns ± 0%  -0.79%  (p=0.000 n=30+30)
FmtFprintfIntInt-4         1.06µs ± 0%    1.05µs ± 0%  -0.38%  (p=0.000 n=30+30)
FmtFprintfPrefixedInt-4    1.16µs ± 0%    1.16µs ± 0%  -0.13%  (p=0.000 n=30+29)
FmtFprintfFloat-4          2.24µs ± 0%    2.23µs ± 0%  -0.51%  (p=0.000 n=29+21)
FmtManyArgs-4              4.09µs ± 0%    4.06µs ± 0%  -0.83%  (p=0.000 n=28+30)
GobDecode-4                55.0ms ± 5%    55.4ms ± 5%    ~     (p=0.307 n=30+30)
GobEncode-4                51.2ms ± 1%    51.9ms ± 1%  +1.23%  (p=0.000 n=29+30)
Gzip-4                      2.64s ± 0%     2.60s ± 0%  -1.35%  (p=0.000 n=30+29)
Gunzip-4                    309ms ± 0%     308ms ± 0%  -0.27%  (p=0.000 n=30+30)
HTTPClientServer-4         1.03ms ± 5%    1.02ms ± 4%    ~     (p=0.117 n=30+29)
JSONEncode-4                101ms ± 2%     101ms ± 2%    ~     (p=0.338 n=29+29)
JSONDecode-4                383ms ± 2%     382ms ± 2%    ~     (p=0.751 n=26+30)
Mandelbrot200-4            18.4ms ± 0%    18.4ms ± 0%  -0.10%  (p=0.000 n=29+29)
GoParse-4                  22.6ms ± 0%    22.5ms ± 0%  -0.39%  (p=0.000 n=30+30)
RegexpMatchEasy0_32-4       761ns ± 0%     750ns ± 0%  -1.47%  (p=0.000 n=26+29)
RegexpMatchEasy0_1K-4      4.33µs ± 0%    4.34µs ± 0%  +0.27%  (p=0.000 n=25+28)
RegexpMatchEasy1_32-4       809ns ± 0%     795ns ± 0%  -1.74%  (p=0.000 n=27+25)
RegexpMatchEasy1_1K-4      5.54µs ± 0%    5.53µs ± 0%  -0.18%  (p=0.000 n=29+29)
RegexpMatchMedium_32-4     1.11µs ± 0%    1.08µs ± 0%  -2.78%  (p=0.000 n=27+29)
RegexpMatchMedium_1K-4      255µs ± 0%     255µs ± 0%  -0.02%  (p=0.029 n=30+30)
RegexpMatchHard_32-4       14.7µs ± 0%    14.7µs ± 0%  -0.28%  (p=0.000 n=30+29)
RegexpMatchHard_1K-4        439µs ± 0%     439µs ± 0%    ~     (p=0.907 n=23+27)
Revcomp-4                  41.9ms ± 1%    41.9ms ± 1%    ~     (p=0.230 n=28+30)
Template-4                  522ms ± 1%     528ms ± 1%  +1.25%  (p=0.000 n=30+30)
TimeParse-4                3.34µs ± 0%    3.35µs ± 0%  +0.23%  (p=0.000 n=30+27)
TimeFormat-4               6.06µs ± 0%    6.13µs ± 0%  +1.08%  (p=0.000 n=29+29)
[Geo mean]                  384µs          382µs       -0.37%

name                     old speed      new speed      delta
GobDecode-4              14.0MB/s ± 5%  13.9MB/s ± 5%    ~     (p=0.308 n=30+30)
GobEncode-4              15.0MB/s ± 1%  14.8MB/s ± 1%  -1.22%  (p=0.000 n=29+30)
Gzip-4                   7.36MB/s ± 0%  7.46MB/s ± 0%  +1.35%  (p=0.000 n=30+30)
Gunzip-4                 62.8MB/s ± 0%  63.0MB/s ± 0%  +0.27%  (p=0.000 n=30+30)
JSONEncode-4             19.2MB/s ± 2%  19.2MB/s ± 2%    ~     (p=0.312 n=29+29)
JSONDecode-4             5.05MB/s ± 3%  5.08MB/s ± 2%    ~     (p=0.356 n=29+30)
GoParse-4                2.56MB/s ± 0%  2.57MB/s ± 0%  +0.39%  (p=0.000 n=23+27)
RegexpMatchEasy0_32-4    42.0MB/s ± 0%  42.6MB/s ± 0%  +1.50%  (p=0.000 n=26+28)
RegexpMatchEasy0_1K-4     236MB/s ± 0%   236MB/s ± 0%  -0.27%  (p=0.000 n=25+28)
RegexpMatchEasy1_32-4    39.6MB/s ± 0%  40.2MB/s ± 0%  +1.73%  (p=0.000 n=27+27)
RegexpMatchEasy1_1K-4     185MB/s ± 0%   185MB/s ± 0%  +0.18%  (p=0.000 n=29+29)
RegexpMatchMedium_32-4    900kB/s ± 0%   920kB/s ± 0%  +2.22%  (p=0.000 n=29+29)
RegexpMatchMedium_1K-4   4.02MB/s ± 0%  4.02MB/s ± 0%  +0.07%  (p=0.004 n=30+27)
RegexpMatchHard_32-4     2.17MB/s ± 0%  2.18MB/s ± 0%  +0.46%  (p=0.000 n=30+26)
RegexpMatchHard_1K-4     2.33MB/s ± 0%  2.33MB/s ± 0%    ~     (all equal)
Revcomp-4                60.6MB/s ± 1%  60.7MB/s ± 1%    ~     (p=0.207 n=28+30)
Template-4               3.72MB/s ± 1%  3.67MB/s ± 1%  -1.23%  (p=0.000 n=30+30)
[Geo mean]               12.9MB/s       12.9MB/s       +0.29%

Change-Id: I07f497f8bb476c950dc555491d00c9066fb64a4e
Reviewed-on: https://go-review.googlesource.com/134232
Run-TryBot: Ben Shi <powerman1st@163.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
2018-09-11 14:37:51 +00:00

311 lines
4.8 KiB
Go

// asmcheck
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package codegen
/************************************
* 64-bit instructions
************************************/
// bitcheck64_constleft returns 1 if bit 63, bit 60 or bit 0 of a is set,
// and 0 otherwise. Every test builds its mask by left-shifting the
// constant 1 by a constant amount. The asmcheck directives expect amd64
// to emit a bit-test instruction with the bit index as an immediate
// (BTQ for bits 63 and 60, BTL for bit 0).
func bitcheck64_constleft(a uint64) (n int) {
// amd64:"BTQ\t[$]63"
if a&(1<<63) != 0 {
return 1
}
// amd64:"BTQ\t[$]60"
if a&(1<<60) != 0 {
return 1
}
// amd64:"BTL\t[$]0"
if a&(1<<0) != 0 {
return 1
}
return 0
}
// bitcheck64_constright tests single bits of the elements of a by
// shifting right by a constant and, in most cases, masking the result.
// It returns 1 as soon as one of the conditions holds and 0 if none
// does. Note that the conditions mix "bit set" (!= 0) and "bit clear"
// (== 0) tests. Only a[0] through a[6] are read; a[7] is unused.
// The directives expect amd64 to emit BTQ or BTL with the tested bit
// index as an immediate.
func bitcheck64_constright(a [8]uint64) (n int) {
// amd64:"BTQ\t[$]63"
if (a[0]>>63)&1 != 0 {
return 1
}
// After shifting a uint64 right by 63 only the former bit 63 remains,
// so no explicit mask is needed for the next two tests.
// amd64:"BTQ\t[$]63"
if a[1]>>63 != 0 {
return 1
}
// amd64:"BTQ\t[$]63"
if a[2]>>63 == 0 {
return 1
}
// amd64:"BTQ\t[$]60"
if (a[3]>>60)&1 == 0 {
return 1
}
// amd64:"BTL\t[$]1"
if (a[4]>>1)&1 == 0 {
return 1
}
// amd64:"BTL\t[$]0"
if (a[5]>>0)&1 == 0 {
return 1
}
// Shifting right by 5 and masking with 4 (bit 2) selects bit 5+2 = 7
// of a[6], which is the index the directive expects.
// amd64:"BTL\t[$]7"
if (a[6]>>5)&4 == 0 {
return 1
}
return 0
}
// bitcheck64_var tests a bit whose index is only known at run time.
// It returns 1 if bit (b&63) of a is set, or if bit (a&63) of b is set,
// and 0 otherwise. The index is masked with 63 in both cases, so the
// shift count is always below the 64-bit operand width. The directives
// expect amd64 to emit BTQ for both forms.
func bitcheck64_var(a, b uint64) (n int) {
// Variable index, mask built with a left shift.
// amd64:"BTQ"
if a&(1<<(b&63)) != 0 {
return 1
}
// Variable index, bit extracted with a right shift. The second,
// negated pattern additionally rejects a bit-test instruction whose
// bit index is the immediate 0.
// amd64:"BTQ",-"BT.\t[$]0"
if (b>>(a&63))&1 != 0 {
return 1
}
return 0
}
// bitcheck64_mask returns 1 if bit 63, bit 59 or bit 0 of a is set, and
// 0 otherwise. Unlike the shift-based variants, the single-bit masks are
// written as literal hexadecimal constants. The directives expect amd64
// to emit BTQ or BTL with the corresponding bit index as an immediate.
func bitcheck64_mask(a uint64) (n int) {
// amd64:"BTQ\t[$]63"
if a&0x8000000000000000 != 0 {
return 1
}
// This literal has 15 hex digits, so its only set bit is bit 59.
// amd64:"BTQ\t[$]59"
if a&0x800000000000000 != 0 {
return 1
}
// amd64:"BTL\t[$]0"
if a&0x1 != 0 {
return 1
}
return 0
}
// biton64 sets a single bit with OR in four different ways and returns
// the sum of the four results (n starts at zero; uint64 addition wraps).
// The directives expect amd64 to emit BTSQ for the variable index and
// for constant bits 63 and 60.
func biton64(a, b uint64) (n uint64) {
// Variable bit index, limited to 0..63 by the mask.
// amd64:"BTSQ"
n += b | (1 << (a & 63))
// amd64:"BTSQ\t[$]63"
n += a | (1 << 63)
// amd64:"BTSQ\t[$]60"
n += a | (1 << 60)
// For bit 0 the directive expects an ORQ with the immediate 1.
// amd64:"ORQ\t[$]1"
n += a | (1 << 0)
return n
}
// bitoff64 clears a single bit with AND NOT (&^) in four different ways
// and returns the sum of the four results (n starts at zero; uint64
// addition wraps). The directives expect amd64 to emit BTRQ for the
// variable index and for constant bits 63 and 60.
func bitoff64(a, b uint64) (n uint64) {
// Variable bit index, limited to 0..63 by the mask.
// amd64:"BTRQ"
n += b &^ (1 << (a & 63))
// amd64:"BTRQ\t[$]63"
n += a &^ (1 << 63)
// amd64:"BTRQ\t[$]60"
n += a &^ (1 << 60)
// For bit 0 the directive expects an ANDQ with the immediate -2,
// the two's-complement value whose only clear bit is bit 0.
// amd64:"ANDQ\t[$]-2"
n += a &^ (1 << 0)
return n
}
// bitcompl64 toggles a single bit with XOR in four different ways and
// returns the sum of the four results (n starts at zero; uint64 addition
// wraps). The directives expect amd64 to emit BTCQ for the variable
// index and for constant bits 63 and 60.
func bitcompl64(a, b uint64) (n uint64) {
// Variable bit index, limited to 0..63 by the mask.
// amd64:"BTCQ"
n += b ^ (1 << (a & 63))
// amd64:"BTCQ\t[$]63"
n += a ^ (1 << 63)
// amd64:"BTCQ\t[$]60"
n += a ^ (1 << 60)
// For bit 0 the directive expects an XORQ with the immediate 1.
// amd64:"XORQ\t[$]1"
n += a ^ (1 << 0)
return n
}
/************************************
* 32-bit instructions
************************************/
// bitcheck32_constleft returns 1 if bit 31, bit 28 or bit 0 of a is set,
// and 0 otherwise. Every test builds its mask by left-shifting the
// constant 1 by a constant amount. The directives expect amd64 to emit
// BTL with the bit index as an immediate.
func bitcheck32_constleft(a uint32) (n int) {
// amd64:"BTL\t[$]31"
if a&(1<<31) != 0 {
return 1
}
// amd64:"BTL\t[$]28"
if a&(1<<28) != 0 {
return 1
}
// amd64:"BTL\t[$]0"
if a&(1<<0) != 0 {
return 1
}
return 0
}
// bitcheck32_constright tests single bits of the elements of a by
// shifting right by a constant and, in most cases, masking the result.
// It returns 1 as soon as one of the conditions holds and 0 if none
// does. Note that the conditions mix "bit set" (!= 0) and "bit clear"
// (== 0) tests. Only a[0] through a[6] are read; a[7] is unused.
// The directives expect amd64 to emit BTL with the tested bit index as
// an immediate.
func bitcheck32_constright(a [8]uint32) (n int) {
// amd64:"BTL\t[$]31"
if (a[0]>>31)&1 != 0 {
return 1
}
// After shifting a uint32 right by 31 only the former bit 31 remains,
// so no explicit mask is needed for the next two tests.
// amd64:"BTL\t[$]31"
if a[1]>>31 != 0 {
return 1
}
// amd64:"BTL\t[$]31"
if a[2]>>31 == 0 {
return 1
}
// amd64:"BTL\t[$]28"
if (a[3]>>28)&1 == 0 {
return 1
}
// amd64:"BTL\t[$]1"
if (a[4]>>1)&1 == 0 {
return 1
}
// amd64:"BTL\t[$]0"
if (a[5]>>0)&1 == 0 {
return 1
}
// Shifting right by 5 and masking with 4 (bit 2) selects bit 5+2 = 7
// of a[6], which is the index the directive expects.
// amd64:"BTL\t[$]7"
if (a[6]>>5)&4 == 0 {
return 1
}
return 0
}
// bitcheck32_var tests a bit whose index is only known at run time.
// It returns 1 if bit (b&31) of a is set, or if bit (a&31) of b is set,
// and 0 otherwise. The index is masked with 31 in both cases, so the
// shift count is always below the 32-bit operand width. The directives
// expect amd64 to emit BTL for both forms.
func bitcheck32_var(a, b uint32) (n int) {
// Variable index, mask built with a left shift.
// amd64:"BTL"
if a&(1<<(b&31)) != 0 {
return 1
}
// Variable index, bit extracted with a right shift. The second,
// negated pattern additionally rejects a bit-test instruction whose
// bit index is the immediate 0.
// amd64:"BTL",-"BT.\t[$]0"
if (b>>(a&31))&1 != 0 {
return 1
}
return 0
}
// bitcheck32_mask returns 1 if bit 31, bit 27 or bit 0 of a is set, and
// 0 otherwise. Unlike the shift-based variants, the single-bit masks are
// written as literal hexadecimal constants. The directives expect amd64
// to emit BTL with the corresponding bit index as an immediate.
func bitcheck32_mask(a uint32) (n int) {
// amd64:"BTL\t[$]31"
if a&0x80000000 != 0 {
return 1
}
// This literal has 7 hex digits, so its only set bit is bit 27.
// amd64:"BTL\t[$]27"
if a&0x8000000 != 0 {
return 1
}
// amd64:"BTL\t[$]0"
if a&0x1 != 0 {
return 1
}
return 0
}
// biton32 sets a single bit with OR in four different ways and returns
// the sum of the four results (n starts at zero; uint32 addition wraps).
// The directives expect amd64 to emit BTSL for the variable index and
// for constant bits 31 and 28.
func biton32(a, b uint32) (n uint32) {
// Variable bit index, limited to 0..31 by the mask.
// amd64:"BTSL"
n += b | (1 << (a & 31))
// amd64:"BTSL\t[$]31"
n += a | (1 << 31)
// amd64:"BTSL\t[$]28"
n += a | (1 << 28)
// For bit 0 the directive expects an ORL with the immediate 1.
// amd64:"ORL\t[$]1"
n += a | (1 << 0)
return n
}
// bitoff32 clears a single bit with AND NOT (&^) in four different ways
// and returns the sum of the four results (n starts at zero; uint32
// addition wraps). The directives expect amd64 to emit BTRL for the
// variable index and for constant bits 31 and 28.
func bitoff32(a, b uint32) (n uint32) {
// Variable bit index, limited to 0..31 by the mask.
// amd64:"BTRL"
n += b &^ (1 << (a & 31))
// amd64:"BTRL\t[$]31"
n += a &^ (1 << 31)
// amd64:"BTRL\t[$]28"
n += a &^ (1 << 28)
// For bit 0 the directive expects an ANDL with the immediate -2,
// the two's-complement value whose only clear bit is bit 0.
// amd64:"ANDL\t[$]-2"
n += a &^ (1 << 0)
return n
}
// bitcompl32 toggles a single bit with XOR in four different ways and
// returns the sum of the four results (n starts at zero; uint32 addition
// wraps). The directives expect amd64 to emit BTCL for the variable
// index and for constant bits 31 and 28.
func bitcompl32(a, b uint32) (n uint32) {
// Variable bit index, limited to 0..31 by the mask.
// amd64:"BTCL"
n += b ^ (1 << (a & 31))
// amd64:"BTCL\t[$]31"
n += a ^ (1 << 31)
// amd64:"BTCL\t[$]28"
n += a ^ (1 << 28)
// For bit 0 the directive expects an XORL with the immediate 1.
// amd64:"XORL\t[$]1"
n += a ^ (1 << 0)
return n
}
// bitOpOnMem checks direct bitwise operations on memory with a constant
// source. It updates a[0], a[1] and a[2] in place with AND, OR and XOR
// respectively. The directives expect amd64 to emit ANDL, ORL and XORL
// with the constant as an immediate and a memory destination at byte
// offsets 0, 4 and 8 from a base register. a must hold at least three
// elements, otherwise the indexing panics.
func bitOpOnMem(a []uint32) {
// amd64:`ANDL\s[$]200,\s\([A-Z]+\)`
a[0] &= 200
// amd64:`ORL\s[$]220,\s4\([A-Z]+\)`
a[1] |= 220
// amd64:`XORL\s[$]240,\s8\([A-Z]+\)`
a[2] ^= 240
}
// Check AND masking on arm64 (Issue #19857)
// and_mask_1 returns a with bit 63 cleared, by ANDing it with
// (1<<63)-1, the mask of the low 63 bits. The directive expects arm64
// to emit an AND instruction.
func and_mask_1(a uint64) uint64 {
// arm64:`AND\t`
return a & ((1 << 63) - 1)
}
// and_mask_2 returns a with every bit except bit 63 cleared, by ANDing
// it with the single-bit mask 1<<63. The directive expects arm64 to
// emit an AND instruction.
func and_mask_2(a uint64) uint64 {
// arm64:`AND\t`
return a & (1 << 63)
}
// and_mask_3 masks a and b with two 32-bit constants and returns both
// results. The directives apply to the arm/7 target. The first AND is
// expected to be emitted as BIC with no AND instruction. The second is
// expected to be emitted as BFC with neither AND nor BIC.
func and_mask_3(a, b uint32) (uint32, uint32) {
// The mask clears the even-numbered bits of the low 16 bits
// (complement 0x00005555).
// arm/7:`BIC`,-`AND`
a &= 0xffffaaaa
// The mask clears the contiguous field of bits 10 through 21
// (complement 0x003ffc00).
// arm/7:`BFC`,-`AND`,-`BIC`
b &= 0xffc003ff
return a, b
}
// Check generation of arm64 BIC/EON/ORN instructions
// op_bic returns x with every bit that is set in y cleared (x AND NOT y).
// The directive expects arm64 to emit a single BIC instruction and no
// AND instruction.
func op_bic(x, y uint32) uint32 {
// arm64:`BIC\t`,-`AND`
return x &^ y
}
// op_eon returns x XOR (NOT y). The directive expects arm64 to fuse the
// complement and the exclusive OR into a single EON instruction.
//
// The negated pattern uses EOR, the arm64 mnemonic for exclusive OR, so
// that it really fails when the unfused form is generated. The pattern
// used previously named a mnemonic that arm64 does not have and could
// therefore never match.
func op_eon(x, y uint32) uint32 {
	// arm64:`EON\t`,-`EOR`
	return x ^ ^y
}
// op_orn returns x OR (NOT y). The directive expects arm64 to fuse the
// complement and the OR into a single ORN instruction and to emit no
// ORR instruction.
func op_orn(x, y uint32) uint32 {
// arm64:`ORN\t`,-`ORR`
return x | ^y
}