2018-02-26 17:59:58 -07:00
|
|
|
// asmcheck
|
|
|
|
|
2018-03-02 13:06:09 -07:00
|
|
|
// Copyright 2018 The Go Authors. All rights reserved.
|
|
|
|
// Use of this source code is governed by a BSD-style
|
|
|
|
// license that can be found in the LICENSE file.
|
|
|
|
|
2018-02-26 17:59:58 -07:00
|
|
|
package codegen
|
|
|
|
|
cmd/compile: add patterns for bit set/clear/complement on amd64
This patch completes implementation of BT(Q|L), and adds support
for BT(S|R|C)(Q|L).
Example of code changes from time.(*Time).addSec:
if t.wall&hasMonotonic != 0 {
0x1073465 488b08 MOVQ 0(AX), CX
0x1073468 4889ca MOVQ CX, DX
0x107346b 48c1e93f SHRQ $0x3f, CX
0x107346f 48c1e13f SHLQ $0x3f, CX
0x1073473 48f7c1ffffffff TESTQ $-0x1, CX
0x107347a 746b JE 0x10734e7
if t.wall&hasMonotonic != 0 {
0x1073435 488b08 MOVQ 0(AX), CX
0x1073438 480fbae13f BTQ $0x3f, CX
0x107343d 7363 JAE 0x10734a2
Another example:
t.wall = t.wall&nsecMask | uint64(dsec)<<nsecShift | hasMonotonic
0x10734c8 4881e1ffffff3f ANDQ $0x3fffffff, CX
0x10734cf 48c1e61e SHLQ $0x1e, SI
0x10734d3 4809ce ORQ CX, SI
0x10734d6 48b90000000000000080 MOVQ $0x8000000000000000, CX
0x10734e0 4809f1 ORQ SI, CX
0x10734e3 488908 MOVQ CX, 0(AX)
t.wall = t.wall&nsecMask | uint64(dsec)<<nsecShift | hasMonotonic
0x107348b 4881e2ffffff3f ANDQ $0x3fffffff, DX
0x1073492 48c1e61e SHLQ $0x1e, SI
0x1073496 4809f2 ORQ SI, DX
0x1073499 480fbaea3f BTSQ $0x3f, DX
0x107349e 488910 MOVQ DX, 0(AX)
Go1 benchmarks seem unaffected, and I would be surprised
otherwise:
name old time/op new time/op delta
BinaryTree17-4 2.64s ± 4% 2.56s ± 9% -2.92% (p=0.008 n=9+9)
Fannkuch11-4 2.90s ± 1% 2.95s ± 3% +1.76% (p=0.010 n=10+9)
FmtFprintfEmpty-4 35.3ns ± 1% 34.5ns ± 2% -2.34% (p=0.004 n=9+8)
FmtFprintfString-4 57.0ns ± 1% 58.4ns ± 5% +2.52% (p=0.029 n=9+10)
FmtFprintfInt-4 59.8ns ± 3% 59.8ns ± 6% ~ (p=0.565 n=10+10)
FmtFprintfIntInt-4 93.9ns ± 3% 91.2ns ± 5% -2.94% (p=0.014 n=10+9)
FmtFprintfPrefixedInt-4 107ns ± 6% 104ns ± 6% ~ (p=0.099 n=10+10)
FmtFprintfFloat-4 187ns ± 3% 188ns ± 3% ~ (p=0.505 n=10+9)
FmtManyArgs-4 410ns ± 1% 415ns ± 6% ~ (p=0.649 n=8+10)
GobDecode-4 5.30ms ± 3% 5.27ms ± 3% ~ (p=0.436 n=10+10)
GobEncode-4 4.62ms ± 5% 4.47ms ± 2% -3.24% (p=0.001 n=9+10)
Gzip-4 197ms ± 4% 193ms ± 3% ~ (p=0.123 n=10+10)
Gunzip-4 30.4ms ± 3% 30.1ms ± 3% ~ (p=0.481 n=10+10)
HTTPClientServer-4 76.3µs ± 1% 76.0µs ± 1% ~ (p=0.236 n=8+9)
JSONEncode-4 10.5ms ± 9% 10.3ms ± 3% ~ (p=0.280 n=10+10)
JSONDecode-4 42.3ms ±10% 41.3ms ± 2% ~ (p=0.053 n=9+10)
Mandelbrot200-4 3.80ms ± 2% 3.72ms ± 2% -2.15% (p=0.001 n=9+10)
GoParse-4 2.88ms ±10% 2.81ms ± 2% ~ (p=0.247 n=10+10)
RegexpMatchEasy0_32-4 69.5ns ± 4% 68.6ns ± 2% ~ (p=0.171 n=10+10)
RegexpMatchEasy0_1K-4 165ns ± 3% 162ns ± 3% ~ (p=0.137 n=10+10)
RegexpMatchEasy1_32-4 65.7ns ± 6% 64.4ns ± 2% -2.02% (p=0.037 n=10+10)
RegexpMatchEasy1_1K-4 278ns ± 2% 279ns ± 3% ~ (p=0.991 n=8+9)
RegexpMatchMedium_32-4 99.3ns ± 3% 98.5ns ± 4% ~ (p=0.457 n=10+9)
RegexpMatchMedium_1K-4 30.1µs ± 1% 30.4µs ± 2% ~ (p=0.173 n=8+10)
RegexpMatchHard_32-4 1.40µs ± 2% 1.41µs ± 4% ~ (p=0.565 n=10+10)
RegexpMatchHard_1K-4 42.5µs ± 1% 41.5µs ± 3% -2.13% (p=0.002 n=8+9)
Revcomp-4 332ms ± 4% 328ms ± 5% ~ (p=0.720 n=9+10)
Template-4 48.3ms ± 2% 49.6ms ± 3% +2.56% (p=0.002 n=8+10)
TimeParse-4 252ns ± 2% 249ns ± 3% ~ (p=0.116 n=9+10)
TimeFormat-4 262ns ± 4% 252ns ± 3% -4.01% (p=0.000 n=9+10)
name old speed new speed delta
GobDecode-4 145MB/s ± 3% 146MB/s ± 3% ~ (p=0.436 n=10+10)
GobEncode-4 166MB/s ± 5% 172MB/s ± 2% +3.28% (p=0.001 n=9+10)
Gzip-4 98.6MB/s ± 4% 100.4MB/s ± 3% ~ (p=0.123 n=10+10)
Gunzip-4 639MB/s ± 3% 645MB/s ± 3% ~ (p=0.481 n=10+10)
JSONEncode-4 185MB/s ± 8% 189MB/s ± 3% ~ (p=0.280 n=10+10)
JSONDecode-4 46.0MB/s ± 9% 47.0MB/s ± 2% +2.21% (p=0.046 n=9+10)
GoParse-4 20.1MB/s ± 9% 20.6MB/s ± 2% ~ (p=0.239 n=10+10)
RegexpMatchEasy0_32-4 460MB/s ± 4% 467MB/s ± 2% ~ (p=0.165 n=10+10)
RegexpMatchEasy0_1K-4 6.19GB/s ± 3% 6.28GB/s ± 3% ~ (p=0.165 n=10+10)
RegexpMatchEasy1_32-4 487MB/s ± 5% 497MB/s ± 2% +2.00% (p=0.043 n=10+10)
RegexpMatchEasy1_1K-4 3.67GB/s ± 2% 3.67GB/s ± 3% ~ (p=0.963 n=8+9)
RegexpMatchMedium_32-4 10.1MB/s ± 3% 10.1MB/s ± 4% ~ (p=0.435 n=10+9)
RegexpMatchMedium_1K-4 34.0MB/s ± 1% 33.7MB/s ± 2% ~ (p=0.173 n=8+10)
RegexpMatchHard_32-4 22.9MB/s ± 2% 22.7MB/s ± 4% ~ (p=0.565 n=10+10)
RegexpMatchHard_1K-4 24.0MB/s ± 3% 24.7MB/s ± 3% +2.64% (p=0.001 n=9+9)
Revcomp-4 766MB/s ± 4% 775MB/s ± 5% ~ (p=0.720 n=9+10)
Template-4 40.2MB/s ± 2% 39.2MB/s ± 3% -2.47% (p=0.002 n=8+10)
The rules match ~1800 times during all.bash.
Fixes #18943
Change-Id: I64be1ada34e89c486dfd935bf429b35652117ed4
Reviewed-on: https://go-review.googlesource.com/94766
Run-TryBot: Giovanni Bajo <rasky@develer.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2018-02-17 05:54:03 -07:00
|
|
|
/************************************
|
|
|
|
* 64-bit instructions
|
|
|
|
************************************/
|
|
|
|
|
|
|
|
// bitcheck64_constleft reports, as 1 or 0, whether any of bits 63, 60
// or 0 of a is set. Each bit is tested with a constant mask built by a
// left shift of 1.
//
// The amd64 comments are assembly-check directives and must stay
// directly above the statement they annotate.
func bitcheck64_constleft(a uint64) (n int) {
	// amd64:"BTQ\t[$]63"
	if a&(1<<63) != 0 {
		return 1
	}
	// amd64:"BTQ\t[$]60"
	if a&(1<<60) != 0 {
		return 1
	}
	// amd64:"BTL\t[$]0"
	if a&(1<<0) != 0 {
		return 1
	}
	return 0
}
|
|
|
|
|
|
|
|
// bitcheck64_constright exercises single-bit tests written as a right
// shift by a constant, one array element per test so that the checks
// stay independent.
//
// It returns 1 as soon as one of the following holds, and 0 otherwise.
// a[0] or a[1] has bit 63 set. a[2] has bit 63 clear. a[3] has bit 60
// clear. a[4] has bit 1 clear. a[5] has bit 0 clear. a[6] has bit 7
// clear (shift by 5, then mask with 4). a[7] is never read.
//
// The amd64 comments are assembly-check directives and must stay
// directly above the statement they annotate.
func bitcheck64_constright(a [8]uint64) (n int) {
	// amd64:"BTQ\t[$]63"
	if (a[0]>>63)&1 != 0 {
		return 1
	}
	// amd64:"BTQ\t[$]63"
	if a[1]>>63 != 0 {
		return 1
	}
	// amd64:"BTQ\t[$]63"
	if a[2]>>63 == 0 {
		return 1
	}
	// amd64:"BTQ\t[$]60"
	if (a[3]>>60)&1 == 0 {
		return 1
	}
	// amd64:"BTL\t[$]1"
	if (a[4]>>1)&1 == 0 {
		return 1
	}
	// amd64:"BTL\t[$]0"
	if (a[5]>>0)&1 == 0 {
		return 1
	}
	// amd64:"BTL\t[$]7"
	if (a[6]>>5)&4 == 0 {
		return 1
	}
	return 0
}
|
|
|
|
|
|
|
|
// bitcheck64_var tests a single bit selected by a variable index.
//
// It returns 1 if bit (b mod 64) of a is set, or else if bit (a mod 64)
// of b is set, and 0 otherwise. The first test uses the mask form, the
// second the shift form. The second directive also rejects a bit test
// against the constant index 0.
//
// The amd64 comments are assembly-check directives and must stay
// directly above the statement they annotate.
func bitcheck64_var(a, b uint64) (n int) {
	// amd64:"BTQ"
	if a&(1<<(b&63)) != 0 {
		return 1
	}
	// amd64:"BTQ",-"BT.\t[$]0"
	if (b>>(a&63))&1 != 0 {
		return 1
	}
	return 0
}
|
|
|
|
|
|
|
|
// bitcheck64_mask reports, as 1 or 0, whether any of bits 63, 59 or 0
// of a is set. Unlike bitcheck64_constleft, the masks are spelled as
// hexadecimal literals rather than shifts.
//
// The amd64 comments are assembly-check directives and must stay
// directly above the statement they annotate.
func bitcheck64_mask(a uint64) (n int) {
	// amd64:"BTQ\t[$]63"
	if a&0x8000000000000000 != 0 {
		return 1
	}
	// amd64:"BTQ\t[$]59"
	if a&0x800000000000000 != 0 {
		return 1
	}
	// amd64:"BTL\t[$]0"
	if a&0x1 != 0 {
		return 1
	}
	return 0
}
|
|
|
|
|
|
|
|
// biton64 exercises setting a single bit in a 64-bit value.
//
// It returns the wrapping sum of four results, namely b with bit
// (a mod 64) set, then a with bit 63, bit 60 and bit 0 set in turn.
// Accumulating into n keeps every intermediate value live.
//
// The amd64 comments are assembly-check directives and must stay
// directly above the statement they annotate. The bit 0 case expects a
// plain OR with the immediate 1.
func biton64(a, b uint64) (n uint64) {
	// amd64:"BTSQ"
	n += b | (1 << (a & 63))

	// amd64:"BTSQ\t[$]63"
	n += a | (1 << 63)

	// amd64:"BTSQ\t[$]60"
	n += a | (1 << 60)

	// amd64:"ORQ\t[$]1"
	n += a | (1 << 0)

	return n
}
|
|
|
|
|
|
|
|
// bitoff64 exercises clearing a single bit in a 64-bit value.
//
// It returns the wrapping sum of four results, namely b with bit
// (a mod 64) cleared, then a with bit 63, bit 60 and bit 0 cleared in
// turn. Accumulating into n keeps every intermediate value live.
//
// The amd64 comments are assembly-check directives and must stay
// directly above the statement they annotate. The bit 0 case expects a
// plain AND with the immediate -2.
func bitoff64(a, b uint64) (n uint64) {
	// amd64:"BTRQ"
	n += b &^ (1 << (a & 63))

	// amd64:"BTRQ\t[$]63"
	n += a &^ (1 << 63)

	// amd64:"BTRQ\t[$]60"
	n += a &^ (1 << 60)

	// amd64:"ANDQ\t[$]-2"
	n += a &^ (1 << 0)

	return n
}
|
|
|
|
|
|
|
|
// bitcompl64 exercises toggling a single bit in a 64-bit value.
//
// It returns the wrapping sum of four results, namely b with bit
// (a mod 64) flipped, then a with bit 63, bit 60 and bit 0 flipped in
// turn. Accumulating into n keeps every intermediate value live.
//
// The amd64 comments are assembly-check directives and must stay
// directly above the statement they annotate. The bit 0 case expects a
// plain XOR with the immediate 1.
func bitcompl64(a, b uint64) (n uint64) {
	// amd64:"BTCQ"
	n += b ^ (1 << (a & 63))

	// amd64:"BTCQ\t[$]63"
	n += a ^ (1 << 63)

	// amd64:"BTCQ\t[$]60"
	n += a ^ (1 << 60)

	// amd64:"XORQ\t[$]1"
	n += a ^ (1 << 0)

	return n
}
|
|
|
|
|
|
|
|
/************************************
|
|
|
|
* 32-bit instructions
|
|
|
|
************************************/
|
|
|
|
|
|
|
|
// bitcheck32_constleft reports, as 1 or 0, whether any of bits 31, 28
// or 0 of a is set. Each bit is tested with a constant mask built by a
// left shift of 1.
//
// The amd64 comments are assembly-check directives and must stay
// directly above the statement they annotate.
func bitcheck32_constleft(a uint32) (n int) {
	// amd64:"BTL\t[$]31"
	if a&(1<<31) != 0 {
		return 1
	}
	// amd64:"BTL\t[$]28"
	if a&(1<<28) != 0 {
		return 1
	}
	// amd64:"BTL\t[$]0"
	if a&(1<<0) != 0 {
		return 1
	}
	return 0
}
|
|
|
|
|
|
|
|
// bitcheck32_constright exercises single-bit tests written as a right
// shift by a constant, one array element per test so that the checks
// stay independent.
//
// It returns 1 as soon as one of the following holds, and 0 otherwise.
// a[0] or a[1] has bit 31 set. a[2] has bit 31 clear. a[3] has bit 28
// clear. a[4] has bit 1 clear. a[5] has bit 0 clear. a[6] has bit 7
// clear (shift by 5, then mask with 4). a[7] is never read.
//
// The amd64 comments are assembly-check directives and must stay
// directly above the statement they annotate.
func bitcheck32_constright(a [8]uint32) (n int) {
	// amd64:"BTL\t[$]31"
	if (a[0]>>31)&1 != 0 {
		return 1
	}
	// amd64:"BTL\t[$]31"
	if a[1]>>31 != 0 {
		return 1
	}
	// amd64:"BTL\t[$]31"
	if a[2]>>31 == 0 {
		return 1
	}
	// amd64:"BTL\t[$]28"
	if (a[3]>>28)&1 == 0 {
		return 1
	}
	// amd64:"BTL\t[$]1"
	if (a[4]>>1)&1 == 0 {
		return 1
	}
	// amd64:"BTL\t[$]0"
	if (a[5]>>0)&1 == 0 {
		return 1
	}
	// amd64:"BTL\t[$]7"
	if (a[6]>>5)&4 == 0 {
		return 1
	}
	return 0
}
|
|
|
|
|
|
|
|
// bitcheck32_var tests a single bit selected by a variable index.
//
// It returns 1 if bit (b mod 32) of a is set, or else if bit (a mod 32)
// of b is set, and 0 otherwise. The first test uses the mask form, the
// second the shift form. The second directive also rejects a bit test
// against the constant index 0.
//
// The amd64 comments are assembly-check directives and must stay
// directly above the statement they annotate.
func bitcheck32_var(a, b uint32) (n int) {
	// amd64:"BTL"
	if a&(1<<(b&31)) != 0 {
		return 1
	}
	// amd64:"BTL",-"BT.\t[$]0"
	if (b>>(a&31))&1 != 0 {
		return 1
	}
	return 0
}
|
|
|
|
|
|
|
|
// bitcheck32_mask reports, as 1 or 0, whether any of bits 31, 27 or 0
// of a is set. Unlike bitcheck32_constleft, the masks are spelled as
// hexadecimal literals rather than shifts.
//
// The amd64 comments are assembly-check directives and must stay
// directly above the statement they annotate.
func bitcheck32_mask(a uint32) (n int) {
	// amd64:"BTL\t[$]31"
	if a&0x80000000 != 0 {
		return 1
	}
	// amd64:"BTL\t[$]27"
	if a&0x8000000 != 0 {
		return 1
	}
	// amd64:"BTL\t[$]0"
	if a&0x1 != 0 {
		return 1
	}
	return 0
}
|
|
|
|
|
|
|
|
// biton32 exercises setting a single bit in a 32-bit value.
//
// It returns the wrapping sum of four results, namely b with bit
// (a mod 32) set, then a with bit 31, bit 28 and bit 0 set in turn.
// Accumulating into n keeps every intermediate value live.
//
// The amd64 comments are assembly-check directives and must stay
// directly above the statement they annotate. The bit 0 case expects a
// plain OR with the immediate 1.
func biton32(a, b uint32) (n uint32) {
	// amd64:"BTSL"
	n += b | (1 << (a & 31))

	// amd64:"BTSL\t[$]31"
	n += a | (1 << 31)

	// amd64:"BTSL\t[$]28"
	n += a | (1 << 28)

	// amd64:"ORL\t[$]1"
	n += a | (1 << 0)

	return n
}
|
|
|
|
|
|
|
|
// bitoff32 exercises clearing a single bit in a 32-bit value.
//
// It returns the wrapping sum of four results, namely b with bit
// (a mod 32) cleared, then a with bit 31, bit 28 and bit 0 cleared in
// turn. Accumulating into n keeps every intermediate value live.
//
// The amd64 comments are assembly-check directives and must stay
// directly above the statement they annotate. The bit 0 case expects a
// plain AND with the immediate -2.
func bitoff32(a, b uint32) (n uint32) {
	// amd64:"BTRL"
	n += b &^ (1 << (a & 31))

	// amd64:"BTRL\t[$]31"
	n += a &^ (1 << 31)

	// amd64:"BTRL\t[$]28"
	n += a &^ (1 << 28)

	// amd64:"ANDL\t[$]-2"
	n += a &^ (1 << 0)

	return n
}
|
|
|
|
|
|
|
|
// bitcompl32 exercises toggling a single bit in a 32-bit value.
//
// It returns the wrapping sum of four results, namely b with bit
// (a mod 32) flipped, then a with bit 31, bit 28 and bit 0 flipped in
// turn. Accumulating into n keeps every intermediate value live.
//
// The amd64 comments are assembly-check directives and must stay
// directly above the statement they annotate. The bit 0 case expects a
// plain XOR with the immediate 1.
func bitcompl32(a, b uint32) (n uint32) {
	// amd64:"BTCL"
	n += b ^ (1 << (a & 31))

	// amd64:"BTCL\t[$]31"
	n += a ^ (1 << 31)

	// amd64:"BTCL\t[$]28"
	n += a ^ (1 << 28)

	// amd64:"XORL\t[$]1"
	n += a ^ (1 << 0)

	return n
}
|
2018-04-10 03:20:20 -06:00
|
|
|
|
2018-06-26 20:46:17 -06:00
|
|
|
// bitOpOnMem checks direct operation on memory with a constant source.
//
// It updates the first six elements of a in place. Elements 0 to 2 are
// ANDed with 200, ORed with 220 and XORed with 240. Elements 3 to 5 use
// single-bit constants, clearing bit 15, setting bit 14 and toggling
// bit 13, and their directives reject the plain AND, OR and XOR forms.
// Indexing panics if a holds fewer than six elements.
//
// The amd64 comments are assembly-check directives and must stay
// directly above the statement they annotate.
func bitOpOnMem(a []uint32) {
	// amd64:`ANDL\s[$]200,\s\([A-Z]+\)`
	a[0] &= 200
	// amd64:`ORL\s[$]220,\s4\([A-Z]+\)`
	a[1] |= 220
	// amd64:`XORL\s[$]240,\s8\([A-Z]+\)`
	a[2] ^= 240
	// amd64:`BTRL\s[$]15,\s12\([A-Z]+\)`,-`ANDL`
	a[3] &= 0xffff7fff
	// amd64:`BTSL\s[$]14,\s16\([A-Z]+\)`,-`ORL`
	a[4] |= 0x4000
	// amd64:`BTCL\s[$]13,\s20\([A-Z]+\)`,-`XORL`
	a[5] ^= 0x2000
}
|
|
|
|
|
2018-04-10 03:20:20 -06:00
|
|
|
// Check AND masking on arm64 (Issue #19857)
|
|
|
|
|
|
|
|
// and_mask_1 returns a with bit 63 cleared, written as an AND with the
// constant (1<<63)-1. The arm64 directive expects an AND instruction.
func and_mask_1(a uint64) uint64 {
	// arm64:`AND\t`
	return a & ((1 << 63) - 1)
}
|
|
|
|
|
|
|
|
// and_mask_2 returns a with every bit except bit 63 cleared, written as
// an AND with the constant 1<<63. The arm64 directive expects an AND
// instruction.
func and_mask_2(a uint64) uint64 {
	// arm64:`AND\t`
	return a & (1 << 63)
}
|
|
|
|
|
2018-09-10 02:29:52 -06:00
|
|
|
// and_mask_3 masks a with 0xffffaaaa and b with 0xffc003ff and returns
// both results.
//
// The arm/7 directives expect the first mask to use BIC rather than
// AND, and the second, which clears the contiguous bits 10 through 21,
// to use BFC rather than AND or BIC. Each directive must stay directly
// above the statement it annotates.
func and_mask_3(a, b uint32) (uint32, uint32) {
	// arm/7:`BIC`,-`AND`
	a &= 0xffffaaaa
	// arm/7:`BFC`,-`AND`,-`BIC`
	b &= 0xffc003ff
	return a, b
}
|
|
|
|
|
2018-04-10 03:20:20 -06:00
|
|
|
// Check generation of arm64 BIC/EON/ORN instructions
|
|
|
|
|
|
|
|
// op_bic returns x with every bit that is set in y cleared (AND NOT).
// The arm64 directive expects a BIC instruction and no AND.
func op_bic(x, y uint32) uint32 {
	// arm64:`BIC\t`,-`AND`
	return x &^ y
}
|
|
|
|
|
|
|
|
// op_eon returns x XORed with the bitwise complement of y. The arm64
// directive expects an EON instruction and no XOR.
func op_eon(x, y uint32) uint32 {
	// arm64:`EON\t`,-`XOR`
	return x ^ ^y
}
|
|
|
|
|
|
|
|
// op_orn returns x ORed with the bitwise complement of y. The arm64
// directive expects an ORN instruction and no ORR.
func op_orn(x, y uint32) uint32 {
	// arm64:`ORN\t`,-`ORR`
	return x | ^y
}
|
2019-05-08 04:02:23 -06:00
|
|
|
|
|
|
|
// bitSetPowerOf2Test checks bitset membership against a single-bit
// mask. It reports whether bit 3 of x is set, written as a compare of
// the masked value with the mask itself. The amd64 directive expects
// this to become a bit test of bit 3.
func bitSetPowerOf2Test(x int) bool {
	// amd64:"BTL\t[$]3"
	return x&8 == 8
}
|
|
|
|
|
|
|
|
// bitSetTest reports whether both bit 0 and bit 3 of x are set, written
// as a compare of the masked value with the mask 9. Because the mask
// has more than one bit, the amd64 directives expect a real AND
// followed by a compare rather than a single bit test. Both directives
// must stay directly above the return statement.
func bitSetTest(x int) bool {
	// amd64:"ANDQ\t[$]9, AX"
	// amd64:"CMPQ\tAX, [$]9"
	return x&9 == 9
}
|