mirror of
https://github.com/golang/go
synced 2024-11-26 18:26:48 -07:00
cmd/compile: add patterns for bit set/clear/complement on amd64
This patch completes implementation of BT(Q|L), and adds support for BT(S|R|C)(Q|L). Example of code changes from time.(*Time).addSec: if t.wall&hasMonotonic != 0 { 0x1073465 488b08 MOVQ 0(AX), CX 0x1073468 4889ca MOVQ CX, DX 0x107346b 48c1e93f SHRQ $0x3f, CX 0x107346f 48c1e13f SHLQ $0x3f, CX 0x1073473 48f7c1ffffffff TESTQ $-0x1, CX 0x107347a 746b JE 0x10734e7 if t.wall&hasMonotonic != 0 { 0x1073435 488b08 MOVQ 0(AX), CX 0x1073438 480fbae13f BTQ $0x3f, CX 0x107343d 7363 JAE 0x10734a2 Another example: t.wall = t.wall&nsecMask | uint64(dsec)<<nsecShift | hasMonotonic 0x10734c8 4881e1ffffff3f ANDQ $0x3fffffff, CX 0x10734cf 48c1e61e SHLQ $0x1e, SI 0x10734d3 4809ce ORQ CX, SI 0x10734d6 48b90000000000000080 MOVQ $0x8000000000000000, CX 0x10734e0 4809f1 ORQ SI, CX 0x10734e3 488908 MOVQ CX, 0(AX) t.wall = t.wall&nsecMask | uint64(dsec)<<nsecShift | hasMonotonic 0x107348b 4881e2ffffff3f ANDQ $0x3fffffff, DX 0x1073492 48c1e61e SHLQ $0x1e, SI 0x1073496 4809f2 ORQ SI, DX 0x1073499 480fbaea3f BTSQ $0x3f, DX 0x107349e 488910 MOVQ DX, 0(AX) Go1 benchmarks seem unaffected, and I would be surprised otherwise: name old time/op new time/op delta BinaryTree17-4 2.64s ± 4% 2.56s ± 9% -2.92% (p=0.008 n=9+9) Fannkuch11-4 2.90s ± 1% 2.95s ± 3% +1.76% (p=0.010 n=10+9) FmtFprintfEmpty-4 35.3ns ± 1% 34.5ns ± 2% -2.34% (p=0.004 n=9+8) FmtFprintfString-4 57.0ns ± 1% 58.4ns ± 5% +2.52% (p=0.029 n=9+10) FmtFprintfInt-4 59.8ns ± 3% 59.8ns ± 6% ~ (p=0.565 n=10+10) FmtFprintfIntInt-4 93.9ns ± 3% 91.2ns ± 5% -2.94% (p=0.014 n=10+9) FmtFprintfPrefixedInt-4 107ns ± 6% 104ns ± 6% ~ (p=0.099 n=10+10) FmtFprintfFloat-4 187ns ± 3% 188ns ± 3% ~ (p=0.505 n=10+9) FmtManyArgs-4 410ns ± 1% 415ns ± 6% ~ (p=0.649 n=8+10) GobDecode-4 5.30ms ± 3% 5.27ms ± 3% ~ (p=0.436 n=10+10) GobEncode-4 4.62ms ± 5% 4.47ms ± 2% -3.24% (p=0.001 n=9+10) Gzip-4 197ms ± 4% 193ms ± 3% ~ (p=0.123 n=10+10) Gunzip-4 30.4ms ± 3% 30.1ms ± 3% ~ (p=0.481 n=10+10) HTTPClientServer-4 76.3µs ± 1% 76.0µs ± 1% ~ (p=0.236 n=8+9) JSONEncode-4 
10.5ms ± 9% 10.3ms ± 3% ~ (p=0.280 n=10+10) JSONDecode-4 42.3ms ±10% 41.3ms ± 2% ~ (p=0.053 n=9+10) Mandelbrot200-4 3.80ms ± 2% 3.72ms ± 2% -2.15% (p=0.001 n=9+10) GoParse-4 2.88ms ±10% 2.81ms ± 2% ~ (p=0.247 n=10+10) RegexpMatchEasy0_32-4 69.5ns ± 4% 68.6ns ± 2% ~ (p=0.171 n=10+10) RegexpMatchEasy0_1K-4 165ns ± 3% 162ns ± 3% ~ (p=0.137 n=10+10) RegexpMatchEasy1_32-4 65.7ns ± 6% 64.4ns ± 2% -2.02% (p=0.037 n=10+10) RegexpMatchEasy1_1K-4 278ns ± 2% 279ns ± 3% ~ (p=0.991 n=8+9) RegexpMatchMedium_32-4 99.3ns ± 3% 98.5ns ± 4% ~ (p=0.457 n=10+9) RegexpMatchMedium_1K-4 30.1µs ± 1% 30.4µs ± 2% ~ (p=0.173 n=8+10) RegexpMatchHard_32-4 1.40µs ± 2% 1.41µs ± 4% ~ (p=0.565 n=10+10) RegexpMatchHard_1K-4 42.5µs ± 1% 41.5µs ± 3% -2.13% (p=0.002 n=8+9) Revcomp-4 332ms ± 4% 328ms ± 5% ~ (p=0.720 n=9+10) Template-4 48.3ms ± 2% 49.6ms ± 3% +2.56% (p=0.002 n=8+10) TimeParse-4 252ns ± 2% 249ns ± 3% ~ (p=0.116 n=9+10) TimeFormat-4 262ns ± 4% 252ns ± 3% -4.01% (p=0.000 n=9+10) name old speed new speed delta GobDecode-4 145MB/s ± 3% 146MB/s ± 3% ~ (p=0.436 n=10+10) GobEncode-4 166MB/s ± 5% 172MB/s ± 2% +3.28% (p=0.001 n=9+10) Gzip-4 98.6MB/s ± 4% 100.4MB/s ± 3% ~ (p=0.123 n=10+10) Gunzip-4 639MB/s ± 3% 645MB/s ± 3% ~ (p=0.481 n=10+10) JSONEncode-4 185MB/s ± 8% 189MB/s ± 3% ~ (p=0.280 n=10+10) JSONDecode-4 46.0MB/s ± 9% 47.0MB/s ± 2% +2.21% (p=0.046 n=9+10) GoParse-4 20.1MB/s ± 9% 20.6MB/s ± 2% ~ (p=0.239 n=10+10) RegexpMatchEasy0_32-4 460MB/s ± 4% 467MB/s ± 2% ~ (p=0.165 n=10+10) RegexpMatchEasy0_1K-4 6.19GB/s ± 3% 6.28GB/s ± 3% ~ (p=0.165 n=10+10) RegexpMatchEasy1_32-4 487MB/s ± 5% 497MB/s ± 2% +2.00% (p=0.043 n=10+10) RegexpMatchEasy1_1K-4 3.67GB/s ± 2% 3.67GB/s ± 3% ~ (p=0.963 n=8+9) RegexpMatchMedium_32-4 10.1MB/s ± 3% 10.1MB/s ± 4% ~ (p=0.435 n=10+9) RegexpMatchMedium_1K-4 34.0MB/s ± 1% 33.7MB/s ± 2% ~ (p=0.173 n=8+10) RegexpMatchHard_32-4 22.9MB/s ± 2% 22.7MB/s ± 4% ~ (p=0.565 n=10+10) RegexpMatchHard_1K-4 24.0MB/s ± 3% 24.7MB/s ± 3% +2.64% (p=0.001 n=9+9) Revcomp-4 766MB/s ± 4% 
775MB/s ± 5% ~ (p=0.720 n=9+10) Template-4 40.2MB/s ± 2% 39.2MB/s ± 3% -2.47% (p=0.002 n=8+10) The rules match ~1800 times during all.bash. Fixes #18943 Change-Id: I64be1ada34e89c486dfd935bf429b35652117ed4 Reviewed-on: https://go-review.googlesource.com/94766 Run-TryBot: Giovanni Bajo <rasky@develer.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
This commit is contained in:
parent
3afd2d7fc8
commit
79112707bb
@ -194,7 +194,10 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
|
||||
ssa.OpAMD64RORQ, ssa.OpAMD64RORL, ssa.OpAMD64RORW, ssa.OpAMD64RORB,
|
||||
ssa.OpAMD64ADDSS, ssa.OpAMD64ADDSD, ssa.OpAMD64SUBSS, ssa.OpAMD64SUBSD,
|
||||
ssa.OpAMD64MULSS, ssa.OpAMD64MULSD, ssa.OpAMD64DIVSS, ssa.OpAMD64DIVSD,
|
||||
ssa.OpAMD64PXOR:
|
||||
ssa.OpAMD64PXOR,
|
||||
ssa.OpAMD64BTSL, ssa.OpAMD64BTSQ,
|
||||
ssa.OpAMD64BTCL, ssa.OpAMD64BTCQ,
|
||||
ssa.OpAMD64BTRL, ssa.OpAMD64BTRQ:
|
||||
r := v.Reg()
|
||||
if r != v.Args[0].Reg() {
|
||||
v.Fatalf("input[0] and output not in same register %s", v.LongString())
|
||||
@ -573,7 +576,10 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
|
||||
p.From.Offset = v.AuxInt
|
||||
p.To.Type = obj.TYPE_REG
|
||||
p.To.Reg = v.Args[0].Reg()
|
||||
case ssa.OpAMD64TESTQconst, ssa.OpAMD64TESTLconst, ssa.OpAMD64TESTWconst, ssa.OpAMD64TESTBconst:
|
||||
case ssa.OpAMD64TESTQconst, ssa.OpAMD64TESTLconst, ssa.OpAMD64TESTWconst, ssa.OpAMD64TESTBconst,
|
||||
ssa.OpAMD64BTSLconst, ssa.OpAMD64BTSQconst,
|
||||
ssa.OpAMD64BTCLconst, ssa.OpAMD64BTCQconst,
|
||||
ssa.OpAMD64BTRLconst, ssa.OpAMD64BTRQconst:
|
||||
p := s.Prog(v.Op.Asm())
|
||||
p.From.Type = obj.TYPE_CONST
|
||||
p.From.Offset = v.AuxInt
|
||||
|
@ -279,45 +279,6 @@ var linuxAMD64Tests = []*asmTest{
|
||||
`,
|
||||
pos: []string{"\tSHLQ\t\\$5,", "\tLEAQ\t\\(.*\\)\\(.*\\*2\\),"},
|
||||
},
|
||||
// Bit test ops on amd64, issue 18943.
|
||||
{
|
||||
fn: `
|
||||
func f37(a, b uint64) int {
|
||||
if a&(1<<(b&63)) != 0 {
|
||||
return 1
|
||||
}
|
||||
return -1
|
||||
}
|
||||
`,
|
||||
pos: []string{"\tBTQ\t"},
|
||||
},
|
||||
{
|
||||
fn: `
|
||||
func f38(a, b uint64) bool {
|
||||
return a&(1<<(b&63)) != 0
|
||||
}
|
||||
`,
|
||||
pos: []string{"\tBTQ\t"},
|
||||
},
|
||||
{
|
||||
fn: `
|
||||
func f39(a uint64) int {
|
||||
if a&(1<<60) != 0 {
|
||||
return 1
|
||||
}
|
||||
return -1
|
||||
}
|
||||
`,
|
||||
pos: []string{"\tBTQ\t\\$60"},
|
||||
},
|
||||
{
|
||||
fn: `
|
||||
func f40(a uint64) bool {
|
||||
return a&(1<<60) != 0
|
||||
}
|
||||
`,
|
||||
pos: []string{"\tBTQ\t\\$60"},
|
||||
},
|
||||
// see issue 19595.
|
||||
// We want to merge load+op in f58, but not in f59.
|
||||
{
|
||||
|
@ -643,8 +643,10 @@
|
||||
(NE (TESTB (SETAE cmp) (SETAE cmp)) yes no) -> (UGE cmp yes no)
|
||||
|
||||
// Recognize bit tests: a&(1<<b) != 0 for b suitably bounded
|
||||
// Note that ULT and SETB check the carry flag; they are identical to CS and SETCS.
|
||||
// Same, mutatis mutandis, for UGE and SETAE, and CC and SETCC.
|
||||
// Note that BTx instructions use the carry bit, so we need to convert tests for zero flag
|
||||
// into tests for carry flags.
|
||||
// ULT and SETB check the carry flag; they are identical to CS and SETCS. Same, mutatis
|
||||
// mutandis, for UGE and SETAE, and CC and SETCC.
|
||||
((NE|EQ) (TESTL (SHLL (MOVLconst [1]) x) y)) && !config.nacl -> ((ULT|UGE) (BTL x y))
|
||||
((NE|EQ) (TESTQ (SHLQ (MOVQconst [1]) x) y)) && !config.nacl -> ((ULT|UGE) (BTQ x y))
|
||||
((NE|EQ) (TESTLconst [c] x)) && isUint32PowerOfTwo(c) && !config.nacl
|
||||
@ -673,6 +675,94 @@
|
||||
(SET(NE|EQ)mem [off] {sym} ptr (TESTQ (MOVQconst [c]) x) mem) && isUint64PowerOfTwo(c) && !config.nacl
|
||||
-> (SET(B|AE)mem [off] {sym} ptr (BTQconst [log2(c)] x) mem)
|
||||
|
||||
// Handle bit-testing in the form (a>>b)&1 != 0 by building on the above rules
|
||||
// and further combining shifts.
|
||||
(BT(Q|L)const [c] (SHRQconst [d] x)) && (c+d)<64 -> (BTQconst [c+d] x)
|
||||
(BT(Q|L)const [c] (SHLQconst [d] x)) && c>d -> (BT(Q|L)const [c-d] x)
|
||||
(BT(Q|L)const [0] s:(SHRQ x y)) -> (BTQ y x)
|
||||
(BTLconst [c] (SHRLconst [d] x)) && (c+d)<32 -> (BTLconst [c+d] x)
|
||||
(BTLconst [c] (SHLLconst [d] x)) && c>d -> (BTLconst [c-d] x)
|
||||
(BTLconst [0] s:(SHRL x y)) -> (BTL y x)
|
||||
|
||||
// Recognize bit setting (a |= 1<<b) and toggling (a ^= 1<<b)
|
||||
(OR(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y) x) && !config.nacl -> (BTS(Q|L) x y)
|
||||
(XOR(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y) x) && !config.nacl -> (BTC(Q|L) x y)
|
||||
|
||||
// Convert ORconst into BTS, if the code gets smaller, with boundary being
|
||||
// (ORL $0x40,AX is 3 bytes, ORL $0x80,AX is 6 bytes).
|
||||
((ORQ|XORQ)const [c] x) && isUint64PowerOfTwo(c) && uint64(c) >= 128 && !config.nacl
|
||||
-> (BT(S|C)Qconst [log2(c)] x)
|
||||
((ORL|XORL)const [c] x) && isUint32PowerOfTwo(c) && uint64(c) >= 128 && !config.nacl
|
||||
-> (BT(S|C)Lconst [log2uint32(c)] x)
|
||||
((ORQ|XORQ) (MOVQconst [c]) x) && isUint64PowerOfTwo(c) && uint64(c) >= 128 && !config.nacl
|
||||
-> (BT(S|C)Qconst [log2(c)] x)
|
||||
((ORL|XORL) (MOVLconst [c]) x) && isUint32PowerOfTwo(c) && uint64(c) >= 128 && !config.nacl
|
||||
-> (BT(S|C)Lconst [log2uint32(c)] x)
|
||||
|
||||
// Recognize bit clearing: a &^= 1<<b
|
||||
(AND(Q|L) (NOT(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y)) x) && !config.nacl -> (BTR(Q|L) x y)
|
||||
(ANDQconst [c] x) && isUint64PowerOfTwo(^c) && uint64(^c) >= 128 && !config.nacl
|
||||
-> (BTRQconst [log2(^c)] x)
|
||||
(ANDLconst [c] x) && isUint32PowerOfTwo(^c) && uint64(^c) >= 128 && !config.nacl
|
||||
-> (BTRLconst [log2uint32(^c)] x)
|
||||
(ANDQ (MOVQconst [c]) x) && isUint64PowerOfTwo(^c) && uint64(^c) >= 128 && !config.nacl
|
||||
-> (BTRQconst [log2(^c)] x)
|
||||
(ANDL (MOVLconst [c]) x) && isUint32PowerOfTwo(^c) && uint64(^c) >= 128 && !config.nacl
|
||||
-> (BTRLconst [log2uint32(^c)] x)
|
||||
|
||||
// Special-case bit patterns on first/last bit.
|
||||
// generic.rules changes ANDs of high-part/low-part masks into a couple of shifts,
|
||||
// for instance:
|
||||
// x & 0xFFFF0000 -> (x >> 16) << 16
|
||||
// x & 0x80000000 -> (x >> 31) << 31
|
||||
//
|
||||
// In case the mask is just one bit (like second example above), it conflicts
|
||||
// with the above rules to detect bit-testing / bit-clearing of first/last bit.
|
||||
// We thus special-case them, by detecting the shift patterns.
|
||||
|
||||
// Special case resetting first/last bit
|
||||
(SHL(L|Q)const [1] (SHR(L|Q)const [1] x)) && !config.nacl
|
||||
-> (BTR(L|Q)const [0] x)
|
||||
(SHRLconst [1] (SHLLconst [1] x)) && !config.nacl
|
||||
-> (BTRLconst [31] x)
|
||||
(SHRQconst [1] (SHLQconst [1] x)) && !config.nacl
|
||||
-> (BTRQconst [63] x)
|
||||
|
||||
// Special case testing first/last bit (with double-shift generated by generic.rules)
|
||||
((SETNE|SETEQ|NE|EQ) (TESTQ z1:(SHLQconst [63] (SHRQconst [63] x)) z2)) && z1==z2 && !config.nacl
|
||||
-> ((SETB|SETAE|ULT|UGE) (BTQconst [63] x))
|
||||
((SETNE|SETEQ|NE|EQ) (TESTL z1:(SHLLconst [31] (SHRQconst [31] x)) z2)) && z1==z2 && !config.nacl
|
||||
-> ((SETB|SETAE|ULT|UGE) (BTQconst [31] x))
|
||||
(SET(NE|EQ)mem [off] {sym} ptr (TESTQ z1:(SHLQconst [63] (SHRQconst [63] x)) z2) mem) && z1==z2 && !config.nacl
|
||||
-> (SET(B|AE)mem [off] {sym} ptr (BTQconst [63] x) mem)
|
||||
(SET(NE|EQ)mem [off] {sym} ptr (TESTL z1:(SHLLconst [31] (SHRLconst [31] x)) z2) mem) && z1==z2 && !config.nacl
|
||||
-> (SET(B|AE)mem [off] {sym} ptr (BTLconst [31] x) mem)
|
||||
|
||||
((SETNE|SETEQ|NE|EQ) (TESTQ z1:(SHRQconst [63] (SHLQconst [63] x)) z2)) && z1==z2 && !config.nacl
|
||||
-> ((SETB|SETAE|ULT|UGE) (BTQconst [0] x))
|
||||
((SETNE|SETEQ|NE|EQ) (TESTL z1:(SHRLconst [31] (SHLLconst [31] x)) z2)) && z1==z2 && !config.nacl
|
||||
-> ((SETB|SETAE|ULT|UGE) (BTLconst [0] x))
|
||||
(SET(NE|EQ)mem [off] {sym} ptr (TESTQ z1:(SHRQconst [63] (SHLQconst [63] x)) z2) mem) && z1==z2 && !config.nacl
|
||||
-> (SET(B|AE)mem [off] {sym} ptr (BTQconst [0] x) mem)
|
||||
(SET(NE|EQ)mem [off] {sym} ptr (TESTL z1:(SHRLconst [31] (SHLLconst [31] x)) z2) mem) && z1==z2 && !config.nacl
|
||||
-> (SET(B|AE)mem [off] {sym} ptr (BTLconst [0] x) mem)
|
||||
|
||||
// Special-case manually testing last bit with "a>>63 != 0" (without "&1")
|
||||
((SETNE|SETEQ|NE|EQ) (TESTQ z1:(SHRQconst [63] x) z2)) && z1==z2 && !config.nacl
|
||||
-> ((SETB|SETAE|ULT|UGE) (BTQconst [63] x))
|
||||
((SETNE|SETEQ|NE|EQ) (TESTL z1:(SHRLconst [31] x) z2)) && z1==z2 && !config.nacl
|
||||
-> ((SETB|SETAE|ULT|UGE) (BTLconst [31] x))
|
||||
(SET(NE|EQ)mem [off] {sym} ptr (TESTQ z1:(SHRQconst [63] x) z2) mem) && z1==z2 && !config.nacl
|
||||
-> (SET(B|AE)mem [off] {sym} ptr (BTQconst [63] x) mem)
|
||||
(SET(NE|EQ)mem [off] {sym} ptr (TESTL z1:(SHRLconst [31] x) z2) mem) && z1==z2 && !config.nacl
|
||||
-> (SET(B|AE)mem [off] {sym} ptr (BTLconst [31] x) mem)
|
||||
|
||||
// Fold combinations of bit ops on same bit. An example is math.Copysign(c,-1)
|
||||
(BTS(Q|L)const [c] (BTR(Q|L)const [c] x)) -> (BTS(Q|L)const [c] x)
|
||||
(BTS(Q|L)const [c] (BTC(Q|L)const [c] x)) -> (BTS(Q|L)const [c] x)
|
||||
(BTR(Q|L)const [c] (BTS(Q|L)const [c] x)) -> (BTR(Q|L)const [c] x)
|
||||
(BTR(Q|L)const [c] (BTC(Q|L)const [c] x)) -> (BTR(Q|L)const [c] x)
|
||||
|
||||
// Fold boolean negation into SETcc.
|
||||
(XORLconst [1] (SETNE x)) -> (SETEQ x)
|
||||
(XORLconst [1] (SETEQ x)) -> (SETNE x)
|
||||
|
@ -264,10 +264,22 @@ func init() {
|
||||
{name: "UCOMISS", argLength: 2, reg: fp2flags, asm: "UCOMISS", typ: "Flags"}, // arg0 compare to arg1, f32
|
||||
{name: "UCOMISD", argLength: 2, reg: fp2flags, asm: "UCOMISD", typ: "Flags"}, // arg0 compare to arg1, f64
|
||||
|
||||
{name: "BTL", argLength: 2, reg: gp2flags, asm: "BTL", typ: "Flags"}, // test whether bit arg0 % 32 in arg1 is set
|
||||
{name: "BTQ", argLength: 2, reg: gp2flags, asm: "BTQ", typ: "Flags"}, // test whether bit arg0 % 64 in arg1 is set
|
||||
{name: "BTLconst", argLength: 1, reg: gp1flags, asm: "BTL", typ: "Flags", aux: "Int8"}, // test whether bit auxint in arg0 is set, 0 <= auxint < 32
|
||||
{name: "BTQconst", argLength: 1, reg: gp1flags, asm: "BTQ", typ: "Flags", aux: "Int8"}, // test whether bit auxint in arg0 is set, 0 <= auxint < 64
|
||||
{name: "BTL", argLength: 2, reg: gp2flags, asm: "BTL", typ: "Flags"}, // test whether bit arg0 % 32 in arg1 is set
|
||||
{name: "BTQ", argLength: 2, reg: gp2flags, asm: "BTQ", typ: "Flags"}, // test whether bit arg0 % 64 in arg1 is set
|
||||
{name: "BTCL", argLength: 2, reg: gp21, asm: "BTCL", resultInArg0: true, clobberFlags: true}, // complement bit arg1 % 32 in arg0
|
||||
{name: "BTCQ", argLength: 2, reg: gp21, asm: "BTCQ", resultInArg0: true, clobberFlags: true}, // complement bit arg1 % 64 in arg0
|
||||
{name: "BTRL", argLength: 2, reg: gp21, asm: "BTRL", resultInArg0: true, clobberFlags: true}, // reset bit arg1 % 32 in arg0
|
||||
{name: "BTRQ", argLength: 2, reg: gp21, asm: "BTRQ", resultInArg0: true, clobberFlags: true}, // reset bit arg1 % 64 in arg0
|
||||
{name: "BTSL", argLength: 2, reg: gp21, asm: "BTSL", resultInArg0: true, clobberFlags: true}, // set bit arg1 % 32 in arg0
|
||||
{name: "BTSQ", argLength: 2, reg: gp21, asm: "BTSQ", resultInArg0: true, clobberFlags: true}, // set bit arg1 % 64 in arg0
|
||||
{name: "BTLconst", argLength: 1, reg: gp1flags, asm: "BTL", typ: "Flags", aux: "Int8"}, // test whether bit auxint in arg0 is set, 0 <= auxint < 32
|
||||
{name: "BTQconst", argLength: 1, reg: gp1flags, asm: "BTQ", typ: "Flags", aux: "Int8"}, // test whether bit auxint in arg0 is set, 0 <= auxint < 64
|
||||
{name: "BTCLconst", argLength: 1, reg: gp11, asm: "BTCL", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // complement bit auxint in arg0, 0 <= auxint < 32
|
||||
{name: "BTCQconst", argLength: 1, reg: gp11, asm: "BTCQ", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // complement bit auxint in arg0, 0 <= auxint < 64
|
||||
{name: "BTRLconst", argLength: 1, reg: gp11, asm: "BTRL", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // reset bit auxint in arg0, 0 <= auxint < 32
|
||||
{name: "BTRQconst", argLength: 1, reg: gp11, asm: "BTRQ", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // reset bit auxint in arg0, 0 <= auxint < 64
|
||||
{name: "BTSLconst", argLength: 1, reg: gp11, asm: "BTSL", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // set bit auxint in arg0, 0 <= auxint < 32
|
||||
{name: "BTSQconst", argLength: 1, reg: gp11, asm: "BTSQ", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // set bit auxint in arg0, 0 <= auxint < 64
|
||||
|
||||
{name: "TESTQ", argLength: 2, reg: gp2flags, commutative: true, asm: "TESTQ", typ: "Flags"}, // (arg0 & arg1) compare to 0
|
||||
{name: "TESTL", argLength: 2, reg: gp2flags, commutative: true, asm: "TESTL", typ: "Flags"}, // (arg0 & arg1) compare to 0
|
||||
|
@ -499,8 +499,20 @@ const (
|
||||
OpAMD64UCOMISD
|
||||
OpAMD64BTL
|
||||
OpAMD64BTQ
|
||||
OpAMD64BTCL
|
||||
OpAMD64BTCQ
|
||||
OpAMD64BTRL
|
||||
OpAMD64BTRQ
|
||||
OpAMD64BTSL
|
||||
OpAMD64BTSQ
|
||||
OpAMD64BTLconst
|
||||
OpAMD64BTQconst
|
||||
OpAMD64BTCLconst
|
||||
OpAMD64BTCQconst
|
||||
OpAMD64BTRLconst
|
||||
OpAMD64BTRQconst
|
||||
OpAMD64BTSLconst
|
||||
OpAMD64BTSQconst
|
||||
OpAMD64TESTQ
|
||||
OpAMD64TESTL
|
||||
OpAMD64TESTW
|
||||
@ -5901,6 +5913,102 @@ var opcodeTable = [...]opInfo{
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "BTCL",
|
||||
argLen: 2,
|
||||
resultInArg0: true,
|
||||
clobberFlags: true,
|
||||
asm: x86.ABTCL,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
|
||||
{1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "BTCQ",
|
||||
argLen: 2,
|
||||
resultInArg0: true,
|
||||
clobberFlags: true,
|
||||
asm: x86.ABTCQ,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
|
||||
{1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "BTRL",
|
||||
argLen: 2,
|
||||
resultInArg0: true,
|
||||
clobberFlags: true,
|
||||
asm: x86.ABTRL,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
|
||||
{1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "BTRQ",
|
||||
argLen: 2,
|
||||
resultInArg0: true,
|
||||
clobberFlags: true,
|
||||
asm: x86.ABTRQ,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
|
||||
{1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "BTSL",
|
||||
argLen: 2,
|
||||
resultInArg0: true,
|
||||
clobberFlags: true,
|
||||
asm: x86.ABTSL,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
|
||||
{1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "BTSQ",
|
||||
argLen: 2,
|
||||
resultInArg0: true,
|
||||
clobberFlags: true,
|
||||
asm: x86.ABTSQ,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
|
||||
{1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "BTLconst",
|
||||
auxType: auxInt8,
|
||||
@ -5923,6 +6031,102 @@ var opcodeTable = [...]opInfo{
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "BTCLconst",
|
||||
auxType: auxInt8,
|
||||
argLen: 1,
|
||||
resultInArg0: true,
|
||||
clobberFlags: true,
|
||||
asm: x86.ABTCL,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "BTCQconst",
|
||||
auxType: auxInt8,
|
||||
argLen: 1,
|
||||
resultInArg0: true,
|
||||
clobberFlags: true,
|
||||
asm: x86.ABTCQ,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "BTRLconst",
|
||||
auxType: auxInt8,
|
||||
argLen: 1,
|
||||
resultInArg0: true,
|
||||
clobberFlags: true,
|
||||
asm: x86.ABTRL,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "BTRQconst",
|
||||
auxType: auxInt8,
|
||||
argLen: 1,
|
||||
resultInArg0: true,
|
||||
clobberFlags: true,
|
||||
asm: x86.ABTRQ,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "BTSLconst",
|
||||
auxType: auxInt8,
|
||||
argLen: 1,
|
||||
resultInArg0: true,
|
||||
clobberFlags: true,
|
||||
asm: x86.ABTSL,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "BTSQconst",
|
||||
auxType: auxInt8,
|
||||
argLen: 1,
|
||||
resultInArg0: true,
|
||||
clobberFlags: true,
|
||||
asm: x86.ABTSQ,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "TESTQ",
|
||||
argLen: 2,
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -6,9 +6,258 @@
|
||||
|
||||
package codegen
|
||||
|
||||
func bitcheck(a, b uint64) int {
|
||||
if a&(1<<(b&63)) != 0 { // amd64:"BTQ"
|
||||
/************************************
|
||||
* 64-bit instructions
|
||||
************************************/
|
||||
|
||||
func bitcheck64_constleft(a uint64) (n int) {
|
||||
// amd64:"BTQ\t[$]63"
|
||||
if a&(1<<63) != 0 {
|
||||
return 1
|
||||
}
|
||||
return -1
|
||||
// amd64:"BTQ\t[$]60"
|
||||
if a&(1<<60) != 0 {
|
||||
return 1
|
||||
}
|
||||
// amd64:"BTL\t[$]0"
|
||||
if a&(1<<0) != 0 {
|
||||
return 1
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func bitcheck64_constright(a [8]uint64) (n int) {
|
||||
// amd64:"BTQ\t[$]63"
|
||||
if (a[0]>>63)&1 != 0 {
|
||||
return 1
|
||||
}
|
||||
// amd64:"BTQ\t[$]63"
|
||||
if a[1]>>63 != 0 {
|
||||
return 1
|
||||
}
|
||||
// amd64:"BTQ\t[$]63"
|
||||
if a[2]>>63 == 0 {
|
||||
return 1
|
||||
}
|
||||
// amd64:"BTQ\t[$]60"
|
||||
if (a[3]>>60)&1 == 0 {
|
||||
return 1
|
||||
}
|
||||
// amd64:"BTL\t[$]1"
|
||||
if (a[4]>>1)&1 == 0 {
|
||||
return 1
|
||||
}
|
||||
// amd64:"BTL\t[$]0"
|
||||
if (a[5]>>0)&1 == 0 {
|
||||
return 1
|
||||
}
|
||||
// amd64:"BTL\t[$]7"
|
||||
if (a[6]>>5)&4 == 0 {
|
||||
return 1
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func bitcheck64_var(a, b uint64) (n int) {
|
||||
// amd64:"BTQ"
|
||||
if a&(1<<(b&63)) != 0 {
|
||||
return 1
|
||||
}
|
||||
// amd64:"BTQ",-"BT.\t[$]0"
|
||||
if (b>>(a&63))&1 != 0 {
|
||||
return 1
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func bitcheck64_mask(a uint64) (n int) {
|
||||
// amd64:"BTQ\t[$]63"
|
||||
if a&0x8000000000000000 != 0 {
|
||||
return 1
|
||||
}
|
||||
// amd64:"BTQ\t[$]59"
|
||||
if a&0x800000000000000 != 0 {
|
||||
return 1
|
||||
}
|
||||
// amd64:"BTL\t[$]0"
|
||||
if a&0x1 != 0 {
|
||||
return 1
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func biton64(a, b uint64) (n uint64) {
|
||||
// amd64:"BTSQ"
|
||||
n += b | (1 << (a & 63))
|
||||
|
||||
// amd64:"BTSQ\t[$]63"
|
||||
n += a | (1 << 63)
|
||||
|
||||
// amd64:"BTSQ\t[$]60"
|
||||
n += a | (1 << 60)
|
||||
|
||||
// amd64:"ORQ\t[$]1"
|
||||
n += a | (1 << 0)
|
||||
|
||||
return n
|
||||
}
|
||||
|
||||
func bitoff64(a, b uint64) (n uint64) {
|
||||
// amd64:"BTRQ"
|
||||
n += b &^ (1 << (a & 63))
|
||||
|
||||
// amd64:"BTRQ\t[$]63"
|
||||
n += a &^ (1 << 63)
|
||||
|
||||
// amd64:"BTRQ\t[$]60"
|
||||
n += a &^ (1 << 60)
|
||||
|
||||
// amd64:"ANDQ\t[$]-2"
|
||||
n += a &^ (1 << 0)
|
||||
|
||||
return n
|
||||
}
|
||||
|
||||
func bitcompl64(a, b uint64) (n uint64) {
|
||||
// amd64:"BTCQ"
|
||||
n += b ^ (1 << (a & 63))
|
||||
|
||||
// amd64:"BTCQ\t[$]63"
|
||||
n += a ^ (1 << 63)
|
||||
|
||||
// amd64:"BTCQ\t[$]60"
|
||||
n += a ^ (1 << 60)
|
||||
|
||||
// amd64:"XORQ\t[$]1"
|
||||
n += a ^ (1 << 0)
|
||||
|
||||
return n
|
||||
}
|
||||
|
||||
/************************************
|
||||
* 32-bit instructions
|
||||
************************************/
|
||||
|
||||
func bitcheck32_constleft(a uint32) (n int) {
|
||||
// amd64:"BTL\t[$]31"
|
||||
if a&(1<<31) != 0 {
|
||||
return 1
|
||||
}
|
||||
// amd64:"BTL\t[$]28"
|
||||
if a&(1<<28) != 0 {
|
||||
return 1
|
||||
}
|
||||
// amd64:"BTL\t[$]0"
|
||||
if a&(1<<0) != 0 {
|
||||
return 1
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func bitcheck32_constright(a [8]uint32) (n int) {
|
||||
// amd64:"BTL\t[$]31"
|
||||
if (a[0]>>31)&1 != 0 {
|
||||
return 1
|
||||
}
|
||||
// amd64:"BTL\t[$]31"
|
||||
if a[1]>>31 != 0 {
|
||||
return 1
|
||||
}
|
||||
// amd64:"BTL\t[$]31"
|
||||
if a[2]>>31 == 0 {
|
||||
return 1
|
||||
}
|
||||
// amd64:"BTL\t[$]28"
|
||||
if (a[3]>>28)&1 == 0 {
|
||||
return 1
|
||||
}
|
||||
// amd64:"BTL\t[$]1"
|
||||
if (a[4]>>1)&1 == 0 {
|
||||
return 1
|
||||
}
|
||||
// amd64:"BTL\t[$]0"
|
||||
if (a[5]>>0)&1 == 0 {
|
||||
return 1
|
||||
}
|
||||
// amd64:"BTL\t[$]7"
|
||||
if (a[6]>>5)&4 == 0 {
|
||||
return 1
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func bitcheck32_var(a, b uint32) (n int) {
|
||||
// amd64:"BTL"
|
||||
if a&(1<<(b&31)) != 0 {
|
||||
return 1
|
||||
}
|
||||
// amd64:"BTL",-"BT.\t[$]0"
|
||||
if (b>>(a&31))&1 != 0 {
|
||||
return 1
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func bitcheck32_mask(a uint32) (n int) {
|
||||
// amd64:"BTL\t[$]31"
|
||||
if a&0x80000000 != 0 {
|
||||
return 1
|
||||
}
|
||||
// amd64:"BTL\t[$]27"
|
||||
if a&0x8000000 != 0 {
|
||||
return 1
|
||||
}
|
||||
// amd64:"BTL\t[$]0"
|
||||
if a&0x1 != 0 {
|
||||
return 1
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func biton32(a, b uint32) (n uint32) {
|
||||
// amd64:"BTSL"
|
||||
n += b | (1 << (a & 31))
|
||||
|
||||
// amd64:"BTSL\t[$]31"
|
||||
n += a | (1 << 31)
|
||||
|
||||
// amd64:"BTSL\t[$]28"
|
||||
n += a | (1 << 28)
|
||||
|
||||
// amd64:"ORL\t[$]1"
|
||||
n += a | (1 << 0)
|
||||
|
||||
return n
|
||||
}
|
||||
|
||||
func bitoff32(a, b uint32) (n uint32) {
|
||||
// amd64:"BTRL"
|
||||
n += b &^ (1 << (a & 31))
|
||||
|
||||
// amd64:"BTRL\t[$]31"
|
||||
n += a &^ (1 << 31)
|
||||
|
||||
// amd64:"BTRL\t[$]28"
|
||||
n += a &^ (1 << 28)
|
||||
|
||||
// amd64:"ANDL\t[$]-2"
|
||||
n += a &^ (1 << 0)
|
||||
|
||||
return n
|
||||
}
|
||||
|
||||
func bitcompl32(a, b uint32) (n uint32) {
|
||||
// amd64:"BTCL"
|
||||
n += b ^ (1 << (a & 31))
|
||||
|
||||
// amd64:"BTCL\t[$]31"
|
||||
n += a ^ (1 << 31)
|
||||
|
||||
// amd64:"BTCL\t[$]28"
|
||||
n += a ^ (1 << 28)
|
||||
|
||||
// amd64:"XORL\t[$]1"
|
||||
n += a ^ (1 << 0)
|
||||
|
||||
return n
|
||||
}
|
||||
|
@ -41,12 +41,12 @@ func sqrt(x float64) float64 {
|
||||
|
||||
// Check that it's using integer registers
|
||||
func abs(x, y float64) {
|
||||
// amd64:"SHLQ\t[$]1","SHRQ\t[$]1,"
|
||||
// amd64:"BTRQ\t[$]63"
|
||||
// s390x:"LPDFR\t",-"MOVD\t" (no integer load/store)
|
||||
// ppc64le:"FABS\t"
|
||||
sink64[0] = math.Abs(x)
|
||||
|
||||
// amd64:"SHLQ\t[$]1","SHRQ\t[$]1,"
|
||||
// amd64:"BTRQ\t[$]63","PXOR" (TODO: this should be BTSQ)
|
||||
// s390x:"LNDFR\t",-"MOVD\t" (no integer load/store)
|
||||
// ppc64le:"FNABS\t"
|
||||
sink64[1] = -math.Abs(y)
|
||||
@ -60,12 +60,12 @@ func abs32(x float32) float32 {
|
||||
|
||||
// Check that it's using integer registers
|
||||
func copysign(a, b, c float64) {
|
||||
// amd64:"SHLQ\t[$]1","SHRQ\t[$]1","SHRQ\t[$]63","SHLQ\t[$]63","ORQ"
|
||||
// amd64:"BTRQ\t[$]63","SHRQ\t[$]63","SHLQ\t[$]63","ORQ"
|
||||
// s390x:"CPSDR",-"MOVD" (no integer load/store)
|
||||
// ppc64le:"FCPSGN"
|
||||
sink64[0] = math.Copysign(a, b)
|
||||
|
||||
// amd64:"SHLQ\t[$]1","SHRQ\t[$]1",-"SHRQ\t[$]63",-"SHLQ\t[$]63","ORQ"
|
||||
// amd64:"BTSQ\t[$]63"
|
||||
// s390x:"LNDFR\t",-"MOVD\t" (no integer load/store)
|
||||
// ppc64le:"FCPSGN"
|
||||
sink64[1] = math.Copysign(c, -1)
|
||||
|
@ -199,19 +199,19 @@ func TrailingZeros64(n uint64) int {
|
||||
}
|
||||
|
||||
func TrailingZeros32(n uint32) int {
|
||||
// amd64:"MOVQ\t\\$4294967296","ORQ\t[^$]","BSFQ"
|
||||
// amd64:"BTSQ\\t\\$32","BSFQ"
|
||||
// s390x:"FLOGR","MOVWZ"
|
||||
return bits.TrailingZeros32(n)
|
||||
}
|
||||
|
||||
func TrailingZeros16(n uint16) int {
|
||||
// amd64:"BSFQ","ORQ\t\\$65536"
|
||||
// amd64:"BSFQ","BTSQ\\t\\$16"
|
||||
// s390x:"FLOGR","OR\t\\$65536"
|
||||
return bits.TrailingZeros16(n)
|
||||
}
|
||||
|
||||
func TrailingZeros8(n uint8) int {
|
||||
// amd64:"BSFQ","ORQ\t\\$256"
|
||||
// amd64:"BSFQ","BTSQ\\t\\$8"
|
||||
// s390x:"FLOGR","OR\t\\$256"
|
||||
return bits.TrailingZeros8(n)
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user