mirror of
https://github.com/golang/go
synced 2024-11-16 20:14:48 -07:00
cmd/compile: add SHLX&SHRX without load
Change-Id: I79eb5e7d6bcb23f26d3a100e915efff6dae70391 Reviewed-on: https://go-review.googlesource.com/c/go/+/399061 Reviewed-by: Keith Randall <khr@golang.org> Reviewed-by: Keith Randall <khr@google.com> Reviewed-by: Cherry Mui <cherryyz@google.com>
This commit is contained in:
parent
517781b391
commit
66f03f79da
@ -282,7 +282,9 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
|
||||
p.To.Reg = v.Reg()
|
||||
p.SetFrom3Reg(v.Args[1].Reg())
|
||||
|
||||
case ssa.OpAMD64SARXL, ssa.OpAMD64SARXQ:
|
||||
case ssa.OpAMD64SARXL, ssa.OpAMD64SARXQ,
|
||||
ssa.OpAMD64SHLXL, ssa.OpAMD64SHLXQ,
|
||||
ssa.OpAMD64SHRXL, ssa.OpAMD64SHRXQ:
|
||||
p := opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg())
|
||||
p.SetFrom3Reg(v.Args[0].Reg())
|
||||
|
||||
|
@ -206,8 +206,10 @@
|
||||
(Rsh16x(64|32|16|8) x y) && shiftIsBounded(v) => (SARW x y)
|
||||
(Rsh8x(64|32|16|8) x y) && shiftIsBounded(v) => (SARB x y)
|
||||
|
||||
// Prefer SARX instruction because it has less register restriction on the shift input.
|
||||
// Prefer the SARX/SHLX/SHRX instructions because they have fewer register restrictions on the shift input.
|
||||
(SAR(Q|L) x y) && buildcfg.GOAMD64 >= 3 => (SARX(Q|L) x y)
|
||||
(SHL(Q|L) x y) && buildcfg.GOAMD64 >= 3 => (SHLX(Q|L) x y)
|
||||
(SHR(Q|L) x y) && buildcfg.GOAMD64 >= 3 => (SHRX(Q|L) x y)
|
||||
|
||||
// Lowering integer comparisons
|
||||
(Less(64|32|16|8) x y) => (SETL (CMP(Q|L|W|B) x y))
|
||||
@ -593,6 +595,8 @@
|
||||
// mutandis, for UGE and SETAE, and CC and SETCC.
|
||||
((NE|EQ) (TESTL (SHLL (MOVLconst [1]) x) y)) => ((ULT|UGE) (BTL x y))
|
||||
((NE|EQ) (TESTQ (SHLQ (MOVQconst [1]) x) y)) => ((ULT|UGE) (BTQ x y))
|
||||
((NE|EQ) (TESTL (SHLXL (MOVLconst [1]) x) y)) => ((ULT|UGE) (BTL x y))
|
||||
((NE|EQ) (TESTQ (SHLXQ (MOVQconst [1]) x) y)) => ((ULT|UGE) (BTQ x y))
|
||||
((NE|EQ) (TESTLconst [c] x)) && isUint32PowerOfTwo(int64(c))
|
||||
=> ((ULT|UGE) (BTLconst [int8(log32(c))] x))
|
||||
((NE|EQ) (TESTQconst [c] x)) && isUint64PowerOfTwo(int64(c))
|
||||
@ -601,6 +605,8 @@
|
||||
=> ((ULT|UGE) (BTQconst [int8(log64(c))] x))
|
||||
(SET(NE|EQ) (TESTL (SHLL (MOVLconst [1]) x) y)) => (SET(B|AE) (BTL x y))
|
||||
(SET(NE|EQ) (TESTQ (SHLQ (MOVQconst [1]) x) y)) => (SET(B|AE) (BTQ x y))
|
||||
(SET(NE|EQ) (TESTL (SHLXL (MOVLconst [1]) x) y)) => (SET(B|AE) (BTL x y))
|
||||
(SET(NE|EQ) (TESTQ (SHLXQ (MOVQconst [1]) x) y)) => (SET(B|AE) (BTQ x y))
|
||||
(SET(NE|EQ) (TESTLconst [c] x)) && isUint32PowerOfTwo(int64(c))
|
||||
=> (SET(B|AE) (BTLconst [int8(log32(c))] x))
|
||||
(SET(NE|EQ) (TESTQconst [c] x)) && isUint64PowerOfTwo(int64(c))
|
||||
@ -612,6 +618,10 @@
|
||||
=> (SET(B|AE)store [off] {sym} ptr (BTL x y) mem)
|
||||
(SET(NE|EQ)store [off] {sym} ptr (TESTQ (SHLQ (MOVQconst [1]) x) y) mem)
|
||||
=> (SET(B|AE)store [off] {sym} ptr (BTQ x y) mem)
|
||||
(SET(NE|EQ)store [off] {sym} ptr (TESTL (SHLXL (MOVLconst [1]) x) y) mem)
|
||||
=> (SET(B|AE)store [off] {sym} ptr (BTL x y) mem)
|
||||
(SET(NE|EQ)store [off] {sym} ptr (TESTQ (SHLXQ (MOVQconst [1]) x) y) mem)
|
||||
=> (SET(B|AE)store [off] {sym} ptr (BTQ x y) mem)
|
||||
(SET(NE|EQ)store [off] {sym} ptr (TESTLconst [c] x) mem) && isUint32PowerOfTwo(int64(c))
|
||||
=> (SET(B|AE)store [off] {sym} ptr (BTLconst [int8(log32(c))] x) mem)
|
||||
(SET(NE|EQ)store [off] {sym} ptr (TESTQconst [c] x) mem) && isUint64PowerOfTwo(int64(c))
|
||||
@ -624,9 +634,10 @@
|
||||
(BT(Q|L)const [c] (SHRQconst [d] x)) && (c+d)<64 => (BTQconst [c+d] x)
|
||||
(BT(Q|L)const [c] (SHLQconst [d] x)) && c>d => (BT(Q|L)const [c-d] x)
|
||||
(BT(Q|L)const [0] s:(SHRQ x y)) => (BTQ y x)
|
||||
(BT(Q|L)const [0] s:(SHRXQ x y)) => (BTQ y x)
|
||||
(BTLconst [c] (SHRLconst [d] x)) && (c+d)<32 => (BTLconst [c+d] x)
|
||||
(BTLconst [c] (SHLLconst [d] x)) && c>d => (BTLconst [c-d] x)
|
||||
(BTLconst [0] s:(SHRL x y)) => (BTL y x)
|
||||
(BTLconst [0] s:(SHR(L|XL) x y)) => (BTL y x)
|
||||
|
||||
// Rewrite a & 1 != 1 into a & 1 == 0.
|
||||
// Among other things, this lets us turn (a>>b)&1 != 1 into a bit test.
|
||||
@ -638,6 +649,8 @@
|
||||
// Recognize bit setting (a |= 1<<b) and toggling (a ^= 1<<b)
|
||||
(OR(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y) x) => (BTS(Q|L) x y)
|
||||
(XOR(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y) x) => (BTC(Q|L) x y)
|
||||
(OR(Q|L) (SHLX(Q|L) (MOV(Q|L)const [1]) y) x) => (BTS(Q|L) x y)
|
||||
(XOR(Q|L) (SHLX(Q|L) (MOV(Q|L)const [1]) y) x) => (BTC(Q|L) x y)
|
||||
|
||||
// Convert ORconst into BTS, if the code gets smaller, with boundary being
|
||||
// (ORL $40,AX is 3 bytes, ORL $80,AX is 6 bytes).
|
||||
@ -653,6 +666,8 @@
|
||||
// Recognize bit clearing: a &^= 1<<b
|
||||
(AND(Q|L) (NOT(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y)) x) => (BTR(Q|L) x y)
|
||||
(ANDN(Q|L) x (SHL(Q|L) (MOV(Q|L)const [1]) y)) => (BTR(Q|L) x y)
|
||||
(AND(Q|L) (NOT(Q|L) (SHLX(Q|L) (MOV(Q|L)const [1]) y)) x) => (BTR(Q|L) x y)
|
||||
(ANDN(Q|L) x (SHLX(Q|L) (MOV(Q|L)const [1]) y)) => (BTR(Q|L) x y)
|
||||
(ANDQconst [c] x) && isUint64PowerOfTwo(int64(^c)) && uint64(^c) >= 128
|
||||
=> (BTRQconst [int8(log32(^c))] x)
|
||||
(ANDLconst [c] x) && isUint32PowerOfTwo(int64(^c)) && uint64(^c) >= 128
|
||||
@ -794,6 +809,8 @@
|
||||
|
||||
(SHLQ x (MOV(Q|L)const [c])) => (SHLQconst [int8(c&63)] x)
|
||||
(SHLL x (MOV(Q|L)const [c])) => (SHLLconst [int8(c&31)] x)
|
||||
(SHLXQ x (MOV(Q|L)const [c])) => (SHLQconst [int8(c&63)] x)
|
||||
(SHLXL x (MOV(Q|L)const [c])) => (SHLLconst [int8(c&31)] x)
|
||||
|
||||
(SHRQ x (MOV(Q|L)const [c])) => (SHRQconst [int8(c&63)] x)
|
||||
(SHRL x (MOV(Q|L)const [c])) => (SHRLconst [int8(c&31)] x)
|
||||
@ -801,6 +818,8 @@
|
||||
(SHRW _ (MOV(Q|L)const [c])) && c&31 >= 16 => (MOVLconst [0])
|
||||
(SHRB x (MOV(Q|L)const [c])) && c&31 < 8 => (SHRBconst [int8(c&31)] x)
|
||||
(SHRB _ (MOV(Q|L)const [c])) && c&31 >= 8 => (MOVLconst [0])
|
||||
(SHRXQ x (MOV(Q|L)const [c])) => (SHRQconst [int8(c&63)] x)
|
||||
(SHRXL x (MOV(Q|L)const [c])) => (SHRLconst [int8(c&31)] x)
|
||||
|
||||
(SARQ x (MOV(Q|L)const [c])) => (SARQconst [int8(c&63)] x)
|
||||
(SARL x (MOV(Q|L)const [c])) => (SARLconst [int8(c&31)] x)
|
||||
@ -810,25 +829,25 @@
|
||||
(SARXL x (MOV(Q|L)const [c])) => (SARLconst [int8(c&31)] x)
|
||||
|
||||
// Operations which don't affect the low 6/5 bits of the shift amount are NOPs.
|
||||
((SHLQ|SHRQ|SARQ|SARXQ) x (ADDQconst [c] y)) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ|SARXQ) x y)
|
||||
((SHLQ|SHRQ|SARQ|SARXQ) x (NEGQ <t> (ADDQconst [c] y))) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ|SARXQ) x (NEGQ <t> y))
|
||||
((SHLQ|SHRQ|SARQ|SARXQ) x (ANDQconst [c] y)) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ|SARXQ) x y)
|
||||
((SHLQ|SHRQ|SARQ|SARXQ) x (NEGQ <t> (ANDQconst [c] y))) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ|SARXQ) x (NEGQ <t> y))
|
||||
((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (ADDQconst [c] y)) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x y)
|
||||
((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (NEGQ <t> (ADDQconst [c] y))) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (NEGQ <t> y))
|
||||
((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (ANDQconst [c] y)) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x y)
|
||||
((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (NEGQ <t> (ANDQconst [c] y))) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (NEGQ <t> y))
|
||||
|
||||
((SHLL|SHRL|SARL|SARXL) x (ADDQconst [c] y)) && c & 31 == 0 => ((SHLL|SHRL|SARL|SARXL) x y)
|
||||
((SHLL|SHRL|SARL|SARXL) x (NEGQ <t> (ADDQconst [c] y))) && c & 31 == 0 => ((SHLL|SHRL|SARL|SARXL) x (NEGQ <t> y))
|
||||
((SHLL|SHRL|SARL|SARXL) x (ANDQconst [c] y)) && c & 31 == 31 => ((SHLL|SHRL|SARL|SARXL) x y)
|
||||
((SHLL|SHRL|SARL|SARXL) x (NEGQ <t> (ANDQconst [c] y))) && c & 31 == 31 => ((SHLL|SHRL|SARL|SARXL) x (NEGQ <t> y))
|
||||
((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (ADDQconst [c] y)) && c & 31 == 0 => ((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x y)
|
||||
((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (NEGQ <t> (ADDQconst [c] y))) && c & 31 == 0 => ((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (NEGQ <t> y))
|
||||
((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (ANDQconst [c] y)) && c & 31 == 31 => ((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x y)
|
||||
((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (NEGQ <t> (ANDQconst [c] y))) && c & 31 == 31 => ((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (NEGQ <t> y))
|
||||
|
||||
((SHLQ|SHRQ|SARQ|SARXQ) x (ADDLconst [c] y)) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ|SARXQ) x y)
|
||||
((SHLQ|SHRQ|SARQ|SARXQ) x (NEGL <t> (ADDLconst [c] y))) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ|SARXQ) x (NEGL <t> y))
|
||||
((SHLQ|SHRQ|SARQ|SARXQ) x (ANDLconst [c] y)) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ|SARXQ) x y)
|
||||
((SHLQ|SHRQ|SARQ|SARXQ) x (NEGL <t> (ANDLconst [c] y))) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ|SARXQ) x (NEGL <t> y))
|
||||
((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (ADDLconst [c] y)) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x y)
|
||||
((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (NEGL <t> (ADDLconst [c] y))) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (NEGL <t> y))
|
||||
((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (ANDLconst [c] y)) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x y)
|
||||
((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (NEGL <t> (ANDLconst [c] y))) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (NEGL <t> y))
|
||||
|
||||
((SHLL|SHRL|SARL|SARXL) x (ADDLconst [c] y)) && c & 31 == 0 => ((SHLL|SHRL|SARL|SARXL) x y)
|
||||
((SHLL|SHRL|SARL|SARXL) x (NEGL <t> (ADDLconst [c] y))) && c & 31 == 0 => ((SHLL|SHRL|SARL|SARXL) x (NEGL <t> y))
|
||||
((SHLL|SHRL|SARL|SARXL) x (ANDLconst [c] y)) && c & 31 == 31 => ((SHLL|SHRL|SARL|SARXL) x y)
|
||||
((SHLL|SHRL|SARL|SARXL) x (NEGL <t> (ANDLconst [c] y))) && c & 31 == 31 => ((SHLL|SHRL|SARL|SARXL) x (NEGL <t> y))
|
||||
((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (ADDLconst [c] y)) && c & 31 == 0 => ((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x y)
|
||||
((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (NEGL <t> (ADDLconst [c] y))) && c & 31 == 0 => ((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (NEGL <t> y))
|
||||
((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (ANDLconst [c] y)) && c & 31 == 31 => ((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x y)
|
||||
((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (NEGL <t> (ANDLconst [c] y))) && c & 31 == 31 => ((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (NEGL <t> y))
|
||||
|
||||
// Constant rotate instructions
|
||||
((ADDQ|ORQ|XORQ) (SHLQconst x [c]) (SHRQconst x [d])) && d==64-c => (ROLQconst x [c])
|
||||
@ -860,9 +879,13 @@
|
||||
// it in order to strip it out.
|
||||
(ORQ (SHLQ x y) (ANDQ (SHRQ x (NEG(Q|L) y)) (SBBQcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [63]) [-64])) [64])))) => (ROLQ x y)
|
||||
(ORQ (SHRQ x y) (ANDQ (SHLQ x (NEG(Q|L) y)) (SBBQcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [63]) [-64])) [64])))) => (RORQ x y)
|
||||
(ORQ (SHLXQ x y) (ANDQ (SHRXQ x (NEG(Q|L) y)) (SBBQcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [63]) [-64])) [64])))) => (ROLQ x y)
|
||||
(ORQ (SHRXQ x y) (ANDQ (SHLXQ x (NEG(Q|L) y)) (SBBQcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [63]) [-64])) [64])))) => (RORQ x y)
|
||||
|
||||
(ORL (SHLL x y) (ANDL (SHRL x (NEG(Q|L) y)) (SBBLcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [31]) [-32])) [32])))) => (ROLL x y)
|
||||
(ORL (SHRL x y) (ANDL (SHLL x (NEG(Q|L) y)) (SBBLcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [31]) [-32])) [32])))) => (RORL x y)
|
||||
(ORL (SHLXL x y) (ANDL (SHRXL x (NEG(Q|L) y)) (SBBLcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [31]) [-32])) [32])))) => (ROLL x y)
|
||||
(ORL (SHRXL x y) (ANDL (SHLXL x (NEG(Q|L) y)) (SBBLcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [31]) [-32])) [32])))) => (RORL x y)
|
||||
|
||||
// Help with rotate detection
|
||||
(CMPQconst (NEGQ (ADDQconst [-16] (ANDQconst [15] _))) [32]) => (FlagLT_ULT)
|
||||
@ -877,6 +900,15 @@
|
||||
(SHLL x (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [15]) [-16]))))
|
||||
&& v.Type.Size() == 2
|
||||
=> (RORW x y)
|
||||
(ORL (SHLXL x (AND(Q|L)const y [15]))
|
||||
(ANDL (SHRW x (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [15]) [-16])))
|
||||
(SBBLcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [15]) [-16])) [16]))))
|
||||
&& v.Type.Size() == 2
|
||||
=> (ROLW x y)
|
||||
(ORL (SHRW x (AND(Q|L)const y [15]))
|
||||
(SHLXL x (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [15]) [-16]))))
|
||||
&& v.Type.Size() == 2
|
||||
=> (RORW x y)
|
||||
|
||||
(ORL (SHLL x (AND(Q|L)const y [ 7]))
|
||||
(ANDL (SHRB x (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [ 7]) [ -8])))
|
||||
@ -887,6 +919,15 @@
|
||||
(SHLL x (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [ 7]) [ -8]))))
|
||||
&& v.Type.Size() == 1
|
||||
=> (RORB x y)
|
||||
(ORL (SHLXL x (AND(Q|L)const y [ 7]))
|
||||
(ANDL (SHRB x (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [ 7]) [ -8])))
|
||||
(SBBLcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [ 7]) [ -8])) [ 8]))))
|
||||
&& v.Type.Size() == 1
|
||||
=> (ROLB x y)
|
||||
(ORL (SHRB x (AND(Q|L)const y [ 7]))
|
||||
(SHLXL x (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [ 7]) [ -8]))))
|
||||
&& v.Type.Size() == 1
|
||||
=> (RORB x y)
|
||||
|
||||
// rotate left negative = rotate right
|
||||
(ROLQ x (NEG(Q|L) y)) => (RORQ x y)
|
||||
@ -920,6 +961,7 @@
|
||||
|
||||
// Multi-register shifts
|
||||
(ORQ (SH(R|L)Q lo bits) (SH(L|R)Q hi (NEGQ bits))) => (SH(R|L)DQ lo hi bits)
|
||||
(ORQ (SH(R|L)XQ lo bits) (SH(L|R)XQ hi (NEGQ bits))) => (SH(R|L)DQ lo hi bits)
|
||||
|
||||
// Note: the word and byte shifts keep the low 5 bits (not the low 4 or 3 bits)
|
||||
// because the x86 instructions are defined to use all 5 bits of the shift even
|
||||
@ -2257,5 +2299,5 @@
|
||||
=> @mergePoint(b,x0,x1) (MOVBEQload [i] {s} p1 mem)
|
||||
|
||||
(SARX(Q|L) l:(MOV(Q|L)load [off] {sym} ptr mem) x) && canMergeLoad(v, l) && clobber(l) => (SARX(Q|L)load [off] {sym} ptr x mem)
|
||||
(SHL(Q|L) l:(MOV(Q|L)load [off] {sym} ptr mem) x) && buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l) => (SHLX(Q|L)load [off] {sym} ptr x mem)
|
||||
(SHR(Q|L) l:(MOV(Q|L)load [off] {sym} ptr mem) x) && buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l) => (SHRX(Q|L)load [off] {sym} ptr x mem)
|
||||
(SHLX(Q|L) l:(MOV(Q|L)load [off] {sym} ptr mem) x) && canMergeLoad(v, l) && clobber(l) => (SHLX(Q|L)load [off] {sym} ptr x mem)
|
||||
(SHRX(Q|L) l:(MOV(Q|L)load [off] {sym} ptr mem) x) && canMergeLoad(v, l) && clobber(l) => (SHRX(Q|L)load [off] {sym} ptr x mem)
|
||||
|
@ -955,6 +955,10 @@ func init() {
|
||||
// CPUID feature: BMI2.
|
||||
{name: "SARXQ", argLength: 2, reg: gp21, asm: "SARXQ"}, // signed arg0 >> arg1, shift amount is mod 64
|
||||
{name: "SARXL", argLength: 2, reg: gp21, asm: "SARXL"}, // signed int32(arg0) >> arg1, shift amount is mod 32
|
||||
{name: "SHLXQ", argLength: 2, reg: gp21, asm: "SHLXQ"}, // arg0 << arg1, shift amount is mod 64
|
||||
{name: "SHLXL", argLength: 2, reg: gp21, asm: "SHLXL"}, // arg0 << arg1, shift amount is mod 32
|
||||
{name: "SHRXQ", argLength: 2, reg: gp21, asm: "SHRXQ"}, // unsigned arg0 >> arg1, shift amount is mod 64
|
||||
{name: "SHRXL", argLength: 2, reg: gp21, asm: "SHRXL"}, // unsigned uint32(arg0) >> arg1, shift amount is mod 32
|
||||
|
||||
{name: "SARXLload", argLength: 3, reg: gp21shxload, asm: "SARXL", aux: "SymOff", typ: "Uint32", faultOnNilArg0: true, symEffect: "Read"}, // signed *(arg0+auxint+aux) >> arg1, arg2=mem, shift amount is mod 32
|
||||
{name: "SARXQload", argLength: 3, reg: gp21shxload, asm: "SARXQ", aux: "SymOff", typ: "Uint64", faultOnNilArg0: true, symEffect: "Read"}, // signed *(arg0+auxint+aux) >> arg1, arg2=mem, shift amount is mod 64
|
||||
|
@ -1064,6 +1064,10 @@ const (
|
||||
OpAMD64MOVBEQstoreidx8
|
||||
OpAMD64SARXQ
|
||||
OpAMD64SARXL
|
||||
OpAMD64SHLXQ
|
||||
OpAMD64SHLXL
|
||||
OpAMD64SHRXQ
|
||||
OpAMD64SHRXL
|
||||
OpAMD64SARXLload
|
||||
OpAMD64SARXQload
|
||||
OpAMD64SHLXLload
|
||||
@ -14154,6 +14158,62 @@ var opcodeTable = [...]opInfo{
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "SHLXQ",
|
||||
argLen: 2,
|
||||
asm: x86.ASHLXQ,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
{1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "SHLXL",
|
||||
argLen: 2,
|
||||
asm: x86.ASHLXL,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
{1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "SHRXQ",
|
||||
argLen: 2,
|
||||
asm: x86.ASHRXQ,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
{1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "SHRXL",
|
||||
argLen: 2,
|
||||
asm: x86.ASHRXL,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
{1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "SARXLload",
|
||||
auxType: auxSymOff,
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -72,7 +72,23 @@ func sarx32_load(x []int32, i int) int32 {
|
||||
return s
|
||||
}
|
||||
|
||||
func shlrx64(x []uint64, i int, s uint64) uint64 {
|
||||
// shlrx64 shifts x right by y and then shifts the result left by y, both
// with a variable (non-constant) 64-bit shift count, and returns the result.
//
// The `amd64/v3:"..."` comments name the instruction expected for the
// statement that follows (SHRXQ for the right shift, SHLXQ for the left
// shift). NOTE(review): these look like position-sensitive asm-check
// directives — keep each one directly above its statement and do not merge
// the two shifts into a single expression; confirm against the test harness.
func shlrx64(x, y uint64) uint64 {
|
||||
// amd64/v3:"SHRXQ"
|
||||
s := x >> y
|
||||
// amd64/v3:"SHLXQ"
|
||||
s = s << y
|
||||
return s
|
||||
}
|
||||
|
||||
// shlrx32 shifts x right by y and then shifts the result left by y, both
// with a variable (non-constant) 32-bit shift count, and returns the result.
//
// The `amd64/v3:"..."` comments name the instruction expected for the
// statement that follows (SHRXL for the right shift, SHLXL for the left
// shift). NOTE(review): these look like position-sensitive asm-check
// directives — keep each one directly above its statement and do not merge
// the two shifts into a single expression; confirm against the test harness.
func shlrx32(x, y uint32) uint32 {
|
||||
// amd64/v3:"SHRXL"
|
||||
s := x >> y
|
||||
// amd64/v3:"SHLXL"
|
||||
s = s << y
|
||||
return s
|
||||
}
|
||||
|
||||
func shlrx64_load(x []uint64, i int, s uint64) uint64 {
|
||||
// amd64/v3: `SHRXQ\t[A-Z]+[0-9]*, \([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
|
||||
s = x[i] >> i
|
||||
// amd64/v3: `SHLXQ\t[A-Z]+[0-9]*, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
|
||||
@ -80,7 +96,7 @@ func shlrx64(x []uint64, i int, s uint64) uint64 {
|
||||
return s
|
||||
}
|
||||
|
||||
func shlrx32(x []uint32, i int, s uint32) uint32 {
|
||||
func shlrx32_load(x []uint32, i int, s uint32) uint32 {
|
||||
// amd64/v3: `SHRXL\t[A-Z]+[0-9]*, \([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*`
|
||||
s = x[i] >> i
|
||||
// amd64/v3: `SHLXL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*`
|
||||
|
Loading…
Reference in New Issue
Block a user