mirror of
https://github.com/golang/go
synced 2024-11-11 19:21:37 -07:00
cmd/compile: add SARX instruction for GOAMD64>=3
name old time/op new time/op delta ShiftArithmeticRight-8 0.68ns ± 5% 0.30ns ± 6% -56.14% (p=0.000 n=10+10) Change-Id: I052a0d7b9e6526d526276444e588b0cc288beff4 Reviewed-on: https://go-review.googlesource.com/c/go/+/399055 Run-TryBot: Wayne Zuo <wdvxdr@golangcn.org> TryBot-Result: Gopher Robot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org> Auto-Submit: Keith Randall <khr@golang.org> Reviewed-by: Keith Randall <khr@google.com> Reviewed-by: Cherry Mui <cherryyz@google.com>
This commit is contained in:
parent
2b31abc528
commit
d6320f1a58
@ -282,6 +282,10 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
|
||||
p.To.Reg = v.Reg()
|
||||
p.SetFrom3Reg(v.Args[1].Reg())
|
||||
|
||||
case ssa.OpAMD64SARXL, ssa.OpAMD64SARXQ:
|
||||
p := opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg())
|
||||
p.SetFrom3Reg(v.Args[0].Reg())
|
||||
|
||||
case ssa.OpAMD64SHLXLload, ssa.OpAMD64SHLXQload,
|
||||
ssa.OpAMD64SHRXLload, ssa.OpAMD64SHRXQload:
|
||||
p := opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg())
|
||||
|
@ -239,6 +239,7 @@ var featureToOpcodes = map[string][]string{
|
||||
// native objdump doesn't include [QL] on linux.
|
||||
"popcnt": {"popcntq", "popcntl", "popcnt"},
|
||||
"bmi1": {"andnq", "andnl", "andn", "blsiq", "blsil", "blsi", "blsmskq", "blsmskl", "blsmsk", "blsrq", "blsrl", "blsr", "tzcntq", "tzcntl", "tzcnt"},
|
||||
"bmi2": {"sarxq", "sarxl", "sarx", "shlxq", "shlxl", "shlx", "shrxq", "shrxl", "shrx"},
|
||||
"sse41": {"roundsd"},
|
||||
"fma": {"vfmadd231sd"},
|
||||
"movbe": {"movbeqq", "movbeq", "movbell", "movbel", "movbe"},
|
||||
|
@ -206,6 +206,9 @@
|
||||
(Rsh16x(64|32|16|8) x y) && shiftIsBounded(v) => (SARW x y)
|
||||
(Rsh8x(64|32|16|8) x y) && shiftIsBounded(v) => (SARB x y)
|
||||
|
||||
// Prefer SARX instruction because it has less register restriction on the shift input.
|
||||
(SAR(Q|L) x y) && buildcfg.GOAMD64 >= 3 => (SARX(Q|L) x y)
|
||||
|
||||
// Lowering integer comparisons
|
||||
(Less(64|32|16|8) x y) => (SETL (CMP(Q|L|W|B) x y))
|
||||
(Less(64|32|16|8)U x y) => (SETB (CMP(Q|L|W|B) x y))
|
||||
@ -803,28 +806,29 @@
|
||||
(SARL x (MOV(Q|L)const [c])) => (SARLconst [int8(c&31)] x)
|
||||
(SARW x (MOV(Q|L)const [c])) => (SARWconst [int8(min(int64(c)&31,15))] x)
|
||||
(SARB x (MOV(Q|L)const [c])) => (SARBconst [int8(min(int64(c)&31,7))] x)
|
||||
|
||||
(SARXQ x (MOV(Q|L)const [c])) => (SARQconst [int8(c&63)] x)
|
||||
(SARXL x (MOV(Q|L)const [c])) => (SARLconst [int8(c&31)] x)
|
||||
|
||||
// Operations which don't affect the low 6/5 bits of the shift amount are NOPs.
|
||||
((SHLQ|SHRQ|SARQ) x (ADDQconst [c] y)) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ) x y)
|
||||
((SHLQ|SHRQ|SARQ) x (NEGQ <t> (ADDQconst [c] y))) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ) x (NEGQ <t> y))
|
||||
((SHLQ|SHRQ|SARQ) x (ANDQconst [c] y)) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ) x y)
|
||||
((SHLQ|SHRQ|SARQ) x (NEGQ <t> (ANDQconst [c] y))) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ) x (NEGQ <t> y))
|
||||
((SHLQ|SHRQ|SARQ|SARXQ) x (ADDQconst [c] y)) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ|SARXQ) x y)
|
||||
((SHLQ|SHRQ|SARQ|SARXQ) x (NEGQ <t> (ADDQconst [c] y))) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ|SARXQ) x (NEGQ <t> y))
|
||||
((SHLQ|SHRQ|SARQ|SARXQ) x (ANDQconst [c] y)) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ|SARXQ) x y)
|
||||
((SHLQ|SHRQ|SARQ|SARXQ) x (NEGQ <t> (ANDQconst [c] y))) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ|SARXQ) x (NEGQ <t> y))
|
||||
|
||||
((SHLL|SHRL|SARL) x (ADDQconst [c] y)) && c & 31 == 0 => ((SHLL|SHRL|SARL) x y)
|
||||
((SHLL|SHRL|SARL) x (NEGQ <t> (ADDQconst [c] y))) && c & 31 == 0 => ((SHLL|SHRL|SARL) x (NEGQ <t> y))
|
||||
((SHLL|SHRL|SARL) x (ANDQconst [c] y)) && c & 31 == 31 => ((SHLL|SHRL|SARL) x y)
|
||||
((SHLL|SHRL|SARL) x (NEGQ <t> (ANDQconst [c] y))) && c & 31 == 31 => ((SHLL|SHRL|SARL) x (NEGQ <t> y))
|
||||
((SHLL|SHRL|SARL|SARXL) x (ADDQconst [c] y)) && c & 31 == 0 => ((SHLL|SHRL|SARL|SARXL) x y)
|
||||
((SHLL|SHRL|SARL|SARXL) x (NEGQ <t> (ADDQconst [c] y))) && c & 31 == 0 => ((SHLL|SHRL|SARL|SARXL) x (NEGQ <t> y))
|
||||
((SHLL|SHRL|SARL|SARXL) x (ANDQconst [c] y)) && c & 31 == 31 => ((SHLL|SHRL|SARL|SARXL) x y)
|
||||
((SHLL|SHRL|SARL|SARXL) x (NEGQ <t> (ANDQconst [c] y))) && c & 31 == 31 => ((SHLL|SHRL|SARL|SARXL) x (NEGQ <t> y))
|
||||
|
||||
((SHLQ|SHRQ|SARQ) x (ADDLconst [c] y)) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ) x y)
|
||||
((SHLQ|SHRQ|SARQ) x (NEGL <t> (ADDLconst [c] y))) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ) x (NEGL <t> y))
|
||||
((SHLQ|SHRQ|SARQ) x (ANDLconst [c] y)) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ) x y)
|
||||
((SHLQ|SHRQ|SARQ) x (NEGL <t> (ANDLconst [c] y))) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ) x (NEGL <t> y))
|
||||
((SHLQ|SHRQ|SARQ|SARXQ) x (ADDLconst [c] y)) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ|SARXQ) x y)
|
||||
((SHLQ|SHRQ|SARQ|SARXQ) x (NEGL <t> (ADDLconst [c] y))) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ|SARXQ) x (NEGL <t> y))
|
||||
((SHLQ|SHRQ|SARQ|SARXQ) x (ANDLconst [c] y)) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ|SARXQ) x y)
|
||||
((SHLQ|SHRQ|SARQ|SARXQ) x (NEGL <t> (ANDLconst [c] y))) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ|SARXQ) x (NEGL <t> y))
|
||||
|
||||
((SHLL|SHRL|SARL) x (ADDLconst [c] y)) && c & 31 == 0 => ((SHLL|SHRL|SARL) x y)
|
||||
((SHLL|SHRL|SARL) x (NEGL <t> (ADDLconst [c] y))) && c & 31 == 0 => ((SHLL|SHRL|SARL) x (NEGL <t> y))
|
||||
((SHLL|SHRL|SARL) x (ANDLconst [c] y)) && c & 31 == 31 => ((SHLL|SHRL|SARL) x y)
|
||||
((SHLL|SHRL|SARL) x (NEGL <t> (ANDLconst [c] y))) && c & 31 == 31 => ((SHLL|SHRL|SARL) x (NEGL <t> y))
|
||||
((SHLL|SHRL|SARL|SARXL) x (ADDLconst [c] y)) && c & 31 == 0 => ((SHLL|SHRL|SARL|SARXL) x y)
|
||||
((SHLL|SHRL|SARL|SARXL) x (NEGL <t> (ADDLconst [c] y))) && c & 31 == 0 => ((SHLL|SHRL|SARL|SARXL) x (NEGL <t> y))
|
||||
((SHLL|SHRL|SARL|SARXL) x (ANDLconst [c] y)) && c & 31 == 31 => ((SHLL|SHRL|SARL|SARXL) x y)
|
||||
((SHLL|SHRL|SARL|SARXL) x (NEGL <t> (ANDLconst [c] y))) && c & 31 == 31 => ((SHLL|SHRL|SARL|SARXL) x (NEGL <t> y))
|
||||
|
||||
// Constant rotate instructions
|
||||
((ADDQ|ORQ|XORQ) (SHLQconst x [c]) (SHRQconst x [d])) && d==64-c => (ROLQconst x [c])
|
||||
|
@ -953,6 +953,9 @@ func init() {
|
||||
{name: "MOVBEQstoreidx8", argLength: 4, reg: gpstoreidx, asm: "MOVBEQ", scale: 8, aux: "SymOff", symEffect: "Write"}, // swap and store 8 bytes in arg2 to arg0+8*arg1+auxint+aux. arg3=mem
|
||||
|
||||
// CPUID feature: BMI2.
|
||||
{name: "SARXQ", argLength: 2, reg: gp21, asm: "SARXQ"}, // signed arg0 >> arg1, shift amount is mod 64
|
||||
{name: "SARXL", argLength: 2, reg: gp21, asm: "SARXL"}, // signed int32(arg0) >> arg1, shift amount is mod 32
|
||||
|
||||
{name: "SHLXLload", argLength: 3, reg: gp21shxload, asm: "SHLXL", aux: "SymOff", typ: "Uint32", faultOnNilArg0: true, symEffect: "Read"}, // *(arg0+auxint+aux) << arg1, arg2=mem, shift amount is mod 32
|
||||
{name: "SHLXQload", argLength: 3, reg: gp21shxload, asm: "SHLXQ", aux: "SymOff", typ: "Uint64", faultOnNilArg0: true, symEffect: "Read"}, // *(arg0+auxint+aux) << arg1, arg2=mem, shift amount is mod 64
|
||||
{name: "SHRXLload", argLength: 3, reg: gp21shxload, asm: "SHRXL", aux: "SymOff", typ: "Uint32", faultOnNilArg0: true, symEffect: "Read"}, // unsigned *(arg0+auxint+aux) >> arg1, arg2=mem, shift amount is mod 32
|
||||
|
@ -1062,6 +1062,8 @@ const (
|
||||
OpAMD64MOVBELstoreidx8
|
||||
OpAMD64MOVBEQstoreidx1
|
||||
OpAMD64MOVBEQstoreidx8
|
||||
OpAMD64SARXQ
|
||||
OpAMD64SARXL
|
||||
OpAMD64SHLXLload
|
||||
OpAMD64SHLXQload
|
||||
OpAMD64SHRXLload
|
||||
@ -14117,6 +14119,34 @@ var opcodeTable = [...]opInfo{
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "SARXQ",
|
||||
argLen: 2,
|
||||
asm: x86.ASARXQ,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
{1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "SARXL",
|
||||
argLen: 2,
|
||||
asm: x86.ASARXL,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
{1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "SHLXLload",
|
||||
auxType: auxSymOff,
|
||||
|
@ -382,6 +382,10 @@ func rewriteValueAMD64(v *Value) bool {
|
||||
return rewriteValueAMD64_OpAMD64SARW(v)
|
||||
case OpAMD64SARWconst:
|
||||
return rewriteValueAMD64_OpAMD64SARWconst(v)
|
||||
case OpAMD64SARXL:
|
||||
return rewriteValueAMD64_OpAMD64SARXL(v)
|
||||
case OpAMD64SARXQ:
|
||||
return rewriteValueAMD64_OpAMD64SARXQ(v)
|
||||
case OpAMD64SBBLcarrymask:
|
||||
return rewriteValueAMD64_OpAMD64SBBLcarrymask(v)
|
||||
case OpAMD64SBBQ:
|
||||
@ -19844,6 +19848,19 @@ func rewriteValueAMD64_OpAMD64SARL(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
b := v.Block
|
||||
// match: (SARL x y)
|
||||
// cond: buildcfg.GOAMD64 >= 3
|
||||
// result: (SARXL x y)
|
||||
for {
|
||||
x := v_0
|
||||
y := v_1
|
||||
if !(buildcfg.GOAMD64 >= 3) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64SARXL)
|
||||
v.AddArg2(x, y)
|
||||
return true
|
||||
}
|
||||
// match: (SARL x (MOVQconst [c]))
|
||||
// result: (SARLconst [int8(c&31)] x)
|
||||
for {
|
||||
@ -20066,6 +20083,19 @@ func rewriteValueAMD64_OpAMD64SARQ(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
b := v.Block
|
||||
// match: (SARQ x y)
|
||||
// cond: buildcfg.GOAMD64 >= 3
|
||||
// result: (SARXQ x y)
|
||||
for {
|
||||
x := v_0
|
||||
y := v_1
|
||||
if !(buildcfg.GOAMD64 >= 3) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64SARXQ)
|
||||
v.AddArg2(x, y)
|
||||
return true
|
||||
}
|
||||
// match: (SARQ x (MOVQconst [c]))
|
||||
// result: (SARQconst [int8(c&63)] x)
|
||||
for {
|
||||
@ -20341,6 +20371,398 @@ func rewriteValueAMD64_OpAMD64SARWconst(v *Value) bool {
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64SARXL(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
b := v.Block
|
||||
// match: (SARXL x (MOVQconst [c]))
|
||||
// result: (SARLconst [int8(c&31)] x)
|
||||
for {
|
||||
x := v_0
|
||||
if v_1.Op != OpAMD64MOVQconst {
|
||||
break
|
||||
}
|
||||
c := auxIntToInt64(v_1.AuxInt)
|
||||
v.reset(OpAMD64SARLconst)
|
||||
v.AuxInt = int8ToAuxInt(int8(c & 31))
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
// match: (SARXL x (MOVLconst [c]))
|
||||
// result: (SARLconst [int8(c&31)] x)
|
||||
for {
|
||||
x := v_0
|
||||
if v_1.Op != OpAMD64MOVLconst {
|
||||
break
|
||||
}
|
||||
c := auxIntToInt32(v_1.AuxInt)
|
||||
v.reset(OpAMD64SARLconst)
|
||||
v.AuxInt = int8ToAuxInt(int8(c & 31))
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
// match: (SARXL x (ADDQconst [c] y))
|
||||
// cond: c & 31 == 0
|
||||
// result: (SARXL x y)
|
||||
for {
|
||||
x := v_0
|
||||
if v_1.Op != OpAMD64ADDQconst {
|
||||
break
|
||||
}
|
||||
c := auxIntToInt32(v_1.AuxInt)
|
||||
y := v_1.Args[0]
|
||||
if !(c&31 == 0) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64SARXL)
|
||||
v.AddArg2(x, y)
|
||||
return true
|
||||
}
|
||||
// match: (SARXL x (NEGQ <t> (ADDQconst [c] y)))
|
||||
// cond: c & 31 == 0
|
||||
// result: (SARXL x (NEGQ <t> y))
|
||||
for {
|
||||
x := v_0
|
||||
if v_1.Op != OpAMD64NEGQ {
|
||||
break
|
||||
}
|
||||
t := v_1.Type
|
||||
v_1_0 := v_1.Args[0]
|
||||
if v_1_0.Op != OpAMD64ADDQconst {
|
||||
break
|
||||
}
|
||||
c := auxIntToInt32(v_1_0.AuxInt)
|
||||
y := v_1_0.Args[0]
|
||||
if !(c&31 == 0) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64SARXL)
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t)
|
||||
v0.AddArg(y)
|
||||
v.AddArg2(x, v0)
|
||||
return true
|
||||
}
|
||||
// match: (SARXL x (ANDQconst [c] y))
|
||||
// cond: c & 31 == 31
|
||||
// result: (SARXL x y)
|
||||
for {
|
||||
x := v_0
|
||||
if v_1.Op != OpAMD64ANDQconst {
|
||||
break
|
||||
}
|
||||
c := auxIntToInt32(v_1.AuxInt)
|
||||
y := v_1.Args[0]
|
||||
if !(c&31 == 31) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64SARXL)
|
||||
v.AddArg2(x, y)
|
||||
return true
|
||||
}
|
||||
// match: (SARXL x (NEGQ <t> (ANDQconst [c] y)))
|
||||
// cond: c & 31 == 31
|
||||
// result: (SARXL x (NEGQ <t> y))
|
||||
for {
|
||||
x := v_0
|
||||
if v_1.Op != OpAMD64NEGQ {
|
||||
break
|
||||
}
|
||||
t := v_1.Type
|
||||
v_1_0 := v_1.Args[0]
|
||||
if v_1_0.Op != OpAMD64ANDQconst {
|
||||
break
|
||||
}
|
||||
c := auxIntToInt32(v_1_0.AuxInt)
|
||||
y := v_1_0.Args[0]
|
||||
if !(c&31 == 31) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64SARXL)
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t)
|
||||
v0.AddArg(y)
|
||||
v.AddArg2(x, v0)
|
||||
return true
|
||||
}
|
||||
// match: (SARXL x (ADDLconst [c] y))
|
||||
// cond: c & 31 == 0
|
||||
// result: (SARXL x y)
|
||||
for {
|
||||
x := v_0
|
||||
if v_1.Op != OpAMD64ADDLconst {
|
||||
break
|
||||
}
|
||||
c := auxIntToInt32(v_1.AuxInt)
|
||||
y := v_1.Args[0]
|
||||
if !(c&31 == 0) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64SARXL)
|
||||
v.AddArg2(x, y)
|
||||
return true
|
||||
}
|
||||
// match: (SARXL x (NEGL <t> (ADDLconst [c] y)))
|
||||
// cond: c & 31 == 0
|
||||
// result: (SARXL x (NEGL <t> y))
|
||||
for {
|
||||
x := v_0
|
||||
if v_1.Op != OpAMD64NEGL {
|
||||
break
|
||||
}
|
||||
t := v_1.Type
|
||||
v_1_0 := v_1.Args[0]
|
||||
if v_1_0.Op != OpAMD64ADDLconst {
|
||||
break
|
||||
}
|
||||
c := auxIntToInt32(v_1_0.AuxInt)
|
||||
y := v_1_0.Args[0]
|
||||
if !(c&31 == 0) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64SARXL)
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t)
|
||||
v0.AddArg(y)
|
||||
v.AddArg2(x, v0)
|
||||
return true
|
||||
}
|
||||
// match: (SARXL x (ANDLconst [c] y))
|
||||
// cond: c & 31 == 31
|
||||
// result: (SARXL x y)
|
||||
for {
|
||||
x := v_0
|
||||
if v_1.Op != OpAMD64ANDLconst {
|
||||
break
|
||||
}
|
||||
c := auxIntToInt32(v_1.AuxInt)
|
||||
y := v_1.Args[0]
|
||||
if !(c&31 == 31) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64SARXL)
|
||||
v.AddArg2(x, y)
|
||||
return true
|
||||
}
|
||||
// match: (SARXL x (NEGL <t> (ANDLconst [c] y)))
|
||||
// cond: c & 31 == 31
|
||||
// result: (SARXL x (NEGL <t> y))
|
||||
for {
|
||||
x := v_0
|
||||
if v_1.Op != OpAMD64NEGL {
|
||||
break
|
||||
}
|
||||
t := v_1.Type
|
||||
v_1_0 := v_1.Args[0]
|
||||
if v_1_0.Op != OpAMD64ANDLconst {
|
||||
break
|
||||
}
|
||||
c := auxIntToInt32(v_1_0.AuxInt)
|
||||
y := v_1_0.Args[0]
|
||||
if !(c&31 == 31) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64SARXL)
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t)
|
||||
v0.AddArg(y)
|
||||
v.AddArg2(x, v0)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64SARXQ(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
b := v.Block
|
||||
// match: (SARXQ x (MOVQconst [c]))
|
||||
// result: (SARQconst [int8(c&63)] x)
|
||||
for {
|
||||
x := v_0
|
||||
if v_1.Op != OpAMD64MOVQconst {
|
||||
break
|
||||
}
|
||||
c := auxIntToInt64(v_1.AuxInt)
|
||||
v.reset(OpAMD64SARQconst)
|
||||
v.AuxInt = int8ToAuxInt(int8(c & 63))
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
// match: (SARXQ x (MOVLconst [c]))
|
||||
// result: (SARQconst [int8(c&63)] x)
|
||||
for {
|
||||
x := v_0
|
||||
if v_1.Op != OpAMD64MOVLconst {
|
||||
break
|
||||
}
|
||||
c := auxIntToInt32(v_1.AuxInt)
|
||||
v.reset(OpAMD64SARQconst)
|
||||
v.AuxInt = int8ToAuxInt(int8(c & 63))
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
// match: (SARXQ x (ADDQconst [c] y))
|
||||
// cond: c & 63 == 0
|
||||
// result: (SARXQ x y)
|
||||
for {
|
||||
x := v_0
|
||||
if v_1.Op != OpAMD64ADDQconst {
|
||||
break
|
||||
}
|
||||
c := auxIntToInt32(v_1.AuxInt)
|
||||
y := v_1.Args[0]
|
||||
if !(c&63 == 0) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64SARXQ)
|
||||
v.AddArg2(x, y)
|
||||
return true
|
||||
}
|
||||
// match: (SARXQ x (NEGQ <t> (ADDQconst [c] y)))
|
||||
// cond: c & 63 == 0
|
||||
// result: (SARXQ x (NEGQ <t> y))
|
||||
for {
|
||||
x := v_0
|
||||
if v_1.Op != OpAMD64NEGQ {
|
||||
break
|
||||
}
|
||||
t := v_1.Type
|
||||
v_1_0 := v_1.Args[0]
|
||||
if v_1_0.Op != OpAMD64ADDQconst {
|
||||
break
|
||||
}
|
||||
c := auxIntToInt32(v_1_0.AuxInt)
|
||||
y := v_1_0.Args[0]
|
||||
if !(c&63 == 0) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64SARXQ)
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t)
|
||||
v0.AddArg(y)
|
||||
v.AddArg2(x, v0)
|
||||
return true
|
||||
}
|
||||
// match: (SARXQ x (ANDQconst [c] y))
|
||||
// cond: c & 63 == 63
|
||||
// result: (SARXQ x y)
|
||||
for {
|
||||
x := v_0
|
||||
if v_1.Op != OpAMD64ANDQconst {
|
||||
break
|
||||
}
|
||||
c := auxIntToInt32(v_1.AuxInt)
|
||||
y := v_1.Args[0]
|
||||
if !(c&63 == 63) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64SARXQ)
|
||||
v.AddArg2(x, y)
|
||||
return true
|
||||
}
|
||||
// match: (SARXQ x (NEGQ <t> (ANDQconst [c] y)))
|
||||
// cond: c & 63 == 63
|
||||
// result: (SARXQ x (NEGQ <t> y))
|
||||
for {
|
||||
x := v_0
|
||||
if v_1.Op != OpAMD64NEGQ {
|
||||
break
|
||||
}
|
||||
t := v_1.Type
|
||||
v_1_0 := v_1.Args[0]
|
||||
if v_1_0.Op != OpAMD64ANDQconst {
|
||||
break
|
||||
}
|
||||
c := auxIntToInt32(v_1_0.AuxInt)
|
||||
y := v_1_0.Args[0]
|
||||
if !(c&63 == 63) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64SARXQ)
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t)
|
||||
v0.AddArg(y)
|
||||
v.AddArg2(x, v0)
|
||||
return true
|
||||
}
|
||||
// match: (SARXQ x (ADDLconst [c] y))
|
||||
// cond: c & 63 == 0
|
||||
// result: (SARXQ x y)
|
||||
for {
|
||||
x := v_0
|
||||
if v_1.Op != OpAMD64ADDLconst {
|
||||
break
|
||||
}
|
||||
c := auxIntToInt32(v_1.AuxInt)
|
||||
y := v_1.Args[0]
|
||||
if !(c&63 == 0) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64SARXQ)
|
||||
v.AddArg2(x, y)
|
||||
return true
|
||||
}
|
||||
// match: (SARXQ x (NEGL <t> (ADDLconst [c] y)))
|
||||
// cond: c & 63 == 0
|
||||
// result: (SARXQ x (NEGL <t> y))
|
||||
for {
|
||||
x := v_0
|
||||
if v_1.Op != OpAMD64NEGL {
|
||||
break
|
||||
}
|
||||
t := v_1.Type
|
||||
v_1_0 := v_1.Args[0]
|
||||
if v_1_0.Op != OpAMD64ADDLconst {
|
||||
break
|
||||
}
|
||||
c := auxIntToInt32(v_1_0.AuxInt)
|
||||
y := v_1_0.Args[0]
|
||||
if !(c&63 == 0) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64SARXQ)
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t)
|
||||
v0.AddArg(y)
|
||||
v.AddArg2(x, v0)
|
||||
return true
|
||||
}
|
||||
// match: (SARXQ x (ANDLconst [c] y))
|
||||
// cond: c & 63 == 63
|
||||
// result: (SARXQ x y)
|
||||
for {
|
||||
x := v_0
|
||||
if v_1.Op != OpAMD64ANDLconst {
|
||||
break
|
||||
}
|
||||
c := auxIntToInt32(v_1.AuxInt)
|
||||
y := v_1.Args[0]
|
||||
if !(c&63 == 63) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64SARXQ)
|
||||
v.AddArg2(x, y)
|
||||
return true
|
||||
}
|
||||
// match: (SARXQ x (NEGL <t> (ANDLconst [c] y)))
|
||||
// cond: c & 63 == 63
|
||||
// result: (SARXQ x (NEGL <t> y))
|
||||
for {
|
||||
x := v_0
|
||||
if v_1.Op != OpAMD64NEGL {
|
||||
break
|
||||
}
|
||||
t := v_1.Type
|
||||
v_1_0 := v_1.Args[0]
|
||||
if v_1_0.Op != OpAMD64ANDLconst {
|
||||
break
|
||||
}
|
||||
c := auxIntToInt32(v_1_0.AuxInt)
|
||||
y := v_1_0.Args[0]
|
||||
if !(c&63 == 63) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64SARXQ)
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t)
|
||||
v0.AddArg(y)
|
||||
v.AddArg2(x, v0)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64SBBLcarrymask(v *Value) bool {
|
||||
v_0 := v.Args[0]
|
||||
// match: (SBBLcarrymask (FlagEQ))
|
||||
|
@ -1029,3 +1029,13 @@ func TestShiftGeneric(t *testing.T) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var shiftSink64 int64
|
||||
|
||||
func BenchmarkShiftArithmeticRight(b *testing.B) {
|
||||
x := shiftSink64
|
||||
for i := 0; i < b.N; i++ {
|
||||
x = x >> (i & 63)
|
||||
}
|
||||
shiftSink64 = x
|
||||
}
|
||||
|
@ -46,6 +46,16 @@ func blsr32(x int32) int32 {
|
||||
return x & (x - 1)
|
||||
}
|
||||
|
||||
func sarx64(x, y int64) int64 {
|
||||
// amd64/v3:"SARXQ"
|
||||
return x >> y
|
||||
}
|
||||
|
||||
func sarx32(x, y int32) int32 {
|
||||
// amd64/v3:"SARXL"
|
||||
return x >> y
|
||||
}
|
||||
|
||||
func shlrx64(x []uint64, i int, s uint64) uint64 {
|
||||
// amd64/v3: `SHRXQ\t[A-Z]+[0-9]*, \([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
|
||||
s = x[i] >> i
|
||||
|
Loading…
Reference in New Issue
Block a user