1
0
mirror of https://github.com/golang/go synced 2024-11-11 19:21:37 -07:00

cmd/compile: add SARX instruction for GOAMD64>=3

name                    old time/op  new time/op  delta
ShiftArithmeticRight-8  0.68ns ± 5%  0.30ns ± 6%  -56.14%  (p=0.000 n=10+10)

Change-Id: I052a0d7b9e6526d526276444e588b0cc288beff4
Reviewed-on: https://go-review.googlesource.com/c/go/+/399055
Run-TryBot: Wayne Zuo <wdvxdr@golangcn.org>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
Auto-Submit: Keith Randall <khr@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
This commit is contained in:
Wayne Zuo 2022-04-08 16:44:13 +08:00 committed by Gopher Robot
parent 2b31abc528
commit d6320f1a58
8 changed files with 501 additions and 17 deletions

View File

@ -282,6 +282,10 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
p.To.Reg = v.Reg()
p.SetFrom3Reg(v.Args[1].Reg())
case ssa.OpAMD64SARXL, ssa.OpAMD64SARXQ:
p := opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg())
p.SetFrom3Reg(v.Args[0].Reg())
case ssa.OpAMD64SHLXLload, ssa.OpAMD64SHLXQload,
ssa.OpAMD64SHRXLload, ssa.OpAMD64SHRXQload:
p := opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg())

View File

@ -239,6 +239,7 @@ var featureToOpcodes = map[string][]string{
// native objdump doesn't include [QL] on linux.
"popcnt": {"popcntq", "popcntl", "popcnt"},
"bmi1": {"andnq", "andnl", "andn", "blsiq", "blsil", "blsi", "blsmskq", "blsmskl", "blsmsk", "blsrq", "blsrl", "blsr", "tzcntq", "tzcntl", "tzcnt"},
"bmi2": {"sarxq", "sarxl", "sarx", "shlxq", "shlxl", "shlx", "shrxq", "shrxl", "shrx"},
"sse41": {"roundsd"},
"fma": {"vfmadd231sd"},
"movbe": {"movbeqq", "movbeq", "movbell", "movbel", "movbe"},

View File

@ -206,6 +206,9 @@
(Rsh16x(64|32|16|8) x y) && shiftIsBounded(v) => (SARW x y)
(Rsh8x(64|32|16|8) x y) && shiftIsBounded(v) => (SARB x y)
// Prefer the SARX instruction because it has fewer register restrictions on the shift input.
(SAR(Q|L) x y) && buildcfg.GOAMD64 >= 3 => (SARX(Q|L) x y)
// Lowering integer comparisons
(Less(64|32|16|8) x y) => (SETL (CMP(Q|L|W|B) x y))
(Less(64|32|16|8)U x y) => (SETB (CMP(Q|L|W|B) x y))
@ -803,28 +806,29 @@
(SARL x (MOV(Q|L)const [c])) => (SARLconst [int8(c&31)] x)
(SARW x (MOV(Q|L)const [c])) => (SARWconst [int8(min(int64(c)&31,15))] x)
(SARB x (MOV(Q|L)const [c])) => (SARBconst [int8(min(int64(c)&31,7))] x)
(SARXQ x (MOV(Q|L)const [c])) => (SARQconst [int8(c&63)] x)
(SARXL x (MOV(Q|L)const [c])) => (SARLconst [int8(c&31)] x)
// Operations which don't affect the low 6/5 bits of the shift amount are NOPs.
((SHLQ|SHRQ|SARQ) x (ADDQconst [c] y)) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ) x y)
((SHLQ|SHRQ|SARQ) x (NEGQ <t> (ADDQconst [c] y))) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ) x (NEGQ <t> y))
((SHLQ|SHRQ|SARQ) x (ANDQconst [c] y)) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ) x y)
((SHLQ|SHRQ|SARQ) x (NEGQ <t> (ANDQconst [c] y))) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ) x (NEGQ <t> y))
((SHLQ|SHRQ|SARQ|SARXQ) x (ADDQconst [c] y)) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ|SARXQ) x y)
((SHLQ|SHRQ|SARQ|SARXQ) x (NEGQ <t> (ADDQconst [c] y))) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ|SARXQ) x (NEGQ <t> y))
((SHLQ|SHRQ|SARQ|SARXQ) x (ANDQconst [c] y)) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ|SARXQ) x y)
((SHLQ|SHRQ|SARQ|SARXQ) x (NEGQ <t> (ANDQconst [c] y))) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ|SARXQ) x (NEGQ <t> y))
((SHLL|SHRL|SARL) x (ADDQconst [c] y)) && c & 31 == 0 => ((SHLL|SHRL|SARL) x y)
((SHLL|SHRL|SARL) x (NEGQ <t> (ADDQconst [c] y))) && c & 31 == 0 => ((SHLL|SHRL|SARL) x (NEGQ <t> y))
((SHLL|SHRL|SARL) x (ANDQconst [c] y)) && c & 31 == 31 => ((SHLL|SHRL|SARL) x y)
((SHLL|SHRL|SARL) x (NEGQ <t> (ANDQconst [c] y))) && c & 31 == 31 => ((SHLL|SHRL|SARL) x (NEGQ <t> y))
((SHLL|SHRL|SARL|SARXL) x (ADDQconst [c] y)) && c & 31 == 0 => ((SHLL|SHRL|SARL|SARXL) x y)
((SHLL|SHRL|SARL|SARXL) x (NEGQ <t> (ADDQconst [c] y))) && c & 31 == 0 => ((SHLL|SHRL|SARL|SARXL) x (NEGQ <t> y))
((SHLL|SHRL|SARL|SARXL) x (ANDQconst [c] y)) && c & 31 == 31 => ((SHLL|SHRL|SARL|SARXL) x y)
((SHLL|SHRL|SARL|SARXL) x (NEGQ <t> (ANDQconst [c] y))) && c & 31 == 31 => ((SHLL|SHRL|SARL|SARXL) x (NEGQ <t> y))
((SHLQ|SHRQ|SARQ) x (ADDLconst [c] y)) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ) x y)
((SHLQ|SHRQ|SARQ) x (NEGL <t> (ADDLconst [c] y))) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ) x (NEGL <t> y))
((SHLQ|SHRQ|SARQ) x (ANDLconst [c] y)) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ) x y)
((SHLQ|SHRQ|SARQ) x (NEGL <t> (ANDLconst [c] y))) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ) x (NEGL <t> y))
((SHLQ|SHRQ|SARQ|SARXQ) x (ADDLconst [c] y)) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ|SARXQ) x y)
((SHLQ|SHRQ|SARQ|SARXQ) x (NEGL <t> (ADDLconst [c] y))) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ|SARXQ) x (NEGL <t> y))
((SHLQ|SHRQ|SARQ|SARXQ) x (ANDLconst [c] y)) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ|SARXQ) x y)
((SHLQ|SHRQ|SARQ|SARXQ) x (NEGL <t> (ANDLconst [c] y))) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ|SARXQ) x (NEGL <t> y))
((SHLL|SHRL|SARL) x (ADDLconst [c] y)) && c & 31 == 0 => ((SHLL|SHRL|SARL) x y)
((SHLL|SHRL|SARL) x (NEGL <t> (ADDLconst [c] y))) && c & 31 == 0 => ((SHLL|SHRL|SARL) x (NEGL <t> y))
((SHLL|SHRL|SARL) x (ANDLconst [c] y)) && c & 31 == 31 => ((SHLL|SHRL|SARL) x y)
((SHLL|SHRL|SARL) x (NEGL <t> (ANDLconst [c] y))) && c & 31 == 31 => ((SHLL|SHRL|SARL) x (NEGL <t> y))
((SHLL|SHRL|SARL|SARXL) x (ADDLconst [c] y)) && c & 31 == 0 => ((SHLL|SHRL|SARL|SARXL) x y)
((SHLL|SHRL|SARL|SARXL) x (NEGL <t> (ADDLconst [c] y))) && c & 31 == 0 => ((SHLL|SHRL|SARL|SARXL) x (NEGL <t> y))
((SHLL|SHRL|SARL|SARXL) x (ANDLconst [c] y)) && c & 31 == 31 => ((SHLL|SHRL|SARL|SARXL) x y)
((SHLL|SHRL|SARL|SARXL) x (NEGL <t> (ANDLconst [c] y))) && c & 31 == 31 => ((SHLL|SHRL|SARL|SARXL) x (NEGL <t> y))
// Constant rotate instructions
((ADDQ|ORQ|XORQ) (SHLQconst x [c]) (SHRQconst x [d])) && d==64-c => (ROLQconst x [c])

View File

@ -953,6 +953,9 @@ func init() {
{name: "MOVBEQstoreidx8", argLength: 4, reg: gpstoreidx, asm: "MOVBEQ", scale: 8, aux: "SymOff", symEffect: "Write"}, // swap and store 8 bytes in arg2 to arg0+8*arg1+auxint+aux. arg3=mem
// CPUID feature: BMI2.
{name: "SARXQ", argLength: 2, reg: gp21, asm: "SARXQ"}, // signed arg0 >> arg1, shift amount is mod 64
{name: "SARXL", argLength: 2, reg: gp21, asm: "SARXL"}, // signed int32(arg0) >> arg1, shift amount is mod 32
{name: "SHLXLload", argLength: 3, reg: gp21shxload, asm: "SHLXL", aux: "SymOff", typ: "Uint32", faultOnNilArg0: true, symEffect: "Read"}, // *(arg0+auxint+aux) << arg1, arg2=mem, shift amount is mod 32
{name: "SHLXQload", argLength: 3, reg: gp21shxload, asm: "SHLXQ", aux: "SymOff", typ: "Uint64", faultOnNilArg0: true, symEffect: "Read"}, // *(arg0+auxint+aux) << arg1, arg2=mem, shift amount is mod 64
{name: "SHRXLload", argLength: 3, reg: gp21shxload, asm: "SHRXL", aux: "SymOff", typ: "Uint32", faultOnNilArg0: true, symEffect: "Read"}, // unsigned *(arg0+auxint+aux) >> arg1, arg2=mem, shift amount is mod 32

View File

@ -1062,6 +1062,8 @@ const (
OpAMD64MOVBELstoreidx8
OpAMD64MOVBEQstoreidx1
OpAMD64MOVBEQstoreidx8
OpAMD64SARXQ
OpAMD64SARXL
OpAMD64SHLXLload
OpAMD64SHLXQload
OpAMD64SHRXLload
@ -14117,6 +14119,34 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "SARXQ",
argLen: 2,
asm: x86.ASARXQ,
reg: regInfo{
inputs: []inputInfo{
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
{1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
outputs: []outputInfo{
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
},
},
{
name: "SARXL",
argLen: 2,
asm: x86.ASARXL,
reg: regInfo{
inputs: []inputInfo{
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
{1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
outputs: []outputInfo{
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
},
},
{
name: "SHLXLload",
auxType: auxSymOff,

View File

@ -382,6 +382,10 @@ func rewriteValueAMD64(v *Value) bool {
return rewriteValueAMD64_OpAMD64SARW(v)
case OpAMD64SARWconst:
return rewriteValueAMD64_OpAMD64SARWconst(v)
case OpAMD64SARXL:
return rewriteValueAMD64_OpAMD64SARXL(v)
case OpAMD64SARXQ:
return rewriteValueAMD64_OpAMD64SARXQ(v)
case OpAMD64SBBLcarrymask:
return rewriteValueAMD64_OpAMD64SBBLcarrymask(v)
case OpAMD64SBBQ:
@ -19844,6 +19848,19 @@ func rewriteValueAMD64_OpAMD64SARL(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
// match: (SARL x y)
// cond: buildcfg.GOAMD64 >= 3
// result: (SARXL x y)
for {
x := v_0
y := v_1
if !(buildcfg.GOAMD64 >= 3) {
break
}
v.reset(OpAMD64SARXL)
v.AddArg2(x, y)
return true
}
// match: (SARL x (MOVQconst [c]))
// result: (SARLconst [int8(c&31)] x)
for {
@ -20066,6 +20083,19 @@ func rewriteValueAMD64_OpAMD64SARQ(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
// match: (SARQ x y)
// cond: buildcfg.GOAMD64 >= 3
// result: (SARXQ x y)
for {
x := v_0
y := v_1
if !(buildcfg.GOAMD64 >= 3) {
break
}
v.reset(OpAMD64SARXQ)
v.AddArg2(x, y)
return true
}
// match: (SARQ x (MOVQconst [c]))
// result: (SARQconst [int8(c&63)] x)
for {
@ -20341,6 +20371,398 @@ func rewriteValueAMD64_OpAMD64SARWconst(v *Value) bool {
}
return false
}
// rewriteValueAMD64_OpAMD64SARXL tries the SARXL rewrite rules below, in order,
// against v and applies the first one that matches.
//
// A constant shift count (MOVQconst or MOVLconst) turns v into SARLconst with
// the count masked to its low 5 bits. An ADDQconst/ADDLconst whose constant has
// its low 5 bits clear, or an ANDQconst/ANDLconst whose constant has its low 5
// bits all set, is stripped from the shift count, both when it is the count
// itself and when it is wrapped in a NEGQ/NEGL.
//
// It reports whether v was rewritten.
func rewriteValueAMD64_OpAMD64SARXL(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
// b is the block in which replacement NEGQ/NEGL values are created.
b := v.Block
// match: (SARXL x (MOVQconst [c]))
// result: (SARLconst [int8(c&31)] x)
for {
x := v_0
if v_1.Op != OpAMD64MOVQconst {
break
}
c := auxIntToInt64(v_1.AuxInt)
v.reset(OpAMD64SARLconst)
v.AuxInt = int8ToAuxInt(int8(c & 31))
v.AddArg(x)
return true
}
// match: (SARXL x (MOVLconst [c]))
// result: (SARLconst [int8(c&31)] x)
for {
x := v_0
if v_1.Op != OpAMD64MOVLconst {
break
}
c := auxIntToInt32(v_1.AuxInt)
v.reset(OpAMD64SARLconst)
v.AuxInt = int8ToAuxInt(int8(c & 31))
v.AddArg(x)
return true
}
// match: (SARXL x (ADDQconst [c] y))
// cond: c & 31 == 0
// result: (SARXL x y)
for {
x := v_0
if v_1.Op != OpAMD64ADDQconst {
break
}
c := auxIntToInt32(v_1.AuxInt)
y := v_1.Args[0]
if !(c&31 == 0) {
break
}
v.reset(OpAMD64SARXL)
v.AddArg2(x, y)
return true
}
// match: (SARXL x (NEGQ <t> (ADDQconst [c] y)))
// cond: c & 31 == 0
// result: (SARXL x (NEGQ <t> y))
for {
x := v_0
if v_1.Op != OpAMD64NEGQ {
break
}
t := v_1.Type
v_1_0 := v_1.Args[0]
if v_1_0.Op != OpAMD64ADDQconst {
break
}
c := auxIntToInt32(v_1_0.AuxInt)
y := v_1_0.Args[0]
if !(c&31 == 0) {
break
}
v.reset(OpAMD64SARXL)
v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t)
v0.AddArg(y)
v.AddArg2(x, v0)
return true
}
// match: (SARXL x (ANDQconst [c] y))
// cond: c & 31 == 31
// result: (SARXL x y)
for {
x := v_0
if v_1.Op != OpAMD64ANDQconst {
break
}
c := auxIntToInt32(v_1.AuxInt)
y := v_1.Args[0]
if !(c&31 == 31) {
break
}
v.reset(OpAMD64SARXL)
v.AddArg2(x, y)
return true
}
// match: (SARXL x (NEGQ <t> (ANDQconst [c] y)))
// cond: c & 31 == 31
// result: (SARXL x (NEGQ <t> y))
for {
x := v_0
if v_1.Op != OpAMD64NEGQ {
break
}
t := v_1.Type
v_1_0 := v_1.Args[0]
if v_1_0.Op != OpAMD64ANDQconst {
break
}
c := auxIntToInt32(v_1_0.AuxInt)
y := v_1_0.Args[0]
if !(c&31 == 31) {
break
}
v.reset(OpAMD64SARXL)
v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t)
v0.AddArg(y)
v.AddArg2(x, v0)
return true
}
// match: (SARXL x (ADDLconst [c] y))
// cond: c & 31 == 0
// result: (SARXL x y)
for {
x := v_0
if v_1.Op != OpAMD64ADDLconst {
break
}
c := auxIntToInt32(v_1.AuxInt)
y := v_1.Args[0]
if !(c&31 == 0) {
break
}
v.reset(OpAMD64SARXL)
v.AddArg2(x, y)
return true
}
// match: (SARXL x (NEGL <t> (ADDLconst [c] y)))
// cond: c & 31 == 0
// result: (SARXL x (NEGL <t> y))
for {
x := v_0
if v_1.Op != OpAMD64NEGL {
break
}
t := v_1.Type
v_1_0 := v_1.Args[0]
if v_1_0.Op != OpAMD64ADDLconst {
break
}
c := auxIntToInt32(v_1_0.AuxInt)
y := v_1_0.Args[0]
if !(c&31 == 0) {
break
}
v.reset(OpAMD64SARXL)
v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t)
v0.AddArg(y)
v.AddArg2(x, v0)
return true
}
// match: (SARXL x (ANDLconst [c] y))
// cond: c & 31 == 31
// result: (SARXL x y)
for {
x := v_0
if v_1.Op != OpAMD64ANDLconst {
break
}
c := auxIntToInt32(v_1.AuxInt)
y := v_1.Args[0]
if !(c&31 == 31) {
break
}
v.reset(OpAMD64SARXL)
v.AddArg2(x, y)
return true
}
// match: (SARXL x (NEGL <t> (ANDLconst [c] y)))
// cond: c & 31 == 31
// result: (SARXL x (NEGL <t> y))
for {
x := v_0
if v_1.Op != OpAMD64NEGL {
break
}
t := v_1.Type
v_1_0 := v_1.Args[0]
if v_1_0.Op != OpAMD64ANDLconst {
break
}
c := auxIntToInt32(v_1_0.AuxInt)
y := v_1_0.Args[0]
if !(c&31 == 31) {
break
}
v.reset(OpAMD64SARXL)
v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t)
v0.AddArg(y)
v.AddArg2(x, v0)
return true
}
// No rule matched; v is left unchanged.
return false
}
// rewriteValueAMD64_OpAMD64SARXQ tries the SARXQ rewrite rules below, in order,
// against v and applies the first one that matches.
//
// A constant shift count (MOVQconst or MOVLconst) turns v into SARQconst with
// the count masked to its low 6 bits. An ADDQconst/ADDLconst whose constant has
// its low 6 bits clear, or an ANDQconst/ANDLconst whose constant has its low 6
// bits all set, is stripped from the shift count, both when it is the count
// itself and when it is wrapped in a NEGQ/NEGL.
//
// It reports whether v was rewritten.
func rewriteValueAMD64_OpAMD64SARXQ(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
// b is the block in which replacement NEGQ/NEGL values are created.
b := v.Block
// match: (SARXQ x (MOVQconst [c]))
// result: (SARQconst [int8(c&63)] x)
for {
x := v_0
if v_1.Op != OpAMD64MOVQconst {
break
}
c := auxIntToInt64(v_1.AuxInt)
v.reset(OpAMD64SARQconst)
v.AuxInt = int8ToAuxInt(int8(c & 63))
v.AddArg(x)
return true
}
// match: (SARXQ x (MOVLconst [c]))
// result: (SARQconst [int8(c&63)] x)
for {
x := v_0
if v_1.Op != OpAMD64MOVLconst {
break
}
c := auxIntToInt32(v_1.AuxInt)
v.reset(OpAMD64SARQconst)
v.AuxInt = int8ToAuxInt(int8(c & 63))
v.AddArg(x)
return true
}
// match: (SARXQ x (ADDQconst [c] y))
// cond: c & 63 == 0
// result: (SARXQ x y)
for {
x := v_0
if v_1.Op != OpAMD64ADDQconst {
break
}
c := auxIntToInt32(v_1.AuxInt)
y := v_1.Args[0]
if !(c&63 == 0) {
break
}
v.reset(OpAMD64SARXQ)
v.AddArg2(x, y)
return true
}
// match: (SARXQ x (NEGQ <t> (ADDQconst [c] y)))
// cond: c & 63 == 0
// result: (SARXQ x (NEGQ <t> y))
for {
x := v_0
if v_1.Op != OpAMD64NEGQ {
break
}
t := v_1.Type
v_1_0 := v_1.Args[0]
if v_1_0.Op != OpAMD64ADDQconst {
break
}
c := auxIntToInt32(v_1_0.AuxInt)
y := v_1_0.Args[0]
if !(c&63 == 0) {
break
}
v.reset(OpAMD64SARXQ)
v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t)
v0.AddArg(y)
v.AddArg2(x, v0)
return true
}
// match: (SARXQ x (ANDQconst [c] y))
// cond: c & 63 == 63
// result: (SARXQ x y)
for {
x := v_0
if v_1.Op != OpAMD64ANDQconst {
break
}
c := auxIntToInt32(v_1.AuxInt)
y := v_1.Args[0]
if !(c&63 == 63) {
break
}
v.reset(OpAMD64SARXQ)
v.AddArg2(x, y)
return true
}
// match: (SARXQ x (NEGQ <t> (ANDQconst [c] y)))
// cond: c & 63 == 63
// result: (SARXQ x (NEGQ <t> y))
for {
x := v_0
if v_1.Op != OpAMD64NEGQ {
break
}
t := v_1.Type
v_1_0 := v_1.Args[0]
if v_1_0.Op != OpAMD64ANDQconst {
break
}
c := auxIntToInt32(v_1_0.AuxInt)
y := v_1_0.Args[0]
if !(c&63 == 63) {
break
}
v.reset(OpAMD64SARXQ)
v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t)
v0.AddArg(y)
v.AddArg2(x, v0)
return true
}
// match: (SARXQ x (ADDLconst [c] y))
// cond: c & 63 == 0
// result: (SARXQ x y)
for {
x := v_0
if v_1.Op != OpAMD64ADDLconst {
break
}
c := auxIntToInt32(v_1.AuxInt)
y := v_1.Args[0]
if !(c&63 == 0) {
break
}
v.reset(OpAMD64SARXQ)
v.AddArg2(x, y)
return true
}
// match: (SARXQ x (NEGL <t> (ADDLconst [c] y)))
// cond: c & 63 == 0
// result: (SARXQ x (NEGL <t> y))
for {
x := v_0
if v_1.Op != OpAMD64NEGL {
break
}
t := v_1.Type
v_1_0 := v_1.Args[0]
if v_1_0.Op != OpAMD64ADDLconst {
break
}
c := auxIntToInt32(v_1_0.AuxInt)
y := v_1_0.Args[0]
if !(c&63 == 0) {
break
}
v.reset(OpAMD64SARXQ)
v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t)
v0.AddArg(y)
v.AddArg2(x, v0)
return true
}
// match: (SARXQ x (ANDLconst [c] y))
// cond: c & 63 == 63
// result: (SARXQ x y)
for {
x := v_0
if v_1.Op != OpAMD64ANDLconst {
break
}
c := auxIntToInt32(v_1.AuxInt)
y := v_1.Args[0]
if !(c&63 == 63) {
break
}
v.reset(OpAMD64SARXQ)
v.AddArg2(x, y)
return true
}
// match: (SARXQ x (NEGL <t> (ANDLconst [c] y)))
// cond: c & 63 == 63
// result: (SARXQ x (NEGL <t> y))
for {
x := v_0
if v_1.Op != OpAMD64NEGL {
break
}
t := v_1.Type
v_1_0 := v_1.Args[0]
if v_1_0.Op != OpAMD64ANDLconst {
break
}
c := auxIntToInt32(v_1_0.AuxInt)
y := v_1_0.Args[0]
if !(c&63 == 63) {
break
}
v.reset(OpAMD64SARXQ)
v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t)
v0.AddArg(y)
v.AddArg2(x, v0)
return true
}
// No rule matched; v is left unchanged.
return false
}
func rewriteValueAMD64_OpAMD64SBBLcarrymask(v *Value) bool {
v_0 := v.Args[0]
// match: (SBBLcarrymask (FlagEQ))

View File

@ -1029,3 +1029,13 @@ func TestShiftGeneric(t *testing.T) {
}
}
}
// shiftSink64 seeds the benchmark below and receives its final value, so the
// shift chain is both fed from and stored to a package-level variable.
var shiftSink64 int64

// BenchmarkShiftArithmeticRight repeatedly applies a signed right shift with a
// variable count. The count is the iteration number masked to the range 0..63.
func BenchmarkShiftArithmeticRight(b *testing.B) {
	acc := shiftSink64
	for n := 0; n < b.N; n++ {
		acc >>= n & 63
	}
	shiftSink64 = acc
}

View File

@ -46,6 +46,16 @@ func blsr32(x int32) int32 {
return x & (x - 1)
}
// sarx64 returns the signed 64-bit value x shifted right by y.
// NOTE(review): the comment inside the body looks like a codegen-test
// expectation that SARXQ is emitted for the shift under amd64/v3; it presumably
// has to stay directly above the return statement — confirm before moving it.
func sarx64(x, y int64) int64 {
// amd64/v3:"SARXQ"
return x >> y
}
// sarx32 returns the signed 32-bit value x shifted right by y.
// NOTE(review): the comment inside the body looks like a codegen-test
// expectation that SARXL is emitted for the shift under amd64/v3; it presumably
// has to stay directly above the return statement — confirm before moving it.
func sarx32(x, y int32) int32 {
// amd64/v3:"SARXL"
return x >> y
}
func shlrx64(x []uint64, i int, s uint64) uint64 {
// amd64/v3: `SHRXQ\t[A-Z]+[0-9]*, \([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
s = x[i] >> i