mirror of
https://github.com/golang/go
synced 2024-11-25 20:27:57 -07:00
cmd/compile: use SHLX and SHRX instructions for GOAMD64>=v3
The SHRX/SHLX instructions can take any general-purpose register as the shift count operand, and can read their source operand from memory. This CL introduces operators that combine the load and the shift into a single instruction. For #47120 Change-Id: I13b48f53c7d30067a72eb2c8382242045dead36a Reviewed-on: https://go-review.googlesource.com/c/go/+/385174 Reviewed-by: Keith Randall <khr@golang.org> Trust: Cherry Mui <cherryyz@google.com>
This commit is contained in:
parent
6f1dce0fcb
commit
7fbabe8d57
@ -280,6 +280,23 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
|
||||
p.To.Reg = v.Reg()
|
||||
p.SetFrom3Reg(v.Args[1].Reg())
|
||||
|
||||
case ssa.OpAMD64SHLXLload, ssa.OpAMD64SHLXQload,
|
||||
ssa.OpAMD64SHRXLload, ssa.OpAMD64SHRXQload:
|
||||
p := opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg())
|
||||
m := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
|
||||
ssagen.AddAux(&m, v)
|
||||
p.SetFrom3(m)
|
||||
|
||||
case ssa.OpAMD64SHLXLloadidx1, ssa.OpAMD64SHLXLloadidx4, ssa.OpAMD64SHLXLloadidx8,
|
||||
ssa.OpAMD64SHRXLloadidx1, ssa.OpAMD64SHRXLloadidx4, ssa.OpAMD64SHRXLloadidx8,
|
||||
ssa.OpAMD64SHLXQloadidx1, ssa.OpAMD64SHLXQloadidx8,
|
||||
ssa.OpAMD64SHRXQloadidx1, ssa.OpAMD64SHRXQloadidx8:
|
||||
p := opregreg(s, v.Op.Asm(), v.Reg(), v.Args[2].Reg())
|
||||
m := obj.Addr{Type: obj.TYPE_MEM}
|
||||
memIdx(&m, v)
|
||||
ssagen.AddAux(&m, v)
|
||||
p.SetFrom3(m)
|
||||
|
||||
case ssa.OpAMD64DIVQU, ssa.OpAMD64DIVLU, ssa.OpAMD64DIVWU:
|
||||
// Arg[0] (the dividend) is in AX.
|
||||
// Arg[1] (the divisor) can be in any other register.
|
||||
|
@ -340,6 +340,22 @@ var combine = map[[2]Op]Op{
|
||||
[2]Op{OpAMD64DIVSDload, OpAMD64LEAQ1}: OpAMD64DIVSDloadidx1,
|
||||
[2]Op{OpAMD64DIVSDload, OpAMD64LEAQ8}: OpAMD64DIVSDloadidx8,
|
||||
|
||||
[2]Op{OpAMD64SHLXLload, OpAMD64ADDQ}: OpAMD64SHLXLloadidx1,
|
||||
[2]Op{OpAMD64SHLXQload, OpAMD64ADDQ}: OpAMD64SHLXQloadidx1,
|
||||
[2]Op{OpAMD64SHRXLload, OpAMD64ADDQ}: OpAMD64SHRXLloadidx1,
|
||||
[2]Op{OpAMD64SHRXQload, OpAMD64ADDQ}: OpAMD64SHRXQloadidx1,
|
||||
|
||||
[2]Op{OpAMD64SHLXLload, OpAMD64LEAQ1}: OpAMD64SHLXLloadidx1,
|
||||
[2]Op{OpAMD64SHLXLload, OpAMD64LEAQ4}: OpAMD64SHLXLloadidx4,
|
||||
[2]Op{OpAMD64SHLXLload, OpAMD64LEAQ8}: OpAMD64SHLXLloadidx8,
|
||||
[2]Op{OpAMD64SHLXQload, OpAMD64LEAQ1}: OpAMD64SHLXQloadidx1,
|
||||
[2]Op{OpAMD64SHLXQload, OpAMD64LEAQ8}: OpAMD64SHLXQloadidx8,
|
||||
[2]Op{OpAMD64SHRXLload, OpAMD64LEAQ1}: OpAMD64SHRXLloadidx1,
|
||||
[2]Op{OpAMD64SHRXLload, OpAMD64LEAQ4}: OpAMD64SHRXLloadidx4,
|
||||
[2]Op{OpAMD64SHRXLload, OpAMD64LEAQ8}: OpAMD64SHRXLloadidx8,
|
||||
[2]Op{OpAMD64SHRXQload, OpAMD64LEAQ1}: OpAMD64SHRXQloadidx1,
|
||||
[2]Op{OpAMD64SHRXQload, OpAMD64LEAQ8}: OpAMD64SHRXQloadidx8,
|
||||
|
||||
// 386
|
||||
[2]Op{Op386MOVBload, Op386ADDL}: Op386MOVBloadidx1,
|
||||
[2]Op{Op386MOVWload, Op386ADDL}: Op386MOVWloadidx1,
|
||||
|
@ -2251,3 +2251,6 @@
|
||||
&& mergePoint(b,x0,x1) != nil
|
||||
&& clobber(x0, x1, sh)
|
||||
=> @mergePoint(b,x0,x1) (MOVBEQload [i] {s} p1 mem)
|
||||
|
||||
(SHL(Q|L) l:(MOV(Q|L)load [off] {sym} ptr mem) x) && buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l) => (SHLX(Q|L)load [off] {sym} ptr x mem)
|
||||
(SHR(Q|L) l:(MOV(Q|L)load [off] {sym} ptr mem) x) && buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l) => (SHRX(Q|L)load [off] {sym} ptr x mem)
|
||||
|
@ -141,11 +141,13 @@ func init() {
|
||||
readflags = regInfo{inputs: nil, outputs: gponly}
|
||||
flagsgpax = regInfo{inputs: nil, clobbers: ax, outputs: []regMask{gp &^ ax}}
|
||||
|
||||
gpload = regInfo{inputs: []regMask{gpspsbg, 0}, outputs: gponly}
|
||||
gp21load = regInfo{inputs: []regMask{gp, gpspsbg, 0}, outputs: gponly}
|
||||
gploadidx = regInfo{inputs: []regMask{gpspsbg, gpsp, 0}, outputs: gponly}
|
||||
gp21loadidx = regInfo{inputs: []regMask{gp, gpspsbg, gpsp, 0}, outputs: gponly}
|
||||
gp21pax = regInfo{inputs: []regMask{gp &^ ax, gp}, outputs: []regMask{gp &^ ax}, clobbers: ax}
|
||||
gpload = regInfo{inputs: []regMask{gpspsbg, 0}, outputs: gponly}
|
||||
gp21load = regInfo{inputs: []regMask{gp, gpspsbg, 0}, outputs: gponly}
|
||||
gploadidx = regInfo{inputs: []regMask{gpspsbg, gpsp, 0}, outputs: gponly}
|
||||
gp21loadidx = regInfo{inputs: []regMask{gp, gpspsbg, gpsp, 0}, outputs: gponly}
|
||||
gp21pax = regInfo{inputs: []regMask{gp &^ ax, gp}, outputs: []regMask{gp &^ ax}, clobbers: ax}
|
||||
gp21shxload = regInfo{inputs: []regMask{gpspsbg, gp, 0}, outputs: gponly}
|
||||
gp21shxloadidx = regInfo{inputs: []regMask{gpspsbg, gpsp, gp, 0}, outputs: gponly}
|
||||
|
||||
gpstore = regInfo{inputs: []regMask{gpspsbg, gpsp, 0}}
|
||||
gpstoreconst = regInfo{inputs: []regMask{gpspsbg, 0}}
|
||||
@ -935,6 +937,23 @@ func init() {
|
||||
{name: "MOVBELstore", argLength: 3, reg: gpstore, asm: "MOVBEL", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // swap and store 4 bytes in arg1 to arg0+auxint+aux. arg2=mem
|
||||
{name: "MOVBEQload", argLength: 2, reg: gpload, asm: "MOVBEQ", aux: "SymOff", typ: "UInt64", faultOnNilArg0: true, symEffect: "Read"}, // load and swap 8 bytes from arg0+auxint+aux. arg1=mem
|
||||
{name: "MOVBEQstore", argLength: 3, reg: gpstore, asm: "MOVBEQ", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // swap and store 8 bytes in arg1 to arg0+auxint+aux. arg2=mem
|
||||
|
||||
// CPUID feature: BMI2.
|
||||
{name: "SHLXLload", argLength: 3, reg: gp21shxload, asm: "SHLXL", aux: "SymOff", typ: "Uint32", faultOnNilArg0: true, symEffect: "Read"}, // *(arg0+auxint+aux) << arg1, arg2=mem, shift amount is mod 32
|
||||
{name: "SHLXQload", argLength: 3, reg: gp21shxload, asm: "SHLXQ", aux: "SymOff", typ: "Uint64", faultOnNilArg0: true, symEffect: "Read"}, // *(arg0+auxint+aux) << arg1, arg2=mem, shift amount is mod 64
|
||||
{name: "SHRXLload", argLength: 3, reg: gp21shxload, asm: "SHRXL", aux: "SymOff", typ: "Uint32", faultOnNilArg0: true, symEffect: "Read"}, // unsigned *(arg0+auxint+aux) >> arg1, arg2=mem, shift amount is mod 32
|
||||
{name: "SHRXQload", argLength: 3, reg: gp21shxload, asm: "SHRXQ", aux: "SymOff", typ: "Uint64", faultOnNilArg0: true, symEffect: "Read"}, // unsigned *(arg0+auxint+aux) >> arg1, arg2=mem, shift amount is mod 64
|
||||
|
||||
{name: "SHLXLloadidx1", argLength: 4, reg: gp21shxloadidx, asm: "SHLXL", scale: 1, aux: "SymOff", typ: "Uint32", faultOnNilArg0: true, symEffect: "Read"}, // *(arg0+1*arg1+auxint+aux) << arg2, arg3=mem, shift amount is mod 32
|
||||
{name: "SHLXLloadidx4", argLength: 4, reg: gp21shxloadidx, asm: "SHLXL", scale: 4, aux: "SymOff", typ: "Uint32", faultOnNilArg0: true, symEffect: "Read"}, // *(arg0+4*arg1+auxint+aux) << arg2, arg3=mem, shift amount is mod 32
|
||||
{name: "SHLXLloadidx8", argLength: 4, reg: gp21shxloadidx, asm: "SHLXL", scale: 8, aux: "SymOff", typ: "Uint32", faultOnNilArg0: true, symEffect: "Read"}, // *(arg0+8*arg1+auxint+aux) << arg2, arg3=mem, shift amount is mod 32
|
||||
{name: "SHLXQloadidx1", argLength: 4, reg: gp21shxloadidx, asm: "SHLXQ", scale: 1, aux: "SymOff", typ: "Uint64", faultOnNilArg0: true, symEffect: "Read"}, // *(arg0+1*arg1+auxint+aux) << arg2, arg3=mem, shift amount is mod 64
|
||||
{name: "SHLXQloadidx8", argLength: 4, reg: gp21shxloadidx, asm: "SHLXQ", scale: 8, aux: "SymOff", typ: "Uint64", faultOnNilArg0: true, symEffect: "Read"}, // *(arg0+8*arg1+auxint+aux) << arg2, arg3=mem, shift amount is mod 64
|
||||
{name: "SHRXLloadidx1", argLength: 4, reg: gp21shxloadidx, asm: "SHRXL", scale: 1, aux: "SymOff", typ: "Uint32", faultOnNilArg0: true, symEffect: "Read"}, // unsigned *(arg0+1*arg1+auxint+aux) >> arg2, arg3=mem, shift amount is mod 32
|
||||
{name: "SHRXLloadidx4", argLength: 4, reg: gp21shxloadidx, asm: "SHRXL", scale: 4, aux: "SymOff", typ: "Uint32", faultOnNilArg0: true, symEffect: "Read"}, // unsigned *(arg0+4*arg1+auxint+aux) >> arg2, arg3=mem, shift amount is mod 32
|
||||
{name: "SHRXLloadidx8", argLength: 4, reg: gp21shxloadidx, asm: "SHRXL", scale: 8, aux: "SymOff", typ: "Uint32", faultOnNilArg0: true, symEffect: "Read"}, // unsigned *(arg0+8*arg1+auxint+aux) >> arg2, arg3=mem, shift amount is mod 32
|
||||
{name: "SHRXQloadidx1", argLength: 4, reg: gp21shxloadidx, asm: "SHRXQ", scale: 1, aux: "SymOff", typ: "Uint64", faultOnNilArg0: true, symEffect: "Read"}, // unsigned *(arg0+1*arg1+auxint+aux) >> arg2, arg3=mem, shift amount is mod 64
|
||||
{name: "SHRXQloadidx8", argLength: 4, reg: gp21shxloadidx, asm: "SHRXQ", scale: 8, aux: "SymOff", typ: "Uint64", faultOnNilArg0: true, symEffect: "Read"}, // unsigned *(arg0+8*arg1+auxint+aux) >> arg2, arg3=mem, shift amount is mod 64
|
||||
}
|
||||
|
||||
var AMD64blocks = []blockData{
|
||||
|
@ -1050,6 +1050,20 @@ const (
|
||||
OpAMD64MOVBELstore
|
||||
OpAMD64MOVBEQload
|
||||
OpAMD64MOVBEQstore
|
||||
OpAMD64SHLXLload
|
||||
OpAMD64SHLXQload
|
||||
OpAMD64SHRXLload
|
||||
OpAMD64SHRXQload
|
||||
OpAMD64SHLXLloadidx1
|
||||
OpAMD64SHLXLloadidx4
|
||||
OpAMD64SHLXLloadidx8
|
||||
OpAMD64SHLXQloadidx1
|
||||
OpAMD64SHLXQloadidx8
|
||||
OpAMD64SHRXLloadidx1
|
||||
OpAMD64SHRXLloadidx4
|
||||
OpAMD64SHRXLloadidx8
|
||||
OpAMD64SHRXQloadidx1
|
||||
OpAMD64SHRXQloadidx8
|
||||
|
||||
OpARMADD
|
||||
OpARMADDconst
|
||||
@ -13896,6 +13910,264 @@ var opcodeTable = [...]opInfo{
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "SHLXLload",
|
||||
auxType: auxSymOff,
|
||||
argLen: 3,
|
||||
faultOnNilArg0: true,
|
||||
symEffect: SymRead,
|
||||
asm: x86.ASHLXL,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "SHLXQload",
|
||||
auxType: auxSymOff,
|
||||
argLen: 3,
|
||||
faultOnNilArg0: true,
|
||||
symEffect: SymRead,
|
||||
asm: x86.ASHLXQ,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "SHRXLload",
|
||||
auxType: auxSymOff,
|
||||
argLen: 3,
|
||||
faultOnNilArg0: true,
|
||||
symEffect: SymRead,
|
||||
asm: x86.ASHRXL,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "SHRXQload",
|
||||
auxType: auxSymOff,
|
||||
argLen: 3,
|
||||
faultOnNilArg0: true,
|
||||
symEffect: SymRead,
|
||||
asm: x86.ASHRXQ,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "SHLXLloadidx1",
|
||||
auxType: auxSymOff,
|
||||
argLen: 4,
|
||||
faultOnNilArg0: true,
|
||||
symEffect: SymRead,
|
||||
asm: x86.ASHLXL,
|
||||
scale: 1,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{2, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
{1, 49151}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "SHLXLloadidx4",
|
||||
auxType: auxSymOff,
|
||||
argLen: 4,
|
||||
faultOnNilArg0: true,
|
||||
symEffect: SymRead,
|
||||
asm: x86.ASHLXL,
|
||||
scale: 4,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{2, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
{1, 49151}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "SHLXLloadidx8",
|
||||
auxType: auxSymOff,
|
||||
argLen: 4,
|
||||
faultOnNilArg0: true,
|
||||
symEffect: SymRead,
|
||||
asm: x86.ASHLXL,
|
||||
scale: 8,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{2, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
{1, 49151}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "SHLXQloadidx1",
|
||||
auxType: auxSymOff,
|
||||
argLen: 4,
|
||||
faultOnNilArg0: true,
|
||||
symEffect: SymRead,
|
||||
asm: x86.ASHLXQ,
|
||||
scale: 1,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{2, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
{1, 49151}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "SHLXQloadidx8",
|
||||
auxType: auxSymOff,
|
||||
argLen: 4,
|
||||
faultOnNilArg0: true,
|
||||
symEffect: SymRead,
|
||||
asm: x86.ASHLXQ,
|
||||
scale: 8,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{2, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
{1, 49151}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "SHRXLloadidx1",
|
||||
auxType: auxSymOff,
|
||||
argLen: 4,
|
||||
faultOnNilArg0: true,
|
||||
symEffect: SymRead,
|
||||
asm: x86.ASHRXL,
|
||||
scale: 1,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{2, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
{1, 49151}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "SHRXLloadidx4",
|
||||
auxType: auxSymOff,
|
||||
argLen: 4,
|
||||
faultOnNilArg0: true,
|
||||
symEffect: SymRead,
|
||||
asm: x86.ASHRXL,
|
||||
scale: 4,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{2, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
{1, 49151}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "SHRXLloadidx8",
|
||||
auxType: auxSymOff,
|
||||
argLen: 4,
|
||||
faultOnNilArg0: true,
|
||||
symEffect: SymRead,
|
||||
asm: x86.ASHRXL,
|
||||
scale: 8,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{2, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
{1, 49151}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "SHRXQloadidx1",
|
||||
auxType: auxSymOff,
|
||||
argLen: 4,
|
||||
faultOnNilArg0: true,
|
||||
symEffect: SymRead,
|
||||
asm: x86.ASHRXQ,
|
||||
scale: 1,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{2, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
{1, 49151}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "SHRXQloadidx8",
|
||||
auxType: auxSymOff,
|
||||
argLen: 4,
|
||||
faultOnNilArg0: true,
|
||||
symEffect: SymRead,
|
||||
asm: x86.ASHRXQ,
|
||||
scale: 8,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{2, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
{1, 49151}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
{
|
||||
name: "ADD",
|
||||
|
@ -24641,6 +24641,28 @@ func rewriteValueAMD64_OpAMD64SHLL(v *Value) bool {
|
||||
v.AddArg2(x, v0)
|
||||
return true
|
||||
}
|
||||
// match: (SHLL l:(MOVLload [off] {sym} ptr mem) x)
|
||||
// cond: buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l)
|
||||
// result: (SHLXLload [off] {sym} ptr x mem)
|
||||
for {
|
||||
l := v_0
|
||||
if l.Op != OpAMD64MOVLload {
|
||||
break
|
||||
}
|
||||
off := auxIntToInt32(l.AuxInt)
|
||||
sym := auxToSym(l.Aux)
|
||||
mem := l.Args[1]
|
||||
ptr := l.Args[0]
|
||||
x := v_1
|
||||
if !(buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l)) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64SHLXLload)
|
||||
v.AuxInt = int32ToAuxInt(off)
|
||||
v.Aux = symToAux(sym)
|
||||
v.AddArg3(ptr, x, mem)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64SHLLconst(v *Value) bool {
|
||||
@ -24875,6 +24897,28 @@ func rewriteValueAMD64_OpAMD64SHLQ(v *Value) bool {
|
||||
v.AddArg2(x, v0)
|
||||
return true
|
||||
}
|
||||
// match: (SHLQ l:(MOVQload [off] {sym} ptr mem) x)
|
||||
// cond: buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l)
|
||||
// result: (SHLXQload [off] {sym} ptr x mem)
|
||||
for {
|
||||
l := v_0
|
||||
if l.Op != OpAMD64MOVQload {
|
||||
break
|
||||
}
|
||||
off := auxIntToInt32(l.AuxInt)
|
||||
sym := auxToSym(l.Aux)
|
||||
mem := l.Args[1]
|
||||
ptr := l.Args[0]
|
||||
x := v_1
|
||||
if !(buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l)) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64SHLXQload)
|
||||
v.AuxInt = int32ToAuxInt(off)
|
||||
v.Aux = symToAux(sym)
|
||||
v.AddArg3(ptr, x, mem)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64SHLQconst(v *Value) bool {
|
||||
@ -25204,6 +25248,28 @@ func rewriteValueAMD64_OpAMD64SHRL(v *Value) bool {
|
||||
v.AddArg2(x, v0)
|
||||
return true
|
||||
}
|
||||
// match: (SHRL l:(MOVLload [off] {sym} ptr mem) x)
|
||||
// cond: buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l)
|
||||
// result: (SHRXLload [off] {sym} ptr x mem)
|
||||
for {
|
||||
l := v_0
|
||||
if l.Op != OpAMD64MOVLload {
|
||||
break
|
||||
}
|
||||
off := auxIntToInt32(l.AuxInt)
|
||||
sym := auxToSym(l.Aux)
|
||||
mem := l.Args[1]
|
||||
ptr := l.Args[0]
|
||||
x := v_1
|
||||
if !(buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l)) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64SHRXLload)
|
||||
v.AuxInt = int32ToAuxInt(off)
|
||||
v.Aux = symToAux(sym)
|
||||
v.AddArg3(ptr, x, mem)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64SHRLconst(v *Value) bool {
|
||||
@ -25426,6 +25492,28 @@ func rewriteValueAMD64_OpAMD64SHRQ(v *Value) bool {
|
||||
v.AddArg2(x, v0)
|
||||
return true
|
||||
}
|
||||
// match: (SHRQ l:(MOVQload [off] {sym} ptr mem) x)
|
||||
// cond: buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l)
|
||||
// result: (SHRXQload [off] {sym} ptr x mem)
|
||||
for {
|
||||
l := v_0
|
||||
if l.Op != OpAMD64MOVQload {
|
||||
break
|
||||
}
|
||||
off := auxIntToInt32(l.AuxInt)
|
||||
sym := auxToSym(l.Aux)
|
||||
mem := l.Args[1]
|
||||
ptr := l.Args[0]
|
||||
x := v_1
|
||||
if !(buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l)) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64SHRXQload)
|
||||
v.AuxInt = int32ToAuxInt(off)
|
||||
v.Aux = symToAux(sym)
|
||||
v.AddArg3(ptr, x, mem)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64SHRQconst(v *Value) bool {
|
||||
|
@ -45,3 +45,19 @@ func blsr32(x int32) int32 {
|
||||
// amd64/v3:"BLSRL"
|
||||
return x & (x - 1)
|
||||
}
|
||||
|
||||
// shlrx64 exercises variable shifts whose left operand is loaded from a
// []uint64 element. It shifts x[i] right by i, then shifts x[i+1] left by
// that result, and returns the final value; the incoming value of s is
// overwritten before it is read.
//
// The amd64/v3 patterns below expect each shift to be emitted as a single
// SHRXQ/SHLXQ whose source is an indexed memory operand scaled by 8 (with a
// displacement of 8 for x[i+1]), i.e. the load is folded into the shift.
// NOTE(review): each pattern is presumably matched by the codegen test
// harness against the statement that follows it, so keep every pattern
// directly above its statement.
func shlrx64(x []uint64, i int, s uint64) uint64 {
|
||||
// amd64/v3: `SHRXQ\t[A-Z]+[0-9]*, \([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
|
||||
s = x[i] >> i
|
||||
// amd64/v3: `SHLXQ\t[A-Z]+[0-9]*, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
|
||||
s = x[i+1] << s
|
||||
return s
|
||||
}
|
||||
|
||||
// shlrx32 is the 32-bit counterpart of the 64-bit shift-with-load test: it
// shifts x[i] right by i, then shifts x[i+1] left by that result, and
// returns the final value; the incoming value of s is overwritten before it
// is read.
//
// The amd64/v3 patterns below expect each shift to be emitted as a single
// SHRXL/SHLXL whose source is an indexed memory operand scaled by 4 (with a
// displacement of 4 for x[i+1]), i.e. the load is folded into the shift.
// NOTE(review): each pattern is presumably matched by the codegen test
// harness against the statement that follows it, so keep every pattern
// directly above its statement.
func shlrx32(x []uint32, i int, s uint32) uint32 {
|
||||
// amd64/v3: `SHRXL\t[A-Z]+[0-9]*, \([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*`
|
||||
s = x[i] >> i
|
||||
// amd64/v3: `SHLXL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*`
|
||||
s = x[i+1] << s
|
||||
return s
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user