mirror of
https://github.com/golang/go
synced 2024-11-12 00:20:22 -07:00
cmd/asm,cmd/compile,cmd/internal/obj/ppc64: add extswsli support on power9
This adds support for the extswsli instruction, which combines an extsw (sign-extend word) with a following left shift by an immediate.

A new benchmark demonstrates the improvement:

name      old time/op  new time/op  delta
ExtShift  1.34µs ± 0%  1.30µs ± 0%  -3.15%  (p=0.057 n=4+3)

Change-Id: I21b410676fdf15d20e0cbbaa75d7c6dcd3bbb7b0
Reviewed-on: https://go-review.googlesource.com/c/go/+/257017
Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Carlos Eduardo Seo <carlos.seo@gmail.com>
Trust: Lynn Boger <laboger@linux.vnet.ibm.com>
This commit is contained in:
parent
874b3132a8
commit
a424f6e45e
1
src/cmd/asm/internal/asm/testdata/ppc64enc.s
vendored
1
src/cmd/asm/internal/asm/testdata/ppc64enc.s
vendored
@ -266,6 +266,7 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0
|
||||
SRDCC R3, R4 // 7c841c37
|
||||
ROTLW $16, R3, R4 // 5464803e
|
||||
ROTLW R3, R4, R5 // 5c85183e
|
||||
EXTSWSLI $3, R4, R5 // 7c851ef4
|
||||
RLWMI $7, R3, $65535, R6 // 50663c3e
|
||||
RLWMICC $7, R3, $65535, R6 // 50663c3f
|
||||
RLWNM $3, R4, $7, R6 // 54861f7e
|
||||
|
@ -20,6 +20,18 @@ func BenchmarkLoadAdd(b *testing.B) {
|
||||
}
|
||||
}
|
||||
|
||||
// Added for ppc64 extswsli on power9
|
||||
func BenchmarkExtShift(b *testing.B) {
|
||||
x := make([]int32, 1024)
|
||||
for i := 0; i < b.N; i++ {
|
||||
var s int64
|
||||
for i := range x {
|
||||
s ^= int64(x[i]+32) * 8
|
||||
}
|
||||
globl = s
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkModify(b *testing.B) {
|
||||
a := make([]int64, 1024)
|
||||
v := globl
|
||||
|
@ -677,7 +677,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
|
||||
p.From.Reg = v.Args[0].Reg()
|
||||
|
||||
case ssa.OpPPC64ADDconst, ssa.OpPPC64ANDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst,
|
||||
ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst, ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst:
|
||||
ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst, ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst, ssa.OpPPC64EXTSWSLconst:
|
||||
p := s.Prog(v.Op.Asm())
|
||||
p.Reg = v.Args[0].Reg()
|
||||
p.From.Type = obj.TYPE_CONST
|
||||
|
@ -1025,6 +1025,8 @@
|
||||
(SLWconst [c] z:(MOVWZreg x)) && z.Uses == 1 && c < 24 => (CLRLSLWI [newPPC64ShiftAuxInt(c,8,31,32)] x)
|
||||
(SLWconst [c] z:(ANDconst [d] x)) && z.Uses == 1 && isPPC64ValidShiftMask(d) => (CLRLSLWI [newPPC64ShiftAuxInt(c,32-getPPC64ShiftMaskLength(d),31,32)] x)
|
||||
(SLWconst [c] z:(AND (MOVDconst [d]) x)) && z.Uses == 1 && isPPC64ValidShiftMask(d) => (CLRLSLWI [newPPC64ShiftAuxInt(c,32-getPPC64ShiftMaskLength(d),31,32)] x)
|
||||
// special case for power9
|
||||
(SL(W|D)const [c] z:(MOVWreg x)) && c < 32 && objabi.GOPPC64 >= 9 => (EXTSWSLconst [c] x)
|
||||
|
||||
// Lose widening ops fed to stores
|
||||
(MOVBstore [off] {sym} ptr (MOV(B|BZ|H|HZ|W|WZ)reg x) mem) => (MOVBstore [off] {sym} ptr x mem)
|
||||
|
@ -223,6 +223,7 @@ func init() {
|
||||
|
||||
{name: "ROTLconst", argLength: 1, reg: gp11, asm: "ROTL", aux: "Int64"}, // arg0 rotate left by auxInt bits
|
||||
{name: "ROTLWconst", argLength: 1, reg: gp11, asm: "ROTLW", aux: "Int64"}, // uint32(arg0) rotate left by auxInt bits
|
||||
{name: "EXTSWSLconst", argLength: 1, reg: gp11, asm: "EXTSWSLI", aux: "Int64"},
|
||||
|
||||
{name: "CNTLZD", argLength: 1, reg: gp11, asm: "CNTLZD", clobberFlags: true}, // count leading zeros
|
||||
{name: "CNTLZW", argLength: 1, reg: gp11, asm: "CNTLZW", clobberFlags: true}, // count leading zeros (32 bit)
|
||||
|
@ -1865,6 +1865,7 @@ const (
|
||||
OpPPC64SLWconst
|
||||
OpPPC64ROTLconst
|
||||
OpPPC64ROTLWconst
|
||||
OpPPC64EXTSWSLconst
|
||||
OpPPC64CNTLZD
|
||||
OpPPC64CNTLZW
|
||||
OpPPC64CNTTZD
|
||||
@ -24849,6 +24850,20 @@ var opcodeTable = [...]opInfo{
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "EXTSWSLconst",
|
||||
auxType: auxInt64,
|
||||
argLen: 1,
|
||||
asm: ppc64.AEXTSWSLI,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "CNTLZD",
|
||||
argLen: 1,
|
||||
|
@ -12877,6 +12877,24 @@ func rewriteValuePPC64_OpPPC64SLDconst(v *Value) bool {
|
||||
}
|
||||
break
|
||||
}
|
||||
// match: (SLDconst [c] z:(MOVWreg x))
|
||||
// cond: c < 32 && objabi.GOPPC64 >= 9
|
||||
// result: (EXTSWSLconst [c] x)
|
||||
for {
|
||||
c := auxIntToInt64(v.AuxInt)
|
||||
z := v_0
|
||||
if z.Op != OpPPC64MOVWreg {
|
||||
break
|
||||
}
|
||||
x := z.Args[0]
|
||||
if !(c < 32 && objabi.GOPPC64 >= 9) {
|
||||
break
|
||||
}
|
||||
v.reset(OpPPC64EXTSWSLconst)
|
||||
v.AuxInt = int64ToAuxInt(c)
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValuePPC64_OpPPC64SLW(v *Value) bool {
|
||||
@ -13000,6 +13018,24 @@ func rewriteValuePPC64_OpPPC64SLWconst(v *Value) bool {
|
||||
}
|
||||
break
|
||||
}
|
||||
// match: (SLWconst [c] z:(MOVWreg x))
|
||||
// cond: c < 32 && objabi.GOPPC64 >= 9
|
||||
// result: (EXTSWSLconst [c] x)
|
||||
for {
|
||||
c := auxIntToInt64(v.AuxInt)
|
||||
z := v_0
|
||||
if z.Op != OpPPC64MOVWreg {
|
||||
break
|
||||
}
|
||||
x := z.Args[0]
|
||||
if !(c < 32 && objabi.GOPPC64 >= 9) {
|
||||
break
|
||||
}
|
||||
v.reset(OpPPC64EXTSWSLconst)
|
||||
v.AuxInt = int64ToAuxInt(c)
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValuePPC64_OpPPC64SRAD(v *Value) bool {
|
||||
|
@ -733,6 +733,8 @@ const (
|
||||
ASRAD
|
||||
ASRADCC
|
||||
ASRDCC
|
||||
AEXTSWSLI
|
||||
AEXTSWSLICC
|
||||
ASTDCCC
|
||||
ATD
|
||||
|
||||
|
@ -329,6 +329,8 @@ var Anames = []string{
|
||||
"SRAD",
|
||||
"SRADCC",
|
||||
"SRDCC",
|
||||
"EXTSWSLI",
|
||||
"EXTSWSLICC",
|
||||
"STDCCC",
|
||||
"TD",
|
||||
"DWORD",
|
||||
|
@ -160,6 +160,8 @@ var optab = []Optab{
|
||||
{ASLD, C_REG, C_REG, C_NONE, C_REG, 6, 4, 0},
|
||||
{ASLD, C_SCON, C_REG, C_NONE, C_REG, 25, 4, 0},
|
||||
{ASLD, C_SCON, C_NONE, C_NONE, C_REG, 25, 4, 0},
|
||||
{AEXTSWSLI, C_SCON, C_NONE, C_NONE, C_REG, 25, 4, 0},
|
||||
{AEXTSWSLI, C_SCON, C_REG, C_NONE, C_REG, 25, 4, 0},
|
||||
{ASLW, C_SCON, C_REG, C_NONE, C_REG, 57, 4, 0},
|
||||
{ASLW, C_SCON, C_NONE, C_NONE, C_REG, 57, 4, 0},
|
||||
{ASRAW, C_REG, C_NONE, C_NONE, C_REG, 6, 4, 0},
|
||||
@ -1877,6 +1879,9 @@ func buildop(ctxt *obj.Link) {
|
||||
case ASRAW: /* sraw Rb,Rs,Ra; srawi sh,Rs,Ra */
|
||||
opset(ASRAWCC, r0)
|
||||
|
||||
case AEXTSWSLI:
|
||||
opset(AEXTSWSLICC, r0)
|
||||
|
||||
case ASRAD: /* sraw Rb,Rs,Ra; srawi sh,Rs,Ra */
|
||||
opset(ASRADCC, r0)
|
||||
|
||||
@ -2189,49 +2194,54 @@ func AOP_RLDIC(op uint32, a uint32, s uint32, sh uint32, m uint32) uint32 {
|
||||
return op | (s&31)<<21 | (a&31)<<16 | (sh&31)<<11 | ((sh&32)>>5)<<1 | (m&31)<<6 | ((m&32)>>5)<<5
|
||||
}
|
||||
|
||||
// AOP_EXTSWSLI assembles an extswsli-style instruction word from op and
// its operand fields.
//
// Field placement (each register field is masked to 5 bits):
//
//	a        -> shifted left 21
//	s        -> shifted left 16
//	sh[4:0]  -> shifted left 11
//	sh[5]    -> shifted left 1
//
// Note that the register argument order differs from AOP_RLDIC, which puts
// s at bit 21 and a at bit 16. The asmout caller passes the middle operand
// register as a and p.To.Reg as s, so "EXTSWSLI $3, R4, R5" encodes as
// 0x7c851ef4 when op is OP_EXTSWSLI.
func AOP_EXTSWSLI(op uint32, a uint32, s uint32, sh uint32) uint32 {
	shLow := (sh & 31) << 11       // low five bits of the shift amount
	shHigh := ((sh & 32) >> 5) << 1 // sixth bit of the shift amount
	return op | (a&31)<<21 | (s&31)<<16 | shLow | shHigh
}
|
||||
|
||||
// AOP_ISEL assembles an isel-style instruction word from op and four
// operand fields, each masked to 5 bits:
//
//	t  -> shifted left 21
//	a  -> shifted left 16
//	b  -> shifted left 11
//	bc -> shifted left 6
//
// NOTE(review): t/a/b are presumably the target and the two source
// registers and bc the condition-register bit to test; confirm against
// the ISA description of isel.
func AOP_ISEL(op uint32, t uint32, a uint32, b uint32, bc uint32) uint32 {
	regs := (t&31)<<21 | (a&31)<<16 | (b&31)<<11
	return op | regs | (bc&0x1F)<<6
}
|
||||
|
||||
// Pre-computed base opcode words used when assembling instructions
// directly, without going through the per-instruction opcode lookups.
// Each name is declared exactly once.
const (
	/* each rhs is OPVCC(_, _, _, _) */
	OP_ADD    = 31<<26 | 266<<1 | 0<<10 | 0
	OP_ADDI   = 14<<26 | 0<<1 | 0<<10 | 0
	OP_ADDIS  = 15<<26 | 0<<1 | 0<<10 | 0
	OP_ANDI   = 28<<26 | 0<<1 | 0<<10 | 0
	OP_EXTSB  = 31<<26 | 954<<1 | 0<<10 | 0
	OP_EXTSH  = 31<<26 | 922<<1 | 0<<10 | 0
	OP_EXTSW  = 31<<26 | 986<<1 | 0<<10 | 0
	OP_ISEL   = 31<<26 | 15<<1 | 0<<10 | 0
	OP_MCRF   = 19<<26 | 0<<1 | 0<<10 | 0
	OP_MCRFS  = 63<<26 | 64<<1 | 0<<10 | 0
	OP_MCRXR  = 31<<26 | 512<<1 | 0<<10 | 0
	OP_MFCR   = 31<<26 | 19<<1 | 0<<10 | 0
	OP_MFFS   = 63<<26 | 583<<1 | 0<<10 | 0
	OP_MFMSR  = 31<<26 | 83<<1 | 0<<10 | 0
	OP_MFSPR  = 31<<26 | 339<<1 | 0<<10 | 0
	OP_MFSR   = 31<<26 | 595<<1 | 0<<10 | 0
	OP_MFSRIN = 31<<26 | 659<<1 | 0<<10 | 0
	OP_MTCRF  = 31<<26 | 144<<1 | 0<<10 | 0
	OP_MTFSF  = 63<<26 | 711<<1 | 0<<10 | 0
	OP_MTFSFI = 63<<26 | 134<<1 | 0<<10 | 0
	OP_MTMSR  = 31<<26 | 146<<1 | 0<<10 | 0
	OP_MTMSRD = 31<<26 | 178<<1 | 0<<10 | 0
	OP_MTSPR  = 31<<26 | 467<<1 | 0<<10 | 0
	OP_MTSR   = 31<<26 | 210<<1 | 0<<10 | 0
	OP_MTSRIN = 31<<26 | 242<<1 | 0<<10 | 0
	OP_MULLW  = 31<<26 | 235<<1 | 0<<10 | 0
	OP_MULLD  = 31<<26 | 233<<1 | 0<<10 | 0
	OP_OR     = 31<<26 | 444<<1 | 0<<10 | 0
	OP_ORI    = 24<<26 | 0<<1 | 0<<10 | 0
	OP_ORIS   = 25<<26 | 0<<1 | 0<<10 | 0
	OP_RLWINM = 21<<26 | 0<<1 | 0<<10 | 0
	OP_RLWNM  = 23<<26 | 0<<1 | 0<<10 | 0
	OP_SUBF   = 31<<26 | 40<<1 | 0<<10 | 0
	OP_RLDIC  = 30<<26 | 4<<1 | 0<<10 | 0
	OP_RLDICR = 30<<26 | 2<<1 | 0<<10 | 0
	OP_RLDICL = 30<<26 | 0<<1 | 0<<10 | 0
	OP_RLDCL  = 30<<26 | 8<<1 | 0<<10 | 0

	// Unlike the entries above, the extended opcode 445 is placed at
	// bit 2 rather than bit 1: AOP_EXTSWSLI stores the sixth bit of the
	// shift amount in bit 1.
	OP_EXTSWSLI = 31<<26 | 445<<2
)
|
||||
|
||||
func oclass(a *obj.Addr) int {
|
||||
@ -2965,14 +2975,21 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) {
|
||||
case AROTL:
|
||||
a = int(0)
|
||||
op = OP_RLDICL
|
||||
case AEXTSWSLI:
|
||||
a = int(v)
|
||||
default:
|
||||
c.ctxt.Diag("unexpected op in sldi case\n%v", p)
|
||||
a = 0
|
||||
o1 = 0
|
||||
}
|
||||
|
||||
o1 = AOP_RLDIC(op, uint32(p.To.Reg), uint32(r), uint32(v), uint32(a))
|
||||
if p.As == ASLDCC || p.As == ASRDCC {
|
||||
if p.As == AEXTSWSLI || p.As == AEXTSWSLICC {
|
||||
o1 = AOP_EXTSWSLI(OP_EXTSWSLI, uint32(r), uint32(p.To.Reg), uint32(v))
|
||||
|
||||
} else {
|
||||
o1 = AOP_RLDIC(op, uint32(p.To.Reg), uint32(r), uint32(v), uint32(a))
|
||||
}
|
||||
if p.As == ASLDCC || p.As == ASRDCC || p.As == AEXTSWSLICC {
|
||||
o1 |= 1 // Set the condition code bit
|
||||
}
|
||||
|
||||
@ -4350,6 +4367,11 @@ func (c *ctxt9) oprrr(a obj.As) uint32 {
|
||||
case ASRADCC:
|
||||
return OPVCC(31, 794, 0, 1)
|
||||
|
||||
case AEXTSWSLI:
|
||||
return OPVCC(31, 445, 0, 0)
|
||||
case AEXTSWSLICC:
|
||||
return OPVCC(31, 445, 0, 1)
|
||||
|
||||
case ASRW:
|
||||
return OPVCC(31, 536, 0, 0)
|
||||
case ASRWCC:
|
||||
@ -5013,6 +5035,10 @@ func (c *ctxt9) opirr(a obj.As) uint32 {
|
||||
return OPVCC(31, (413 << 1), 0, 0)
|
||||
case ASRADCC:
|
||||
return OPVCC(31, (413 << 1), 0, 1)
|
||||
case AEXTSWSLI:
|
||||
return OPVCC(31, 445, 0, 0)
|
||||
case AEXTSWSLICC:
|
||||
return OPVCC(31, 445, 0, 1)
|
||||
|
||||
case ASTSW:
|
||||
return OPVCC(31, 725, 0, 0)
|
||||
|
@ -182,7 +182,7 @@ func checkUnneededTrunc(tab *[100000]uint32, d uint64, v uint32, h uint16, b byt
|
||||
return f, g
|
||||
}
|
||||
|
||||
func checkCombinedShifts(v8 uint8, v16 uint16, v32 uint32, v64 uint64) (uint8, uint16, uint32, uint64) {
|
||||
func checkCombinedShifts(v8 uint8, v16 uint16, v32 uint32, x32 int32, v64 uint64) (uint8, uint16, uint32, uint64, int64) {
|
||||
|
||||
// ppc64le:-"AND","CLRLSLWI"
|
||||
// ppc64:-"AND","CLRLSLWI"
|
||||
@ -202,7 +202,10 @@ func checkCombinedShifts(v8 uint8, v16 uint16, v32 uint32, v64 uint64) (uint8, u
|
||||
// ppc64le:-"AND","CLRLSLDI"
|
||||
// ppc64:-"AND","CLRLSLDI"
|
||||
i := (v64 & 0xFFFFFFFF) << 5
|
||||
return f, g, h, i
|
||||
// ppc64le/power9:-"SLD","EXTSWSLI"
|
||||
// ppc64/power9:-"SLD","EXTSWSLI"
|
||||
j := int64(x32+32)*8
|
||||
return f, g, h, i, j
|
||||
}
|
||||
|
||||
func checkWidenAfterShift(v int64, u uint64) (int64, uint64) {
|
||||
|
Loading…
Reference in New Issue
Block a user