
cmd/internal/obj/ppc64: allow VR register arguments to VS registers

Likewise, reorder register numbers such that extended mnemonics which
use FPR arguments can be transparently encoded as a VSR argument for
the move to/from VSR class of instructions. Specifically, ensure the
following holds for all FPRx and VRx constants: FPRx & 63 == x, and
VRx & 63 == x + 32.

This simplifies encoding machine instructions, and helps ppc64
assembly writers avoid hokey workarounds when switching from
vector to vector-scalar register notation. Notably, many VSX
instructions are limited to vector operands due to encoding
restrictions.
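
For illustration, the invariant can be checked with a small standalone
sketch (this uses the raw constant values cited in the a.out.go
comments below; the real declarations are derived from obj.RBasePPC64):

	package main

	import "fmt"

	func main() {
		// Constant values match the updated a.out.go comments:
		// F0=4160, V0=4192, VS0=4224.
		const regF0, regV0, regVS0 = 4160, 4192, 4224

		// FPRx & 63 == x: F1 masks to 1, the same VSX number as VS1.
		fmt.Println((regF0+1)&63, (regVS0+1)&63) // 1 1

		// VRx & 63 == x + 32: V1 masks to 33, the same VSX number as VS33.
		fmt.Println((regV0+1)&63, (regVS0+33)&63) // 33 33
	}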

Secondly, this explicitly rejects dubious usages of the m[tf]vsr
family of instructions which had previously been accepted:

 * Reject two GPR arguments for non-MTVSRDD opcodes. These
   have no defined behavior today, and may set RFU bits, e.g.
   MTVSRD R1, R2, VS1

 * Reject FPR destinations for MTVSRDD, and accept it only with two
   GPR arguments, since this instruction copies two GPR values into
   the two halves of a VSR, e.g. the following are now rejected:
   MTVSRDD R1, R2, F1
   MTVSRDD R1, F1

Change-Id: If13dd88c3791d1892dbd18ef0e34675a5285fff9
Reviewed-on: https://go-review.googlesource.com/c/go/+/342929
Run-TryBot: Paul Murphy <murp@ibm.com>
TryBot-Result: Go Bot <gobot@golang.org>
Trust: Lynn Boger <laboger@linux.vnet.ibm.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Authored by Paul E. Murphy on 2021-03-09 16:55:31 -06:00; committed by Lynn Boger
parent ee91bb8319
commit b3c6de9dcd
4 changed files with 87 additions and 64 deletions

src/cmd/asm/internal/asm/testdata/ppc64.s

@@ -649,6 +649,8 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0
 	LXVB16X (R3)(R4), VS1 // 7c241ed8
 	LXVW4X (R3)(R4), VS1 // 7c241e18
 	LXV 16(R3), VS1 // f4230011
+	LXV 16(R3), VS33 // f4230019
+	LXV 16(R3), V1 // f4230019
 	LXVL R3, R4, VS1 // 7c23221a
 	LXVLL R3, R4, VS1 // 7c23225a
 	LXVX R3, R4, VS1 // 7c232218
@@ -668,8 +670,13 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0
 	MTFPRD R3, F0 // 7c030166
 	MFVRD V0, R3 // 7c030067
 	MFVSRLD VS63,R4 // 7fe40267
+	MFVSRLD V31,R4 // 7fe40267
 	MFVSRWZ VS33,R4 // 7c2400e7
+	MFVSRWZ V1,R4 // 7c2400e7
 	MTVSRD R3, VS1 // 7c230166
+	MTVSRDD R3, R4, VS1 // 7c232366
+	MTVSRDD R3, R4, VS33 // 7c232367
+	MTVSRDD R3, R4, V1 // 7c232367
 	MTVRD R3, V13 // 7da30167
 	MTVSRWA R4, VS31 // 7fe401a6
 	MTVSRWS R4, VS32 // 7c040327
@@ -678,6 +685,8 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0
 	XXBRW VS1, VS2 // f04f0f6c
 	XXBRH VS2, VS3 // f067176c
 	XXLAND VS1, VS2, VS3 // f0611410
+	XXLAND V1, V2, V3 // f0611417
+	XXLAND VS33, VS34, VS35 // f0611417
 	XXLANDC VS1, VS2, VS3 // f0611450
 	XXLEQV VS0, VS1, VS2 // f0400dd0
 	XXLNAND VS0, VS1, VS2 // f0400d90
@@ -687,11 +696,17 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0
 	XXLORQ VS1, VS2, VS3 // f0611490
 	XXLXOR VS1, VS2, VS3 // f06114d0
 	XXSEL VS1, VS2, VS3, VS4 // f08110f0
+	XXSEL VS33, VS34, VS35, VS36 // f08110ff
+	XXSEL V1, V2, V3, V4 // f08110ff
 	XXMRGHW VS1, VS2, VS3 // f0611090
 	XXMRGLW VS1, VS2, VS3 // f0611190
 	XXSPLTW VS1, $1, VS2 // f0410a90
+	XXSPLTW VS33, $1, VS34 // f0410a93
+	XXSPLTW V1, $1, V2 // f0410a93
 	XXPERM VS1, VS2, VS3 // f06110d0
 	XXSLDWI VS1, VS2, $1, VS3 // f0611110
+	XXSLDWI V1, V2, $1, V3 // f0611117
+	XXSLDWI VS33, VS34, $1, VS35 // f0611117
 	XSCVDPSP VS1, VS2 // f0400c24
 	XVCVDPSP VS1, VS2 // f0400e24
 	XSCVSXDDP VS1, VS2 // f0400de0

src/cmd/internal/obj/ppc64/a.out.go

@@ -79,8 +79,10 @@ const (
 	REG_R30
 	REG_R31
 
-	/* F0=4128 ... F31=4159 */
-	REG_F0
+	/* Align FPR and VSR vectors such that when masked with 0x3F they produce
+	   an equivalent VSX register. */
+	/* F0=4160 ... F31=4191 */
+	REG_F0 = obj.RBasePPC64 + iota + 32
 	REG_F1
 	REG_F2
 	REG_F3
@@ -113,7 +115,7 @@ const (
 	REG_F30
 	REG_F31
 
-	/* V0=4160 ... V31=4191 */
+	/* V0=4192 ... V31=4223 */
 	REG_V0
 	REG_V1
 	REG_V2
@@ -147,7 +149,7 @@ const (
 	REG_V30
 	REG_V31
 
-	/* VS0=4192 ... VS63=4255 */
+	/* VS0=4224 ... VS63=4287 */
 	REG_VS0
 	REG_VS1
 	REG_VS2

src/cmd/internal/obj/ppc64/asm9.go

@@ -428,15 +428,13 @@ var optab = []Optab{
 	{as: ASTXSIWX, a1: C_VSREG, a6: C_SOREG, type_: 86, size: 4}, /* vsx scalar as integer store, xx1-form */
 
 	/* VSX move from VSR */
-	{as: AMFVSRD, a1: C_VSREG, a6: C_REG, type_: 88, size: 4}, /* vsx move from vsr, xx1-form */
+	{as: AMFVSRD, a1: C_VSREG, a6: C_REG, type_: 88, size: 4},
 	{as: AMFVSRD, a1: C_FREG, a6: C_REG, type_: 88, size: 4},
-	{as: AMFVSRD, a1: C_VREG, a6: C_REG, type_: 88, size: 4},
 
 	/* VSX move to VSR */
-	{as: AMTVSRD, a1: C_REG, a6: C_VSREG, type_: 88, size: 4}, /* vsx move to vsr, xx1-form */
-	{as: AMTVSRD, a1: C_REG, a2: C_REG, a6: C_VSREG, type_: 88, size: 4},
-	{as: AMTVSRD, a1: C_REG, a6: C_FREG, type_: 88, size: 4},
-	{as: AMTVSRD, a1: C_REG, a6: C_VREG, type_: 88, size: 4},
+	{as: AMTVSRD, a1: C_REG, a6: C_VSREG, type_: 104, size: 4},
+	{as: AMTVSRD, a1: C_REG, a6: C_FREG, type_: 104, size: 4},
+	{as: AMTVSRDD, a1: C_REG, a2: C_REG, a6: C_VSREG, type_: 104, size: 4},
 
 	/* VSX logical */
 	{as: AXXLAND, a1: C_VSREG, a2: C_VSREG, a6: C_VSREG, type_: 90, size: 4}, /* vsx and, xx3-form */
@@ -1036,13 +1034,14 @@ func (c *ctxt9) oplook(p *obj.Prog) *Optab {
 	// c.ctxt.Logf("oplook %v %d %d %d %d\n", p, a1, a2, a3, a4, a5, a6)
 	ops := oprange[p.As&obj.AMask]
 	c1 := &xcmp[a1]
+	c2 := &xcmp[a2]
 	c3 := &xcmp[a3]
 	c4 := &xcmp[a4]
 	c5 := &xcmp[a5]
 	c6 := &xcmp[a6]
 	for i := range ops {
 		op := &ops[i]
-		if int(op.a2) == a2 && c1[op.a1] && c3[op.a3] && c4[op.a4] && c5[op.a5] && c6[op.a6] {
+		if c1[op.a1] && c2[op.a2] && c3[op.a3] && c4[op.a4] && c5[op.a5] && c6[op.a6] {
 			p.Optab = uint16(cap(optab) - cap(ops) + i + 1)
 			return op
 		}
@@ -1116,6 +1115,12 @@ func cmp(a int, b int) bool {
 			return r0iszero != 0 /*TypeKind(100016)*/
 		}
+	case C_VSREG:
+		/* Allow any VR argument as a VSR operand. */
+		if b == C_VREG {
+			return true
+		}
 	case C_ANY:
 		return true
 	}
@@ -1594,7 +1599,6 @@ func buildop(ctxt *obj.Link) {
 		opset(AMTVRD, r0)
 		opset(AMTVSRWA, r0)
 		opset(AMTVSRWZ, r0)
-		opset(AMTVSRDD, r0)
 		opset(AMTVSRWS, r0)
 
 	case AXXLAND: /* xxland, xxlandc, xxleqv, xxlnand */
@@ -1977,6 +1981,7 @@
 		ACMPEQB,
 		AECIWX,
 		ACLRLSLWI,
+		AMTVSRDD,
 		obj.ANOP,
 		obj.ATEXT,
 		obj.AUNDEF,
@@ -2075,50 +2080,32 @@ func AOP_IR(op uint32, d uint32, simm uint32) uint32 {
 }
 
 /* XX1-form 3-register operands, 1 VSR operand */
-func AOP_XX1(op uint32, d uint32, a uint32, b uint32) uint32 {
-	/* For the XX-form encodings, we need the VSX register number to be exactly */
-	/* between 0-63, so we can properly set the rightmost bits. */
-	r := d - REG_VS0
+func AOP_XX1(op uint32, r uint32, a uint32, b uint32) uint32 {
 	return op | (r&31)<<21 | (a&31)<<16 | (b&31)<<11 | (r&32)>>5
 }
 
 /* XX2-form 3-register operands, 2 VSR operands */
-func AOP_XX2(op uint32, d uint32, a uint32, b uint32) uint32 {
-	xt := d - REG_VS0
-	xb := b - REG_VS0
+func AOP_XX2(op uint32, xt uint32, a uint32, xb uint32) uint32 {
 	return op | (xt&31)<<21 | (a&3)<<16 | (xb&31)<<11 | (xb&32)>>4 | (xt&32)>>5
 }
 
 /* XX3-form 3 VSR operands */
-func AOP_XX3(op uint32, d uint32, a uint32, b uint32) uint32 {
-	xt := d - REG_VS0
-	xa := a - REG_VS0
-	xb := b - REG_VS0
+func AOP_XX3(op uint32, xt uint32, xa uint32, xb uint32) uint32 {
 	return op | (xt&31)<<21 | (xa&31)<<16 | (xb&31)<<11 | (xa&32)>>3 | (xb&32)>>4 | (xt&32)>>5
 }
 
 /* XX3-form 3 VSR operands + immediate */
-func AOP_XX3I(op uint32, d uint32, a uint32, b uint32, c uint32) uint32 {
-	xt := d - REG_VS0
-	xa := a - REG_VS0
-	xb := b - REG_VS0
+func AOP_XX3I(op uint32, xt uint32, xa uint32, xb uint32, c uint32) uint32 {
 	return op | (xt&31)<<21 | (xa&31)<<16 | (xb&31)<<11 | (c&3)<<8 | (xa&32)>>3 | (xb&32)>>4 | (xt&32)>>5
 }
 
 /* XX4-form, 4 VSR operands */
-func AOP_XX4(op uint32, d uint32, a uint32, b uint32, c uint32) uint32 {
-	xt := d - REG_VS0
-	xa := a - REG_VS0
-	xb := b - REG_VS0
-	xc := c - REG_VS0
+func AOP_XX4(op uint32, xt uint32, xa uint32, xb uint32, xc uint32) uint32 {
 	return op | (xt&31)<<21 | (xa&31)<<16 | (xb&31)<<11 | (xc&31)<<6 | (xc&32)>>2 | (xa&32)>>3 | (xb&32)>>4 | (xt&32)>>5
 }
 
 /* DQ-form, VSR register, register + offset operands */
-func AOP_DQ(op uint32, d uint32, a uint32, b uint32) uint32 {
-	/* For the DQ-form encodings, we need the VSX register number to be exactly */
-	/* between 0-63, so we can properly set the SX bit. */
-	r := d - REG_VS0
+func AOP_DQ(op uint32, xt uint32, a uint32, b uint32) uint32 {
 	/* The EA for this instruction form is (RA) + DQ << 4, where DQ is a 12-bit signed integer. */
 	/* In order to match the output of the GNU objdump (and make the usage in Go asm easier), the */
 	/* instruction is called using the sign extended value (i.e. a valid offset would be -32752 or 32752, */
@@ -2126,7 +2113,7 @@ func AOP_DQ(op uint32, d uint32, a uint32, b uint32) uint32 {
 	/* bits 0 to 3 in 'dq' need to be zero, otherwise this will generate an illegal instruction. */
 	/* If in doubt how this instruction form is encoded, refer to ISA 3.0b, pages 492 and 507. */
 	dq := b >> 4
-	return op | (r&31)<<21 | (a&31)<<16 | (dq&4095)<<4 | (r&32)>>2
+	return op | (xt&31)<<21 | (a&31)<<16 | (dq&4095)<<4 | (xt&32)>>2
 }
 
 /* Z23-form, 3-register operands + CY field */
@@ -3586,33 +3573,8 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) {
 		/* 3-register operand order: (RB)(RA*1), XT */
 		o1 = AOP_XX1(c.oploadx(p.As), uint32(p.To.Reg), uint32(p.From.Index), uint32(p.From.Reg))
 
-	case 88: /* VSX instructions, XX1-form */
-		/* reg reg none OR reg reg reg */
-		/* 3-register operand order: RA, RB, XT */
-		/* 2-register operand order: XS, RA or RA, XT */
-		xt := int32(p.To.Reg)
-		xs := int32(p.From.Reg)
-		/* We need to treat the special case of extended mnemonics that may have a FREG/VREG as an argument */
-		if REG_V0 <= xt && xt <= REG_V31 {
-			/* Convert V0-V31 to VS32-VS63 */
-			xt = xt + 64
-			o1 = AOP_XX1(c.oprrr(p.As), uint32(xt), uint32(p.From.Reg), uint32(p.Reg))
-		} else if REG_F0 <= xt && xt <= REG_F31 {
-			/* Convert F0-F31 to VS0-VS31 */
-			xt = xt + 64
-			o1 = AOP_XX1(c.oprrr(p.As), uint32(xt), uint32(p.From.Reg), uint32(p.Reg))
-		} else if REG_VS0 <= xt && xt <= REG_VS63 {
-			o1 = AOP_XX1(c.oprrr(p.As), uint32(xt), uint32(p.From.Reg), uint32(p.Reg))
-		} else if REG_V0 <= xs && xs <= REG_V31 {
-			/* Likewise for XS */
-			xs = xs + 64
-			o1 = AOP_XX1(c.oprrr(p.As), uint32(xs), uint32(p.To.Reg), uint32(p.Reg))
-		} else if REG_F0 <= xs && xs <= REG_F31 {
-			xs = xs + 64
-			o1 = AOP_XX1(c.oprrr(p.As), uint32(xs), uint32(p.To.Reg), uint32(p.Reg))
-		} else if REG_VS0 <= xs && xs <= REG_VS63 {
-			o1 = AOP_XX1(c.oprrr(p.As), uint32(xs), uint32(p.To.Reg), uint32(p.Reg))
-		}
+	case 88: /* VSX mfvsr* instructions, XX1-form XS,RA */
+		o1 = AOP_XX1(c.oprrr(p.As), uint32(p.From.Reg), uint32(p.To.Reg), uint32(p.Reg))
 
 	case 89: /* VSX instructions, XX2-form */
 		/* reg none reg OR reg imm reg */
@@ -3743,6 +3705,9 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) {
 		mb := uint32(c.regoff(&p.RestArgs[0].Addr))
 		me := uint32(c.regoff(&p.RestArgs[1].Addr))
 		o1 = OP_RLW(c.oprrr(p.As), uint32(p.To.Reg), uint32(p.Reg), uint32(p.From.Reg), mb, me)
+
+	case 104: /* VSX mtvsr* instructions, XX1-form RA,RB,XT */
+		o1 = AOP_XX1(c.oprrr(p.As), uint32(p.To.Reg), uint32(p.From.Reg), uint32(p.Reg))
 	}
 
 	out[0] = o1
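
With the aligned numbering, AOP_XX1 no longer rebases its register
arguments: masking the raw constant with 31 yields the 5-bit field, and
bit 5 supplies the split TX/SX bit. A rough standalone sketch of the
arithmetic (the mtvsrdd opcode word is assembled here from ISA 3.0's
primary opcode 31 and extended opcode 435, not taken from the
assembler's tables; GPR operands are passed as bare numbers for
simplicity, which the masks make equivalent):

	package main

	import "fmt"

	// Stand-ins matching the constant values noted in a.out.go:
	// V0=4192, VS0=4224.
	const (
		regV0  = 4192
		regVS0 = 4224
	)

	// aopXX1 mirrors the simplified AOP_XX1: r&31 fills the 5-bit T
	// field, and r&32 becomes the TX bit, so the V and VS spellings
	// of the same register encode identically.
	func aopXX1(op, r, a, b uint32) uint32 {
		return op | (r&31)<<21 | (a&31)<<16 | (b&31)<<11 | (r&32)>>5
	}

	func main() {
		const mtvsrdd = 31<<26 | 435<<1 // XX1-form, per ISA 3.0

		// MTVSRDD R3, R4, V1 and MTVSRDD R3, R4, VS33 produce the
		// same word, matching 7c232367 in the testdata above.
		fmt.Printf("%08x\n", aopXX1(mtvsrdd, regV0+1, 3, 4))
		fmt.Printf("%08x\n", aopXX1(mtvsrdd, regVS0+33, 3, 4))
	}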

src/cmd/internal/obj/ppc64/asm_test.go

@@ -107,3 +107,44 @@ func TestPCalign(t *testing.T) {
 		t.Errorf("Invalid alignment not detected for PCALIGN\n")
 	}
 }
+
+// Verify register constants are correctly aligned. Much of the ppc64 assembler assumes masking out significant
+// bits will produce a valid register number:
+// REG_Rx & 31 == x
+// REG_Fx & 31 == x
+// REG_Vx & 31 == x
+// REG_VSx & 63 == x
+// REG_SPRx & 1023 == x
+// REG_CRx & 7 == x
+//
+// VR and FPR disjointly overlap VSR; interpreting them as VSR registers should produce the correctly overlapping VSR number:
+// REG_Fx & 63 == x
+// REG_Vx & 63 == x + 32
+func TestRegValueAlignment(t *testing.T) {
+	tstFunc := func(rstart, rend, msk, rout int) {
+		for i := rstart; i <= rend; i++ {
+			if i&msk != rout {
+				t.Errorf("%v is not aligned to 0x%X (expected %d, got %d)\n", rconv(i), msk, rout, i&msk)
+			}
+			rout++
+		}
+	}
+	var testType = []struct {
+		rstart int
+		rend   int
+		msk    int
+		rout   int
+	}{
+		{REG_VS0, REG_VS63, 63, 0},
+		{REG_R0, REG_R31, 31, 0},
+		{REG_F0, REG_F31, 31, 0},
+		{REG_V0, REG_V31, 31, 0},
+		{REG_V0, REG_V31, 63, 32},
+		{REG_F0, REG_F31, 63, 0},
+		{REG_SPR0, REG_SPR0 + 1023, 1023, 0},
+		{REG_CR0, REG_CR7, 7, 0},
+	}
+	for _, t := range testType {
+		tstFunc(t.rstart, t.rend, t.msk, t.rout)
+	}
+}