diff --git a/src/cmd/asm/internal/asm/testdata/ppc64.s b/src/cmd/asm/internal/asm/testdata/ppc64.s index b6c0aa5035c..28ceb621cb3 100644 --- a/src/cmd/asm/internal/asm/testdata/ppc64.s +++ b/src/cmd/asm/internal/asm/testdata/ppc64.s @@ -649,6 +649,8 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0 LXVB16X (R3)(R4), VS1 // 7c241ed8 LXVW4X (R3)(R4), VS1 // 7c241e18 LXV 16(R3), VS1 // f4230011 + LXV 16(R3), VS33 // f4230019 + LXV 16(R3), V1 // f4230019 LXVL R3, R4, VS1 // 7c23221a LXVLL R3, R4, VS1 // 7c23225a LXVX R3, R4, VS1 // 7c232218 @@ -668,8 +670,13 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0 MTFPRD R3, F0 // 7c030166 MFVRD V0, R3 // 7c030067 MFVSRLD VS63,R4 // 7fe40267 + MFVSRLD V31,R4 // 7fe40267 MFVSRWZ VS33,R4 // 7c2400e7 + MFVSRWZ V1,R4 // 7c2400e7 MTVSRD R3, VS1 // 7c230166 + MTVSRDD R3, R4, VS1 // 7c232366 + MTVSRDD R3, R4, VS33 // 7c232367 + MTVSRDD R3, R4, V1 // 7c232367 MTVRD R3, V13 // 7da30167 MTVSRWA R4, VS31 // 7fe401a6 MTVSRWS R4, VS32 // 7c040327 @@ -678,6 +685,8 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0 XXBRW VS1, VS2 // f04f0f6c XXBRH VS2, VS3 // f067176c XXLAND VS1, VS2, VS3 // f0611410 + XXLAND V1, V2, V3 // f0611417 + XXLAND VS33, VS34, VS35 // f0611417 XXLANDC VS1, VS2, VS3 // f0611450 XXLEQV VS0, VS1, VS2 // f0400dd0 XXLNAND VS0, VS1, VS2 // f0400d90 @@ -687,11 +696,17 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0 XXLORQ VS1, VS2, VS3 // f0611490 XXLXOR VS1, VS2, VS3 // f06114d0 XXSEL VS1, VS2, VS3, VS4 // f08110f0 + XXSEL VS33, VS34, VS35, VS36 // f08110ff + XXSEL V1, V2, V3, V4 // f08110ff XXMRGHW VS1, VS2, VS3 // f0611090 XXMRGLW VS1, VS2, VS3 // f0611190 XXSPLTW VS1, $1, VS2 // f0410a90 + XXSPLTW VS33, $1, VS34 // f0410a93 + XXSPLTW V1, $1, V2 // f0410a93 XXPERM VS1, VS2, VS3 // f06110d0 XXSLDWI VS1, VS2, $1, VS3 // f0611110 + XXSLDWI V1, V2, $1, V3 // f0611117 + XXSLDWI VS33, VS34, $1, VS35 // f0611117 XSCVDPSP VS1, VS2 // f0400c24 XVCVDPSP VS1, VS2 // f0400e24 XSCVSXDDP VS1, VS2 // f0400de0 diff --git a/src/cmd/internal/obj/ppc64/a.out.go b/src/cmd/internal/obj/ppc64/a.out.go index e57beb3276c..dda24a0b966 100644 --- a/src/cmd/internal/obj/ppc64/a.out.go +++ b/src/cmd/internal/obj/ppc64/a.out.go @@ -79,8 +79,10 @@ const ( REG_R30 REG_R31 - /* F0=4128 ... F31=4159 */ - REG_F0 + /* Align FPR and VSR vectors such that when masked with 0x3F they produce + an equivalent VSX register. */ + /* F0=4160 ... F31=4191 */ + REG_F0 = obj.RBasePPC64 + iota + 32 REG_F1 REG_F2 REG_F3 @@ -113,7 +115,7 @@ const ( REG_F30 REG_F31 - /* V0=4160 ... V31=4191 */ + /* V0=4192 ... V31=4223 */ REG_V0 REG_V1 REG_V2 @@ -147,7 +149,7 @@ const ( REG_V30 REG_V31 - /* VS0=4192 ... VS63=4255 */ + /* VS0=4224 ... VS63=4287 */ REG_VS0 REG_VS1 REG_VS2 diff --git a/src/cmd/internal/obj/ppc64/asm9.go b/src/cmd/internal/obj/ppc64/asm9.go index e642413590d..1d92c4866f1 100644 --- a/src/cmd/internal/obj/ppc64/asm9.go +++ b/src/cmd/internal/obj/ppc64/asm9.go @@ -428,15 +428,13 @@ var optab = []Optab{ {as: ASTXSIWX, a1: C_VSREG, a6: C_SOREG, type_: 86, size: 4}, /* vsx scalar as integer store, xx1-form */ /* VSX move from VSR */ - {as: AMFVSRD, a1: C_VSREG, a6: C_REG, type_: 88, size: 4}, /* vsx move from vsr, xx1-form */ + {as: AMFVSRD, a1: C_VSREG, a6: C_REG, type_: 88, size: 4}, {as: AMFVSRD, a1: C_FREG, a6: C_REG, type_: 88, size: 4}, - {as: AMFVSRD, a1: C_VREG, a6: C_REG, type_: 88, size: 4}, /* VSX move to VSR */ - {as: AMTVSRD, a1: C_REG, a6: C_VSREG, type_: 88, size: 4}, /* vsx move to vsr, xx1-form */ - {as: AMTVSRD, a1: C_REG, a2: C_REG, a6: C_VSREG, type_: 88, size: 4}, - {as: AMTVSRD, a1: C_REG, a6: C_FREG, type_: 88, size: 4}, - {as: AMTVSRD, a1: C_REG, a6: C_VREG, type_: 88, size: 4}, + {as: AMTVSRD, a1: C_REG, a6: C_VSREG, type_: 104, size: 4}, + {as: AMTVSRD, a1: C_REG, a6: C_FREG, type_: 104, size: 4}, + {as: AMTVSRDD, a1: C_REG, a2: C_REG, a6: C_VSREG, type_: 104, size: 4}, /* VSX logical */ {as: AXXLAND, a1: C_VSREG, a2: C_VSREG, a6: C_VSREG, type_: 90, size: 4}, /* vsx and, xx3-form */ @@ -1036,13 +1034,14 @@ func (c *ctxt9) oplook(p *obj.Prog) *Optab { // c.ctxt.Logf("oplook %v %d %d %d %d\n", p, a1, a2, a3, a4, a5, a6) ops := oprange[p.As&obj.AMask] c1 := &xcmp[a1] + c2 := &xcmp[a2] c3 := &xcmp[a3] c4 := &xcmp[a4] c5 := &xcmp[a5] c6 := &xcmp[a6] for i := range ops { op := &ops[i] - if int(op.a2) == a2 && c1[op.a1] && c3[op.a3] && c4[op.a4] && c5[op.a5] && c6[op.a6] { + if c1[op.a1] && c2[op.a2] && c3[op.a3] && c4[op.a4] && c5[op.a5] && c6[op.a6] { p.Optab = uint16(cap(optab) - cap(ops) + i + 1) return op } @@ -1116,6 +1115,12 @@ func cmp(a int, b int) bool { return r0iszero != 0 /*TypeKind(100016)*/ } + case C_VSREG: + /* Allow any VR argument as a VSR operand. */ + if b == C_VREG { + return true + } + case C_ANY: return true } @@ -1594,7 +1599,6 @@ func buildop(ctxt *obj.Link) { opset(AMTVRD, r0) opset(AMTVSRWA, r0) opset(AMTVSRWZ, r0) - opset(AMTVSRDD, r0) opset(AMTVSRWS, r0) case AXXLAND: /* xxland, xxlandc, xxleqv, xxlnand */ @@ -1977,6 +1981,7 @@ func buildop(ctxt *obj.Link) { ACMPEQB, AECIWX, ACLRLSLWI, + AMTVSRDD, obj.ANOP, obj.ATEXT, obj.AUNDEF, @@ -2075,50 +2080,32 @@ func AOP_IR(op uint32, d uint32, simm uint32) uint32 { } /* XX1-form 3-register operands, 1 VSR operand */ -func AOP_XX1(op uint32, d uint32, a uint32, b uint32) uint32 { - /* For the XX-form encodings, we need the VSX register number to be exactly */ - /* between 0-63, so we can properly set the rightmost bits. */ - r := d - REG_VS0 +func AOP_XX1(op uint32, r uint32, a uint32, b uint32) uint32 { return op | (r&31)<<21 | (a&31)<<16 | (b&31)<<11 | (r&32)>>5 } /* XX2-form 3-register operands, 2 VSR operands */ -func AOP_XX2(op uint32, d uint32, a uint32, b uint32) uint32 { - xt := d - REG_VS0 - xb := b - REG_VS0 +func AOP_XX2(op uint32, xt uint32, a uint32, xb uint32) uint32 { return op | (xt&31)<<21 | (a&3)<<16 | (xb&31)<<11 | (xb&32)>>4 | (xt&32)>>5 } /* XX3-form 3 VSR operands */ -func AOP_XX3(op uint32, d uint32, a uint32, b uint32) uint32 { - xt := d - REG_VS0 - xa := a - REG_VS0 - xb := b - REG_VS0 +func AOP_XX3(op uint32, xt uint32, xa uint32, xb uint32) uint32 { return op | (xt&31)<<21 | (xa&31)<<16 | (xb&31)<<11 | (xa&32)>>3 | (xb&32)>>4 | (xt&32)>>5 } /* XX3-form 3 VSR operands + immediate */ -func AOP_XX3I(op uint32, d uint32, a uint32, b uint32, c uint32) uint32 { - xt := d - REG_VS0 - xa := a - REG_VS0 - xb := b - REG_VS0 +func AOP_XX3I(op uint32, xt uint32, xa uint32, xb uint32, c uint32) uint32 { return op | (xt&31)<<21 | (xa&31)<<16 | (xb&31)<<11 | (c&3)<<8 | (xa&32)>>3 | (xb&32)>>4 | (xt&32)>>5 } /* XX4-form, 4 VSR operands */ -func AOP_XX4(op uint32, d uint32, a uint32, b uint32, c uint32) uint32 { - xt := d - REG_VS0 - xa := a - REG_VS0 - xb := b - REG_VS0 - xc := c - REG_VS0 +func AOP_XX4(op uint32, xt uint32, xa uint32, xb uint32, xc uint32) uint32 { return op | (xt&31)<<21 | (xa&31)<<16 | (xb&31)<<11 | (xc&31)<<6 | (xc&32)>>2 | (xa&32)>>3 | (xb&32)>>4 | (xt&32)>>5 } /* DQ-form, VSR register, register + offset operands */ -func AOP_DQ(op uint32, d uint32, a uint32, b uint32) uint32 { - /* For the DQ-form encodings, we need the VSX register number to be exactly */ - /* between 0-63, so we can properly set the SX bit. */ - r := d - REG_VS0 +func AOP_DQ(op uint32, xt uint32, a uint32, b uint32) uint32 { /* The EA for this instruction form is (RA) + DQ << 4, where DQ is a 12-bit signed integer. */ /* In order to match the output of the GNU objdump (and make the usage in Go asm easier), the */ /* instruction is called using the sign extended value (i.e. a valid offset would be -32752 or 32752, */ @@ -2126,7 +2113,7 @@ func AOP_DQ(op uint32, d uint32, a uint32, b uint32) uint32 { /* bits 0 to 3 in 'dq' need to be zero, otherwise this will generate an illegal instruction. */ /* If in doubt how this instruction form is encoded, refer to ISA 3.0b, pages 492 and 507. */ dq := b >> 4 - return op | (r&31)<<21 | (a&31)<<16 | (dq&4095)<<4 | (r&32)>>2 + return op | (xt&31)<<21 | (a&31)<<16 | (dq&4095)<<4 | (xt&32)>>2 } /* Z23-form, 3-register operands + CY field */ @@ -3586,33 +3573,8 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) { /* 3-register operand order: (RB)(RA*1), XT */ o1 = AOP_XX1(c.oploadx(p.As), uint32(p.To.Reg), uint32(p.From.Index), uint32(p.From.Reg)) - case 88: /* VSX instructions, XX1-form */ - /* reg reg none OR reg reg reg */ - /* 3-register operand order: RA, RB, XT */ - /* 2-register operand order: XS, RA or RA, XT */ - xt := int32(p.To.Reg) - xs := int32(p.From.Reg) - /* We need to treat the special case of extended mnemonics that may have a FREG/VREG as an argument */ - if REG_V0 <= xt && xt <= REG_V31 { - /* Convert V0-V31 to VS32-VS63 */ - xt = xt + 64 - o1 = AOP_XX1(c.oprrr(p.As), uint32(xt), uint32(p.From.Reg), uint32(p.Reg)) - } else if REG_F0 <= xt && xt <= REG_F31 { - /* Convert F0-F31 to VS0-VS31 */ - xt = xt + 64 - o1 = AOP_XX1(c.oprrr(p.As), uint32(xt), uint32(p.From.Reg), uint32(p.Reg)) - } else if REG_VS0 <= xt && xt <= REG_VS63 { - o1 = AOP_XX1(c.oprrr(p.As), uint32(xt), uint32(p.From.Reg), uint32(p.Reg)) - } else if REG_V0 <= xs && xs <= REG_V31 { - /* Likewise for XS */ - xs = xs + 64 - o1 = AOP_XX1(c.oprrr(p.As), uint32(xs), uint32(p.To.Reg), uint32(p.Reg)) - } else if REG_F0 <= xs && xs <= REG_F31 { - xs = xs + 64 - o1 = AOP_XX1(c.oprrr(p.As), uint32(xs), uint32(p.To.Reg), uint32(p.Reg)) - } else if REG_VS0 <= xs && xs <= REG_VS63 { - o1 = AOP_XX1(c.oprrr(p.As), uint32(xs), uint32(p.To.Reg), uint32(p.Reg)) - } + case 88: /* VSX mfvsr* instructions, XX1-form XS,RA */ + o1 = AOP_XX1(c.oprrr(p.As), uint32(p.From.Reg), uint32(p.To.Reg), uint32(p.Reg)) case 89: /* VSX instructions, XX2-form */ /* reg none reg OR reg imm reg */ @@ -3743,6 +3705,9 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) { mb := uint32(c.regoff(&p.RestArgs[0].Addr)) me := uint32(c.regoff(&p.RestArgs[1].Addr)) o1 = OP_RLW(c.oprrr(p.As), uint32(p.To.Reg), uint32(p.Reg), uint32(p.From.Reg), mb, me) + + case 104: /* VSX mtvsr* instructions, XX1-form RA,RB,XT */ + o1 = AOP_XX1(c.oprrr(p.As), uint32(p.To.Reg), uint32(p.From.Reg), uint32(p.Reg)) } out[0] = o1 diff --git a/src/cmd/internal/obj/ppc64/asm_test.go b/src/cmd/internal/obj/ppc64/asm_test.go index 70dabc20175..b851d3c86b9 100644 --- a/src/cmd/internal/obj/ppc64/asm_test.go +++ b/src/cmd/internal/obj/ppc64/asm_test.go @@ -107,3 +107,44 @@ func TestPCalign(t *testing.T) { t.Errorf("Invalid alignment not detected for PCALIGN\n") } } + +// Verify register constants are correctly aligned. Much of the ppc64 assembler assumes masking out significant +// bits will produce a valid register number: +// REG_Rx & 31 == x +// REG_Fx & 31 == x +// REG_Vx & 31 == x +// REG_VSx & 63 == x +// REG_SPRx & 1023 == x +// REG_CRx & 7 == x +// +// VR and FPR disjointly overlap VSR, interpreting as VSR registers should produce the correctly overlapped VSR. +// REG_FPx & 63 == x +// REG_Vx & 63 == x + 32 +func TestRegValueAlignment(t *testing.T) { + tstFunc := func(rstart, rend, msk, rout int) { + for i := rstart; i <= rend; i++ { + if i&msk != rout { + t.Errorf("%v is not aligned to 0x%X (expected %d, got %d)\n", rconv(i), msk, rout, rstart&msk) + } + rout++ + } + } + var testType = []struct { + rstart int + rend int + msk int + rout int + }{ + {REG_VS0, REG_VS63, 63, 0}, + {REG_R0, REG_R31, 31, 0}, + {REG_F0, REG_F31, 31, 0}, + {REG_V0, REG_V31, 31, 0}, + {REG_V0, REG_V31, 63, 32}, + {REG_F0, REG_F31, 63, 0}, + {REG_SPR0, REG_SPR0 + 1023, 1023, 0}, + {REG_CR0, REG_CR7, 7, 0}, + } + for _, t := range testType { + tstFunc(t.rstart, t.rend, t.msk, t.rout) + } +}