diff --git a/src/cmd/asm/internal/asm/testdata/ppc64.s b/src/cmd/asm/internal/asm/testdata/ppc64.s
index d1ebaa2962..8498f5804a 100644
--- a/src/cmd/asm/internal/asm/testdata/ppc64.s
+++ b/src/cmd/asm/internal/asm/testdata/ppc64.s
@@ -540,6 +540,14 @@ label1:
 //	}
 //	FCMPU	F1, F2, CR0
 
+//	FTDIV FRA, FRB, BF produces
+//	ftdiv BF, FRA, FRB
+	FTDIV F1,F2,$7
+
+//	FTSQRT	FRB, BF produces
+//	ftsqrt	BF, FRB
+	FTSQRT	F2,$7
+
 //
 // CMP
 //
@@ -567,6 +575,10 @@ label1:
 //	}
 	CMP	R1, $4, CR0 // CMP R1, CR0, $4
 
+//	CMPB  RS,RB,RA produces
+//	cmpb  RA,RS,RB
+	CMPB  R2,R2,R1
+
 //
 // rotate and mask
 //
@@ -673,6 +685,13 @@ label1:
 	DCBF	(R1)
 	DCBF	(R1+R2) // DCBF	(R1)(R2*1)
 
+//	Population count, X-form
+//	<MNEMONIC> RS,RA produces
+//	<mnemonic> RA,RS
+	POPCNTD	R1,R2
+	POPCNTW	R1,R2
+	POPCNTB R1,R2
+
 //	VMX instructions
 
 //	Described as:
@@ -703,14 +722,14 @@ label1:
 //	Vector AND, VX-form
 //	<MNEMONIC> VRA,VRB,VRT produces
 //	<mnemonic> VRT,VRA,VRB
-	VANDL	V10, V9, V8
+	VAND	V10, V9, V8
 	VANDC	V15, V14, V13
 	VNAND	V19, V18, V17
 
 //	Vector OR, VX-form
 //	<MNEMONIC> VRA,VRB,VRT produces
 //	<mnemonic> VRT,VRA,VRB
-	VORL	V26, V25, V24
+	VOR	V26, V25, V24
 	VORC	V23, V22, V21
 	VNOR	V20, V19, V18
 	VXOR	V17, V16, V15
@@ -739,6 +758,14 @@ label1:
 	VADDEUQM V4, V3, V2, V1
 	VADDECUQ V4, V3, V2, V1
 
+//	Vector polynomial multiply-sum, VX-form
+//	<MNEMONIC>  VRA,VRB,VRT produces
+//	<mnemonic>  VRT,VRA,VRB
+	VPMSUMB	V2, V3, V1
+	VPMSUMH	V2, V3, V1
+	VPMSUMW	V2, V3, V1
+	VPMSUMD	V2, V3, V1
+
 //	Vector SUB, VX-form
 //	<MNEMONIC> VRA,VRB,VRT produces
 //	<mnemonic> VRT,VRA,VRB
diff --git a/src/cmd/internal/obj/ppc64/a.out.go b/src/cmd/internal/obj/ppc64/a.out.go
index 60781310d0..15e143d12c 100644
--- a/src/cmd/internal/obj/ppc64/a.out.go
+++ b/src/cmd/internal/obj/ppc64/a.out.go
@@ -626,6 +626,9 @@ const (
 	ACNTLZDCC
 	ACMPW /* CMP with L=0 */
 	ACMPWU
+	ACMPB
+	AFTDIV
+	AFTSQRT
 	ADIVD
 	ADIVDCC
 	ADIVDE
@@ -704,6 +707,9 @@ const (
 
 	/* more 64-bit operations */
 	AHRFID
+	APOPCNTD
+	APOPCNTW
+	APOPCNTB
 
 	/* Vector */
 	ALV
@@ -721,11 +727,9 @@ const (
 	ASTVX
 	ASTVXL
 	AVAND
-	AVANDL
 	AVANDC
 	AVNAND
 	AVOR
-	AVORL
 	AVORC
 	AVNOR
 	AVXOR
@@ -770,6 +774,11 @@ const (
 	AVSUBE
 	AVSUBEUQM
 	AVSUBECUQ
+	AVPMSUM
+	AVPMSUMB
+	AVPMSUMH
+	AVPMSUMW
+	AVPMSUMD
 	AVR
 	AVRLB
 	AVRLH
@@ -871,9 +880,13 @@ const (
 	ASTXSIWX
 	AMFVSR
 	AMFVSRD
+	AMFFPRD
+	AMFVRD
 	AMFVSRWZ
 	AMTVSR
 	AMTVSRD
+	AMTFPRD
+	AMTVRD
 	AMTVSRWA
 	AMTVSRWZ
 	AXXLAND
diff --git a/src/cmd/internal/obj/ppc64/anames.go b/src/cmd/internal/obj/ppc64/anames.go
index 19ddd3c675..01f4a7d41b 100644
--- a/src/cmd/internal/obj/ppc64/anames.go
+++ b/src/cmd/internal/obj/ppc64/anames.go
@@ -248,6 +248,9 @@ var Anames = []string{
 	"CNTLZDCC",
 	"CMPW",
 	"CMPWU",
+	"CMPB",
+	"FTDIV",
+	"FTSQRT",
 	"DIVD",
 	"DIVDCC",
 	"DIVDE",
@@ -321,6 +324,9 @@ var Anames = []string{
 	"REMDUV",
 	"REMDUVCC",
 	"HRFID",
+	"POPCNTD",
+	"POPCNTW",
+	"POPCNTB",
 	"LV",
 	"LVEBX",
 	"LVEHX",
@@ -336,11 +342,9 @@ var Anames = []string{
 	"STVX",
 	"STVXL",
 	"VAND",
-	"VANDL",
 	"VANDC",
 	"VNAND",
 	"VOR",
-	"VORL",
 	"VORC",
 	"VNOR",
 	"VXOR",
@@ -385,6 +389,11 @@ var Anames = []string{
 	"VSUBE",
 	"VSUBEUQM",
 	"VSUBECUQ",
+	"VPMSUM",
+	"VPMSUMB",
+	"VPMSUMH",
+	"VPMSUMW",
+	"VPMSUMD",
 	"VR",
 	"VRLB",
 	"VRLH",
@@ -484,9 +493,13 @@ var Anames = []string{
 	"STXSIWX",
 	"MFVSR",
 	"MFVSRD",
+	"MFFPRD",
+	"MFVRD",
 	"MFVSRWZ",
 	"MTVSR",
 	"MTVSRD",
+	"MTFPRD",
+	"MTVRD",
 	"MTVSRWA",
 	"MTVSRWZ",
 	"XXLAND",
diff --git a/src/cmd/internal/obj/ppc64/asm9.go b/src/cmd/internal/obj/ppc64/asm9.go
index 4f8655449d..e88cd12126 100644
--- a/src/cmd/internal/obj/ppc64/asm9.go
+++ b/src/cmd/internal/obj/ppc64/asm9.go
@@ -346,6 +346,12 @@ var optab = []Optab{
 	{AMOVD, C_REG, C_NONE, C_NONE, C_MSR, 54, 4, 0},  /* mtmsrd */
 	{AMOVWZ, C_REG, C_NONE, C_NONE, C_MSR, 54, 4, 0}, /* mtmsr */
 
+	/* Other ISA 2.05+ instructions */
+	{APOPCNTD, C_REG, C_NONE, C_NONE, C_REG, 93, 4, 0},  /* population count, x-form */
+	{ACMPB, C_REG, C_REG, C_NONE, C_REG, 92, 4, 0},      /* compare byte, x-form */
+	{AFTDIV, C_FREG, C_FREG, C_NONE, C_SCON, 92, 4, 0},  /* floating test for sw divide, x-form */
+	{AFTSQRT, C_FREG, C_NONE, C_NONE, C_SCON, 93, 4, 0}, /* floating test for sw square root, x-form */
+
 	/* Vector instructions */
 
 	/* Vector load */
@@ -372,6 +378,9 @@ var optab = []Optab{
 	{AVSUBSS, C_VREG, C_VREG, C_NONE, C_VREG, 82, 4, 0}, /* vector subtract signed saturate, vx-form */
 	{AVSUBE, C_VREG, C_VREG, C_VREG, C_VREG, 83, 4, 0},  /* vector subtract extended, va-form */
 
+	/* Vector multiply */
+	{AVPMSUM, C_VREG, C_VREG, C_NONE, C_VREG, 82, 4, 0}, /* vector polynomial multiply & sum, vx-form */
+
 	/* Vector rotate */
 	{AVR, C_VREG, C_VREG, C_NONE, C_VREG, 82, 4, 0}, /* vector rotate, vx-form */
 
@@ -428,9 +437,13 @@ var optab = []Optab{
 
 	/* VSX move from VSR */
 	{AMFVSR, C_VSREG, C_NONE, C_NONE, C_REG, 88, 4, 0}, /* vsx move from vsr, xx1-form */
+	{AMFVSR, C_FREG, C_NONE, C_NONE, C_REG, 88, 4, 0},
+	{AMFVSR, C_VREG, C_NONE, C_NONE, C_REG, 88, 4, 0},
 
 	/* VSX move to VSR */
 	{AMTVSR, C_REG, C_NONE, C_NONE, C_VSREG, 88, 4, 0}, /* vsx move to vsr, xx1-form */
+	{AMTVSR, C_REG, C_NONE, C_NONE, C_FREG, 88, 4, 0},
+	{AMTVSR, C_REG, C_NONE, C_NONE, C_VREG, 88, 4, 0},
 
 	/* VSX logical */
 	{AXXLAND, C_VSREG, C_VSREG, C_NONE, C_VSREG, 90, 4, 0}, /* vsx and, xx3-form */
@@ -1161,6 +1174,10 @@ func buildop(ctxt *obj.Link) {
 			opset(ADIVDUVCC, r0)
 			opset(ADIVDUCC, r0)
 
+		case APOPCNTD:
+			opset(APOPCNTW, r0)
+			opset(APOPCNTB, r0)
+
 		case AMOVBZ: /* lbz, stz, rlwm(r/r), lhz, lha, stz, and x variants */
 			opset(AMOVH, r0)
 
@@ -1192,12 +1209,12 @@ func buildop(ctxt *obj.Link) {
 			opset(ASTVXL, r0)
 
 		case AVAND: /* vand, vandc, vnand */
-			opset(AVANDL, r0)
+			opset(AVAND, r0)
 			opset(AVANDC, r0)
 			opset(AVNAND, r0)
 
 		case AVOR: /* vor, vorc, vxor, vnor, veqv */
-			opset(AVORL, r0)
+			opset(AVOR, r0)
 			opset(AVORC, r0)
 			opset(AVXOR, r0)
 			opset(AVNOR, r0)
@@ -1253,6 +1270,12 @@ func buildop(ctxt *obj.Link) {
 			opset(AVSUBEUQM, r0)
 			opset(AVSUBECUQ, r0)
 
+		case AVPMSUM: /* vpmsumb, vpmsumh, vpmsumw, vpmsumd */
+			opset(AVPMSUMB, r0)
+			opset(AVPMSUMH, r0)
+			opset(AVPMSUMW, r0)
+			opset(AVPMSUMD, r0)
+
 		case AVR: /* vrlb, vrlh, vrlw, vrld */
 			opset(AVRLB, r0)
 			opset(AVRLH, r0)
@@ -1375,12 +1398,16 @@ func buildop(ctxt *obj.Link) {
 		case ASTXSI: /* stxsiwx */
 			opset(ASTXSIWX, r0)
 
-		case AMFVSR: /* mfvsrd, mfvsrwz */
+		case AMFVSR: /* mfvsrd, mfvsrwz (and extended mnemonics) */
 			opset(AMFVSRD, r0)
+			opset(AMFFPRD, r0)
+			opset(AMFVRD, r0)
 			opset(AMFVSRWZ, r0)
 
-		case AMTVSR: /* mtvsrd, mtvsrwa, mtvsrwz */
+		case AMTVSR: /* mtvsrd, mtvsrwa, mtvsrwz (and extended mnemonics) */
 			opset(AMTVSRD, r0)
+			opset(AMTFPRD, r0)
+			opset(AMTVRD, r0)
 			opset(AMTVSRWA, r0)
 			opset(AMTVSRWZ, r0)
 
@@ -1710,6 +1737,15 @@ func buildop(ctxt *obj.Link) {
 		case ACMPU:
 			opset(ACMPWU, r0)
 
+		case ACMPB:
+			opset(ACMPB, r0)
+
+		case AFTDIV:
+			opset(AFTDIV, r0)
+
+		case AFTSQRT:
+			opset(AFTSQRT, r0)
+
 		case AADD,
 			AANDCC, /* and. Rb,Rs,Ra; andi. $uimm,Rs,Ra; andis. $uimm,Rs,Ra */
 			AFMOVSX,
@@ -1783,7 +1819,7 @@ func AOP_RRR(op uint32, d uint32, a uint32, b uint32) uint32 {
 	return op | (d&31)<<21 | (a&31)<<16 | (b&31)<<11
 }
 
-/* VX-form 2-register operands, r/r/none */
+/* VX-form 2-register operands, r/none/r; also used by X-form ops (e.g. ftsqrt) that leave the <<16 field zero */
 func AOP_RR(op uint32, d uint32, a uint32) uint32 {
 	return op | (d&31)<<21 | (a&31)<<11
 }
@@ -1881,6 +1917,10 @@ func OP_RLW(op uint32, a uint32, s uint32, sh uint32, mb uint32, me uint32) uint
 	return op | (s&31)<<21 | (a&31)<<16 | (sh&31)<<11 | (mb&31)<<6 | (me&31)<<1
 }
 
+func AOP_RLDIC(op uint32, a uint32, s uint32, sh uint32, m uint32) uint32 {
+	return op | (s&31)<<21 | (a&31)<<16 | (sh&31)<<11 | ((sh&32)>>5)<<1 | (m&31)<<6 | ((m&32)>>5)<<5 /* bits 0-4 of sh and m fill the fields at <<11 and <<6; bit 5 of sh lands at bit 1, bit 5 of m at bit 5 */
+}
+
 func AOP_ISEL(op uint32, t uint32, a uint32, b uint32, bc uint32) uint32 {
 	return op | (t&31)<<21 | (a&31)<<16 | (b&31)<<11 | (bc&0x1F)<<6
 }
@@ -2353,6 +2393,11 @@ func asmout(ctxt *obj.Link, p *obj.Prog, o *Optab, out []uint32) {
 			if mask[1] != 63 {
 				ctxt.Diag("invalid mask for rotate: %x (end != bit 63)\n%v", uint64(d), p)
 			}
+			o1 = LOP_RRR(oprrr(ctxt, p.As), uint32(p.To.Reg), uint32(r), uint32(p.From.Reg))
+			o1 |= (uint32(a) & 31) << 6
+			if a&0x20 != 0 {
+				o1 |= 1 << 5 /* mb[5] is top bit */
+			}
 
 		case ARLDCR, ARLDCRCC:
 			var mask [2]uint8
@@ -2362,22 +2407,28 @@ func asmout(ctxt *obj.Link, p *obj.Prog, o *Optab, out []uint32) {
 			if mask[0] != 0 {
 				ctxt.Diag("invalid mask for rotate: %x (start != 0)\n%v", uint64(d), p)
 			}
+			o1 = LOP_RRR(oprrr(ctxt, p.As), uint32(p.To.Reg), uint32(r), uint32(p.From.Reg))
+			o1 |= (uint32(a) & 31) << 6
+			if a&0x20 != 0 {
+				o1 |= 1 << 5 /* mb[5] is top bit */
+			}
 
 		// These opcodes use a shift count like the ppc64 asm, no mask conversion done
-		case ARLDICR, ARLDICRCC, ARLDICL, ARLDICLCC:
-			a = int(d)
+		case ARLDICR, ARLDICRCC:
+			me := int(d)
+			sh := regoff(ctxt, &p.From)
+			o1 = AOP_RLDIC(oprrr(ctxt, p.As), uint32(p.To.Reg), uint32(r), uint32(sh), uint32(me))
+
+		case ARLDICL, ARLDICLCC:
+			mb := int(d)
+			sh := regoff(ctxt, &p.From)
+			o1 = AOP_RLDIC(oprrr(ctxt, p.As), uint32(p.To.Reg), uint32(r), uint32(sh), uint32(mb))
 
 		default:
 			ctxt.Diag("unexpected op in rldc case\n%v", p)
 			a = 0
 		}
 
-		o1 = LOP_RRR(oprrr(ctxt, p.As), uint32(p.To.Reg), uint32(r), uint32(p.From.Reg))
-		o1 |= (uint32(a) & 31) << 6
-		if a&0x20 != 0 {
-			o1 |= 1 << 5 /* mb[5] is top bit */
-		}
-
 	case 17, /* bc bo,bi,lbra (same for now) */
 		16: /* bc bo,bi,sbra */
 		a := 0
@@ -3170,8 +3221,24 @@ func asmout(ctxt *obj.Link, p *obj.Prog, o *Optab, out []uint32) {
 		/* 2-register operand order: XS, RA or RA, XT */
 		xt := int32(p.To.Reg)
 		xs := int32(p.From.Reg)
-		if REG_VS0 <= xt && xt <= REG_VS63 {
+		/* We need to treat the special case of extended mnemonics that may have a FREG/VREG as an argument */
+		if REG_V0 <= xt && xt <= REG_V31 {
+			/* Convert V0-V31 to VS32-VS63 */
+			xt = xt + 64
 			o1 = AOP_XX1(oprrr(ctxt, p.As), uint32(p.To.Reg), uint32(p.From.Reg), uint32(p.Reg))
+		} else if REG_F0 <= xt && xt <= REG_F31 {
+			/* Convert F0-F31 to VS0-VS31 */
+			xt = xt + 64
+			o1 = AOP_XX1(oprrr(ctxt, p.As), uint32(p.To.Reg), uint32(p.From.Reg), uint32(p.Reg))
+		} else if REG_VS0 <= xt && xt <= REG_VS63 {
+			o1 = AOP_XX1(oprrr(ctxt, p.As), uint32(p.To.Reg), uint32(p.From.Reg), uint32(p.Reg))
+		} else if REG_V0 <= xs && xs <= REG_V31 {
+			/* Likewise for XS */
+			xs = xs + 64
+			o1 = AOP_XX1(oprrr(ctxt, p.As), uint32(p.From.Reg), uint32(p.To.Reg), uint32(p.Reg))
+		} else if REG_F0 <= xs && xs <= REG_F31 {
+			xs = xs + 64
+			o1 = AOP_XX1(oprrr(ctxt, p.As), uint32(p.From.Reg), uint32(p.To.Reg), uint32(p.Reg))
 		} else if REG_VS0 <= xs && xs <= REG_VS63 {
 			o1 = AOP_XX1(oprrr(ctxt, p.As), uint32(p.From.Reg), uint32(p.To.Reg), uint32(p.Reg))
 		}
@@ -3199,6 +3266,30 @@ func asmout(ctxt *obj.Link, p *obj.Prog, o *Optab, out []uint32) {
 		/* 3-register operand order: XA, XB, XC, XT */
 		o1 = AOP_XX4(oprrr(ctxt, p.As), uint32(p.To.Reg), uint32(p.From.Reg), uint32(p.Reg), uint32(p.From3.Reg))
 
+	case 92: /* X-form instructions, 3-operands */
+		if p.To.Type == obj.TYPE_CONST {
+			/* imm reg reg */
+			/* operand order: FRA, FRB, BF */
+			bf := int(regoff(ctxt, &p.To)) << 2
+			o1 = AOP_RRR(opirr(ctxt, p.As), uint32(bf), uint32(p.From.Reg), uint32(p.Reg))
+		} else if p.To.Type == obj.TYPE_REG {
+			/* reg reg reg */
+			/* operand order: RS, RB, RA */
+			o1 = AOP_RRR(oprrr(ctxt, p.As), uint32(p.From.Reg), uint32(p.To.Reg), uint32(p.Reg))
+		}
+
+	case 93: /* X-form instructions, 2-operands */
+		if p.To.Type == obj.TYPE_CONST {
+			/* imm reg */
+			/* operand order: FRB, BF */
+			bf := int(regoff(ctxt, &p.To)) << 2
+			o1 = AOP_RR(opirr(ctxt, p.As), uint32(bf), uint32(p.From.Reg))
+		} else if p.Reg == 0 {
+			/* popcnt* r,r, X-form */
+			/* operand order: RS, RA */
+			o1 = AOP_RRR(oprrr(ctxt, p.As), uint32(p.From.Reg), uint32(p.To.Reg), uint32(p.Reg))
+		}
+
 	}
 
 	out[0] = o1
@@ -3281,6 +3372,8 @@ func oprrr(ctxt *obj.Link, a obj.As) uint32 {
 		return OPVCC(31, 0, 0, 0) /* L=0 */
 	case ACMPWU:
 		return OPVCC(31, 32, 0, 0)
+	case ACMPB:
+		return OPVCC(31, 508, 0, 0) /* cmpb - v2.05 */
 
 	case ACNTLZW:
 		return OPVCC(31, 26, 0, 0)
@@ -3621,6 +3714,13 @@ func oprrr(ctxt *obj.Link, a obj.As) uint32 {
 	case AORNCC:
 		return OPVCC(31, 412, 0, 1)
 
+	case APOPCNTD:
+		return OPVCC(31, 506, 0, 0) /* popcntd - v2.06 */
+	case APOPCNTW:
+		return OPVCC(31, 378, 0, 0) /* popcntw - v2.06 */
+	case APOPCNTB:
+		return OPVCC(31, 122, 0, 0) /* popcntb - v2.02 */
+
 	case ARFI:
 		return OPVCC(19, 50, 0, 0)
 	case ARFCI:
@@ -3757,14 +3857,14 @@ func oprrr(ctxt *obj.Link, a obj.As) uint32 {
 	/* Vector (VMX/Altivec) instructions */
 	/* ISA 2.03 enables these for PPC970. For POWERx processors, these */
 	/* are enabled starting at POWER6 (ISA 2.05). */
-	case AVANDL:
+	case AVAND:
 		return OPVX(4, 1028, 0, 0) /* vand - v2.03 */
 	case AVANDC:
 		return OPVX(4, 1092, 0, 0) /* vandc - v2.03 */
 	case AVNAND:
 		return OPVX(4, 1412, 0, 0) /* vnand - v2.07 */
 
-	case AVORL:
+	case AVOR:
 		return OPVX(4, 1156, 0, 0) /* vor - v2.03 */
 	case AVORC:
 		return OPVX(4, 1348, 0, 0) /* vorc - v2.07 */
@@ -3810,6 +3910,15 @@ func oprrr(ctxt *obj.Link, a obj.As) uint32 {
 	case AVADDECUQ:
 		return OPVX(4, 61, 0, 0) /* vaddecuq - v2.07 */
 
+	case AVPMSUMB:
+		return OPVX(4, 1032, 0, 0) /* vpmsumb - v2.07 */
+	case AVPMSUMH:
+		return OPVX(4, 1096, 0, 0) /* vpmsumh - v2.07 */
+	case AVPMSUMW:
+		return OPVX(4, 1160, 0, 0) /* vpmsumw - v2.07 */
+	case AVPMSUMD:
+		return OPVX(4, 1224, 0, 0) /* vpmsumd - v2.07 */
+
 	case AVSUBUBM:
 		return OPVX(4, 1024, 0, 0) /* vsububm - v2.03 */
 	case AVSUBUHM:
@@ -3976,12 +4085,12 @@ func oprrr(ctxt *obj.Link, a obj.As) uint32 {
 
 	/* Vector scalar (VSX) instructions */
 	/* ISA 2.06 enables these for POWER7. */
-	case AMFVSRD:
+	case AMFVSRD, AMFVRD, AMFFPRD:
 		return OPVXX1(31, 51, 0) /* mfvsrd - v2.07 */
 	case AMFVSRWZ:
 		return OPVXX1(31, 115, 0) /* mfvsrwz - v2.07 */
 
-	case AMTVSRD:
+	case AMTVSRD, AMTFPRD, AMTVRD:
 		return OPVXX1(31, 179, 0) /* mtvsrd - v2.07 */
 	case AMTVSRWA:
 		return OPVXX1(31, 211, 0) /* mtvsrwa - v2.07 */
@@ -4260,6 +4369,11 @@ func opirr(ctxt *obj.Link, a obj.As) uint32 {
 		return OPVX(4, 908, 0, 0) /* vspltisw - v2.03 */
 	/* End of vector instructions */
 
+	case AFTDIV:
+		return OPVCC(63, 128, 0, 0) /* ftdiv - v2.06 */
+	case AFTSQRT:
+		return OPVCC(63, 160, 0, 0) /* ftsqrt - v2.06 */
+
 	case AXOR:
 		return OPVCC(26, 0, 0, 0) /* XORIL */
 	case -AXOR: