diff --git a/src/cmd/asm/internal/asm/testdata/amd64enc.s b/src/cmd/asm/internal/asm/testdata/amd64enc.s index 22dfe127b3..b27faa5a36 100644 --- a/src/cmd/asm/internal/asm/testdata/amd64enc.s +++ b/src/cmd/asm/internal/asm/testdata/amd64enc.s @@ -2551,14 +2551,14 @@ TEXT asmtest(SB),7,$0 MOVQ (R11), X11 // 66450f6e1b or 664d0f6e1b or f3450f7e1b MOVQ DX, X11 // 66440f6eda or 664c0f6eda MOVQ R11, X11 // 66450f6edb or 664d0f6edb - //TODO: MOVDDUP (BX), X2 // f20f1213 - //TODO: MOVDDUP (R11), X2 // f2410f1213 - //TODO: MOVDDUP X2, X2 // f20f12d2 - //TODO: MOVDDUP X11, X2 // f2410f12d3 - //TODO: MOVDDUP (BX), X11 // f2440f121b - //TODO: MOVDDUP (R11), X11 // f2450f121b - //TODO: MOVDDUP X2, X11 // f2440f12da - //TODO: MOVDDUP X11, X11 // f2450f12db + MOVDDUP (BX), X2 // f20f1213 + MOVDDUP (R11), X2 // f2410f1213 + MOVDDUP X2, X2 // f20f12d2 + MOVDDUP X11, X2 // f2410f12d3 + MOVDDUP (BX), X11 // f2440f121b + MOVDDUP (R11), X11 // f2450f121b + MOVDDUP X2, X11 // f2440f12da + MOVDDUP X11, X11 // f2450f12db MOVQ X2, M2 // f20fd6d2 MOVQ X11, M2 // f2410fd6d3 MOVQ X2, M3 // f20fd6da @@ -2697,22 +2697,22 @@ TEXT asmtest(SB),7,$0 //TODO: MOVSD X11, (BX) // f2440f111b //TODO: MOVSD X2, (R11) // f2410f1113 //TODO: MOVSD X11, (R11) // f2450f111b - //TODO: MOVSHDUP (BX), X2 // f30f1613 - //TODO: MOVSHDUP (R11), X2 // f3410f1613 - //TODO: MOVSHDUP X2, X2 // f30f16d2 - //TODO: MOVSHDUP X11, X2 // f3410f16d3 - //TODO: MOVSHDUP (BX), X11 // f3440f161b - //TODO: MOVSHDUP (R11), X11 // f3450f161b - //TODO: MOVSHDUP X2, X11 // f3440f16da - //TODO: MOVSHDUP X11, X11 // f3450f16db - //TODO: MOVSLDUP (BX), X2 // f30f1213 - //TODO: MOVSLDUP (R11), X2 // f3410f1213 - //TODO: MOVSLDUP X2, X2 // f30f12d2 - //TODO: MOVSLDUP X11, X2 // f3410f12d3 - //TODO: MOVSLDUP (BX), X11 // f3440f121b - //TODO: MOVSLDUP (R11), X11 // f3450f121b - //TODO: MOVSLDUP X2, X11 // f3440f12da - //TODO: MOVSLDUP X11, X11 // f3450f12db + MOVSHDUP (BX), X2 // f30f1613 + MOVSHDUP (R11), X2 // f3410f1613 + MOVSHDUP X2, X2 // f30f16d2 + MOVSHDUP X11, X2 // f3410f16d3 + MOVSHDUP (BX), X11 // f3440f161b + MOVSHDUP (R11), X11 // f3450f161b + MOVSHDUP X2, X11 // f3440f16da + MOVSHDUP X11, X11 // f3450f16db + MOVSLDUP (BX), X2 // f30f1213 + MOVSLDUP (R11), X2 // f3410f1213 + MOVSLDUP X2, X2 // f30f12d2 + MOVSLDUP X11, X2 // f3410f12d3 + MOVSLDUP (BX), X11 // f3440f121b + MOVSLDUP (R11), X11 // f3450f121b + MOVSLDUP X2, X11 // f3440f12da + MOVSLDUP X11, X11 // f3450f12db MOVSQ // 48a5 MOVSS (BX), X2 // f30f1013 MOVSS (R11), X2 // f3410f1013 @@ -6116,30 +6116,30 @@ TEXT asmtest(SB),7,$0 //TODO: VBROADCASTI128 (R11), Y2 // c4c27d5a13 //TODO: VBROADCASTI128 (BX), Y11 // c4627d5a1b //TODO: VBROADCASTI128 (R11), Y11 // c4427d5a1b - //TODO: VBROADCASTSD (BX), Y2 // c4e27d1913 - //TODO: VBROADCASTSD (R11), Y2 // c4c27d1913 - //TODO: VBROADCASTSD (BX), Y11 // c4627d191b - //TODO: VBROADCASTSD (R11), Y11 // c4427d191b - //TODO: VBROADCASTSD X2, Y2 // c4e27d19d2 - //TODO: VBROADCASTSD X11, Y2 // c4c27d19d3 - //TODO: VBROADCASTSD X2, Y11 // c4627d19da - //TODO: VBROADCASTSD X11, Y11 // c4427d19db - //TODO: VBROADCASTSS (BX), X2 // c4e2791813 - //TODO: VBROADCASTSS (R11), X2 // c4c2791813 - //TODO: VBROADCASTSS (BX), X11 // c46279181b - //TODO: VBROADCASTSS (R11), X11 // c44279181b - //TODO: VBROADCASTSS X2, X2 // c4e27918d2 - //TODO: VBROADCASTSS X11, X2 // c4c27918d3 - //TODO: VBROADCASTSS X2, X11 // c4627918da - //TODO: VBROADCASTSS X11, X11 // c4427918db - //TODO: VBROADCASTSS (BX), Y2 // c4e27d1813 - //TODO: VBROADCASTSS (R11), Y2 // c4c27d1813 - //TODO: VBROADCASTSS (BX), Y11 // c4627d181b - //TODO: VBROADCASTSS (R11), Y11 // c4427d181b - //TODO: VBROADCASTSS X2, Y2 // c4e27d18d2 - //TODO: VBROADCASTSS X11, Y2 // c4c27d18d3 - //TODO: VBROADCASTSS X2, Y11 // c4627d18da - //TODO: VBROADCASTSS X11, Y11 // c4427d18db + VBROADCASTSD (BX), Y2 // c4e27d1913 + VBROADCASTSD (R11), Y2 // c4c27d1913 + VBROADCASTSD (BX), Y11 // c4627d191b + VBROADCASTSD (R11), Y11 // c4427d191b + VBROADCASTSD X2, Y2 // c4e27d19d2 + VBROADCASTSD X11, Y2 // c4c27d19d3 + VBROADCASTSD X2, Y11 // c4627d19da + VBROADCASTSD X11, Y11 // c4427d19db + VBROADCASTSS (BX), X2 // c4e2791813 + VBROADCASTSS (R11), X2 // c4c2791813 + VBROADCASTSS (BX), X11 // c46279181b + VBROADCASTSS (R11), X11 // c44279181b + VBROADCASTSS X2, X2 // c4e27918d2 + VBROADCASTSS X11, X2 // c4c27918d3 + VBROADCASTSS X2, X11 // c4627918da + VBROADCASTSS X11, X11 // c4427918db + VBROADCASTSS (BX), Y2 // c4e27d1813 + VBROADCASTSS (R11), Y2 // c4c27d1813 + VBROADCASTSS (BX), Y11 // c4627d181b + VBROADCASTSS (R11), Y11 // c4427d181b + VBROADCASTSS X2, Y2 // c4e27d18d2 + VBROADCASTSS X11, Y2 // c4c27d18d3 + VBROADCASTSS X2, Y11 // c4627d18da + VBROADCASTSS X11, Y11 // c4427d18db //TODO: VCMPPD $7, (BX), X9, X2 // c4e131c21307 or c5b1c21307 //TODO: VCMPPD $7, (R11), X9, X2 // c4c131c21307 //TODO: VCMPPD $7, X2, X9, X2 // c4e131c2d207 or c5b1c2d207 @@ -7642,22 +7642,22 @@ TEXT asmtest(SB),7,$0 //TODO: VMOVD (R11), X11 // c441796e1b //TODO: VMOVD DX, X11 // c461796eda or c5796eda //TODO: VMOVD R11, X11 // c441796edb - //TODO: VMOVDDUP (BX), X2 // c4e17b1213 or c5fb1213 - //TODO: VMOVDDUP (R11), X2 // c4c17b1213 - //TODO: VMOVDDUP X2, X2 // c4e17b12d2 or c5fb12d2 - //TODO: VMOVDDUP X11, X2 // c4c17b12d3 - //TODO: VMOVDDUP (BX), X11 // c4617b121b or c57b121b - //TODO: VMOVDDUP (R11), X11 // c4417b121b - //TODO: VMOVDDUP X2, X11 // c4617b12da or c57b12da - //TODO: VMOVDDUP X11, X11 // c4417b12db - //TODO: VMOVDDUP (BX), Y2 // c4e17f1213 or c5ff1213 - //TODO: VMOVDDUP (R11), Y2 // c4c17f1213 - //TODO: VMOVDDUP Y2, Y2 // c4e17f12d2 or c5ff12d2 - //TODO: VMOVDDUP Y11, Y2 // c4c17f12d3 - //TODO: VMOVDDUP (BX), Y11 // c4617f121b or c57f121b - //TODO: VMOVDDUP (R11), Y11 // c4417f121b - //TODO: VMOVDDUP Y2, Y11 // c4617f12da or c57f12da - //TODO: VMOVDDUP Y11, Y11 // c4417f12db + VMOVDDUP (BX), X2 // c4e17b1213 or c5fb1213 + VMOVDDUP (R11), X2 // c4c17b1213 + VMOVDDUP X2, X2 // c4e17b12d2 or c5fb12d2 + VMOVDDUP X11, X2 // c4c17b12d3 + VMOVDDUP (BX), X11 // c4617b121b or c57b121b + VMOVDDUP (R11), X11 // c4417b121b + VMOVDDUP X2, X11 // c4617b12da or c57b12da + VMOVDDUP X11, X11 // c4417b12db + VMOVDDUP (BX), Y2 // c4e17f1213 or c5ff1213 + VMOVDDUP (R11), Y2 // c4c17f1213 + VMOVDDUP Y2, Y2 // c4e17f12d2 or c5ff12d2 + VMOVDDUP Y11, Y2 // c4c17f12d3 + VMOVDDUP (BX), Y11 // c4617f121b or c57f121b + VMOVDDUP (R11), Y11 // c4417f121b + VMOVDDUP Y2, Y11 // c4617f12da or c57f12da + VMOVDDUP Y11, Y11 // c4417f12db VMOVDQA (BX), X2 // c4e1796f13 or c5f96f13 VMOVDQA (R11), X2 // c4c1796f13 VMOVDQA X2, X2 // c4e1796fd2 or c5f96fd2 or c4e1797fd2 or c5f97fd2 @@ -7826,38 +7826,38 @@ TEXT asmtest(SB),7,$0 //TODO: VMOVSD X11, X9, X2 // c4c13310d3 or c4613311da or c53311da //TODO: VMOVSD X2, X9, X11 // c4613310da or c53310da or c4c13311d3 //TODO: VMOVSD X11, X9, X11 // c4413310db or c4413311db - //TODO: VMOVSHDUP (BX), X2 // c4e17a1613 or c5fa1613 - //TODO: VMOVSHDUP (R11), X2 // c4c17a1613 - //TODO: VMOVSHDUP X2, X2 // c4e17a16d2 or c5fa16d2 - //TODO: VMOVSHDUP X11, X2 // c4c17a16d3 - //TODO: VMOVSHDUP (BX), X11 // c4617a161b or c57a161b - //TODO: VMOVSHDUP (R11), X11 // c4417a161b - //TODO: VMOVSHDUP X2, X11 // c4617a16da or c57a16da - //TODO: VMOVSHDUP X11, X11 // c4417a16db - //TODO: VMOVSHDUP (BX), Y2 // c4e17e1613 or c5fe1613 - //TODO: VMOVSHDUP (R11), Y2 // c4c17e1613 - //TODO: VMOVSHDUP Y2, Y2 // c4e17e16d2 or c5fe16d2 - //TODO: VMOVSHDUP Y11, Y2 // c4c17e16d3 - //TODO: VMOVSHDUP (BX), Y11 // c4617e161b or c57e161b - //TODO: VMOVSHDUP (R11), Y11 // c4417e161b - //TODO: VMOVSHDUP Y2, Y11 // c4617e16da or c57e16da - //TODO: VMOVSHDUP Y11, Y11 // c4417e16db - //TODO: VMOVSLDUP (BX), X2 // c4e17a1213 or c5fa1213 - //TODO: VMOVSLDUP (R11), X2 // c4c17a1213 - //TODO: VMOVSLDUP X2, X2 // c4e17a12d2 or c5fa12d2 - //TODO: VMOVSLDUP X11, X2 // c4c17a12d3 - //TODO: VMOVSLDUP (BX), X11 // c4617a121b or c57a121b - //TODO: VMOVSLDUP (R11), X11 // c4417a121b - //TODO: VMOVSLDUP X2, X11 // c4617a12da or c57a12da - //TODO: VMOVSLDUP X11, X11 // c4417a12db - //TODO: VMOVSLDUP (BX), Y2 // c4e17e1213 or c5fe1213 - //TODO: VMOVSLDUP (R11), Y2 // c4c17e1213 - //TODO: VMOVSLDUP Y2, Y2 // c4e17e12d2 or c5fe12d2 - //TODO: VMOVSLDUP Y11, Y2 // c4c17e12d3 - //TODO: VMOVSLDUP (BX), Y11 // c4617e121b or c57e121b - //TODO: VMOVSLDUP (R11), Y11 // c4417e121b - //TODO: VMOVSLDUP Y2, Y11 // c4617e12da or c57e12da - //TODO: VMOVSLDUP Y11, Y11 // c4417e12db + VMOVSHDUP (BX), X2 // c4e17a1613 or c5fa1613 + VMOVSHDUP (R11), X2 // c4c17a1613 + VMOVSHDUP X2, X2 // c4e17a16d2 or c5fa16d2 + VMOVSHDUP X11, X2 // c4c17a16d3 + VMOVSHDUP (BX), X11 // c4617a161b or c57a161b + VMOVSHDUP (R11), X11 // c4417a161b + VMOVSHDUP X2, X11 // c4617a16da or c57a16da + VMOVSHDUP X11, X11 // c4417a16db + VMOVSHDUP (BX), Y2 // c4e17e1613 or c5fe1613 + VMOVSHDUP (R11), Y2 // c4c17e1613 + VMOVSHDUP Y2, Y2 // c4e17e16d2 or c5fe16d2 + VMOVSHDUP Y11, Y2 // c4c17e16d3 + VMOVSHDUP (BX), Y11 // c4617e161b or c57e161b + VMOVSHDUP (R11), Y11 // c4417e161b + VMOVSHDUP Y2, Y11 // c4617e16da or c57e16da + VMOVSHDUP Y11, Y11 // c4417e16db + VMOVSLDUP (BX), X2 // c4e17a1213 or c5fa1213 + VMOVSLDUP (R11), X2 // c4c17a1213 + VMOVSLDUP X2, X2 // c4e17a12d2 or c5fa12d2 + VMOVSLDUP X11, X2 // c4c17a12d3 + VMOVSLDUP (BX), X11 // c4617a121b or c57a121b + VMOVSLDUP (R11), X11 // c4417a121b + VMOVSLDUP X2, X11 // c4617a12da or c57a12da + VMOVSLDUP X11, X11 // c4417a12db + VMOVSLDUP (BX), Y2 // c4e17e1213 or c5fe1213 + VMOVSLDUP (R11), Y2 // c4c17e1213 + VMOVSLDUP Y2, Y2 // c4e17e12d2 or c5fe12d2 + VMOVSLDUP Y11, Y2 // c4c17e12d3 + VMOVSLDUP (BX), Y11 // c4617e121b or c57e121b + VMOVSLDUP (R11), Y11 // c4417e121b + VMOVSLDUP Y2, Y11 // c4617e12da or c57e12da + VMOVSLDUP Y11, Y11 // c4417e12db //TODO: VMOVSS X2, (BX) // c4e17a1113 or c5fa1113 //TODO: VMOVSS X11, (BX) // c4617a111b or c57a111b //TODO: VMOVSS X2, (R11) // c4c17a1113 diff --git a/src/cmd/internal/obj/x86/a.out.go b/src/cmd/internal/obj/x86/a.out.go index 9e7bbe4900..02f92ed387 100644 --- a/src/cmd/internal/obj/x86/a.out.go +++ b/src/cmd/internal/obj/x86/a.out.go @@ -771,6 +771,9 @@ const ( AROUNDSS AROUNDPD AROUNDSD + AMOVDDUP + AMOVSHDUP + AMOVSLDUP APSHUFD APCLMULQDQ @@ -803,6 +806,11 @@ const ( AVPERM2I128 ARORXL ARORXQ + AVBROADCASTSS + AVBROADCASTSD + AVMOVDDUP + AVMOVSHDUP + AVMOVSLDUP // from 386 AJCXZW diff --git a/src/cmd/internal/obj/x86/anames.go b/src/cmd/internal/obj/x86/anames.go index 3b30154625..8c5be80795 100644 --- a/src/cmd/internal/obj/x86/anames.go +++ b/src/cmd/internal/obj/x86/anames.go @@ -708,6 +708,9 @@ var Anames = []string{ "ROUNDSS", "ROUNDPD", "ROUNDSD", + "MOVDDUP", + "MOVSHDUP", + "MOVSLDUP", "PSHUFD", "PCLMULQDQ", "VZEROUPPER", @@ -738,6 +741,11 @@ var Anames = []string{ "VPERM2I128", "RORXL", "RORXQ", + "VBROADCASTSS", + "VBROADCASTSD", + "VMOVDDUP", + "VMOVSHDUP", + "VMOVSLDUP", "JCXZW", "FCMOVCC", "FCMOVCS", diff --git a/src/cmd/internal/obj/x86/asm6.go b/src/cmd/internal/obj/x86/asm6.go index f7fa9a2edc..bf67822822 100644 --- a/src/cmd/internal/obj/x86/asm6.go +++ b/src/cmd/internal/obj/x86/asm6.go @@ -866,6 +866,10 @@ var yvex_vpbroadcast = []ytab{ {Yxm, Ynone, Yyr, Zvex_rm_v_r, 2}, } +var yvex_vpbroadcast_sd = []ytab{ + {Yxm, Ynone, Yyr, Zvex_rm_v_r, 2}, +} + var ymmxmm0f38 = []ytab{ {Ymm, Ynone, Ymr, Zlitm_r, 3}, {Yxm, Ynone, Yxr, Zlitm_r, 5}, @@ -1630,6 +1634,9 @@ var optab = {APSHUFD, yxshuf, Pq, [23]uint8{0x70, 0}}, {APCLMULQDQ, yxshuf, Pq, [23]uint8{0x3a, 0x44, 0}}, {APCMPESTRI, yxshuf, Pq, [23]uint8{0x3a, 0x61, 0}}, + {AMOVDDUP, yxm, Pf2, [23]uint8{0x12}}, + {AMOVSHDUP, yxm, Pf3, [23]uint8{0x16}}, + {AMOVSLDUP, yxm, Pf3, [23]uint8{0x12}}, {AANDNL, yvex_r3, Pvex, [23]uint8{VEX_LZ_0F38_W0, 0xF2}}, {AANDNQ, yvex_r3, Pvex, [23]uint8{VEX_LZ_0F38_W1, 0xF2}}, @@ -1678,6 +1685,11 @@ var optab = {AVPERM2I128, yvex_yyi4, Pvex, [23]uint8{VEX_256_66_0F3A_WIG, 0x46}}, {ARORXL, yvex_ri3, Pvex, [23]uint8{VEX_LZ_F2_0F3A_W0, 0xf0}}, {ARORXQ, yvex_ri3, Pvex, [23]uint8{VEX_LZ_F2_0F3A_W1, 0xf0}}, + {AVBROADCASTSD, yvex_vpbroadcast_sd, Pvex, [23]uint8{VEX_256_66_0F38_W0, 0x19}}, + {AVBROADCASTSS, yvex_vpbroadcast, Pvex, [23]uint8{VEX_128_66_0F38_W0, 0x18, VEX_256_66_0F38_W0, 0x18}}, + {AVMOVDDUP, yvex_xy2, Pvex, [23]uint8{VEX_128_F2_0F_WIG, 0x12, VEX_256_F2_0F_WIG, 0x12}}, + {AVMOVSHDUP, yvex_xy2, Pvex, [23]uint8{VEX_128_F3_0F_WIG, 0x16, VEX_256_F3_0F_WIG, 0x16}}, + {AVMOVSLDUP, yvex_xy2, Pvex, [23]uint8{VEX_128_F3_0F_WIG, 0x12, VEX_256_F3_0F_WIG, 0x12}}, {AXACQUIRE, ynone, Px, [23]uint8{0xf2}}, {AXRELEASE, ynone, Px, [23]uint8{0xf3}}, @@ -3379,7 +3391,8 @@ func doasm(ctxt *obj.Link, p *obj.Prog) { log.Fatalf("asmins bad table %v", p) } op = int(o.op[z]) - if op == 0x0f { + // In vex case 0x0f is actually VEX_256_F2_0F_WIG + if op == 0x0f && o.prefix != Pvex { ctxt.AsmBuf.Put1(byte(op)) z++ op = int(o.op[z])