1
0
mirror of https://github.com/golang/go synced 2024-11-20 07:54:39 -07:00

cmd/internal/obj/x86: add some more AVX2 instructions

This adds the VFMADD[213|231]SD, VFNMADD[213|231]SD,
VADDSD, VSUBSD instructions

This will allow us to write a fast path for exp_amd64.s where
these optimizations can be applied in a lot of places.

Change-Id: Ide292107ab887bd1e225a1ad60880235b5ed7c61
Reviewed-on: https://go-review.googlesource.com/61810
Reviewed-by: Ilya Tocar <ilya.tocar@intel.com>
Run-TryBot: Ilya Tocar <ilya.tocar@intel.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
This commit is contained in:
Agniva De Sarker 2017-09-06 00:10:50 +05:30 committed by Ilya Tocar
parent 31ddd8a39d
commit 6367c19f26
4 changed files with 70 additions and 48 deletions

View File

@ -5886,14 +5886,14 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0
//TODO: VADDPS (R11), Y15, Y11 // c44104581b
//TODO: VADDPS Y2, Y15, Y11 // c4610458da or c50458da
//TODO: VADDPS Y11, Y15, Y11 // c4410458db
//TODO: VADDSD (BX), X9, X2 // c4e1335813 or c5b35813
//TODO: VADDSD (R11), X9, X2 // c4c1335813
//TODO: VADDSD X2, X9, X2 // c4e13358d2 or c5b358d2
//TODO: VADDSD X11, X9, X2 // c4c13358d3
//TODO: VADDSD (BX), X9, X11 // c46133581b or c533581b
//TODO: VADDSD (R11), X9, X11 // c44133581b
//TODO: VADDSD X2, X9, X11 // c4613358da or c53358da
//TODO: VADDSD X11, X9, X11 // c4413358db
VADDSD (BX), X9, X2 // c4e1335813 or c5b35813
VADDSD (R11), X9, X2 // c4c1335813
VADDSD X2, X9, X2 // c4e13358d2 or c5b358d2
VADDSD X11, X9, X2 // c4c13358d3
VADDSD (BX), X9, X11 // c46133581b or c533581b
VADDSD (R11), X9, X11 // c44133581b
VADDSD X2, X9, X11 // c4613358da or c53358da
VADDSD X11, X9, X11 // c4413358db
//TODO: VADDSS (BX), X9, X2 // c4e1325813 or c5b25813
//TODO: VADDSS (R11), X9, X2 // c4c1325813
//TODO: VADDSS X2, X9, X2 // c4e13258d2 or c5b258d2
@ -6662,14 +6662,14 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0
//TODO: VFMADD213PS (R11), Y15, Y11 // c44205a81b
//TODO: VFMADD213PS Y2, Y15, Y11 // c46205a8da
//TODO: VFMADD213PS Y11, Y15, Y11 // c44205a8db
//TODO: VFMADD213SD (BX), X9, X2 // c4e2b1a913
//TODO: VFMADD213SD (R11), X9, X2 // c4c2b1a913
//TODO: VFMADD213SD X2, X9, X2 // c4e2b1a9d2
//TODO: VFMADD213SD X11, X9, X2 // c4c2b1a9d3
//TODO: VFMADD213SD (BX), X9, X11 // c462b1a91b
//TODO: VFMADD213SD (R11), X9, X11 // c442b1a91b
//TODO: VFMADD213SD X2, X9, X11 // c462b1a9da
//TODO: VFMADD213SD X11, X9, X11 // c442b1a9db
VFMADD213SD (BX), X9, X2 // c4e2b1a913
VFMADD213SD (R11), X9, X2 // c4c2b1a913
VFMADD213SD X2, X9, X2 // c4e2b1a9d2
VFMADD213SD X11, X9, X2 // c4c2b1a9d3
VFMADD213SD (BX), X9, X11 // c462b1a91b
VFMADD213SD (R11), X9, X11 // c442b1a91b
VFMADD213SD X2, X9, X11 // c462b1a9da
VFMADD213SD X11, X9, X11 // c442b1a9db
//TODO: VFMADD213SS (BX), X9, X2 // c4e231a913
//TODO: VFMADD213SS (R11), X9, X2 // c4c231a913
//TODO: VFMADD213SS X2, X9, X2 // c4e231a9d2
@ -6710,14 +6710,14 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0
//TODO: VFMADD231PS (R11), Y15, Y11 // c44205b81b
//TODO: VFMADD231PS Y2, Y15, Y11 // c46205b8da
//TODO: VFMADD231PS Y11, Y15, Y11 // c44205b8db
//TODO: VFMADD231SD (BX), X9, X2 // c4e2b1b913
//TODO: VFMADD231SD (R11), X9, X2 // c4c2b1b913
//TODO: VFMADD231SD X2, X9, X2 // c4e2b1b9d2
//TODO: VFMADD231SD X11, X9, X2 // c4c2b1b9d3
//TODO: VFMADD231SD (BX), X9, X11 // c462b1b91b
//TODO: VFMADD231SD (R11), X9, X11 // c442b1b91b
//TODO: VFMADD231SD X2, X9, X11 // c462b1b9da
//TODO: VFMADD231SD X11, X9, X11 // c442b1b9db
VFMADD231SD (BX), X9, X2 // c4e2b1b913
VFMADD231SD (R11), X9, X2 // c4c2b1b913
VFMADD231SD X2, X9, X2 // c4e2b1b9d2
VFMADD231SD X11, X9, X2 // c4c2b1b9d3
VFMADD231SD (BX), X9, X11 // c462b1b91b
VFMADD231SD (R11), X9, X11 // c442b1b91b
VFMADD231SD X2, X9, X11 // c462b1b9da
VFMADD231SD X11, X9, X11 // c442b1b9db
//TODO: VFMADD231SS (BX), X9, X2 // c4e231b913
//TODO: VFMADD231SS (R11), X9, X2 // c4c231b913
//TODO: VFMADD231SS X2, X9, X2 // c4e231b9d2
@ -7142,14 +7142,14 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0
//TODO: VFNMADD213PS (R11), Y15, Y11 // c44205ac1b
//TODO: VFNMADD213PS Y2, Y15, Y11 // c46205acda
//TODO: VFNMADD213PS Y11, Y15, Y11 // c44205acdb
//TODO: VFNMADD213SD (BX), X9, X2 // c4e2b1ad13
//TODO: VFNMADD213SD (R11), X9, X2 // c4c2b1ad13
//TODO: VFNMADD213SD X2, X9, X2 // c4e2b1add2
//TODO: VFNMADD213SD X11, X9, X2 // c4c2b1add3
//TODO: VFNMADD213SD (BX), X9, X11 // c462b1ad1b
//TODO: VFNMADD213SD (R11), X9, X11 // c442b1ad1b
//TODO: VFNMADD213SD X2, X9, X11 // c462b1adda
//TODO: VFNMADD213SD X11, X9, X11 // c442b1addb
VFNMADD213SD (BX), X9, X2 // c4e2b1ad13
VFNMADD213SD (R11), X9, X2 // c4c2b1ad13
VFNMADD213SD X2, X9, X2 // c4e2b1add2
VFNMADD213SD X11, X9, X2 // c4c2b1add3
VFNMADD213SD (BX), X9, X11 // c462b1ad1b
VFNMADD213SD (R11), X9, X11 // c442b1ad1b
VFNMADD213SD X2, X9, X11 // c462b1adda
VFNMADD213SD X11, X9, X11 // c442b1addb
//TODO: VFNMADD213SS (BX), X9, X2 // c4e231ad13
//TODO: VFNMADD213SS (R11), X9, X2 // c4c231ad13
//TODO: VFNMADD213SS X2, X9, X2 // c4e231add2
@ -7190,14 +7190,14 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0
//TODO: VFNMADD231PS (R11), Y15, Y11 // c44205bc1b
//TODO: VFNMADD231PS Y2, Y15, Y11 // c46205bcda
//TODO: VFNMADD231PS Y11, Y15, Y11 // c44205bcdb
//TODO: VFNMADD231SD (BX), X9, X2 // c4e2b1bd13
//TODO: VFNMADD231SD (R11), X9, X2 // c4c2b1bd13
//TODO: VFNMADD231SD X2, X9, X2 // c4e2b1bdd2
//TODO: VFNMADD231SD X11, X9, X2 // c4c2b1bdd3
//TODO: VFNMADD231SD (BX), X9, X11 // c462b1bd1b
//TODO: VFNMADD231SD (R11), X9, X11 // c442b1bd1b
//TODO: VFNMADD231SD X2, X9, X11 // c462b1bdda
//TODO: VFNMADD231SD X11, X9, X11 // c442b1bddb
VFNMADD231SD (BX), X9, X2 // c4e2b1bd13
VFNMADD231SD (R11), X9, X2 // c4c2b1bd13
VFNMADD231SD X2, X9, X2 // c4e2b1bdd2
VFNMADD231SD X11, X9, X2 // c4c2b1bdd3
VFNMADD231SD (BX), X9, X11 // c462b1bd1b
VFNMADD231SD (R11), X9, X11 // c442b1bd1b
VFNMADD231SD X2, X9, X11 // c462b1bdda
VFNMADD231SD X11, X9, X11 // c442b1bddb
//TODO: VFNMADD231SS (BX), X9, X2 // c4e231bd13
//TODO: VFNMADD231SS (R11), X9, X2 // c4c231bd13
//TODO: VFNMADD231SS X2, X9, X2 // c4e231bdd2
@ -10314,14 +10314,14 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0
//TODO: VSUBPS (R11), Y15, Y11 // c441045c1b
//TODO: VSUBPS Y2, Y15, Y11 // c461045cda or c5045cda
//TODO: VSUBPS Y11, Y15, Y11 // c441045cdb
//TODO: VSUBSD (BX), X9, X2 // c4e1335c13 or c5b35c13
//TODO: VSUBSD (R11), X9, X2 // c4c1335c13
//TODO: VSUBSD X2, X9, X2 // c4e1335cd2 or c5b35cd2
//TODO: VSUBSD X11, X9, X2 // c4c1335cd3
//TODO: VSUBSD (BX), X9, X11 // c461335c1b or c5335c1b
//TODO: VSUBSD (R11), X9, X11 // c441335c1b
//TODO: VSUBSD X2, X9, X11 // c461335cda or c5335cda
//TODO: VSUBSD X11, X9, X11 // c441335cdb
VSUBSD (BX), X9, X2 // c4e1335c13 or c5b35c13
VSUBSD (R11), X9, X2 // c4c1335c13
VSUBSD X2, X9, X2 // c4e1335cd2 or c5b35cd2
VSUBSD X11, X9, X2 // c4c1335cd3
VSUBSD (BX), X9, X11 // c461335c1b or c5335c1b
VSUBSD (R11), X9, X11 // c441335c1b
VSUBSD X2, X9, X11 // c461335cda or c5335cda
VSUBSD X11, X9, X11 // c441335cdb
//TODO: VSUBSS (BX), X9, X2 // c4e1325c13 or c5b25c13
//TODO: VSUBSS (R11), X9, X2 // c4c1325c13
//TODO: VSUBSS X2, X9, X2 // c4e1325cd2 or c5b25cd2

View File

@ -838,11 +838,17 @@ const (
AVPERM2I128
ARORXL
ARORXQ
AVADDSD
AVBROADCASTSS
AVBROADCASTSD
AVFMADD213SD
AVFMADD231SD
AVFNMADD213SD
AVFNMADD231SD
AVMOVDDUP
AVMOVSHDUP
AVMOVSLDUP
AVSUBSD
// from 386
AJCXZW

View File

@ -773,11 +773,17 @@ var Anames = []string{
"VPERM2I128",
"RORXL",
"RORXQ",
"VADDSD",
"VBROADCASTSS",
"VBROADCASTSD",
"VFMADD213SD",
"VFMADD231SD",
"VFNMADD213SD",
"VFNMADD231SD",
"VMOVDDUP",
"VMOVSHDUP",
"VMOVSLDUP",
"VSUBSD",
"JCXZW",
"FCMOVCC",
"FCMOVCS",

View File

@ -819,6 +819,10 @@ var yvex_xy3 = []ytab{
{Yym, Yyr, Yyr, Zvex_rm_v_r, 2},
}
var yvex_x3 = []ytab{
{Yxm, Yxr, Yxr, Zvex_rm_v_r, 2},
}
var yvex_ri3 = []ytab{
{Yi8, Ymb, Yrl, Zvex_i_rm_r, 2},
}
@ -1722,6 +1726,12 @@ var optab =
{AVPOR, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xeb, VEX_256_66_0F_WIG, 0xeb}},
{AVPADDQ, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xd4, VEX_256_66_0F_WIG, 0xd4}},
{AVPADDD, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xfe, VEX_256_66_0F_WIG, 0xfe}},
{AVADDSD, yvex_x3, Pvex, [23]uint8{VEX_128_F2_0F_WIG, 0x58}},
{AVSUBSD, yvex_x3, Pvex, [23]uint8{VEX_128_F2_0F_WIG, 0x5c}},
{AVFMADD213SD, yvex_x3, Pvex, [23]uint8{VEX_LIG_66_0F38_W1, 0xa9}},
{AVFMADD231SD, yvex_x3, Pvex, [23]uint8{VEX_LIG_66_0F38_W1, 0xb9}},
{AVFNMADD213SD, yvex_x3, Pvex, [23]uint8{VEX_LIG_66_0F38_W1, 0xad}},
{AVFNMADD231SD, yvex_x3, Pvex, [23]uint8{VEX_LIG_66_0F38_W1, 0xbd}},
{AVPSLLD, yvex_shift, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x72, 0xf0, VEX_256_66_0F_WIG, 0x72, 0xf0, VEX_128_66_0F_WIG, 0xf2, VEX_256_66_0F_WIG, 0xf2}},
{AVPSLLQ, yvex_shift, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x73, 0xf0, VEX_256_66_0F_WIG, 0x73, 0xf0, VEX_128_66_0F_WIG, 0xf3, VEX_256_66_0F_WIG, 0xf3}},
{AVPSRLD, yvex_shift, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x72, 0xd0, VEX_256_66_0F_WIG, 0x72, 0xd0, VEX_128_66_0F_WIG, 0xd2, VEX_256_66_0F_WIG, 0xd2}},