1
0
mirror of https://github.com/golang/go synced 2024-11-20 08:54:40 -07:00

cmd/asm: add most SSE4 missing instructions

Instructions added:
  INSERTPS immb, r/m, xmm
  MPSADBW immb, r/m, xmm
  BLENDPD immb, r/m, xmm
  BLENDPS immb, r/m, xmm
  DPPD immb, r/m, xmm
  DPPS immb, r/m, xmm
  MOVNTDQA r/m, xmm
  PACKUSDW r/m, xmm
  PBLENDW immb, r/m, xmm
  PCMPEQQ r/m, xmm
  PCMPGTQ r/m, xmm
  PCMPISTRI immb, r/m, xmm
  PCMPISTRM immb, r/m, xmm
  PMAXSB r/m, xmm
  PMAXSD r/m, xmm
  PMAXUD r/m, xmm
  PMAXUW r/m, xmm
  PMINSB r/m, xmm
  PMINSD r/m, xmm
  PMINUD r/m, xmm
  PMINUW r/m, xmm
  PTEST r/m, xmm
  PCMPESTRM immb, r/m, xmm

Note: only 'optab' table is extended.

`EXTRACTPS immb, xmm, r/m` is not included in this
change due to new ytab set 'yextractps'. This should simplify
code review.

4-operand instructions are a subject of upcoming changes that
make 4-th (and so on) operands explicit.
Related TODO note in asm6.go:
"dont't hide 4op, some version have xmm version".

Part of the mission to add missing amd64 SSE4 instructions to Go asm.

Change-Id: I71716df14a8a5332e866dd0f0d52d43d7714872f
Reviewed-on: https://go-review.googlesource.com/57470
Run-TryBot: Iskander Sharipov <iskander.sharipov@intel.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ilya Tocar <ilya.tocar@intel.com>
This commit is contained in:
isharipo 2017-08-21 12:12:41 +03:00 committed by Ilya Tocar
parent b15e8babc8
commit 50f1f639a4
4 changed files with 254 additions and 180 deletions

View File

@ -436,22 +436,22 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0
BEXTRQ R14, (R11), R11 // c44288f71b
BEXTRQ R14, DX, R11 // c46288f7da
BEXTRQ R14, R11, R11 // c44288f7db
//TODO: BLENDPD $7, (BX), X2 // 660f3a0d1307
//TODO: BLENDPD $7, (R11), X2 // 66410f3a0d1307
//TODO: BLENDPD $7, X2, X2 // 660f3a0dd207
//TODO: BLENDPD $7, X11, X2 // 66410f3a0dd307
//TODO: BLENDPD $7, (BX), X11 // 66440f3a0d1b07
//TODO: BLENDPD $7, (R11), X11 // 66450f3a0d1b07
//TODO: BLENDPD $7, X2, X11 // 66440f3a0dda07
//TODO: BLENDPD $7, X11, X11 // 66450f3a0ddb07
//TODO: BLENDPS $7, (BX), X2 // 660f3a0c1307
//TODO: BLENDPS $7, (R11), X2 // 66410f3a0c1307
//TODO: BLENDPS $7, X2, X2 // 660f3a0cd207
//TODO: BLENDPS $7, X11, X2 // 66410f3a0cd307
//TODO: BLENDPS $7, (BX), X11 // 66440f3a0c1b07
//TODO: BLENDPS $7, (R11), X11 // 66450f3a0c1b07
//TODO: BLENDPS $7, X2, X11 // 66440f3a0cda07
//TODO: BLENDPS $7, X11, X11 // 66450f3a0cdb07
BLENDPD $7, (BX), X2 // 660f3a0d1307
BLENDPD $7, (R11), X2 // 66410f3a0d1307
BLENDPD $7, X2, X2 // 660f3a0dd207
BLENDPD $7, X11, X2 // 66410f3a0dd307
BLENDPD $7, (BX), X11 // 66440f3a0d1b07
BLENDPD $7, (R11), X11 // 66450f3a0d1b07
BLENDPD $7, X2, X11 // 66440f3a0dda07
BLENDPD $7, X11, X11 // 66450f3a0ddb07
BLENDPS $7, (BX), X2 // 660f3a0c1307
BLENDPS $7, (R11), X2 // 66410f3a0c1307
BLENDPS $7, X2, X2 // 660f3a0cd207
BLENDPS $7, X11, X2 // 66410f3a0cd307
BLENDPS $7, (BX), X11 // 66440f3a0c1b07
BLENDPS $7, (R11), X11 // 66450f3a0c1b07
BLENDPS $7, X2, X11 // 66440f3a0cda07
BLENDPS $7, X11, X11 // 66450f3a0cdb07
//TODO: BLENDVPD XMM0, (BX), X2 // 660f381513
//TODO: BLENDVPD XMM0, (R11), X2 // 66410f381513
//TODO: BLENDVPD XMM0, X2, X2 // 660f3815d2
@ -1622,22 +1622,22 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0
DIVSS (R11), X11 // f3450f5e1b
DIVSS X2, X11 // f3440f5eda
DIVSS X11, X11 // f3450f5edb
//TODO: DPPD $7, (BX), X2 // 660f3a411307
//TODO: DPPD $7, (R11), X2 // 66410f3a411307
//TODO: DPPD $7, X2, X2 // 660f3a41d207
//TODO: DPPD $7, X11, X2 // 66410f3a41d307
//TODO: DPPD $7, (BX), X11 // 66440f3a411b07
//TODO: DPPD $7, (R11), X11 // 66450f3a411b07
//TODO: DPPD $7, X2, X11 // 66440f3a41da07
//TODO: DPPD $7, X11, X11 // 66450f3a41db07
//TODO: DPPS $7, (BX), X2 // 660f3a401307
//TODO: DPPS $7, (R11), X2 // 66410f3a401307
//TODO: DPPS $7, X2, X2 // 660f3a40d207
//TODO: DPPS $7, X11, X2 // 66410f3a40d307
//TODO: DPPS $7, (BX), X11 // 66440f3a401b07
//TODO: DPPS $7, (R11), X11 // 66450f3a401b07
//TODO: DPPS $7, X2, X11 // 66440f3a40da07
//TODO: DPPS $7, X11, X11 // 66450f3a40db07
DPPD $7, (BX), X2 // 660f3a411307
DPPD $7, (R11), X2 // 66410f3a411307
DPPD $7, X2, X2 // 660f3a41d207
DPPD $7, X11, X2 // 66410f3a41d307
DPPD $7, (BX), X11 // 66440f3a411b07
DPPD $7, (R11), X11 // 66450f3a411b07
DPPD $7, X2, X11 // 66440f3a41da07
DPPD $7, X11, X11 // 66450f3a41db07
DPPS $7, (BX), X2 // 660f3a401307
DPPS $7, (R11), X2 // 66410f3a401307
DPPS $7, X2, X2 // 660f3a40d207
DPPS $7, X11, X2 // 66410f3a40d307
DPPS $7, (BX), X11 // 66440f3a401b07
DPPS $7, (R11), X11 // 66450f3a401b07
DPPS $7, X2, X11 // 66440f3a40da07
DPPS $7, X11, X11 // 66450f3a40db07
EMMS // 0f77
//TODO: ENTERQ $0x12, $0xf123 // c823f112
//TODO: EXTRACTPS $7, X2, (BX) // 660f3a171307
@ -2042,14 +2042,14 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0
INCB R11 // 41fec3
INSB // 6c
INSL // 6d
//TODO: INSERTPS $7, (BX), X2 // 660f3a211307
//TODO: INSERTPS $7, (R11), X2 // 66410f3a211307
//TODO: INSERTPS $7, X2, X2 // 660f3a21d207
//TODO: INSERTPS $7, X11, X2 // 66410f3a21d307
//TODO: INSERTPS $7, (BX), X11 // 66440f3a211b07
//TODO: INSERTPS $7, (R11), X11 // 66450f3a211b07
//TODO: INSERTPS $7, X2, X11 // 66440f3a21da07
//TODO: INSERTPS $7, X11, X11 // 66450f3a21db07
INSERTPS $7, (BX), X2 // 660f3a211307
INSERTPS $7, (R11), X2 // 66410f3a211307
INSERTPS $7, X2, X2 // 660f3a21d207
INSERTPS $7, X11, X2 // 66410f3a21d307
INSERTPS $7, (BX), X11 // 66440f3a211b07
INSERTPS $7, (R11), X11 // 66450f3a211b07
INSERTPS $7, X2, X11 // 66440f3a21da07
INSERTPS $7, X11, X11 // 66450f3a21db07
INSW // 666d
//TODO: INT $3 // cc
INT $7 // cd07
@ -2641,10 +2641,10 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0
MOVNTO X11, (BX) // 66440fe71b
MOVNTO X2, (R11) // 66410fe713
MOVNTO X11, (R11) // 66450fe71b
//TODO: MOVNTDQA (BX), X2 // 660f382a13
//TODO: MOVNTDQA (R11), X2 // 66410f382a13
//TODO: MOVNTDQA (BX), X11 // 66440f382a1b
//TODO: MOVNTDQA (R11), X11 // 66450f382a1b
MOVNTDQA (BX), X2 // 660f382a13
MOVNTDQA (R11), X2 // 66410f382a13
MOVNTDQA (BX), X11 // 66440f382a1b
MOVNTDQA (R11), X11 // 66450f382a1b
MOVNTIL DX, (BX) // 0fc313
MOVNTIL R11, (BX) // 440fc31b
MOVNTIL DX, (R11) // 410fc313
@ -2857,14 +2857,14 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0
//TODO: MOVBQZX (R11), R11 // 4d0fb61b
//TODO: MOVBQZX DL, R11 // 4c0fb6da
//TODO: MOVBQZX R11, R11 // 4d0fb6db
//TODO: MPSADBW $7, (BX), X2 // 660f3a421307
//TODO: MPSADBW $7, (R11), X2 // 66410f3a421307
//TODO: MPSADBW $7, X2, X2 // 660f3a42d207
//TODO: MPSADBW $7, X11, X2 // 66410f3a42d307
//TODO: MPSADBW $7, (BX), X11 // 66440f3a421b07
//TODO: MPSADBW $7, (R11), X11 // 66450f3a421b07
//TODO: MPSADBW $7, X2, X11 // 66440f3a42da07
//TODO: MPSADBW $7, X11, X11 // 66450f3a42db07
MPSADBW $7, (BX), X2 // 660f3a421307
MPSADBW $7, (R11), X2 // 66410f3a421307
MPSADBW $7, X2, X2 // 660f3a42d207
MPSADBW $7, X11, X2 // 66410f3a42d307
MPSADBW $7, (BX), X11 // 66440f3a421b07
MPSADBW $7, (R11), X11 // 66450f3a421b07
MPSADBW $7, X2, X11 // 66440f3a42da07
MPSADBW $7, X11, X11 // 66450f3a42db07
MULW (BX) // 66f723
MULW (R11) // 6641f723
MULW DX // 66f7e2
@ -3155,14 +3155,14 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0
PACKSSWB (R11), X11 // 66450f631b
PACKSSWB X2, X11 // 66440f63da
PACKSSWB X11, X11 // 66450f63db
//TODO: PACKUSDW (BX), X2 // 660f382b13
//TODO: PACKUSDW (R11), X2 // 66410f382b13
//TODO: PACKUSDW X2, X2 // 660f382bd2
//TODO: PACKUSDW X11, X2 // 66410f382bd3
//TODO: PACKUSDW (BX), X11 // 66440f382b1b
//TODO: PACKUSDW (R11), X11 // 66450f382b1b
//TODO: PACKUSDW X2, X11 // 66440f382bda
//TODO: PACKUSDW X11, X11 // 66450f382bdb
PACKUSDW (BX), X2 // 660f382b13
PACKUSDW (R11), X2 // 66410f382b13
PACKUSDW X2, X2 // 660f382bd2
PACKUSDW X11, X2 // 66410f382bd3
PACKUSDW (BX), X11 // 66440f382b1b
PACKUSDW (R11), X11 // 66450f382b1b
PACKUSDW X2, X11 // 66440f382bda
PACKUSDW X11, X11 // 66450f382bdb
PACKUSWB (BX), M2 // 0f6713
PACKUSWB (R11), M2 // 410f6713
PACKUSWB M2, M2 // 0f67d2
@ -3395,14 +3395,14 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0
//TODO: PBLENDVB XMM0, (R11), X11 // 66450f38101b
//TODO: PBLENDVB XMM0, X2, X11 // 66440f3810da
//TODO: PBLENDVB XMM0, X11, X11 // 66450f3810db
//TODO: PBLENDW $7, (BX), X2 // 660f3a0e1307
//TODO: PBLENDW $7, (R11), X2 // 66410f3a0e1307
//TODO: PBLENDW $7, X2, X2 // 660f3a0ed207
//TODO: PBLENDW $7, X11, X2 // 66410f3a0ed307
//TODO: PBLENDW $7, (BX), X11 // 66440f3a0e1b07
//TODO: PBLENDW $7, (R11), X11 // 66450f3a0e1b07
//TODO: PBLENDW $7, X2, X11 // 66440f3a0eda07
//TODO: PBLENDW $7, X11, X11 // 66450f3a0edb07
PBLENDW $7, (BX), X2 // 660f3a0e1307
PBLENDW $7, (R11), X2 // 66410f3a0e1307
PBLENDW $7, X2, X2 // 660f3a0ed207
PBLENDW $7, X11, X2 // 66410f3a0ed307
PBLENDW $7, (BX), X11 // 66440f3a0e1b07
PBLENDW $7, (R11), X11 // 66450f3a0e1b07
PBLENDW $7, X2, X11 // 66440f3a0eda07
PBLENDW $7, X11, X11 // 66450f3a0edb07
PCLMULQDQ $7, (BX), X2 // 660f3a441307
PCLMULQDQ $7, (R11), X2 // 66410f3a441307
PCLMULQDQ $7, X2, X2 // 660f3a44d207
@ -3443,14 +3443,14 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0
PCMPEQL (R11), X11 // 66450f761b
PCMPEQL X2, X11 // 66440f76da
PCMPEQL X11, X11 // 66450f76db
//TODO: PCMPEQQ (BX), X2 // 660f382913
//TODO: PCMPEQQ (R11), X2 // 66410f382913
//TODO: PCMPEQQ X2, X2 // 660f3829d2
//TODO: PCMPEQQ X11, X2 // 66410f3829d3
//TODO: PCMPEQQ (BX), X11 // 66440f38291b
//TODO: PCMPEQQ (R11), X11 // 66450f38291b
//TODO: PCMPEQQ X2, X11 // 66440f3829da
//TODO: PCMPEQQ X11, X11 // 66450f3829db
PCMPEQQ (BX), X2 // 660f382913
PCMPEQQ (R11), X2 // 66410f382913
PCMPEQQ X2, X2 // 660f3829d2
PCMPEQQ X11, X2 // 66410f3829d3
PCMPEQQ (BX), X11 // 66440f38291b
PCMPEQQ (R11), X11 // 66450f38291b
PCMPEQQ X2, X11 // 66440f3829da
PCMPEQQ X11, X11 // 66450f3829db
PCMPEQW (BX), M2 // 0f7513
PCMPEQW (R11), M2 // 410f7513
PCMPEQW M2, M2 // 0f75d2
@ -3475,14 +3475,14 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0
PCMPESTRI $7, (R11), X11 // 66450f3a611b07
PCMPESTRI $7, X2, X11 // 66440f3a61da07
PCMPESTRI $7, X11, X11 // 66450f3a61db07
//TODO: PCMPESTRM $7, (BX), X2 // 660f3a601307
//TODO: PCMPESTRM $7, (R11), X2 // 66410f3a601307
//TODO: PCMPESTRM $7, X2, X2 // 660f3a60d207
//TODO: PCMPESTRM $7, X11, X2 // 66410f3a60d307
//TODO: PCMPESTRM $7, (BX), X11 // 66440f3a601b07
//TODO: PCMPESTRM $7, (R11), X11 // 66450f3a601b07
//TODO: PCMPESTRM $7, X2, X11 // 66440f3a60da07
//TODO: PCMPESTRM $7, X11, X11 // 66450f3a60db07
PCMPESTRM $7, (BX), X2 // 660f3a601307
PCMPESTRM $7, (R11), X2 // 66410f3a601307
PCMPESTRM $7, X2, X2 // 660f3a60d207
PCMPESTRM $7, X11, X2 // 66410f3a60d307
PCMPESTRM $7, (BX), X11 // 66440f3a601b07
PCMPESTRM $7, (R11), X11 // 66450f3a601b07
PCMPESTRM $7, X2, X11 // 66440f3a60da07
PCMPESTRM $7, X11, X11 // 66450f3a60db07
PCMPGTB (BX), M2 // 0f6413
PCMPGTB (R11), M2 // 410f6413
PCMPGTB M2, M2 // 0f64d2
@ -3515,14 +3515,14 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0
PCMPGTL (R11), X11 // 66450f661b
PCMPGTL X2, X11 // 66440f66da
PCMPGTL X11, X11 // 66450f66db
//TODO: PCMPGTQ (BX), X2 // 660f383713
//TODO: PCMPGTQ (R11), X2 // 66410f383713
//TODO: PCMPGTQ X2, X2 // 660f3837d2
//TODO: PCMPGTQ X11, X2 // 66410f3837d3
//TODO: PCMPGTQ (BX), X11 // 66440f38371b
//TODO: PCMPGTQ (R11), X11 // 66450f38371b
//TODO: PCMPGTQ X2, X11 // 66440f3837da
//TODO: PCMPGTQ X11, X11 // 66450f3837db
PCMPGTQ (BX), X2 // 660f383713
PCMPGTQ (R11), X2 // 66410f383713
PCMPGTQ X2, X2 // 660f3837d2
PCMPGTQ X11, X2 // 66410f3837d3
PCMPGTQ (BX), X11 // 66440f38371b
PCMPGTQ (R11), X11 // 66450f38371b
PCMPGTQ X2, X11 // 66440f3837da
PCMPGTQ X11, X11 // 66450f3837db
PCMPGTW (BX), M2 // 0f6513
PCMPGTW (R11), M2 // 410f6513
PCMPGTW M2, M2 // 0f65d2
@ -3539,22 +3539,22 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0
PCMPGTW (R11), X11 // 66450f651b
PCMPGTW X2, X11 // 66440f65da
PCMPGTW X11, X11 // 66450f65db
//TODO: PCMPISTRI $7, (BX), X2 // 660f3a631307
//TODO: PCMPISTRI $7, (R11), X2 // 66410f3a631307
//TODO: PCMPISTRI $7, X2, X2 // 660f3a63d207
//TODO: PCMPISTRI $7, X11, X2 // 66410f3a63d307
//TODO: PCMPISTRI $7, (BX), X11 // 66440f3a631b07
//TODO: PCMPISTRI $7, (R11), X11 // 66450f3a631b07
//TODO: PCMPISTRI $7, X2, X11 // 66440f3a63da07
//TODO: PCMPISTRI $7, X11, X11 // 66450f3a63db07
//TODO: PCMPISTRM $7, (BX), X2 // 660f3a621307
//TODO: PCMPISTRM $7, (R11), X2 // 66410f3a621307
//TODO: PCMPISTRM $7, X2, X2 // 660f3a62d207
//TODO: PCMPISTRM $7, X11, X2 // 66410f3a62d307
//TODO: PCMPISTRM $7, (BX), X11 // 66440f3a621b07
//TODO: PCMPISTRM $7, (R11), X11 // 66450f3a621b07
//TODO: PCMPISTRM $7, X2, X11 // 66440f3a62da07
//TODO: PCMPISTRM $7, X11, X11 // 66450f3a62db07
PCMPISTRI $7, (BX), X2 // 660f3a631307
PCMPISTRI $7, (R11), X2 // 66410f3a631307
PCMPISTRI $7, X2, X2 // 660f3a63d207
PCMPISTRI $7, X11, X2 // 66410f3a63d307
PCMPISTRI $7, (BX), X11 // 66440f3a631b07
PCMPISTRI $7, (R11), X11 // 66450f3a631b07
PCMPISTRI $7, X2, X11 // 66440f3a63da07
PCMPISTRI $7, X11, X11 // 66450f3a63db07
PCMPISTRM $7, (BX), X2 // 660f3a621307
PCMPISTRM $7, (R11), X2 // 66410f3a621307
PCMPISTRM $7, X2, X2 // 660f3a62d207
PCMPISTRM $7, X11, X2 // 66410f3a62d307
PCMPISTRM $7, (BX), X11 // 66440f3a621b07
PCMPISTRM $7, (R11), X11 // 66450f3a621b07
PCMPISTRM $7, X2, X11 // 66440f3a62da07
PCMPISTRM $7, X11, X11 // 66450f3a62db07
PDEPL (BX), R9, DX // c4e233f513
PDEPL (R11), R9, DX // c4c233f513
PDEPL DX, R9, DX // c4e233f5d2
@ -3799,22 +3799,22 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0
PMADDWL (R11), X11 // 66450ff51b
PMADDWL X2, X11 // 66440ff5da
PMADDWL X11, X11 // 66450ff5db
//TODO: PMAXSB (BX), X2 // 660f383c13
//TODO: PMAXSB (R11), X2 // 66410f383c13
//TODO: PMAXSB X2, X2 // 660f383cd2
//TODO: PMAXSB X11, X2 // 66410f383cd3
//TODO: PMAXSB (BX), X11 // 66440f383c1b
//TODO: PMAXSB (R11), X11 // 66450f383c1b
//TODO: PMAXSB X2, X11 // 66440f383cda
//TODO: PMAXSB X11, X11 // 66450f383cdb
//TODO: PMAXSD (BX), X2 // 660f383d13
//TODO: PMAXSD (R11), X2 // 66410f383d13
//TODO: PMAXSD X2, X2 // 660f383dd2
//TODO: PMAXSD X11, X2 // 66410f383dd3
//TODO: PMAXSD (BX), X11 // 66440f383d1b
//TODO: PMAXSD (R11), X11 // 66450f383d1b
//TODO: PMAXSD X2, X11 // 66440f383dda
//TODO: PMAXSD X11, X11 // 66450f383ddb
PMAXSB (BX), X2 // 660f383c13
PMAXSB (R11), X2 // 66410f383c13
PMAXSB X2, X2 // 660f383cd2
PMAXSB X11, X2 // 66410f383cd3
PMAXSB (BX), X11 // 66440f383c1b
PMAXSB (R11), X11 // 66450f383c1b
PMAXSB X2, X11 // 66440f383cda
PMAXSB X11, X11 // 66450f383cdb
PMAXSD (BX), X2 // 660f383d13
PMAXSD (R11), X2 // 66410f383d13
PMAXSD X2, X2 // 660f383dd2
PMAXSD X11, X2 // 66410f383dd3
PMAXSD (BX), X11 // 66440f383d1b
PMAXSD (R11), X11 // 66450f383d1b
PMAXSD X2, X11 // 66440f383dda
PMAXSD X11, X11 // 66450f383ddb
//TODO: PMAXSW (BX), M2 // 0fee13
//TODO: PMAXSW (R11), M2 // 410fee13
//TODO: PMAXSW M2, M2 // 0feed2
@ -3847,38 +3847,38 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0
PMAXUB (R11), X11 // 66450fde1b
PMAXUB X2, X11 // 66440fdeda
PMAXUB X11, X11 // 66450fdedb
//TODO: PMAXUD (BX), X2 // 660f383f13
//TODO: PMAXUD (R11), X2 // 66410f383f13
//TODO: PMAXUD X2, X2 // 660f383fd2
//TODO: PMAXUD X11, X2 // 66410f383fd3
//TODO: PMAXUD (BX), X11 // 66440f383f1b
//TODO: PMAXUD (R11), X11 // 66450f383f1b
//TODO: PMAXUD X2, X11 // 66440f383fda
//TODO: PMAXUD X11, X11 // 66450f383fdb
//TODO: PMAXUW (BX), X2 // 660f383e13
//TODO: PMAXUW (R11), X2 // 66410f383e13
//TODO: PMAXUW X2, X2 // 660f383ed2
//TODO: PMAXUW X11, X2 // 66410f383ed3
//TODO: PMAXUW (BX), X11 // 66440f383e1b
//TODO: PMAXUW (R11), X11 // 66450f383e1b
//TODO: PMAXUW X2, X11 // 66440f383eda
//TODO: PMAXUW X11, X11 // 66450f383edb
//TODO: PMINSB (BX), X2 // 660f383813
//TODO: PMINSB (R11), X2 // 66410f383813
//TODO: PMINSB X2, X2 // 660f3838d2
//TODO: PMINSB X11, X2 // 66410f3838d3
//TODO: PMINSB (BX), X11 // 66440f38381b
//TODO: PMINSB (R11), X11 // 66450f38381b
//TODO: PMINSB X2, X11 // 66440f3838da
//TODO: PMINSB X11, X11 // 66450f3838db
//TODO: PMINSD (BX), X2 // 660f383913
//TODO: PMINSD (R11), X2 // 66410f383913
//TODO: PMINSD X2, X2 // 660f3839d2
//TODO: PMINSD X11, X2 // 66410f3839d3
//TODO: PMINSD (BX), X11 // 66440f38391b
//TODO: PMINSD (R11), X11 // 66450f38391b
//TODO: PMINSD X2, X11 // 66440f3839da
//TODO: PMINSD X11, X11 // 66450f3839db
PMAXUD (BX), X2 // 660f383f13
PMAXUD (R11), X2 // 66410f383f13
PMAXUD X2, X2 // 660f383fd2
PMAXUD X11, X2 // 66410f383fd3
PMAXUD (BX), X11 // 66440f383f1b
PMAXUD (R11), X11 // 66450f383f1b
PMAXUD X2, X11 // 66440f383fda
PMAXUD X11, X11 // 66450f383fdb
PMAXUW (BX), X2 // 660f383e13
PMAXUW (R11), X2 // 66410f383e13
PMAXUW X2, X2 // 660f383ed2
PMAXUW X11, X2 // 66410f383ed3
PMAXUW (BX), X11 // 66440f383e1b
PMAXUW (R11), X11 // 66450f383e1b
PMAXUW X2, X11 // 66440f383eda
PMAXUW X11, X11 // 66450f383edb
PMINSB (BX), X2 // 660f383813
PMINSB (R11), X2 // 66410f383813
PMINSB X2, X2 // 660f3838d2
PMINSB X11, X2 // 66410f3838d3
PMINSB (BX), X11 // 66440f38381b
PMINSB (R11), X11 // 66450f38381b
PMINSB X2, X11 // 66440f3838da
PMINSB X11, X11 // 66450f3838db
PMINSD (BX), X2 // 660f383913
PMINSD (R11), X2 // 66410f383913
PMINSD X2, X2 // 660f3839d2
PMINSD X11, X2 // 66410f3839d3
PMINSD (BX), X11 // 66440f38391b
PMINSD (R11), X11 // 66450f38391b
PMINSD X2, X11 // 66440f3839da
PMINSD X11, X11 // 66450f3839db
//TODO: PMINSW (BX), M2 // 0fea13
//TODO: PMINSW (R11), M2 // 410fea13
//TODO: PMINSW M2, M2 // 0fead2
@ -3911,22 +3911,22 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0
PMINUB (R11), X11 // 66450fda1b
PMINUB X2, X11 // 66440fdada
PMINUB X11, X11 // 66450fdadb
//TODO: PMINUD (BX), X2 // 660f383b13
//TODO: PMINUD (R11), X2 // 66410f383b13
//TODO: PMINUD X2, X2 // 660f383bd2
//TODO: PMINUD X11, X2 // 66410f383bd3
//TODO: PMINUD (BX), X11 // 66440f383b1b
//TODO: PMINUD (R11), X11 // 66450f383b1b
//TODO: PMINUD X2, X11 // 66440f383bda
//TODO: PMINUD X11, X11 // 66450f383bdb
//TODO: PMINUW (BX), X2 // 660f383a13
//TODO: PMINUW (R11), X2 // 66410f383a13
//TODO: PMINUW X2, X2 // 660f383ad2
//TODO: PMINUW X11, X2 // 66410f383ad3
//TODO: PMINUW (BX), X11 // 66440f383a1b
//TODO: PMINUW (R11), X11 // 66450f383a1b
//TODO: PMINUW X2, X11 // 66440f383ada
//TODO: PMINUW X11, X11 // 66450f383adb
PMINUD (BX), X2 // 660f383b13
PMINUD (R11), X2 // 66410f383b13
PMINUD X2, X2 // 660f383bd2
PMINUD X11, X2 // 66410f383bd3
PMINUD (BX), X11 // 66440f383b1b
PMINUD (R11), X11 // 66450f383b1b
PMINUD X2, X11 // 66440f383bda
PMINUD X11, X11 // 66450f383bdb
PMINUW (BX), X2 // 660f383a13
PMINUW (R11), X2 // 66410f383a13
PMINUW X2, X2 // 660f383ad2
PMINUW X11, X2 // 66410f383ad3
PMINUW (BX), X11 // 66440f383a1b
PMINUW (R11), X11 // 66450f383a1b
PMINUW X2, X11 // 66440f383ada
PMINUW X11, X11 // 66450f383adb
PMOVMSKB M2, DX // 0fd7d2
PMOVMSKB M3, DX // 0fd7d3
PMOVMSKB M2, R11 // 440fd7da
@ -4615,14 +4615,14 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0
PSUBW (R11), X11 // 66450ff91b
PSUBW X2, X11 // 66440ff9da
PSUBW X11, X11 // 66450ff9db
//TODO: PTEST (BX), X2 // 660f381713
//TODO: PTEST (R11), X2 // 66410f381713
//TODO: PTEST X2, X2 // 660f3817d2
//TODO: PTEST X11, X2 // 66410f3817d3
//TODO: PTEST (BX), X11 // 66440f38171b
//TODO: PTEST (R11), X11 // 66450f38171b
//TODO: PTEST X2, X11 // 66440f3817da
//TODO: PTEST X11, X11 // 66450f3817db
PTEST (BX), X2 // 660f381713
PTEST (R11), X2 // 66410f381713
PTEST X2, X2 // 660f3817d2
PTEST X11, X2 // 66410f3817d3
PTEST (BX), X11 // 66440f38171b
PTEST (R11), X11 // 66450f38171b
PTEST X2, X11 // 66440f3817da
PTEST X11, X11 // 66450f3817db
PUNPCKHBW (BX), M2 // 0f6813
PUNPCKHBW (R11), M2 // 410f6813
PUNPCKHBW M2, M2 // 0f68d2

View File

@ -116,6 +116,7 @@ const (
AINCW
AINSB
AINSL
AINSERTPS
AINSW
AINT
AINTO
@ -171,6 +172,7 @@ const (
AMOVSB
AMOVSL
AMOVSW
AMPSADBW
AMULB
AMULL
AMULW
@ -530,6 +532,8 @@ const (
AANDPS
ABEXTRL
ABEXTRQ
ABLENDPD
ABLENDPS
ABLSIL
ABLSIQ
ABLSMSKL
@ -570,6 +574,8 @@ const (
ADIVPS
ADIVSD
ADIVSS
ADPPD
ADPPS
AEMMS
AFXRSTOR
AFXRSTOR64
@ -599,6 +605,7 @@ const (
AMOVMSKPD
AMOVMSKPS
AMOVNTO
AMOVNTDQA
AMOVNTPD
AMOVNTPS
AMOVNTQ
@ -618,6 +625,7 @@ const (
AORPS
APACKSSLW
APACKSSWB
APACKUSDW
APACKUSWB
APADDB
APADDL
@ -632,12 +640,17 @@ const (
APANDN
APAVGB
APAVGW
APBLENDW
APCMPEQB
APCMPEQL
APCMPEQQ
APCMPEQW
APCMPGTB
APCMPGTL
APCMPGTQ
APCMPGTW
APCMPISTRI
APCMPISTRM
APDEPL
APDEPQ
APEXTL
@ -659,10 +672,18 @@ const (
APINSRW
APMADDUBSW
APMADDWL
APMAXSB
APMAXSD
APMAXSW
APMAXUB
APMAXUD
APMAXUW
APMINSB
APMINSD
APMINSW
APMINUB
APMINUD
APMINUW
APMOVMSKB
APMOVSXBD
APMOVSXBQ
@ -711,6 +732,7 @@ const (
APSUBUSB
APSUBUSW
APSUBW
APTEST
APUNPCKHBW
APUNPCKHLQ
APUNPCKHQDQ
@ -750,6 +772,7 @@ const (
AXORPD
AXORPS
APCMPESTRI
APCMPESTRM
ARETFW
ARETFL

View File

@ -78,6 +78,7 @@ var Anames = []string{
"INCW",
"INSB",
"INSL",
"INSERTPS",
"INSW",
"INT",
"INTO",
@ -133,6 +134,7 @@ var Anames = []string{
"MOVSB",
"MOVSL",
"MOVSW",
"MPSADBW",
"MULB",
"MULL",
"MULW",
@ -473,6 +475,8 @@ var Anames = []string{
"ANDPS",
"BEXTRL",
"BEXTRQ",
"BLENDPD",
"BLENDPS",
"BLSIL",
"BLSIQ",
"BLSMSKL",
@ -513,6 +517,8 @@ var Anames = []string{
"DIVPS",
"DIVSD",
"DIVSS",
"DPPD",
"DPPS",
"EMMS",
"FXRSTOR",
"FXRSTOR64",
@ -542,6 +548,7 @@ var Anames = []string{
"MOVMSKPD",
"MOVMSKPS",
"MOVNTO",
"MOVNTDQA",
"MOVNTPD",
"MOVNTPS",
"MOVNTQ",
@ -561,6 +568,7 @@ var Anames = []string{
"ORPS",
"PACKSSLW",
"PACKSSWB",
"PACKUSDW",
"PACKUSWB",
"PADDB",
"PADDL",
@ -575,12 +583,17 @@ var Anames = []string{
"PANDN",
"PAVGB",
"PAVGW",
"PBLENDW",
"PCMPEQB",
"PCMPEQL",
"PCMPEQQ",
"PCMPEQW",
"PCMPGTB",
"PCMPGTL",
"PCMPGTQ",
"PCMPGTW",
"PCMPISTRI",
"PCMPISTRM",
"PDEPL",
"PDEPQ",
"PEXTL",
@ -602,10 +615,18 @@ var Anames = []string{
"PINSRW",
"PMADDUBSW",
"PMADDWL",
"PMAXSB",
"PMAXSD",
"PMAXSW",
"PMAXUB",
"PMAXUD",
"PMAXUW",
"PMINSB",
"PMINSD",
"PMINSW",
"PMINUB",
"PMINUD",
"PMINUW",
"PMOVMSKB",
"PMOVSXBD",
"PMOVSXBQ",
@ -654,6 +675,7 @@ var Anames = []string{
"PSUBUSB",
"PSUBUSW",
"PSUBW",
"PTEST",
"PUNPCKHBW",
"PUNPCKHLQ",
"PUNPCKHQDQ",
@ -693,6 +715,7 @@ var Anames = []string{
"XORPD",
"XORPS",
"PCMPESTRI",
"PCMPESTRM",
"RETFW",
"RETFL",
"RETFQ",

View File

@ -733,6 +733,11 @@ var ymshufb = []ytab{
{Yxm, Ynone, Yxr, Zm2_r, 2},
}
// It should never have more than 1 entry,
// because some optab entries you opcode secuences that
// are longer than 2 bytes (zoffset=2 here),
// ROUNDPD and ROUNDPS and recently added BLENDPD,
// to name a few.
var yxshuf = []ytab{
{Yu8, Yxm, Yxr, Zibm_r, 2},
}
@ -1107,6 +1112,8 @@ var optab =
{ADIVSD, yxm, Pf2, [23]uint8{0x5e}},
{ADIVSS, yxm, Pf3, [23]uint8{0x5e}},
{ADIVW, ydivl, Pe, [23]uint8{0xf7, 06}},
{ADPPD, yxshuf, Pq, [23]uint8{0x3a, 0x41, 0}},
{ADPPS, yxshuf, Pq, [23]uint8{0x3a, 0x40, 0}},
{AEMMS, ynone, Pm, [23]uint8{0x77}},
{AENTER, nil, 0, [23]uint8{}}, /* botch */
{AFXRSTOR, ysvrs, Pm, [23]uint8{0xae, 01, 0xae, 01}},
@ -1131,6 +1138,7 @@ var optab =
{AINL, yin, Px, [23]uint8{0xe5, 0xed}},
{AINSB, ynone, Pb, [23]uint8{0x6c}},
{AINSL, ynone, Px, [23]uint8{0x6d}},
{AINSERTPS, yxshuf, Pq, [23]uint8{0x3a, 0x21, 0}},
{AINSW, ynone, Pe, [23]uint8{0x6d}},
{AINT, yint, Px, [23]uint8{0xcd}},
{AINTO, ynone, P32, [23]uint8{0xce}},
@ -1217,6 +1225,7 @@ var optab =
{AMOVMSKPD, yxrrl, Pq, [23]uint8{0x50}},
{AMOVMSKPS, yxrrl, Pm, [23]uint8{0x50}},
{AMOVNTO, yxr_ml, Pe, [23]uint8{0xe7}},
{AMOVNTDQA, ylddqu, Pq4, [23]uint8{0x2a}},
{AMOVNTPD, yxr_ml, Pe, [23]uint8{0x2b}},
{AMOVNTPS, yxr_ml, Pm, [23]uint8{0x2b}},
{AMOVNTQ, ymr_ml, Pm, [23]uint8{0xe7}},
@ -1235,6 +1244,7 @@ var optab =
{AMOVWLZX, yml_rl, Pm, [23]uint8{0xb7}},
{AMOVWQSX, yml_rl, Pw, [23]uint8{0x0f, 0xbf}},
{AMOVWQZX, yml_rl, Pw, [23]uint8{0x0f, 0xb7}},
{AMPSADBW, yxshuf, Pq, [23]uint8{0x3a, 0x42, 0}},
{AMULB, ydivb, Pb, [23]uint8{0xf6, 04}},
{AMULL, ydivl, Px, [23]uint8{0xf7, 04}},
{AMULPD, yxm, Pe, [23]uint8{0x59}},
@ -1269,6 +1279,7 @@ var optab =
{APABSW, yxm_q4, Pq4, [23]uint8{0x1d}},
{APACKSSLW, ymm, Py1, [23]uint8{0x6b, Pe, 0x6b}},
{APACKSSWB, ymm, Py1, [23]uint8{0x63, Pe, 0x63}},
{APACKUSDW, yxm_q4, Pq4, [23]uint8{0x2b}},
{APACKUSWB, ymm, Py1, [23]uint8{0x67, Pe, 0x67}},
{APADDB, ymm, Py1, [23]uint8{0xfc, Pe, 0xfc}},
{APADDL, ymm, Py1, [23]uint8{0xfe, Pe, 0xfe}},
@ -1284,12 +1295,17 @@ var optab =
{APAUSE, ynone, Px, [23]uint8{0xf3, 0x90}},
{APAVGB, ymm, Py1, [23]uint8{0xe0, Pe, 0xe0}},
{APAVGW, ymm, Py1, [23]uint8{0xe3, Pe, 0xe3}},
{APBLENDW, yxshuf, Pq, [23]uint8{0x3a, 0x0e, 0}},
{APCMPEQB, ymm, Py1, [23]uint8{0x74, Pe, 0x74}},
{APCMPEQL, ymm, Py1, [23]uint8{0x76, Pe, 0x76}},
{APCMPEQQ, yxm_q4, Pq4, [23]uint8{0x29}},
{APCMPEQW, ymm, Py1, [23]uint8{0x75, Pe, 0x75}},
{APCMPGTB, ymm, Py1, [23]uint8{0x64, Pe, 0x64}},
{APCMPGTL, ymm, Py1, [23]uint8{0x66, Pe, 0x66}},
{APCMPGTQ, yxm_q4, Pq4, [23]uint8{0x37}},
{APCMPGTW, ymm, Py1, [23]uint8{0x65, Pe, 0x65}},
{APCMPISTRI, yxshuf, Pq, [23]uint8{0x3a, 0x63, 0}},
{APCMPISTRM, yxshuf, Pq, [23]uint8{0x3a, 0x62, 0}},
{APEXTRW, yextrw, Pq, [23]uint8{0xc5, 00}},
{APEXTRB, yextr, Pq, [23]uint8{0x3a, 0x14, 00}},
{APEXTRD, yextr, Pq, [23]uint8{0x3a, 0x16, 00}},
@ -1307,10 +1323,18 @@ var optab =
{APINSRQ, yinsr, Pq3, [23]uint8{0x3a, 0x22, 00}},
{APMADDUBSW, yxm_q4, Pq4, [23]uint8{0x04}},
{APMADDWL, ymm, Py1, [23]uint8{0xf5, Pe, 0xf5}},
{APMAXSB, yxm_q4, Pq4, [23]uint8{0x3c}},
{APMAXSD, yxm_q4, Pq4, [23]uint8{0x3d}},
{APMAXSW, yxm, Pe, [23]uint8{0xee}},
{APMAXUB, yxm, Pe, [23]uint8{0xde}},
{APMAXUD, yxm_q4, Pq4, [23]uint8{0x3f}},
{APMAXUW, yxm_q4, Pq4, [23]uint8{0x3e}},
{APMINSB, yxm_q4, Pq4, [23]uint8{0x38}},
{APMINSD, yxm_q4, Pq4, [23]uint8{0x39}},
{APMINSW, yxm, Pe, [23]uint8{0xea}},
{APMINUB, yxm, Pe, [23]uint8{0xda}},
{APMINUD, yxm_q4, Pq4, [23]uint8{0x3b}},
{APMINUW, yxm_q4, Pq4, [23]uint8{0x3a}},
{APMOVMSKB, ymskb, Px, [23]uint8{Pe, 0xd7, 0xd7}},
{APMOVSXBD, yxm_q4, Pq4, [23]uint8{0x21}},
{APMOVSXBQ, yxm_q4, Pq4, [23]uint8{0x22}},
@ -1370,6 +1394,7 @@ var optab =
{APSUBUSB, yxm, Pe, [23]uint8{0xd8}},
{APSUBUSW, yxm, Pe, [23]uint8{0xd9}},
{APSUBW, yxm, Pe, [23]uint8{0xf9}},
{APTEST, yxm_q4, Pq4, [23]uint8{0x17}},
{APUNPCKHBW, ymm, Py1, [23]uint8{0x68, Pe, 0x68}},
{APUNPCKHLQ, ymm, Py1, [23]uint8{0x6a, Pe, 0x6a}},
{APUNPCKHQDQ, yxm, Pe, [23]uint8{0x6d}},
@ -1656,6 +1681,7 @@ var optab =
{APSHUFD, yxshuf, Pq, [23]uint8{0x70, 0}},
{APCLMULQDQ, yxshuf, Pq, [23]uint8{0x3a, 0x44, 0}},
{APCMPESTRI, yxshuf, Pq, [23]uint8{0x3a, 0x61, 0}},
{APCMPESTRM, yxshuf, Pq, [23]uint8{0x3a, 0x60, 0}},
{AMOVDDUP, yxm, Pf2, [23]uint8{0x12}},
{AMOVSHDUP, yxm, Pf3, [23]uint8{0x16}},
{AMOVSLDUP, yxm, Pf3, [23]uint8{0x12}},
@ -1664,6 +1690,8 @@ var optab =
{AANDNQ, yvex_r3, Pvex, [23]uint8{VEX_LZ_0F38_W1, 0xF2}},
{ABEXTRL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_0F38_W0, 0xF7}},
{ABEXTRQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_0F38_W1, 0xF7}},
{ABLENDPD, yxshuf, Pq, [23]uint8{0x3a, 0x0d, 0}},
{ABLENDPS, yxshuf, Pq, [23]uint8{0x3a, 0x0c, 0}},
{ABZHIL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_0F38_W0, 0xF5}},
{ABZHIQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_0F38_W1, 0xF5}},
{AMULXL, yvex_r3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W0, 0xF6}},