Home | History | Annotate | Download | only in amd64
      1 
      2 #include <stdio.h>
      3 #include <stdlib.h>
      4 #include <assert.h>
      5 #include <malloc.h>
      6 
        /* Fixed-purpose integer aliases used throughout this file. */
      7 typedef  unsigned char           UChar;
      8 typedef  unsigned int            UInt;
      9 typedef  unsigned long int       UWord;
     10 typedef  unsigned long long int  ULong;
     11 
        /* Not referenced in the visible part of this file; the "used"
           attribute asks the compiler to keep it anyway. */
     12 UChar randArray[1027] __attribute__((used));
     13 
        /* True when the low five bits of the address are zero, i.e. the
           pointer is a multiple of 32. */
     14 #define IS_32_ALIGNED(_ptr) (0 == (0x1F & (UWord)(_ptr)))
     15 
        /* One 32-byte vector value, viewable as bytes or as 32-bit words. */
     16 typedef  union { UChar u8[32];  UInt u32[8];  }  YMM;
     17 
        /* Working state for one test: four vector values followed by one
           64-bit scalar.  The test harness addresses these fields at byte
           offsets 0, 32, 64, 96 and 128 respectively. */
     18 typedef  struct {  YMM a1; YMM a2; YMM a3; YMM a4; ULong u64; }  Block;
     19 
     20 void showYMM ( YMM* vec )
     21 {
     22    int i;
     23    assert(IS_32_ALIGNED(vec));
     24    for (i = 31; i >= 0; i--) {
     25       printf("%02x", (UInt)vec->u8[i]);
     26       if (i > 0 && 0 == ((i+0) & 7)) printf(".");
     27    }
     28 }
     29 
     30 void showBlock ( char* msg, Block* block )
     31 {
     32    printf("  %s\n", msg);
     33    printf("    "); showYMM(&block->a1); printf("\n");
     34    printf("    "); showYMM(&block->a2); printf("\n");
     35    printf("    "); showYMM(&block->a3); printf("\n");
     36    printf("    "); showYMM(&block->a4); printf("\n");
     37    printf("    %016llx\n", block->u64);
     38 }
     39 
     40 UChar randUChar ( void )
     41 {
     42    static UInt seed = 80021;
     43    seed = 1103515245 * seed + 12345;
     44    return (seed >> 17) & 0xFF;
     45 }
     46 
     47 void randBlock ( Block* b )
     48 {
     49    int i;
     50    UChar* p = (UChar*)b;
     51    for (i = 0; i < sizeof(Block); i++)
     52       p[i] = randUChar();
     53 }
     54 
     55 
     56 /* Generate a function test_NAME, that tests the given insn, in both
     57    its mem and reg forms.  The reg form of the insn may mention, as
     58    operands only %ymm6, %ymm7, %ymm8, %ymm9 and %r14.  The mem form of
     59    the insn may mention as operands only (%rax), %ymm7, %ymm8, %ymm9
     60    and %r14.  It's OK for the insn to clobber ymm0, as this is needed
     61    for testing PCMPxSTRx. */
     62 
     63 #define GEN_test_RandM(_name, _reg_form, _mem_form)   \
     64     \
     65     __attribute__ ((noinline)) static void test_##_name ( void )   \
     66     { \
     67        Block* b = memalign(32, sizeof(Block)); \
     68        randBlock(b); \
     69        printf("%s(reg)\n", #_name); \
     70        showBlock("before", b); \
     71        __asm__ __volatile__( \
     72           "vmovdqa   0(%0),%%ymm7"  "\n\t" \
     73           "vmovdqa  32(%0),%%ymm8"  "\n\t" \
     74           "vmovdqa  64(%0),%%ymm6"  "\n\t" \
     75           "vmovdqa  96(%0),%%ymm9"  "\n\t" \
     76           "movq    128(%0),%%r14"   "\n\t" \
     77           _reg_form   "\n\t" \
     78           "vmovdqa %%ymm7,  0(%0)"  "\n\t" \
     79           "vmovdqa %%ymm8, 32(%0)"  "\n\t" \
     80           "vmovdqa %%ymm6, 64(%0)"  "\n\t" \
     81           "vmovdqa %%ymm9, 96(%0)"  "\n\t" \
     82           "movq    %%r14, 128(%0)"  "\n\t" \
     83           : /*OUT*/  \
     84           : /*IN*/"r"(b) \
     85           : /*TRASH*/"xmm0","xmm7","xmm8","xmm6","xmm9","r14","memory","cc" \
     86        ); \
     87        showBlock("after", b); \
     88        randBlock(b); \
     89        printf("%s(mem)\n", #_name); \
     90        showBlock("before", b); \
     91        __asm__ __volatile__( \
     92           "leaq      0(%0),%%rax"  "\n\t" \
     93           "vmovdqa  32(%0),%%ymm8"  "\n\t" \
     94           "vmovdqa  64(%0),%%ymm7"  "\n\t" \
     95           "vmovdqa  96(%0),%%ymm9"  "\n\t" \
     96           "movq    128(%0),%%r14"   "\n\t" \
     97           _mem_form   "\n\t" \
     98           "vmovdqa %%ymm8, 32(%0)"  "\n\t" \
     99           "vmovdqa %%ymm7, 64(%0)"  "\n\t" \
    100           "vmovdqa %%ymm9, 96(%0)"  "\n\t" \
    101           "movq    %%r14, 128(%0)"  "\n\t" \
    102           : /*OUT*/  \
    103           : /*IN*/"r"(b) \
    104           : /*TRASH*/"xmm0","xmm8","xmm7","xmm9","r14","rax","memory","cc" \
    105        ); \
    106        showBlock("after", b); \
    107        printf("\n"); \
    108        free(b); \
    109     }
    110 
        /* Single-form variants.  Both still expand to the complete
           GEN_test_RandM body, passing "" for the form that is not wanted,
           so the generated function still prints its (reg) and (mem)
           sections; the section given "" loads and stores the registers
           with no instruction in between. */
    111 #define GEN_test_Ronly(_name, _reg_form) \
    112    GEN_test_RandM(_name, _reg_form, "")
    113 #define GEN_test_Monly(_name, _mem_form) \
    114    GEN_test_RandM(_name, "", _mem_form)
    115 
    116 /* Vector integers promoted from 128-bit in AVX to 256-bit in AVX2.  */
        /* Each invocation below defines a static function test_<NAME>.
           With GEN_test_RandM the first string is the register form of the
           instruction and the second is the memory form, which uses (%rax)
           as its memory operand. */
    117 
    118 GEN_test_RandM(VPOR_256,
    119                "vpor %%ymm6,  %%ymm8, %%ymm7",
    120                "vpor (%%rax), %%ymm8, %%ymm7")
    121 
    122 GEN_test_RandM(VPXOR_256,
    123                "vpxor %%ymm6,  %%ymm8, %%ymm7",
    124                "vpxor (%%rax), %%ymm8, %%ymm7")
    125 
    126 GEN_test_RandM(VPSUBB_256,
    127                "vpsubb %%ymm6,  %%ymm8, %%ymm7",
    128                "vpsubb (%%rax), %%ymm8, %%ymm7")
    129 
    130 GEN_test_RandM(VPSUBD_256,
    131                "vpsubd %%ymm6,  %%ymm8, %%ymm7",
    132                "vpsubd (%%rax), %%ymm8, %%ymm7")
    133 
    134 GEN_test_RandM(VPADDD_256,
    135                "vpaddd %%ymm6,  %%ymm8, %%ymm7",
    136                "vpaddd (%%rax), %%ymm8, %%ymm7")
    137 
    138 GEN_test_RandM(VPMOVZXWD_256,
    139                "vpmovzxwd %%xmm6,  %%ymm8",
    140                "vpmovzxwd (%%rax), %%ymm8")
    141 
    142 GEN_test_RandM(VPMOVZXBW_256,
    143                "vpmovzxbw %%xmm6,  %%ymm8",
    144                "vpmovzxbw (%%rax), %%ymm8")
    145 
    146 GEN_test_RandM(VPBLENDVB_256,
    147                "vpblendvb %%ymm9, %%ymm6,  %%ymm8, %%ymm7",
    148                "vpblendvb %%ymm9, (%%rax), %%ymm8, %%ymm7")
    149 
    150 GEN_test_RandM(VPMINSD_256,
    151                "vpminsd %%ymm6,  %%ymm8, %%ymm7",
    152                "vpminsd (%%rax), %%ymm8, %%ymm7")
    153 
    154 GEN_test_RandM(VPMAXSD_256,
    155                "vpmaxsd %%ymm6,  %%ymm8, %%ymm7",
    156                "vpmaxsd (%%rax), %%ymm8, %%ymm7")
    157 
    158 GEN_test_RandM(VPSHUFB_256,
    159                "vpshufb %%ymm6,  %%ymm8, %%ymm7",
    160                "vpshufb (%%rax), %%ymm8, %%ymm7")
    161 
    162 GEN_test_RandM(VPUNPCKLBW_256,
    163                "vpunpcklbw %%ymm6,  %%ymm8, %%ymm7",
    164                "vpunpcklbw (%%rax), %%ymm8, %%ymm7")
    165 
    166 GEN_test_RandM(VPUNPCKHBW_256,
    167                "vpunpckhbw %%ymm6,  %%ymm8, %%ymm7",
    168                "vpunpckhbw (%%rax), %%ymm8, %%ymm7")
    169 
    170 GEN_test_RandM(VPABSD_256,
    171                "vpabsd %%ymm6,  %%ymm8",
    172                "vpabsd (%%rax), %%ymm8")
    173 
    174 GEN_test_RandM(VPACKUSWB_256,
    175                "vpackuswb %%ymm9,  %%ymm8, %%ymm7",
    176                "vpackuswb (%%rax), %%ymm8, %%ymm7")
    177 
        /* Register form only.  The destination is the general-purpose
           register r14, so the result appears in the block's u64 line. */
    178 GEN_test_Ronly(VPMOVMSKB_256,
    179                "vpmovmskb %%ymm8, %%r14")
    180 
    181 GEN_test_RandM(VPAND_256,
    182                "vpand %%ymm9,  %%ymm8, %%ymm7",
    183                "vpand (%%rax), %%ymm8, %%ymm7")
    184 
    185 GEN_test_RandM(VPCMPEQB_256,
    186                "vpcmpeqb %%ymm9,  %%ymm8, %%ymm7",
    187                "vpcmpeqb (%%rax), %%ymm8, %%ymm7")
    188 
        /* In the next two tests the 0x39 in the name is the register-form
           immediate only; the memory form uses $0xC6 and writes ymm8
           rather than ymm7. */
    189 GEN_test_RandM(VPSHUFLW_0x39_256,
    190                "vpshuflw $0x39, %%ymm9,  %%ymm7",
    191                "vpshuflw $0xC6, (%%rax), %%ymm8")
    192 
    193 GEN_test_RandM(VPSHUFHW_0x39_256,
    194                "vpshufhw $0x39, %%ymm9,  %%ymm7",
    195                "vpshufhw $0xC6, (%%rax), %%ymm8")
    196 
        /* More GEN_test_RandM / GEN_test_Ronly instantiations.  The
           immediate-count shifts (names ending _0x05_256) are generated
           with GEN_test_Ronly, so only a register form is exercised. */
    197 GEN_test_RandM(VPMULLW_256,
    198                "vpmullw %%ymm9,  %%ymm8, %%ymm7",
    199                "vpmullw (%%rax), %%ymm8, %%ymm7")
    200 
    201 GEN_test_RandM(VPADDUSW_256,
    202                "vpaddusw %%ymm9,  %%ymm8, %%ymm7",
    203                "vpaddusw (%%rax), %%ymm8, %%ymm7")
    204 
    205 GEN_test_RandM(VPMULHUW_256,
    206                "vpmulhuw %%ymm9,  %%ymm8, %%ymm7",
    207                "vpmulhuw (%%rax), %%ymm8, %%ymm7")
    208 
    209 GEN_test_RandM(VPADDUSB_256,
    210                "vpaddusb %%ymm9,  %%ymm8, %%ymm7",
    211                "vpaddusb (%%rax), %%ymm8, %%ymm7")
    212 
    213 GEN_test_RandM(VPUNPCKLWD_256,
    214                "vpunpcklwd %%ymm6,  %%ymm8, %%ymm7",
    215                "vpunpcklwd (%%rax), %%ymm8, %%ymm7")
    216 
    217 GEN_test_RandM(VPUNPCKHWD_256,
    218                "vpunpckhwd %%ymm6,  %%ymm8, %%ymm7",
    219                "vpunpckhwd (%%rax), %%ymm8, %%ymm7")
    220 
    221 GEN_test_Ronly(VPSLLD_0x05_256,
    222                "vpslld $0x5, %%ymm9,  %%ymm7")
    223 
    224 GEN_test_Ronly(VPSRLD_0x05_256,
    225                "vpsrld $0x5, %%ymm9,  %%ymm7")
    226 
    227 GEN_test_Ronly(VPSRAD_0x05_256,
    228                "vpsrad $0x5, %%ymm9,  %%ymm7")
    229 
    230 GEN_test_RandM(VPSUBUSB_256,
    231                "vpsubusb %%ymm9,  %%ymm8, %%ymm7",
    232                "vpsubusb (%%rax), %%ymm8, %%ymm7")
    233 
    234 GEN_test_RandM(VPSUBSB_256,
    235                "vpsubsb %%ymm9,  %%ymm8, %%ymm7",
    236                "vpsubsb (%%rax), %%ymm8, %%ymm7")
    237 
    238 GEN_test_Ronly(VPSRLDQ_0x05_256,
    239                "vpsrldq $0x5, %%ymm9,  %%ymm7")
    240 
    241 GEN_test_Ronly(VPSLLDQ_0x05_256,
    242                "vpslldq $0x5, %%ymm9,  %%ymm7")
    243 
    244 GEN_test_RandM(VPANDN_256,
    245                "vpandn %%ymm9,  %%ymm8, %%ymm7",
    246                "vpandn (%%rax), %%ymm8, %%ymm7")
    247 
    248 GEN_test_RandM(VPUNPCKLQDQ_256,
    249                "vpunpcklqdq %%ymm6,  %%ymm8, %%ymm7",
    250                "vpunpcklqdq (%%rax), %%ymm8, %%ymm7")
    251 
    252 GEN_test_Ronly(VPSRLW_0x05_256,
    253                "vpsrlw $0x5, %%ymm9,  %%ymm7")
    254 
    255 GEN_test_Ronly(VPSLLW_0x05_256,
    256                "vpsllw $0x5, %%ymm9,  %%ymm7")
    257 
        /* Further two-source integer tests, plus immediate-count quadword
           shifts (register form only). */
    258 GEN_test_RandM(VPADDW_256,
    259                "vpaddw %%ymm6,  %%ymm8, %%ymm7",
    260                "vpaddw (%%rax), %%ymm8, %%ymm7")
    261 
    262 GEN_test_RandM(VPACKSSDW_256,
    263                "vpackssdw %%ymm9,  %%ymm8, %%ymm7",
    264                "vpackssdw (%%rax), %%ymm8, %%ymm7")
    265 
    266 GEN_test_RandM(VPUNPCKLDQ_256,
    267                "vpunpckldq %%ymm6,  %%ymm8, %%ymm7",
    268                "vpunpckldq (%%rax), %%ymm8, %%ymm7")
    269 
    270 GEN_test_RandM(VPCMPEQD_256,
    271                "vpcmpeqd %%ymm6,  %%ymm8, %%ymm7",
    272                "vpcmpeqd (%%rax), %%ymm8, %%ymm7")
    273 
        /* The 0x39 in the name is the register-form immediate; the memory
           form uses $0xC6.  The two forms also write different
           destinations (ymm8 vs ymm7). */
    274 GEN_test_RandM(VPSHUFD_0x39_256,
    275                "vpshufd $0x39, %%ymm9,  %%ymm8",
    276                "vpshufd $0xC6, (%%rax), %%ymm7")
    277 
    278 GEN_test_RandM(VPADDQ_256,
    279                "vpaddq %%ymm6,  %%ymm8, %%ymm7",
    280                "vpaddq (%%rax), %%ymm8, %%ymm7")
    281 
    282 GEN_test_RandM(VPSUBQ_256,
    283                "vpsubq %%ymm6,  %%ymm8, %%ymm7",
    284                "vpsubq (%%rax), %%ymm8, %%ymm7")
    285 
    286 GEN_test_RandM(VPSUBW_256,
    287                "vpsubw %%ymm6,  %%ymm8, %%ymm7",
    288                "vpsubw (%%rax), %%ymm8, %%ymm7")
    289 
    290 GEN_test_RandM(VPCMPEQQ_256,
    291                "vpcmpeqq %%ymm6,  %%ymm8, %%ymm7",
    292                "vpcmpeqq (%%rax), %%ymm8, %%ymm7")
    293 
    294 GEN_test_RandM(VPCMPGTQ_256,
    295                "vpcmpgtq %%ymm6,  %%ymm8, %%ymm7",
    296                "vpcmpgtq (%%rax), %%ymm8, %%ymm7")
    297 
    298 GEN_test_Ronly(VPSRLQ_0x05_256,
    299                "vpsrlq $0x5, %%ymm9,  %%ymm7")
    300 
    301 GEN_test_RandM(VPMULUDQ_256,
    302                "vpmuludq %%ymm6,  %%ymm8, %%ymm7",
    303                "vpmuludq (%%rax), %%ymm8, %%ymm7")
    304 
    305 GEN_test_RandM(VPMULDQ_256,
    306                "vpmuldq %%ymm6,  %%ymm8, %%ymm7",
    307                "vpmuldq (%%rax), %%ymm8, %%ymm7")
    308 
    309 GEN_test_Ronly(VPSLLQ_0x05_256,
    310                "vpsllq $0x5, %%ymm9,  %%ymm7")
    311 
    312 GEN_test_RandM(VPMAXUD_256,
    313                "vpmaxud %%ymm6,  %%ymm8, %%ymm7",
    314                "vpmaxud (%%rax), %%ymm8, %%ymm7")
    315 
    316 GEN_test_RandM(VPMINUD_256,
    317                "vpminud %%ymm6,  %%ymm8, %%ymm7",
    318                "vpminud (%%rax), %%ymm8, %%ymm7")
    319 
    320 GEN_test_RandM(VPMULLD_256,
    321                "vpmulld %%ymm6,  %%ymm8, %%ymm7",
    322                "vpmulld (%%rax), %%ymm8, %%ymm7")
    323 
        /* Min/max, compare, unpack and vpmovsx/vpmovzx tests.  The
           vpmovsx*/vpmovzx* register forms take xmm6 as their source and
           write ymm8. */
    324 GEN_test_RandM(VPMAXUW_256,
    325                "vpmaxuw %%ymm6,  %%ymm8, %%ymm7",
    326                "vpmaxuw (%%rax), %%ymm8, %%ymm7")
    327 
    328 GEN_test_RandM(VPMINUW_256,
    329                "vpminuw %%ymm6,  %%ymm8, %%ymm7",
    330                "vpminuw (%%rax), %%ymm8, %%ymm7")
    331 
    332 GEN_test_RandM(VPMAXSW_256,
    333                "vpmaxsw %%ymm6,  %%ymm8, %%ymm7",
    334                "vpmaxsw (%%rax), %%ymm8, %%ymm7")
    335 
    336 GEN_test_RandM(VPMINSW_256,
    337                "vpminsw %%ymm6,  %%ymm8, %%ymm7",
    338                "vpminsw (%%rax), %%ymm8, %%ymm7")
    339 
    340 GEN_test_RandM(VPMAXUB_256,
    341                "vpmaxub %%ymm6,  %%ymm8, %%ymm7",
    342                "vpmaxub (%%rax), %%ymm8, %%ymm7")
    343 
    344 GEN_test_RandM(VPMINUB_256,
    345                "vpminub %%ymm6,  %%ymm8, %%ymm7",
    346                "vpminub (%%rax), %%ymm8, %%ymm7")
    347 
    348 GEN_test_RandM(VPMAXSB_256,
    349                "vpmaxsb %%ymm6,  %%ymm8, %%ymm7",
    350                "vpmaxsb (%%rax), %%ymm8, %%ymm7")
    351 
    352 GEN_test_RandM(VPMINSB_256,
    353                "vpminsb %%ymm6,  %%ymm8, %%ymm7",
    354                "vpminsb (%%rax), %%ymm8, %%ymm7")
    355 
    356 GEN_test_RandM(VPMOVSXBW_256,
    357                "vpmovsxbw %%xmm6,  %%ymm8",
    358                "vpmovsxbw (%%rax), %%ymm8")
    359 
    360 GEN_test_RandM(VPSUBUSW_256,
    361                "vpsubusw %%ymm9,  %%ymm8, %%ymm7",
    362                "vpsubusw (%%rax), %%ymm8, %%ymm7")
    363 
    364 GEN_test_RandM(VPSUBSW_256,
    365                "vpsubsw %%ymm9,  %%ymm8, %%ymm7",
    366                "vpsubsw (%%rax), %%ymm8, %%ymm7")
    367 
    368 GEN_test_RandM(VPCMPEQW_256,
    369                "vpcmpeqw %%ymm6,  %%ymm8, %%ymm7",
    370                "vpcmpeqw (%%rax), %%ymm8, %%ymm7")
    371 
    372 GEN_test_RandM(VPADDB_256,
    373                "vpaddb %%ymm6,  %%ymm8, %%ymm7",
    374                "vpaddb (%%rax), %%ymm8, %%ymm7")
    375 
    376 GEN_test_RandM(VPUNPCKHDQ_256,
    377                "vpunpckhdq %%ymm6,  %%ymm8, %%ymm7",
    378                "vpunpckhdq (%%rax), %%ymm8, %%ymm7")
    379 
    380 GEN_test_RandM(VPMOVSXDQ_256,
    381                "vpmovsxdq %%xmm6,  %%ymm8",
    382                "vpmovsxdq (%%rax), %%ymm8")
    383 
    384 GEN_test_RandM(VPMOVSXWD_256,
    385                "vpmovsxwd %%xmm6,  %%ymm8",
    386                "vpmovsxwd (%%rax), %%ymm8")
    387 
    388 GEN_test_RandM(VPMULHW_256,
    389                "vpmulhw %%ymm9,  %%ymm8, %%ymm7",
    390                "vpmulhw (%%rax), %%ymm8, %%ymm7")
    391 
    392 GEN_test_RandM(VPUNPCKHQDQ_256,
    393                "vpunpckhqdq %%ymm6,  %%ymm8, %%ymm7",
    394                "vpunpckhqdq (%%rax), %%ymm8, %%ymm7")
    395 
    396 GEN_test_Ronly(VPSRAW_0x05_256,
    397                "vpsraw $0x5, %%ymm9,  %%ymm7")
    398 
    399 GEN_test_RandM(VPCMPGTB_256,
    400                "vpcmpgtb %%ymm6,  %%ymm8, %%ymm7",
    401                "vpcmpgtb (%%rax), %%ymm8, %%ymm7")
    402 
    403 GEN_test_RandM(VPCMPGTW_256,
    404                "vpcmpgtw %%ymm6,  %%ymm8, %%ymm7",
    405                "vpcmpgtw (%%rax), %%ymm8, %%ymm7")
    406 
    407 GEN_test_RandM(VPCMPGTD_256,
    408                "vpcmpgtd %%ymm6,  %%ymm8, %%ymm7",
    409                "vpcmpgtd (%%rax), %%ymm8, %%ymm7")
    410 
    411 GEN_test_RandM(VPMOVZXBD_256,
    412                "vpmovzxbd %%xmm6,  %%ymm8",
    413                "vpmovzxbd (%%rax), %%ymm8")
    414 
    415 GEN_test_RandM(VPMOVSXBD_256,
    416                "vpmovsxbd %%xmm6,  %%ymm8",
    417                "vpmovsxbd (%%rax), %%ymm8")
    418 
        /* vpalignr: the three tests together cover the immediates
           0, 3, 6, 9, 12 and 15 (register form, then memory form). */
    419 GEN_test_RandM(VPALIGNR_256_1of3,
    420                "vpalignr $0, %%ymm6,  %%ymm8, %%ymm7",
    421                "vpalignr $3, (%%rax), %%ymm8, %%ymm7")
    422 GEN_test_RandM(VPALIGNR_256_2of3,
    423                "vpalignr $6, %%ymm6,  %%ymm8, %%ymm7",
    424                "vpalignr $9, (%%rax), %%ymm8, %%ymm7")
    425 GEN_test_RandM(VPALIGNR_256_3of3,
    426                "vpalignr $12, %%ymm6,  %%ymm8, %%ymm7",
    427                "vpalignr $15, (%%rax), %%ymm8, %%ymm7")
    428 
        /* vpblendw: the hex value in each test name is the register-form
           immediate; the memory form uses a different immediate. */
    429 GEN_test_RandM(VPBLENDW_256_0x00,
    430                "vpblendw $0x00, %%ymm6,  %%ymm8, %%ymm7",
    431                "vpblendw $0x01, (%%rax), %%ymm8, %%ymm7")
    432 GEN_test_RandM(VPBLENDW_256_0xFE,
    433                "vpblendw $0xFE, %%ymm6,  %%ymm8, %%ymm7",
    434                "vpblendw $0xFF, (%%rax), %%ymm8, %%ymm7")
    435 GEN_test_RandM(VPBLENDW_256_0x30,
    436                "vpblendw $0x30, %%ymm6,  %%ymm8, %%ymm7",
    437                "vpblendw $0x03, (%%rax), %%ymm8, %%ymm7")
    438 GEN_test_RandM(VPBLENDW_256_0x21,
    439                "vpblendw $0x21, %%ymm6,  %%ymm8, %%ymm7",
    440                "vpblendw $0x12, (%%rax), %%ymm8, %%ymm7")
    441 GEN_test_RandM(VPBLENDW_256_0xD7,
    442                "vpblendw $0xD7, %%ymm6,  %%ymm8, %%ymm7",
    443                "vpblendw $0x6C, (%%rax), %%ymm8, %%ymm7")
    444 GEN_test_RandM(VPBLENDW_256_0xB5,
    445                "vpblendw $0xB5, %%ymm6,  %%ymm8, %%ymm7",
    446                "vpblendw $0x4A, (%%rax), %%ymm8, %%ymm7")
    447 GEN_test_RandM(VPBLENDW_256_0x85,
    448                "vpblendw $0x85, %%ymm6,  %%ymm8, %%ymm7",
    449                "vpblendw $0xDC, (%%rax), %%ymm8, %%ymm7")
    450 GEN_test_RandM(VPBLENDW_256_0x29,
    451                "vpblendw $0x29, %%ymm6,  %%ymm8, %%ymm7",
    452                "vpblendw $0x92, (%%rax), %%ymm8, %%ymm7")
    453 
        /* Shifts whose count comes from a register or from memory.

           Register form: the random value the harness loaded into r14 is
           masked down with andl, copied into xmm6 with vmovd, and xmm6 is
           then used as the count operand.

           Memory form: the 64-bit value at 128(%rax) -- the block's u64
           field -- is masked in place with andq and that location is then
           used as the count operand.

           NOTE(review): 128(%rax) is an 8-byte field at the end of Block;
           if the instruction's memory count operand is wider than 8 bytes
           the access extends past u64 -- confirm against the instruction
           reference and the allocation size. */
    454 GEN_test_RandM(VPSLLW_256,
    455                "andl $15, %%r14d;"
    456                "vmovd %%r14d, %%xmm6;"
    457                "vpsllw %%xmm6,     %%ymm8, %%ymm9",
    458                "andq $15, 128(%%rax);"
    459                "vpsllw 128(%%rax), %%ymm8, %%ymm9")
    460 
    461 GEN_test_RandM(VPSRLW_256,
    462                "andl $15, %%r14d;"
    463                "vmovd %%r14d, %%xmm6;"
    464                "vpsrlw %%xmm6,     %%ymm8, %%ymm9",
    465                "andq $15, 128(%%rax);"
    466                "vpsrlw 128(%%rax), %%ymm8, %%ymm9")
    467 
        /* Unlike the two word shifts above, the register form here masks
           with 31 while the memory form masks with 15. */
    468 GEN_test_RandM(VPSRAW_256,
    469                "andl $31, %%r14d;"
    470                "vmovd %%r14d, %%xmm6;"
    471                "vpsraw %%xmm6,     %%ymm8, %%ymm9",
    472                "andq $15, 128(%%rax);"
    473                "vpsraw 128(%%rax), %%ymm8, %%ymm9")
    474 
    475 GEN_test_RandM(VPSLLD_256,
    476                "andl $31, %%r14d;"
    477                "vmovd %%r14d, %%xmm6;"
    478                "vpslld %%xmm6,     %%ymm8, %%ymm9",
    479                "andq $31, 128(%%rax);"
    480                "vpslld 128(%%rax), %%ymm8, %%ymm9")
    481 
    482 GEN_test_RandM(VPSRLD_256,
    483                "andl $31, %%r14d;"
    484                "vmovd %%r14d, %%xmm6;"
    485                "vpsrld %%xmm6,     %%ymm8, %%ymm9",
    486                "andq $31, 128(%%rax);"
    487                "vpsrld 128(%%rax), %%ymm8, %%ymm9")
    488 
    489 GEN_test_RandM(VPSRAD_256,
    490                "andl $31, %%r14d;"
    491                "vmovd %%r14d, %%xmm6;"
    492                "vpsrad %%xmm6,     %%ymm8, %%ymm9",
    493                "andq $31, 128(%%rax);"
    494                "vpsrad 128(%%rax), %%ymm8, %%ymm9")
    495 
    496 GEN_test_RandM(VPSLLQ_256,
    497                "andl $63, %%r14d;"
    498                "vmovd %%r14d, %%xmm6;"
    499                "vpsllq %%xmm6,     %%ymm8, %%ymm9",
    500                "andq $63, 128(%%rax);"
    501                "vpsllq 128(%%rax), %%ymm8, %%ymm9")
    502 
    503 GEN_test_RandM(VPSRLQ_256,
    504                "andl $63, %%r14d;"
    505                "vmovd %%r14d, %%xmm6;"
    506                "vpsrlq %%xmm6,     %%ymm8, %%ymm9",
    507                "andq $63, 128(%%rax);"
    508                "vpsrlq 128(%%rax), %%ymm8, %%ymm9")
    509 
        /* Multiply-add, pack, average, saturating add, horizontal
           add/subtract, absolute value and further vpmovsx/vpmovzx tests.
           VMOVNTDQA_256 is generated with GEN_test_Monly, so only a memory
           form is exercised. */
    510 GEN_test_RandM(VPMADDWD_256,
    511                "vpmaddwd %%ymm6,  %%ymm8, %%ymm7",
    512                "vpmaddwd (%%rax), %%ymm8, %%ymm7")
    513 
    514 GEN_test_Monly(VMOVNTDQA_256,
    515                "vmovntdqa (%%rax), %%ymm9")
    516 
    517 GEN_test_RandM(VPACKSSWB_256,
    518                "vpacksswb %%ymm6,  %%ymm8, %%ymm7",
    519                "vpacksswb (%%rax), %%ymm8, %%ymm7")
    520 
    521 GEN_test_RandM(VPAVGB_256,
    522                "vpavgb %%ymm6,  %%ymm8, %%ymm7",
    523                "vpavgb (%%rax), %%ymm8, %%ymm7")
    524 
    525 GEN_test_RandM(VPAVGW_256,
    526                "vpavgw %%ymm6,  %%ymm8, %%ymm7",
    527                "vpavgw (%%rax), %%ymm8, %%ymm7")
    528 
    529 GEN_test_RandM(VPADDSB_256,
    530                "vpaddsb %%ymm6,  %%ymm8, %%ymm7",
    531                "vpaddsb (%%rax), %%ymm8, %%ymm7")
    532 
    533 GEN_test_RandM(VPADDSW_256,
    534                "vpaddsw %%ymm6,  %%ymm8, %%ymm7",
    535                "vpaddsw (%%rax), %%ymm8, %%ymm7")
    536 
    537 GEN_test_RandM(VPHADDW_256,
    538                "vphaddw %%ymm6,  %%ymm8, %%ymm7",
    539                "vphaddw (%%rax), %%ymm8, %%ymm7")
    540 
    541 GEN_test_RandM(VPHADDD_256,
    542                "vphaddd %%ymm6,  %%ymm8, %%ymm7",
    543                "vphaddd (%%rax), %%ymm8, %%ymm7")
    544 
    545 GEN_test_RandM(VPHADDSW_256,
    546                "vphaddsw %%ymm6,  %%ymm8, %%ymm7",
    547                "vphaddsw (%%rax), %%ymm8, %%ymm7")
    548 
    549 GEN_test_RandM(VPMADDUBSW_256,
    550                "vpmaddubsw %%ymm6,  %%ymm8, %%ymm7",
    551                "vpmaddubsw (%%rax), %%ymm8, %%ymm7")
    552 
    553 GEN_test_RandM(VPHSUBW_256,
    554                "vphsubw %%ymm6,  %%ymm8, %%ymm7",
    555                "vphsubw (%%rax), %%ymm8, %%ymm7")
    556 
    557 GEN_test_RandM(VPHSUBD_256,
    558                "vphsubd %%ymm6,  %%ymm8, %%ymm7",
    559                "vphsubd (%%rax), %%ymm8, %%ymm7")
    560 
    561 GEN_test_RandM(VPHSUBSW_256,
    562                "vphsubsw %%ymm6,  %%ymm8, %%ymm7",
    563                "vphsubsw (%%rax), %%ymm8, %%ymm7")
    564 
    565 GEN_test_RandM(VPABSB_256,
    566                "vpabsb %%ymm6,  %%ymm7",
    567                "vpabsb (%%rax), %%ymm7")
    568 
    569 GEN_test_RandM(VPABSW_256,
    570                "vpabsw %%ymm6,  %%ymm7",
    571                "vpabsw (%%rax), %%ymm7")
    572 
    573 GEN_test_RandM(VPMOVSXBQ_256,
    574                "vpmovsxbq %%xmm6,  %%ymm8",
    575                "vpmovsxbq (%%rax), %%ymm8")
    576 
    577 GEN_test_RandM(VPMOVSXWQ_256,
    578                "vpmovsxwq %%xmm6,  %%ymm8",
    579                "vpmovsxwq (%%rax), %%ymm8")
    580 
    581 GEN_test_RandM(VPACKUSDW_256,
    582                "vpackusdw %%ymm6,  %%ymm8, %%ymm7",
    583                "vpackusdw (%%rax), %%ymm8, %%ymm7")
    584 
    585 GEN_test_RandM(VPMOVZXBQ_256,
    586                "vpmovzxbq %%xmm6,  %%ymm8",
    587                "vpmovzxbq (%%rax), %%ymm8")
    588 
    589 GEN_test_RandM(VPMOVZXWQ_256,
    590                "vpmovzxwq %%xmm6,  %%ymm8",
    591                "vpmovzxwq (%%rax), %%ymm8")
    592 
    593 GEN_test_RandM(VPMOVZXDQ_256,
    594                "vpmovzxdq %%xmm6,  %%ymm8",
    595                "vpmovzxdq (%%rax), %%ymm8")
    596 
        /* vmpsadbw: here the immediate in the test name is used by both
           the register form and the memory form. */
    597 GEN_test_RandM(VMPSADBW_256_0x0,
    598                "vmpsadbw $0, %%ymm6,  %%ymm8, %%ymm7",
    599                "vmpsadbw $0, (%%rax), %%ymm8, %%ymm7")
    600 GEN_test_RandM(VMPSADBW_256_0x39,
    601                "vmpsadbw $0x39, %%ymm6,  %%ymm8, %%ymm7",
    602                "vmpsadbw $0x39, (%%rax), %%ymm8, %%ymm7")
    603 GEN_test_RandM(VMPSADBW_256_0x32,
    604                "vmpsadbw $0x32, %%ymm6,  %%ymm8, %%ymm7",
    605                "vmpsadbw $0x32, (%%rax), %%ymm8, %%ymm7")
    606 GEN_test_RandM(VMPSADBW_256_0x2b,
    607                "vmpsadbw $0x2b, %%ymm6,  %%ymm8, %%ymm7",
    608                "vmpsadbw $0x2b, (%%rax), %%ymm8, %%ymm7")
    609 GEN_test_RandM(VMPSADBW_256_0x24,
    610                "vmpsadbw $0x24, %%ymm6,  %%ymm8, %%ymm7",
    611                "vmpsadbw $0x24, (%%rax), %%ymm8, %%ymm7")
    612 GEN_test_RandM(VMPSADBW_256_0x1d,
    613                "vmpsadbw $0x1d, %%ymm6,  %%ymm8, %%ymm7",
    614                "vmpsadbw $0x1d, (%%rax), %%ymm8, %%ymm7")
    615 GEN_test_RandM(VMPSADBW_256_0x16,
    616                "vmpsadbw $0x16, %%ymm6,  %%ymm8, %%ymm7",
    617                "vmpsadbw $0x16, (%%rax), %%ymm8, %%ymm7")
    618 GEN_test_RandM(VMPSADBW_256_0x0f,
    619                "vmpsadbw $0x0f, %%ymm6,  %%ymm8, %%ymm7",
    620                "vmpsadbw $0x0f, (%%rax), %%ymm8, %%ymm7")
    621 
    622 GEN_test_RandM(VPSADBW_256,
    623                "vpsadbw %%ymm6,  %%ymm8, %%ymm7",
    624                "vpsadbw (%%rax), %%ymm8, %%ymm7")
    625 
    626 GEN_test_RandM(VPSIGNB_256,
    627                "vpsignb %%ymm6,  %%ymm8, %%ymm7",
    628                "vpsignb (%%rax), %%ymm8, %%ymm7")
    629 
    630 GEN_test_RandM(VPSIGNW_256,
    631                "vpsignw %%ymm6,  %%ymm8, %%ymm7",
    632                "vpsignw (%%rax), %%ymm8, %%ymm7")
    633 
    634 GEN_test_RandM(VPSIGND_256,
    635                "vpsignd %%ymm6,  %%ymm8, %%ymm7",
    636                "vpsignd (%%rax), %%ymm8, %%ymm7")
    637 
    638 GEN_test_RandM(VPMULHRSW_256,
    639                "vpmulhrsw %%ymm6,  %%ymm8, %%ymm7",
    640                "vpmulhrsw (%%rax), %%ymm8, %%ymm7")
    641 
    642 /* Instructions new in AVX2.  */
    643 
    644 GEN_test_Monly(VBROADCASTI128,
    645                "vbroadcasti128 (%%rax), %%ymm9")
    646 
        /* In the memory forms of the two vextracti128 tests, (%rax) is
           the destination: the instruction writes into the start of the
           block (field a1), which the memory-form harness does not
           otherwise store to. */
    647 GEN_test_RandM(VEXTRACTI128_0x0,
    648                "vextracti128 $0x0, %%ymm7, %%xmm9",
    649                "vextracti128 $0x0, %%ymm7, (%%rax)")
    650 
    651 GEN_test_RandM(VEXTRACTI128_0x1,
    652                "vextracti128 $0x1, %%ymm7, %%xmm9",
    653                "vextracti128 $0x1, %%ymm7, (%%rax)")
    654 
    655 GEN_test_RandM(VINSERTI128_0x0,
    656                "vinserti128 $0x0, %%xmm9,  %%ymm7, %%ymm8",
    657                "vinserti128 $0x0, (%%rax), %%ymm7, %%ymm8")
    658 
    659 GEN_test_RandM(VINSERTI128_0x1,
    660                "vinserti128 $0x1, %%xmm9,  %%ymm7, %%ymm8",
    661                "vinserti128 $0x1, (%%rax), %%ymm7, %%ymm8")
    662 
        /* vperm2i128: the immediate in the test name is used by both
           forms. */
    663 GEN_test_RandM(VPERM2I128_0x00,
    664                "vperm2i128 $0x00, %%ymm6,  %%ymm8, %%ymm7",
    665                "vperm2i128 $0x00, (%%rax), %%ymm8, %%ymm7")
    666 GEN_test_RandM(VPERM2I128_0xFF,
    667                "vperm2i128 $0xFF, %%ymm6,  %%ymm8, %%ymm7",
    668                "vperm2i128 $0xFF, (%%rax), %%ymm8, %%ymm7")
    669 GEN_test_RandM(VPERM2I128_0x30,
    670                "vperm2i128 $0x30, %%ymm6,  %%ymm8, %%ymm7",
    671                "vperm2i128 $0x30, (%%rax), %%ymm8, %%ymm7")
    672 GEN_test_RandM(VPERM2I128_0x21,
    673                "vperm2i128 $0x21, %%ymm6,  %%ymm8, %%ymm7",
    674                "vperm2i128 $0x21, (%%rax), %%ymm8, %%ymm7")
    675 GEN_test_RandM(VPERM2I128_0x12,
    676                "vperm2i128 $0x12, %%ymm6,  %%ymm8, %%ymm7",
    677                "vperm2i128 $0x12, (%%rax), %%ymm8, %%ymm7")
    678 GEN_test_RandM(VPERM2I128_0x03,
    679                "vperm2i128 $0x03, %%ymm6,  %%ymm8, %%ymm7",
    680                "vperm2i128 $0x03, (%%rax), %%ymm8, %%ymm7")
    681 GEN_test_RandM(VPERM2I128_0x85,
    682                "vperm2i128 $0x85, %%ymm6,  %%ymm8, %%ymm7",
    683                "vperm2i128 $0x85, (%%rax), %%ymm8, %%ymm7")
    684 GEN_test_RandM(VPERM2I128_0x5A,
    685                "vperm2i128 $0x5A, %%ymm6,  %%ymm8, %%ymm7",
    686                "vperm2i128 $0x5A, (%%rax), %%ymm8, %%ymm7")
    687 
        /* Register-source broadcasts: register form only, source xmm9. */
    688 GEN_test_Ronly(VBROADCASTSS_128,
    689                "vbroadcastss %%xmm9, %%xmm7")
    690 
    691 GEN_test_Ronly(VBROADCASTSS_256,
    692                "vbroadcastss %%xmm9, %%ymm7")
    693 
    694 GEN_test_Ronly(VBROADCASTSD_256,
    695                "vbroadcastsd %%xmm9, %%ymm7")
    696 
    697 GEN_test_RandM(VPERMD,
    698                "vpermd %%ymm6, %%ymm7, %%ymm9",
    699                "vpermd (%%rax), %%ymm7, %%ymm9")
    700 
        /* vpermq / vpermpd: the hex value in each test name is the
           register-form immediate; the memory form uses a different
           immediate.  Both forms write ymm7. */
    701 GEN_test_RandM(VPERMQ_0x00,
    702                "vpermq $0x00, %%ymm6,  %%ymm7",
    703                "vpermq $0x01, (%%rax), %%ymm7")
    704 GEN_test_RandM(VPERMQ_0xFE,
    705                "vpermq $0xFE, %%ymm6,  %%ymm7",
    706                "vpermq $0xFF, (%%rax), %%ymm7")
    707 GEN_test_RandM(VPERMQ_0x30,
    708                "vpermq $0x30, %%ymm6,  %%ymm7",
    709                "vpermq $0x03, (%%rax), %%ymm7")
    710 GEN_test_RandM(VPERMQ_0x21,
    711                "vpermq $0x21, %%ymm6,  %%ymm7",
    712                "vpermq $0x12, (%%rax), %%ymm7")
    713 GEN_test_RandM(VPERMQ_0xD7,
    714                "vpermq $0xD7, %%ymm6,  %%ymm7",
    715                "vpermq $0x6C, (%%rax), %%ymm7")
    716 GEN_test_RandM(VPERMQ_0xB5,
    717                "vpermq $0xB5, %%ymm6,  %%ymm7",
    718                "vpermq $0x4A, (%%rax), %%ymm7")
    719 GEN_test_RandM(VPERMQ_0x85,
    720                "vpermq $0x85, %%ymm6,  %%ymm7",
    721                "vpermq $0xDC, (%%rax), %%ymm7")
    722 GEN_test_RandM(VPERMQ_0x29,
    723                "vpermq $0x29, %%ymm6,  %%ymm7",
    724                "vpermq $0x92, (%%rax), %%ymm7")
    725 
    726 GEN_test_RandM(VPERMPS,
    727                "vpermps %%ymm6, %%ymm7, %%ymm9",
    728                "vpermps (%%rax), %%ymm7, %%ymm9")
    729 
    730 GEN_test_RandM(VPERMPD_0x00,
    731                "vpermpd $0x00, %%ymm6,  %%ymm7",
    732                "vpermpd $0x01, (%%rax), %%ymm7")
    733 GEN_test_RandM(VPERMPD_0xFE,
    734                "vpermpd $0xFE, %%ymm6,  %%ymm7",
    735                "vpermpd $0xFF, (%%rax), %%ymm7")
    736 GEN_test_RandM(VPERMPD_0x30,
    737                "vpermpd $0x30, %%ymm6,  %%ymm7",
    738                "vpermpd $0x03, (%%rax), %%ymm7")
    739 GEN_test_RandM(VPERMPD_0x21,
    740                "vpermpd $0x21, %%ymm6,  %%ymm7",
    741                "vpermpd $0x12, (%%rax), %%ymm7")
    742 GEN_test_RandM(VPERMPD_0xD7,
    743                "vpermpd $0xD7, %%ymm6,  %%ymm7",
    744                "vpermpd $0x6C, (%%rax), %%ymm7")
    745 GEN_test_RandM(VPERMPD_0xB5,
    746                "vpermpd $0xB5, %%ymm6,  %%ymm7",
    747                "vpermpd $0x4A, (%%rax), %%ymm7")
    748 GEN_test_RandM(VPERMPD_0x85,
    749                "vpermpd $0x85, %%ymm6,  %%ymm7",
    750                "vpermpd $0xDC, (%%rax), %%ymm7")
    751 GEN_test_RandM(VPERMPD_0x29,
    752                "vpermpd $0x29, %%ymm6,  %%ymm7",
    753                "vpermpd $0x92, (%%rax), %%ymm7")
    754 
        /* vpblendd on xmm registers.  Each test name carries the even
           register-form immediate; the memory form uses the next odd
           value, so the eight tests cover immediates 0x00 through 0x0F. */
    755 GEN_test_RandM(VPBLENDD_128_0x00,
    756                "vpblendd $0x00, %%xmm6,  %%xmm8, %%xmm7",
    757                "vpblendd $0x01, (%%rax), %%xmm8, %%xmm7")
    758 GEN_test_RandM(VPBLENDD_128_0x02,
    759                "vpblendd $0x02, %%xmm6,  %%xmm8, %%xmm7",
    760                "vpblendd $0x03, (%%rax), %%xmm8, %%xmm7")
    761 GEN_test_RandM(VPBLENDD_128_0x04,
    762                "vpblendd $0x04, %%xmm6,  %%xmm8, %%xmm7",
    763                "vpblendd $0x05, (%%rax), %%xmm8, %%xmm7")
    764 GEN_test_RandM(VPBLENDD_128_0x06,
    765                "vpblendd $0x06, %%xmm6,  %%xmm8, %%xmm7",
    766                "vpblendd $0x07, (%%rax), %%xmm8, %%xmm7")
    767 GEN_test_RandM(VPBLENDD_128_0x08,
    768                "vpblendd $0x08, %%xmm6,  %%xmm8, %%xmm7",
    769                "vpblendd $0x09, (%%rax), %%xmm8, %%xmm7")
    770 GEN_test_RandM(VPBLENDD_128_0x0A,
    771                "vpblendd $0x0A, %%xmm6,  %%xmm8, %%xmm7",
    772                "vpblendd $0x0B, (%%rax), %%xmm8, %%xmm7")
    773 GEN_test_RandM(VPBLENDD_128_0x0C,
    774                "vpblendd $0x0C, %%xmm6,  %%xmm8, %%xmm7",
    775                "vpblendd $0x0D, (%%rax), %%xmm8, %%xmm7")
    776 GEN_test_RandM(VPBLENDD_128_0x0E,
    777                "vpblendd $0x0E, %%xmm6,  %%xmm8, %%xmm7",
    778                "vpblendd $0x0F, (%%rax), %%xmm8, %%xmm7")
    779 
    /* VPBLENDD, 256 bit: bit i of the imm8 chooses dword i of the result
       in %ymm7 -- set means take it from %ymm6 / (%rax), clear means take
       it from %ymm8.  The test name carries the immediate of the register
       form; the memory form uses a different immediate in every test. */
    780 GEN_test_RandM(VPBLENDD_256_0x00,
    781                "vpblendd $0x00, %%ymm6,  %%ymm8, %%ymm7",
    782                "vpblendd $0x01, (%%rax), %%ymm8, %%ymm7")
    783 GEN_test_RandM(VPBLENDD_256_0xFE,
    784                "vpblendd $0xFE, %%ymm6,  %%ymm8, %%ymm7",
    785                "vpblendd $0xFF, (%%rax), %%ymm8, %%ymm7")
    786 GEN_test_RandM(VPBLENDD_256_0x30,
    787                "vpblendd $0x30, %%ymm6,  %%ymm8, %%ymm7",
    788                "vpblendd $0x03, (%%rax), %%ymm8, %%ymm7")
    789 GEN_test_RandM(VPBLENDD_256_0x21,
    790                "vpblendd $0x21, %%ymm6,  %%ymm8, %%ymm7",
    791                "vpblendd $0x12, (%%rax), %%ymm8, %%ymm7")
    792 GEN_test_RandM(VPBLENDD_256_0xD7,
    793                "vpblendd $0xD7, %%ymm6,  %%ymm8, %%ymm7",
    794                "vpblendd $0x6C, (%%rax), %%ymm8, %%ymm7")
    795 GEN_test_RandM(VPBLENDD_256_0xB5,
    796                "vpblendd $0xB5, %%ymm6,  %%ymm8, %%ymm7",
    797                "vpblendd $0x4A, (%%rax), %%ymm8, %%ymm7")
    798 GEN_test_RandM(VPBLENDD_256_0x85,
    799                "vpblendd $0x85, %%ymm6,  %%ymm8, %%ymm7",
    800                "vpblendd $0xDC, (%%rax), %%ymm8, %%ymm7")
    801 GEN_test_RandM(VPBLENDD_256_0x29,
    802                "vpblendd $0x29, %%ymm6,  %%ymm8, %%ymm7",
    803                "vpblendd $0x92, (%%rax), %%ymm8, %%ymm7")
    804 
    /* VPSLLVD, 128 bit: per-dword variable left shift of %xmm8 into %xmm7.
       Reg form: the shift-left/shift-right by 27 pair keeps only the low
       5 bits of every count dword in %xmm6 (counts 0..31).
       Mem form: the count dwords at offsets 0, 4 and 8 are masked to
       0..31; the dword at offset 12 is not masked.
       NOTE(review): the unmasked lane presumably exists to exercise an
       out-of-range count -- confirm. */
    805 GEN_test_RandM(VPSLLVD_128,
    806                "vpslld $27, %%xmm6, %%xmm6;"
    807                "vpsrld $27, %%xmm6, %%xmm6;"
    808                "vpsllvd %%xmm6, %%xmm8, %%xmm7",
    809                "andl $31, (%%rax);"
    810                "andl $31, 4(%%rax);"
    811                "andl $31, 8(%%rax);"
    812                "vpsllvd (%%rax), %%xmm8, %%xmm7")
    813 
    /* VPSLLVD, 256 bit: as the 128-bit test, on %ymm registers.  In the
       mem form the count dwords at offsets 0, 4, 8, 16, 20 and 24 are
       masked to 0..31; the dwords at offsets 12 and 28 are not masked. */
    814 GEN_test_RandM(VPSLLVD_256,
    815                "vpslld $27, %%ymm6, %%ymm6;"
    816                "vpsrld $27, %%ymm6, %%ymm6;"
    817                "vpsllvd %%ymm6, %%ymm8, %%ymm7",
    818                "andl $31, (%%rax);"
    819                "andl $31, 4(%%rax);"
    820                "andl $31, 8(%%rax);"
    821                "andl $31, 16(%%rax);"
    822                "andl $31, 20(%%rax);"
    823                "andl $31, 24(%%rax);"
    824                "vpsllvd (%%rax), %%ymm8, %%ymm7")
    825 
    /* VPSLLVQ, 128 bit: per-qword variable left shift of %xmm8 into %xmm7.
       Reg form: the shift pair by 58 keeps only the low 6 bits of every
       count qword in %xmm6 (counts 0..63).
       Mem form: andl is a 32-bit operation, so only the low dword of the
       qword at offset 0 is masked; its high dword and the whole qword at
       offset 8 are left as they were.
       NOTE(review): whether the high dword was meant to stay unmasked is
       not evident from here -- confirm before "fixing" it, since it
       changes the test's output. */
    826 GEN_test_RandM(VPSLLVQ_128,
    827                "vpsllq $58, %%xmm6, %%xmm6;"
    828                "vpsrlq $58, %%xmm6, %%xmm6;"
    829                "vpsllvq %%xmm6, %%xmm8, %%xmm7",
    830                "andl $63, (%%rax);"
    831                "vpsllvq (%%rax), %%xmm8, %%xmm7")
    832 
    /* VPSLLVQ, 256 bit: as the 128-bit test, on %ymm registers.  In the
       mem form the low dwords of the qwords at offsets 0, 8 and 16 are
       masked with 63; the qword at offset 24 is not touched. */
    833 GEN_test_RandM(VPSLLVQ_256,
    834                "vpsllq $58, %%ymm6, %%ymm6;"
    835                "vpsrlq $58, %%ymm6, %%ymm6;"
    836                "vpsllvq %%ymm6, %%ymm8, %%ymm7",
    837                "andl $63, (%%rax);"
    838                "andl $63, 8(%%rax);"
    839                "andl $63, 16(%%rax);"
    840                "vpsllvq (%%rax), %%ymm8, %%ymm7")
    841 
    /* VPSRLVD, 128 bit: per-dword variable logical right shift of %xmm8
       into %xmm7.
       Reg form: the shift-left/shift-right by 27 pair keeps only the low
       5 bits of every count dword in %xmm6 (counts 0..31).
       Mem form: the count dwords at offsets 0, 4 and 8 are masked to
       0..31; the dword at offset 12 is not masked.
       NOTE(review): the unmasked lane presumably exists to exercise an
       out-of-range count -- confirm. */
    842 GEN_test_RandM(VPSRLVD_128,
    843                "vpslld $27, %%xmm6, %%xmm6;"
    844                "vpsrld $27, %%xmm6, %%xmm6;"
    845                "vpsrlvd %%xmm6, %%xmm8, %%xmm7",
    846                "andl $31, (%%rax);"
    847                "andl $31, 4(%%rax);"
    848                "andl $31, 8(%%rax);"
    849                "vpsrlvd (%%rax), %%xmm8, %%xmm7")
    850 
    /* VPSRLVD, 256 bit: as the 128-bit test, on %ymm registers.  In the
       mem form the count dwords at offsets 0, 4, 8, 16, 20 and 24 are
       masked to 0..31; the dwords at offsets 12 and 28 are not masked. */
    851 GEN_test_RandM(VPSRLVD_256,
    852                "vpslld $27, %%ymm6, %%ymm6;"
    853                "vpsrld $27, %%ymm6, %%ymm6;"
    854                "vpsrlvd %%ymm6, %%ymm8, %%ymm7",
    855                "andl $31, (%%rax);"
    856                "andl $31, 4(%%rax);"
    857                "andl $31, 8(%%rax);"
    858                "andl $31, 16(%%rax);"
    859                "andl $31, 20(%%rax);"
    860                "andl $31, 24(%%rax);"
    861                "vpsrlvd (%%rax), %%ymm8, %%ymm7")
    862 
    /* VPSRLVQ, 128 bit: per-qword variable logical right shift of %xmm8
       into %xmm7.
       Reg form: the shift pair by 58 keeps only the low 6 bits of every
       count qword in %xmm6 (counts 0..63).
       Mem form: andl is a 32-bit operation, so only the low dword of the
       qword at offset 0 is masked; its high dword and the whole qword at
       offset 8 are left as they were. */
    863 GEN_test_RandM(VPSRLVQ_128,
    864                "vpsllq $58, %%xmm6, %%xmm6;"
    865                "vpsrlq $58, %%xmm6, %%xmm6;"
    866                "vpsrlvq %%xmm6, %%xmm8, %%xmm7",
    867                "andl $63, (%%rax);"
    868                "vpsrlvq (%%rax), %%xmm8, %%xmm7")
    869 
    /* VPSRLVQ, 256 bit: as the 128-bit test, on %ymm registers.  In the
       mem form the low dwords of the qwords at offsets 0, 8 and 16 are
       masked with 63; the qword at offset 24 is not touched. */
    870 GEN_test_RandM(VPSRLVQ_256,
    871                "vpsllq $58, %%ymm6, %%ymm6;"
    872                "vpsrlq $58, %%ymm6, %%ymm6;"
    873                "vpsrlvq %%ymm6, %%ymm8, %%ymm7",
    874                "andl $63, (%%rax);"
    875                "andl $63, 8(%%rax);"
    876                "andl $63, 16(%%rax);"
    877                "vpsrlvq (%%rax), %%ymm8, %%ymm7")
    878 
    /* VPSRAVD, 128 bit: per-dword variable arithmetic right shift of %xmm8
       into %xmm7.
       Reg form: the shift-left/shift-right by 27 pair keeps only the low
       5 bits of every count dword in %xmm6 (counts 0..31).
       Mem form: the count dwords at offsets 0, 4 and 8 are masked to
       0..31; the dword at offset 12 is not masked.
       NOTE(review): the unmasked lane presumably exists to exercise an
       out-of-range count -- confirm. */
    879 GEN_test_RandM(VPSRAVD_128,
    880                "vpslld $27, %%xmm6, %%xmm6;"
    881                "vpsrld $27, %%xmm6, %%xmm6;"
    882                "vpsravd %%xmm6, %%xmm8, %%xmm7",
    883                "andl $31, (%%rax);"
    884                "andl $31, 4(%%rax);"
    885                "andl $31, 8(%%rax);"
    886                "vpsravd (%%rax), %%xmm8, %%xmm7")
    887 
    /* VPSRAVD, 256 bit: as the 128-bit test, on %ymm registers.  In the
       mem form the count dwords at offsets 0, 4, 8, 16, 20 and 24 are
       masked to 0..31; the dwords at offsets 12 and 28 are not masked. */
    888 GEN_test_RandM(VPSRAVD_256,
    889                "vpslld $27, %%ymm6, %%ymm6;"
    890                "vpsrld $27, %%ymm6, %%ymm6;"
    891                "vpsravd %%ymm6, %%ymm8, %%ymm7",
    892                "andl $31, (%%rax);"
    893                "andl $31, 4(%%rax);"
    894                "andl $31, 8(%%rax);"
    895                "andl $31, 16(%%rax);"
    896                "andl $31, 20(%%rax);"
    897                "andl $31, 24(%%rax);"
    898                "vpsravd (%%rax), %%ymm8, %%ymm7")
    899 
    /* VPBROADCAST{B,W,D,Q}: replicate one byte / word / dword / qword into
       every element of the destination.  For each element size there is a
       128-bit test (destination %xmm7) and a 256-bit test (destination
       %ymm7).  The register form always takes the element from the low
       end of %xmm9; the memory form reads it from (%rax). */
    900 GEN_test_RandM(VPBROADCASTB_128,
    901                "vpbroadcastb %%xmm9, %%xmm7",
    902                "vpbroadcastb (%%rax), %%xmm7")
    903 
    904 GEN_test_RandM(VPBROADCASTB_256,
    905                "vpbroadcastb %%xmm9, %%ymm7",
    906                "vpbroadcastb (%%rax), %%ymm7")
    907 
    908 GEN_test_RandM(VPBROADCASTW_128,
    909                "vpbroadcastw %%xmm9, %%xmm7",
    910                "vpbroadcastw (%%rax), %%xmm7")
    911 
    912 GEN_test_RandM(VPBROADCASTW_256,
    913                "vpbroadcastw %%xmm9, %%ymm7",
    914                "vpbroadcastw (%%rax), %%ymm7")
    915 
    916 GEN_test_RandM(VPBROADCASTD_128,
    917                "vpbroadcastd %%xmm9, %%xmm7",
    918                "vpbroadcastd (%%rax), %%xmm7")
    919 
    920 GEN_test_RandM(VPBROADCASTD_256,
    921                "vpbroadcastd %%xmm9, %%ymm7",
    922                "vpbroadcastd (%%rax), %%ymm7")
    923 
    924 GEN_test_RandM(VPBROADCASTQ_128,
    925                "vpbroadcastq %%xmm9, %%xmm7",
    926                "vpbroadcastq (%%rax), %%xmm7")
    927 
    928 GEN_test_RandM(VPBROADCASTQ_256,
    929                "vpbroadcastq %%xmm9, %%ymm7",
    930                "vpbroadcastq (%%rax), %%ymm7")
    931 
    /* VPMASKMOVD / VPMASKMOVQ, load form (memory to register), in 128- and
       256-bit widths.  Each test issues two masked loads:
         1. from (%rax) under the mask in %xmm8/%ymm8, into %xmm7/%ymm7;
         2. after zeroing %xmm6/%ymm6, from (%rax,%rax,4) -- i.e. address
            5 * %rax -- under that all-zero mask, into %xmm9/%ymm9.
       NOTE(review): the second load presumably checks that a fully
       masked-off load neither faults on an unmapped address nor leaves
       stale data in the destination -- confirm.
       NOTE(review): GEN_test_Monly is defined outside this excerpt;
       presumably it generates a memory-form-only test -- confirm. */
    932 GEN_test_Monly(VPMASKMOVD_128_LoadForm,
    933                "vpmaskmovd (%%rax), %%xmm8, %%xmm7;"
    934                "vxorps %%xmm6, %%xmm6, %%xmm6;"
    935                "vpmaskmovd (%%rax,%%rax,4), %%xmm6, %%xmm9")
    936 
    937 GEN_test_Monly(VPMASKMOVD_256_LoadForm,
    938                "vpmaskmovd (%%rax), %%ymm8, %%ymm7;"
    939                "vxorps %%ymm6, %%ymm6, %%ymm6;"
    940                "vpmaskmovd (%%rax,%%rax,4), %%ymm6, %%ymm9")
    941 
    942 GEN_test_Monly(VPMASKMOVQ_128_LoadForm,
    943                "vpmaskmovq (%%rax), %%xmm8, %%xmm7;"
    944                "vxorpd %%xmm6, %%xmm6, %%xmm6;"
    945                "vpmaskmovq (%%rax,%%rax,4), %%xmm6, %%xmm9")
    946 
    947 GEN_test_Monly(VPMASKMOVQ_256_LoadForm,
    948                "vpmaskmovq (%%rax), %%ymm8, %%ymm7;"
    949                "vxorpd %%ymm6, %%ymm6, %%ymm6;"
    950                "vpmaskmovq (%%rax,%%rax,4), %%ymm6, %%ymm9")
    951 
    /* VGATHERDPS: gather 32-bit floats from randArray using dword indices,
       under the per-element mask in %xmm6/%ymm6, into %xmm9/%ymm9.
       Steps, identical for both widths:
         - the shift pair by 25 puts the low 7 bits (0..127) of every dword
           of %ymm7 into %ymm8;
         - vblendvps keeps that bounded index only in lanes whose mask sign
           bit is set in %ymm6; the other lanes get the raw %ymm7 dword;
         - %r14 is pointed at randArray and the gather reads from
           3(%r14, index, 4), so an enabled lane touches at most byte
           3 + 127*4 + 3 = 514 of the 1027-byte array;
         - %r14 is zeroed afterwards.
       NOTE(review): zeroing %r14 presumably keeps the run-dependent
       address out of the printed register state -- confirm.
       NOTE(review): GEN_test_Ronly is defined outside this excerpt;
       presumably it generates a register-form-only test -- confirm. */
    952 GEN_test_Ronly(VGATHERDPS_128,
    953                "vpslld $25, %%xmm7, %%xmm8;"
    954                "vpsrld $25, %%xmm8, %%xmm8;"
    955                "vblendvps %%xmm6, %%xmm8, %%xmm7, %%xmm8;"
    956                "leaq randArray(%%rip), %%r14;"
    957                "vgatherdps %%xmm6, 3(%%r14,%%xmm8,4), %%xmm9;"
    958                "xorl %%r14d, %%r14d")
    959 
    960 GEN_test_Ronly(VGATHERDPS_256,
    961                "vpslld $25, %%ymm7, %%ymm8;"
    962                "vpsrld $25, %%ymm8, %%ymm8;"
    963                "vblendvps %%ymm6, %%ymm8, %%ymm7, %%ymm8;"
    964                "leaq randArray(%%rip), %%r14;"
    965                "vgatherdps %%ymm6, 3(%%r14,%%ymm8,4), %%ymm9;"
    966                "xorl %%r14d, %%r14d")
    967 
    /* VGATHERQPS, variant 1 (base register + scaled index): gather 32-bit
       floats from randArray using qword indices, under the dword mask in
       %xmm6, into %xmm9.
         - the shift pair by 57 puts the low 7 bits (0..127) of every qword
           of %ymm7 into %ymm8;
         - the mask elements are dwords but the indices are qwords, so
           vpmovsxdq widens the low mask dwords of %xmm6 to qwords in
           %ymm9, which vblendvpd then uses to keep the bounded index only
           in enabled lanes (other lanes get the raw %ymm7 qword);
         - %ymm9 was used as scratch, so it is reloaded from 96(%0);
         - the gather reads from 3(%r14, index, 4) with %r14 = randArray,
           so an enabled lane touches at most byte 3 + 127*4 + 3 = 514;
         - %r14 is zeroed afterwards.
       NOTE(review): %0 is presumably the pointer to the test's Block, in
       which case offset 96 is its a4 field -- confirm against the
       GEN_test_Ronly definition, which is outside this excerpt. */
    968 GEN_test_Ronly(VGATHERQPS_128_1,
    969                "vpsllq $57, %%xmm7, %%xmm8;"
    970                "vpsrlq $57, %%xmm8, %%xmm8;"
    971                "vpmovsxdq %%xmm6, %%xmm9;"
    972                "vblendvpd %%xmm9, %%xmm8, %%xmm7, %%xmm8;"
    973                "vmovdqa 96(%0), %%ymm9;"
    974                "leaq randArray(%%rip), %%r14;"
    975                "vgatherqps %%xmm6, 3(%%r14,%%xmm8,4), %%xmm9;"
    976                "xorl %%r14d, %%r14d")
    977 
    /* 256-bit index form: four qword indices in %ymm8, but mask and
       destination are still the xmm registers (four dword elements). */
    978 GEN_test_Ronly(VGATHERQPS_256_1,
    979                "vpsllq $57, %%ymm7, %%ymm8;"
    980                "vpsrlq $57, %%ymm8, %%ymm8;"
    981                "vpmovsxdq %%xmm6, %%ymm9;"
    982                "vblendvpd %%ymm9, %%ymm8, %%ymm7, %%ymm8;"
    983                "vmovdqa 96(%0), %%ymm9;"
    984                "leaq randArray(%%rip), %%r14;"
    985                "vgatherqps %%xmm6, 3(%%r14,%%ymm8,4), %%xmm9;"
    986                "xorl %%r14d, %%r14d")
    987 
    /* VGATHERQPS, variant 2 (no base register): the index vector itself
       holds absolute addresses.
         - indices are bounded to 0..127 and blended with the widened mask
           exactly as in a base-register gather; %ymm9 (scratch) is then
           reloaded from 96(%0);
         - the address of randArray is moved from %r14 into %xmm7 and
           broadcast to every qword; the indices are multiplied by 4
           (shift left by 2) and the base is added, giving full addresses;
         - the gather uses 1(, index, 1): displacement 1, scale 1, no base,
           so an enabled lane touches at most byte 1 + 127*4 + 3 = 512 of
           the 1027-byte array;
         - the base is subtracted from %ymm8 again, %ymm7 (clobbered by the
           broadcast) is reloaded from 0(%0), and %r14 is zeroed.
       NOTE(review): the subtract/reload/zero steps presumably keep
       run-dependent addresses out of the printed state -- confirm.
       NOTE(review): %0 is presumably the pointer to the test's Block
       (offset 0 = a1, offset 96 = a4) -- confirm against the
       GEN_test_Ronly definition, which is outside this excerpt. */
    988 GEN_test_Ronly(VGATHERQPS_128_2,
    989                "vpsllq $57, %%xmm7, %%xmm8;"
    990                "vpsrlq $57, %%xmm8, %%xmm8;"
    991                "vpmovsxdq %%xmm6, %%xmm9;"
    992                "vblendvpd %%xmm9, %%xmm8, %%xmm7, %%xmm8;"
    993                "vmovdqa 96(%0), %%ymm9;"
    994                "leaq randArray(%%rip), %%r14;"
    995                "vmovq %%r14, %%xmm7;"
    996                "vpsllq $2, %%xmm8, %%xmm8;"
    997                "vpbroadcastq %%xmm7, %%xmm7;"
    998                "vpaddq %%xmm7, %%xmm8, %%xmm8;"
    999                "vgatherqps %%xmm6, 1(,%%xmm8,1), %%xmm9;"
   1000                "vpsubq %%xmm7, %%xmm8, %%xmm8;"
   1001                "vmovdqa 0(%0), %%ymm7;"
   1002                "xorl %%r14d, %%r14d")
   1003 
   /* 256-bit index form of the same sequence: four qword addresses in
      %ymm8, mask and destination still in xmm registers. */
   1004 GEN_test_Ronly(VGATHERQPS_256_2,
   1005                "vpsllq $57, %%ymm7, %%ymm8;"
   1006                "vpsrlq $57, %%ymm8, %%ymm8;"
   1007                "vpmovsxdq %%xmm6, %%ymm9;"
   1008                "vblendvpd %%ymm9, %%ymm8, %%ymm7, %%ymm8;"
   1009                "vmovdqa 96(%0), %%ymm9;"
   1010                "leaq randArray(%%rip), %%r14;"
   1011                "vmovq %%r14, %%xmm7;"
   1012                "vpsllq $2, %%ymm8, %%ymm8;"
   1013                "vpbroadcastq %%xmm7, %%ymm7;"
   1014                "vpaddq %%ymm7, %%ymm8, %%ymm8;"
   1015                "vgatherqps %%xmm6, 1(,%%ymm8,1), %%xmm9;"
   1016                "vpsubq %%ymm7, %%ymm8, %%ymm8;"
   1017                "vmovdqa 0(%0), %%ymm7;"
   1018                "xorl %%r14d, %%r14d")
   1019 
   /* VGATHERDPD, 128 bit: gather two doubles from randArray using the two
      low dword indices of %xmm8, under the qword mask in %xmm6, into
      %xmm9.
        - the shift pair by 26 puts the low 6 bits (0..63) of every dword
          of %xmm7 into %xmm8;
        - the mask elements are qwords but the indices are dwords, so
          vshufps $13 copies dwords 1 and 3 of %xmm6 (the halves holding
          the mask sign bits) into dwords 0 and 1 of %xmm9, lining them up
          with the indices for vblendvps;
        - %ymm9 was used as scratch, so it is reloaded from 96(%0);
        - the gather reads from 3(%r14, index, 8) with %r14 = randArray,
          so an enabled lane touches at most byte 3 + 63*8 + 7 = 514 of
          the 1027-byte array;
        - %r14 is zeroed afterwards.
      NOTE(review): %0 is presumably the pointer to the test's Block, in
      which case offset 96 is its a4 field -- confirm against the
      GEN_test_Ronly definition, which is outside this excerpt. */
   1020 GEN_test_Ronly(VGATHERDPD_128,
   1021                "vpslld $26, %%xmm7, %%xmm8;"
   1022                "vpsrld $26, %%xmm8, %%xmm8;"
   1023                "vshufps $13, %%xmm6, %%xmm6, %%xmm9;"
   1024                "vblendvps %%xmm9, %%xmm8, %%xmm7, %%xmm8;"
   1025                "vmovdqa 96(%0), %%ymm9;"
   1026                "leaq randArray(%%rip), %%r14;"
   1027                "vgatherdpd %%xmm6, 3(%%r14,%%xmm8,8), %%xmm9;"
   1028                "xorl %%r14d, %%r14d")
   1029 
   /* VGATHERDPD, 256 bit: four doubles, four dword indices in %xmm8, qword
      mask in %ymm6.  vextracti128 copies the upper half of the mask into
      %xmm9, and vshufps $221 then packs the high dword of each of the four
      mask qwords into the low 128 bits of %ymm9, lining them up with the
      four dword indices for vblendvps.  The rest matches the 128-bit
      test. */
   1030 GEN_test_Ronly(VGATHERDPD_256,
   1031                "vpslld $26, %%ymm7, %%ymm8;"
   1032                "vpsrld $26, %%ymm8, %%ymm8;"
   1033                "vextracti128 $1, %%ymm6, %%xmm9;"
   1034                "vshufps $221, %%ymm9, %%ymm6, %%ymm9;"
   1035                "vblendvps %%ymm9, %%ymm8, %%ymm7, %%ymm8;"
   1036                "vmovdqa 96(%0), %%ymm9;"
   1037                "leaq randArray(%%rip), %%r14;"
   1038                "vgatherdpd %%ymm6, 3(%%r14,%%xmm8,8), %%ymm9;"
   1039                "xorl %%r14d, %%r14d")
   1040 
   /* VGATHERQPD, variant 1 (base register + scaled index): gather doubles
      from randArray using qword indices, under the qword mask in
      %xmm6/%ymm6, into %xmm9/%ymm9.  Steps, identical for both widths:
        - the shift pair by 58 puts the low 6 bits (0..63) of every qword
          of %ymm7 into %ymm8;
        - mask and index elements are both qwords, so vblendvpd can use
          %ymm6 directly: lanes whose mask sign bit is set keep the bounded
          index, the others get the raw %ymm7 qword;
        - the gather reads from 3(%r14, index, 8) with %r14 = randArray,
          so an enabled lane touches at most byte 3 + 63*8 + 7 = 514 of
          the 1027-byte array;
        - %r14 is zeroed afterwards.
      NOTE(review): zeroing %r14 presumably keeps the run-dependent
      address out of the printed register state -- confirm. */
   1041 GEN_test_Ronly(VGATHERQPD_128_1,
   1042                "vpsllq $58, %%xmm7, %%xmm8;"
   1043                "vpsrlq $58, %%xmm8, %%xmm8;"
   1044                "vblendvpd %%xmm6, %%xmm8, %%xmm7, %%xmm8;"
   1045                "leaq randArray(%%rip), %%r14;"
   1046                "vgatherqpd %%xmm6, 3(%%r14,%%xmm8,8), %%xmm9;"
   1047                "xorl %%r14d, %%r14d")
   1048 
   1049 GEN_test_Ronly(VGATHERQPD_256_1,
   1050                "vpsllq $58, %%ymm7, %%ymm8;"
   1051                "vpsrlq $58, %%ymm8, %%ymm8;"
   1052                "vblendvpd %%ymm6, %%ymm8, %%ymm7, %%ymm8;"
   1053                "leaq randArray(%%rip), %%r14;"
   1054                "vgatherqpd %%ymm6, 3(%%r14,%%ymm8,8), %%ymm9;"
   1055                "xorl %%r14d, %%r14d")
   1056 
   /* VGATHERQPD, variant 2 (no base register): the index vector itself
      holds absolute addresses.  Steps, identical for both widths:
        - indices are bounded to 0..63 by the shift pair by 58 and blended
          with the raw %ymm7 values under the mask in %ymm6;
        - the address of randArray is moved from %r14 into %xmm7 and
          broadcast to every qword; the indices are multiplied by 4 (shift
          left by 2 -- note: not by the 8-byte element size) and the base
          is added, giving full addresses;
        - the gather uses 1(, index, 1): displacement 1, scale 1, no base,
          so an enabled lane touches at most byte 1 + 63*4 + 7 = 260 of
          the 1027-byte array;
        - the base is subtracted from %ymm8 again, %ymm7 (clobbered by the
          broadcast) is reloaded from 0(%0), and %r14 is zeroed.
      NOTE(review): the subtract/reload/zero steps presumably keep
      run-dependent addresses out of the printed state -- confirm.
      NOTE(review): %0 is presumably the pointer to the test's Block, in
      which case offset 0 is its a1 field -- confirm against the
      GEN_test_Ronly definition, which is outside this excerpt. */
   1057 GEN_test_Ronly(VGATHERQPD_128_2,
   1058                "vpsllq $58, %%xmm7, %%xmm8;"
   1059                "vpsrlq $58, %%xmm8, %%xmm8;"
   1060                "vblendvpd %%xmm6, %%xmm8, %%xmm7, %%xmm8;"
   1061                "leaq randArray(%%rip), %%r14;"
   1062                "vmovq %%r14, %%xmm7;"
   1063                "vpsllq $2, %%xmm8, %%xmm8;"
   1064                "vpbroadcastq %%xmm7, %%xmm7;"
   1065                "vpaddq %%xmm7, %%xmm8, %%xmm8;"
   1066                "vgatherqpd %%xmm6, 1(,%%xmm8,1), %%xmm9;"
   1067                "vpsubq %%xmm7, %%xmm8, %%xmm8;"
   1068                "vmovdqa 0(%0), %%ymm7;"
   1069                "xorl %%r14d, %%r14d")
   1070 
   1071 GEN_test_Ronly(VGATHERQPD_256_2,
   1072                "vpsllq $58, %%ymm7, %%ymm8;"
   1073                "vpsrlq $58, %%ymm8, %%ymm8;"
   1074                "vblendvpd %%ymm6, %%ymm8, %%ymm7, %%ymm8;"
   1075                "leaq randArray(%%rip), %%r14;"
   1076                "vmovq %%r14, %%xmm7;"
   1077                "vpsllq $2, %%ymm8, %%ymm8;"
   1078                "vpbroadcastq %%xmm7, %%ymm7;"
   1079                "vpaddq %%ymm7, %%ymm8, %%ymm8;"
   1080                "vgatherqpd %%ymm6, 1(,%%ymm8,1), %%ymm9;"
   1081                "vpsubq %%ymm7, %%ymm8, %%ymm8;"
   1082                "vmovdqa 0(%0), %%ymm7;"
   1083                "xorl %%r14d, %%r14d")
   1084 
   /* VPGATHERDD: gather 32-bit integers from randArray using dword
      indices, under the per-element mask in %xmm6/%ymm6, into
      %xmm9/%ymm9.  Steps, identical for both widths:
        - the shift pair by 25 puts the low 7 bits (0..127) of every dword
          of %ymm7 into %ymm8;
        - vblendvps keeps that bounded index only in lanes whose mask sign
          bit is set in %ymm6; the other lanes get the raw %ymm7 dword;
        - the gather reads from 3(%r14, index, 4) with %r14 = randArray,
          so an enabled lane touches at most byte 3 + 127*4 + 3 = 514 of
          the 1027-byte array;
        - %r14 is zeroed afterwards.
      NOTE(review): zeroing %r14 presumably keeps the run-dependent
      address out of the printed register state -- confirm. */
   1085 GEN_test_Ronly(VPGATHERDD_128,
   1086                "vpslld $25, %%xmm7, %%xmm8;"
   1087                "vpsrld $25, %%xmm8, %%xmm8;"
   1088                "vblendvps %%xmm6, %%xmm8, %%xmm7, %%xmm8;"
   1089                "leaq randArray(%%rip), %%r14;"
   1090                "vpgatherdd %%xmm6, 3(%%r14,%%xmm8,4), %%xmm9;"
   1091                "xorl %%r14d, %%r14d")
   1092 
   1093 GEN_test_Ronly(VPGATHERDD_256,
   1094                "vpslld $25, %%ymm7, %%ymm8;"
   1095                "vpsrld $25, %%ymm8, %%ymm8;"
   1096                "vblendvps %%ymm6, %%ymm8, %%ymm7, %%ymm8;"
   1097                "leaq randArray(%%rip), %%r14;"
   1098                "vpgatherdd %%ymm6, 3(%%r14,%%ymm8,4), %%ymm9;"
   1099                "xorl %%r14d, %%r14d")
   1100 
   /* VPGATHERQD, variant 1 (base register + scaled index): gather 32-bit
      integers from randArray using qword indices, under the dword mask in
      %xmm6, into %xmm9.
        - the shift pair by 57 puts the low 7 bits (0..127) of every qword
          of %ymm7 into %ymm8;
        - the mask elements are dwords but the indices are qwords, so
          vpmovsxdq widens the low mask dwords of %xmm6 to qwords in
          %ymm9, which vblendvpd then uses to keep the bounded index only
          in enabled lanes (other lanes get the raw %ymm7 qword);
        - %ymm9 was used as scratch, so it is reloaded from 96(%0);
        - the gather reads from 3(%r14, index, 4) with %r14 = randArray,
          so an enabled lane touches at most byte 3 + 127*4 + 3 = 514 of
          the 1027-byte array;
        - %r14 is zeroed afterwards.
      NOTE(review): %0 is presumably the pointer to the test's Block, in
      which case offset 96 is its a4 field -- confirm against the
      GEN_test_Ronly definition, which is outside this excerpt. */
   1101 GEN_test_Ronly(VPGATHERQD_128_1,
   1102                "vpsllq $57, %%xmm7, %%xmm8;"
   1103                "vpsrlq $57, %%xmm8, %%xmm8;"
   1104                "vpmovsxdq %%xmm6, %%xmm9;"
   1105                "vblendvpd %%xmm9, %%xmm8, %%xmm7, %%xmm8;"
   1106                "vmovdqa 96(%0), %%ymm9;"
   1107                "leaq randArray(%%rip), %%r14;"
   1108                "vpgatherqd %%xmm6, 3(%%r14,%%xmm8,4), %%xmm9;"
   1109                "xorl %%r14d, %%r14d")
   1110 
   /* 256-bit index form: four qword indices in %ymm8, but mask and
      destination are still the xmm registers (four dword elements). */
   1111 GEN_test_Ronly(VPGATHERQD_256_1,
   1112                "vpsllq $57, %%ymm7, %%ymm8;"
   1113                "vpsrlq $57, %%ymm8, %%ymm8;"
   1114                "vpmovsxdq %%xmm6, %%ymm9;"
   1115                "vblendvpd %%ymm9, %%ymm8, %%ymm7, %%ymm8;"
   1116                "vmovdqa 96(%0), %%ymm9;"
   1117                "leaq randArray(%%rip), %%r14;"
   1118                "vpgatherqd %%xmm6, 3(%%r14,%%ymm8,4), %%xmm9;"
   1119                "xorl %%r14d, %%r14d")
   1120 
   /* VPGATHERQD, variant 2 (no base register): the index vector itself
      holds absolute addresses.
        - indices are bounded to 0..127 by the shift pair by 57 and blended
          with the raw %ymm7 values under the mask widened by vpmovsxdq;
          %ymm9 (scratch) is then reloaded from 96(%0);
        - the address of randArray is moved from %r14 into %xmm7 and
          broadcast to every qword; the indices are multiplied by 4 (shift
          left by 2) and the base is added, giving full addresses;
        - the gather uses 1(, index, 1): displacement 1, scale 1, no base,
          so an enabled lane touches at most byte 1 + 127*4 + 3 = 512 of
          the 1027-byte array;
        - the base is subtracted from %ymm8 again, %ymm7 (clobbered by the
          broadcast) is reloaded from 0(%0), and %r14 is zeroed.
      NOTE(review): the subtract/reload/zero steps presumably keep
      run-dependent addresses out of the printed state -- confirm.
      NOTE(review): %0 is presumably the pointer to the test's Block
      (offset 0 = a1, offset 96 = a4) -- confirm against the
      GEN_test_Ronly definition, which is outside this excerpt. */
   1121 GEN_test_Ronly(VPGATHERQD_128_2,
   1122                "vpsllq $57, %%xmm7, %%xmm8;"
   1123                "vpsrlq $57, %%xmm8, %%xmm8;"
   1124                "vpmovsxdq %%xmm6, %%xmm9;"
   1125                "vblendvpd %%xmm9, %%xmm8, %%xmm7, %%xmm8;"
   1126                "vmovdqa 96(%0), %%ymm9;"
   1127                "leaq randArray(%%rip), %%r14;"
   1128                "vmovq %%r14, %%xmm7;"
   1129                "vpsllq $2, %%xmm8, %%xmm8;"
   1130                "vpbroadcastq %%xmm7, %%xmm7;"
   1131                "vpaddq %%xmm7, %%xmm8, %%xmm8;"
   1132                "vpgatherqd %%xmm6, 1(,%%xmm8,1), %%xmm9;"
   1133                "vpsubq %%xmm7, %%xmm8, %%xmm8;"
   1134                "vmovdqa 0(%0), %%ymm7;"
   1135                "xorl %%r14d, %%r14d")
   1136 
   /* 256-bit index form of the same sequence: four qword addresses in
      %ymm8, mask and destination still in xmm registers. */
   1137 GEN_test_Ronly(VPGATHERQD_256_2,
   1138                "vpsllq $57, %%ymm7, %%ymm8;"
   1139                "vpsrlq $57, %%ymm8, %%ymm8;"
   1140                "vpmovsxdq %%xmm6, %%ymm9;"
   1141                "vblendvpd %%ymm9, %%ymm8, %%ymm7, %%ymm8;"
   1142                "vmovdqa 96(%0), %%ymm9;"
   1143                "leaq randArray(%%rip), %%r14;"
   1144                "vmovq %%r14, %%xmm7;"
   1145                "vpsllq $2, %%ymm8, %%ymm8;"
   1146                "vpbroadcastq %%xmm7, %%ymm7;"
   1147                "vpaddq %%ymm7, %%ymm8, %%ymm8;"
   1148                "vpgatherqd %%xmm6, 1(,%%ymm8,1), %%xmm9;"
   1149                "vpsubq %%ymm7, %%ymm8, %%ymm8;"
   1150                "vmovdqa 0(%0), %%ymm7;"
   1151                "xorl %%r14d, %%r14d")
   1152 
   /* VPGATHERDQ, 128 bit: gather two 64-bit integers from randArray using
      the two low dword indices of %xmm8, under the qword mask in %xmm6,
      into %xmm9.
        - the shift pair by 26 puts the low 6 bits (0..63) of every dword
          of %xmm7 into %xmm8;
        - the mask elements are qwords but the indices are dwords, so
          vshufps $13 copies dwords 1 and 3 of %xmm6 (the halves holding
          the mask sign bits) into dwords 0 and 1 of %xmm9, lining them up
          with the indices for vblendvps;
        - %ymm9 was used as scratch, so it is reloaded from 96(%0);
        - the gather reads from 3(%r14, index, 8) with %r14 = randArray,
          so an enabled lane touches at most byte 3 + 63*8 + 7 = 514 of
          the 1027-byte array;
        - %r14 is zeroed afterwards.
      NOTE(review): %0 is presumably the pointer to the test's Block, in
      which case offset 96 is its a4 field -- confirm against the
      GEN_test_Ronly definition, which is outside this excerpt. */
   1153 GEN_test_Ronly(VPGATHERDQ_128,
   1154                "vpslld $26, %%xmm7, %%xmm8;"
   1155                "vpsrld $26, %%xmm8, %%xmm8;"
   1156                "vshufps $13, %%xmm6, %%xmm6, %%xmm9;"
   1157                "vblendvps %%xmm9, %%xmm8, %%xmm7, %%xmm8;"
   1158                "vmovdqa 96(%0), %%ymm9;"
   1159                "leaq randArray(%%rip), %%r14;"
   1160                "vpgatherdq %%xmm6, 3(%%r14,%%xmm8,8), %%xmm9;"
   1161                "xorl %%r14d, %%r14d")
   1162 
   /* VPGATHERDQ, 256 bit: four qwords, four dword indices in %xmm8, qword
      mask in %ymm6.  vextracti128 copies the upper half of the mask into
      %xmm9, and vshufps $221 then packs the high dword of each of the four
      mask qwords into the low 128 bits of %ymm9, lining them up with the
      four dword indices for vblendvps.  The rest matches the 128-bit
      test. */
   1163 GEN_test_Ronly(VPGATHERDQ_256,
   1164                "vpslld $26, %%ymm7, %%ymm8;"
   1165                "vpsrld $26, %%ymm8, %%ymm8;"
   1166                "vextracti128 $1, %%ymm6, %%xmm9;"
   1167                "vshufps $221, %%ymm9, %%ymm6, %%ymm9;"
   1168                "vblendvps %%ymm9, %%ymm8, %%ymm7, %%ymm8;"
   1169                "vmovdqa 96(%0), %%ymm9;"
   1170                "leaq randArray(%%rip), %%r14;"
   1171                "vpgatherdq %%ymm6, 3(%%r14,%%xmm8,8), %%ymm9;"
   1172                "xorl %%r14d, %%r14d")
   1173 
   /* VPGATHERQQ, variant 1 (base register + scaled index): gather 64-bit
      integers from randArray using qword indices, under the qword mask in
      %xmm6/%ymm6, into %xmm9/%ymm9.  Steps, identical for both widths:
        - the shift pair by 58 puts the low 6 bits (0..63) of every qword
          of %ymm7 into %ymm8;
        - mask and index elements are both qwords, so vblendvpd can use
          %ymm6 directly: lanes whose mask sign bit is set keep the bounded
          index, the others get the raw %ymm7 qword;
        - the gather reads from 3(%r14, index, 8) with %r14 = randArray,
          so an enabled lane touches at most byte 3 + 63*8 + 7 = 514 of
          the 1027-byte array;
        - %r14 is zeroed afterwards.
      NOTE(review): zeroing %r14 presumably keeps the run-dependent
      address out of the printed register state -- confirm. */
   1174 GEN_test_Ronly(VPGATHERQQ_128_1,
   1175                "vpsllq $58, %%xmm7, %%xmm8;"
   1176                "vpsrlq $58, %%xmm8, %%xmm8;"
   1177                "vblendvpd %%xmm6, %%xmm8, %%xmm7, %%xmm8;"
   1178                "leaq randArray(%%rip), %%r14;"
   1179                "vpgatherqq %%xmm6, 3(%%r14,%%xmm8,8), %%xmm9;"
   1180                "xorl %%r14d, %%r14d")
   1181 
   1182 GEN_test_Ronly(VPGATHERQQ_256_1,
   1183                "vpsllq $58, %%ymm7, %%ymm8;"
   1184                "vpsrlq $58, %%ymm8, %%ymm8;"
   1185                "vblendvpd %%ymm6, %%ymm8, %%ymm7, %%ymm8;"
   1186                "leaq randArray(%%rip), %%r14;"
   1187                "vpgatherqq %%ymm6, 3(%%r14,%%ymm8,8), %%ymm9;"
   1188                "xorl %%r14d, %%r14d")
   1189 
   /* VPGATHERQQ, variant 2 (no base register): the index vector itself
      holds absolute addresses.  Steps, identical for both widths:
        - indices are bounded to 0..63 by the shift pair by 58 and blended
          with the raw %ymm7 values under the mask in %ymm6;
        - the address of randArray is moved from %r14 into %xmm7 and
          broadcast to every qword; the indices are multiplied by 4 (shift
          left by 2 -- note: not by the 8-byte element size) and the base
          is added, giving full addresses;
        - the gather uses 1(, index, 1): displacement 1, scale 1, no base,
          so an enabled lane touches at most byte 1 + 63*4 + 7 = 260 of
          the 1027-byte array;
        - the base is subtracted from %ymm8 again, %ymm7 (clobbered by the
          broadcast) is reloaded from 0(%0), and %r14 is zeroed.
      NOTE(review): the subtract/reload/zero steps presumably keep
      run-dependent addresses out of the printed state -- confirm.
      NOTE(review): %0 is presumably the pointer to the test's Block, in
      which case offset 0 is its a1 field -- confirm against the
      GEN_test_Ronly definition, which is outside this excerpt. */
   1190 GEN_test_Ronly(VPGATHERQQ_128_2,
   1191                "vpsllq $58, %%xmm7, %%xmm8;"
   1192                "vpsrlq $58, %%xmm8, %%xmm8;"
   1193                "vblendvpd %%xmm6, %%xmm8, %%xmm7, %%xmm8;"
   1194                "leaq randArray(%%rip), %%r14;"
   1195                "vmovq %%r14, %%xmm7;"
   1196                "vpsllq $2, %%xmm8, %%xmm8;"
   1197                "vpbroadcastq %%xmm7, %%xmm7;"
   1198                "vpaddq %%xmm7, %%xmm8, %%xmm8;"
   1199                "vpgatherqq %%xmm6, 1(,%%xmm8,1), %%xmm9;"
   1200                "vpsubq %%xmm7, %%xmm8, %%xmm8;"
   1201                "vmovdqa 0(%0), %%ymm7;"
   1202                "xorl %%r14d, %%r14d")
   1203 
   1204 GEN_test_Ronly(VPGATHERQQ_256_2,
   1205                "vpsllq $58, %%ymm7, %%ymm8;"
   1206                "vpsrlq $58, %%ymm8, %%ymm8;"
   1207                "vblendvpd %%ymm6, %%ymm8, %%ymm7, %%ymm8;"
   1208                "leaq randArray(%%rip), %%r14;"
   1209                "vmovq %%r14, %%xmm7;"
   1210                "vpsllq $2, %%ymm8, %%ymm8;"
   1211                "vpbroadcastq %%xmm7, %%ymm7;"
   1212                "vpaddq %%ymm7, %%ymm8, %%ymm8;"
   1213                "vpgatherqq %%ymm6, 1(,%%ymm8,1), %%ymm9;"
   1214                "vpsubq %%ymm7, %%ymm8, %%ymm8;"
   1215                "vmovdqa 0(%0), %%ymm7;"
   1216                "xorl %%r14d, %%r14d")
   1217 
   1218 /* Comment duplicated above, for convenient reference:
   1219    Allowed operands in test insns:
   1220      Reg form:  %ymm6,  %ymm7, %ymm8, %ymm9 and %r14.
   1221      Mem form:  (%rax), %ymm7, %ymm8, %ymm9 and %r14.
   1222    Imm8 etc fields are also allowed, where they make sense.
   1223 */
   1224 
   /* Number of times DO_D runs each test. */
   1225 #define N_DEFAULT_ITERS 3
   1226 
   /* DO_N(_iters, NAME) calls test_NAME() _iters times.  The name is
      token-pasted onto the "test_" prefix, so NAME must match the first
      argument of one of the GEN_test_* invocations.  The loop counter i is
      declared inside the expansion, and _iters is re-evaluated on every
      loop test, so pass a side-effect-free expression that does not
      itself refer to a variable named i. */
   1227 // Do the specified test some number of times
   1228 #define DO_N(_iters, _testfn) \
   1229    do { int i; for (i = 0; i < (_iters); i++) { test_##_testfn(); } } while (0)
   1230 
   /* DO_D(NAME) is DO_N with the default count N_DEFAULT_ITERS. */
   1231 // Do the specified test the default number of times
   1232 #define DO_D(_testfn) DO_N(N_DEFAULT_ITERS, _testfn)
   1233 
   1234 
   /* Test driver: runs every generated test_NAME function N_DEFAULT_ITERS
      times (via DO_D), in the fixed order listed below, then returns 0.
      randArray is filled from randUChar() immediately before the gather
      tests, which are the tests that read it.
      NOTE(review): randUChar() keeps a single static seed, so filling
      randArray advances the shared generator; if the generated tests also
      draw their inputs from it (presumably via randBlock), adding,
      removing or reordering calls changes the data seen by every later
      test -- confirm before editing the list. */
   1235 int main ( void )
   1236 {
   1237    DO_D( VPOR_256 );
   1238    DO_D( VPXOR_256 );
   1239    DO_D( VPSUBB_256 );
   1240    DO_D( VPSUBD_256 );
   1241    DO_D( VPADDD_256 );
   1242    DO_D( VPMOVZXWD_256 );
   1243    DO_D( VPMOVZXBW_256 );
   1244    DO_D( VPBLENDVB_256 );
   1245    DO_D( VPMINSD_256 );
   1246    DO_D( VPMAXSD_256 );
   1247    DO_D( VPSHUFB_256 );
   1248    DO_D( VPUNPCKLBW_256 );
   1249    DO_D( VPUNPCKHBW_256 );
   1250    DO_D( VPABSD_256 );
   1251    DO_D( VPACKUSWB_256 );
   1252    DO_D( VPMOVMSKB_256 );
   1253    DO_D( VPAND_256 );
   1254    DO_D( VPCMPEQB_256 );
   1255    DO_D( VPSHUFLW_0x39_256 );
   1256    DO_D( VPSHUFHW_0x39_256 );
   1257    DO_D( VPMULLW_256 );
   1258    DO_D( VPADDUSW_256 );
   1259    DO_D( VPMULHUW_256 );
   1260    DO_D( VPADDUSB_256 );
   1261    DO_D( VPUNPCKLWD_256 );
   1262    DO_D( VPUNPCKHWD_256 );
   1263    DO_D( VPSLLD_0x05_256 );
   1264    DO_D( VPSRLD_0x05_256 );
   1265    DO_D( VPSRAD_0x05_256 );
   1266    DO_D( VPSUBUSB_256 );
   1267    DO_D( VPSUBSB_256 );
   1268    DO_D( VPSRLDQ_0x05_256 );
   1269    DO_D( VPSLLDQ_0x05_256 );
   1270    DO_D( VPANDN_256 );
   1271    DO_D( VPUNPCKLQDQ_256 );
   1272    DO_D( VPSRLW_0x05_256 );
   1273    DO_D( VPSLLW_0x05_256 );
   1274    DO_D( VPADDW_256 );
   1275    DO_D( VPACKSSDW_256 );
   1276    DO_D( VPUNPCKLDQ_256 );
   1277    DO_D( VPCMPEQD_256 );
   1278    DO_D( VPSHUFD_0x39_256 );
   1279    DO_D( VPADDQ_256 );
   1280    DO_D( VPSUBQ_256 );
   1281    DO_D( VPSUBW_256 );
   1282    DO_D( VPCMPEQQ_256 );
   1283    DO_D( VPCMPGTQ_256 );
   1284    DO_D( VPSRLQ_0x05_256 );
   1285    DO_D( VPMULUDQ_256 );
   1286    DO_D( VPMULDQ_256 );
   1287    DO_D( VPSLLQ_0x05_256 );
   1288    DO_D( VPMAXUD_256 );
   1289    DO_D( VPMINUD_256 );
   1290    DO_D( VPMULLD_256 );
   1291    DO_D( VPMAXUW_256 );
   1292    DO_D( VPMINUW_256 );
   1293    DO_D( VPMAXSW_256 );
   1294    DO_D( VPMINSW_256 );
   1295    DO_D( VPMAXUB_256 );
   1296    DO_D( VPMINUB_256 );
   1297    DO_D( VPMAXSB_256 );
   1298    DO_D( VPMINSB_256 );
   1299    DO_D( VPMOVSXBW_256 );
   1300    DO_D( VPSUBUSW_256 );
   1301    DO_D( VPSUBSW_256 );
   1302    DO_D( VPCMPEQW_256 );
   1303    DO_D( VPADDB_256 );
   1304    DO_D( VPUNPCKHDQ_256 );
   1305    DO_D( VPMOVSXDQ_256 );
   1306    DO_D( VPMOVSXWD_256 );
   1307    DO_D( VPMULHW_256 );
   1308    DO_D( VPUNPCKHQDQ_256 );
   1309    DO_D( VPSRAW_0x05_256 );
   1310    DO_D( VPCMPGTB_256 );
   1311    DO_D( VPCMPGTW_256 );
   1312    DO_D( VPCMPGTD_256 );
   1313    DO_D( VPMOVZXBD_256 );
   1314    DO_D( VPMOVSXBD_256 );
   1315    DO_D( VPALIGNR_256_1of3 );
   1316    DO_D( VPALIGNR_256_2of3 );
   1317    DO_D( VPALIGNR_256_3of3 );
   1318    DO_D( VPBLENDW_256_0x00 );
   1319    DO_D( VPBLENDW_256_0xFE );
   1320    DO_D( VPBLENDW_256_0x30 );
   1321    DO_D( VPBLENDW_256_0x21 );
   1322    DO_D( VPBLENDW_256_0xD7 );
   1323    DO_D( VPBLENDW_256_0xB5 );
   1324    DO_D( VPBLENDW_256_0x85 );
   1325    DO_D( VPBLENDW_256_0x29 );
   1326    DO_D( VPSLLW_256 );
   1327    DO_D( VPSRLW_256 );
   1328    DO_D( VPSRAW_256 );
   1329    DO_D( VPSLLD_256 );
   1330    DO_D( VPSRLD_256 );
   1331    DO_D( VPSRAD_256 );
   1332    DO_D( VPSLLQ_256 );
   1333    DO_D( VPSRLQ_256 );
   1334    DO_D( VPMADDWD_256 );
   1335    DO_D( VMOVNTDQA_256 );
   1336    DO_D( VPACKSSWB_256 );
   1337    DO_D( VPAVGB_256 );
   1338    DO_D( VPAVGW_256 );
   1339    DO_D( VPADDSB_256 );
   1340    DO_D( VPADDSW_256 );
   1341    DO_D( VPHADDW_256 );
   1342    DO_D( VPHADDD_256 );
   1343    DO_D( VPHADDSW_256 );
   1344    DO_D( VPMADDUBSW_256 );
   1345    DO_D( VPHSUBW_256 );
   1346    DO_D( VPHSUBD_256 );
   1347    DO_D( VPHSUBSW_256 );
   1348    DO_D( VPABSB_256 );
   1349    DO_D( VPABSW_256 );
   1350    DO_D( VPMOVSXBQ_256 );
   1351    DO_D( VPMOVSXWQ_256 );
   1352    DO_D( VPACKUSDW_256 );
   1353    DO_D( VPMOVZXBQ_256 );
   1354    DO_D( VPMOVZXWQ_256 );
   1355    DO_D( VPMOVZXDQ_256 );
   1356    DO_D( VMPSADBW_256_0x0 );
   1357    DO_D( VMPSADBW_256_0x39 );
   1358    DO_D( VMPSADBW_256_0x32 );
   1359    DO_D( VMPSADBW_256_0x2b );
   1360    DO_D( VMPSADBW_256_0x24 );
   1361    DO_D( VMPSADBW_256_0x1d );
   1362    DO_D( VMPSADBW_256_0x16 );
   1363    DO_D( VMPSADBW_256_0x0f );
   1364    DO_D( VPSADBW_256 );
   1365    DO_D( VPSIGNB_256 );
   1366    DO_D( VPSIGNW_256 );
   1367    DO_D( VPSIGND_256 );
   1368    DO_D( VPMULHRSW_256 );
   1369    DO_D( VBROADCASTI128 );
   1370    DO_D( VEXTRACTI128_0x0 );
   1371    DO_D( VEXTRACTI128_0x1 );
   1372    DO_D( VINSERTI128_0x0 );
   1373    DO_D( VINSERTI128_0x1 );
   1374    DO_D( VPERM2I128_0x00 );
   1375    DO_D( VPERM2I128_0xFF );
   1376    DO_D( VPERM2I128_0x30 );
   1377    DO_D( VPERM2I128_0x21 );
   1378    DO_D( VPERM2I128_0x12 );
   1379    DO_D( VPERM2I128_0x03 );
   1380    DO_D( VPERM2I128_0x85 );
   1381    DO_D( VPERM2I128_0x5A );
   1382    DO_D( VBROADCASTSS_128 );
   1383    DO_D( VBROADCASTSS_256 );
   1384    DO_D( VBROADCASTSD_256 );
   1385    DO_D( VPERMD );
   1386    DO_D( VPERMQ_0x00 );
   1387    DO_D( VPERMQ_0xFE );
   1388    DO_D( VPERMQ_0x30 );
   1389    DO_D( VPERMQ_0x21 );
   1390    DO_D( VPERMQ_0xD7 );
   1391    DO_D( VPERMQ_0xB5 );
   1392    DO_D( VPERMQ_0x85 );
   1393    DO_D( VPERMQ_0x29 );
   1394    DO_D( VPERMPS );
   1395    DO_D( VPERMPD_0x00 );
   1396    DO_D( VPERMPD_0xFE );
   1397    DO_D( VPERMPD_0x30 );
   1398    DO_D( VPERMPD_0x21 );
   1399    DO_D( VPERMPD_0xD7 );
   1400    DO_D( VPERMPD_0xB5 );
   1401    DO_D( VPERMPD_0x85 );
   1402    DO_D( VPERMPD_0x29 );
   1403    DO_D( VPBLENDD_128_0x00 );
   1404    DO_D( VPBLENDD_128_0x02 );
   1405    DO_D( VPBLENDD_128_0x04 );
   1406    DO_D( VPBLENDD_128_0x06 );
   1407    DO_D( VPBLENDD_128_0x08 );
   1408    DO_D( VPBLENDD_128_0x0A );
   1409    DO_D( VPBLENDD_128_0x0C );
   1410    DO_D( VPBLENDD_128_0x0E );
   1411    DO_D( VPBLENDD_256_0x00 );
   1412    DO_D( VPBLENDD_256_0xFE );
   1413    DO_D( VPBLENDD_256_0x30 );
   1414    DO_D( VPBLENDD_256_0x21 );
   1415    DO_D( VPBLENDD_256_0xD7 );
   1416    DO_D( VPBLENDD_256_0xB5 );
   1417    DO_D( VPBLENDD_256_0x85 );
   1418    DO_D( VPBLENDD_256_0x29 );
   1419    DO_D( VPSLLVD_128 );
   1420    DO_D( VPSLLVD_256 );
   1421    DO_D( VPSLLVQ_128 );
   1422    DO_D( VPSLLVQ_256 );
   1423    DO_D( VPSRLVD_128 );
   1424    DO_D( VPSRLVD_256 );
   1425    DO_D( VPSRLVQ_128 );
   1426    DO_D( VPSRLVQ_256 );
   1427    DO_D( VPSRAVD_128 );
   1428    DO_D( VPSRAVD_256 );
   1429    DO_D( VPBROADCASTB_128 );
   1430    DO_D( VPBROADCASTB_256 );
   1431    DO_D( VPBROADCASTW_128 );
   1432    DO_D( VPBROADCASTW_256 );
   1433    DO_D( VPBROADCASTD_128 );
   1434    DO_D( VPBROADCASTD_256 );
   1435    DO_D( VPBROADCASTQ_128 );
   1436    DO_D( VPBROADCASTQ_256 );
   1437    DO_D( VPMASKMOVD_128_LoadForm );
   1438    DO_D( VPMASKMOVD_256_LoadForm );
   1439    DO_D( VPMASKMOVQ_128_LoadForm );
   1440    DO_D( VPMASKMOVQ_256_LoadForm );
           /* Fill the gather source table with pseudo-random bytes before
              any of the gather tests below reads from it. */
   1441    { int i; for (i = 0; i < sizeof(randArray); i++) randArray[i] = randUChar(); }
   1442    DO_D( VGATHERDPS_128 );
   1443    DO_D( VGATHERDPS_256 );
   1444    DO_D( VGATHERQPS_128_1 );
   1445    DO_D( VGATHERQPS_256_1 );
   1446    DO_D( VGATHERQPS_128_2 );
   1447    DO_D( VGATHERQPS_256_2 );
   1448    DO_D( VGATHERDPD_128 );
   1449    DO_D( VGATHERDPD_256 );
   1450    DO_D( VGATHERQPD_128_1 );
   1451    DO_D( VGATHERQPD_256_1 );
   1452    DO_D( VGATHERQPD_128_2 );
   1453    DO_D( VGATHERQPD_256_2 );
   1454    DO_D( VPGATHERDD_128 );
   1455    DO_D( VPGATHERDD_256 );
   1456    DO_D( VPGATHERQD_128_1 );
   1457    DO_D( VPGATHERQD_256_1 );
   1458    DO_D( VPGATHERQD_128_2 );
   1459    DO_D( VPGATHERQD_256_2 );
   1460    DO_D( VPGATHERDQ_128 );
   1461    DO_D( VPGATHERDQ_256 );
   1462    DO_D( VPGATHERQQ_128_1 );
   1463    DO_D( VPGATHERQQ_256_1 );
   1464    DO_D( VPGATHERQQ_128_2 );
   1465    DO_D( VPGATHERQQ_256_2 );
   1466    return 0;
   1467 }
   1468