1 2 #include <stdio.h> 3 #include <stdlib.h> 4 #include <assert.h> 5 #include <malloc.h> 6 7 typedef unsigned char UChar; 8 typedef unsigned int UInt; 9 typedef unsigned long int UWord; 10 typedef unsigned long long int ULong; 11 12 UChar randArray[1027] __attribute__((used)); 13 14 #define IS_32_ALIGNED(_ptr) (0 == (0x1F & (UWord)(_ptr))) 15 16 typedef union { UChar u8[32]; UInt u32[8]; } YMM; 17 18 typedef struct { YMM a1; YMM a2; YMM a3; YMM a4; ULong u64; } Block; 19 20 void showYMM ( YMM* vec ) 21 { 22 int i; 23 assert(IS_32_ALIGNED(vec)); 24 for (i = 31; i >= 0; i--) { 25 printf("%02x", (UInt)vec->u8[i]); 26 if (i > 0 && 0 == ((i+0) & 7)) printf("."); 27 } 28 } 29 30 void showBlock ( char* msg, Block* block ) 31 { 32 printf(" %s\n", msg); 33 printf(" "); showYMM(&block->a1); printf("\n"); 34 printf(" "); showYMM(&block->a2); printf("\n"); 35 printf(" "); showYMM(&block->a3); printf("\n"); 36 printf(" "); showYMM(&block->a4); printf("\n"); 37 printf(" %016llx\n", block->u64); 38 } 39 40 UChar randUChar ( void ) 41 { 42 static UInt seed = 80021; 43 seed = 1103515245 * seed + 12345; 44 return (seed >> 17) & 0xFF; 45 } 46 47 void randBlock ( Block* b ) 48 { 49 int i; 50 UChar* p = (UChar*)b; 51 for (i = 0; i < sizeof(Block); i++) 52 p[i] = randUChar(); 53 } 54 55 56 /* Generate a function test_NAME, that tests the given insn, in both 57 its mem and reg forms. The reg form of the insn may mention, as 58 operands only %ymm6, %ymm7, %ymm8, %ymm9 and %r14. The mem form of 59 the insn may mention as operands only (%rax), %ymm7, %ymm8, %ymm9 60 and %r14. It's OK for the insn to clobber ymm0, as this is needed 61 for testing PCMPxSTRx. 
*/

/* Harness shape (both forms):
     1. allocate a 32-aligned Block and randomise it;
     2. load the Block's slots into the working registers
        (reg form: ymm7/ymm8/ymm6/ymm9 + r14;
         mem form: rax points at the Block, plus ymm8/ymm7/ymm9 + r14 —
         note slot a1 stays in memory so the insn can address it via
         (%rax));
     3. run the insn under test;
     4. store the registers back and print before/after.
   The clobber lists match the registers each preamble touches; ymm0
   is listed because PCMPxSTRx-style insns implicitly write it. */
#define GEN_test_RandM(_name, _reg_form, _mem_form) \
\
__attribute__ ((noinline)) static void test_##_name ( void ) \
{ \
   Block* b = memalign(32, sizeof(Block)); \
   randBlock(b); \
   printf("%s(reg)\n", #_name); \
   showBlock("before", b); \
   __asm__ __volatile__( \
      "vmovdqa 0(%0),%%ymm7" "\n\t" \
      "vmovdqa 32(%0),%%ymm8" "\n\t" \
      "vmovdqa 64(%0),%%ymm6" "\n\t" \
      "vmovdqa 96(%0),%%ymm9" "\n\t" \
      "movq 128(%0),%%r14" "\n\t" \
      _reg_form "\n\t" \
      "vmovdqa %%ymm7, 0(%0)" "\n\t" \
      "vmovdqa %%ymm8, 32(%0)" "\n\t" \
      "vmovdqa %%ymm6, 64(%0)" "\n\t" \
      "vmovdqa %%ymm9, 96(%0)" "\n\t" \
      "movq %%r14, 128(%0)" "\n\t" \
      : /*OUT*/ \
      : /*IN*/"r"(b) \
      : /*TRASH*/"xmm0","xmm7","xmm8","xmm6","xmm9","r14","memory","cc" \
      ); \
   showBlock("after", b); \
   randBlock(b); \
   printf("%s(mem)\n", #_name); \
   showBlock("before", b); \
   __asm__ __volatile__( \
      "leaq 0(%0),%%rax" "\n\t" \
      "vmovdqa 32(%0),%%ymm8" "\n\t" \
      "vmovdqa 64(%0),%%ymm7" "\n\t" \
      "vmovdqa 96(%0),%%ymm9" "\n\t" \
      "movq 128(%0),%%r14" "\n\t" \
      _mem_form "\n\t" \
      "vmovdqa %%ymm8, 32(%0)" "\n\t" \
      "vmovdqa %%ymm7, 64(%0)" "\n\t" \
      "vmovdqa %%ymm9, 96(%0)" "\n\t" \
      "movq %%r14, 128(%0)" "\n\t" \
      : /*OUT*/ \
      : /*IN*/"r"(b) \
      : /*TRASH*/"xmm0","xmm8","xmm7","xmm9","r14","rax","memory","cc" \
      ); \
   showBlock("after", b); \
   printf("\n"); \
   free(b); \
}

/* Register-only and memory-only variants: the unused form is the
   empty string, which assembles to nothing. */
#define GEN_test_Ronly(_name, _reg_form) \
   GEN_test_RandM(_name, _reg_form, "")
#define GEN_test_Monly(_name, _mem_form) \
   GEN_test_RandM(_name, "", _mem_form)

/* Vector integers promoted from 128-bit in AVX to 256-bit in AVX2.
*/

/* Logic, arithmetic, zero-extension, blend, min/max, shuffle,
   unpack, abs, pack, movemask, compare — first batch. */
GEN_test_RandM(VPOR_256,
               "vpor %%ymm6, %%ymm8, %%ymm7",
               "vpor (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPXOR_256,
               "vpxor %%ymm6, %%ymm8, %%ymm7",
               "vpxor (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSUBB_256,
               "vpsubb %%ymm6, %%ymm8, %%ymm7",
               "vpsubb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSUBD_256,
               "vpsubd %%ymm6, %%ymm8, %%ymm7",
               "vpsubd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPADDD_256,
               "vpaddd %%ymm6, %%ymm8, %%ymm7",
               "vpaddd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMOVZXWD_256,
               "vpmovzxwd %%xmm6, %%ymm8",
               "vpmovzxwd (%%rax), %%ymm8")

GEN_test_RandM(VPMOVZXBW_256,
               "vpmovzxbw %%xmm6, %%ymm8",
               "vpmovzxbw (%%rax), %%ymm8")

GEN_test_RandM(VPBLENDVB_256,
               "vpblendvb %%ymm9, %%ymm6, %%ymm8, %%ymm7",
               "vpblendvb %%ymm9, (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMINSD_256,
               "vpminsd %%ymm6, %%ymm8, %%ymm7",
               "vpminsd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMAXSD_256,
               "vpmaxsd %%ymm6, %%ymm8, %%ymm7",
               "vpmaxsd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSHUFB_256,
               "vpshufb %%ymm6, %%ymm8, %%ymm7",
               "vpshufb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPUNPCKLBW_256,
               "vpunpcklbw %%ymm6, %%ymm8, %%ymm7",
               "vpunpcklbw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPUNPCKHBW_256,
               "vpunpckhbw %%ymm6, %%ymm8, %%ymm7",
               "vpunpckhbw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPABSD_256,
               "vpabsd %%ymm6, %%ymm8",
               "vpabsd (%%rax), %%ymm8")

GEN_test_RandM(VPACKUSWB_256,
               "vpackuswb %%ymm9, %%ymm8, %%ymm7",
               "vpackuswb (%%rax), %%ymm8, %%ymm7")

/* Register-only: the destination is a GPR, not memory. */
GEN_test_Ronly(VPMOVMSKB_256,
               "vpmovmskb %%ymm8, %%r14")

GEN_test_RandM(VPAND_256,
               "vpand %%ymm9, %%ymm8, %%ymm7",
               "vpand (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPCMPEQB_256,
               "vpcmpeqb %%ymm9, %%ymm8, %%ymm7",
               "vpcmpeqb (%%rax), %%ymm8, %%ymm7")

/* Immediate-shuffle tests deliberately use a different immediate in
   the reg and mem forms, to cover more control values. */
GEN_test_RandM(VPSHUFLW_0x39_256,
               "vpshuflw $0x39, %%ymm9, %%ymm7",
               "vpshuflw $0xC6, (%%rax), %%ymm8")

GEN_test_RandM(VPSHUFHW_0x39_256,
               "vpshufhw $0x39, %%ymm9, %%ymm7",
               "vpshufhw $0xC6, (%%rax), %%ymm8")

GEN_test_RandM(VPMULLW_256,
               "vpmullw %%ymm9, %%ymm8, %%ymm7",
               "vpmullw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPADDUSW_256,
               "vpaddusw %%ymm9, %%ymm8, %%ymm7",
               "vpaddusw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMULHUW_256,
               "vpmulhuw %%ymm9, %%ymm8, %%ymm7",
               "vpmulhuw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPADDUSB_256,
               "vpaddusb %%ymm9, %%ymm8, %%ymm7",
               "vpaddusb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPUNPCKLWD_256,
               "vpunpcklwd %%ymm6, %%ymm8, %%ymm7",
               "vpunpcklwd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPUNPCKHWD_256,
               "vpunpckhwd %%ymm6, %%ymm8, %%ymm7",
               "vpunpckhwd (%%rax), %%ymm8, %%ymm7")

/* Immediate-count shifts have no memory form. */
GEN_test_Ronly(VPSLLD_0x05_256,
               "vpslld $0x5, %%ymm9, %%ymm7")

GEN_test_Ronly(VPSRLD_0x05_256,
               "vpsrld $0x5, %%ymm9, %%ymm7")

GEN_test_Ronly(VPSRAD_0x05_256,
               "vpsrad $0x5, %%ymm9, %%ymm7")

GEN_test_RandM(VPSUBUSB_256,
               "vpsubusb %%ymm9, %%ymm8, %%ymm7",
               "vpsubusb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSUBSB_256,
               "vpsubsb %%ymm9, %%ymm8, %%ymm7",
               "vpsubsb (%%rax), %%ymm8, %%ymm7")

GEN_test_Ronly(VPSRLDQ_0x05_256,
               "vpsrldq $0x5, %%ymm9, %%ymm7")

GEN_test_Ronly(VPSLLDQ_0x05_256,
               "vpslldq $0x5, %%ymm9, %%ymm7")

GEN_test_RandM(VPANDN_256,
               "vpandn %%ymm9, %%ymm8, %%ymm7",
               "vpandn (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPUNPCKLQDQ_256,
               "vpunpcklqdq %%ymm6, %%ymm8, %%ymm7",
               "vpunpcklqdq (%%rax), %%ymm8, %%ymm7")

GEN_test_Ronly(VPSRLW_0x05_256,
               "vpsrlw $0x5, %%ymm9, %%ymm7")

GEN_test_Ronly(VPSLLW_0x05_256,
               "vpsllw $0x5, %%ymm9, %%ymm7")

GEN_test_RandM(VPADDW_256,
               "vpaddw %%ymm6, %%ymm8, %%ymm7",
               "vpaddw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPACKSSDW_256,
               "vpackssdw %%ymm9, %%ymm8, %%ymm7",
               "vpackssdw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPUNPCKLDQ_256,
               "vpunpckldq %%ymm6, %%ymm8, %%ymm7",
               "vpunpckldq (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPCMPEQD_256,
               "vpcmpeqd %%ymm6, %%ymm8, %%ymm7",
               "vpcmpeqd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSHUFD_0x39_256,
               "vpshufd $0x39, %%ymm9, %%ymm8",
               "vpshufd $0xC6, (%%rax), %%ymm7")

GEN_test_RandM(VPADDQ_256,
               "vpaddq %%ymm6, %%ymm8, %%ymm7",
               "vpaddq (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSUBQ_256,
               "vpsubq %%ymm6, %%ymm8, %%ymm7",
               "vpsubq (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSUBW_256,
               "vpsubw %%ymm6, %%ymm8, %%ymm7",
               "vpsubw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPCMPEQQ_256,
               "vpcmpeqq %%ymm6, %%ymm8, %%ymm7",
               "vpcmpeqq (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPCMPGTQ_256,
               "vpcmpgtq %%ymm6, %%ymm8, %%ymm7",
               "vpcmpgtq (%%rax), %%ymm8, %%ymm7")

GEN_test_Ronly(VPSRLQ_0x05_256,
               "vpsrlq $0x5, %%ymm9, %%ymm7")

GEN_test_RandM(VPMULUDQ_256,
               "vpmuludq %%ymm6, %%ymm8, %%ymm7",
               "vpmuludq (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMULDQ_256,
               "vpmuldq %%ymm6, %%ymm8, %%ymm7",
               "vpmuldq (%%rax), %%ymm8, %%ymm7")

GEN_test_Ronly(VPSLLQ_0x05_256,
               "vpsllq $0x5, %%ymm9, %%ymm7")

GEN_test_RandM(VPMAXUD_256,
               "vpmaxud %%ymm6, %%ymm8, %%ymm7",
               "vpmaxud (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMINUD_256,
               "vpminud %%ymm6, %%ymm8, %%ymm7",
               "vpminud (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMULLD_256,
               "vpmulld %%ymm6, %%ymm8, %%ymm7",
               "vpmulld (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMAXUW_256,
               "vpmaxuw %%ymm6, %%ymm8, %%ymm7",
               "vpmaxuw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMINUW_256,
               "vpminuw %%ymm6, %%ymm8, %%ymm7",
               "vpminuw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMAXSW_256,
               "vpmaxsw %%ymm6, %%ymm8, %%ymm7",
               "vpmaxsw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMINSW_256,
               "vpminsw %%ymm6, %%ymm8, %%ymm7",
               "vpminsw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMAXUB_256,
               "vpmaxub %%ymm6, %%ymm8, %%ymm7",
               "vpmaxub (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMINUB_256,
               "vpminub %%ymm6, %%ymm8, %%ymm7",
               "vpminub (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMAXSB_256,
               "vpmaxsb %%ymm6, %%ymm8, %%ymm7",
               "vpmaxsb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMINSB_256,
               "vpminsb %%ymm6, %%ymm8, %%ymm7",
               "vpminsb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMOVSXBW_256,
               "vpmovsxbw %%xmm6, %%ymm8",
               "vpmovsxbw (%%rax), %%ymm8")

GEN_test_RandM(VPSUBUSW_256,
               "vpsubusw %%ymm9, %%ymm8, %%ymm7",
               "vpsubusw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSUBSW_256,
               "vpsubsw %%ymm9, %%ymm8, %%ymm7",
               "vpsubsw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPCMPEQW_256,
               "vpcmpeqw %%ymm6, %%ymm8, %%ymm7",
               "vpcmpeqw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPADDB_256,
               "vpaddb %%ymm6, %%ymm8, %%ymm7",
               "vpaddb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPUNPCKHDQ_256,
               "vpunpckhdq %%ymm6, %%ymm8, %%ymm7",
               "vpunpckhdq (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMOVSXDQ_256,
               "vpmovsxdq %%xmm6, %%ymm8",
               "vpmovsxdq (%%rax), %%ymm8")

GEN_test_RandM(VPMOVSXWD_256,
               "vpmovsxwd %%xmm6, %%ymm8",
               "vpmovsxwd (%%rax), %%ymm8")

GEN_test_RandM(VPMULHW_256,
               "vpmulhw %%ymm9, %%ymm8, %%ymm7",
               "vpmulhw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPUNPCKHQDQ_256,
               "vpunpckhqdq %%ymm6, %%ymm8, %%ymm7",
               "vpunpckhqdq (%%rax), %%ymm8, %%ymm7")

GEN_test_Ronly(VPSRAW_0x05_256,
               "vpsraw $0x5, %%ymm9, %%ymm7")

GEN_test_RandM(VPCMPGTB_256,
               "vpcmpgtb %%ymm6, %%ymm8, %%ymm7",
               "vpcmpgtb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPCMPGTW_256,
               "vpcmpgtw %%ymm6, %%ymm8, %%ymm7",
               "vpcmpgtw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPCMPGTD_256,
               "vpcmpgtd %%ymm6, %%ymm8, %%ymm7",
               "vpcmpgtd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMOVZXBD_256,
               "vpmovzxbd %%xmm6, %%ymm8",
               "vpmovzxbd (%%rax), %%ymm8")

GEN_test_RandM(VPMOVSXBD_256,
               "vpmovsxbd %%xmm6, %%ymm8",
               "vpmovsxbd (%%rax), %%ymm8")

GEN_test_RandM(VPALIGNR_256_1of3,
               "vpalignr $0, %%ymm6, %%ymm8, %%ymm7",
               "vpalignr $3, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPALIGNR_256_2of3,
               "vpalignr $6, %%ymm6, %%ymm8, %%ymm7",
               "vpalignr $9, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPALIGNR_256_3of3,
               "vpalignr $12, %%ymm6, %%ymm8, %%ymm7",
               "vpalignr $15, (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPBLENDW_256_0x00,
               "vpblendw $0x00, %%ymm6, %%ymm8, %%ymm7",
               "vpblendw $0x01, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPBLENDW_256_0xFE,
               "vpblendw $0xFE, %%ymm6, %%ymm8, %%ymm7",
               "vpblendw $0xFF, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPBLENDW_256_0x30,
               "vpblendw $0x30, %%ymm6, %%ymm8, %%ymm7",
               "vpblendw $0x03, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPBLENDW_256_0x21,
               "vpblendw $0x21, %%ymm6, %%ymm8, %%ymm7",
               "vpblendw $0x12, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPBLENDW_256_0xD7,
               "vpblendw $0xD7, %%ymm6, %%ymm8, %%ymm7",
               "vpblendw $0x6C, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPBLENDW_256_0xB5,
               "vpblendw $0xB5, %%ymm6, %%ymm8, %%ymm7",
               "vpblendw $0x4A, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPBLENDW_256_0x85,
               "vpblendw $0x85, %%ymm6, %%ymm8, %%ymm7",
               "vpblendw $0xDC, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPBLENDW_256_0x29,
               "vpblendw $0x29, %%ymm6, %%ymm8, %%ymm7",
               "vpblendw $0x92, (%%rax), %%ymm8, %%ymm7")

/* Variable-count shifts: the count is masked first so the shift
   amount stays in a "sensible" range for the element width. */
GEN_test_RandM(VPSLLW_256,
               "andl $15, %%r14d;"
               "vmovd %%r14d, %%xmm6;"
               "vpsllw %%xmm6, %%ymm8, %%ymm9",
               "andq $15, 128(%%rax);"
               "vpsllw 128(%%rax), %%ymm8, %%ymm9")

GEN_test_RandM(VPSRLW_256,
               "andl $15, %%r14d;"
               "vmovd %%r14d, %%xmm6;"
               "vpsrlw %%xmm6, %%ymm8, %%ymm9",
               "andq $15, 128(%%rax);"
               "vpsrlw 128(%%rax), %%ymm8, %%ymm9")

/* NOTE(review): reg form masks the count with 31 but the mem form
   with 15 — presumably deliberate, to exercise the >=16 saturating
   case in the reg form; confirm against the insn reference. */
GEN_test_RandM(VPSRAW_256,
               "andl $31, %%r14d;"
               "vmovd %%r14d, %%xmm6;"
               "vpsraw %%xmm6, %%ymm8, %%ymm9",
               "andq $15, 128(%%rax);"
               "vpsraw 128(%%rax), %%ymm8, %%ymm9")

GEN_test_RandM(VPSLLD_256,
               "andl $31, %%r14d;"
               "vmovd %%r14d, %%xmm6;"
               "vpslld %%xmm6, %%ymm8, %%ymm9",
               "andq $31, 128(%%rax);"
               "vpslld 128(%%rax), %%ymm8, %%ymm9")

GEN_test_RandM(VPSRLD_256,
               "andl $31, %%r14d;"
               "vmovd %%r14d, %%xmm6;"
               "vpsrld %%xmm6, %%ymm8, %%ymm9",
               "andq $31, 128(%%rax);"
               "vpsrld 128(%%rax), %%ymm8, %%ymm9")

GEN_test_RandM(VPSRAD_256,
               "andl $31, %%r14d;"
               "vmovd %%r14d, %%xmm6;"
               "vpsrad %%xmm6, %%ymm8, %%ymm9",
               "andq $31, 128(%%rax);"
               "vpsrad 128(%%rax), %%ymm8, %%ymm9")

GEN_test_RandM(VPSLLQ_256,
               "andl $63, %%r14d;"
               "vmovd %%r14d, %%xmm6;"
               "vpsllq %%xmm6, %%ymm8, %%ymm9",
               "andq $63, 128(%%rax);"
               "vpsllq 128(%%rax), %%ymm8, %%ymm9")

GEN_test_RandM(VPSRLQ_256,
               "andl $63, %%r14d;"
               "vmovd %%r14d, %%xmm6;"
               "vpsrlq %%xmm6, %%ymm8, %%ymm9",
               "andq $63, 128(%%rax);"
               "vpsrlq 128(%%rax), %%ymm8, %%ymm9")

GEN_test_RandM(VPMADDWD_256,
               "vpmaddwd %%ymm6, %%ymm8, %%ymm7",
               "vpmaddwd (%%rax), %%ymm8, %%ymm7")

/* Memory-only: non-temporal load has no register source form. */
GEN_test_Monly(VMOVNTDQA_256,
               "vmovntdqa (%%rax), %%ymm9")

GEN_test_RandM(VPACKSSWB_256,
               "vpacksswb %%ymm6, %%ymm8, %%ymm7",
               "vpacksswb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPAVGB_256,
               "vpavgb %%ymm6, %%ymm8, %%ymm7",
               "vpavgb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPAVGW_256,
               "vpavgw %%ymm6, %%ymm8, %%ymm7",
               "vpavgw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPADDSB_256,
               "vpaddsb %%ymm6, %%ymm8, %%ymm7",
               "vpaddsb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPADDSW_256,
               "vpaddsw %%ymm6, %%ymm8, %%ymm7",
               "vpaddsw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPHADDW_256,
               "vphaddw %%ymm6, %%ymm8, %%ymm7",
               "vphaddw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPHADDD_256,
               "vphaddd %%ymm6, %%ymm8, %%ymm7",
               "vphaddd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPHADDSW_256,
               "vphaddsw %%ymm6, %%ymm8, %%ymm7",
               "vphaddsw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMADDUBSW_256,
               "vpmaddubsw %%ymm6, %%ymm8, %%ymm7",
               "vpmaddubsw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPHSUBW_256,
               "vphsubw %%ymm6, %%ymm8, %%ymm7",
               "vphsubw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPHSUBD_256,
               "vphsubd %%ymm6, %%ymm8, %%ymm7",
               "vphsubd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPHSUBSW_256,
               "vphsubsw %%ymm6, %%ymm8, %%ymm7",
               "vphsubsw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPABSB_256,
               "vpabsb %%ymm6, %%ymm7",
               "vpabsb (%%rax), %%ymm7")

GEN_test_RandM(VPABSW_256,
               "vpabsw %%ymm6, %%ymm7",
               "vpabsw (%%rax), %%ymm7")

GEN_test_RandM(VPMOVSXBQ_256,
               "vpmovsxbq %%xmm6, %%ymm8",
               "vpmovsxbq (%%rax), %%ymm8")

GEN_test_RandM(VPMOVSXWQ_256,
               "vpmovsxwq %%xmm6, %%ymm8",
               "vpmovsxwq (%%rax), %%ymm8")

GEN_test_RandM(VPACKUSDW_256,
               "vpackusdw %%ymm6, %%ymm8, %%ymm7",
               "vpackusdw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMOVZXBQ_256,
               "vpmovzxbq %%xmm6, %%ymm8",
               "vpmovzxbq (%%rax), %%ymm8")

GEN_test_RandM(VPMOVZXWQ_256,
               "vpmovzxwq %%xmm6, %%ymm8",
               "vpmovzxwq (%%rax), %%ymm8")

GEN_test_RandM(VPMOVZXDQ_256,
               "vpmovzxdq %%xmm6, %%ymm8",
               "vpmovzxdq (%%rax), %%ymm8")

GEN_test_RandM(VMPSADBW_256_0x0,
               "vmpsadbw $0, %%ymm6, %%ymm8, %%ymm7",
               "vmpsadbw $0, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VMPSADBW_256_0x39,
               "vmpsadbw $0x39, %%ymm6, %%ymm8, %%ymm7",
               "vmpsadbw $0x39, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VMPSADBW_256_0x32,
               "vmpsadbw $0x32, %%ymm6, %%ymm8, %%ymm7",
               "vmpsadbw $0x32, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VMPSADBW_256_0x2b,
               "vmpsadbw $0x2b, %%ymm6, %%ymm8, %%ymm7",
               "vmpsadbw $0x2b, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VMPSADBW_256_0x24,
               "vmpsadbw $0x24, %%ymm6, %%ymm8, %%ymm7",
               "vmpsadbw $0x24, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VMPSADBW_256_0x1d,
               "vmpsadbw $0x1d, %%ymm6, %%ymm8, %%ymm7",
               "vmpsadbw $0x1d, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VMPSADBW_256_0x16,
               "vmpsadbw $0x16, %%ymm6, %%ymm8, %%ymm7",
               "vmpsadbw $0x16, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VMPSADBW_256_0x0f,
               "vmpsadbw $0x0f, %%ymm6, %%ymm8, %%ymm7",
               "vmpsadbw $0x0f, (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSADBW_256,
               "vpsadbw %%ymm6, %%ymm8, %%ymm7",
               "vpsadbw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSIGNB_256,
               "vpsignb %%ymm6, %%ymm8, %%ymm7",
               "vpsignb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSIGNW_256,
               "vpsignw %%ymm6, %%ymm8, %%ymm7",
               "vpsignw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSIGND_256,
               "vpsignd %%ymm6, %%ymm8, %%ymm7",
               "vpsignd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMULHRSW_256,
               "vpmulhrsw %%ymm6, %%ymm8, %%ymm7",
               "vpmulhrsw (%%rax), %%ymm8, %%ymm7")

/* Instructions new in AVX2.
*/

/* 128-bit lane insert/extract/permute and broadcasts. */
GEN_test_Monly(VBROADCASTI128,
               "vbroadcasti128 (%%rax), %%ymm9")

GEN_test_RandM(VEXTRACTI128_0x0,
               "vextracti128 $0x0, %%ymm7, %%xmm9",
               "vextracti128 $0x0, %%ymm7, (%%rax)")

GEN_test_RandM(VEXTRACTI128_0x1,
               "vextracti128 $0x1, %%ymm7, %%xmm9",
               "vextracti128 $0x1, %%ymm7, (%%rax)")

GEN_test_RandM(VINSERTI128_0x0,
               "vinserti128 $0x0, %%xmm9, %%ymm7, %%ymm8",
               "vinserti128 $0x0, (%%rax), %%ymm7, %%ymm8")

GEN_test_RandM(VINSERTI128_0x1,
               "vinserti128 $0x1, %%xmm9, %%ymm7, %%ymm8",
               "vinserti128 $0x1, (%%rax), %%ymm7, %%ymm8")

GEN_test_RandM(VPERM2I128_0x00,
               "vperm2i128 $0x00, %%ymm6, %%ymm8, %%ymm7",
               "vperm2i128 $0x00, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPERM2I128_0xFF,
               "vperm2i128 $0xFF, %%ymm6, %%ymm8, %%ymm7",
               "vperm2i128 $0xFF, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPERM2I128_0x30,
               "vperm2i128 $0x30, %%ymm6, %%ymm8, %%ymm7",
               "vperm2i128 $0x30, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPERM2I128_0x21,
               "vperm2i128 $0x21, %%ymm6, %%ymm8, %%ymm7",
               "vperm2i128 $0x21, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPERM2I128_0x12,
               "vperm2i128 $0x12, %%ymm6, %%ymm8, %%ymm7",
               "vperm2i128 $0x12, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPERM2I128_0x03,
               "vperm2i128 $0x03, %%ymm6, %%ymm8, %%ymm7",
               "vperm2i128 $0x03, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPERM2I128_0x85,
               "vperm2i128 $0x85, %%ymm6, %%ymm8, %%ymm7",
               "vperm2i128 $0x85, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPERM2I128_0x5A,
               "vperm2i128 $0x5A, %%ymm6, %%ymm8, %%ymm7",
               "vperm2i128 $0x5A, (%%rax), %%ymm8, %%ymm7")

GEN_test_Ronly(VBROADCASTSS_128,
               "vbroadcastss %%xmm9, %%xmm7")

GEN_test_Ronly(VBROADCASTSS_256,
               "vbroadcastss %%xmm9, %%ymm7")

GEN_test_Ronly(VBROADCASTSD_256,
               "vbroadcastsd %%xmm9, %%ymm7")

/* Full-width lane permutes. */
GEN_test_RandM(VPERMD,
               "vpermd %%ymm6, %%ymm7, %%ymm9",
               "vpermd (%%rax), %%ymm7, %%ymm9")

GEN_test_RandM(VPERMQ_0x00,
               "vpermq $0x00, %%ymm6, %%ymm7",
               "vpermq $0x01, (%%rax), %%ymm7")
GEN_test_RandM(VPERMQ_0xFE,
               "vpermq $0xFE, %%ymm6, %%ymm7",
               "vpermq $0xFF, (%%rax), %%ymm7")
GEN_test_RandM(VPERMQ_0x30,
               "vpermq $0x30, %%ymm6, %%ymm7",
               "vpermq $0x03, (%%rax), %%ymm7")
GEN_test_RandM(VPERMQ_0x21,
               "vpermq $0x21, %%ymm6, %%ymm7",
               "vpermq $0x12, (%%rax), %%ymm7")
GEN_test_RandM(VPERMQ_0xD7,
               "vpermq $0xD7, %%ymm6, %%ymm7",
               "vpermq $0x6C, (%%rax), %%ymm7")
GEN_test_RandM(VPERMQ_0xB5,
               "vpermq $0xB5, %%ymm6, %%ymm7",
               "vpermq $0x4A, (%%rax), %%ymm7")
GEN_test_RandM(VPERMQ_0x85,
               "vpermq $0x85, %%ymm6, %%ymm7",
               "vpermq $0xDC, (%%rax), %%ymm7")
GEN_test_RandM(VPERMQ_0x29,
               "vpermq $0x29, %%ymm6, %%ymm7",
               "vpermq $0x92, (%%rax), %%ymm7")

GEN_test_RandM(VPERMPS,
               "vpermps %%ymm6, %%ymm7, %%ymm9",
               "vpermps (%%rax), %%ymm7, %%ymm9")

GEN_test_RandM(VPERMPD_0x00,
               "vpermpd $0x00, %%ymm6, %%ymm7",
               "vpermpd $0x01, (%%rax), %%ymm7")
GEN_test_RandM(VPERMPD_0xFE,
               "vpermpd $0xFE, %%ymm6, %%ymm7",
               "vpermpd $0xFF, (%%rax), %%ymm7")
GEN_test_RandM(VPERMPD_0x30,
               "vpermpd $0x30, %%ymm6, %%ymm7",
               "vpermpd $0x03, (%%rax), %%ymm7")
GEN_test_RandM(VPERMPD_0x21,
               "vpermpd $0x21, %%ymm6, %%ymm7",
               "vpermpd $0x12, (%%rax), %%ymm7")
GEN_test_RandM(VPERMPD_0xD7,
               "vpermpd $0xD7, %%ymm6, %%ymm7",
               "vpermpd $0x6C, (%%rax), %%ymm7")
GEN_test_RandM(VPERMPD_0xB5,
               "vpermpd $0xB5, %%ymm6, %%ymm7",
               "vpermpd $0x4A, (%%rax), %%ymm7")
GEN_test_RandM(VPERMPD_0x85,
               "vpermpd $0x85, %%ymm6, %%ymm7",
               "vpermpd $0xDC, (%%rax), %%ymm7")
GEN_test_RandM(VPERMPD_0x29,
               "vpermpd $0x29, %%ymm6, %%ymm7",
               "vpermpd $0x92, (%%rax), %%ymm7")

GEN_test_RandM(VPBLENDD_128_0x00,
               "vpblendd $0x00, %%xmm6, %%xmm8, %%xmm7",
               "vpblendd $0x01, (%%rax), %%xmm8, %%xmm7")

/* VPBLENDD with a spread of immediate select masks (reg and mem
   forms use adjacent immediates to widen coverage). */
GEN_test_RandM(VPBLENDD_128_0x02,
               "vpblendd $0x02, %%xmm6, %%xmm8, %%xmm7",
               "vpblendd $0x03, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VPBLENDD_128_0x04,
               "vpblendd $0x04, %%xmm6, %%xmm8, %%xmm7",
               "vpblendd $0x05, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VPBLENDD_128_0x06,
               "vpblendd $0x06, %%xmm6, %%xmm8, %%xmm7",
               "vpblendd $0x07, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VPBLENDD_128_0x08,
               "vpblendd $0x08, %%xmm6, %%xmm8, %%xmm7",
               "vpblendd $0x09, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VPBLENDD_128_0x0A,
               "vpblendd $0x0A, %%xmm6, %%xmm8, %%xmm7",
               "vpblendd $0x0B, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VPBLENDD_128_0x0C,
               "vpblendd $0x0C, %%xmm6, %%xmm8, %%xmm7",
               "vpblendd $0x0D, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VPBLENDD_128_0x0E,
               "vpblendd $0x0E, %%xmm6, %%xmm8, %%xmm7",
               "vpblendd $0x0F, (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPBLENDD_256_0x00,
               "vpblendd $0x00, %%ymm6, %%ymm8, %%ymm7",
               "vpblendd $0x01, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPBLENDD_256_0xFE,
               "vpblendd $0xFE, %%ymm6, %%ymm8, %%ymm7",
               "vpblendd $0xFF, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPBLENDD_256_0x30,
               "vpblendd $0x30, %%ymm6, %%ymm8, %%ymm7",
               "vpblendd $0x03, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPBLENDD_256_0x21,
               "vpblendd $0x21, %%ymm6, %%ymm8, %%ymm7",
               "vpblendd $0x12, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPBLENDD_256_0xD7,
               "vpblendd $0xD7, %%ymm6, %%ymm8, %%ymm7",
               "vpblendd $0x6C, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPBLENDD_256_0xB5,
               "vpblendd $0xB5, %%ymm6, %%ymm8, %%ymm7",
               "vpblendd $0x4A, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPBLENDD_256_0x85,
               "vpblendd $0x85, %%ymm6, %%ymm8, %%ymm7",
               "vpblendd $0xDC, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPBLENDD_256_0x29,
               "vpblendd $0x29, %%ymm6, %%ymm8, %%ymm7",
               "vpblendd $0x92, (%%rax), %%ymm8, %%ymm7")


/* Per-element variable shifts.  The reg forms clamp each count by a
   shift-left/shift-right pair; the mem forms mask selected lanes of
   the in-memory count vector with andl. */
GEN_test_RandM(VPSLLVD_128,
               "vpslld $27, %%xmm6, %%xmm6;"
               "vpsrld $27, %%xmm6, %%xmm6;"
               "vpsllvd %%xmm6, %%xmm8, %%xmm7",
               "andl $31, (%%rax);"
               "andl $31, 4(%%rax);"
               "andl $31, 8(%%rax);"
               "vpsllvd (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPSLLVD_256,
               "vpslld $27, %%ymm6, %%ymm6;"
               "vpsrld $27, %%ymm6, %%ymm6;"
               "vpsllvd %%ymm6, %%ymm8, %%ymm7",
               "andl $31, (%%rax);"
               "andl $31, 4(%%rax);"
               "andl $31, 8(%%rax);"
               "andl $31, 16(%%rax);"
               "andl $31, 20(%%rax);"
               "andl $31, 24(%%rax);"
               "vpsllvd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSLLVQ_128,
               "vpsllq $58, %%xmm6, %%xmm6;"
               "vpsrlq $58, %%xmm6, %%xmm6;"
               "vpsllvq %%xmm6, %%xmm8, %%xmm7",
               "andl $63, (%%rax);"
               "vpsllvq (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPSLLVQ_256,
               "vpsllq $58, %%ymm6, %%ymm6;"
               "vpsrlq $58, %%ymm6, %%ymm6;"
               "vpsllvq %%ymm6, %%ymm8, %%ymm7",
               "andl $63, (%%rax);"
               "andl $63, 8(%%rax);"
               "andl $63, 16(%%rax);"
               "vpsllvq (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSRLVD_128,
               "vpslld $27, %%xmm6, %%xmm6;"
               "vpsrld $27, %%xmm6, %%xmm6;"
               "vpsrlvd %%xmm6, %%xmm8, %%xmm7",
               "andl $31, (%%rax);"
               "andl $31, 4(%%rax);"
               "andl $31, 8(%%rax);"
               "vpsrlvd (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPSRLVD_256,
               "vpslld $27, %%ymm6, %%ymm6;"
               "vpsrld $27, %%ymm6, %%ymm6;"
               "vpsrlvd %%ymm6, %%ymm8, %%ymm7",
               "andl $31, (%%rax);"
               "andl $31, 4(%%rax);"
               "andl $31, 8(%%rax);"
               "andl $31, 16(%%rax);"
               "andl $31, 20(%%rax);"
               "andl $31, 24(%%rax);"
               "vpsrlvd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSRLVQ_128,
               "vpsllq $58, %%xmm6, %%xmm6;"
               "vpsrlq $58, %%xmm6, %%xmm6;"
               "vpsrlvq %%xmm6, %%xmm8, %%xmm7",
               "andl $63, (%%rax);"
               "vpsrlvq (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPSRLVQ_256,
               "vpsllq $58, %%ymm6, %%ymm6;"
               "vpsrlq $58, %%ymm6, %%ymm6;"
               "vpsrlvq %%ymm6, %%ymm8, %%ymm7",
               "andl $63, (%%rax);"
               "andl $63, 8(%%rax);"
               "andl $63, 16(%%rax);"
               "vpsrlvq (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSRAVD_128,
               "vpslld $27, %%xmm6, %%xmm6;"
               "vpsrld $27, %%xmm6, %%xmm6;"
               "vpsravd %%xmm6, %%xmm8, %%xmm7",
               "andl $31, (%%rax);"
               "andl $31, 4(%%rax);"
               "andl $31, 8(%%rax);"
               "vpsravd (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPSRAVD_256,
               "vpslld $27, %%ymm6, %%ymm6;"
               "vpsrld $27, %%ymm6, %%ymm6;"
               "vpsravd %%ymm6, %%ymm8, %%ymm7",
               "andl $31, (%%rax);"
               "andl $31, 4(%%rax);"
               "andl $31, 8(%%rax);"
               "andl $31, 16(%%rax);"
               "andl $31, 20(%%rax);"
               "andl $31, 24(%%rax);"
               "vpsravd (%%rax), %%ymm8, %%ymm7")

/* Element broadcasts from register and from memory. */
GEN_test_RandM(VPBROADCASTB_128,
               "vpbroadcastb %%xmm9, %%xmm7",
               "vpbroadcastb (%%rax), %%xmm7")

GEN_test_RandM(VPBROADCASTB_256,
               "vpbroadcastb %%xmm9, %%ymm7",
               "vpbroadcastb (%%rax), %%ymm7")

GEN_test_RandM(VPBROADCASTW_128,
               "vpbroadcastw %%xmm9, %%xmm7",
               "vpbroadcastw (%%rax), %%xmm7")

GEN_test_RandM(VPBROADCASTW_256,
               "vpbroadcastw %%xmm9, %%ymm7",
               "vpbroadcastw (%%rax), %%ymm7")

GEN_test_RandM(VPBROADCASTD_128,
               "vpbroadcastd %%xmm9, %%xmm7",
               "vpbroadcastd (%%rax), %%xmm7")

GEN_test_RandM(VPBROADCASTD_256,
               "vpbroadcastd %%xmm9, %%ymm7",
               "vpbroadcastd (%%rax), %%ymm7")

GEN_test_RandM(VPBROADCASTQ_128,
               "vpbroadcastq %%xmm9, %%xmm7",
               "vpbroadcastq (%%rax), %%xmm7")

GEN_test_RandM(VPBROADCASTQ_256,
               "vpbroadcastq %%xmm9, %%ymm7",
               "vpbroadcastq (%%rax), %%ymm7")

/* Masked loads: the second load uses an all-zero mask, so the
   (%%rax,%%rax,4) address is never actually dereferenced. */
GEN_test_Monly(VPMASKMOVD_128_LoadForm,
               "vpmaskmovd (%%rax), %%xmm8, %%xmm7;"
               "vxorps %%xmm6, %%xmm6, %%xmm6;"
               "vpmaskmovd (%%rax,%%rax,4), %%xmm6, %%xmm9")

GEN_test_Monly(VPMASKMOVD_256_LoadForm,
               "vpmaskmovd (%%rax), %%ymm8, %%ymm7;"
               "vxorps %%ymm6, %%ymm6, %%ymm6;"
               "vpmaskmovd (%%rax,%%rax,4), %%ymm6, %%ymm9")


GEN_test_Monly(VPMASKMOVQ_128_LoadForm,
               "vpmaskmovq (%%rax), %%xmm8, %%xmm7;"
               "vxorpd %%xmm6, %%xmm6, %%xmm6;"
               "vpmaskmovq (%%rax,%%rax,4), %%xmm6, %%xmm9")

GEN_test_Monly(VPMASKMOVQ_256_LoadForm,
               "vpmaskmovq (%%rax), %%ymm8, %%ymm7;"
               "vxorpd %%ymm6, %%ymm6, %%ymm6;"
               "vpmaskmovq (%%rax,%%rax,4), %%ymm6, %%ymm9")

/* Gather tests.  Common scaffolding: shift-left/shift-right pairs
   clamp the random indices so every gathered address lands inside
   randArray; a blend mixes clamped and original values under the
   mask; %0 (the macro's Block pointer) reloads ymm9/ymm7 where the
   scaffolding has clobbered them; r14 is zeroed afterwards so the
   base address does not leak into the printed "after" state. */
GEN_test_Ronly(VGATHERDPS_128,
               "vpslld $25, %%xmm7, %%xmm8;"
               "vpsrld $25, %%xmm8, %%xmm8;"
               "vblendvps %%xmm6, %%xmm8, %%xmm7, %%xmm8;"
               "leaq randArray(%%rip), %%r14;"
               "vgatherdps %%xmm6, 3(%%r14,%%xmm8,4), %%xmm9;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VGATHERDPS_256,
               "vpslld $25, %%ymm7, %%ymm8;"
               "vpsrld $25, %%ymm8, %%ymm8;"
               "vblendvps %%ymm6, %%ymm8, %%ymm7, %%ymm8;"
               "leaq randArray(%%rip), %%r14;"
               "vgatherdps %%ymm6, 3(%%r14,%%ymm8,4), %%ymm9;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VGATHERQPS_128_1,
               "vpsllq $57, %%xmm7, %%xmm8;"
               "vpsrlq $57, %%xmm8, %%xmm8;"
               "vpmovsxdq %%xmm6, %%xmm9;"
               "vblendvpd %%xmm9, %%xmm8, %%xmm7, %%xmm8;"
               "vmovdqa 96(%0), %%ymm9;"
               "leaq randArray(%%rip), %%r14;"
               "vgatherqps %%xmm6, 3(%%r14,%%xmm8,4), %%xmm9;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VGATHERQPS_256_1,
               "vpsllq $57, %%ymm7, %%ymm8;"
               "vpsrlq $57, %%ymm8, %%ymm8;"
               "vpmovsxdq %%xmm6, %%ymm9;"
               "vblendvpd %%ymm9, %%ymm8, %%ymm7, %%ymm8;"
               "vmovdqa 96(%0), %%ymm9;"
               "leaq randArray(%%rip), %%r14;"
               "vgatherqps %%xmm6, 3(%%r14,%%ymm8,4), %%xmm9;"
               "xorl %%r14d, %%r14d")

/* The _2 variants fold the base address into the index vector and
   gather with no base register (absolute-plus-index addressing). */
GEN_test_Ronly(VGATHERQPS_128_2,
               "vpsllq $57, %%xmm7, %%xmm8;"
               "vpsrlq $57, %%xmm8, %%xmm8;"
               "vpmovsxdq %%xmm6, %%xmm9;"
               "vblendvpd %%xmm9, %%xmm8, %%xmm7, %%xmm8;"
               "vmovdqa 96(%0), %%ymm9;"
               "leaq randArray(%%rip), %%r14;"
               "vmovq %%r14, %%xmm7;"
               "vpsllq $2, %%xmm8, %%xmm8;"
               "vpbroadcastq %%xmm7, %%xmm7;"
               "vpaddq %%xmm7, %%xmm8, %%xmm8;"
               "vgatherqps %%xmm6, 1(,%%xmm8,1), %%xmm9;"
               "vpsubq %%xmm7, %%xmm8, %%xmm8;"
               "vmovdqa 0(%0), %%ymm7;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VGATHERQPS_256_2,
               "vpsllq $57, %%ymm7, %%ymm8;"
               "vpsrlq $57, %%ymm8, %%ymm8;"
               "vpmovsxdq %%xmm6, %%ymm9;"
               "vblendvpd %%ymm9, %%ymm8, %%ymm7, %%ymm8;"
               "vmovdqa 96(%0), %%ymm9;"
               "leaq randArray(%%rip), %%r14;"
               "vmovq %%r14, %%xmm7;"
               "vpsllq $2, %%ymm8, %%ymm8;"
               "vpbroadcastq %%xmm7, %%ymm7;"
               "vpaddq %%ymm7, %%ymm8, %%ymm8;"
               "vgatherqps %%xmm6, 1(,%%ymm8,1), %%xmm9;"
               "vpsubq %%ymm7, %%ymm8, %%ymm8;"
               "vmovdqa 0(%0), %%ymm7;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VGATHERDPD_128,
               "vpslld $26, %%xmm7, %%xmm8;"
               "vpsrld $26, %%xmm8, %%xmm8;"
               "vshufps $13, %%xmm6, %%xmm6, %%xmm9;"
               "vblendvps %%xmm9, %%xmm8, %%xmm7, %%xmm8;"
               "vmovdqa 96(%0), %%ymm9;"
               "leaq randArray(%%rip), %%r14;"
               "vgatherdpd %%xmm6, 3(%%r14,%%xmm8,8), %%xmm9;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VGATHERDPD_256,
               "vpslld $26, %%ymm7, %%ymm8;"
               "vpsrld $26, %%ymm8, %%ymm8;"
               "vextracti128 $1, %%ymm6, %%xmm9;"
               "vshufps $221, %%ymm9, %%ymm6, %%ymm9;"
               "vblendvps %%ymm9, %%ymm8, %%ymm7, %%ymm8;"
               "vmovdqa 96(%0), %%ymm9;"
               "leaq randArray(%%rip), %%r14;"
               "vgatherdpd %%ymm6, 3(%%r14,%%xmm8,8), %%ymm9;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VGATHERQPD_128_1,
               "vpsllq $58, %%xmm7, %%xmm8;"
               "vpsrlq $58, %%xmm8, %%xmm8;"
               "vblendvpd %%xmm6, %%xmm8, %%xmm7, %%xmm8;"
               "leaq randArray(%%rip), %%r14;"
               "vgatherqpd %%xmm6, 3(%%r14,%%xmm8,8), %%xmm9;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VGATHERQPD_256_1,
               "vpsllq $58, %%ymm7, %%ymm8;"
               "vpsrlq $58, %%ymm8, %%ymm8;"
               "vblendvpd %%ymm6, %%ymm8, %%ymm7, %%ymm8;"
               "leaq randArray(%%rip), %%r14;"
               "vgatherqpd %%ymm6, 3(%%r14,%%ymm8,8), %%ymm9;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VGATHERQPD_128_2,
               "vpsllq $58, %%xmm7, %%xmm8;"
               "vpsrlq $58, %%xmm8, %%xmm8;"
               "vblendvpd %%xmm6, %%xmm8, %%xmm7, %%xmm8;"
               "leaq randArray(%%rip), %%r14;"
               "vmovq %%r14, %%xmm7;"
               "vpsllq $2, %%xmm8, %%xmm8;"
               "vpbroadcastq %%xmm7, %%xmm7;"
               "vpaddq %%xmm7, %%xmm8, %%xmm8;"
               "vgatherqpd %%xmm6, 1(,%%xmm8,1), %%xmm9;"
               "vpsubq %%xmm7, %%xmm8, %%xmm8;"
               "vmovdqa 0(%0), %%ymm7;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VGATHERQPD_256_2,
               "vpsllq $58, %%ymm7, %%ymm8;"
               "vpsrlq $58, %%ymm8, %%ymm8;"
               "vblendvpd %%ymm6, %%ymm8, %%ymm7, %%ymm8;"
               "leaq randArray(%%rip), %%r14;"
               "vmovq %%r14, %%xmm7;"
               "vpsllq $2, %%ymm8, %%ymm8;"
               "vpbroadcastq %%xmm7, %%ymm7;"
               "vpaddq %%ymm7, %%ymm8, %%ymm8;"
               "vgatherqpd %%ymm6, 1(,%%ymm8,1), %%ymm9;"
               "vpsubq %%ymm7, %%ymm8, %%ymm8;"
               "vmovdqa 0(%0), %%ymm7;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VPGATHERDD_128,
               "vpslld $25, %%xmm7, %%xmm8;"
               "vpsrld $25, %%xmm8, %%xmm8;"
               "vblendvps %%xmm6, %%xmm8, %%xmm7, %%xmm8;"
               "leaq randArray(%%rip), %%r14;"
               "vpgatherdd %%xmm6, 3(%%r14,%%xmm8,4), %%xmm9;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VPGATHERDD_256,
               "vpslld $25, %%ymm7, %%ymm8;"
               "vpsrld $25, %%ymm8, %%ymm8;"
               "vblendvps %%ymm6, %%ymm8, %%ymm7, %%ymm8;"
               "leaq randArray(%%rip), %%r14;"
               "vpgatherdd %%ymm6, 3(%%r14,%%ymm8,4), %%ymm9;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VPGATHERQD_128_1,
               "vpsllq $57, %%xmm7, %%xmm8;"
               "vpsrlq $57, %%xmm8, %%xmm8;"
               "vpmovsxdq %%xmm6, %%xmm9;"
               "vblendvpd %%xmm9, %%xmm8, %%xmm7, %%xmm8;"
               "vmovdqa 96(%0), %%ymm9;"
               "leaq randArray(%%rip), %%r14;"
               "vpgatherqd %%xmm6, 3(%%r14,%%xmm8,4), %%xmm9;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VPGATHERQD_256_1,
               "vpsllq $57, %%ymm7, %%ymm8;"
               "vpsrlq $57, %%ymm8, %%ymm8;"
               "vpmovsxdq %%xmm6, %%ymm9;"
               "vblendvpd %%ymm9, %%ymm8, %%ymm7, %%ymm8;"
               "vmovdqa 96(%0), %%ymm9;"
               "leaq randArray(%%rip), %%r14;"
               "vpgatherqd %%xmm6, 3(%%r14,%%ymm8,4), %%xmm9;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VPGATHERQD_128_2,
               "vpsllq $57, %%xmm7, %%xmm8;"
               "vpsrlq $57, %%xmm8, %%xmm8;"
               "vpmovsxdq %%xmm6, %%xmm9;"
               "vblendvpd %%xmm9, %%xmm8, %%xmm7, %%xmm8;"
               "vmovdqa 96(%0), %%ymm9;"
               "leaq randArray(%%rip), %%r14;"
               "vmovq %%r14, %%xmm7;"
               "vpsllq $2, %%xmm8, %%xmm8;"
               "vpbroadcastq %%xmm7, %%xmm7;"
               "vpaddq %%xmm7, %%xmm8, %%xmm8;"
               "vpgatherqd %%xmm6, 1(,%%xmm8,1), %%xmm9;"
               "vpsubq %%xmm7, %%xmm8, %%xmm8;"
               "vmovdqa 0(%0), %%ymm7;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VPGATHERQD_256_2,
               "vpsllq $57, %%ymm7, %%ymm8;"
               "vpsrlq $57, %%ymm8, %%ymm8;"
               "vpmovsxdq %%xmm6, %%ymm9;"
               "vblendvpd %%ymm9, %%ymm8, %%ymm7, %%ymm8;"
               "vmovdqa 96(%0), %%ymm9;"
               "leaq randArray(%%rip), %%r14;"
               "vmovq %%r14, %%xmm7;"
               "vpsllq $2, %%ymm8, %%ymm8;"
               "vpbroadcastq %%xmm7, %%ymm7;"
               "vpaddq %%ymm7, %%ymm8, %%ymm8;"
               "vpgatherqd %%xmm6, 1(,%%ymm8,1), %%xmm9;"
               "vpsubq %%ymm7, %%ymm8, %%ymm8;"
               "vmovdqa 0(%0), %%ymm7;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VPGATHERDQ_128,
               "vpslld $26, %%xmm7, %%xmm8;"
               "vpsrld $26, %%xmm8, %%xmm8;"
               "vshufps $13, %%xmm6, %%xmm6, %%xmm9;"
               "vblendvps %%xmm9, %%xmm8, %%xmm7, %%xmm8;"
               "vmovdqa 96(%0), %%ymm9;"
               "leaq randArray(%%rip), %%r14;"
               "vpgatherdq %%xmm6, 3(%%r14,%%xmm8,8), %%xmm9;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VPGATHERDQ_256,
               "vpslld $26, %%ymm7, %%ymm8;"
               "vpsrld $26, %%ymm8, %%ymm8;"
               "vextracti128 $1, %%ymm6, %%xmm9;"
               "vshufps $221, %%ymm9, %%ymm6, %%ymm9;"
               "vblendvps %%ymm9, %%ymm8, %%ymm7, %%ymm8;"
               "vmovdqa 96(%0), %%ymm9;"
               "leaq randArray(%%rip), %%r14;"
               "vpgatherdq %%ymm6, 3(%%r14,%%xmm8,8), %%ymm9;"
               "xorl %%r14d, %%r14d")

/* VPGATHERQQ: 64-bit indices gathering 64-bit elements.  Same scheme
   as the other gather tests: shift-pair clamps the indices into
   randArray, "_2" variants fold the array base into the index vector
   (scale-1, no base register) and subtract it back out, and %r14 is
   zeroed before the macro stores it to the output block. */
GEN_test_Ronly(VPGATHERQQ_128_1,
               "vpsllq $58, %%xmm7, %%xmm8;"
               "vpsrlq $58, %%xmm8, %%xmm8;"
               "vblendvpd %%xmm6, %%xmm8, %%xmm7, %%xmm8;"
               "leaq randArray(%%rip), %%r14;"
               "vpgatherqq %%xmm6, 3(%%r14,%%xmm8,8), %%xmm9;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VPGATHERQQ_256_1,
               "vpsllq $58, %%ymm7, %%ymm8;"
               "vpsrlq $58, %%ymm8, %%ymm8;"
               "vblendvpd %%ymm6, %%ymm8, %%ymm7, %%ymm8;"
               "leaq randArray(%%rip), %%r14;"
               "vpgatherqq %%ymm6, 3(%%r14,%%ymm8,8), %%ymm9;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VPGATHERQQ_128_2,
               "vpsllq $58, %%xmm7, %%xmm8;"
               "vpsrlq $58, %%xmm8, %%xmm8;"
               "vblendvpd %%xmm6, %%xmm8, %%xmm7, %%xmm8;"
               "leaq randArray(%%rip), %%r14;"
               "vmovq %%r14, %%xmm7;"
               "vpsllq $2, %%xmm8, %%xmm8;"
               "vpbroadcastq %%xmm7, %%xmm7;"
               "vpaddq %%xmm7, %%xmm8, %%xmm8;"
               "vpgatherqq %%xmm6, 1(,%%xmm8,1), %%xmm9;"
               "vpsubq %%xmm7, %%xmm8, %%xmm8;"
               "vmovdqa 0(%0), %%ymm7;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VPGATHERQQ_256_2,
               "vpsllq $58, %%ymm7, %%ymm8;"
               "vpsrlq $58, %%ymm8, %%ymm8;"
               "vblendvpd %%ymm6, %%ymm8, %%ymm7, %%ymm8;"
               "leaq randArray(%%rip), %%r14;"
               "vmovq %%r14, %%xmm7;"
               "vpsllq $2, %%ymm8, %%ymm8;"
               "vpbroadcastq %%xmm7, %%ymm7;"
               "vpaddq %%ymm7, %%ymm8, %%ymm8;"
               "vpgatherqq %%ymm6, 1(,%%ymm8,1), %%ymm9;"
               "vpsubq %%ymm7, %%ymm8, %%ymm8;"
               "vmovdqa 0(%0), %%ymm7;"
               "xorl %%r14d, %%r14d")

/* Comment duplicated above, for convenient reference:
   Allowed operands in test insns:
     Reg form: %ymm6, %ymm7, %ymm8, %ymm9 and %r14.
     Mem form: (%rax), %ymm7, %ymm8, %ymm9 and %r14.
     Imm8 etc fields are also allowed, where they make sense.
1223 */ 1224 1225 #define N_DEFAULT_ITERS 3 1226 1227 // Do the specified test some number of times 1228 #define DO_N(_iters, _testfn) \ 1229 do { int i; for (i = 0; i < (_iters); i++) { test_##_testfn(); } } while (0) 1230 1231 // Do the specified test the default number of times 1232 #define DO_D(_testfn) DO_N(N_DEFAULT_ITERS, _testfn) 1233 1234 1235 int main ( void ) 1236 { 1237 DO_D( VPOR_256 ); 1238 DO_D( VPXOR_256 ); 1239 DO_D( VPSUBB_256 ); 1240 DO_D( VPSUBD_256 ); 1241 DO_D( VPADDD_256 ); 1242 DO_D( VPMOVZXWD_256 ); 1243 DO_D( VPMOVZXBW_256 ); 1244 DO_D( VPBLENDVB_256 ); 1245 DO_D( VPMINSD_256 ); 1246 DO_D( VPMAXSD_256 ); 1247 DO_D( VPSHUFB_256 ); 1248 DO_D( VPUNPCKLBW_256 ); 1249 DO_D( VPUNPCKHBW_256 ); 1250 DO_D( VPABSD_256 ); 1251 DO_D( VPACKUSWB_256 ); 1252 DO_D( VPMOVMSKB_256 ); 1253 DO_D( VPAND_256 ); 1254 DO_D( VPCMPEQB_256 ); 1255 DO_D( VPSHUFLW_0x39_256 ); 1256 DO_D( VPSHUFHW_0x39_256 ); 1257 DO_D( VPMULLW_256 ); 1258 DO_D( VPADDUSW_256 ); 1259 DO_D( VPMULHUW_256 ); 1260 DO_D( VPADDUSB_256 ); 1261 DO_D( VPUNPCKLWD_256 ); 1262 DO_D( VPUNPCKHWD_256 ); 1263 DO_D( VPSLLD_0x05_256 ); 1264 DO_D( VPSRLD_0x05_256 ); 1265 DO_D( VPSRAD_0x05_256 ); 1266 DO_D( VPSUBUSB_256 ); 1267 DO_D( VPSUBSB_256 ); 1268 DO_D( VPSRLDQ_0x05_256 ); 1269 DO_D( VPSLLDQ_0x05_256 ); 1270 DO_D( VPANDN_256 ); 1271 DO_D( VPUNPCKLQDQ_256 ); 1272 DO_D( VPSRLW_0x05_256 ); 1273 DO_D( VPSLLW_0x05_256 ); 1274 DO_D( VPADDW_256 ); 1275 DO_D( VPACKSSDW_256 ); 1276 DO_D( VPUNPCKLDQ_256 ); 1277 DO_D( VPCMPEQD_256 ); 1278 DO_D( VPSHUFD_0x39_256 ); 1279 DO_D( VPADDQ_256 ); 1280 DO_D( VPSUBQ_256 ); 1281 DO_D( VPSUBW_256 ); 1282 DO_D( VPCMPEQQ_256 ); 1283 DO_D( VPCMPGTQ_256 ); 1284 DO_D( VPSRLQ_0x05_256 ); 1285 DO_D( VPMULUDQ_256 ); 1286 DO_D( VPMULDQ_256 ); 1287 DO_D( VPSLLQ_0x05_256 ); 1288 DO_D( VPMAXUD_256 ); 1289 DO_D( VPMINUD_256 ); 1290 DO_D( VPMULLD_256 ); 1291 DO_D( VPMAXUW_256 ); 1292 DO_D( VPMINUW_256 ); 1293 DO_D( VPMAXSW_256 ); 1294 DO_D( VPMINSW_256 ); 1295 DO_D( VPMAXUB_256 ); 1296 
DO_D( VPMINUB_256 ); 1297 DO_D( VPMAXSB_256 ); 1298 DO_D( VPMINSB_256 ); 1299 DO_D( VPMOVSXBW_256 ); 1300 DO_D( VPSUBUSW_256 ); 1301 DO_D( VPSUBSW_256 ); 1302 DO_D( VPCMPEQW_256 ); 1303 DO_D( VPADDB_256 ); 1304 DO_D( VPUNPCKHDQ_256 ); 1305 DO_D( VPMOVSXDQ_256 ); 1306 DO_D( VPMOVSXWD_256 ); 1307 DO_D( VPMULHW_256 ); 1308 DO_D( VPUNPCKHQDQ_256 ); 1309 DO_D( VPSRAW_0x05_256 ); 1310 DO_D( VPCMPGTB_256 ); 1311 DO_D( VPCMPGTW_256 ); 1312 DO_D( VPCMPGTD_256 ); 1313 DO_D( VPMOVZXBD_256 ); 1314 DO_D( VPMOVSXBD_256 ); 1315 DO_D( VPALIGNR_256_1of3 ); 1316 DO_D( VPALIGNR_256_2of3 ); 1317 DO_D( VPALIGNR_256_3of3 ); 1318 DO_D( VPBLENDW_256_0x00 ); 1319 DO_D( VPBLENDW_256_0xFE ); 1320 DO_D( VPBLENDW_256_0x30 ); 1321 DO_D( VPBLENDW_256_0x21 ); 1322 DO_D( VPBLENDW_256_0xD7 ); 1323 DO_D( VPBLENDW_256_0xB5 ); 1324 DO_D( VPBLENDW_256_0x85 ); 1325 DO_D( VPBLENDW_256_0x29 ); 1326 DO_D( VPSLLW_256 ); 1327 DO_D( VPSRLW_256 ); 1328 DO_D( VPSRAW_256 ); 1329 DO_D( VPSLLD_256 ); 1330 DO_D( VPSRLD_256 ); 1331 DO_D( VPSRAD_256 ); 1332 DO_D( VPSLLQ_256 ); 1333 DO_D( VPSRLQ_256 ); 1334 DO_D( VPMADDWD_256 ); 1335 DO_D( VMOVNTDQA_256 ); 1336 DO_D( VPACKSSWB_256 ); 1337 DO_D( VPAVGB_256 ); 1338 DO_D( VPAVGW_256 ); 1339 DO_D( VPADDSB_256 ); 1340 DO_D( VPADDSW_256 ); 1341 DO_D( VPHADDW_256 ); 1342 DO_D( VPHADDD_256 ); 1343 DO_D( VPHADDSW_256 ); 1344 DO_D( VPMADDUBSW_256 ); 1345 DO_D( VPHSUBW_256 ); 1346 DO_D( VPHSUBD_256 ); 1347 DO_D( VPHSUBSW_256 ); 1348 DO_D( VPABSB_256 ); 1349 DO_D( VPABSW_256 ); 1350 DO_D( VPMOVSXBQ_256 ); 1351 DO_D( VPMOVSXWQ_256 ); 1352 DO_D( VPACKUSDW_256 ); 1353 DO_D( VPMOVZXBQ_256 ); 1354 DO_D( VPMOVZXWQ_256 ); 1355 DO_D( VPMOVZXDQ_256 ); 1356 DO_D( VMPSADBW_256_0x0 ); 1357 DO_D( VMPSADBW_256_0x39 ); 1358 DO_D( VMPSADBW_256_0x32 ); 1359 DO_D( VMPSADBW_256_0x2b ); 1360 DO_D( VMPSADBW_256_0x24 ); 1361 DO_D( VMPSADBW_256_0x1d ); 1362 DO_D( VMPSADBW_256_0x16 ); 1363 DO_D( VMPSADBW_256_0x0f ); 1364 DO_D( VPSADBW_256 ); 1365 DO_D( VPSIGNB_256 ); 1366 DO_D( VPSIGNW_256 ); 1367 DO_D( 
VPSIGND_256 ); 1368 DO_D( VPMULHRSW_256 ); 1369 DO_D( VBROADCASTI128 ); 1370 DO_D( VEXTRACTI128_0x0 ); 1371 DO_D( VEXTRACTI128_0x1 ); 1372 DO_D( VINSERTI128_0x0 ); 1373 DO_D( VINSERTI128_0x1 ); 1374 DO_D( VPERM2I128_0x00 ); 1375 DO_D( VPERM2I128_0xFF ); 1376 DO_D( VPERM2I128_0x30 ); 1377 DO_D( VPERM2I128_0x21 ); 1378 DO_D( VPERM2I128_0x12 ); 1379 DO_D( VPERM2I128_0x03 ); 1380 DO_D( VPERM2I128_0x85 ); 1381 DO_D( VPERM2I128_0x5A ); 1382 DO_D( VBROADCASTSS_128 ); 1383 DO_D( VBROADCASTSS_256 ); 1384 DO_D( VBROADCASTSD_256 ); 1385 DO_D( VPERMD ); 1386 DO_D( VPERMQ_0x00 ); 1387 DO_D( VPERMQ_0xFE ); 1388 DO_D( VPERMQ_0x30 ); 1389 DO_D( VPERMQ_0x21 ); 1390 DO_D( VPERMQ_0xD7 ); 1391 DO_D( VPERMQ_0xB5 ); 1392 DO_D( VPERMQ_0x85 ); 1393 DO_D( VPERMQ_0x29 ); 1394 DO_D( VPERMPS ); 1395 DO_D( VPERMPD_0x00 ); 1396 DO_D( VPERMPD_0xFE ); 1397 DO_D( VPERMPD_0x30 ); 1398 DO_D( VPERMPD_0x21 ); 1399 DO_D( VPERMPD_0xD7 ); 1400 DO_D( VPERMPD_0xB5 ); 1401 DO_D( VPERMPD_0x85 ); 1402 DO_D( VPERMPD_0x29 ); 1403 DO_D( VPBLENDD_128_0x00 ); 1404 DO_D( VPBLENDD_128_0x02 ); 1405 DO_D( VPBLENDD_128_0x04 ); 1406 DO_D( VPBLENDD_128_0x06 ); 1407 DO_D( VPBLENDD_128_0x08 ); 1408 DO_D( VPBLENDD_128_0x0A ); 1409 DO_D( VPBLENDD_128_0x0C ); 1410 DO_D( VPBLENDD_128_0x0E ); 1411 DO_D( VPBLENDD_256_0x00 ); 1412 DO_D( VPBLENDD_256_0xFE ); 1413 DO_D( VPBLENDD_256_0x30 ); 1414 DO_D( VPBLENDD_256_0x21 ); 1415 DO_D( VPBLENDD_256_0xD7 ); 1416 DO_D( VPBLENDD_256_0xB5 ); 1417 DO_D( VPBLENDD_256_0x85 ); 1418 DO_D( VPBLENDD_256_0x29 ); 1419 DO_D( VPSLLVD_128 ); 1420 DO_D( VPSLLVD_256 ); 1421 DO_D( VPSLLVQ_128 ); 1422 DO_D( VPSLLVQ_256 ); 1423 DO_D( VPSRLVD_128 ); 1424 DO_D( VPSRLVD_256 ); 1425 DO_D( VPSRLVQ_128 ); 1426 DO_D( VPSRLVQ_256 ); 1427 DO_D( VPSRAVD_128 ); 1428 DO_D( VPSRAVD_256 ); 1429 DO_D( VPBROADCASTB_128 ); 1430 DO_D( VPBROADCASTB_256 ); 1431 DO_D( VPBROADCASTW_128 ); 1432 DO_D( VPBROADCASTW_256 ); 1433 DO_D( VPBROADCASTD_128 ); 1434 DO_D( VPBROADCASTD_256 ); 1435 DO_D( VPBROADCASTQ_128 ); 1436 DO_D( 
VPBROADCASTQ_256 ); 1437 DO_D( VPMASKMOVD_128_LoadForm ); 1438 DO_D( VPMASKMOVD_256_LoadForm ); 1439 DO_D( VPMASKMOVQ_128_LoadForm ); 1440 DO_D( VPMASKMOVQ_256_LoadForm ); 1441 { int i; for (i = 0; i < sizeof(randArray); i++) randArray[i] = randUChar(); } 1442 DO_D( VGATHERDPS_128 ); 1443 DO_D( VGATHERDPS_256 ); 1444 DO_D( VGATHERQPS_128_1 ); 1445 DO_D( VGATHERQPS_256_1 ); 1446 DO_D( VGATHERQPS_128_2 ); 1447 DO_D( VGATHERQPS_256_2 ); 1448 DO_D( VGATHERDPD_128 ); 1449 DO_D( VGATHERDPD_256 ); 1450 DO_D( VGATHERQPD_128_1 ); 1451 DO_D( VGATHERQPD_256_1 ); 1452 DO_D( VGATHERQPD_128_2 ); 1453 DO_D( VGATHERQPD_256_2 ); 1454 DO_D( VPGATHERDD_128 ); 1455 DO_D( VPGATHERDD_256 ); 1456 DO_D( VPGATHERQD_128_1 ); 1457 DO_D( VPGATHERQD_256_1 ); 1458 DO_D( VPGATHERQD_128_2 ); 1459 DO_D( VPGATHERQD_256_2 ); 1460 DO_D( VPGATHERDQ_128 ); 1461 DO_D( VPGATHERDQ_256 ); 1462 DO_D( VPGATHERQQ_128_1 ); 1463 DO_D( VPGATHERQQ_256_1 ); 1464 DO_D( VPGATHERQQ_128_2 ); 1465 DO_D( VPGATHERQQ_256_2 ); 1466 return 0; 1467 } 1468