#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include "tests/malloc.h"

typedef unsigned char           UChar;
typedef unsigned int            UInt;
typedef unsigned long int       UWord;
typedef unsigned long long int  ULong;

#if defined(VGO_darwin)
UChar randArray[1027] __attribute__((used));
#else
UChar _randArray[1027] __attribute__((used));
#endif

#define IS_32_ALIGNED(_ptr) (0 == (0x1F & (UWord)(_ptr)))

typedef union { UChar u8[32]; UInt u32[8]; } YMM;

typedef struct { YMM a1; YMM a2; YMM a3; YMM a4; ULong u64; } Block;

void showYMM ( YMM* vec )
{
   int i;
   assert(IS_32_ALIGNED(vec));
   for (i = 31; i >= 0; i--) {
      printf("%02x", (UInt)vec->u8[i]);
      if (i > 0 && 0 == ((i+0) & 7)) printf(".");
   }
}

void showBlock ( char* msg, Block* block )
{
   printf("  %s\n", msg);
   printf("    "); showYMM(&block->a1); printf("\n");
   printf("    "); showYMM(&block->a2); printf("\n");
   printf("    "); showYMM(&block->a3); printf("\n");
   printf("    "); showYMM(&block->a4); printf("\n");
   printf("    %016llx\n", block->u64);
}

UChar randUChar ( void )
{
   static UInt seed = 80021;
   seed = 1103515245 * seed + 12345;
   return (seed >> 17) & 0xFF;
}

void randBlock ( Block* b )
{
   int i;
   UChar* p = (UChar*)b;
   for (i = 0; i < sizeof(Block); i++)
      p[i] = randUChar();
}


/* Generate a function test_NAME, that tests the given insn, in both
   its mem and reg forms.  The reg form of the insn may mention, as
   operands only %ymm6, %ymm7, %ymm8, %ymm9 and %r14.  The mem form of
   the insn may mention as operands only (%rax), %ymm7, %ymm8, %ymm9
   and %r14.  It's OK for the insn to clobber ymm0, as this is needed
   for testing PCMPxSTRx, and ymm6, as this is needed for testing
   MOVMASK variants. */

#define GEN_test_RandM(_name, _reg_form, _mem_form)   \
    \
    __attribute__ ((noinline)) static void test_##_name ( void )   \
    { \
       Block* b = memalign32(sizeof(Block)); \
       randBlock(b); \
       printf("%s(reg)\n", #_name); \
       showBlock("before", b); \
       __asm__ __volatile__( \
          "vmovdqa 0(%0),%%ymm7"   "\n\t" \
          "vmovdqa 32(%0),%%ymm8"  "\n\t" \
          "vmovdqa 64(%0),%%ymm6"  "\n\t" \
          "vmovdqa 96(%0),%%ymm9"  "\n\t" \
          "movq 128(%0),%%r14"     "\n\t" \
          _reg_form   "\n\t" \
          "vmovdqa %%ymm7, 0(%0)"  "\n\t" \
          "vmovdqa %%ymm8, 32(%0)" "\n\t" \
          "vmovdqa %%ymm6, 64(%0)" "\n\t" \
          "vmovdqa %%ymm9, 96(%0)" "\n\t" \
          "movq %%r14, 128(%0)"    "\n\t" \
          : /*OUT*/  \
          : /*IN*/"r"(b) \
          : /*TRASH*/"xmm0","xmm7","xmm8","xmm6","xmm9","r14","memory","cc" \
       ); \
       showBlock("after", b); \
       randBlock(b); \
       printf("%s(mem)\n", #_name); \
       showBlock("before", b); \
       __asm__ __volatile__( \
          "leaq 0(%0),%%rax"       "\n\t" \
          "vmovdqa 32(%0),%%ymm8"  "\n\t" \
          "vmovdqa 64(%0),%%ymm7"  "\n\t" \
          "vmovdqa 96(%0),%%ymm9"  "\n\t" \
          "movq 128(%0),%%r14"     "\n\t" \
          _mem_form   "\n\t" \
          "vmovdqa %%ymm8, 32(%0)" "\n\t" \
          "vmovdqa %%ymm7, 64(%0)" "\n\t" \
          "vmovdqa %%ymm9, 96(%0)" "\n\t" \
          "movq %%r14, 128(%0)"    "\n\t" \
          : /*OUT*/  \
          : /*IN*/"r"(b) \
          : /*TRASH*/"xmm6", \
            "xmm0","xmm8","xmm7","xmm9","r14","rax","memory","cc" \
       ); \
       showBlock("after", b); \
       printf("\n"); \
       free(b); \
    }

#define GEN_test_Ronly(_name, _reg_form) \
   GEN_test_RandM(_name, _reg_form, "")
#define GEN_test_Monly(_name, _mem_form) \
   GEN_test_RandM(_name, "", _mem_form)
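
/* Editor-added sketch (hypothetical; not part of the original test set):
   the inline asm in GEN_test_RandM hard-codes the byte offsets of the
   Block fields (a1..a4 at 0/32/64/96, u64 at 128).  The typedef below,
   whose name is invented here, merely documents that layout assumption
   at compile time and generates no code. */
#include <stddef.h>   /* offsetof */
typedef char editor_block_layout_check
   [ (offsetof(Block, a1) ==   0 && offsetof(Block, a2) ==  32 &&
      offsetof(Block, a3) ==  64 && offsetof(Block, a4) ==  96 &&
      offsetof(Block, u64) == 128) ? 1 : -1 ];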

/* Vector integers promoted from 128-bit in AVX to 256-bit in AVX2. */

GEN_test_RandM(VPOR_256,
               "vpor %%ymm6, %%ymm8, %%ymm7",
               "vpor (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPXOR_256,
               "vpxor %%ymm6, %%ymm8, %%ymm7",
               "vpxor (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSUBB_256,
               "vpsubb %%ymm6, %%ymm8, %%ymm7",
               "vpsubb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSUBD_256,
               "vpsubd %%ymm6, %%ymm8, %%ymm7",
               "vpsubd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPADDD_256,
               "vpaddd %%ymm6, %%ymm8, %%ymm7",
               "vpaddd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMOVZXWD_256,
               "vpmovzxwd %%xmm6, %%ymm8",
               "vpmovzxwd (%%rax), %%ymm8")

GEN_test_RandM(VPMOVZXBW_256,
               "vpmovzxbw %%xmm6, %%ymm8",
               "vpmovzxbw (%%rax), %%ymm8")

GEN_test_RandM(VPBLENDVB_256,
               "vpblendvb %%ymm9, %%ymm6, %%ymm8, %%ymm7",
               "vpblendvb %%ymm9, (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMINSD_256,
               "vpminsd %%ymm6, %%ymm8, %%ymm7",
               "vpminsd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMAXSD_256,
               "vpmaxsd %%ymm6, %%ymm8, %%ymm7",
               "vpmaxsd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSHUFB_256,
               "vpshufb %%ymm6, %%ymm8, %%ymm7",
               "vpshufb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPUNPCKLBW_256,
               "vpunpcklbw %%ymm6, %%ymm8, %%ymm7",
               "vpunpcklbw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPUNPCKHBW_256,
               "vpunpckhbw %%ymm6, %%ymm8, %%ymm7",
               "vpunpckhbw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPABSD_256,
               "vpabsd %%ymm6, %%ymm8",
               "vpabsd (%%rax), %%ymm8")

GEN_test_RandM(VPACKUSWB_256,
               "vpackuswb %%ymm9, %%ymm8, %%ymm7",
               "vpackuswb (%%rax), %%ymm8, %%ymm7")

GEN_test_Ronly(VPMOVMSKB_256,
               "vpmovmskb %%ymm8, %%r14")

GEN_test_RandM(VPAND_256,
               "vpand %%ymm9, %%ymm8, %%ymm7",
               "vpand (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPCMPEQB_256,
               "vpcmpeqb %%ymm9, %%ymm8, %%ymm7",
               "vpcmpeqb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSHUFLW_0x39_256,
               "vpshuflw $0x39, %%ymm9, %%ymm7",
               "vpshuflw $0xC6, (%%rax), %%ymm8")

GEN_test_RandM(VPSHUFHW_0x39_256,
               "vpshufhw $0x39, %%ymm9, %%ymm7",
               "vpshufhw $0xC6, (%%rax), %%ymm8")

GEN_test_RandM(VPMULLW_256,
               "vpmullw %%ymm9, %%ymm8, %%ymm7",
               "vpmullw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPADDUSW_256,
               "vpaddusw %%ymm9, %%ymm8, %%ymm7",
               "vpaddusw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMULHUW_256,
               "vpmulhuw %%ymm9, %%ymm8, %%ymm7",
               "vpmulhuw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPADDUSB_256,
               "vpaddusb %%ymm9, %%ymm8, %%ymm7",
               "vpaddusb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPUNPCKLWD_256,
               "vpunpcklwd %%ymm6, %%ymm8, %%ymm7",
               "vpunpcklwd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPUNPCKHWD_256,
               "vpunpckhwd %%ymm6, %%ymm8, %%ymm7",
               "vpunpckhwd (%%rax), %%ymm8, %%ymm7")

GEN_test_Ronly(VPSLLD_0x05_256,
               "vpslld $0x5, %%ymm9, %%ymm7")

GEN_test_Ronly(VPSRLD_0x05_256,
               "vpsrld $0x5, %%ymm9, %%ymm7")

GEN_test_Ronly(VPSRAD_0x05_256,
               "vpsrad $0x5, %%ymm9, %%ymm7")

GEN_test_RandM(VPSUBUSB_256,
               "vpsubusb %%ymm9, %%ymm8, %%ymm7",
               "vpsubusb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSUBSB_256,
               "vpsubsb %%ymm9, %%ymm8, %%ymm7",
               "vpsubsb (%%rax), %%ymm8, %%ymm7")

GEN_test_Ronly(VPSRLDQ_0x05_256,
               "vpsrldq $0x5, %%ymm9, %%ymm7")

GEN_test_Ronly(VPSLLDQ_0x05_256,
               "vpslldq $0x5, %%ymm9, %%ymm7")

GEN_test_RandM(VPANDN_256,
               "vpandn %%ymm9, %%ymm8, %%ymm7",
               "vpandn (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPUNPCKLQDQ_256,
               "vpunpcklqdq %%ymm6, %%ymm8, %%ymm7",
               "vpunpcklqdq (%%rax), %%ymm8, %%ymm7")

GEN_test_Ronly(VPSRLW_0x05_256,
               "vpsrlw $0x5, %%ymm9, %%ymm7")

GEN_test_Ronly(VPSLLW_0x05_256,
               "vpsllw $0x5, %%ymm9, %%ymm7")

GEN_test_RandM(VPADDW_256,
               "vpaddw %%ymm6, %%ymm8, %%ymm7",
               "vpaddw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPACKSSDW_256,
               "vpackssdw %%ymm9, %%ymm8, %%ymm7",
               "vpackssdw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPUNPCKLDQ_256,
               "vpunpckldq %%ymm6, %%ymm8, %%ymm7",
               "vpunpckldq (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPCMPEQD_256,
               "vpcmpeqd %%ymm6, %%ymm8, %%ymm7",
               "vpcmpeqd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSHUFD_0x39_256,
               "vpshufd $0x39, %%ymm9, %%ymm8",
               "vpshufd $0xC6, (%%rax), %%ymm7")

GEN_test_RandM(VPADDQ_256,
               "vpaddq %%ymm6, %%ymm8, %%ymm7",
               "vpaddq (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSUBQ_256,
               "vpsubq %%ymm6, %%ymm8, %%ymm7",
               "vpsubq (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSUBW_256,
               "vpsubw %%ymm6, %%ymm8, %%ymm7",
               "vpsubw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPCMPEQQ_256,
               "vpcmpeqq %%ymm6, %%ymm8, %%ymm7",
               "vpcmpeqq (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPCMPGTQ_256,
               "vpcmpgtq %%ymm6, %%ymm8, %%ymm7",
               "vpcmpgtq (%%rax), %%ymm8, %%ymm7")

GEN_test_Ronly(VPSRLQ_0x05_256,
               "vpsrlq $0x5, %%ymm9, %%ymm7")

GEN_test_RandM(VPMULUDQ_256,
               "vpmuludq %%ymm6, %%ymm8, %%ymm7",
               "vpmuludq (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMULDQ_256,
               "vpmuldq %%ymm6, %%ymm8, %%ymm7",
               "vpmuldq (%%rax), %%ymm8, %%ymm7")

GEN_test_Ronly(VPSLLQ_0x05_256,
               "vpsllq $0x5, %%ymm9, %%ymm7")

GEN_test_RandM(VPMAXUD_256,
               "vpmaxud %%ymm6, %%ymm8, %%ymm7",
               "vpmaxud (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMINUD_256,
               "vpminud %%ymm6, %%ymm8, %%ymm7",
               "vpminud (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMULLD_256,
               "vpmulld %%ymm6, %%ymm8, %%ymm7",
               "vpmulld (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMAXUW_256,
               "vpmaxuw %%ymm6, %%ymm8, %%ymm7",
               "vpmaxuw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMINUW_256,
               "vpminuw %%ymm6, %%ymm8, %%ymm7",
               "vpminuw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMAXSW_256,
               "vpmaxsw %%ymm6, %%ymm8, %%ymm7",
               "vpmaxsw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMINSW_256,
               "vpminsw %%ymm6, %%ymm8, %%ymm7",
               "vpminsw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMAXUB_256,
               "vpmaxub %%ymm6, %%ymm8, %%ymm7",
               "vpmaxub (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMINUB_256,
               "vpminub %%ymm6, %%ymm8, %%ymm7",
               "vpminub (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMAXSB_256,
               "vpmaxsb %%ymm6, %%ymm8, %%ymm7",
               "vpmaxsb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMINSB_256,
               "vpminsb %%ymm6, %%ymm8, %%ymm7",
               "vpminsb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMOVSXBW_256,
               "vpmovsxbw %%xmm6, %%ymm8",
               "vpmovsxbw (%%rax), %%ymm8")

GEN_test_RandM(VPSUBUSW_256,
               "vpsubusw %%ymm9, %%ymm8, %%ymm7",
               "vpsubusw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSUBSW_256,
               "vpsubsw %%ymm9, %%ymm8, %%ymm7",
               "vpsubsw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPCMPEQW_256,
               "vpcmpeqw %%ymm6, %%ymm8, %%ymm7",
               "vpcmpeqw (%%rax), %%ymm8, %%ymm7")
376 "vpcmpeqw (%%rax), %%ymm8, %%ymm7") 377 378 GEN_test_RandM(VPADDB_256, 379 "vpaddb %%ymm6, %%ymm8, %%ymm7", 380 "vpaddb (%%rax), %%ymm8, %%ymm7") 381 382 GEN_test_RandM(VPUNPCKHDQ_256, 383 "vpunpckhdq %%ymm6, %%ymm8, %%ymm7", 384 "vpunpckhdq (%%rax), %%ymm8, %%ymm7") 385 386 GEN_test_RandM(VPMOVSXDQ_256, 387 "vpmovsxdq %%xmm6, %%ymm8", 388 "vpmovsxdq (%%rax), %%ymm8") 389 390 GEN_test_RandM(VPMOVSXWD_256, 391 "vpmovsxwd %%xmm6, %%ymm8", 392 "vpmovsxwd (%%rax), %%ymm8") 393 394 GEN_test_RandM(VPMULHW_256, 395 "vpmulhw %%ymm9, %%ymm8, %%ymm7", 396 "vpmulhw (%%rax), %%ymm8, %%ymm7") 397 398 GEN_test_RandM(VPUNPCKHQDQ_256, 399 "vpunpckhqdq %%ymm6, %%ymm8, %%ymm7", 400 "vpunpckhqdq (%%rax), %%ymm8, %%ymm7") 401 402 GEN_test_Ronly(VPSRAW_0x05_256, 403 "vpsraw $0x5, %%ymm9, %%ymm7") 404 405 GEN_test_RandM(VPCMPGTB_256, 406 "vpcmpgtb %%ymm6, %%ymm8, %%ymm7", 407 "vpcmpgtb (%%rax), %%ymm8, %%ymm7") 408 409 GEN_test_RandM(VPCMPGTW_256, 410 "vpcmpgtw %%ymm6, %%ymm8, %%ymm7", 411 "vpcmpgtw (%%rax), %%ymm8, %%ymm7") 412 413 GEN_test_RandM(VPCMPGTD_256, 414 "vpcmpgtd %%ymm6, %%ymm8, %%ymm7", 415 "vpcmpgtd (%%rax), %%ymm8, %%ymm7") 416 417 GEN_test_RandM(VPMOVZXBD_256, 418 "vpmovzxbd %%xmm6, %%ymm8", 419 "vpmovzxbd (%%rax), %%ymm8") 420 421 GEN_test_RandM(VPMOVSXBD_256, 422 "vpmovsxbd %%xmm6, %%ymm8", 423 "vpmovsxbd (%%rax), %%ymm8") 424 425 GEN_test_RandM(VPALIGNR_256_1of3, 426 "vpalignr $0, %%ymm6, %%ymm8, %%ymm7", 427 "vpalignr $3, (%%rax), %%ymm8, %%ymm7") 428 GEN_test_RandM(VPALIGNR_256_2of3, 429 "vpalignr $6, %%ymm6, %%ymm8, %%ymm7", 430 "vpalignr $9, (%%rax), %%ymm8, %%ymm7") 431 GEN_test_RandM(VPALIGNR_256_3of3, 432 "vpalignr $12, %%ymm6, %%ymm8, %%ymm7", 433 "vpalignr $15, (%%rax), %%ymm8, %%ymm7") 434 435 GEN_test_RandM(VPBLENDW_256_0x00, 436 "vpblendw $0x00, %%ymm6, %%ymm8, %%ymm7", 437 "vpblendw $0x01, (%%rax), %%ymm8, %%ymm7") 438 GEN_test_RandM(VPBLENDW_256_0xFE, 439 "vpblendw $0xFE, %%ymm6, %%ymm8, %%ymm7", 440 "vpblendw $0xFF, (%%rax), %%ymm8, %%ymm7") 441 GEN_test_RandM(VPBLENDW_256_0x30, 442 "vpblendw $0x30, %%ymm6, %%ymm8, %%ymm7", 443 "vpblendw $0x03, (%%rax), %%ymm8, %%ymm7") 444 GEN_test_RandM(VPBLENDW_256_0x21, 445 "vpblendw $0x21, %%ymm6, %%ymm8, %%ymm7", 446 "vpblendw $0x12, (%%rax), %%ymm8, %%ymm7") 447 GEN_test_RandM(VPBLENDW_256_0xD7, 448 "vpblendw $0xD7, %%ymm6, %%ymm8, %%ymm7", 449 "vpblendw $0x6C, (%%rax), %%ymm8, %%ymm7") 450 GEN_test_RandM(VPBLENDW_256_0xB5, 451 "vpblendw $0xB5, %%ymm6, %%ymm8, %%ymm7", 452 "vpblendw $0x4A, (%%rax), %%ymm8, %%ymm7") 453 GEN_test_RandM(VPBLENDW_256_0x85, 454 "vpblendw $0x85, %%ymm6, %%ymm8, %%ymm7", 455 "vpblendw $0xDC, (%%rax), %%ymm8, %%ymm7") 456 GEN_test_RandM(VPBLENDW_256_0x29, 457 "vpblendw $0x29, %%ymm6, %%ymm8, %%ymm7", 458 "vpblendw $0x92, (%%rax), %%ymm8, %%ymm7") 459 460 GEN_test_RandM(VPSLLW_256, 461 "andl $15, %%r14d;" 462 "vmovd %%r14d, %%xmm6;" 463 "vpsllw %%xmm6, %%ymm8, %%ymm9", 464 "andq $15, 128(%%rax);" 465 "vpsllw 128(%%rax), %%ymm8, %%ymm9") 466 467 GEN_test_RandM(VPSRLW_256, 468 "andl $15, %%r14d;" 469 "vmovd %%r14d, %%xmm6;" 470 "vpsrlw %%xmm6, %%ymm8, %%ymm9", 471 "andq $15, 128(%%rax);" 472 "vpsrlw 128(%%rax), %%ymm8, %%ymm9") 473 474 GEN_test_RandM(VPSRAW_256, 475 "andl $31, %%r14d;" 476 "vmovd %%r14d, %%xmm6;" 477 "vpsraw %%xmm6, %%ymm8, %%ymm9", 478 "andq $15, 128(%%rax);" 479 "vpsraw 128(%%rax), %%ymm8, %%ymm9") 480 481 GEN_test_RandM(VPSLLD_256, 482 "andl $31, %%r14d;" 483 "vmovd %%r14d, %%xmm6;" 484 "vpslld %%xmm6, %%ymm8, %%ymm9", 485 "andq $31, 128(%%rax);" 486 "vpslld 128(%%rax), %%ymm8, %%ymm9") 487 

GEN_test_RandM(VPSRLD_256,
               "andl $31, %%r14d;"
               "vmovd %%r14d, %%xmm6;"
               "vpsrld %%xmm6, %%ymm8, %%ymm9",
               "andq $31, 128(%%rax);"
               "vpsrld 128(%%rax), %%ymm8, %%ymm9")

GEN_test_RandM(VPSRAD_256,
               "andl $31, %%r14d;"
               "vmovd %%r14d, %%xmm6;"
               "vpsrad %%xmm6, %%ymm8, %%ymm9",
               "andq $31, 128(%%rax);"
               "vpsrad 128(%%rax), %%ymm8, %%ymm9")

GEN_test_RandM(VPSLLQ_256,
               "andl $63, %%r14d;"
               "vmovd %%r14d, %%xmm6;"
               "vpsllq %%xmm6, %%ymm8, %%ymm9",
               "andq $63, 128(%%rax);"
               "vpsllq 128(%%rax), %%ymm8, %%ymm9")

GEN_test_RandM(VPSRLQ_256,
               "andl $63, %%r14d;"
               "vmovd %%r14d, %%xmm6;"
               "vpsrlq %%xmm6, %%ymm8, %%ymm9",
               "andq $63, 128(%%rax);"
               "vpsrlq 128(%%rax), %%ymm8, %%ymm9")

GEN_test_RandM(VPMADDWD_256,
               "vpmaddwd %%ymm6, %%ymm8, %%ymm7",
               "vpmaddwd (%%rax), %%ymm8, %%ymm7")

GEN_test_Monly(VMOVNTDQA_256,
               "vmovntdqa (%%rax), %%ymm9")

GEN_test_RandM(VPACKSSWB_256,
               "vpacksswb %%ymm6, %%ymm8, %%ymm7",
               "vpacksswb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPAVGB_256,
               "vpavgb %%ymm6, %%ymm8, %%ymm7",
               "vpavgb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPAVGW_256,
               "vpavgw %%ymm6, %%ymm8, %%ymm7",
               "vpavgw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPADDSB_256,
               "vpaddsb %%ymm6, %%ymm8, %%ymm7",
               "vpaddsb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPADDSW_256,
               "vpaddsw %%ymm6, %%ymm8, %%ymm7",
               "vpaddsw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPHADDW_256,
               "vphaddw %%ymm6, %%ymm8, %%ymm7",
               "vphaddw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPHADDD_256,
               "vphaddd %%ymm6, %%ymm8, %%ymm7",
               "vphaddd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPHADDSW_256,
               "vphaddsw %%ymm6, %%ymm8, %%ymm7",
               "vphaddsw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMADDUBSW_256,
               "vpmaddubsw %%ymm6, %%ymm8, %%ymm7",
               "vpmaddubsw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPHSUBW_256,
               "vphsubw %%ymm6, %%ymm8, %%ymm7",
               "vphsubw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPHSUBD_256,
               "vphsubd %%ymm6, %%ymm8, %%ymm7",
               "vphsubd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPHSUBSW_256,
               "vphsubsw %%ymm6, %%ymm8, %%ymm7",
               "vphsubsw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPABSB_256,
               "vpabsb %%ymm6, %%ymm7",
               "vpabsb (%%rax), %%ymm7")

GEN_test_RandM(VPABSW_256,
               "vpabsw %%ymm6, %%ymm7",
               "vpabsw (%%rax), %%ymm7")

GEN_test_RandM(VPMOVSXBQ_256,
               "vpmovsxbq %%xmm6, %%ymm8",
               "vpmovsxbq (%%rax), %%ymm8")

GEN_test_RandM(VPMOVSXWQ_256,
               "vpmovsxwq %%xmm6, %%ymm8",
               "vpmovsxwq (%%rax), %%ymm8")

GEN_test_RandM(VPACKUSDW_256,
               "vpackusdw %%ymm6, %%ymm8, %%ymm7",
               "vpackusdw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMOVZXBQ_256,
               "vpmovzxbq %%xmm6, %%ymm8",
               "vpmovzxbq (%%rax), %%ymm8")

GEN_test_RandM(VPMOVZXWQ_256,
               "vpmovzxwq %%xmm6, %%ymm8",
               "vpmovzxwq (%%rax), %%ymm8")

GEN_test_RandM(VPMOVZXDQ_256,
               "vpmovzxdq %%xmm6, %%ymm8",
               "vpmovzxdq (%%rax), %%ymm8")

GEN_test_RandM(VMPSADBW_256_0x0,
               "vmpsadbw $0, %%ymm6, %%ymm8, %%ymm7",
               "vmpsadbw $0, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VMPSADBW_256_0x39,
               "vmpsadbw $0x39, %%ymm6, %%ymm8, %%ymm7",
               "vmpsadbw $0x39, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VMPSADBW_256_0x32,
               "vmpsadbw $0x32, %%ymm6, %%ymm8, %%ymm7",
               "vmpsadbw $0x32, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VMPSADBW_256_0x2b,
               "vmpsadbw $0x2b, %%ymm6, %%ymm8, %%ymm7",
               "vmpsadbw $0x2b, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VMPSADBW_256_0x24,
               "vmpsadbw $0x24, %%ymm6, %%ymm8, %%ymm7",
               "vmpsadbw $0x24, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VMPSADBW_256_0x1d,
               "vmpsadbw $0x1d, %%ymm6, %%ymm8, %%ymm7",
               "vmpsadbw $0x1d, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VMPSADBW_256_0x16,
               "vmpsadbw $0x16, %%ymm6, %%ymm8, %%ymm7",
               "vmpsadbw $0x16, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VMPSADBW_256_0x0f,
               "vmpsadbw $0x0f, %%ymm6, %%ymm8, %%ymm7",
               "vmpsadbw $0x0f, (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSADBW_256,
               "vpsadbw %%ymm6, %%ymm8, %%ymm7",
               "vpsadbw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSIGNB_256,
               "vpsignb %%ymm6, %%ymm8, %%ymm7",
               "vpsignb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSIGNW_256,
               "vpsignw %%ymm6, %%ymm8, %%ymm7",
               "vpsignw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSIGND_256,
               "vpsignd %%ymm6, %%ymm8, %%ymm7",
               "vpsignd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMULHRSW_256,
               "vpmulhrsw %%ymm6, %%ymm8, %%ymm7",
               "vpmulhrsw (%%rax), %%ymm8, %%ymm7")

/* Instructions new in AVX2. */

GEN_test_Monly(VBROADCASTI128,
               "vbroadcasti128 (%%rax), %%ymm9")

GEN_test_RandM(VEXTRACTI128_0x0,
               "vextracti128 $0x0, %%ymm7, %%xmm9",
               "vextracti128 $0x0, %%ymm7, (%%rax)")

GEN_test_RandM(VEXTRACTI128_0x1,
               "vextracti128 $0x1, %%ymm7, %%xmm9",
               "vextracti128 $0x1, %%ymm7, (%%rax)")

GEN_test_RandM(VINSERTI128_0x0,
               "vinserti128 $0x0, %%xmm9, %%ymm7, %%ymm8",
               "vinserti128 $0x0, (%%rax), %%ymm7, %%ymm8")

GEN_test_RandM(VINSERTI128_0x1,
               "vinserti128 $0x1, %%xmm9, %%ymm7, %%ymm8",
               "vinserti128 $0x1, (%%rax), %%ymm7, %%ymm8")

GEN_test_RandM(VPERM2I128_0x00,
               "vperm2i128 $0x00, %%ymm6, %%ymm8, %%ymm7",
               "vperm2i128 $0x00, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPERM2I128_0xFF,
               "vperm2i128 $0xFF, %%ymm6, %%ymm8, %%ymm7",
               "vperm2i128 $0xFF, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPERM2I128_0x30,
               "vperm2i128 $0x30, %%ymm6, %%ymm8, %%ymm7",
               "vperm2i128 $0x30, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPERM2I128_0x21,
               "vperm2i128 $0x21, %%ymm6, %%ymm8, %%ymm7",
               "vperm2i128 $0x21, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPERM2I128_0x12,
               "vperm2i128 $0x12, %%ymm6, %%ymm8, %%ymm7",
               "vperm2i128 $0x12, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPERM2I128_0x03,
               "vperm2i128 $0x03, %%ymm6, %%ymm8, %%ymm7",
               "vperm2i128 $0x03, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPERM2I128_0x85,
               "vperm2i128 $0x85, %%ymm6, %%ymm8, %%ymm7",
               "vperm2i128 $0x85, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPERM2I128_0x5A,
               "vperm2i128 $0x5A, %%ymm6, %%ymm8, %%ymm7",
               "vperm2i128 $0x5A, (%%rax), %%ymm8, %%ymm7")

GEN_test_Ronly(VBROADCASTSS_128,
               "vbroadcastss %%xmm9, %%xmm7")

GEN_test_Ronly(VBROADCASTSS_256,
               "vbroadcastss %%xmm9, %%ymm7")

GEN_test_Ronly(VBROADCASTSD_256,
               "vbroadcastsd %%xmm9, %%ymm7")

GEN_test_RandM(VPERMD,
               "vpermd %%ymm6, %%ymm7, %%ymm9",
               "vpermd (%%rax), %%ymm7, %%ymm9")

GEN_test_RandM(VPERMQ_0x00,
               "vpermq $0x00, %%ymm6, %%ymm7",
               "vpermq $0x01, (%%rax), %%ymm7")
GEN_test_RandM(VPERMQ_0xFE,
               "vpermq $0xFE, %%ymm6, %%ymm7",
               "vpermq $0xFF, (%%rax), %%ymm7")
GEN_test_RandM(VPERMQ_0x30,
               "vpermq $0x30, %%ymm6, %%ymm7",
               "vpermq $0x03, (%%rax), %%ymm7")
"vpermq $0x03, (%%rax), %%ymm7") 716 GEN_test_RandM(VPERMQ_0x21, 717 "vpermq $0x21, %%ymm6, %%ymm7", 718 "vpermq $0x12, (%%rax), %%ymm7") 719 GEN_test_RandM(VPERMQ_0xD7, 720 "vpermq $0xD7, %%ymm6, %%ymm7", 721 "vpermq $0x6C, (%%rax), %%ymm7") 722 GEN_test_RandM(VPERMQ_0xB5, 723 "vpermq $0xB5, %%ymm6, %%ymm7", 724 "vpermq $0x4A, (%%rax), %%ymm7") 725 GEN_test_RandM(VPERMQ_0x85, 726 "vpermq $0x85, %%ymm6, %%ymm7", 727 "vpermq $0xDC, (%%rax), %%ymm7") 728 GEN_test_RandM(VPERMQ_0x29, 729 "vpermq $0x29, %%ymm6, %%ymm7", 730 "vpermq $0x92, (%%rax), %%ymm7") 731 732 GEN_test_RandM(VPERMPS, 733 "vpermps %%ymm6, %%ymm7, %%ymm9", 734 "vpermps (%%rax), %%ymm7, %%ymm9") 735 736 GEN_test_RandM(VPERMPD_0x00, 737 "vpermpd $0x00, %%ymm6, %%ymm7", 738 "vpermpd $0x01, (%%rax), %%ymm7") 739 GEN_test_RandM(VPERMPD_0xFE, 740 "vpermpd $0xFE, %%ymm6, %%ymm7", 741 "vpermpd $0xFF, (%%rax), %%ymm7") 742 GEN_test_RandM(VPERMPD_0x30, 743 "vpermpd $0x30, %%ymm6, %%ymm7", 744 "vpermpd $0x03, (%%rax), %%ymm7") 745 GEN_test_RandM(VPERMPD_0x21, 746 "vpermpd $0x21, %%ymm6, %%ymm7", 747 "vpermpd $0x12, (%%rax), %%ymm7") 748 GEN_test_RandM(VPERMPD_0xD7, 749 "vpermpd $0xD7, %%ymm6, %%ymm7", 750 "vpermpd $0x6C, (%%rax), %%ymm7") 751 GEN_test_RandM(VPERMPD_0xB5, 752 "vpermpd $0xB5, %%ymm6, %%ymm7", 753 "vpermpd $0x4A, (%%rax), %%ymm7") 754 GEN_test_RandM(VPERMPD_0x85, 755 "vpermpd $0x85, %%ymm6, %%ymm7", 756 "vpermpd $0xDC, (%%rax), %%ymm7") 757 GEN_test_RandM(VPERMPD_0x29, 758 "vpermpd $0x29, %%ymm6, %%ymm7", 759 "vpermpd $0x92, (%%rax), %%ymm7") 760 761 GEN_test_RandM(VPBLENDD_128_0x00, 762 "vpblendd $0x00, %%xmm6, %%xmm8, %%xmm7", 763 "vpblendd $0x01, (%%rax), %%xmm8, %%xmm7") 764 GEN_test_RandM(VPBLENDD_128_0x02, 765 "vpblendd $0x02, %%xmm6, %%xmm8, %%xmm7", 766 "vpblendd $0x03, (%%rax), %%xmm8, %%xmm7") 767 GEN_test_RandM(VPBLENDD_128_0x04, 768 "vpblendd $0x04, %%xmm6, %%xmm8, %%xmm7", 769 "vpblendd $0x05, (%%rax), %%xmm8, %%xmm7") 770 GEN_test_RandM(VPBLENDD_128_0x06, 771 "vpblendd $0x06, %%xmm6, %%xmm8, %%xmm7", 772 "vpblendd $0x07, (%%rax), %%xmm8, %%xmm7") 773 GEN_test_RandM(VPBLENDD_128_0x08, 774 "vpblendd $0x08, %%xmm6, %%xmm8, %%xmm7", 775 "vpblendd $0x09, (%%rax), %%xmm8, %%xmm7") 776 GEN_test_RandM(VPBLENDD_128_0x0A, 777 "vpblendd $0x0A, %%xmm6, %%xmm8, %%xmm7", 778 "vpblendd $0x0B, (%%rax), %%xmm8, %%xmm7") 779 GEN_test_RandM(VPBLENDD_128_0x0C, 780 "vpblendd $0x0C, %%xmm6, %%xmm8, %%xmm7", 781 "vpblendd $0x0D, (%%rax), %%xmm8, %%xmm7") 782 GEN_test_RandM(VPBLENDD_128_0x0E, 783 "vpblendd $0x0E, %%xmm6, %%xmm8, %%xmm7", 784 "vpblendd $0x0F, (%%rax), %%xmm8, %%xmm7") 785 786 GEN_test_RandM(VPBLENDD_256_0x00, 787 "vpblendd $0x00, %%ymm6, %%ymm8, %%ymm7", 788 "vpblendd $0x01, (%%rax), %%ymm8, %%ymm7") 789 GEN_test_RandM(VPBLENDD_256_0xFE, 790 "vpblendd $0xFE, %%ymm6, %%ymm8, %%ymm7", 791 "vpblendd $0xFF, (%%rax), %%ymm8, %%ymm7") 792 GEN_test_RandM(VPBLENDD_256_0x30, 793 "vpblendd $0x30, %%ymm6, %%ymm8, %%ymm7", 794 "vpblendd $0x03, (%%rax), %%ymm8, %%ymm7") 795 GEN_test_RandM(VPBLENDD_256_0x21, 796 "vpblendd $0x21, %%ymm6, %%ymm8, %%ymm7", 797 "vpblendd $0x12, (%%rax), %%ymm8, %%ymm7") 798 GEN_test_RandM(VPBLENDD_256_0xD7, 799 "vpblendd $0xD7, %%ymm6, %%ymm8, %%ymm7", 800 "vpblendd $0x6C, (%%rax), %%ymm8, %%ymm7") 801 GEN_test_RandM(VPBLENDD_256_0xB5, 802 "vpblendd $0xB5, %%ymm6, %%ymm8, %%ymm7", 803 "vpblendd $0x4A, (%%rax), %%ymm8, %%ymm7") 804 GEN_test_RandM(VPBLENDD_256_0x85, 805 "vpblendd $0x85, %%ymm6, %%ymm8, %%ymm7", 806 "vpblendd $0xDC, (%%rax), %%ymm8, %%ymm7") 807 GEN_test_RandM(VPBLENDD_256_0x29, 808 
"vpblendd $0x29, %%ymm6, %%ymm8, %%ymm7", 809 "vpblendd $0x92, (%%rax), %%ymm8, %%ymm7") 810 811 GEN_test_RandM(VPSLLVD_128, 812 "vpslld $27, %%xmm6, %%xmm6;" 813 "vpsrld $27, %%xmm6, %%xmm6;" 814 "vpsllvd %%xmm6, %%xmm8, %%xmm7", 815 "andl $31, (%%rax);" 816 "andl $31, 4(%%rax);" 817 "andl $31, 8(%%rax);" 818 "vpsllvd (%%rax), %%xmm8, %%xmm7") 819 820 GEN_test_RandM(VPSLLVD_256, 821 "vpslld $27, %%ymm6, %%ymm6;" 822 "vpsrld $27, %%ymm6, %%ymm6;" 823 "vpsllvd %%ymm6, %%ymm8, %%ymm7", 824 "andl $31, (%%rax);" 825 "andl $31, 4(%%rax);" 826 "andl $31, 8(%%rax);" 827 "andl $31, 16(%%rax);" 828 "andl $31, 20(%%rax);" 829 "andl $31, 24(%%rax);" 830 "vpsllvd (%%rax), %%ymm8, %%ymm7") 831 832 GEN_test_RandM(VPSLLVQ_128, 833 "vpsllq $58, %%xmm6, %%xmm6;" 834 "vpsrlq $58, %%xmm6, %%xmm6;" 835 "vpsllvq %%xmm6, %%xmm8, %%xmm7", 836 "andl $63, (%%rax);" 837 "vpsllvq (%%rax), %%xmm8, %%xmm7") 838 839 GEN_test_RandM(VPSLLVQ_256, 840 "vpsllq $58, %%ymm6, %%ymm6;" 841 "vpsrlq $58, %%ymm6, %%ymm6;" 842 "vpsllvq %%ymm6, %%ymm8, %%ymm7", 843 "andl $63, (%%rax);" 844 "andl $63, 8(%%rax);" 845 "andl $63, 16(%%rax);" 846 "vpsllvq (%%rax), %%ymm8, %%ymm7") 847 848 GEN_test_RandM(VPSRLVD_128, 849 "vpslld $27, %%xmm6, %%xmm6;" 850 "vpsrld $27, %%xmm6, %%xmm6;" 851 "vpsrlvd %%xmm6, %%xmm8, %%xmm7", 852 "andl $31, (%%rax);" 853 "andl $31, 4(%%rax);" 854 "andl $31, 8(%%rax);" 855 "vpsrlvd (%%rax), %%xmm8, %%xmm7") 856 857 GEN_test_RandM(VPSRLVD_256, 858 "vpslld $27, %%ymm6, %%ymm6;" 859 "vpsrld $27, %%ymm6, %%ymm6;" 860 "vpsrlvd %%ymm6, %%ymm8, %%ymm7", 861 "andl $31, (%%rax);" 862 "andl $31, 4(%%rax);" 863 "andl $31, 8(%%rax);" 864 "andl $31, 16(%%rax);" 865 "andl $31, 20(%%rax);" 866 "andl $31, 24(%%rax);" 867 "vpsrlvd (%%rax), %%ymm8, %%ymm7") 868 869 GEN_test_RandM(VPSRLVQ_128, 870 "vpsllq $58, %%xmm6, %%xmm6;" 871 "vpsrlq $58, %%xmm6, %%xmm6;" 872 "vpsrlvq %%xmm6, %%xmm8, %%xmm7", 873 "andl $63, (%%rax);" 874 "vpsrlvq (%%rax), %%xmm8, %%xmm7") 875 876 GEN_test_RandM(VPSRLVQ_256, 877 "vpsllq $58, %%ymm6, %%ymm6;" 878 "vpsrlq $58, %%ymm6, %%ymm6;" 879 "vpsrlvq %%ymm6, %%ymm8, %%ymm7", 880 "andl $63, (%%rax);" 881 "andl $63, 8(%%rax);" 882 "andl $63, 16(%%rax);" 883 "vpsrlvq (%%rax), %%ymm8, %%ymm7") 884 885 GEN_test_RandM(VPSRAVD_128, 886 "vpslld $27, %%xmm6, %%xmm6;" 887 "vpsrld $27, %%xmm6, %%xmm6;" 888 "vpsravd %%xmm6, %%xmm8, %%xmm7", 889 "andl $31, (%%rax);" 890 "andl $31, 4(%%rax);" 891 "andl $31, 8(%%rax);" 892 "vpsravd (%%rax), %%xmm8, %%xmm7") 893 894 GEN_test_RandM(VPSRAVD_256, 895 "vpslld $27, %%ymm6, %%ymm6;" 896 "vpsrld $27, %%ymm6, %%ymm6;" 897 "vpsravd %%ymm6, %%ymm8, %%ymm7", 898 "andl $31, (%%rax);" 899 "andl $31, 4(%%rax);" 900 "andl $31, 8(%%rax);" 901 "andl $31, 16(%%rax);" 902 "andl $31, 20(%%rax);" 903 "andl $31, 24(%%rax);" 904 "vpsravd (%%rax), %%ymm8, %%ymm7") 905 906 GEN_test_RandM(VPBROADCASTB_128, 907 "vpbroadcastb %%xmm9, %%xmm7", 908 "vpbroadcastb (%%rax), %%xmm7") 909 910 GEN_test_RandM(VPBROADCASTB_256, 911 "vpbroadcastb %%xmm9, %%ymm7", 912 "vpbroadcastb (%%rax), %%ymm7") 913 914 GEN_test_RandM(VPBROADCASTW_128, 915 "vpbroadcastw %%xmm9, %%xmm7", 916 "vpbroadcastw (%%rax), %%xmm7") 917 918 GEN_test_RandM(VPBROADCASTW_256, 919 "vpbroadcastw %%xmm9, %%ymm7", 920 "vpbroadcastw (%%rax), %%ymm7") 921 922 GEN_test_RandM(VPBROADCASTD_128, 923 "vpbroadcastd %%xmm9, %%xmm7", 924 "vpbroadcastd (%%rax), %%xmm7") 925 926 GEN_test_RandM(VPBROADCASTD_256, 927 "vpbroadcastd %%xmm9, %%ymm7", 928 "vpbroadcastd (%%rax), %%ymm7") 929 930 GEN_test_RandM(VPBROADCASTQ_128, 931 "vpbroadcastq %%xmm9, 
%%xmm7", 932 "vpbroadcastq (%%rax), %%xmm7") 933 934 GEN_test_RandM(VPBROADCASTQ_256, 935 "vpbroadcastq %%xmm9, %%ymm7", 936 "vpbroadcastq (%%rax), %%ymm7") 937 938 GEN_test_Monly(VPMASKMOVD_128_LoadForm, 939 "vpmaskmovd (%%rax), %%xmm8, %%xmm7;" 940 "vxorps %%xmm6, %%xmm6, %%xmm6;" 941 "vpmaskmovd (%%rax,%%rax,4), %%xmm6, %%xmm9") 942 943 GEN_test_Monly(VPMASKMOVD_256_LoadForm, 944 "vpmaskmovd (%%rax), %%ymm8, %%ymm7;" 945 "vxorps %%ymm6, %%ymm6, %%ymm6;" 946 "vpmaskmovd (%%rax,%%rax,4), %%ymm6, %%ymm9") 947 948 GEN_test_Monly(VPMASKMOVQ_128_LoadForm, 949 "vpmaskmovq (%%rax), %%xmm8, %%xmm7;" 950 "vxorpd %%xmm6, %%xmm6, %%xmm6;" 951 "vpmaskmovq (%%rax,%%rax,4), %%xmm6, %%xmm9") 952 953 GEN_test_Monly(VPMASKMOVQ_256_LoadForm, 954 "vpmaskmovq (%%rax), %%ymm8, %%ymm7;" 955 "vxorpd %%ymm6, %%ymm6, %%ymm6;" 956 "vpmaskmovq (%%rax,%%rax,4), %%ymm6, %%ymm9") 957 958 GEN_test_Monly(VPMASKMOVD_128_StoreForm, 959 "vpmaskmovd %%xmm8, %%xmm7, (%%rax);" 960 "vxorps %%xmm6, %%xmm6, %%xmm6;" 961 "vpmaskmovd %%xmm9, %%xmm6, (%%rax,%%rax,4)") 962 963 GEN_test_Monly(VPMASKMOVD_256_StoreForm, 964 "vpmaskmovd %%ymm8, %%ymm7, (%%rax);" 965 "vxorps %%ymm6, %%ymm6, %%ymm6;" 966 "vpmaskmovd %%ymm9, %%ymm6, (%%rax,%%rax,4)") 967 968 GEN_test_Monly(VPMASKMOVQ_128_StoreForm, 969 "vpmaskmovq %%xmm8, %%xmm7, (%%rax);" 970 "vxorpd %%xmm6, %%xmm6, %%xmm6;" 971 "vpmaskmovq %%xmm9, %%xmm6, (%%rax,%%rax,4)") 972 973 GEN_test_Monly(VPMASKMOVQ_256_StoreForm, 974 "vpmaskmovq %%ymm8, %%ymm7, (%%rax);" 975 "vxorpd %%ymm6, %%ymm6, %%ymm6;" 976 "vpmaskmovq %%ymm9, %%ymm6, (%%rax,%%rax,4)") 977 978 GEN_test_Ronly(VGATHERDPS_128, 979 "vpslld $25, %%xmm7, %%xmm8;" 980 "vpsrld $25, %%xmm8, %%xmm8;" 981 "vblendvps %%xmm6, %%xmm8, %%xmm7, %%xmm8;" 982 "leaq _randArray(%%rip), %%r14;" 983 "vgatherdps %%xmm6, 3(%%r14,%%xmm8,4), %%xmm9;" 984 "xorl %%r14d, %%r14d") 985 986 GEN_test_Ronly(VGATHERDPS_256, 987 "vpslld $25, %%ymm7, %%ymm8;" 988 "vpsrld $25, %%ymm8, %%ymm8;" 989 "vblendvps %%ymm6, %%ymm8, %%ymm7, %%ymm8;" 990 "leaq _randArray(%%rip), %%r14;" 991 "vgatherdps %%ymm6, 3(%%r14,%%ymm8,4), %%ymm9;" 992 "xorl %%r14d, %%r14d") 993 994 GEN_test_Ronly(VGATHERQPS_128_1, 995 "vpsllq $57, %%xmm7, %%xmm8;" 996 "vpsrlq $57, %%xmm8, %%xmm8;" 997 "vpmovsxdq %%xmm6, %%xmm9;" 998 "vblendvpd %%xmm9, %%xmm8, %%xmm7, %%xmm8;" 999 "vmovdqa 96(%0), %%ymm9;" 1000 "leaq _randArray(%%rip), %%r14;" 1001 "vgatherqps %%xmm6, 3(%%r14,%%xmm8,4), %%xmm9;" 1002 "xorl %%r14d, %%r14d") 1003 1004 GEN_test_Ronly(VGATHERQPS_256_1, 1005 "vpsllq $57, %%ymm7, %%ymm8;" 1006 "vpsrlq $57, %%ymm8, %%ymm8;" 1007 "vpmovsxdq %%xmm6, %%ymm9;" 1008 "vblendvpd %%ymm9, %%ymm8, %%ymm7, %%ymm8;" 1009 "vmovdqa 96(%0), %%ymm9;" 1010 "leaq _randArray(%%rip), %%r14;" 1011 "vgatherqps %%xmm6, 3(%%r14,%%ymm8,4), %%xmm9;" 1012 "xorl %%r14d, %%r14d") 1013 1014 GEN_test_Ronly(VGATHERQPS_128_2, 1015 "vpsllq $57, %%xmm7, %%xmm8;" 1016 "vpsrlq $57, %%xmm8, %%xmm8;" 1017 "vpmovsxdq %%xmm6, %%xmm9;" 1018 "vblendvpd %%xmm9, %%xmm8, %%xmm7, %%xmm8;" 1019 "vmovdqa 96(%0), %%ymm9;" 1020 "leaq _randArray(%%rip), %%r14;" 1021 "vmovq %%r14, %%xmm7;" 1022 "vpsllq $2, %%xmm8, %%xmm8;" 1023 "vpbroadcastq %%xmm7, %%xmm7;" 1024 "vpaddq %%xmm7, %%xmm8, %%xmm8;" 1025 "vgatherqps %%xmm6, 1(,%%xmm8,1), %%xmm9;" 1026 "vpsubq %%xmm7, %%xmm8, %%xmm8;" 1027 "vmovdqa 0(%0), %%ymm7;" 1028 "xorl %%r14d, %%r14d") 1029 1030 GEN_test_Ronly(VGATHERQPS_256_2, 1031 "vpsllq $57, %%ymm7, %%ymm8;" 1032 "vpsrlq $57, %%ymm8, %%ymm8;" 1033 "vpmovsxdq %%xmm6, %%ymm9;" 1034 "vblendvpd %%ymm9, %%ymm8, %%ymm7, %%ymm8;" 1035 "vmovdqa 

GEN_test_Ronly(VGATHERQPS_256_2,
               "vpsllq $57, %%ymm7, %%ymm8;"
               "vpsrlq $57, %%ymm8, %%ymm8;"
               "vpmovsxdq %%xmm6, %%ymm9;"
               "vblendvpd %%ymm9, %%ymm8, %%ymm7, %%ymm8;"
               "vmovdqa 96(%0), %%ymm9;"
               "leaq _randArray(%%rip), %%r14;"
               "vmovq %%r14, %%xmm7;"
               "vpsllq $2, %%ymm8, %%ymm8;"
               "vpbroadcastq %%xmm7, %%ymm7;"
               "vpaddq %%ymm7, %%ymm8, %%ymm8;"
               "vgatherqps %%xmm6, 1(,%%ymm8,1), %%xmm9;"
               "vpsubq %%ymm7, %%ymm8, %%ymm8;"
               "vmovdqa 0(%0), %%ymm7;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VGATHERDPD_128,
               "vpslld $26, %%xmm7, %%xmm8;"
               "vpsrld $26, %%xmm8, %%xmm8;"
               "vshufps $13, %%xmm6, %%xmm6, %%xmm9;"
               "vblendvps %%xmm9, %%xmm8, %%xmm7, %%xmm8;"
               "vmovdqa 96(%0), %%ymm9;"
               "leaq _randArray(%%rip), %%r14;"
               "vgatherdpd %%xmm6, 3(%%r14,%%xmm8,8), %%xmm9;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VGATHERDPD_256,
               "vpslld $26, %%ymm7, %%ymm8;"
               "vpsrld $26, %%ymm8, %%ymm8;"
               "vextracti128 $1, %%ymm6, %%xmm9;"
               "vshufps $221, %%ymm9, %%ymm6, %%ymm9;"
               "vblendvps %%ymm9, %%ymm8, %%ymm7, %%ymm8;"
               "vmovdqa 96(%0), %%ymm9;"
               "leaq _randArray(%%rip), %%r14;"
               "vgatherdpd %%ymm6, 3(%%r14,%%xmm8,8), %%ymm9;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VGATHERQPD_128_1,
               "vpsllq $58, %%xmm7, %%xmm8;"
               "vpsrlq $58, %%xmm8, %%xmm8;"
               "vblendvpd %%xmm6, %%xmm8, %%xmm7, %%xmm8;"
               "leaq _randArray(%%rip), %%r14;"
               "vgatherqpd %%xmm6, 3(%%r14,%%xmm8,8), %%xmm9;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VGATHERQPD_256_1,
               "vpsllq $58, %%ymm7, %%ymm8;"
               "vpsrlq $58, %%ymm8, %%ymm8;"
               "vblendvpd %%ymm6, %%ymm8, %%ymm7, %%ymm8;"
               "leaq _randArray(%%rip), %%r14;"
               "vgatherqpd %%ymm6, 3(%%r14,%%ymm8,8), %%ymm9;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VGATHERQPD_128_2,
               "vpsllq $58, %%xmm7, %%xmm8;"
               "vpsrlq $58, %%xmm8, %%xmm8;"
               "vblendvpd %%xmm6, %%xmm8, %%xmm7, %%xmm8;"
               "leaq _randArray(%%rip), %%r14;"
               "vmovq %%r14, %%xmm7;"
               "vpsllq $2, %%xmm8, %%xmm8;"
               "vpbroadcastq %%xmm7, %%xmm7;"
               "vpaddq %%xmm7, %%xmm8, %%xmm8;"
               "vgatherqpd %%xmm6, 1(,%%xmm8,1), %%xmm9;"
               "vpsubq %%xmm7, %%xmm8, %%xmm8;"
               "vmovdqa 0(%0), %%ymm7;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VGATHERQPD_256_2,
               "vpsllq $58, %%ymm7, %%ymm8;"
               "vpsrlq $58, %%ymm8, %%ymm8;"
               "vblendvpd %%ymm6, %%ymm8, %%ymm7, %%ymm8;"
               "leaq _randArray(%%rip), %%r14;"
               "vmovq %%r14, %%xmm7;"
               "vpsllq $2, %%ymm8, %%ymm8;"
               "vpbroadcastq %%xmm7, %%ymm7;"
               "vpaddq %%ymm7, %%ymm8, %%ymm8;"
               "vgatherqpd %%ymm6, 1(,%%ymm8,1), %%ymm9;"
               "vpsubq %%ymm7, %%ymm8, %%ymm8;"
               "vmovdqa 0(%0), %%ymm7;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VPGATHERDD_128,
               "vpslld $25, %%xmm7, %%xmm8;"
               "vpsrld $25, %%xmm8, %%xmm8;"
               "vblendvps %%xmm6, %%xmm8, %%xmm7, %%xmm8;"
               "leaq _randArray(%%rip), %%r14;"
               "vpgatherdd %%xmm6, 3(%%r14,%%xmm8,4), %%xmm9;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VPGATHERDD_256,
               "vpslld $25, %%ymm7, %%ymm8;"
               "vpsrld $25, %%ymm8, %%ymm8;"
               "vblendvps %%ymm6, %%ymm8, %%ymm7, %%ymm8;"
               "leaq _randArray(%%rip), %%r14;"
               "vpgatherdd %%ymm6, 3(%%r14,%%ymm8,4), %%ymm9;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VPGATHERQD_128_1,
               "vpsllq $57, %%xmm7, %%xmm8;"
               "vpsrlq $57, %%xmm8, %%xmm8;"
               "vpmovsxdq %%xmm6, %%xmm9;"
               "vblendvpd %%xmm9, %%xmm8, %%xmm7, %%xmm8;"
               "vmovdqa 96(%0), %%ymm9;"
               "leaq _randArray(%%rip), %%r14;"
               "vpgatherqd %%xmm6, 3(%%r14,%%xmm8,4), %%xmm9;"
               "xorl %%r14d, %%r14d")
1138 "vpsllq $57, %%ymm7, %%ymm8;" 1139 "vpsrlq $57, %%ymm8, %%ymm8;" 1140 "vpmovsxdq %%xmm6, %%ymm9;" 1141 "vblendvpd %%ymm9, %%ymm8, %%ymm7, %%ymm8;" 1142 "vmovdqa 96(%0), %%ymm9;" 1143 "leaq _randArray(%%rip), %%r14;" 1144 "vpgatherqd %%xmm6, 3(%%r14,%%ymm8,4), %%xmm9;" 1145 "xorl %%r14d, %%r14d") 1146 1147 GEN_test_Ronly(VPGATHERQD_128_2, 1148 "vpsllq $57, %%xmm7, %%xmm8;" 1149 "vpsrlq $57, %%xmm8, %%xmm8;" 1150 "vpmovsxdq %%xmm6, %%xmm9;" 1151 "vblendvpd %%xmm9, %%xmm8, %%xmm7, %%xmm8;" 1152 "vmovdqa 96(%0), %%ymm9;" 1153 "leaq _randArray(%%rip), %%r14;" 1154 "vmovq %%r14, %%xmm7;" 1155 "vpsllq $2, %%xmm8, %%xmm8;" 1156 "vpbroadcastq %%xmm7, %%xmm7;" 1157 "vpaddq %%xmm7, %%xmm8, %%xmm8;" 1158 "vpgatherqd %%xmm6, 1(,%%xmm8,1), %%xmm9;" 1159 "vpsubq %%xmm7, %%xmm8, %%xmm8;" 1160 "vmovdqa 0(%0), %%ymm7;" 1161 "xorl %%r14d, %%r14d") 1162 1163 GEN_test_Ronly(VPGATHERQD_256_2, 1164 "vpsllq $57, %%ymm7, %%ymm8;" 1165 "vpsrlq $57, %%ymm8, %%ymm8;" 1166 "vpmovsxdq %%xmm6, %%ymm9;" 1167 "vblendvpd %%ymm9, %%ymm8, %%ymm7, %%ymm8;" 1168 "vmovdqa 96(%0), %%ymm9;" 1169 "leaq _randArray(%%rip), %%r14;" 1170 "vmovq %%r14, %%xmm7;" 1171 "vpsllq $2, %%ymm8, %%ymm8;" 1172 "vpbroadcastq %%xmm7, %%ymm7;" 1173 "vpaddq %%ymm7, %%ymm8, %%ymm8;" 1174 "vpgatherqd %%xmm6, 1(,%%ymm8,1), %%xmm9;" 1175 "vpsubq %%ymm7, %%ymm8, %%ymm8;" 1176 "vmovdqa 0(%0), %%ymm7;" 1177 "xorl %%r14d, %%r14d") 1178 1179 GEN_test_Ronly(VPGATHERDQ_128, 1180 "vpslld $26, %%xmm7, %%xmm8;" 1181 "vpsrld $26, %%xmm8, %%xmm8;" 1182 "vshufps $13, %%xmm6, %%xmm6, %%xmm9;" 1183 "vblendvps %%xmm9, %%xmm8, %%xmm7, %%xmm8;" 1184 "vmovdqa 96(%0), %%ymm9;" 1185 "leaq _randArray(%%rip), %%r14;" 1186 "vpgatherdq %%xmm6, 3(%%r14,%%xmm8,8), %%xmm9;" 1187 "xorl %%r14d, %%r14d") 1188 1189 GEN_test_Ronly(VPGATHERDQ_256, 1190 "vpslld $26, %%ymm7, %%ymm8;" 1191 "vpsrld $26, %%ymm8, %%ymm8;" 1192 "vextracti128 $1, %%ymm6, %%xmm9;" 1193 "vshufps $221, %%ymm9, %%ymm6, %%ymm9;" 1194 "vblendvps %%ymm9, %%ymm8, %%ymm7, %%ymm8;" 1195 "vmovdqa 96(%0), %%ymm9;" 1196 "leaq _randArray(%%rip), %%r14;" 1197 "vpgatherdq %%ymm6, 3(%%r14,%%xmm8,8), %%ymm9;" 1198 "xorl %%r14d, %%r14d") 1199 1200 GEN_test_Ronly(VPGATHERQQ_128_1, 1201 "vpsllq $58, %%xmm7, %%xmm8;" 1202 "vpsrlq $58, %%xmm8, %%xmm8;" 1203 "vblendvpd %%xmm6, %%xmm8, %%xmm7, %%xmm8;" 1204 "leaq _randArray(%%rip), %%r14;" 1205 "vpgatherqq %%xmm6, 3(%%r14,%%xmm8,8), %%xmm9;" 1206 "xorl %%r14d, %%r14d") 1207 1208 GEN_test_Ronly(VPGATHERQQ_256_1, 1209 "vpsllq $58, %%ymm7, %%ymm8;" 1210 "vpsrlq $58, %%ymm8, %%ymm8;" 1211 "vblendvpd %%ymm6, %%ymm8, %%ymm7, %%ymm8;" 1212 "leaq _randArray(%%rip), %%r14;" 1213 "vpgatherqq %%ymm6, 3(%%r14,%%ymm8,8), %%ymm9;" 1214 "xorl %%r14d, %%r14d") 1215 1216 GEN_test_Ronly(VPGATHERQQ_128_2, 1217 "vpsllq $58, %%xmm7, %%xmm8;" 1218 "vpsrlq $58, %%xmm8, %%xmm8;" 1219 "vblendvpd %%xmm6, %%xmm8, %%xmm7, %%xmm8;" 1220 "leaq _randArray(%%rip), %%r14;" 1221 "vmovq %%r14, %%xmm7;" 1222 "vpsllq $2, %%xmm8, %%xmm8;" 1223 "vpbroadcastq %%xmm7, %%xmm7;" 1224 "vpaddq %%xmm7, %%xmm8, %%xmm8;" 1225 "vpgatherqq %%xmm6, 1(,%%xmm8,1), %%xmm9;" 1226 "vpsubq %%xmm7, %%xmm8, %%xmm8;" 1227 "vmovdqa 0(%0), %%ymm7;" 1228 "xorl %%r14d, %%r14d") 1229 1230 GEN_test_Ronly(VPGATHERQQ_256_2, 1231 "vpsllq $58, %%ymm7, %%ymm8;" 1232 "vpsrlq $58, %%ymm8, %%ymm8;" 1233 "vblendvpd %%ymm6, %%ymm8, %%ymm7, %%ymm8;" 1234 "leaq _randArray(%%rip), %%r14;" 1235 "vmovq %%r14, %%xmm7;" 1236 "vpsllq $2, %%ymm8, %%ymm8;" 1237 "vpbroadcastq %%xmm7, %%ymm7;" 1238 "vpaddq %%ymm7, %%ymm8, %%ymm8;" 1239 "vpgatherqq %%ymm6, 

GEN_test_Ronly(VPGATHERQQ_256_2,
               "vpsllq $58, %%ymm7, %%ymm8;"
               "vpsrlq $58, %%ymm8, %%ymm8;"
               "vblendvpd %%ymm6, %%ymm8, %%ymm7, %%ymm8;"
               "leaq _randArray(%%rip), %%r14;"
               "vmovq %%r14, %%xmm7;"
               "vpsllq $2, %%ymm8, %%ymm8;"
               "vpbroadcastq %%xmm7, %%ymm7;"
               "vpaddq %%ymm7, %%ymm8, %%ymm8;"
               "vpgatherqq %%ymm6, 1(,%%ymm8,1), %%ymm9;"
               "vpsubq %%ymm7, %%ymm8, %%ymm8;"
               "vmovdqa 0(%0), %%ymm7;"
               "xorl %%r14d, %%r14d")

/* Comment duplicated above, for convenient reference:
   Allowed operands in test insns:
     Reg form: %ymm6,  %ymm7, %ymm8, %ymm9 and %r14.
     Mem form: (%rax), %ymm7, %ymm8, %ymm9 and %r14.
   Imm8 etc fields are also allowed, where they make sense.
   Both forms may use ymm0 as scratch.  Mem form may also use
   ymm6 as scratch. */

#define N_DEFAULT_ITERS 3

// Do the specified test some number of times
#define DO_N(_iters, _testfn) \
   do { int i; for (i = 0; i < (_iters); i++) { test_##_testfn(); } } while (0)

// Do the specified test the default number of times
#define DO_D(_testfn) DO_N(N_DEFAULT_ITERS, _testfn)
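
/* Editor-added sketch: a minimal, hypothetical illustration of DO_N.  The
   function below is never called, so it does not change the test's output;
   its name is invented here purely for illustration. */
__attribute__((unused)) static void example_run_ten_times ( void )
{
   /* Run the VPOR_256 test ten times instead of N_DEFAULT_ITERS times. */
   DO_N(10, VPOR_256);
}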


int main ( void )
{
   DO_D( VPOR_256 );
   DO_D( VPXOR_256 );
   DO_D( VPSUBB_256 );
   DO_D( VPSUBD_256 );
   DO_D( VPADDD_256 );
   DO_D( VPMOVZXWD_256 );
   DO_D( VPMOVZXBW_256 );
   DO_D( VPBLENDVB_256 );
   DO_D( VPMINSD_256 );
   DO_D( VPMAXSD_256 );
   DO_D( VPSHUFB_256 );
   DO_D( VPUNPCKLBW_256 );
   DO_D( VPUNPCKHBW_256 );
   DO_D( VPABSD_256 );
   DO_D( VPACKUSWB_256 );
   DO_D( VPMOVMSKB_256 );
   DO_D( VPAND_256 );
   DO_D( VPCMPEQB_256 );
   DO_D( VPSHUFLW_0x39_256 );
   DO_D( VPSHUFHW_0x39_256 );
   DO_D( VPMULLW_256 );
   DO_D( VPADDUSW_256 );
   DO_D( VPMULHUW_256 );
   DO_D( VPADDUSB_256 );
   DO_D( VPUNPCKLWD_256 );
   DO_D( VPUNPCKHWD_256 );
   DO_D( VPSLLD_0x05_256 );
   DO_D( VPSRLD_0x05_256 );
   DO_D( VPSRAD_0x05_256 );
   DO_D( VPSUBUSB_256 );
   DO_D( VPSUBSB_256 );
   DO_D( VPSRLDQ_0x05_256 );
   DO_D( VPSLLDQ_0x05_256 );
   DO_D( VPANDN_256 );
   DO_D( VPUNPCKLQDQ_256 );
   DO_D( VPSRLW_0x05_256 );
   DO_D( VPSLLW_0x05_256 );
   DO_D( VPADDW_256 );
   DO_D( VPACKSSDW_256 );
   DO_D( VPUNPCKLDQ_256 );
   DO_D( VPCMPEQD_256 );
   DO_D( VPSHUFD_0x39_256 );
   DO_D( VPADDQ_256 );
   DO_D( VPSUBQ_256 );
   DO_D( VPSUBW_256 );
   DO_D( VPCMPEQQ_256 );
   DO_D( VPCMPGTQ_256 );
   DO_D( VPSRLQ_0x05_256 );
   DO_D( VPMULUDQ_256 );
   DO_D( VPMULDQ_256 );
   DO_D( VPSLLQ_0x05_256 );
   DO_D( VPMAXUD_256 );
   DO_D( VPMINUD_256 );
   DO_D( VPMULLD_256 );
   DO_D( VPMAXUW_256 );
   DO_D( VPMINUW_256 );
   DO_D( VPMAXSW_256 );
   DO_D( VPMINSW_256 );
   DO_D( VPMAXUB_256 );
   DO_D( VPMINUB_256 );
   DO_D( VPMAXSB_256 );
   DO_D( VPMINSB_256 );
   DO_D( VPMOVSXBW_256 );
   DO_D( VPSUBUSW_256 );
   DO_D( VPSUBSW_256 );
   DO_D( VPCMPEQW_256 );
   DO_D( VPADDB_256 );
   DO_D( VPUNPCKHDQ_256 );
   DO_D( VPMOVSXDQ_256 );
   DO_D( VPMOVSXWD_256 );
   DO_D( VPMULHW_256 );
   DO_D( VPUNPCKHQDQ_256 );
   DO_D( VPSRAW_0x05_256 );
   DO_D( VPCMPGTB_256 );
   DO_D( VPCMPGTW_256 );
   DO_D( VPCMPGTD_256 );
   DO_D( VPMOVZXBD_256 );
   DO_D( VPMOVSXBD_256 );
   DO_D( VPALIGNR_256_1of3 );
   DO_D( VPALIGNR_256_2of3 );
   DO_D( VPALIGNR_256_3of3 );
   DO_D( VPBLENDW_256_0x00 );
   DO_D( VPBLENDW_256_0xFE );
   DO_D( VPBLENDW_256_0x30 );
   DO_D( VPBLENDW_256_0x21 );
   DO_D( VPBLENDW_256_0xD7 );
   DO_D( VPBLENDW_256_0xB5 );
   DO_D( VPBLENDW_256_0x85 );
   DO_D( VPBLENDW_256_0x29 );
   DO_D( VPSLLW_256 );
   DO_D( VPSRLW_256 );
   DO_D( VPSRAW_256 );
   DO_D( VPSLLD_256 );
   DO_D( VPSRLD_256 );
   DO_D( VPSRAD_256 );
   DO_D( VPSLLQ_256 );
   DO_D( VPSRLQ_256 );
   DO_D( VPMADDWD_256 );
   DO_D( VMOVNTDQA_256 );
   DO_D( VPACKSSWB_256 );
   DO_D( VPAVGB_256 );
   DO_D( VPAVGW_256 );
   DO_D( VPADDSB_256 );
   DO_D( VPADDSW_256 );
   DO_D( VPHADDW_256 );
   DO_D( VPHADDD_256 );
   DO_D( VPHADDSW_256 );
   DO_D( VPMADDUBSW_256 );
   DO_D( VPHSUBW_256 );
   DO_D( VPHSUBD_256 );
   DO_D( VPHSUBSW_256 );
   DO_D( VPABSB_256 );
   DO_D( VPABSW_256 );
   DO_D( VPMOVSXBQ_256 );
   DO_D( VPMOVSXWQ_256 );
   DO_D( VPACKUSDW_256 );
   DO_D( VPMOVZXBQ_256 );
   DO_D( VPMOVZXWQ_256 );
   DO_D( VPMOVZXDQ_256 );
   DO_D( VMPSADBW_256_0x0 );
   DO_D( VMPSADBW_256_0x39 );
   DO_D( VMPSADBW_256_0x32 );
   DO_D( VMPSADBW_256_0x2b );
   DO_D( VMPSADBW_256_0x24 );
   DO_D( VMPSADBW_256_0x1d );
   DO_D( VMPSADBW_256_0x16 );
   DO_D( VMPSADBW_256_0x0f );
   DO_D( VPSADBW_256 );
   DO_D( VPSIGNB_256 );
   DO_D( VPSIGNW_256 );
   DO_D( VPSIGND_256 );
   DO_D( VPMULHRSW_256 );
   DO_D( VBROADCASTI128 );
   DO_D( VEXTRACTI128_0x0 );
   DO_D( VEXTRACTI128_0x1 );
   DO_D( VINSERTI128_0x0 );
   DO_D( VINSERTI128_0x1 );
   DO_D( VPERM2I128_0x00 );
   DO_D( VPERM2I128_0xFF );
   DO_D( VPERM2I128_0x30 );
   DO_D( VPERM2I128_0x21 );
   DO_D( VPERM2I128_0x12 );
   DO_D( VPERM2I128_0x03 );
   DO_D( VPERM2I128_0x85 );
   DO_D( VPERM2I128_0x5A );
   DO_D( VBROADCASTSS_128 );
   DO_D( VBROADCASTSS_256 );
   DO_D( VBROADCASTSD_256 );
   DO_D( VPERMD );
   DO_D( VPERMQ_0x00 );
   DO_D( VPERMQ_0xFE );
   DO_D( VPERMQ_0x30 );
   DO_D( VPERMQ_0x21 );
   DO_D( VPERMQ_0xD7 );
   DO_D( VPERMQ_0xB5 );
   DO_D( VPERMQ_0x85 );
   DO_D( VPERMQ_0x29 );
   DO_D( VPERMPS );
   DO_D( VPERMPD_0x00 );
   DO_D( VPERMPD_0xFE );
   DO_D( VPERMPD_0x30 );
   DO_D( VPERMPD_0x21 );
   DO_D( VPERMPD_0xD7 );
   DO_D( VPERMPD_0xB5 );
   DO_D( VPERMPD_0x85 );
   DO_D( VPERMPD_0x29 );
   DO_D( VPBLENDD_128_0x00 );
   DO_D( VPBLENDD_128_0x02 );
   DO_D( VPBLENDD_128_0x04 );
   DO_D( VPBLENDD_128_0x06 );
   DO_D( VPBLENDD_128_0x08 );
   DO_D( VPBLENDD_128_0x0A );
   DO_D( VPBLENDD_128_0x0C );
   DO_D( VPBLENDD_128_0x0E );
   DO_D( VPBLENDD_256_0x00 );
   DO_D( VPBLENDD_256_0xFE );
   DO_D( VPBLENDD_256_0x30 );
   DO_D( VPBLENDD_256_0x21 );
   DO_D( VPBLENDD_256_0xD7 );
   DO_D( VPBLENDD_256_0xB5 );
   DO_D( VPBLENDD_256_0x85 );
   DO_D( VPBLENDD_256_0x29 );
   DO_D( VPSLLVD_128 );
   DO_D( VPSLLVD_256 );
   DO_D( VPSLLVQ_128 );
   DO_D( VPSLLVQ_256 );
   DO_D( VPSRLVD_128 );
   DO_D( VPSRLVD_256 );
   DO_D( VPSRLVQ_128 );
   DO_D( VPSRLVQ_256 );
   DO_D( VPSRAVD_128 );
   DO_D( VPSRAVD_256 );
   DO_D( VPBROADCASTB_128 );
   DO_D( VPBROADCASTB_256 );
   DO_D( VPBROADCASTW_128 );
   DO_D( VPBROADCASTW_256 );
   DO_D( VPBROADCASTD_128 );
   DO_D( VPBROADCASTD_256 );
   DO_D( VPBROADCASTQ_128 );
   DO_D( VPBROADCASTQ_256 );
   DO_D( VPMASKMOVD_128_LoadForm );
   DO_D( VPMASKMOVD_256_LoadForm );
   DO_D( VPMASKMOVQ_128_LoadForm );
   DO_D( VPMASKMOVQ_256_LoadForm );
   DO_D( VPMASKMOVD_128_StoreForm );
   DO_D( VPMASKMOVD_256_StoreForm );
   DO_D( VPMASKMOVQ_128_StoreForm );
   DO_D( VPMASKMOVQ_256_StoreForm );
#if defined(VGO_darwin)
   { int i; for (i = 0; i < sizeof(randArray); i++) randArray[i] = randUChar(); }
#else
   { int i; for (i = 0; i < sizeof(_randArray); i++) _randArray[i] = randUChar(); }
#endif
   DO_D( VGATHERDPS_128 );
   DO_D( VGATHERDPS_256 );
   DO_D( VGATHERQPS_128_1 );
   DO_D( VGATHERQPS_256_1 );
   DO_D( VGATHERQPS_128_2 );
   DO_D( VGATHERQPS_256_2 );
   DO_D( VGATHERDPD_128 );
   DO_D( VGATHERDPD_256 );
   DO_D( VGATHERQPD_128_1 );
   DO_D( VGATHERQPD_256_1 );
   DO_D( VGATHERQPD_128_2 );
   DO_D( VGATHERQPD_256_2 );
   DO_D( VPGATHERDD_128 );
   DO_D( VPGATHERDD_256 );
   DO_D( VPGATHERQD_128_1 );
   DO_D( VPGATHERQD_256_1 );
   DO_D( VPGATHERQD_128_2 );
   DO_D( VPGATHERQD_256_2 );
   DO_D( VPGATHERDQ_128 );
   DO_D( VPGATHERDQ_256 );
   DO_D( VPGATHERQQ_128_1 );
   DO_D( VPGATHERQQ_256_1 );
   DO_D( VPGATHERQQ_128_2 );
   DO_D( VPGATHERQQ_256_2 );
   return 0;
}