
/* Tests in detail the core arithmetic for pcmp{e,i}str{i,m} using
   pcmpistri to drive it.  Does not check the e-vs-i or i-vs-m
   aspect.

   Each h_pcmpistri_XX function runs the real instruction, each
   s_pcmpistri_XX function runs the software model (pcmpXstrX_WRK),
   and try_istri prints both results side by side. */

#include <string.h>
#include <stdio.h>
#include <assert.h>

typedef  unsigned int            UInt;
typedef  signed int              Int;
typedef  unsigned char           UChar;
typedef  unsigned long long int  ULong;
typedef  UChar                   Bool;
#define False ((Bool)0)
#define True  ((Bool)1)

//typedef  unsigned char  V128[16];
/* A 128-bit vector, viewable as 16 bytes or as 4 32-bit words. */
typedef
   union {
      UChar uChar[16];
      UInt  uInt[4];
   }
   V128;

/* Bit positions of the O, S, Z, A, C, P flags as used when packing
   flag results (these are the positions masked by 0x8D5 below). */
#define SHIFT_O   11
#define SHIFT_S   7
#define SHIFT_Z   6
#define SHIFT_A   4
#define SHIFT_C   0
#define SHIFT_P   2

#define MASK_O    (1ULL << SHIFT_O)
#define MASK_S    (1ULL << SHIFT_S)
#define MASK_Z    (1ULL << SHIFT_Z)
#define MASK_A    (1ULL << SHIFT_A)
#define MASK_C    (1ULL << SHIFT_C)
#define MASK_P    (1ULL << SHIFT_P)


/* Count leading zero bits of x; returns 32 when x == 0.

   Branch-free binary search.  Each step builds a mask from the sign
   of a signed difference via 'y >> 16', so this relies on >> of a
   negative Int being an arithmetic shift and on UInt being 32 bits
   wide.  Do not reorder the statements: x and n are updated in
   lock-step. */
UInt clz32 ( UInt x )
{
   Int y, m, n;
   y = -(x >> 16);
   m = (y >> 16) & 16;
   n = 16 - m;
   x = x >> m;
   y = x - 0x100;
   m = (y >> 16) & 8;
   n = n + m;
   x = x << m;
   y = x - 0x1000;
   m = (y >> 16) & 4;
   n = n + m;
   x = x << m;
   y = x - 0x4000;
   m = (y >> 16) & 2;
   n = n + m;
   x = x << m;
   y = x >> 14;
   m = y & ~(y >> 1);
   return n + 2 - m;
}

/* Count trailing zero bits of x; returns 32 when x == 0.
   (~x) & (x-1) has a 1 exactly in each position below the lowest
   set bit of x, so its leading-zero count gives the answer. */
UInt ctz32 ( UInt x )
{
   return 32 - clz32((~x) & (x-1));
}

/* Build a 16-byte vector from a 16-character hex summary string.

   Each hex digit d becomes the byte 0xdd, so '0' gives a zero byte
   (a string terminator for the ISTRx forms).  The rightmost
   character of the summary lands in byte 0, the leftmost in
   byte 15.  Asserts on a wrong length or a non-hex character. */
void expand ( V128* dst, char* summary )
{
   Int i;
   assert( strlen(summary) == 16 );
   for (i = 0; i < 16; i++) {
      UChar xx = 0;
      UChar x = summary[15-i];
      if      (x >= '0' && x <= '9') { xx = x - '0'; }
      else if (x >= 'A' && x <= 'F') { xx = x - 'A' + 10; }
      else if (x >= 'a' && x <= 'f') { xx = x - 'a' + 10; }
      else assert(0);

      assert(xx < 16);
      /* replicate the nibble into both halves of the byte */
      xx = (xx << 4) | xx;
      dst->uChar[i] = xx;
   }
}

/* Run one test case: expand both summaries, evaluate them with the
   hardware function h_fn and the software function s_fn, and print
   both results.  The line ends in "!!!!" when the two disagree.
   'which' is the two-character imm8 label shown in the output. */
void try_istri ( char* which,
                 UInt(*h_fn)(V128*,V128*),
                 UInt(*s_fn)(V128*,V128*),
                 char* summL, char* summR )
{
   assert(strlen(which) == 2);
   V128 argL, argR;
   expand(&argL, summL);
   expand(&argR, summR);
   UInt h_res = h_fn(&argL, &argR);
   UInt s_res = s_fn(&argL, &argR);
   printf("istri %s  %s %s -> %08x %08x %s\n",
          which, summL, summR, h_res, s_res, h_res == s_res ? "" : "!!!!");
}

/* Return a 16-bit mask with bit i set iff byte i of *arg is zero. */
UInt zmask_from_V128 ( V128* arg )
{
   UInt i, res = 0;
   for (i = 0; i < 16; i++) {
      if (arg->uChar[i] == 0)
         res |= 1u << i;
   }
   return res;
}

//////////////////////////////////////////////////////////
//                                                      //
//                       GENERAL                        //
//                                                      //
//////////////////////////////////////////////////////////


/* Given partial results from a pcmpXstrX operation (intRes1,
   basically), generate an I format (index value for ECX) output, and
   also the new OSZACP flags.

   pol is imm8[5:4] (0..3) and idx is imm8[6] (0..1); both are
   asserted.  validL is the mask of valid positions in argL and is
   only used for the masked-negative polarity (pol == 3).  The ECX
   value is written to resV->uInt[0]; it is 16 when no result bit
   is set.
*/
static
void pcmpXstrX_WRK_gen_output_fmt_I(/*OUT*/V128* resV,
                                    /*OUT*/UInt* resOSZACP,
                                    UInt intRes1,
                                    UInt zmaskL, UInt zmaskR,
                                    UInt validL,
                                    UInt pol, UInt idx )
{
   assert((pol >> 2) == 0);
   assert((idx >> 1) == 0);

   UInt intRes2 = 0;
   switch (pol) {
      case 0: intRes2 = intRes1;          break; // pol +
      case 1: intRes2 = ~intRes1;         break; // pol -
      case 2: intRes2 = intRes1;          break; // pol m+
      case 3: intRes2 = intRes1 ^ validL; break; // pol m-
   }
   intRes2 &= 0xFFFF;

   // generate ecx value
   UInt newECX = 16;
   if (intRes2 != 0) {
      newECX = idx ? (31 - clz32(intRes2))   // index of ms-1-bit
                   : ctz32(intRes2);         // index of ls-1-bit
   }

   // V128 is a union, so write the 32-bit result through its UInt
   // view rather than through a casted pointer.
   resV->uInt[0] = newECX;

   // generate new flags, common to all ISTRI and ISTRM cases
   *resOSZACP    // A, P are zero
     = ((intRes2 == 0) ? 0 : MASK_C) // C == 0 iff intRes2 == 0
     | ((zmaskL == 0)  ? 0 : MASK_Z) // Z == 1 iff any in argL is 0
     | ((zmaskR == 0)  ? 0 : MASK_S) // S == 1 iff any in argR is 0
     | ((intRes2 & 1) << SHIFT_O);   // O == IntRes2[0]
}


/* Compute result and new OSZACP flags for all PCMP{E,I}STR{I,M}
   variants.

   For xSTRI variants, the new ECX value is placed in the 32 bits
   pointed to by *resV.  For xSTRM variants, the result is a 128 bit
   value and is placed at *resV in the obvious way.  (This copy only
   implements the xSTRI forms: isSTRM == True always returns False.)

   For all variants, the new OSZACP value is placed at *resOSZACP.

   argLV and argRV are the vector args.  The caller must prepare a
   16-bit mask for each, zmaskL and zmaskR.  For ISTRx variants this
   must be 1 for each zero byte of the respective arg.  For ESTRx
   variants this is derived from the explicit length indication, and
   must be 0 in all places except at the bit index corresponding to
   the valid length (0 .. 16).  If the valid length is 16 then the
   mask must be all zeroes.  In all cases, bits 31:16 must be zero.

   imm8 is the original immediate from the instruction.  isSTRM
   indicates whether this is a xSTRM or xSTRI variant, which controls
   how much of *resV is written.

   If the given imm8 case can be handled, the return value is True.
   If not, False is returned, and neither *resV nor *resOSZACP are
   altered.
*/

Bool pcmpXstrX_WRK ( /*OUT*/V128* resV,
                     /*OUT*/UInt* resOSZACP,
                     V128* argLV,  V128* argRV,
                     UInt zmaskL, UInt zmaskR,
                     UInt imm8,   Bool isSTRM )
{
   assert(imm8 < 0x80);
   assert((zmaskL >> 16) == 0);
   assert((zmaskR >> 16) == 0);

   /* Explicitly reject any imm8 values that haven't been validated,
      even if they would probably work.  Life is too short to have
      unvalidated cases in the code base. */
   switch (imm8) {
      case 0x00:
      case 0x02: case 0x08: case 0x0C: case 0x12: case 0x1A:
      case 0x38: case 0x3A: case 0x44: case 0x4A:
         break;
      default:
         return False;
   }

   /* Every case below is an xSTRI case. */
   if (isSTRM)
      return False;

   UInt fmt = (imm8 >> 0) & 3; // imm8[1:0]  data format
   UInt agg = (imm8 >> 2) & 3; // imm8[3:2]  aggregation fn
   UInt pol = (imm8 >> 4) & 3; // imm8[5:4]  polarity
   UInt idx = (imm8 >> 6) & 1; // imm8[6]    1==msb/bytemask

   Bool byteData = (fmt == 0/*ub*/ || fmt == 2/*sb*/) ? True : False;

   const UChar* argL = argLV->uChar;
   const UChar* argR = argRV->uChar;

   /* Valid-position masks: a 1 for every position below the first
      zero byte.  When the zmask is 0 all 32 bits come out set; only
      bits 15:0 matter to the users below. */
   UInt validL = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
   UInt validR = ~(zmaskR | -zmaskR);  // not(left(zmaskR))

   UInt intRes1 = 0;

   if (agg == 2/*equal each, aka strcmp*/ && byteData) {

      /*----------------------------------------*/
      /*-- strcmp on byte data                --*/
      /*----------------------------------------*/

      UInt i;
      UInt boolResII = 0;
      for (i = 0; i < 16; i++) {
         if (argL[i] == argR[i])
            boolResII |= 1u << i;
      }

      // do invalidation, common to all equal-each cases
      intRes1
         = (boolResII & validL & validR)  // if both valid, use cmpres
           | (~ (validL | validR));       // if both invalid, force 1
                                          // else force 0
      intRes1 &= 0xFFFF;
   }
   else if (agg == 0/*equal any, aka find chars in a set*/ && byteData) {

      /*----------------------------------------*/
      /*-- set membership on byte data        --*/
      /*----------------------------------------*/

      /* argL: the string,  argR: charset */
      UInt si, ci;
      UInt boolRes = 0;

      for (si = 0; si < 16; si++) {
         if ((validL & (1 << si)) == 0)
            // run off the end of the string.
            break;
         UInt m = 0;
         for (ci = 0; ci < 16; ci++) {
            if ((validR & (1 << ci)) == 0) break;
            if (argR[ci] == argL[si]) { m = 1; break; }
         }
         boolRes |= (m << si);
      }

      // boolRes is "pre-invalidated"
      intRes1 = boolRes & 0xFFFF;
   }
   else if (agg == 3/*equal ordered, aka substring search*/ && byteData) {

      /*----------------------------------------*/
      /*-- substring search on byte data      --*/
      /*----------------------------------------*/

      /* argL: haystack,  argR: needle */
      UInt ni, hi;
      UInt boolRes = 0;

      for (hi = 0; hi < 16; hi++) {
         UInt m = 1;
         for (ni = 0; ni < 16; ni++) {
            if ((validR & (1 << ni)) == 0) break;
            UInt i = ni + hi;
            if (i >= 16) break;
            if (argL[i] != argR[ni]) { m = 0; break; }
         }
         boolRes |= (m << hi);
         // Checked after recording the bit, so the first invalid
         // haystack position still gets a result.
         if ((validL & (1 << hi)) == 0)
            // run off the end of the haystack
            break;
      }

      // boolRes is "pre-invalidated"
      intRes1 = boolRes & 0xFFFF;
   }
   else if (agg == 1/*ranges*/ && fmt == 0/*ub*/) {

      /*----------------------------------------*/
      /*-- ranges, unsigned byte data         --*/
      /*----------------------------------------*/

      /* argL: string,  argR: range-pairs */
      UInt ri, si;
      UInt boolRes = 0;

      for (si = 0; si < 16; si++) {
         if ((validL & (1 << si)) == 0)
            // run off the end of the string
            break;
         UInt m = 0;
         for (ri = 0; ri < 16; ri += 2) {
            // both ends of the pair must be valid
            if ((validR & (3 << ri)) != (3 << ri)) break;
            if (argR[ri] <= argL[si] && argL[si] <= argR[ri+1]) {
               m = 1; break;
            }
         }
         boolRes |= (m << si);
      }

      // boolRes is "pre-invalidated"
      intRes1 = boolRes & 0xFFFF;
   }
   else {
      return False;
   }

   // generate I-format output
   pcmpXstrX_WRK_gen_output_fmt_I(
      resV, resOSZACP,
      intRes1, zmaskL, zmaskR, validL, pol, idx
   );

   return True;
}


//////////////////////////////////////////////////////////
//                                                      //
//                       ISTRI_4A                       //
//                                                      //
//////////////////////////////////////////////////////////

/* Hardware result for pcmpistri $0x4A.  argL is loaded into %xmm2
   and argR into %xmm11.  Returns the O,S,Z,A,P,C flag bits (mask
   0x8D5) shifted left by 16, OR'd with the low 16 bits of %rcx.

   %rsp is moved down by 1024 around the pushfq so the push cannot
   overwrite locals the compiler keeps just below %rsp (the SysV
   AMD64 red zone). */
UInt h_pcmpistri_4A ( V128* argL, V128* argR )
{
   V128 block[2];
   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));
   ULong res, flags;
   __asm__ __volatile__(
      "subq      $1024,  %%rsp"             "\n\t"
      "movdqu    0(%2),  %%xmm2"            "\n\t"
      "movdqu    16(%2), %%xmm11"           "\n\t"
      "pcmpistri $0x4A,  %%xmm2, %%xmm11"   "\n\t"
      "pushfq"                              "\n\t"
      "popq      %%rdx"                     "\n\t"
      "movq      %%rcx,  %0"                "\n\t"
      "movq      %%rdx,  %1"                "\n\t"
      "addq      $1024,  %%rsp"             "\n\t"
      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );
   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}

/* Software result for imm8 0x4A, packed the same way as
   h_pcmpistri_4A: (OSZACP << 16) | ECX. */
UInt s_pcmpistri_4A ( V128* argLU, V128* argRU )
{
   V128 resV;
   UInt resOSZACP, resECX;
   Bool ok
      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
                       zmask_from_V128(argLU),
                       zmask_from_V128(argRU),
                       0x4A, False/*!isSTRM*/
        );
   assert(ok);
   resECX = resV.uInt[0];
   return (resOSZACP << 16) | resECX;
}

/* Test cases for imm8 0x4A. */
void istri_4A ( void )
{
   char* wot = "4A";
   UInt(*h)(V128*,V128*) = h_pcmpistri_4A;
   UInt(*s)(V128*,V128*) = s_pcmpistri_4A;

   try_istri(wot,h,s, "0000000000000000", "0000000000000000");

   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");

   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");

   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");

   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");

   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");

   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");

   try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
   try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");

   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");

   try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
   try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
   try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");

   try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
}

//////////////////////////////////////////////////////////
//                                                      //
//                       ISTRI_3A                       //
//                                                      //
//////////////////////////////////////////////////////////

/* Hardware result for pcmpistri $0x3A; same packing and stack
   handling as h_pcmpistri_4A. */
UInt h_pcmpistri_3A ( V128* argL, V128* argR )
{
   V128 block[2];
   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));
   ULong res, flags;
   __asm__ __volatile__(
      "subq      $1024,  %%rsp"             "\n\t"
      "movdqu    0(%2),  %%xmm2"            "\n\t"
      "movdqu    16(%2), %%xmm11"           "\n\t"
      "pcmpistri $0x3A,  %%xmm2, %%xmm11"   "\n\t"
      "pushfq"                              "\n\t"
      "popq      %%rdx"                     "\n\t"
      "movq      %%rcx,  %0"                "\n\t"
      "movq      %%rdx,  %1"                "\n\t"
      "addq      $1024,  %%rsp"             "\n\t"
      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );
   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}

/* Software result for imm8 0x3A: (OSZACP << 16) | ECX. */
UInt s_pcmpistri_3A ( V128* argLU, V128* argRU )
{
   V128 resV;
   UInt resOSZACP, resECX;
   Bool ok
      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
                       zmask_from_V128(argLU),
                       zmask_from_V128(argRU),
                       0x3A, False/*!isSTRM*/
        );
   assert(ok);
   resECX = resV.uInt[0];
   return (resOSZACP << 16) | resECX;
}

/* Test cases for imm8 0x3A. */
void istri_3A ( void )
{
   char* wot = "3A";
   UInt(*h)(V128*,V128*) = h_pcmpistri_3A;
   UInt(*s)(V128*,V128*) = s_pcmpistri_3A;

   try_istri(wot,h,s, "0000000000000000", "0000000000000000");

   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");

   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");

   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");

   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");

   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");

   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");

   try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
   try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");

   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");

   try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
   try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
   try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");

   try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
}



//////////////////////////////////////////////////////////
//                                                      //
//                       ISTRI_0C                       //
//                                                      //
//////////////////////////////////////////////////////////

/* Hardware result for pcmpistri $0x0C; same packing as
   h_pcmpistri_4A.

   Like the other h_ functions this moves %rsp down by 1024 around
   the pushfq.  Without that, the push writes to the 8 bytes just
   below %rsp, which the compiler may be using for this function's
   locals (the SysV AMD64 red zone). */
__attribute__((noinline))
UInt h_pcmpistri_0C ( V128* argL, V128* argR )
{
   V128 block[2];
   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));
   ULong res = 0, flags = 0;
   __asm__ __volatile__(
      "subq      $1024,  %%rsp"             "\n\t"
      "movdqu    0(%2),  %%xmm2"            "\n\t"
      "movdqu    16(%2), %%xmm11"           "\n\t"
      "pcmpistri $0x0C,  %%xmm2, %%xmm11"   "\n\t"
      //"pcmpistrm $0x0C,  %%xmm2, %%xmm11"   "\n\t"
      //"movd %%xmm0, %%ecx" "\n\t"
      "pushfq"                              "\n\t"
      "popq      %%rdx"                     "\n\t"
      "movq      %%rcx,  %0"                "\n\t"
      "movq      %%rdx,  %1"                "\n\t"
      "addq      $1024,  %%rsp"             "\n\t"
      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );
   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}

/* Software result for imm8 0x0C: (OSZACP << 16) | ECX. */
UInt s_pcmpistri_0C ( V128* argLU, V128* argRU )
{
   V128 resV;
   UInt resOSZACP, resECX;
   Bool ok
      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
                       zmask_from_V128(argLU),
                       zmask_from_V128(argRU),
                       0x0C, False/*!isSTRM*/
        );
   assert(ok);
   resECX = resV.uInt[0];
   return (resOSZACP << 16) | resECX;
}

/* Test cases for imm8 0x0C (substring search: left arg is the
   haystack, right arg is the needle). */
void istri_0C ( void )
{
   char* wot = "0C";
   UInt(*h)(V128*,V128*) = h_pcmpistri_0C;
   UInt(*s)(V128*,V128*) = s_pcmpistri_0C;

   try_istri(wot,h,s, "111111111abcde11", "00000000000abcde");

   try_istri(wot,h,s, "111111111abcde11", "0000abcde00abcde");

   try_istri(wot,h,s, "1111111111abcde1", "00000000000abcde");
   try_istri(wot,h,s, "11111111111abcde", "00000000000abcde");
   try_istri(wot,h,s, "111111111111abcd", "00000000000abcde");

   try_istri(wot,h,s, "111abcde1abcde11", "00000000000abcde");

   try_istri(wot,h,s, "11abcde11abcde11", "00000000000abcde");
   try_istri(wot,h,s, "1abcde111abcde11", "00000000000abcde");
   try_istri(wot,h,s, "abcde1111abcde11", "00000000000abcde");
   try_istri(wot,h,s, "bcde11111abcde11", "00000000000abcde");
   try_istri(wot,h,s, "cde111111abcde11", "00000000000abcde");

   try_istri(wot,h,s, "01abcde11abcde11", "00000000000abcde");
   try_istri(wot,h,s, "00abcde11abcde11", "00000000000abcde");
   try_istri(wot,h,s, "000bcde11abcde11", "00000000000abcde");

   try_istri(wot,h,s, "00abcde10abcde11", "00000000000abcde");
   try_istri(wot,h,s, "00abcde100bcde11", "00000000000abcde");

   try_istri(wot,h,s, "1111111111111234", "0000000000000000");
   try_istri(wot,h,s, "1111111111111234", "0000000000000001");
   try_istri(wot,h,s, "1111111111111234", "0000000000000011");

   try_istri(wot,h,s, "1111111111111234", "1111111111111234");
   try_istri(wot,h,s, "a111111111111111", "000000000000000a");
   try_istri(wot,h,s, "b111111111111111", "000000000000000a");

   try_istri(wot,h,s, "b111111111111111", "0000000000000000");
   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
   try_istri(wot,h,s, "123456789abcdef1", "0000000000000000");
   try_istri(wot,h,s, "0000000000000000", "123456789abcdef1");
}


//////////////////////////////////////////////////////////
//                                                      //
//                       ISTRI_08                       //
//                                                      //
//////////////////////////////////////////////////////////

/* Hardware result for pcmpistri $0x08; same packing and stack
   handling as h_pcmpistri_4A. */
UInt h_pcmpistri_08 ( V128* argL, V128* argR )
{
   V128 block[2];
   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));
   ULong res, flags;
   __asm__ __volatile__(
      "subq      $1024,  %%rsp"             "\n\t"
      "movdqu    0(%2),  %%xmm2"            "\n\t"
      "movdqu    16(%2), %%xmm11"           "\n\t"
      "pcmpistri $0x08,  %%xmm2, %%xmm11"   "\n\t"
      "pushfq"                              "\n\t"
      "popq      %%rdx"                     "\n\t"
      "movq      %%rcx,  %0"                "\n\t"
      "movq      %%rdx,  %1"                "\n\t"
      "addq      $1024,  %%rsp"             "\n\t"
      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );
   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}

/* Software result for imm8 0x08: (OSZACP << 16) | ECX. */
UInt s_pcmpistri_08 ( V128* argLU, V128* argRU )
{
   V128 resV;
   UInt resOSZACP, resECX;
   Bool ok
      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
                       zmask_from_V128(argLU),
                       zmask_from_V128(argRU),
                       0x08, False/*!isSTRM*/
        );
   assert(ok);
   resECX = resV.uInt[0];
   return (resOSZACP << 16) | resECX;
}

/* Test cases for imm8 0x08. */
void istri_08 ( void )
{
   char* wot = "08";
   UInt(*h)(V128*,V128*) = h_pcmpistri_08;
   UInt(*s)(V128*,V128*) = s_pcmpistri_08;

   try_istri(wot,h,s, "0000000000000000", "0000000000000000");

   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");

   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");

   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");

   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");

   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");

   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");

   try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
   try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");

   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");

   try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
   try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
   try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");

   try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
}



//////////////////////////////////////////////////////////
//                                                      //
//                       ISTRI_1A                       //
//                                                      //
////////////////////////////////////////////////////////// 748 749 UInt h_pcmpistri_1A ( V128* argL, V128* argR ) 750 { 751 V128 block[2]; 752 memcpy(&block[0], argL, sizeof(V128)); 753 memcpy(&block[1], argR, sizeof(V128)); 754 ULong res, flags; 755 __asm__ __volatile__( 756 "subq $1024, %%rsp" "\n\t" 757 "movdqu 0(%2), %%xmm2" "\n\t" 758 "movdqu 16(%2), %%xmm11" "\n\t" 759 "pcmpistri $0x1A, %%xmm2, %%xmm11" "\n\t" 760 "pushfq" "\n\t" 761 "popq %%rdx" "\n\t" 762 "movq %%rcx, %0" "\n\t" 763 "movq %%rdx, %1" "\n\t" 764 "addq $1024, %%rsp" "\n\t" 765 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 766 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 767 ); 768 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 769 } 770 771 UInt s_pcmpistri_1A ( V128* argLU, V128* argRU ) 772 { 773 V128 resV; 774 UInt resOSZACP, resECX; 775 Bool ok 776 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 777 zmask_from_V128(argLU), 778 zmask_from_V128(argRU), 779 0x1A, False/*!isSTRM*/ 780 ); 781 assert(ok); 782 resECX = resV.uInt[0]; 783 return (resOSZACP << 16) | resECX; 784 } 785 786 void istri_1A ( void ) 787 { 788 char* wot = "1A"; 789 UInt(*h)(V128*,V128*) = h_pcmpistri_1A; 790 UInt(*s)(V128*,V128*) = s_pcmpistri_1A; 791 792 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 793 794 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 795 try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 796 try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa"); 797 try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa"); 798 799 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa"); 800 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa"); 801 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a"); 802 803 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 804 try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 805 try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 806 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", 
"aaaaaaaaaaaaaaaa"); 807 808 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 809 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa"); 810 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa"); 811 812 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 813 814 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 815 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 816 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa"); 817 818 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa"); 819 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 820 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa"); 821 822 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 823 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa"); 824 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa"); 825 826 try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa"); 827 try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa"); 828 try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa"); 829 830 try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa"); 831 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000"); 832 } 833 834 835 836 ////////////////////////////////////////////////////////// 837 // // 838 // ISTRI_02 // 839 // // 840 ////////////////////////////////////////////////////////// 841 842 UInt h_pcmpistri_02 ( V128* argL, V128* argR ) 843 { 844 V128 block[2]; 845 memcpy(&block[0], argL, sizeof(V128)); 846 memcpy(&block[1], argR, sizeof(V128)); 847 ULong res, flags; 848 __asm__ __volatile__( 849 "subq $1024, %%rsp" "\n\t" 850 "movdqu 0(%2), %%xmm2" "\n\t" 851 "movdqu 16(%2), %%xmm11" "\n\t" 852 "pcmpistri $0x02, %%xmm2, %%xmm11" "\n\t" 853 //"pcmpistrm $0x02, %%xmm2, %%xmm11" "\n\t" 854 //"movd %%xmm0, %%ecx" "\n\t" 855 "pushfq" "\n\t" 856 "popq %%rdx" "\n\t" 857 "movq %%rcx, %0" "\n\t" 858 "movq %%rdx, %1" "\n\t" 859 "addq $1024, %%rsp" "\n\t" 860 : /*out*/ "=r"(res), "=r"(flags) : 
"r"/*in*/(&block[0]) 861 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 862 ); 863 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 864 } 865 866 UInt s_pcmpistri_02 ( V128* argLU, V128* argRU ) 867 { 868 V128 resV; 869 UInt resOSZACP, resECX; 870 Bool ok 871 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 872 zmask_from_V128(argLU), 873 zmask_from_V128(argRU), 874 0x02, False/*!isSTRM*/ 875 ); 876 assert(ok); 877 resECX = resV.uInt[0]; 878 return (resOSZACP << 16) | resECX; 879 } 880 881 void istri_02 ( void ) 882 { 883 char* wot = "02"; 884 UInt(*h)(V128*,V128*) = h_pcmpistri_02; 885 UInt(*s)(V128*,V128*) = s_pcmpistri_02; 886 887 try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a"); 888 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b"); 889 try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab"); 890 try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd"); 891 892 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 893 try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd"); 894 try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd"); 895 try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd"); 896 try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd"); 897 898 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 899 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd"); 900 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d"); 901 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0"); 902 903 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 904 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 905 906 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd"); 907 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba"); 908 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb"); 909 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba"); 910 911 try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0"); 912 913 try_istri(wot,h,s, "0ddc0ffeebadf00d", 
"00000000cafebabe"); 914 try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe"); 915 } 916 917 918 ////////////////////////////////////////////////////////// 919 // // 920 // ISTRI_12 // 921 // // 922 ////////////////////////////////////////////////////////// 923 924 UInt h_pcmpistri_12 ( V128* argL, V128* argR ) 925 { 926 V128 block[2]; 927 memcpy(&block[0], argL, sizeof(V128)); 928 memcpy(&block[1], argR, sizeof(V128)); 929 ULong res, flags; 930 __asm__ __volatile__( 931 "subq $1024, %%rsp" "\n\t" 932 "movdqu 0(%2), %%xmm2" "\n\t" 933 "movdqu 16(%2), %%xmm11" "\n\t" 934 "pcmpistri $0x12, %%xmm2, %%xmm11" "\n\t" 935 //"pcmpistrm $0x12, %%xmm2, %%xmm11" "\n\t" 936 //"movd %%xmm0, %%ecx" "\n\t" 937 "pushfq" "\n\t" 938 "popq %%rdx" "\n\t" 939 "movq %%rcx, %0" "\n\t" 940 "movq %%rdx, %1" "\n\t" 941 "addq $1024, %%rsp" "\n\t" 942 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 943 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 944 ); 945 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 946 } 947 948 UInt s_pcmpistri_12 ( V128* argLU, V128* argRU ) 949 { 950 V128 resV; 951 UInt resOSZACP, resECX; 952 Bool ok 953 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 954 zmask_from_V128(argLU), 955 zmask_from_V128(argRU), 956 0x12, False/*!isSTRM*/ 957 ); 958 assert(ok); 959 resECX = resV.uInt[0]; 960 return (resOSZACP << 16) | resECX; 961 } 962 963 void istri_12 ( void ) 964 { 965 char* wot = "12"; 966 UInt(*h)(V128*,V128*) = h_pcmpistri_12; 967 UInt(*s)(V128*,V128*) = s_pcmpistri_12; 968 969 try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a"); 970 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b"); 971 try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab"); 972 try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd"); 973 974 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 975 try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd"); 976 try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd"); 977 try_istri(wot,h,s, 
"abcdabcdabcdab0d", "000000000000abcd"); 978 try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd"); 979 980 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 981 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd"); 982 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d"); 983 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0"); 984 985 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 986 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 987 988 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd"); 989 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba"); 990 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb"); 991 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba"); 992 993 try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0"); 994 995 try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe"); 996 try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe"); 997 } 998 999 1000 1001 ////////////////////////////////////////////////////////// 1002 // // 1003 // ISTRI_44 // 1004 // // 1005 ////////////////////////////////////////////////////////// 1006 1007 UInt h_pcmpistri_44 ( V128* argL, V128* argR ) 1008 { 1009 V128 block[2]; 1010 memcpy(&block[0], argL, sizeof(V128)); 1011 memcpy(&block[1], argR, sizeof(V128)); 1012 ULong res, flags; 1013 __asm__ __volatile__( 1014 "subq $1024, %%rsp" "\n\t" 1015 "movdqu 0(%2), %%xmm2" "\n\t" 1016 "movdqu 16(%2), %%xmm11" "\n\t" 1017 "pcmpistri $0x44, %%xmm2, %%xmm11" "\n\t" 1018 //"pcmpistrm $0x04, %%xmm2, %%xmm11" "\n\t" 1019 //"movd %%xmm0, %%ecx" "\n\t" 1020 "pushfq" "\n\t" 1021 "popq %%rdx" "\n\t" 1022 "movq %%rcx, %0" "\n\t" 1023 "movq %%rdx, %1" "\n\t" 1024 "addq $1024, %%rsp" "\n\t" 1025 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 1026 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 1027 ); 1028 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 1029 } 1030 1031 UInt s_pcmpistri_44 ( V128* argLU, V128* argRU ) 1032 { 1033 V128 
resV; 1034 UInt resOSZACP, resECX; 1035 Bool ok 1036 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 1037 zmask_from_V128(argLU), 1038 zmask_from_V128(argRU), 1039 0x44, False/*!isSTRM*/ 1040 ); 1041 assert(ok); 1042 resECX = resV.uInt[0]; 1043 return (resOSZACP << 16) | resECX; 1044 } 1045 1046 void istri_44 ( void ) 1047 { 1048 char* wot = "44"; 1049 UInt(*h)(V128*,V128*) = h_pcmpistri_44; 1050 UInt(*s)(V128*,V128*) = s_pcmpistri_44; 1051 1052 try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000bc"); 1053 try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000cb"); 1054 try_istri(wot,h,s, "baaabbbbccccdddd", "00000000000000cb"); 1055 try_istri(wot,h,s, "baaabbbbccccdddc", "00000000000000cb"); 1056 1057 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb"); 1058 try_istri(wot,h,s, "bbbbbbbb0bbbbbbb", "00000000000000cb"); 1059 try_istri(wot,h,s, "bbbbbbbbbbbbbb0b", "00000000000000cb"); 1060 try_istri(wot,h,s, "bbbbbbbbbbbbbbb0", "00000000000000cb"); 1061 try_istri(wot,h,s, "0000000000000000", "00000000000000cb"); 1062 1063 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 1064 1065 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb"); 1066 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000000b"); 1067 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000062cb"); 1068 1069 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000002cb"); 1070 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000000cb"); 1071 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "000000000000000b"); 1072 1073 try_istri(wot,h,s, "0123456789abcdef", "000000fecb975421"); 1074 try_istri(wot,h,s, "123456789abcdef1", "000000fecb975421"); 1075 1076 try_istri(wot,h,s, "0123456789abcdef", "00000000dca86532"); 1077 try_istri(wot,h,s, "123456789abcdef1", "00000000dca86532"); 1078 } 1079 1080 1081 ////////////////////////////////////////////////////////// 1082 // // 1083 // ISTRI_00 // 1084 // // 1085 ////////////////////////////////////////////////////////// 1086 1087 UInt h_pcmpistri_00 ( 
V128* argL, V128* argR ) 1088 { 1089 V128 block[2]; 1090 memcpy(&block[0], argL, sizeof(V128)); 1091 memcpy(&block[1], argR, sizeof(V128)); 1092 ULong res, flags; 1093 __asm__ __volatile__( 1094 "subq $1024, %%rsp" "\n\t" 1095 "movdqu 0(%2), %%xmm2" "\n\t" 1096 "movdqu 16(%2), %%xmm11" "\n\t" 1097 "pcmpistri $0x00, %%xmm2, %%xmm11" "\n\t" 1098 //"pcmpistrm $0x00, %%xmm2, %%xmm11" "\n\t" 1099 //"movd %%xmm0, %%ecx" "\n\t" 1100 "pushfq" "\n\t" 1101 "popq %%rdx" "\n\t" 1102 "movq %%rcx, %0" "\n\t" 1103 "movq %%rdx, %1" "\n\t" 1104 "addq $1024, %%rsp" "\n\t" 1105 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 1106 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 1107 ); 1108 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 1109 } 1110 1111 UInt s_pcmpistri_00 ( V128* argLU, V128* argRU ) 1112 { 1113 V128 resV; 1114 UInt resOSZACP, resECX; 1115 Bool ok 1116 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 1117 zmask_from_V128(argLU), 1118 zmask_from_V128(argRU), 1119 0x00, False/*!isSTRM*/ 1120 ); 1121 assert(ok); 1122 resECX = resV.uInt[0]; 1123 return (resOSZACP << 16) | resECX; 1124 } 1125 1126 void istri_00 ( void ) 1127 { 1128 char* wot = "00"; 1129 UInt(*h)(V128*,V128*) = h_pcmpistri_00; 1130 UInt(*s)(V128*,V128*) = s_pcmpistri_00; 1131 1132 try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a"); 1133 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b"); 1134 try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab"); 1135 try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd"); 1136 1137 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 1138 try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd"); 1139 try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd"); 1140 try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd"); 1141 try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd"); 1142 1143 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 1144 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd"); 1145 
try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d"); 1146 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0"); 1147 1148 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 1149 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1150 1151 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd"); 1152 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba"); 1153 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb"); 1154 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba"); 1155 1156 try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0"); 1157 1158 try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe"); 1159 try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe"); 1160 } 1161 1162 1163 ////////////////////////////////////////////////////////// 1164 // // 1165 // ISTRI_38 // 1166 // // 1167 ////////////////////////////////////////////////////////// 1168 1169 UInt h_pcmpistri_38 ( V128* argL, V128* argR ) 1170 { 1171 V128 block[2]; 1172 memcpy(&block[0], argL, sizeof(V128)); 1173 memcpy(&block[1], argR, sizeof(V128)); 1174 ULong res, flags; 1175 __asm__ __volatile__( 1176 "subq $1024, %%rsp" "\n\t" 1177 "movdqu 0(%2), %%xmm2" "\n\t" 1178 "movdqu 16(%2), %%xmm11" "\n\t" 1179 "pcmpistri $0x38, %%xmm2, %%xmm11" "\n\t" 1180 "pushfq" "\n\t" 1181 "popq %%rdx" "\n\t" 1182 "movq %%rcx, %0" "\n\t" 1183 "movq %%rdx, %1" "\n\t" 1184 "addq $1024, %%rsp" "\n\t" 1185 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 1186 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 1187 ); 1188 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 1189 } 1190 1191 UInt s_pcmpistri_38 ( V128* argLU, V128* argRU ) 1192 { 1193 V128 resV; 1194 UInt resOSZACP, resECX; 1195 Bool ok 1196 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 1197 zmask_from_V128(argLU), 1198 zmask_from_V128(argRU), 1199 0x38, False/*!isSTRM*/ 1200 ); 1201 assert(ok); 1202 resECX = resV.uInt[0]; 1203 return (resOSZACP << 16) | resECX; 1204 } 1205 1206 void istri_38 
( void ) 1207 { 1208 char* wot = "38"; 1209 UInt(*h)(V128*,V128*) = h_pcmpistri_38; 1210 UInt(*s)(V128*,V128*) = s_pcmpistri_38; 1211 1212 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 1213 1214 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1215 try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1216 try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa"); 1217 try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa"); 1218 1219 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa"); 1220 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa"); 1221 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a"); 1222 1223 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1224 try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1225 try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1226 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1227 1228 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1229 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa"); 1230 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa"); 1231 1232 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1233 1234 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 1235 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 1236 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa"); 1237 1238 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa"); 1239 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 1240 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa"); 1241 1242 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 1243 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa"); 1244 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa"); 1245 1246 try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa"); 1247 try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa"); 1248 try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa"); 
1249 1250 try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa"); 1251 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000"); 1252 } 1253 1254 1255 1256 ////////////////////////////////////////////////////////// 1257 // // 1258 // main // 1259 // // 1260 ////////////////////////////////////////////////////////// 1261 1262 int main ( void ) 1263 { 1264 istri_4A(); 1265 istri_3A(); 1266 istri_08(); 1267 istri_1A(); 1268 istri_02(); 1269 istri_0C(); 1270 istri_12(); 1271 istri_44(); 1272 istri_00(); 1273 istri_38(); 1274 return 0; 1275 } 1276