/* Tests in detail the core arithmetic for pcmp{e,i}str{i,m} using
   pcmpistri to drive it.  Does not check the e-vs-i or i-vs-m
   aspect.

   Every test vector is run twice: once through the real instruction
   (the h_* "hardware" helpers) and once through a C model of the
   instruction (the s_* "software" helpers).  try_istri prints both
   results and flags any disagreement with "!!!!". */

#include <string.h>
#include <stdio.h>
#include <assert.h>

typedef  unsigned int            UInt;
typedef  signed int              Int;
typedef  unsigned char           UChar;
typedef  unsigned short          UShort;
typedef  unsigned long long int  ULong;
typedef  UChar                   Bool;
#define False ((Bool)0)
#define True  ((Bool)1)

/* A 128-bit vector, viewable as bytes, 16-bit or 32-bit lanes. */
typedef
   union {
      UChar  uChar[16];
      UShort uShort[8];
      UInt   uInt[4];
      UInt   w32[4];
   }
   V128;

/* Bit positions of the six arithmetic flags within %rflags. */
#define SHIFT_O 11
#define SHIFT_S 7
#define SHIFT_Z 6
#define SHIFT_A 4
#define SHIFT_C 0
#define SHIFT_P 2

#define MASK_O (1ULL << SHIFT_O)
#define MASK_S (1ULL << SHIFT_S)
#define MASK_Z (1ULL << SHIFT_Z)
#define MASK_A (1ULL << SHIFT_A)
#define MASK_C (1ULL << SHIFT_C)
#define MASK_P (1ULL << SHIFT_P)


/* Count leading zero bits of x; returns 32 for x == 0.

   Branch-free binary search.  Each step subtracts a threshold and
   uses the sign of the result (smeared down by ">> 16") to decide
   whether to shift.  This relies on '>>' of a negative Int being an
   arithmetic shift and on unsigned->signed conversion wrapping, both
   of which hold for the gcc/clang amd64 targets this file (with its
   inline asm) is restricted to anyway. */
UInt clz32 ( UInt x )
{
   Int y, m, n;
   y = -(x >> 16);
   m = (y >> 16) & 16;
   n = 16 - m;
   x = x >> m;
   y = x - 0x100;
   m = (y >> 16) & 8;
   n = n + m;
   x = x << m;
   y = x - 0x1000;
   m = (y >> 16) & 4;
   n = n + m;
   x = x << m;
   y = x - 0x4000;
   m = (y >> 16) & 2;
   n = n + m;
   x = x << m;
   y = x >> 14;
   m = y & ~(y >> 1);
   return n + 2 - m;
}

/* Count trailing zero bits of x; returns 32 for x == 0.
   (~x) & (x-1) has a 1 exactly in each trailing-zero position. */
UInt ctz32 ( UInt x )
{
   return 32 - clz32((~x) & (x-1));
}

/* Build a 16-byte vector from a 16-character hex summary.  Each hex
   digit d becomes the byte 0xdd, and the string is written most
   significant byte first: summary[0] lands in dst->uChar[15] and
   summary[15] in dst->uChar[0].  Asserts on a wrong length or a
   non-hex character. */
void expand ( V128* dst, const char* summary )
{
   Int i;
   assert( strlen(summary) == 16 );
   for (i = 0; i < 16; i++) {
      UChar xx = 0;
      UChar x  = summary[15-i];
      if      (x >= '0' && x <= '9') { xx = x - '0'; }
      else if (x >= 'A' && x <= 'F') { xx = x - 'A' + 10; }
      else if (x >= 'a' && x <= 'f') { xx = x - 'a' + 10; }
      else assert(0);

      assert(xx < 16);
      xx = (xx << 4) | xx;   /* replicate the nibble into both halves */
      dst->uChar[i] = xx;
   }
}

/* Run one test vector through the hardware helper h_fn and the
   software model s_fn and print both results on one line.  'which' is
   the two-character imm8 name (e.g. "4B"); summL/summR are hex
   summaries as accepted by expand().  A trailing "!!!!" marks a
   mismatch between the two results. */
void try_istri ( const char* which,
                 UInt(*h_fn)(V128*,V128*),
                 UInt(*s_fn)(V128*,V128*),
                 const char* summL, const char* summR )
{
   assert(strlen(which) == 2);
   V128 argL, argR;
   expand(&argL, summL);
   expand(&argR, summR);
   UInt h_res = h_fn(&argL, &argR);
   UInt s_res = s_fn(&argL, &argR);
   printf("istri %s  %s %s -> %08x %08x %s\n",
          which, summL, summR, h_res, s_res, h_res == s_res ? "" : "!!!!");
}

/* Return an 8-bit mask with bit i set iff 16-bit lane i of *arg is
   zero. */
UInt zmask_from_V128 ( V128* arg )
{
   UInt i, res = 0;
   for (i = 0; i < 8; i++) {
      res |=  ((arg->uShort[i] == 0) ? 1 : 0) << i;
   }
   return res;
}

/* One (left, right) pair of hex summaries for try_istri. */
typedef
   struct {
      const char* summL;
      const char* summR;
   }
   IstriVec;

#define N_VECS(tab) ((UInt)(sizeof(tab) / sizeof((tab)[0])))

/* Feed every entry of vecs[0 .. nVecs-1], in order, to try_istri. */
static
void try_istri_vecs ( const char* which,
                      UInt(*h_fn)(V128*,V128*),
                      UInt(*s_fn)(V128*,V128*),
                      const IstriVec* vecs, UInt nVecs )
{
   UInt i;
   for (i = 0; i < nVecs; i++) {
      try_istri(which, h_fn, s_fn, vecs[i].summL, vecs[i].summR);
   }
}

//////////////////////////////////////////////////////////
//                                                      //
//                       GENERAL                        //
//                                                      //
//////////////////////////////////////////////////////////


/* Given partial results from a 16-bit pcmpXstrX operation (intRes1,
   basically), generate an I-format output value (an index, as would
   be written to ECX) and the new OSZACP flags.

   resV       receives the index in w32[0]; w32[1..3] are zeroed.
   resOSZACP  receives the new flags, positioned as in %rflags.
   intRes1    8-bit per-lane result of the aggregation step.
   zmaskL/R   zero-lane masks of the two arguments.
   validL     valid-lane mask of the left argument (used by pol 3).
   pol        imm8[5:4], polarity; must be 0..3.
   idx        imm8[6]; 0 = index of lowest set bit, 1 = highest. */
static
void PCMPxSTRx_WRK_gen_output_fmt_I_wide ( /*OUT*/V128* resV,
                                           /*OUT*/UInt* resOSZACP,
                                           UInt intRes1,
                                           UInt zmaskL, UInt zmaskR,
                                           UInt validL,
                                           UInt pol, UInt idx )
{
   assert((pol >> 2) == 0);
   assert((idx >> 1) == 0);

   UInt intRes2 = 0;
   switch (pol) {
      case 0: intRes2 = intRes1;          break; // pol +
      case 1: intRes2 = ~intRes1;         break; // pol -
      case 2: intRes2 = intRes1;          break; // pol m+
      case 3: intRes2 = intRes1 ^ validL; break; // pol m-
      default: assert(0);                 break; // excluded above
   }
   intRes2 &= 0xFF;

   // generate ecx value: 8 (the lane count) if no bit is set,
   // otherwise the index of the requested set bit
   UInt newECX = 0;
   if (idx) {
      // index of ms-1-bit
      newECX = intRes2 == 0 ? 8 : (31 - clz32(intRes2));
   } else {
      // index of ls-1-bit
      newECX = intRes2 == 0 ? 8 : ctz32(intRes2);
   }

   resV->w32[0] = newECX;
   resV->w32[1] = 0;
   resV->w32[2] = 0;
   resV->w32[3] = 0;

   // generate new flags, common to all ISTRI and ISTRM cases
   *resOSZACP    // A, P are zero
     = ((intRes2 == 0) ? 0 : MASK_C) // C == 0 iff intRes2 == 0
     | ((zmaskL == 0)  ? 0 : MASK_Z) // Z == 1 iff any in argL is 0
     | ((zmaskR == 0)  ? 0 : MASK_S) // S == 1 iff any in argR is 0
     | ((intRes2 & 1) << SHIFT_O);   // O == IntRes2[0]
}

/* "Equal each" (strcmp-like) aggregation: bit i of the result is the
   lane-by-lane comparison argL[i] == argR[i], then adjusted for
   validity: both lanes valid -> use the comparison, both invalid ->
   force 1, exactly one valid -> force 0. */
static
UInt agg_equal_each_wide ( const UShort* argL, const UShort* argR,
                           UInt validL, UInt validR )
{
   UInt i, boolRes = 0;
   for (i = 0; i < 8; i++) {
      if (argL[i] == argR[i])
         boolRes |= 1u << i;
   }
   return ((boolRes & validL & validR) | ~(validL | validR)) & 0xFF;
}

/* "Equal any" (find characters from a set) aggregation.  argL is the
   string, argR the character set.  Bit si is set iff valid string
   lane si equals some valid lane of the set.  Lanes past the end of
   the string stay 0, so the result needs no further invalidation. */
static
UInt agg_equal_any_wide ( const UShort* argL, const UShort* argR,
                          UInt validL, UInt validR )
{
   UInt si, ci, boolRes = 0;
   for (si = 0; si < 8; si++) {
      if ((validL & (1u << si)) == 0)
         break;   // run off the end of the string
      UInt m = 0;
      for (ci = 0; ci < 8; ci++) {
         if ((validR & (1u << ci)) == 0) break;
         if (argR[ci] == argL[si]) { m = 1; break; }
      }
      boolRes |= (m << si);
   }
   return boolRes & 0xFF;
}

/* "Equal ordered" (substring search) aggregation.  argL is the
   haystack, argR the needle.  Bit hi is set iff the valid part of the
   needle matches the haystack starting at lane hi; a comparison that
   would run past lane 7 counts as a match so far.  Note the position
   of the first invalid haystack lane is still evaluated before the
   scan stops. */
static
UInt agg_equal_ordered_wide ( const UShort* argL, const UShort* argR,
                              UInt validL, UInt validR )
{
   UInt ni, hi, boolRes = 0;
   for (hi = 0; hi < 8; hi++) {
      UInt m = 1;
      for (ni = 0; ni < 8; ni++) {
         if ((validR & (1u << ni)) == 0) break;
         UInt i = ni + hi;
         if (i >= 8) break;
         if (argL[i] != argR[ni]) { m = 0; break; }
      }
      boolRes |= (m << hi);
      if ((validL & (1u << hi)) == 0)
         break;   // run off the end of the haystack
   }
   return boolRes & 0xFF;
}

/* "Ranges" aggregation on unsigned 16-bit data.  argL is the string,
   argR holds (low, high) pairs in consecutive lanes.  Bit si is set
   iff valid string lane si lies inside some pair whose two lanes are
   both valid. */
static
UInt agg_ranges_uwide ( const UShort* argL, const UShort* argR,
                        UInt validL, UInt validR )
{
   UInt ri, si, boolRes = 0;
   for (si = 0; si < 8; si++) {
      if ((validL & (1u << si)) == 0)
         break;   // run off the end of the string
      UInt m = 0;
      for (ri = 0; ri < 8; ri += 2) {
         if ((validR & (3u << ri)) != (3u << ri)) break;
         if (argR[ri] <= argL[si] && argL[si] <= argR[ri+1]) {
            m = 1; break;
         }
      }
      boolRes |= (m << si);
   }
   return boolRes & 0xFF;
}

/* Compute result and new OSZACP flags for PCMP{E,I}STRI variants on
   16-bit characters.

   The new ECX value is placed in the low 32 bits of *resV and the
   top 96 bits are zeroed.  The new OSZACP value is placed at
   *resOSZACP.

   argLV and argRV are the vector args.  The caller must prepare an
   8-bit mask for each, zmaskL and zmaskR.  For ISTRx variants this
   must be 1 for each zero 16-bit element of the respective arg.  For
   ESTRx variants this is derived from the explicit length indication,
   and must be 0 in all places except at the bit index corresponding
   to the valid length (0 .. 8).  If the valid length is 8 then the
   mask must be all zeroes.  In all cases, bits 31:8 must be zero.

   imm8 is the original immediate from the instruction.  isxSTRM is
   meant to say whether this is an xSTRM or xSTRI variant; this
   implementation only ever produces the I-format (index) result, so
   the flag is currently ignored.

   If the given imm8 case can be handled, the return value is True.
   If not, False is returned, and neither *resV nor *resOSZACP are
   altered.
*/
Bool pcmpXstrX_WRK_wide ( /*OUT*/V128* resV,
                          /*OUT*/UInt* resOSZACP,
                          V128* argLV,  V128* argRV,
                          UInt zmaskL, UInt zmaskR,
                          UInt imm8,   Bool isxSTRM )
{
   assert(imm8 < 0x80);
   assert((zmaskL >> 8) == 0);
   assert((zmaskR >> 8) == 0);
   (void)isxSTRM;   /* only I-format output is generated; see above */

   /* Explicitly reject any imm8 values that haven't been validated,
      even if they would probably work.  Life is too short to have
      unvalidated cases in the code base. */
   switch (imm8) {
      case 0x01: case 0x03: case 0x09: case 0x0B: case 0x0D:
      case 0x13: case 0x19: case 0x1B:
      case 0x39: case 0x3B:
      case 0x45: case 0x4B:
         break;
      default:
         return False;
   }

   UInt fmt = (imm8 >> 0) & 3; // imm8[1:0]  data format
   UInt agg = (imm8 >> 2) & 3; // imm8[3:2]  aggregation fn
   UInt pol = (imm8 >> 4) & 3; // imm8[5:4]  polarity
   UInt idx = (imm8 >> 6) & 1; // imm8[6]    1==msb/bytemask

   /* Only the 16-bit formats (1 == uw, 3 == sw) are modelled here. */
   if (fmt != 1 && fmt != 3)
      return False;

   const UShort* argL = argLV->uShort;
   const UShort* argR = argRV->uShort;

   /* x | -x sets the lowest set bit of x and everything above it, so
      the complement keeps exactly the lanes below the first zero
      lane, i.e. the valid part of the string. */
   UInt validL = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
   UInt validR = ~(zmaskR | -zmaskR);  // not(left(zmaskR))

   UInt intRes1;
   switch (agg) {
      case 0: /* equal any, aka find chars in a set */
         intRes1 = agg_equal_any_wide(argL, argR, validL, validR);
         break;
      case 1: /* ranges; only the unsigned format is handled */
         if (fmt != 1)
            return False;
         intRes1 = agg_ranges_uwide(argL, argR, validL, validR);
         break;
      case 2: /* equal each, aka strcmp */
         intRes1 = agg_equal_each_wide(argL, argR, validL, validR);
         break;
      case 3: /* equal ordered, aka substring search */
         intRes1 = agg_equal_ordered_wide(argL, argR, validL, validR);
         break;
      default:
         return False;
   }

   // generate I-format output
   PCMPxSTRx_WRK_gen_output_fmt_I_wide(
      resV, resOSZACP,
      intRes1, zmaskL, zmaskR, validL, pol, idx
   );

   return True;
}

/* Software model of "pcmpistri $imm8" on 16-bit data, packed the same
   way as the h_pcmpistri_* helpers pack the hardware result:
   (OSZACP flags << 16) | new ECX. */
static
UInt s_pcmpistri_wide ( V128* argLU, V128* argRU, UInt imm8 )
{
   V128 resV;
   UInt resOSZACP, resECX;
   Bool ok
      = pcmpXstrX_WRK_wide( &resV, &resOSZACP, argLU, argRU,
                            zmask_from_V128(argLU),
                            zmask_from_V128(argRU),
                            imm8, False/*!isSTRM*/
        );
   assert(ok);
   resECX = resV.uInt[0];
   return (resOSZACP << 16) | resECX;
}

/* Notes shared by all h_pcmpistri_* helpers below:
   - argL is loaded into %xmm2 and argR into %xmm11, then the real
     instruction is executed with the immediate named in the function.
   - %rsp is moved down by 1024 around the pushfq so that the push
     cannot overwrite anything the compiler keeps in the red zone
     below the stack pointer.
   - The result is ((rflags & 0x8D5) << 16) | (rcx & 0xFFFF); 0x8D5
     selects exactly the O, S, Z, A, P and C flag bits. */

/* Vectors shared by the "equal each" immediates 4B, 3B, 09 and 1B. */
static const IstriVec equal_each_vecs[] = {
   { "0000000000000000", "0000000000000000" },

   { "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },
   { "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },
   { "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa" },
   { "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa" },

   { "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa" },
   { "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa" },
   { "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a" },

   { "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },
   { "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },
   { "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },
   { "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },

   { "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },
   { "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa" },
   { "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa" },

   { "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },

   { "aaaaaaaaaaaa00aa", "aaaaaaaaaaaaaaaa" },
   { "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa00aa" },
   { "aaaaaaaaaaaa00aa", "aaaaaaaaaaaa00aa" },

   { "aaaaaaaa00aaaaaa", "aaaaaaaaaaaaaaaa" },
   { "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa00aa" },
   { "aaaaaaaa00aaaaaa", "aaaaaaaaaaaa00aa" },

   { "aaaaaaaaaaaa00aa", "aaaaaaaaaaaaaaaa" },
   { "aaaaaaaaaaaaaaaa", "aaaaaaaa00aaaaaa" },
   { "aaaaaaaaaaaa00aa", "aaaaaaaa00aaaaaa" },

   { "0000000000000000", "aaaaaaaa00aaaaaa" },
   { "8000000000000000", "aaaaaaaa00aaaaaa" },
   { "0000000000000001", "aaaaaaaa00aaaaaa" },

   { "0000000000000000", "aaaaaaaaaaaaaaaa" },
   { "aaaaaaaaaaaaaaaa", "0000000000000000" },
};

/* Vectors for the "equal ordered" (substring search) immediate 0D. */
static const IstriVec equal_ordered_vecs[] = {
   { "11111111abcdef11", "0000000000abcdef" },

   { "11111111abcdef11", "00abcdef00abcdef" },

   { "11111111abcdef11", "0000000000abcdef" },
   { "1111111111abcdef", "0000000000abcdef" },
   { "111111111111abcd", "0000000000abcdef" },

   { "1111abcd11abcd11", "000000000000abcd" },

   { "11abcd1111abcd11", "000000000000abcd" },
   { "abcd111111abcd11", "000000000000abcd" },
   { "cd11111111abcd11", "000000000000abcd" },

   { "01abcd11abcd1111", "000000000000abcd" },
   { "00abcd11abcd1111", "000000000000abcd" },
   { "0000cd11abcd1111", "000000000000abcd" },

   { "00abcd1100abcd11", "000000000000abcd" },
   { "00abcd110000cd11", "000000000000abcd" },

   { "1111111111111234", "0000000000000000" },
   { "1111111111111234", "0000000000000011" },
   { "1111111111111234", "0000000000001111" },

   { "1111111111111234", "1111111111111234" },
   { "0a11111111111111", "000000000000000a" },
   { "0b11111111111111", "000000000000000a" },

   { "b111111111111111", "0000000000000000" },
   { "0000000000000000", "0000000000000000" },
   { "123456789abcdef1", "0000000000000000" },
   { "0000000000000000", "123456789abcdef1" },
};

/* Vectors for the "equal any" (set membership) immediate 03. */
static const IstriVec equal_any_vecs[] = {
   { "aacdacbdaacdaacd", "00000000000000aa" },
   { "aabbaabbaabbaabb", "00000000000000bb" },
   { "aabbccddaabbccdd", "000000000000aabb" },
   { "abcdabc0abcdabcd", "000000000000abcd" },

   { "aabbccddaabbccdd", "00000000aabbccdd" },
   { "00bbccddaabbccdd", "00000000aabbccdd" },
   { "aabbccddaa00ccdd", "00000000aabbccdd" },
   { "aabbccddaabb00dd", "00000000aabbccdd" },
   { "aabbccddaabbcc00", "00000000aabbccdd" },

   { "aabbccddaabbccdd", "00000000aabbccdd" },
   { "aabbccddaabbccdd", "00000000aa00ccdd" },
   { "aabbccddaabbccdd", "00000000aabb00dd" },
   { "aabbccddaabbccdd", "00000000aabbcc00" },

   { "0000000000000000", "0000000000000000" },
   { "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },

   { "0000abcdabcdabcd", "000000000000abcd" },
   { "0000abcdabcdabcd", "000000000000dcba" },
   { "0000aabbaabbaabb", "000000000000bbbb" },
   { "0000ccddaabbccdd", "00000000bbaabbaa" },

   { "0000ccddaabbccdd", "000000bbaabbaa00" },

   { "0ddc0ffeebadf00d", "00000000cafebabe" },
   { "0ddc0ffeebadfeed", "00000000cafebabe" },
};

//////////////////////////////////////////////////////////
//                                                      //
//                       ISTRI_4B                       //
//                                                      //
//////////////////////////////////////////////////////////

/* Hardware result of pcmpistri $0x4B; see the shared notes above. */
UInt h_pcmpistri_4B ( V128* argL, V128* argR )
{
   V128 block[2];
   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));
   ULong res, flags;
   __asm__ __volatile__(
      "subq $1024, %%rsp"                  "\n\t"
      "movdqu 0(%2), %%xmm2"               "\n\t"
      "movdqu 16(%2), %%xmm11"             "\n\t"
      "pcmpistri $0x4B, %%xmm2, %%xmm11"   "\n\t"
      "pushfq"                             "\n\t"
      "popq %%rdx"                         "\n\t"
      "movq %%rcx, %0"                     "\n\t"
      "movq %%rdx, %1"                     "\n\t"
      "addq $1024, %%rsp"                  "\n\t"
      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );
   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}

/* Software model of pcmpistri $0x4B. */
UInt s_pcmpistri_4B ( V128* argLU, V128* argRU )
{
   return s_pcmpistri_wide(argLU, argRU, 0x4B);
}

/* Run all equal-each vectors through the 0x4B hardware/software pair. */
void istri_4B ( void )
{
   try_istri_vecs("4B", h_pcmpistri_4B, s_pcmpistri_4B,
                  equal_each_vecs, N_VECS(equal_each_vecs));
}

//////////////////////////////////////////////////////////
//                                                      //
//                       ISTRI_3B                       //
//                                                      //
//////////////////////////////////////////////////////////

/* Hardware result of pcmpistri $0x3B; see the shared notes above. */
UInt h_pcmpistri_3B ( V128* argL, V128* argR )
{
   V128 block[2];
   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));
   ULong res, flags;
   __asm__ __volatile__(
      "subq $1024, %%rsp"                  "\n\t"
      "movdqu 0(%2), %%xmm2"               "\n\t"
      "movdqu 16(%2), %%xmm11"             "\n\t"
      "pcmpistri $0x3B, %%xmm2, %%xmm11"   "\n\t"
      "pushfq"                             "\n\t"
      "popq %%rdx"                         "\n\t"
      "movq %%rcx, %0"                     "\n\t"
      "movq %%rdx, %1"                     "\n\t"
      "addq $1024, %%rsp"                  "\n\t"
      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );
   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}

/* Software model of pcmpistri $0x3B. */
UInt s_pcmpistri_3B ( V128* argLU, V128* argRU )
{
   return s_pcmpistri_wide(argLU, argRU, 0x3B);
}

/* Run all equal-each vectors through the 0x3B hardware/software pair. */
void istri_3B ( void )
{
   try_istri_vecs("3B", h_pcmpistri_3B, s_pcmpistri_3B,
                  equal_each_vecs, N_VECS(equal_each_vecs));
}



//////////////////////////////////////////////////////////
//                                                      //
//                       ISTRI_0D                       //
//                                                      //
//////////////////////////////////////////////////////////

/* Hardware result of pcmpistri $0x0D; see the shared notes above.
   Like its siblings this now steps %rsp past the red zone before the
   pushfq; previously the push could land on top of compiler-placed
   locals. */
__attribute__((noinline))
UInt h_pcmpistri_0D ( V128* argL, V128* argR )
{
   V128 block[2];
   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));
   ULong res = 0, flags = 0;
   __asm__ __volatile__(
      "subq $1024, %%rsp"                  "\n\t"
      "movdqu 0(%2), %%xmm2"               "\n\t"
      "movdqu 16(%2), %%xmm11"             "\n\t"
      "pcmpistri $0x0D, %%xmm2, %%xmm11"   "\n\t"
      "pushfq"                             "\n\t"
      "popq %%rdx"                         "\n\t"
      "movq %%rcx, %0"                     "\n\t"
      "movq %%rdx, %1"                     "\n\t"
      "addq $1024, %%rsp"                  "\n\t"
      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );
   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}

/* Software model of pcmpistri $0x0D. */
UInt s_pcmpistri_0D ( V128* argLU, V128* argRU )
{
   return s_pcmpistri_wide(argLU, argRU, 0x0D);
}

/* Run all equal-ordered vectors through the 0x0D pair. */
void istri_0D ( void )
{
   try_istri_vecs("0D", h_pcmpistri_0D, s_pcmpistri_0D,
                  equal_ordered_vecs, N_VECS(equal_ordered_vecs));
}


//////////////////////////////////////////////////////////
//                                                      //
//                       ISTRI_09                       //
//                                                      //
//////////////////////////////////////////////////////////

/* Hardware result of pcmpistri $0x09; see the shared notes above. */
UInt h_pcmpistri_09 ( V128* argL, V128* argR )
{
   V128 block[2];
   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));
   ULong res, flags;
   __asm__ __volatile__(
      "subq $1024, %%rsp"                  "\n\t"
      "movdqu 0(%2), %%xmm2"               "\n\t"
      "movdqu 16(%2), %%xmm11"             "\n\t"
      "pcmpistri $0x09, %%xmm2, %%xmm11"   "\n\t"
      "pushfq"                             "\n\t"
      "popq %%rdx"                         "\n\t"
      "movq %%rcx, %0"                     "\n\t"
      "movq %%rdx, %1"                     "\n\t"
      "addq $1024, %%rsp"                  "\n\t"
      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );
   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}

/* Software model of pcmpistri $0x09. */
UInt s_pcmpistri_09 ( V128* argLU, V128* argRU )
{
   return s_pcmpistri_wide(argLU, argRU, 0x09);
}

/* Run all equal-each vectors through the 0x09 hardware/software pair. */
void istri_09 ( void )
{
   try_istri_vecs("09", h_pcmpistri_09, s_pcmpistri_09,
                  equal_each_vecs, N_VECS(equal_each_vecs));
}



//////////////////////////////////////////////////////////
//                                                      //
//                       ISTRI_1B                       //
//                                                      //
//////////////////////////////////////////////////////////

/* Hardware result of pcmpistri $0x1B; see the shared notes above. */
UInt h_pcmpistri_1B ( V128* argL, V128* argR )
{
   V128 block[2];
   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));
   ULong res, flags;
   __asm__ __volatile__(
      "subq $1024, %%rsp"                  "\n\t"
      "movdqu 0(%2), %%xmm2"               "\n\t"
      "movdqu 16(%2), %%xmm11"             "\n\t"
      "pcmpistri $0x1B, %%xmm2, %%xmm11"   "\n\t"
      "pushfq"                             "\n\t"
      "popq %%rdx"                         "\n\t"
      "movq %%rcx, %0"                     "\n\t"
      "movq %%rdx, %1"                     "\n\t"
      "addq $1024, %%rsp"                  "\n\t"
      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );
   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}

/* Software model of pcmpistri $0x1B. */
UInt s_pcmpistri_1B ( V128* argLU, V128* argRU )
{
   return s_pcmpistri_wide(argLU, argRU, 0x1B);
}

/* Run all equal-each vectors through the 0x1B hardware/software pair. */
void istri_1B ( void )
{
   try_istri_vecs("1B", h_pcmpistri_1B, s_pcmpistri_1B,
                  equal_each_vecs, N_VECS(equal_each_vecs));
}



//////////////////////////////////////////////////////////
//                                                      //
//                       ISTRI_03                       //
//                                                      //
//////////////////////////////////////////////////////////

/* Hardware result of pcmpistri $0x03; see the shared notes above. */
UInt h_pcmpistri_03 ( V128* argL, V128* argR )
{
   V128 block[2];
   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));
   ULong res, flags;
   __asm__ __volatile__(
      "subq $1024, %%rsp"                  "\n\t"
      "movdqu 0(%2), %%xmm2"               "\n\t"
      "movdqu 16(%2), %%xmm11"             "\n\t"
      "pcmpistri $0x03, %%xmm2, %%xmm11"   "\n\t"
      "pushfq"                             "\n\t"
      "popq %%rdx"                         "\n\t"
      "movq %%rcx, %0"                     "\n\t"
      "movq %%rdx, %1"                     "\n\t"
      "addq $1024, %%rsp"                  "\n\t"
      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );
   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}

/* Software model of pcmpistri $0x03. */
UInt s_pcmpistri_03 ( V128* argLU, V128* argRU )
{
   return s_pcmpistri_wide(argLU, argRU, 0x03);
}

/* Run all equal-any vectors through the 0x03 hardware/software pair. */
void istri_03 ( void )
{
   try_istri_vecs("03", h_pcmpistri_03, s_pcmpistri_03,
                  equal_any_vecs, N_VECS(equal_any_vecs));
}


////////////////////////////////////////////////////////// 919 // // 920 // ISTRI_13 // 921 // // 922 ////////////////////////////////////////////////////////// 923 924 UInt h_pcmpistri_13 ( V128* argL, V128* argR ) 925 { 926 V128 block[2]; 927 memcpy(&block[0], argL, sizeof(V128)); 928 memcpy(&block[1], argR, sizeof(V128)); 929 ULong res, flags; 930 __asm__ __volatile__( 931 "subq $1024, %%rsp" "\n\t" 932 "movdqu 0(%2), %%xmm2" "\n\t" 933 "movdqu 16(%2), %%xmm11" "\n\t" 934 "pcmpistri $0x13, %%xmm2, %%xmm11" "\n\t" 935 //"pcmpistrm $0x13, %%xmm2, %%xmm11" "\n\t" 936 //"movd %%xmm0, %%ecx" "\n\t" 937 "pushfq" "\n\t" 938 "popq %%rdx" "\n\t" 939 "movq %%rcx, %0" "\n\t" 940 "movq %%rdx, %1" "\n\t" 941 "addq $1024, %%rsp" "\n\t" 942 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 943 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 944 ); 945 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 946 } 947 948 UInt s_pcmpistri_13 ( V128* argLU, V128* argRU ) 949 { 950 V128 resV; 951 UInt resOSZACP, resECX; 952 Bool ok 953 = pcmpXstrX_WRK_wide( &resV, &resOSZACP, argLU, argRU, 954 zmask_from_V128(argLU), 955 zmask_from_V128(argRU), 956 0x13, False/*!isSTRM*/ 957 ); 958 assert(ok); 959 resECX = resV.uInt[0]; 960 return (resOSZACP << 16) | resECX; 961 } 962 963 void istri_13 ( void ) 964 { 965 char* wot = "13"; 966 UInt(*h)(V128*,V128*) = h_pcmpistri_13; 967 UInt(*s)(V128*,V128*) = s_pcmpistri_13; 968 969 try_istri(wot,h,s, "aacdacbdaacdaacd", "00000000000000aa"); 970 try_istri(wot,h,s, "aabbaabbaabbaabb", "00000000000000bb"); 971 try_istri(wot,h,s, "aabbccddaabbccdd", "000000000000aabb"); 972 try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd"); 973 974 try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabbccdd"); 975 try_istri(wot,h,s, "00bbccddaabbccdd", "00000000aabbccdd"); 976 try_istri(wot,h,s, "aabbccddaa00ccdd", "00000000aabbccdd"); 977 try_istri(wot,h,s, "aabbccddaabb00dd", "00000000aabbccdd"); 978 try_istri(wot,h,s, "aabbccddaabbcc00", 
"00000000aabbccdd"); 979 980 try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabbccdd"); 981 try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aa00ccdd"); 982 try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabb00dd"); 983 try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabbcc00"); 984 985 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 986 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 987 988 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd"); 989 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba"); 990 try_istri(wot,h,s, "0000aabbaabbaabb", "000000000000bbbb"); 991 try_istri(wot,h,s, "0000ccddaabbccdd", "00000000bbaabbaa"); 992 993 try_istri(wot,h,s, "0000ccddaabbccdd", "000000bbaabbaa00"); 994 995 try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe"); 996 try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe"); 997 } 998 999 1000 1001 ////////////////////////////////////////////////////////// 1002 // // 1003 // ISTRI_45 // 1004 // // 1005 ////////////////////////////////////////////////////////// 1006 1007 UInt h_pcmpistri_45 ( V128* argL, V128* argR ) 1008 { 1009 V128 block[2]; 1010 memcpy(&block[0], argL, sizeof(V128)); 1011 memcpy(&block[1], argR, sizeof(V128)); 1012 ULong res, flags; 1013 __asm__ __volatile__( 1014 "subq $1024, %%rsp" "\n\t" 1015 "movdqu 0(%2), %%xmm2" "\n\t" 1016 "movdqu 16(%2), %%xmm11" "\n\t" 1017 "pcmpistri $0x45, %%xmm2, %%xmm11" "\n\t" 1018 //"pcmpistrm $0x04, %%xmm2, %%xmm11" "\n\t" 1019 //"movd %%xmm0, %%ecx" "\n\t" 1020 "pushfq" "\n\t" 1021 "popq %%rdx" "\n\t" 1022 "movq %%rcx, %0" "\n\t" 1023 "movq %%rdx, %1" "\n\t" 1024 "addq $1024, %%rsp" "\n\t" 1025 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 1026 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 1027 ); 1028 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 1029 } 1030 1031 UInt s_pcmpistri_45 ( V128* argLU, V128* argRU ) 1032 { 1033 V128 resV; 1034 UInt resOSZACP, resECX; 1035 Bool ok 1036 = pcmpXstrX_WRK_wide( &resV, 
&resOSZACP, argLU, argRU, 1037 zmask_from_V128(argLU), 1038 zmask_from_V128(argRU), 1039 0x45, False/*!isSTRM*/ 1040 ); 1041 assert(ok); 1042 resECX = resV.uInt[0]; 1043 return (resOSZACP << 16) | resECX; 1044 } 1045 1046 void istri_45 ( void ) 1047 { 1048 char* wot = "45"; 1049 UInt(*h)(V128*,V128*) = h_pcmpistri_45; 1050 UInt(*s)(V128*,V128*) = s_pcmpistri_45; 1051 1052 try_istri(wot,h,s, "aaaabbbbccccdddd", "000000000000bbcc"); 1053 try_istri(wot,h,s, "aaaabbbbccccdddd", "000000000000ccbb"); 1054 try_istri(wot,h,s, "baaabbbbccccdddd", "000000000000ccbb"); 1055 try_istri(wot,h,s, "baaabbbbccccdddc", "000000000000ccbb"); 1056 1057 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000ccbb"); 1058 try_istri(wot,h,s, "bbbbbbbb00bbbbbb", "000000000000ccbb"); 1059 try_istri(wot,h,s, "bbbbbbbbbbbb00bb", "000000000000ccbb"); 1060 try_istri(wot,h,s, "bbbbbbbbbbbbbb00", "000000000000ccbb"); 1061 try_istri(wot,h,s, "0000000000000000", "000000000000ccbb"); 1062 1063 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 1064 1065 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000ccbb"); 1066 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000bb"); 1067 try_istri(wot,h,s, "bb44bb44bb44bb44", "000000006622ccbb"); 1068 1069 try_istri(wot,h,s, "bb44bb44bb44bb44", "000000000022ccbb"); 1070 try_istri(wot,h,s, "bb44bb44bb44bb44", "000000000000ccbb"); 1071 try_istri(wot,h,s, "bb44bb44bb44bb44", "00000000000000bb"); 1072 1073 try_istri(wot,h,s, "0011223344556677", "0000997755442211"); 1074 try_istri(wot,h,s, "1122334455667711", "0000997755442211"); 1075 1076 try_istri(wot,h,s, "0011223344556677", "0000aa8866553322"); 1077 try_istri(wot,h,s, "1122334455667711", "0000aa8866553322"); 1078 } 1079 1080 1081 ////////////////////////////////////////////////////////// 1082 // // 1083 // ISTRI_01 // 1084 // // 1085 ////////////////////////////////////////////////////////// 1086 1087 UInt h_pcmpistri_01 ( V128* argL, V128* argR ) 1088 { 1089 V128 block[2]; 1090 memcpy(&block[0], argL, 
sizeof(V128)); 1091 memcpy(&block[1], argR, sizeof(V128)); 1092 ULong res, flags; 1093 __asm__ __volatile__( 1094 "subq $1024, %%rsp" "\n\t" 1095 "movdqu 0(%2), %%xmm2" "\n\t" 1096 "movdqu 16(%2), %%xmm11" "\n\t" 1097 "pcmpistri $0x01, %%xmm2, %%xmm11" "\n\t" 1098 //"pcmpistrm $0x01, %%xmm2, %%xmm11" "\n\t" 1099 //"movd %%xmm0, %%ecx" "\n\t" 1100 "pushfq" "\n\t" 1101 "popq %%rdx" "\n\t" 1102 "movq %%rcx, %0" "\n\t" 1103 "movq %%rdx, %1" "\n\t" 1104 "addq $1024, %%rsp" "\n\t" 1105 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 1106 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 1107 ); 1108 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 1109 } 1110 1111 UInt s_pcmpistri_01 ( V128* argLU, V128* argRU ) 1112 { 1113 V128 resV; 1114 UInt resOSZACP, resECX; 1115 Bool ok 1116 = pcmpXstrX_WRK_wide( &resV, &resOSZACP, argLU, argRU, 1117 zmask_from_V128(argLU), 1118 zmask_from_V128(argRU), 1119 0x01, False/*!isSTRM*/ 1120 ); 1121 assert(ok); 1122 resECX = resV.uInt[0]; 1123 return (resOSZACP << 16) | resECX; 1124 } 1125 1126 void istri_01 ( void ) 1127 { 1128 char* wot = "01"; 1129 UInt(*h)(V128*,V128*) = h_pcmpistri_01; 1130 UInt(*s)(V128*,V128*) = s_pcmpistri_01; 1131 1132 try_istri(wot,h,s, "aacdacbdaacdaacd", "00000000000000aa"); 1133 try_istri(wot,h,s, "aabbaabbaabbaabb", "00000000000000bb"); 1134 try_istri(wot,h,s, "aabbccddaabbccdd", "000000000000aabb"); 1135 try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd"); 1136 1137 try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabbccdd"); 1138 try_istri(wot,h,s, "00bbccddaabbccdd", "00000000aabbccdd"); 1139 try_istri(wot,h,s, "aabbccddaa00ccdd", "00000000aabbccdd"); 1140 try_istri(wot,h,s, "aabbccddaabb00dd", "00000000aabbccdd"); 1141 try_istri(wot,h,s, "aabbccddaabbcc00", "00000000aabbccdd"); 1142 1143 try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabbccdd"); 1144 try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aa00ccdd"); 1145 try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabb00dd"); 1146 
try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabbcc00"); 1147 1148 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 1149 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1150 1151 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd"); 1152 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba"); 1153 try_istri(wot,h,s, "0000aabbaabbaabb", "000000000000bbbb"); 1154 try_istri(wot,h,s, "0000ccddaabbccdd", "00000000bbaabbaa"); 1155 1156 try_istri(wot,h,s, "0000ccddaabbccdd", "000000bbaabbaa00"); 1157 1158 try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe"); 1159 try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe"); 1160 } 1161 1162 1163 ////////////////////////////////////////////////////////// 1164 // // 1165 // ISTRI_39 // 1166 // // 1167 ////////////////////////////////////////////////////////// 1168 1169 UInt h_pcmpistri_39 ( V128* argL, V128* argR ) 1170 { 1171 V128 block[2]; 1172 memcpy(&block[0], argL, sizeof(V128)); 1173 memcpy(&block[1], argR, sizeof(V128)); 1174 ULong res, flags; 1175 __asm__ __volatile__( 1176 "subq $1024, %%rsp" "\n\t" 1177 "movdqu 0(%2), %%xmm2" "\n\t" 1178 "movdqu 16(%2), %%xmm11" "\n\t" 1179 "pcmpistri $0x39, %%xmm2, %%xmm11" "\n\t" 1180 "pushfq" "\n\t" 1181 "popq %%rdx" "\n\t" 1182 "movq %%rcx, %0" "\n\t" 1183 "movq %%rdx, %1" "\n\t" 1184 "addq $1024, %%rsp" "\n\t" 1185 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 1186 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 1187 ); 1188 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 1189 } 1190 1191 UInt s_pcmpistri_39 ( V128* argLU, V128* argRU ) 1192 { 1193 V128 resV; 1194 UInt resOSZACP, resECX; 1195 Bool ok 1196 = pcmpXstrX_WRK_wide( &resV, &resOSZACP, argLU, argRU, 1197 zmask_from_V128(argLU), 1198 zmask_from_V128(argRU), 1199 0x39, False/*!isSTRM*/ 1200 ); 1201 assert(ok); 1202 resECX = resV.uInt[0]; 1203 return (resOSZACP << 16) | resECX; 1204 } 1205 1206 void istri_39 ( void ) 1207 { 1208 char* wot = "39"; 1209 
UInt(*h)(V128*,V128*) = h_pcmpistri_39; 1210 UInt(*s)(V128*,V128*) = s_pcmpistri_39; 1211 1212 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 1213 1214 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1215 try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1216 try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa"); 1217 try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa"); 1218 1219 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa"); 1220 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa"); 1221 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a"); 1222 1223 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1224 try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1225 try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1226 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1227 1228 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1229 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa"); 1230 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa"); 1231 1232 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1233 1234 try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaaaaaa"); 1235 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa00aa"); 1236 try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaa00aa"); 1237 1238 try_istri(wot,h,s, "aaaaaaaa00aaaaaa", "aaaaaaaaaaaaaaaa"); 1239 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa00aa"); 1240 try_istri(wot,h,s, "aaaaaaaa00aaaaaa", "aaaaaaaaaaaa00aa"); 1241 1242 try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaaaaaa"); 1243 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa00aaaaaa"); 1244 try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaa00aaaaaa"); 1245 1246 try_istri(wot,h,s, "0000000000000000", "aaaaaaaa00aaaaaa"); 1247 try_istri(wot,h,s, "8000000000000000", "aaaaaaaa00aaaaaa"); 1248 try_istri(wot,h,s, "0000000000000001", "aaaaaaaa00aaaaaa"); 1249 1250 try_istri(wot,h,s, 
"0000000000000000", "aaaaaaaaaaaaaaaa"); 1251 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000"); 1252 } 1253 1254 1255 1256 ////////////////////////////////////////////////////////// 1257 // // 1258 // ISTRI_19 // 1259 // // 1260 ////////////////////////////////////////////////////////// 1261 1262 UInt h_pcmpistri_19 ( V128* argL, V128* argR ) 1263 { 1264 V128 block[2]; 1265 memcpy(&block[0], argL, sizeof(V128)); 1266 memcpy(&block[1], argR, sizeof(V128)); 1267 ULong res, flags; 1268 __asm__ __volatile__( 1269 "subq $1024, %%rsp" "\n\t" 1270 "movdqu 0(%2), %%xmm2" "\n\t" 1271 "movdqu 16(%2), %%xmm11" "\n\t" 1272 "pcmpistri $0x19, %%xmm2, %%xmm11" "\n\t" 1273 "pushfq" "\n\t" 1274 "popq %%rdx" "\n\t" 1275 "movq %%rcx, %0" "\n\t" 1276 "movq %%rdx, %1" "\n\t" 1277 "addq $1024, %%rsp" "\n\t" 1278 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 1279 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 1280 ); 1281 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 1282 } 1283 1284 UInt s_pcmpistri_19 ( V128* argLU, V128* argRU ) 1285 { 1286 V128 resV; 1287 UInt resOSZACP, resECX; 1288 Bool ok 1289 = pcmpXstrX_WRK_wide( &resV, &resOSZACP, argLU, argRU, 1290 zmask_from_V128(argLU), 1291 zmask_from_V128(argRU), 1292 0x19, False/*!isSTRM*/ 1293 ); 1294 assert(ok); 1295 resECX = resV.uInt[0]; 1296 return (resOSZACP << 16) | resECX; 1297 } 1298 1299 void istri_19 ( void ) 1300 { 1301 char* wot = "19"; 1302 UInt(*h)(V128*,V128*) = h_pcmpistri_19; 1303 UInt(*s)(V128*,V128*) = s_pcmpistri_19; 1304 1305 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 1306 1307 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1308 try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1309 try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa"); 1310 try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa"); 1311 1312 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa"); 1313 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa"); 1314 
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a"); 1315 1316 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1317 try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1318 try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1319 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1320 1321 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1322 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa"); 1323 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa"); 1324 1325 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1326 1327 try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaaaaaa"); 1328 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa00aa"); 1329 try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaa00aa"); 1330 1331 try_istri(wot,h,s, "aaaaaaaa00aaaaaa", "aaaaaaaaaaaaaaaa"); 1332 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa00aa"); 1333 try_istri(wot,h,s, "aaaaaaaa00aaaaaa", "aaaaaaaaaaaa00aa"); 1334 1335 try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaaaaaa"); 1336 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa00aaaaaa"); 1337 try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaa00aaaaaa"); 1338 1339 try_istri(wot,h,s, "0000000000000000", "aaaaaaaa00aaaaaa"); 1340 try_istri(wot,h,s, "8000000000000000", "aaaaaaaa00aaaaaa"); 1341 try_istri(wot,h,s, "0000000000000001", "aaaaaaaa00aaaaaa"); 1342 1343 try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa"); 1344 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000"); 1345 } 1346 1347 1348 1349 ////////////////////////////////////////////////////////// 1350 // // 1351 // main // 1352 // // 1353 ////////////////////////////////////////////////////////// 1354 1355 int main ( void ) 1356 { 1357 istri_4B(); 1358 istri_3B(); 1359 istri_09(); 1360 istri_1B(); 1361 istri_03(); 1362 istri_0D(); 1363 istri_13(); 1364 istri_45(); 1365 istri_01(); 1366 istri_39(); 1367 istri_19(); 1368 return 0; 1369 } 1370