/* Tests in detail the core arithmetic for pcmp{e,i}str{i,m} using
   pcmpistri to drive it.  Does not check the e-vs-i or i-vs-m
   aspect.

   Every test runs the real instruction (the h_* "hardware" functions)
   and a C model of it (the s_* "simulated" functions) on the same
   pair of 128-bit operands, then prints both results, flagging any
   difference with "!!!!". */

#include <string.h>
#include <stdio.h>
#include <assert.h>

typedef  unsigned int            UInt;
typedef  signed int              Int;
typedef  unsigned char           UChar;
typedef  unsigned short          UShort;
typedef  unsigned long long int  ULong;
typedef  UChar                   Bool;
#define False ((Bool)0)
#define True  ((Bool)1)

/* A 128-bit vector, viewable as bytes, 16-bit lanes or 32-bit lanes. */
typedef
   union {
      UChar  uChar[16];
      UShort uShort[8];
      UInt   uInt[4];
      UInt   w32[4];
   }
   V128;

/* Bit positions of the condition codes within rflags. */
#define SHIFT_O 11
#define SHIFT_S 7
#define SHIFT_Z 6
#define SHIFT_A 4
#define SHIFT_C 0
#define SHIFT_P 2

#define MASK_O (1ULL << SHIFT_O)
#define MASK_S (1ULL << SHIFT_S)
#define MASK_Z (1ULL << SHIFT_Z)
#define MASK_A (1ULL << SHIFT_A)
#define MASK_C (1ULL << SHIFT_C)
#define MASK_P (1ULL << SHIFT_P)

/* All six condition codes together (== 0x8D5). */
#define MASK_OSZACP (MASK_O | MASK_S | MASK_Z | MASK_A | MASK_C | MASK_P)

/* Number of elements in a true array (not a pointer). */
#define N_ELEMS(_arr) (sizeof(_arr) / sizeof((_arr)[0]))


/* Count leading zero bits of x, without branches.  Returns 32 for
   x == 0.  Each step tests whether the top part of the remaining
   value is empty and, if so, shifts it away and adds to the count.
   Note: relies on '>>' of a negative Int being an arithmetic shift,
   which is implementation-defined in C but holds for the compilers
   this x86-64-only test targets. */
UInt clz32 ( UInt x )
{
   Int y, m, n;
   y = -(x >> 16);
   m = (y >> 16) & 16;
   n = 16 - m;
   x = x >> m;
   y = x - 0x100;
   m = (y >> 16) & 8;
   n = n + m;
   x = x << m;
   y = x - 0x1000;
   m = (y >> 16) & 4;
   n = n + m;
   x = x << m;
   y = x - 0x4000;
   m = (y >> 16) & 2;
   n = n + m;
   x = x << m;
   y = x >> 14;
   m = y & ~(y >> 1);
   return n + 2 - m;
}

/* Count trailing zero bits of x.  Returns 32 for x == 0.
   (~x) & (x-1) has ones exactly where x has trailing zeroes. */
UInt ctz32 ( UInt x )
{
   return 32 - clz32((~x) & (x-1));
}

/* Build a V128 from a 16-character hex "summary".  Each hex digit D
   becomes the byte 0xDD.  The string is written most-significant
   byte first, so summary[15] lands in dst->uChar[0].  Asserts if the
   string is not exactly 16 hex digits. */
void expand ( V128* dst, const char* summary )
{
   Int i;
   assert( strlen(summary) == 16 );
   for (i = 0; i < 16; i++) {
      UChar xx = 0;
      UChar x = summary[15-i];
      if      (x >= '0' && x <= '9') { xx = x - '0'; }
      else if (x >= 'A' && x <= 'F') { xx = x - 'A' + 10; }
      else if (x >= 'a' && x <= 'f') { xx = x - 'a' + 10; }
      else assert(0);

      assert(xx < 16);
      xx = (xx << 4) | xx;   /* replicate the nibble: D -> 0xDD */
      dst->uChar[i] = xx;
   }
}

/* Run one test case: expand both summaries, evaluate the hardware
   function h_fn and the simulation s_fn on them, and print both
   results.  'which' is the two-character imm8 name, eg "4B".  A
   trailing "!!!!" marks a mismatch. */
void try_istri ( const char* which,
                 UInt(*h_fn)(V128*,V128*),
                 UInt(*s_fn)(V128*,V128*),
                 const char* summL, const char* summR )
{
   assert(strlen(which) == 2);
   V128 argL, argR;
   expand(&argL, summL);
   expand(&argR, summR);
   UInt h_res = h_fn(&argL, &argR);
   UInt s_res = s_fn(&argL, &argR);
   printf("istri %s  %s %s -> %08x %08x %s\n",
          which, summL, summR, h_res, s_res, h_res == s_res ? "" : "!!!!");
}

/* Compute the 8-bit "zero mask" of a vector of 16-bit lanes: bit i
   is set iff lane i is zero. */
UInt zmask_from_V128 ( V128* arg )
{
   UInt i, res = 0;
   for (i = 0; i < 8; i++) {
      res |=  ((arg->uShort[i] == 0) ? 1 : 0) << i;
   }
   return res;
}

/* One test vector: a pair of 16-hex-digit operand summaries. */
typedef
   struct {
      const char* summL;
      const char* summR;
   }
   VecPair;

/* Feed every vector in vecs[0 .. nVecs-1], in order, to try_istri. */
static void run_istri_vectors ( const char* which,
                                UInt(*h_fn)(V128*,V128*),
                                UInt(*s_fn)(V128*,V128*),
                                const VecPair* vecs, UInt nVecs )
{
   UInt i;
   for (i = 0; i < nVecs; i++) {
      try_istri(which, h_fn, s_fn, vecs[i].summL, vecs[i].summR);
   }
}

/* Pack a hardware result the same way the s_* functions do: the
   OSZACP bits of rflags in the top half, the low 16 bits of ECX in
   the bottom half. */
static UInt pack_hw_result ( ULong flags, ULong ecx )
{
   return ((flags & MASK_OSZACP) << 16) | (ecx & 0xFFFF);
}

/* The instruction sequence shared by all h_pcmpistri_* functions.
   IMM is the immediate as a string literal, eg "0x4B".  Operand %2
   points at two consecutive V128s (argL then argR); %0 receives RCX
   and %1 receives rflags.

   pushfq stores below %rsp, which is where the x86-64 SysV ABI lets
   a leaf function keep live data (the "red zone").  So %rsp is first
   moved well clear of it, and restored afterwards.  The subq/addq
   pair alters the flags only before pcmpistri sets them and after
   they have been captured, so the captured value is unaffected. */
#define H_PCMPISTRI_INSNS(IMM)                               \
      "subq      $1024,  %%rsp"                   "\n\t"     \
      "movdqu    0(%2),  %%xmm2"                  "\n\t"     \
      "movdqu    16(%2), %%xmm11"                 "\n\t"     \
      "pcmpistri $" IMM ", %%xmm2, %%xmm11"       "\n\t"     \
      "pushfq"                                    "\n\t"     \
      "popq      %%rdx"                           "\n\t"     \
      "movq      %%rcx,  %0"                      "\n\t"     \
      "movq      %%rdx,  %1"                      "\n\t"     \
      "addq      $1024,  %%rsp"                   "\n\t"

//////////////////////////////////////////////////////////
//                                                      //
//                       GENERAL                        //
//                                                      //
//////////////////////////////////////////////////////////


/* Given partial results from a 16-bit pcmpXstrX operation (intRes1,
   basically), generate an I-format output value (an index, written
   to the low 32 bits of *resV with the rest zeroed), also the new
   OSZACP flags.

   pol is imm8[5:4] (polarity) and idx is imm8[6] (0: report the
   least significant set bit, 1: the most significant).  If no bit
   is set the index is 8, the number of lanes. */
static
void PCMPxSTRx_WRK_gen_output_fmt_I_wide ( /*OUT*/V128* resV,
                                           /*OUT*/UInt* resOSZACP,
                                           UInt intRes1,
                                           UInt zmaskL, UInt zmaskR,
                                           UInt validL,
                                           UInt pol, UInt idx )
{
   assert((pol >> 2) == 0);
   assert((idx >> 1) == 0);

   UInt intRes2 = 0;
   switch (pol) {
      case 0: intRes2 = intRes1;          break; // pol +
      case 1: intRes2 = ~intRes1;         break; // pol -
      case 2: intRes2 = intRes1;          break; // pol m+
      case 3: intRes2 = intRes1 ^ validL; break; // pol m-
   }
   intRes2 &= 0xFF;

   // generate I-format output (an index in ECX)
   // generate ecx value
   UInt newECX = 0;
   if (idx) {
      // index of ms-1-bit
      newECX = intRes2 == 0 ? 8 : (31 - clz32(intRes2));
   } else {
      // index of ls-1-bit
      newECX = intRes2 == 0 ? 8 : ctz32(intRes2);
   }

   resV->w32[0] = newECX;
   resV->w32[1] = 0;
   resV->w32[2] = 0;
   resV->w32[3] = 0;

   // generate new flags, common to all ISTRI and ISTRM cases
   *resOSZACP    // A, P are zero
     = ((intRes2 == 0) ? 0 : MASK_C) // C == 0 iff intRes2 == 0
     | ((zmaskL == 0)  ? 0 : MASK_Z) // Z == 1 iff any in argL is 0
     | ((zmaskR == 0)  ? 0 : MASK_S) // S == 1 iff any in argR is 0
     | ((intRes2 & 1) << SHIFT_O);   // O == IntRes2[0]
}

/* Compute result and new OSZACP flags for all PCMP{E,I}STR{I,M}
   variants on 16-bit characters.

   For xSTRI variants, the new ECX value is placed in the 32 bits
   pointed to by *resV, and the top 96 bits are zeroed.  For xSTRM
   variants, the result is a 128 bit value and is placed at *resV in
   the obvious way.  (As written, this function only ever produces
   the xSTRI form; isxSTRM is accepted but not consulted.)

   For all variants, the new OSZACP value is placed at *resOSZACP.

   argLV and argRV are the vector args.  The caller must prepare an
   8-bit mask for each, zmaskL and zmaskR.  For ISTRx variants this
   must be 1 for each zero 16-bit lane of the respective arg.  For
   ESTRx variants this is derived from the explicit length
   indication, and must be 0 in all places except at the bit index
   corresponding to the valid length (0 .. 8).  If the valid length
   is 8 then the mask must be all zeroes.  In all cases, bits 31:8
   must be zero.

   imm8 is the original immediate from the instruction.  isxSTRM
   indicates whether this is a xSTRM or xSTRI variant.

   If the given imm8 case can be handled, the return value is True.
   If not, False is returned, and neither *resV nor *resOSZACP are
   altered.
*/

Bool pcmpXstrX_WRK_wide ( /*OUT*/V128* resV,
                          /*OUT*/UInt* resOSZACP,
                          V128* argLV, V128* argRV,
                          UInt zmaskL, UInt zmaskR,
                          UInt imm8, Bool isxSTRM )
{
   assert(imm8 < 0x80);
   assert((zmaskL >> 8) == 0);
   assert((zmaskR >> 8) == 0);

   (void)isxSTRM;   /* only I-format output is generated below */

   /* Explicitly reject any imm8 values that haven't been validated,
      even if they would probably work.  Life is too short to have
      unvalidated cases in the code base. */
   switch (imm8) {
      case 0x01:
      case 0x03: case 0x09: case 0x0B: case 0x0D: case 0x13:
      case 0x1B: case 0x39: case 0x3B: case 0x45: case 0x4B:
         break;
      default:
         return False;
   }

   UInt fmt = (imm8 >> 0) & 3; // imm8[1:0]  data format
   UInt agg = (imm8 >> 2) & 3; // imm8[3:2]  aggregation fn
   UInt pol = (imm8 >> 4) & 3; // imm8[5:4]  polarity
   UInt idx = (imm8 >> 6) & 1; // imm8[6]    1==msb/bytemask

   Bool isWide = (fmt == 1/*uw*/ || fmt == 3/*sw*/) ? True : False;

   const UShort* argL = argLV->uShort;
   const UShort* argR = argRV->uShort;

   /* zmask | -zmask sets the lowest set bit of zmask and everything
      above it, so its complement selects exactly the lanes that lie
      below the first zero lane -- the valid part of the string. */
   UInt validL = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
   UInt validR = ~(zmaskR | -zmaskR);  // not(left(zmaskR))

   UInt intRes1 = 0;

   if (agg == 2/*equal each, aka strcmp*/ && isWide) {

      /*----------------------------------------*/
      /*-- strcmp on wide data                --*/
      /*----------------------------------------*/

      Int  i;
      UInt boolResII = 0;
      for (i = 7; i >= 0; i--) {
         UShort cL = argL[i];
         UShort cR = argR[i];
         boolResII = (boolResII << 1) | (cL == cR ? 1 : 0);
      }

      // do invalidation, common to all equal-each cases
      intRes1
         = (boolResII & validL & validR)  // if both valid, use cmpres
           | (~ (validL | validR));       // if both invalid, force 1
                                          // else force 0
   }
   else if (agg == 0/*equal any, aka find chars in a set*/ && isWide) {

      /*----------------------------------------*/
      /*-- set membership on wide data        --*/
      /*----------------------------------------*/

      /* argL: the string,  argR: charset */
      UInt si, ci;
      UInt boolRes = 0;

      for (si = 0; si < 8; si++) {
         if ((validL & (1 << si)) == 0)
            // run off the end of the string.
            break;
         UInt m = 0;
         for (ci = 0; ci < 8; ci++) {
            if ((validR & (1 << ci)) == 0) break;
            if (argR[ci] == argL[si]) { m = 1; break; }
         }
         boolRes |= (m << si);
      }

      // boolRes is "pre-invalidated"
      intRes1 = boolRes;
   }
   else if (agg == 3/*equal ordered, aka substring search*/ && isWide) {

      /*----------------------------------------*/
      /*-- substring search on wide data      --*/
      /*----------------------------------------*/

      /* argL: haystack,  argR: needle */
      UInt ni, hi;
      UInt boolRes = 0;
      for (hi = 0; hi < 8; hi++) {
         UInt m = 1;
         for (ni = 0; ni < 8; ni++) {
            if ((validR & (1 << ni)) == 0) break;
            UInt i = ni + hi;
            if (i >= 8) break;
            if (argL[i] != argR[ni]) { m = 0; break; }
         }
         boolRes |= (m << hi);
         /* Deliberately tested after recording m: the position just
            past the end of the haystack is still evaluated. */
         if ((validL & (1 << hi)) == 0)
            // run off the end of the haystack
            break;
      }

      // boolRes is "pre-invalidated"
      intRes1 = boolRes;
   }
   else if (agg == 1/*ranges*/ && fmt == 1/*uw*/) {

      /*----------------------------------------*/
      /*-- ranges, unsigned wide data         --*/
      /*----------------------------------------*/

      /* argL: string,  argR: range-pairs */
      UInt ri, si;
      UInt boolRes = 0;
      for (si = 0; si < 8; si++) {
         if ((validL & (1 << si)) == 0)
            // run off the end of the string
            break;
         UInt m = 0;
         for (ri = 0; ri < 8; ri += 2) {
            /* a range needs both of its endpoints to be valid */
            if ((validR & (3 << ri)) != (3 << ri)) break;
            if (argR[ri] <= argL[si] && argL[si] <= argR[ri+1]) {
               m = 1; break;
            }
         }
         boolRes |= (m << si);
      }

      // boolRes is "pre-invalidated"
      intRes1 = boolRes;
   }
   else {
      return False;
   }

   intRes1 &= 0xFF;

   // generate I-format output
   PCMPxSTRx_WRK_gen_output_fmt_I_wide(
      resV, resOSZACP,
      intRes1, zmaskL, zmaskR, validL, pol, idx
   );

   return True;
}

/* Shared body of the s_pcmpistri_* functions: run the C model for
   the given imm8 on implicit-length operands and pack the result as
   (OSZACP << 16) | ECX.  Asserts that imm8 is a supported case. */
static UInt s_pcmpistri_wide ( V128* argLU, V128* argRU, UInt imm8 )
{
   V128 resV;
   UInt resOSZACP = 0;
   Bool ok
      = pcmpXstrX_WRK_wide( &resV, &resOSZACP, argLU, argRU,
                            zmask_from_V128(argLU),
                            zmask_from_V128(argRU),
                            imm8, False/*!isSTRM*/
        );
   assert(ok);
   return (resOSZACP << 16) | resV.uInt[0];
}

/* Test vectors shared by the "equal each" cases 4B, 3B, 09 and 1B. */
static const VecPair equal_each_vecs[] = {
   { "0000000000000000", "0000000000000000" },

   { "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },
   { "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },
   { "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa" },
   { "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa" },

   { "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa" },
   { "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa" },
   { "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a" },

   { "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },
   { "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },
   { "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },
   { "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },

   { "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },
   { "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa" },
   { "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa" },

   { "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },

   { "aaaaaaaaaaaa00aa", "aaaaaaaaaaaaaaaa" },
   { "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa00aa" },
   { "aaaaaaaaaaaa00aa", "aaaaaaaaaaaa00aa" },

   { "aaaaaaaa00aaaaaa", "aaaaaaaaaaaaaaaa" },
   { "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa00aa" },
   { "aaaaaaaa00aaaaaa", "aaaaaaaaaaaa00aa" },

   { "aaaaaaaaaaaa00aa", "aaaaaaaaaaaaaaaa" },
   { "aaaaaaaaaaaaaaaa", "aaaaaaaa00aaaaaa" },
   { "aaaaaaaaaaaa00aa", "aaaaaaaa00aaaaaa" },

   { "0000000000000000", "aaaaaaaa00aaaaaa" },
   { "8000000000000000", "aaaaaaaa00aaaaaa" },
   { "0000000000000001", "aaaaaaaa00aaaaaa" },

   { "0000000000000000", "aaaaaaaaaaaaaaaa" },
   { "aaaaaaaaaaaaaaaa", "0000000000000000" }
};

/* Test vectors for the "equal ordered" (substring search) case 0D. */
static const VecPair equal_ordered_vecs[] = {
   { "11111111abcdef11", "0000000000abcdef" },

   { "11111111abcdef11", "00abcdef00abcdef" },

   { "11111111abcdef11", "0000000000abcdef" },
   { "1111111111abcdef", "0000000000abcdef" },
   { "111111111111abcd", "0000000000abcdef" },

   { "1111abcd11abcd11", "000000000000abcd" },

   { "11abcd1111abcd11", "000000000000abcd" },
   { "abcd111111abcd11", "000000000000abcd" },
   { "cd11111111abcd11", "000000000000abcd" },

   { "01abcd11abcd1111", "000000000000abcd" },
   { "00abcd11abcd1111", "000000000000abcd" },
   { "0000cd11abcd1111", "000000000000abcd" },

   { "00abcd1100abcd11", "000000000000abcd" },
   { "00abcd110000cd11", "000000000000abcd" },

   { "1111111111111234", "0000000000000000" },
   { "1111111111111234", "0000000000000011" },
   { "1111111111111234", "0000000000001111" },

   { "1111111111111234", "1111111111111234" },
   { "0a11111111111111", "000000000000000a" },
   { "0b11111111111111", "000000000000000a" },

   { "b111111111111111", "0000000000000000" },
   { "0000000000000000", "0000000000000000" },
   { "123456789abcdef1", "0000000000000000" },
   { "0000000000000000", "123456789abcdef1" }
};

/* Test vectors for the "equal any" (set membership) case 03. */
static const VecPair equal_any_vecs[] = {
   { "aacdacbdaacdaacd", "00000000000000aa" },
   { "aabbaabbaabbaabb", "00000000000000bb" },
   { "aabbccddaabbccdd", "000000000000aabb" },
   { "abcdabc0abcdabcd", "000000000000abcd" },

   { "aabbccddaabbccdd", "00000000aabbccdd" },
   { "00bbccddaabbccdd", "00000000aabbccdd" },
   { "aabbccddaa00ccdd", "00000000aabbccdd" },
   { "aabbccddaabb00dd", "00000000aabbccdd" },
   { "aabbccddaabbcc00", "00000000aabbccdd" },

   { "aabbccddaabbccdd", "00000000aabbccdd" },
   { "aabbccddaabbccdd", "00000000aa00ccdd" },
   { "aabbccddaabbccdd", "00000000aabb00dd" },
   { "aabbccddaabbccdd", "00000000aabbcc00" },

   { "0000000000000000", "0000000000000000" },
   { "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },

   { "0000abcdabcdabcd", "000000000000abcd" },
   { "0000abcdabcdabcd", "000000000000dcba" },
   { "0000aabbaabbaabb", "000000000000bbbb" },
   { "0000ccddaabbccdd", "00000000bbaabbaa" },

   { "0000ccddaabbccdd", "000000bbaabbaa00" },

   { "0ddc0ffeebadf00d", "00000000cafebabe" },
   { "0ddc0ffeebadfeed", "00000000cafebabe" }
};

//////////////////////////////////////////////////////////
//                                                      //
//                       ISTRI_4B                       //
//                                                      //
//////////////////////////////////////////////////////////

/* Hardware result of pcmpistri $0x4B on (argL, argR). */
UInt h_pcmpistri_4B ( V128* argL, V128* argR )
{
   V128 block[2];
   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));
   ULong res = 0, flags = 0;
   __asm__ __volatile__(
      H_PCMPISTRI_INSNS("0x4B")
      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );
   return pack_hw_result(flags, res);
}

/* Simulated result of pcmpistri $0x4B on (argLU, argRU). */
UInt s_pcmpistri_4B ( V128* argLU, V128* argRU )
{
   return s_pcmpistri_wide(argLU, argRU, 0x4B);
}

void istri_4B ( void )
{
   run_istri_vectors("4B", h_pcmpistri_4B, s_pcmpistri_4B,
                     equal_each_vecs, N_ELEMS(equal_each_vecs));
}

//////////////////////////////////////////////////////////
//                                                      //
//                       ISTRI_3B                       //
//                                                      //
//////////////////////////////////////////////////////////

/* Hardware result of pcmpistri $0x3B on (argL, argR). */
UInt h_pcmpistri_3B ( V128* argL, V128* argR )
{
   V128 block[2];
   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));
   ULong res = 0, flags = 0;
   __asm__ __volatile__(
      H_PCMPISTRI_INSNS("0x3B")
      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );
   return pack_hw_result(flags, res);
}

/* Simulated result of pcmpistri $0x3B on (argLU, argRU). */
UInt s_pcmpistri_3B ( V128* argLU, V128* argRU )
{
   return s_pcmpistri_wide(argLU, argRU, 0x3B);
}

void istri_3B ( void )
{
   run_istri_vectors("3B", h_pcmpistri_3B, s_pcmpistri_3B,
                     equal_each_vecs, N_ELEMS(equal_each_vecs));
}



//////////////////////////////////////////////////////////
//                                                      //
//                       ISTRI_0D                       //
//                                                      //
//////////////////////////////////////////////////////////

/* Hardware result of pcmpistri $0x0D on (argL, argR). */
__attribute__((noinline))
UInt h_pcmpistri_0D ( V128* argL, V128* argR )
{
   V128 block[2];
   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));
   ULong res = 0, flags = 0;
   __asm__ __volatile__(
      H_PCMPISTRI_INSNS("0x0D")
      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );
   return pack_hw_result(flags, res);
}

/* Simulated result of pcmpistri $0x0D on (argLU, argRU). */
UInt s_pcmpistri_0D ( V128* argLU, V128* argRU )
{
   return s_pcmpistri_wide(argLU, argRU, 0x0D);
}

void istri_0D ( void )
{
   run_istri_vectors("0D", h_pcmpistri_0D, s_pcmpistri_0D,
                     equal_ordered_vecs, N_ELEMS(equal_ordered_vecs));
}


//////////////////////////////////////////////////////////
//                                                      //
//                       ISTRI_09                       //
//                                                      //
//////////////////////////////////////////////////////////

/* Hardware result of pcmpistri $0x09 on (argL, argR). */
UInt h_pcmpistri_09 ( V128* argL, V128* argR )
{
   V128 block[2];
   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));
   ULong res = 0, flags = 0;
   __asm__ __volatile__(
      H_PCMPISTRI_INSNS("0x09")
      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );
   return pack_hw_result(flags, res);
}

/* Simulated result of pcmpistri $0x09 on (argLU, argRU). */
UInt s_pcmpistri_09 ( V128* argLU, V128* argRU )
{
   return s_pcmpistri_wide(argLU, argRU, 0x09);
}

void istri_09 ( void )
{
   run_istri_vectors("09", h_pcmpistri_09, s_pcmpistri_09,
                     equal_each_vecs, N_ELEMS(equal_each_vecs));
}



//////////////////////////////////////////////////////////
//                                                      //
//                       ISTRI_1B                       //
//                                                      //
//////////////////////////////////////////////////////////

/* Hardware result of pcmpistri $0x1B on (argL, argR). */
UInt h_pcmpistri_1B ( V128* argL, V128* argR )
{
   V128 block[2];
   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));
   ULong res = 0, flags = 0;
   __asm__ __volatile__(
      H_PCMPISTRI_INSNS("0x1B")
      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );
   return pack_hw_result(flags, res);
}

/* Simulated result of pcmpistri $0x1B on (argLU, argRU). */
UInt s_pcmpistri_1B ( V128* argLU, V128* argRU )
{
   return s_pcmpistri_wide(argLU, argRU, 0x1B);
}

void istri_1B ( void )
{
   run_istri_vectors("1B", h_pcmpistri_1B, s_pcmpistri_1B,
                     equal_each_vecs, N_ELEMS(equal_each_vecs));
}



//////////////////////////////////////////////////////////
//                                                      //
//                       ISTRI_03                       //
//                                                      //
//////////////////////////////////////////////////////////

/* Hardware result of pcmpistri $0x03 on (argL, argR). */
UInt h_pcmpistri_03 ( V128* argL, V128* argR )
{
   V128 block[2];
   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));
   ULong res = 0, flags = 0;
   __asm__ __volatile__(
      H_PCMPISTRI_INSNS("0x03")
      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );
   return pack_hw_result(flags, res);
}

/* Simulated result of pcmpistri $0x03 on (argLU, argRU). */
UInt s_pcmpistri_03 ( V128* argLU, V128* argRU )
{
   return s_pcmpistri_wide(argLU, argRU, 0x03);
}

void istri_03 ( void )
{
   run_istri_vectors("03", h_pcmpistri_03, s_pcmpistri_03,
                     equal_any_vecs, N_ELEMS(equal_any_vecs));
}


////////////////////////////////////////////////////////// 918 // // 919 // ISTRI_13 // 920 // // 921 ////////////////////////////////////////////////////////// 922 923 UInt h_pcmpistri_13 ( V128* argL, V128* argR ) 924 { 925 V128 block[2]; 926 memcpy(&block[0], argL, sizeof(V128)); 927 memcpy(&block[1], argR, sizeof(V128)); 928 ULong res, flags; 929 __asm__ __volatile__( 930 "subq $1024, %%rsp" "\n\t" 931 "movdqu 0(%2), %%xmm2" "\n\t" 932 "movdqu 16(%2), %%xmm11" "\n\t" 933 "pcmpistri $0x13, %%xmm2, %%xmm11" "\n\t" 934 //"pcmpistrm $0x13, %%xmm2, %%xmm11" "\n\t" 935 //"movd %%xmm0, %%ecx" "\n\t" 936 "pushfq" "\n\t" 937 "popq %%rdx" "\n\t" 938 "movq %%rcx, %0" "\n\t" 939 "movq %%rdx, %1" "\n\t" 940 "addq $1024, %%rsp" "\n\t" 941 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 942 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 943 ); 944 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 945 } 946 947 UInt s_pcmpistri_13 ( V128* argLU, V128* argRU ) 948 { 949 V128 resV; 950 UInt resOSZACP, resECX; 951 Bool ok 952 = pcmpXstrX_WRK_wide( &resV, &resOSZACP, argLU, argRU, 953 zmask_from_V128(argLU), 954 zmask_from_V128(argRU), 955 0x13, False/*!isSTRM*/ 956 ); 957 assert(ok); 958 resECX = resV.uInt[0]; 959 return (resOSZACP << 16) | resECX; 960 } 961 962 void istri_13 ( void ) 963 { 964 char* wot = "13"; 965 UInt(*h)(V128*,V128*) = h_pcmpistri_13; 966 UInt(*s)(V128*,V128*) = s_pcmpistri_13; 967 968 try_istri(wot,h,s, "aacdacbdaacdaacd", "00000000000000aa"); 969 try_istri(wot,h,s, "aabbaabbaabbaabb", "00000000000000bb"); 970 try_istri(wot,h,s, "aabbccddaabbccdd", "000000000000aabb"); 971 try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd"); 972 973 try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabbccdd"); 974 try_istri(wot,h,s, "00bbccddaabbccdd", "00000000aabbccdd"); 975 try_istri(wot,h,s, "aabbccddaa00ccdd", "00000000aabbccdd"); 976 try_istri(wot,h,s, "aabbccddaabb00dd", "00000000aabbccdd"); 977 try_istri(wot,h,s, "aabbccddaabbcc00", 
"00000000aabbccdd"); 978 979 try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabbccdd"); 980 try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aa00ccdd"); 981 try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabb00dd"); 982 try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabbcc00"); 983 984 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 985 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 986 987 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd"); 988 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba"); 989 try_istri(wot,h,s, "0000aabbaabbaabb", "000000000000bbbb"); 990 try_istri(wot,h,s, "0000ccddaabbccdd", "00000000bbaabbaa"); 991 992 try_istri(wot,h,s, "0000ccddaabbccdd", "000000bbaabbaa00"); 993 994 try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe"); 995 try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe"); 996 } 997 998 999 1000 ////////////////////////////////////////////////////////// 1001 // // 1002 // ISTRI_45 // 1003 // // 1004 ////////////////////////////////////////////////////////// 1005 1006 UInt h_pcmpistri_45 ( V128* argL, V128* argR ) 1007 { 1008 V128 block[2]; 1009 memcpy(&block[0], argL, sizeof(V128)); 1010 memcpy(&block[1], argR, sizeof(V128)); 1011 ULong res, flags; 1012 __asm__ __volatile__( 1013 "subq $1024, %%rsp" "\n\t" 1014 "movdqu 0(%2), %%xmm2" "\n\t" 1015 "movdqu 16(%2), %%xmm11" "\n\t" 1016 "pcmpistri $0x45, %%xmm2, %%xmm11" "\n\t" 1017 //"pcmpistrm $0x04, %%xmm2, %%xmm11" "\n\t" 1018 //"movd %%xmm0, %%ecx" "\n\t" 1019 "pushfq" "\n\t" 1020 "popq %%rdx" "\n\t" 1021 "movq %%rcx, %0" "\n\t" 1022 "movq %%rdx, %1" "\n\t" 1023 "addq $1024, %%rsp" "\n\t" 1024 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 1025 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 1026 ); 1027 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 1028 } 1029 1030 UInt s_pcmpistri_45 ( V128* argLU, V128* argRU ) 1031 { 1032 V128 resV; 1033 UInt resOSZACP, resECX; 1034 Bool ok 1035 = pcmpXstrX_WRK_wide( &resV, 
&resOSZACP, argLU, argRU, 1036 zmask_from_V128(argLU), 1037 zmask_from_V128(argRU), 1038 0x45, False/*!isSTRM*/ 1039 ); 1040 assert(ok); 1041 resECX = resV.uInt[0]; 1042 return (resOSZACP << 16) | resECX; 1043 } 1044 1045 void istri_45 ( void ) 1046 { 1047 char* wot = "45"; 1048 UInt(*h)(V128*,V128*) = h_pcmpistri_45; 1049 UInt(*s)(V128*,V128*) = s_pcmpistri_45; 1050 1051 try_istri(wot,h,s, "aaaabbbbccccdddd", "000000000000bbcc"); 1052 try_istri(wot,h,s, "aaaabbbbccccdddd", "000000000000ccbb"); 1053 try_istri(wot,h,s, "baaabbbbccccdddd", "000000000000ccbb"); 1054 try_istri(wot,h,s, "baaabbbbccccdddc", "000000000000ccbb"); 1055 1056 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000ccbb"); 1057 try_istri(wot,h,s, "bbbbbbbb00bbbbbb", "000000000000ccbb"); 1058 try_istri(wot,h,s, "bbbbbbbbbbbb00bb", "000000000000ccbb"); 1059 try_istri(wot,h,s, "bbbbbbbbbbbbbb00", "000000000000ccbb"); 1060 try_istri(wot,h,s, "0000000000000000", "000000000000ccbb"); 1061 1062 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 1063 1064 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000ccbb"); 1065 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000bb"); 1066 try_istri(wot,h,s, "bb44bb44bb44bb44", "000000006622ccbb"); 1067 1068 try_istri(wot,h,s, "bb44bb44bb44bb44", "000000000022ccbb"); 1069 try_istri(wot,h,s, "bb44bb44bb44bb44", "000000000000ccbb"); 1070 try_istri(wot,h,s, "bb44bb44bb44bb44", "00000000000000bb"); 1071 1072 try_istri(wot,h,s, "0011223344556677", "0000997755442211"); 1073 try_istri(wot,h,s, "1122334455667711", "0000997755442211"); 1074 1075 try_istri(wot,h,s, "0011223344556677", "0000aa8866553322"); 1076 try_istri(wot,h,s, "1122334455667711", "0000aa8866553322"); 1077 } 1078 1079 1080 ////////////////////////////////////////////////////////// 1081 // // 1082 // ISTRI_01 // 1083 // // 1084 ////////////////////////////////////////////////////////// 1085 1086 UInt h_pcmpistri_01 ( V128* argL, V128* argR ) 1087 { 1088 V128 block[2]; 1089 memcpy(&block[0], argL, 
sizeof(V128)); 1090 memcpy(&block[1], argR, sizeof(V128)); 1091 ULong res, flags; 1092 __asm__ __volatile__( 1093 "subq $1024, %%rsp" "\n\t" 1094 "movdqu 0(%2), %%xmm2" "\n\t" 1095 "movdqu 16(%2), %%xmm11" "\n\t" 1096 "pcmpistri $0x01, %%xmm2, %%xmm11" "\n\t" 1097 //"pcmpistrm $0x01, %%xmm2, %%xmm11" "\n\t" 1098 //"movd %%xmm0, %%ecx" "\n\t" 1099 "pushfq" "\n\t" 1100 "popq %%rdx" "\n\t" 1101 "movq %%rcx, %0" "\n\t" 1102 "movq %%rdx, %1" "\n\t" 1103 "addq $1024, %%rsp" "\n\t" 1104 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 1105 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 1106 ); 1107 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 1108 } 1109 1110 UInt s_pcmpistri_01 ( V128* argLU, V128* argRU ) 1111 { 1112 V128 resV; 1113 UInt resOSZACP, resECX; 1114 Bool ok 1115 = pcmpXstrX_WRK_wide( &resV, &resOSZACP, argLU, argRU, 1116 zmask_from_V128(argLU), 1117 zmask_from_V128(argRU), 1118 0x01, False/*!isSTRM*/ 1119 ); 1120 assert(ok); 1121 resECX = resV.uInt[0]; 1122 return (resOSZACP << 16) | resECX; 1123 } 1124 1125 void istri_01 ( void ) 1126 { 1127 char* wot = "01"; 1128 UInt(*h)(V128*,V128*) = h_pcmpistri_01; 1129 UInt(*s)(V128*,V128*) = s_pcmpistri_01; 1130 1131 try_istri(wot,h,s, "aacdacbdaacdaacd", "00000000000000aa"); 1132 try_istri(wot,h,s, "aabbaabbaabbaabb", "00000000000000bb"); 1133 try_istri(wot,h,s, "aabbccddaabbccdd", "000000000000aabb"); 1134 try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd"); 1135 1136 try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabbccdd"); 1137 try_istri(wot,h,s, "00bbccddaabbccdd", "00000000aabbccdd"); 1138 try_istri(wot,h,s, "aabbccddaa00ccdd", "00000000aabbccdd"); 1139 try_istri(wot,h,s, "aabbccddaabb00dd", "00000000aabbccdd"); 1140 try_istri(wot,h,s, "aabbccddaabbcc00", "00000000aabbccdd"); 1141 1142 try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabbccdd"); 1143 try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aa00ccdd"); 1144 try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabb00dd"); 1145 
try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabbcc00"); 1146 1147 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 1148 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1149 1150 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd"); 1151 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba"); 1152 try_istri(wot,h,s, "0000aabbaabbaabb", "000000000000bbbb"); 1153 try_istri(wot,h,s, "0000ccddaabbccdd", "00000000bbaabbaa"); 1154 1155 try_istri(wot,h,s, "0000ccddaabbccdd", "000000bbaabbaa00"); 1156 1157 try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe"); 1158 try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe"); 1159 } 1160 1161 1162 ////////////////////////////////////////////////////////// 1163 // // 1164 // ISTRI_39 // 1165 // // 1166 ////////////////////////////////////////////////////////// 1167 1168 UInt h_pcmpistri_39 ( V128* argL, V128* argR ) 1169 { 1170 V128 block[2]; 1171 memcpy(&block[0], argL, sizeof(V128)); 1172 memcpy(&block[1], argR, sizeof(V128)); 1173 ULong res, flags; 1174 __asm__ __volatile__( 1175 "subq $1024, %%rsp" "\n\t" 1176 "movdqu 0(%2), %%xmm2" "\n\t" 1177 "movdqu 16(%2), %%xmm11" "\n\t" 1178 "pcmpistri $0x39, %%xmm2, %%xmm11" "\n\t" 1179 "pushfq" "\n\t" 1180 "popq %%rdx" "\n\t" 1181 "movq %%rcx, %0" "\n\t" 1182 "movq %%rdx, %1" "\n\t" 1183 "addq $1024, %%rsp" "\n\t" 1184 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 1185 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 1186 ); 1187 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 1188 } 1189 1190 UInt s_pcmpistri_39 ( V128* argLU, V128* argRU ) 1191 { 1192 V128 resV; 1193 UInt resOSZACP, resECX; 1194 Bool ok 1195 = pcmpXstrX_WRK_wide( &resV, &resOSZACP, argLU, argRU, 1196 zmask_from_V128(argLU), 1197 zmask_from_V128(argRU), 1198 0x39, False/*!isSTRM*/ 1199 ); 1200 assert(ok); 1201 resECX = resV.uInt[0]; 1202 return (resOSZACP << 16) | resECX; 1203 } 1204 1205 void istri_39 ( void ) 1206 { 1207 char* wot = "39"; 1208 
UInt(*h)(V128*,V128*) = h_pcmpistri_39; 1209 UInt(*s)(V128*,V128*) = s_pcmpistri_39; 1210 1211 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 1212 1213 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1214 try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1215 try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa"); 1216 try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa"); 1217 1218 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa"); 1219 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa"); 1220 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a"); 1221 1222 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1223 try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1224 try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1225 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1226 1227 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1228 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa"); 1229 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa"); 1230 1231 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1232 1233 try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaaaaaa"); 1234 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa00aa"); 1235 try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaa00aa"); 1236 1237 try_istri(wot,h,s, "aaaaaaaa00aaaaaa", "aaaaaaaaaaaaaaaa"); 1238 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa00aa"); 1239 try_istri(wot,h,s, "aaaaaaaa00aaaaaa", "aaaaaaaaaaaa00aa"); 1240 1241 try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaaaaaa"); 1242 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa00aaaaaa"); 1243 try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaa00aaaaaa"); 1244 1245 try_istri(wot,h,s, "0000000000000000", "aaaaaaaa00aaaaaa"); 1246 try_istri(wot,h,s, "8000000000000000", "aaaaaaaa00aaaaaa"); 1247 try_istri(wot,h,s, "0000000000000001", "aaaaaaaa00aaaaaa"); 1248 1249 try_istri(wot,h,s, 
"0000000000000000", "aaaaaaaaaaaaaaaa"); 1250 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000"); 1251 } 1252 1253 1254 1255 ////////////////////////////////////////////////////////// 1256 // // 1257 // main // 1258 // // 1259 ////////////////////////////////////////////////////////// 1260 1261 int main ( void ) 1262 { 1263 istri_4B(); 1264 istri_3B(); 1265 istri_09(); 1266 istri_1B(); 1267 istri_03(); 1268 istri_0D(); 1269 istri_13(); 1270 istri_45(); 1271 istri_01(); 1272 istri_39(); 1273 return 0; 1274 } 1275