Home | History | Annotate | Download | only in amd64
      1 
      2 /* Tests in detail the core arithmetic for pcmp{e,i}str{i,m} using
      3    pcmpistri to drive it.  Does not check the e-vs-i or i-vs-m
      4    aspect. */
      5 
      6 #include <string.h>
      7 #include <stdio.h>
      8 #include <assert.h>
      9 
     10 typedef  unsigned int   UInt;
     11 typedef  signed int     Int;
     12 typedef  unsigned char  UChar;
     13 typedef  signed char    Char;
     14 typedef  unsigned long long int ULong;
     15 typedef  UChar          Bool;
     16 #define False ((Bool)0)
     17 #define True  ((Bool)1)
     18 
     19 //typedef  unsigned char  V128[16];
     20 typedef
     21    union {
     22       UChar uChar[16];
     23       UInt  uInt[4];
     24    }
     25    V128;
     26 
     27 #define SHIFT_O   11
     28 #define SHIFT_S   7
     29 #define SHIFT_Z   6
     30 #define SHIFT_A   4
     31 #define SHIFT_C   0
     32 #define SHIFT_P   2
     33 
     34 #define MASK_O    (1ULL << SHIFT_O)
     35 #define MASK_S    (1ULL << SHIFT_S)
     36 #define MASK_Z    (1ULL << SHIFT_Z)
     37 #define MASK_A    (1ULL << SHIFT_A)
     38 #define MASK_C    (1ULL << SHIFT_C)
     39 #define MASK_P    (1ULL << SHIFT_P)
     40 
     41 
     42 UInt clz32 ( UInt x )
     43 {
     44    Int y, m, n;
     45    y = -(x >> 16);
     46    m = (y >> 16) & 16;
     47    n = 16 - m;
     48    x = x >> m;
     49    y = x - 0x100;
     50    m = (y >> 16) & 8;
     51    n = n + m;
     52    x = x << m;
     53    y = x - 0x1000;
     54    m = (y >> 16) & 4;
     55    n = n + m;
     56    x = x << m;
     57    y = x - 0x4000;
     58    m = (y >> 16) & 2;
     59    n = n + m;
     60    x = x << m;
     61    y = x >> 14;
     62    m = y & ~(y >> 1);
     63    return n + 2 - m;
     64 }
     65 
     66 UInt ctz32 ( UInt x )
     67 {
     68    return 32 - clz32((~x) & (x-1));
     69 }
     70 
     71 void expand ( V128* dst, char* summary )
     72 {
     73    Int i;
     74    assert( strlen(summary) == 16 );
     75    for (i = 0; i < 16; i++) {
     76       UChar xx = 0;
     77       UChar x = summary[15-i];
     78       if      (x >= '0' && x <= '9') { xx = x - '0'; }
     79       else if (x >= 'A' && x <= 'F') { xx = x - 'A' + 10; }
     80       else if (x >= 'a' && x <= 'f') { xx = x - 'a' + 10; }
     81       else assert(0);
     82 
     83       assert(xx < 16);
     84       xx = (xx << 4) | xx;
     85       assert(xx < 256);
     86       dst->uChar[i] = xx;
     87    }
     88 }
     89 
     90 void try_istri ( char* which,
     91                  UInt(*h_fn)(V128*,V128*),
     92                  UInt(*s_fn)(V128*,V128*),
     93                  char* summL, char* summR )
     94 {
     95    assert(strlen(which) == 2);
     96    V128 argL, argR;
     97    expand(&argL, summL);
     98    expand(&argR, summR);
     99    UInt h_res = h_fn(&argL, &argR);
    100    UInt s_res = s_fn(&argL, &argR);
    101    printf("istri %s  %s %s -> %08x %08x %s\n",
    102           which, summL, summR, h_res, s_res, h_res == s_res ? "" : "!!!!");
    103 }
    104 
    105 UInt zmask_from_V128 ( V128* arg )
    106 {
    107    UInt i, res = 0;
    108    for (i = 0; i < 16; i++) {
    109       res |=  ((arg->uChar[i] == 0) ? 1 : 0) << i;
    110    }
    111    return res;
    112 }
    113 
    114 //////////////////////////////////////////////////////////
    115 //                                                      //
    116 //                       GENERAL                        //
    117 //                                                      //
    118 //////////////////////////////////////////////////////////
    119 
    120 
    121 /* Given partial results from a pcmpXstrX operation (intRes1,
    122    basically), generate an I format (index value for ECX) output, and
    123    also the new OSZACP flags.
    124 */
    125 static
    126 void pcmpXstrX_WRK_gen_output_fmt_I(/*OUT*/V128* resV,
    127                                     /*OUT*/UInt* resOSZACP,
    128                                     UInt intRes1,
    129                                     UInt zmaskL, UInt zmaskR,
    130                                     UInt validL,
    131                                     UInt pol, UInt idx )
    132 {
    133    assert((pol >> 2) == 0);
    134    assert((idx >> 1) == 0);
    135 
    136    UInt intRes2 = 0;
    137    switch (pol) {
    138       case 0: intRes2 = intRes1;          break; // pol +
    139       case 1: intRes2 = ~intRes1;         break; // pol -
    140       case 2: intRes2 = intRes1;          break; // pol m+
    141       case 3: intRes2 = intRes1 ^ validL; break; // pol m-
    142    }
    143    intRes2 &= 0xFFFF;
    144 
    145    // generate ecx value
    146    UInt newECX = 0;
    147    if (idx) {
    148      // index of ms-1-bit
    149      newECX = intRes2 == 0 ? 16 : (31 - clz32(intRes2));
    150    } else {
    151      // index of ls-1-bit
    152      newECX = intRes2 == 0 ? 16 : ctz32(intRes2);
    153    }
    154 
    155    *(UInt*)(&resV[0]) = newECX;
    156 
    157    // generate new flags, common to all ISTRI and ISTRM cases
    158    *resOSZACP    // A, P are zero
    159      = ((intRes2 == 0) ? 0 : MASK_C) // C == 0 iff intRes2 == 0
    160      | ((zmaskL == 0)  ? 0 : MASK_Z) // Z == 1 iff any in argL is 0
    161      | ((zmaskR == 0)  ? 0 : MASK_S) // S == 1 iff any in argR is 0
    162      | ((intRes2 & 1) << SHIFT_O);   // O == IntRes2[0]
    163 }
    164 
    165 
    166 /* Compute result and new OSZACP flags for all PCMP{E,I}STR{I,M}
    167    variants.
    168 
    169    For xSTRI variants, the new ECX value is placed in the 32 bits
    170    pointed to by *resV.  For xSTRM variants, the result is a 128 bit
    171    value and is placed at *resV in the obvious way.
    172 
    173    For all variants, the new OSZACP value is placed at *resOSZACP.
    174 
    175    argLV and argRV are the vector args.  The caller must prepare a
    176    16-bit mask for each, zmaskL and zmaskR.  For ISTRx variants this
    177    must be 1 for each zero byte of of the respective arg.  For ESTRx
    178    variants this is derived from the explicit length indication, and
    179    must be 0 in all places except at the bit index corresponding to
    180    the valid length (0 .. 16).  If the valid length is 16 then the
    181    mask must be all zeroes.  In all cases, bits 31:16 must be zero.
    182 
    183    imm8 is the original immediate from the instruction.  isSTRM
    184    indicates whether this is a xSTRM or xSTRI variant, which controls
    185    how much of *res is written.
    186 
    187    If the given imm8 case can be handled, the return value is True.
    188    If not, False is returned, and neither *res not *resOSZACP are
    189    altered.
    190 */
    191 
    192 Bool pcmpXstrX_WRK ( /*OUT*/V128* resV,
    193                      /*OUT*/UInt* resOSZACP,
    194                      V128* argLV,  V128* argRV,
    195                      UInt zmaskL, UInt zmaskR,
    196                      UInt imm8,   Bool isSTRM )
    197 {
    198    assert(imm8 < 0x80);
    199    assert((zmaskL >> 16) == 0);
    200    assert((zmaskR >> 16) == 0);
    201 
    202    /* Explicitly reject any imm8 values that haven't been validated,
    203       even if they would probably work.  Life is too short to have
    204       unvalidated cases in the code base. */
    205    switch (imm8) {
    206       case 0x00: case 0x02:
    207       case 0x08: case 0x0A: case 0x0C: case 0x0E:
    208       case 0x10: case 0x12: case 0x14:
    209       case 0x18: case 0x1A:
    210       case 0x30:            case 0x34:
    211       case 0x38: case 0x3A:
    212       case 0x40: case 0x42: case 0x44: case 0x46:
    213                  case 0x4A:
    214                  case 0x62:
    215       case 0x70: case 0x72:
    216          break;
    217       default:
    218          return False;
    219    }
    220 
    221    UInt fmt = (imm8 >> 0) & 3; // imm8[1:0]  data format
    222    UInt agg = (imm8 >> 2) & 3; // imm8[3:2]  aggregation fn
    223    UInt pol = (imm8 >> 4) & 3; // imm8[5:4]  polarity
    224    UInt idx = (imm8 >> 6) & 1; // imm8[6]    1==msb/bytemask
    225 
    226    /*----------------------------------------*/
    227    /*-- strcmp on byte data                --*/
    228    /*----------------------------------------*/
    229 
    230    if (agg == 2/*equal each, aka strcmp*/
    231        && (fmt == 0/*ub*/ || fmt == 2/*sb*/)
    232        && !isSTRM) {
    233       Int    i;
    234       UChar* argL = (UChar*)argLV;
    235       UChar* argR = (UChar*)argRV;
    236       UInt boolResII = 0;
    237       for (i = 15; i >= 0; i--) {
    238          UChar cL  = argL[i];
    239          UChar cR  = argR[i];
    240          boolResII = (boolResII << 1) | (cL == cR ? 1 : 0);
    241       }
    242       UInt validL = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
    243       UInt validR = ~(zmaskR | -zmaskR);  // not(left(zmaskR))
    244 
    245       // do invalidation, common to all equal-each cases
    246       UInt intRes1
    247          = (boolResII & validL & validR)  // if both valid, use cmpres
    248            | (~ (validL | validR));       // if both invalid, force 1
    249                                           // else force 0
    250       intRes1 &= 0xFFFF;
    251 
    252       // generate I-format output
    253       pcmpXstrX_WRK_gen_output_fmt_I(
    254          resV, resOSZACP,
    255          intRes1, zmaskL, zmaskR, validL, pol, idx
    256       );
    257 
    258       return True;
    259    }
    260 
    261    /*----------------------------------------*/
    262    /*-- set membership on byte data        --*/
    263    /*----------------------------------------*/
    264 
    265    if (agg == 0/*equal any, aka find chars in a set*/
    266        && (fmt == 0/*ub*/ || fmt == 2/*sb*/)
    267        && !isSTRM) {
    268       /* argL: the string,  argR: charset */
    269       UInt   si, ci;
    270       UChar* argL    = (UChar*)argLV;
    271       UChar* argR    = (UChar*)argRV;
    272       UInt   boolRes = 0;
    273       UInt   validL  = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
    274       UInt   validR  = ~(zmaskR | -zmaskR);  // not(left(zmaskR))
    275 
    276       for (si = 0; si < 16; si++) {
    277          if ((validL & (1 << si)) == 0)
    278             // run off the end of the string.
    279             break;
    280          UInt m = 0;
    281          for (ci = 0; ci < 16; ci++) {
    282             if ((validR & (1 << ci)) == 0) break;
    283             if (argR[ci] == argL[si]) { m = 1; break; }
    284          }
    285          boolRes |= (m << si);
    286       }
    287 
    288       // boolRes is "pre-invalidated"
    289       UInt intRes1 = boolRes & 0xFFFF;
    290 
    291       // generate I-format output
    292       pcmpXstrX_WRK_gen_output_fmt_I(
    293          resV, resOSZACP,
    294          intRes1, zmaskL, zmaskR, validL, pol, idx
    295       );
    296 
    297       return True;
    298    }
    299 
    300    /*----------------------------------------*/
    301    /*-- substring search on byte data      --*/
    302    /*----------------------------------------*/
    303 
    304    if (agg == 3/*equal ordered, aka substring search*/
    305        && (fmt == 0/*ub*/ || fmt == 2/*sb*/)
    306        && !isSTRM) {
    307 
    308       /* argL: haystack,  argR: needle */
    309       UInt   ni, hi;
    310       UChar* argL    = (UChar*)argLV;
    311       UChar* argR    = (UChar*)argRV;
    312       UInt   boolRes = 0;
    313       UInt   validL  = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
    314       UInt   validR  = ~(zmaskR | -zmaskR);  // not(left(zmaskR))
    315       for (hi = 0; hi < 16; hi++) {
    316          UInt m = 1;
    317          for (ni = 0; ni < 16; ni++) {
    318             if ((validR & (1 << ni)) == 0) break;
    319             UInt i = ni + hi;
    320             if (i >= 16) break;
    321             if (argL[i] != argR[ni]) { m = 0; break; }
    322          }
    323          boolRes |= (m << hi);
    324          if ((validL & (1 << hi)) == 0)
    325             // run off the end of the haystack
    326             break;
    327       }
    328 
    329       // boolRes is "pre-invalidated"
    330       UInt intRes1 = boolRes & 0xFFFF;
    331 
    332       // generate I-format output
    333       pcmpXstrX_WRK_gen_output_fmt_I(
    334          resV, resOSZACP,
    335          intRes1, zmaskL, zmaskR, validL, pol, idx
    336       );
    337 
    338       return True;
    339    }
    340 
    341    /*----------------------------------------*/
    342    /*-- ranges, unsigned byte data         --*/
    343    /*----------------------------------------*/
    344 
    345    if (agg == 1/*ranges*/
    346        && fmt == 0/*ub*/
    347        && !isSTRM) {
    348 
    349       /* argL: string,  argR: range-pairs */
    350       UInt   ri, si;
    351       UChar* argL    = (UChar*)argLV;
    352       UChar* argR    = (UChar*)argRV;
    353       UInt   boolRes = 0;
    354       UInt   validL  = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
    355       UInt   validR  = ~(zmaskR | -zmaskR);  // not(left(zmaskR))
    356       for (si = 0; si < 16; si++) {
    357          if ((validL & (1 << si)) == 0)
    358             // run off the end of the string
    359             break;
    360          UInt m = 0;
    361          for (ri = 0; ri < 16; ri += 2) {
    362             if ((validR & (3 << ri)) != (3 << ri)) break;
    363             if (argR[ri] <= argL[si] && argL[si] <= argR[ri+1]) {
    364                m = 1; break;
    365             }
    366          }
    367          boolRes |= (m << si);
    368       }
    369 
    370       // boolRes is "pre-invalidated"
    371       UInt intRes1 = boolRes & 0xFFFF;
    372 
    373       // generate I-format output
    374       pcmpXstrX_WRK_gen_output_fmt_I(
    375          resV, resOSZACP,
    376          intRes1, zmaskL, zmaskR, validL, pol, idx
    377       );
    378 
    379       return True;
    380    }
    381 
    382    /*----------------------------------------*/
    383    /*-- ranges, signed byte data           --*/
    384    /*----------------------------------------*/
    385 
    386    if (agg == 1/*ranges*/
    387        && fmt == 2/*sb*/
    388        && !isSTRM) {
    389 
    390       /* argL: string,  argR: range-pairs */
    391       UInt   ri, si;
    392       Char*  argL    = (Char*)argLV;
    393       Char*  argR    = (Char*)argRV;
    394       UInt   boolRes = 0;
    395       UInt   validL  = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
    396       UInt   validR  = ~(zmaskR | -zmaskR);  // not(left(zmaskR))
    397       for (si = 0; si < 16; si++) {
    398          if ((validL & (1 << si)) == 0)
    399             // run off the end of the string
    400             break;
    401          UInt m = 0;
    402          for (ri = 0; ri < 16; ri += 2) {
    403             if ((validR & (3 << ri)) != (3 << ri)) break;
    404             if (argR[ri] <= argL[si] && argL[si] <= argR[ri+1]) {
    405                m = 1; break;
    406             }
    407          }
    408          boolRes |= (m << si);
    409       }
    410 
    411       // boolRes is "pre-invalidated"
    412       UInt intRes1 = boolRes & 0xFFFF;
    413 
    414       // generate I-format output
    415       pcmpXstrX_WRK_gen_output_fmt_I(
    416          resV, resOSZACP,
    417          intRes1, zmaskL, zmaskR, validL, pol, idx
    418       );
    419 
    420       return True;
    421    }
    422 
    423    return False;
    424 }
    425 
    426 
    427 //////////////////////////////////////////////////////////
    428 //                                                      //
    429 //                       ISTRI_4A                       //
    430 //                                                      //
    431 //////////////////////////////////////////////////////////
    432 
    433 UInt h_pcmpistri_4A ( V128* argL, V128* argR )
    434 {
    435    V128 block[2];
    436    memcpy(&block[0], argL, sizeof(V128));
    437    memcpy(&block[1], argR, sizeof(V128));
    438    ULong res, flags;
    439    __asm__ __volatile__(
    440       "subq      $1024,  %%rsp"             "\n\t"
    441       "movdqu    0(%2),  %%xmm2"            "\n\t"
    442       "movdqu    16(%2), %%xmm11"           "\n\t"
    443       "pcmpistri $0x4A,  %%xmm2, %%xmm11"   "\n\t"
    444       "pushfq"                              "\n\t"
    445       "popq      %%rdx"                     "\n\t"
    446       "movq      %%rcx,  %0"                "\n\t"
    447       "movq      %%rdx,  %1"                "\n\t"
    448       "addq      $1024,  %%rsp"             "\n\t"
    449       : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
    450       : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
    451    );
    452    return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
    453 }
    454 
    455 UInt s_pcmpistri_4A ( V128* argLU, V128* argRU )
    456 {
    457    V128 resV;
    458    UInt resOSZACP, resECX;
    459    Bool ok
    460       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
    461                        zmask_from_V128(argLU),
    462                        zmask_from_V128(argRU),
    463                        0x4A, False/*!isSTRM*/
    464         );
    465    assert(ok);
    466    resECX = resV.uInt[0];
    467    return (resOSZACP << 16) | resECX;
    468 }
    469 
    470 void istri_4A ( void )
    471 {
    472    char* wot = "4A";
    473    UInt(*h)(V128*,V128*) = h_pcmpistri_4A;
    474    UInt(*s)(V128*,V128*) = s_pcmpistri_4A;
    475 
    476    try_istri(wot,h,s, "0000000000000000", "0000000000000000");
    477 
    478    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
    479    try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
    480    try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
    481    try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
    482 
    483    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
    484    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
    485    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
    486 
    487    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
    488    try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
    489    try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
    490    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
    491 
    492    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
    493    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
    494    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
    495 
    496    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
    497 
    498    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
    499    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
    500    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
    501 
    502    try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
    503    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
    504    try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
    505 
    506    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
    507    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
    508    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
    509 
    510    try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
    511    try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
    512    try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");
    513 
    514    try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
    515    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
    516 }
    517 
    518 //////////////////////////////////////////////////////////
    519 //                                                      //
    520 //                       ISTRI_3A                       //
    521 //                                                      //
    522 //////////////////////////////////////////////////////////
    523 
    524 UInt h_pcmpistri_3A ( V128* argL, V128* argR )
    525 {
    526    V128 block[2];
    527    memcpy(&block[0], argL, sizeof(V128));
    528    memcpy(&block[1], argR, sizeof(V128));
    529    ULong res, flags;
    530    __asm__ __volatile__(
    531       "subq      $1024,  %%rsp"             "\n\t"
    532       "movdqu    0(%2),  %%xmm2"            "\n\t"
    533       "movdqu    16(%2), %%xmm11"           "\n\t"
    534       "pcmpistri $0x3A,  %%xmm2, %%xmm11"   "\n\t"
    535       "pushfq"                              "\n\t"
    536       "popq      %%rdx"                     "\n\t"
    537       "movq      %%rcx,  %0"                "\n\t"
    538       "movq      %%rdx,  %1"                "\n\t"
    539       "addq      $1024,  %%rsp"             "\n\t"
    540       : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
    541       : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
    542    );
    543    return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
    544 }
    545 
    546 UInt s_pcmpistri_3A ( V128* argLU, V128* argRU )
    547 {
    548    V128 resV;
    549    UInt resOSZACP, resECX;
    550    Bool ok
    551       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
    552                        zmask_from_V128(argLU),
    553                        zmask_from_V128(argRU),
    554                        0x3A, False/*!isSTRM*/
    555         );
    556    assert(ok);
    557    resECX = resV.uInt[0];
    558    return (resOSZACP << 16) | resECX;
    559 }
    560 
    561 void istri_3A ( void )
    562 {
    563    char* wot = "3A";
    564    UInt(*h)(V128*,V128*) = h_pcmpistri_3A;
    565    UInt(*s)(V128*,V128*) = s_pcmpistri_3A;
    566 
    567    try_istri(wot,h,s, "0000000000000000", "0000000000000000");
    568 
    569    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
    570    try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
    571    try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
    572    try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
    573 
    574    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
    575    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
    576    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
    577 
    578    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
    579    try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
    580    try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
    581    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
    582 
    583    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
    584    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
    585    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
    586 
    587    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
    588 
    589    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
    590    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
    591    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
    592 
    593    try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
    594    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
    595    try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
    596 
    597    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
    598    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
    599    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
    600 
    601    try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
    602    try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
    603    try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");
    604 
    605    try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
    606    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
    607 }
    608 
    609 
    610 
    611 //////////////////////////////////////////////////////////
    612 //                                                      //
    613 //                       ISTRI_0C                       //
    614 //                                                      //
    615 //////////////////////////////////////////////////////////
    616 
    617 __attribute__((noinline))
    618 UInt h_pcmpistri_0C ( V128* argL, V128* argR )
    619 {
    620    V128 block[2];
    621    memcpy(&block[0], argL, sizeof(V128));
    622    memcpy(&block[1], argR, sizeof(V128));
    623    ULong res = 0, flags = 0;
    624    __asm__ __volatile__(
    625       "movdqu    0(%2),  %%xmm2"            "\n\t"
    626       "movdqu    16(%2), %%xmm11"           "\n\t"
    627       "pcmpistri $0x0C,  %%xmm2, %%xmm11"   "\n\t"
    628       //"pcmpistrm $0x0C,  %%xmm2, %%xmm11"   "\n\t"
    629       //"movd %%xmm0, %%ecx" "\n\t"
    630       "pushfq"                              "\n\t"
    631       "popq      %%rdx"                     "\n\t"
    632       "movq      %%rcx,  %0"                "\n\t"
    633       "movq      %%rdx,  %1"                "\n\t"
    634       : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
    635       : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
    636    );
    637    return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
    638 }
    639 
    640 UInt s_pcmpistri_0C ( V128* argLU, V128* argRU )
    641 {
    642    V128 resV;
    643    UInt resOSZACP, resECX;
    644    Bool ok
    645       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
    646                        zmask_from_V128(argLU),
    647                        zmask_from_V128(argRU),
    648                        0x0C, False/*!isSTRM*/
    649         );
    650    assert(ok);
    651    resECX = resV.uInt[0];
    652    return (resOSZACP << 16) | resECX;
    653 }
    654 
    655 void istri_0C ( void )
    656 {
    657    char* wot = "0C";
    658    UInt(*h)(V128*,V128*) = h_pcmpistri_0C;
    659    UInt(*s)(V128*,V128*) = s_pcmpistri_0C;
    660 
    661    try_istri(wot,h,s, "111111111abcde11", "00000000000abcde");
    662 
    663    try_istri(wot,h,s, "111111111abcde11", "0000abcde00abcde");
    664 
    665    try_istri(wot,h,s, "1111111111abcde1", "00000000000abcde");
    666    try_istri(wot,h,s, "11111111111abcde", "00000000000abcde");
    667    try_istri(wot,h,s, "111111111111abcd", "00000000000abcde");
    668 
    669    try_istri(wot,h,s, "111abcde1abcde11", "00000000000abcde");
    670 
    671    try_istri(wot,h,s, "11abcde11abcde11", "00000000000abcde");
    672    try_istri(wot,h,s, "1abcde111abcde11", "00000000000abcde");
    673    try_istri(wot,h,s, "abcde1111abcde11", "00000000000abcde");
    674    try_istri(wot,h,s, "bcde11111abcde11", "00000000000abcde");
    675    try_istri(wot,h,s, "cde111111abcde11", "00000000000abcde");
    676 
    677    try_istri(wot,h,s, "01abcde11abcde11", "00000000000abcde");
    678    try_istri(wot,h,s, "00abcde11abcde11", "00000000000abcde");
    679    try_istri(wot,h,s, "000bcde11abcde11", "00000000000abcde");
    680 
    681    try_istri(wot,h,s, "00abcde10abcde11", "00000000000abcde");
    682    try_istri(wot,h,s, "00abcde100bcde11", "00000000000abcde");
    683 
    684    try_istri(wot,h,s, "1111111111111234", "0000000000000000");
    685    try_istri(wot,h,s, "1111111111111234", "0000000000000001");
    686    try_istri(wot,h,s, "1111111111111234", "0000000000000011");
    687 
    688    try_istri(wot,h,s, "1111111111111234", "1111111111111234");
    689    try_istri(wot,h,s, "a111111111111111", "000000000000000a");
    690    try_istri(wot,h,s, "b111111111111111", "000000000000000a");
    691 
    692    try_istri(wot,h,s, "b111111111111111", "0000000000000000");
    693    try_istri(wot,h,s, "0000000000000000", "0000000000000000");
    694    try_istri(wot,h,s, "123456789abcdef1", "0000000000000000");
    695    try_istri(wot,h,s, "0000000000000000", "123456789abcdef1");
    696 }
    697 
    698 
    699 //////////////////////////////////////////////////////////
    700 //                                                      //
    701 //                       ISTRI_08                       //
    702 //                                                      //
    703 //////////////////////////////////////////////////////////
    704 
    705 UInt h_pcmpistri_08 ( V128* argL, V128* argR )
    706 {
    707    V128 block[2];
    708    memcpy(&block[0], argL, sizeof(V128));
    709    memcpy(&block[1], argR, sizeof(V128));
    710    ULong res, flags;
    711    __asm__ __volatile__(
    712       "subq      $1024,  %%rsp"             "\n\t"
    713       "movdqu    0(%2),  %%xmm2"            "\n\t"
    714       "movdqu    16(%2), %%xmm11"           "\n\t"
    715       "pcmpistri $0x08,  %%xmm2, %%xmm11"   "\n\t"
    716       "pushfq"                              "\n\t"
    717       "popq      %%rdx"                     "\n\t"
    718       "movq      %%rcx,  %0"                "\n\t"
    719       "movq      %%rdx,  %1"                "\n\t"
    720       "addq      $1024,  %%rsp"             "\n\t"
    721       : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
    722       : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
    723    );
    724    return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
    725 }
    726 
    727 UInt s_pcmpistri_08 ( V128* argLU, V128* argRU )
    728 {
    729    V128 resV;
    730    UInt resOSZACP, resECX;
    731    Bool ok
    732       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
    733                        zmask_from_V128(argLU),
    734                        zmask_from_V128(argRU),
    735                        0x08, False/*!isSTRM*/
    736         );
    737    assert(ok);
    738    resECX = resV.uInt[0];
    739    return (resOSZACP << 16) | resECX;
    740 }
    741 
    742 void istri_08 ( void )
    743 {
    744    char* wot = "08";
    745    UInt(*h)(V128*,V128*) = h_pcmpistri_08;
    746    UInt(*s)(V128*,V128*) = s_pcmpistri_08;
    747 
    748    try_istri(wot,h,s, "0000000000000000", "0000000000000000");
    749 
    750    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
    751    try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
    752    try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
    753    try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
    754 
    755    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
    756    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
    757    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
    758 
    759    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
    760    try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
    761    try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
    762    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
    763 
    764    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
    765    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
    766    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
    767 
    768    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
    769 
    770    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
    771    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
    772    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
    773 
    774    try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
    775    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
    776    try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
    777 
    778    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
    779    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
    780    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
    781 
    782    try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
    783    try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
    784    try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");
    785 
    786    try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
    787    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
    788 }
    789 
    790 
    791 
    792 //////////////////////////////////////////////////////////
    793 //                                                      //
    794 //                       ISTRI_18                       //
    795 //                                                      //
    796 //////////////////////////////////////////////////////////
    797 
    798 UInt h_pcmpistri_18 ( V128* argL, V128* argR )
    799 {
    800    V128 block[2];
    801    memcpy(&block[0], argL, sizeof(V128));
    802    memcpy(&block[1], argR, sizeof(V128));
    803    ULong res, flags;
    804    __asm__ __volatile__(
    805       "subq      $1024,  %%rsp"             "\n\t"
    806       "movdqu    0(%2),  %%xmm2"            "\n\t"
    807       "movdqu    16(%2), %%xmm11"           "\n\t"
    808       "pcmpistri $0x18,  %%xmm2, %%xmm11"   "\n\t"
    809       "pushfq"                              "\n\t"
    810       "popq      %%rdx"                     "\n\t"
    811       "movq      %%rcx,  %0"                "\n\t"
    812       "movq      %%rdx,  %1"                "\n\t"
    813       "addq      $1024,  %%rsp"             "\n\t"
    814       : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
    815       : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
    816    );
    817    return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
    818 }
    819 
    820 UInt s_pcmpistri_18 ( V128* argLU, V128* argRU )
    821 {
    822    V128 resV;
    823    UInt resOSZACP, resECX;
    824    Bool ok
    825       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
    826                        zmask_from_V128(argLU),
    827                        zmask_from_V128(argRU),
    828                        0x18, False/*!isSTRM*/
    829         );
    830    assert(ok);
    831    resECX = resV.uInt[0];
    832    return (resOSZACP << 16) | resECX;
    833 }
    834 
    835 void istri_18 ( void )
    836 {
    837    char* wot = "18";
    838    UInt(*h)(V128*,V128*) = h_pcmpistri_18;
    839    UInt(*s)(V128*,V128*) = s_pcmpistri_18;
    840 
    841    try_istri(wot,h,s, "0000000000000000", "0000000000000000");
    842 
    843    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
    844    try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
    845    try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
    846    try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
    847 
    848    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
    849    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
    850    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
    851 
    852    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
    853    try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
    854    try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
    855    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
    856 
    857    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
    858    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
    859    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
    860 
    861    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
    862 
    863    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
    864    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
    865    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
    866 
    867    try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
    868    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
    869    try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
    870 
    871    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
    872    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
    873    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
    874 
    875    try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
    876    try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
    877    try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");
    878 
    879    try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
    880    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
    881 }
    882 
    883 
    884 
    885 //////////////////////////////////////////////////////////
    886 //                                                      //
    887 //                       ISTRI_1A                       //
    888 //                                                      //
    889 //////////////////////////////////////////////////////////
    890 
    891 UInt h_pcmpistri_1A ( V128* argL, V128* argR )
    892 {
    893    V128 block[2];
    894    memcpy(&block[0], argL, sizeof(V128));
    895    memcpy(&block[1], argR, sizeof(V128));
    896    ULong res, flags;
    897    __asm__ __volatile__(
    898       "subq      $1024,  %%rsp"             "\n\t"
    899       "movdqu    0(%2),  %%xmm2"            "\n\t"
    900       "movdqu    16(%2), %%xmm11"           "\n\t"
    901       "pcmpistri $0x1A,  %%xmm2, %%xmm11"   "\n\t"
    902       "pushfq"                              "\n\t"
    903       "popq      %%rdx"                     "\n\t"
    904       "movq      %%rcx,  %0"                "\n\t"
    905       "movq      %%rdx,  %1"                "\n\t"
    906       "addq      $1024,  %%rsp"             "\n\t"
    907       : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
    908       : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
    909    );
    910    return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
    911 }
    912 
    913 UInt s_pcmpistri_1A ( V128* argLU, V128* argRU )
    914 {
    915    V128 resV;
    916    UInt resOSZACP, resECX;
    917    Bool ok
    918       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
    919                        zmask_from_V128(argLU),
    920                        zmask_from_V128(argRU),
    921                        0x1A, False/*!isSTRM*/
    922         );
    923    assert(ok);
    924    resECX = resV.uInt[0];
    925    return (resOSZACP << 16) | resECX;
    926 }
    927 
    928 void istri_1A ( void )
    929 {
    930    char* wot = "1A";
    931    UInt(*h)(V128*,V128*) = h_pcmpistri_1A;
    932    UInt(*s)(V128*,V128*) = s_pcmpistri_1A;
    933 
    934    try_istri(wot,h,s, "0000000000000000", "0000000000000000");
    935 
    936    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
    937    try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
    938    try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
    939    try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
    940 
    941    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
    942    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
    943    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
    944 
    945    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
    946    try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
    947    try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
    948    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
    949 
    950    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
    951    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
    952    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
    953 
    954    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
    955 
    956    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
    957    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
    958    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
    959 
    960    try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
    961    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
    962    try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
    963 
    964    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
    965    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
    966    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
    967 
    968    try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
    969    try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
    970    try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");
    971 
    972    try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
    973    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
    974 }
    975 
    976 
    977 
    978 //////////////////////////////////////////////////////////
    979 //                                                      //
    980 //                       ISTRI_02                       //
    981 //                                                      //
    982 //////////////////////////////////////////////////////////
    983 
    984 UInt h_pcmpistri_02 ( V128* argL, V128* argR )
    985 {
    986    V128 block[2];
    987    memcpy(&block[0], argL, sizeof(V128));
    988    memcpy(&block[1], argR, sizeof(V128));
    989    ULong res, flags;
    990    __asm__ __volatile__(
    991       "subq      $1024,  %%rsp"             "\n\t"
    992       "movdqu    0(%2),  %%xmm2"            "\n\t"
    993       "movdqu    16(%2), %%xmm11"           "\n\t"
    994       "pcmpistri $0x02,  %%xmm2, %%xmm11"   "\n\t"
    995 //"pcmpistrm $0x02, %%xmm2, %%xmm11"   "\n\t"
    996 //"movd %%xmm0, %%ecx" "\n\t"
    997       "pushfq"                              "\n\t"
    998       "popq      %%rdx"                     "\n\t"
    999       "movq      %%rcx,  %0"                "\n\t"
   1000       "movq      %%rdx,  %1"                "\n\t"
   1001       "addq      $1024,  %%rsp"             "\n\t"
   1002       : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
   1003       : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   1004    );
   1005    return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
   1006 }
   1007 
   1008 UInt s_pcmpistri_02 ( V128* argLU, V128* argRU )
   1009 {
   1010    V128 resV;
   1011    UInt resOSZACP, resECX;
   1012    Bool ok
   1013       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
   1014                        zmask_from_V128(argLU),
   1015                        zmask_from_V128(argRU),
   1016                        0x02, False/*!isSTRM*/
   1017         );
   1018    assert(ok);
   1019    resECX = resV.uInt[0];
   1020    return (resOSZACP << 16) | resECX;
   1021 }
   1022 
   1023 void istri_02 ( void )
   1024 {
   1025    char* wot = "02";
   1026    UInt(*h)(V128*,V128*) = h_pcmpistri_02;
   1027    UInt(*s)(V128*,V128*) = s_pcmpistri_02;
   1028 
   1029    try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
   1030    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
   1031    try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
   1032    try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
   1033 
   1034    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
   1035    try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
   1036    try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
   1037    try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
   1038    try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
   1039 
   1040    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
   1041    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
   1042    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
   1043    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
   1044 
   1045    try_istri(wot,h,s, "0000000000000000", "0000000000000000");
   1046    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
   1047 
   1048    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
   1049    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
   1050    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
   1051    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
   1052 
   1053    try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
   1054 
   1055    try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
   1056    try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
   1057 }
   1058 
   1059 
   1060 //////////////////////////////////////////////////////////
   1061 //                                                      //
   1062 //                       ISTRI_12                       //
   1063 //                                                      //
   1064 //////////////////////////////////////////////////////////
   1065 
   1066 UInt h_pcmpistri_12 ( V128* argL, V128* argR )
   1067 {
   1068    V128 block[2];
   1069    memcpy(&block[0], argL, sizeof(V128));
   1070    memcpy(&block[1], argR, sizeof(V128));
   1071    ULong res, flags;
   1072    __asm__ __volatile__(
   1073       "subq      $1024,  %%rsp"             "\n\t"
   1074       "movdqu    0(%2),  %%xmm2"            "\n\t"
   1075       "movdqu    16(%2), %%xmm11"           "\n\t"
   1076       "pcmpistri $0x12,  %%xmm2, %%xmm11"   "\n\t"
   1077 //"pcmpistrm $0x12, %%xmm2, %%xmm11"   "\n\t"
   1078 //"movd %%xmm0, %%ecx" "\n\t"
   1079       "pushfq"                              "\n\t"
   1080       "popq      %%rdx"                     "\n\t"
   1081       "movq      %%rcx,  %0"                "\n\t"
   1082       "movq      %%rdx,  %1"                "\n\t"
   1083       "addq      $1024,  %%rsp"             "\n\t"
   1084       : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
   1085       : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   1086    );
   1087    return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
   1088 }
   1089 
   1090 UInt s_pcmpistri_12 ( V128* argLU, V128* argRU )
   1091 {
   1092    V128 resV;
   1093    UInt resOSZACP, resECX;
   1094    Bool ok
   1095       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
   1096                        zmask_from_V128(argLU),
   1097                        zmask_from_V128(argRU),
   1098                        0x12, False/*!isSTRM*/
   1099         );
   1100    assert(ok);
   1101    resECX = resV.uInt[0];
   1102    return (resOSZACP << 16) | resECX;
   1103 }
   1104 
   1105 void istri_12 ( void )
   1106 {
   1107    char* wot = "12";
   1108    UInt(*h)(V128*,V128*) = h_pcmpistri_12;
   1109    UInt(*s)(V128*,V128*) = s_pcmpistri_12;
   1110 
   1111    try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
   1112    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
   1113    try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
   1114    try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
   1115 
   1116    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
   1117    try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
   1118    try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
   1119    try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
   1120    try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
   1121 
   1122    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
   1123    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
   1124    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
   1125    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
   1126 
   1127    try_istri(wot,h,s, "0000000000000000", "0000000000000000");
   1128    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
   1129 
   1130    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
   1131    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
   1132    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
   1133    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
   1134 
   1135    try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
   1136 
   1137    try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
   1138    try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
   1139 }
   1140 
   1141 
   1142 
   1143 //////////////////////////////////////////////////////////
   1144 //                                                      //
   1145 //                       ISTRI_44                       //
   1146 //                                                      //
   1147 //////////////////////////////////////////////////////////
   1148 
   1149 UInt h_pcmpistri_44 ( V128* argL, V128* argR )
   1150 {
   1151    V128 block[2];
   1152    memcpy(&block[0], argL, sizeof(V128));
   1153    memcpy(&block[1], argR, sizeof(V128));
   1154    ULong res, flags;
   1155    __asm__ __volatile__(
   1156       "subq      $1024,  %%rsp"             "\n\t"
   1157       "movdqu    0(%2),  %%xmm2"            "\n\t"
   1158       "movdqu    16(%2), %%xmm11"           "\n\t"
   1159       "pcmpistri $0x44,  %%xmm2, %%xmm11"   "\n\t"
   1160 //"pcmpistrm $0x04, %%xmm2, %%xmm11"   "\n\t"
   1161 //"movd %%xmm0, %%ecx" "\n\t"
   1162       "pushfq"                              "\n\t"
   1163       "popq      %%rdx"                     "\n\t"
   1164       "movq      %%rcx,  %0"                "\n\t"
   1165       "movq      %%rdx,  %1"                "\n\t"
   1166       "addq      $1024,  %%rsp"             "\n\t"
   1167       : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
   1168       : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   1169    );
   1170    return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
   1171 }
   1172 
   1173 UInt s_pcmpistri_44 ( V128* argLU, V128* argRU )
   1174 {
   1175    V128 resV;
   1176    UInt resOSZACP, resECX;
   1177    Bool ok
   1178       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
   1179                        zmask_from_V128(argLU),
   1180                        zmask_from_V128(argRU),
   1181                        0x44, False/*!isSTRM*/
   1182         );
   1183    assert(ok);
   1184    resECX = resV.uInt[0];
   1185    return (resOSZACP << 16) | resECX;
   1186 }
   1187 
   1188 void istri_44 ( void )
   1189 {
   1190    char* wot = "44";
   1191    UInt(*h)(V128*,V128*) = h_pcmpistri_44;
   1192    UInt(*s)(V128*,V128*) = s_pcmpistri_44;
   1193 
   1194    try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000bc");
   1195    try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000cb");
   1196    try_istri(wot,h,s, "baaabbbbccccdddd", "00000000000000cb");
   1197    try_istri(wot,h,s, "baaabbbbccccdddc", "00000000000000cb");
   1198 
   1199    try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
   1200    try_istri(wot,h,s, "bbbbbbbb0bbbbbbb", "00000000000000cb");
   1201    try_istri(wot,h,s, "bbbbbbbbbbbbbb0b", "00000000000000cb");
   1202    try_istri(wot,h,s, "bbbbbbbbbbbbbbb0", "00000000000000cb");
   1203    try_istri(wot,h,s, "0000000000000000", "00000000000000cb");
   1204 
   1205    try_istri(wot,h,s, "0000000000000000", "0000000000000000");
   1206 
   1207    try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
   1208    try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000000b");
   1209    try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000062cb");
   1210 
   1211    try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000002cb");
   1212    try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000000cb");
   1213    try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "000000000000000b");
   1214 
   1215    try_istri(wot,h,s, "0123456789abcdef", "000000fecb975421");
   1216    try_istri(wot,h,s, "123456789abcdef1", "000000fecb975421");
   1217 
   1218    try_istri(wot,h,s, "0123456789abcdef", "00000000dca86532");
   1219    try_istri(wot,h,s, "123456789abcdef1", "00000000dca86532");
   1220 
   1221    try_istri(wot,h,s, "163887ec041a9b72", "fcd75adb9b3e895a");
   1222    try_istri(wot,h,s, "fc937cbfbf53f8e2", "0d136bcb024d3fb7");
   1223    try_istri(wot,h,s, "2ca34182c29a82ab", "302ebd646775ab54");
   1224    try_istri(wot,h,s, "3f2987608c11be6f", "a9ecb661f8e0a8cb");
   1225 }
   1226 
   1227 
   1228 //////////////////////////////////////////////////////////
   1229 //                                                      //
   1230 //                       ISTRI_00                       //
   1231 //                                                      //
   1232 //////////////////////////////////////////////////////////
   1233 
   1234 UInt h_pcmpistri_00 ( V128* argL, V128* argR )
   1235 {
   1236    V128 block[2];
   1237    memcpy(&block[0], argL, sizeof(V128));
   1238    memcpy(&block[1], argR, sizeof(V128));
   1239    ULong res, flags;
   1240    __asm__ __volatile__(
   1241       "subq      $1024,  %%rsp"             "\n\t"
   1242       "movdqu    0(%2),  %%xmm2"            "\n\t"
   1243       "movdqu    16(%2), %%xmm11"           "\n\t"
   1244       "pcmpistri $0x00,  %%xmm2, %%xmm11"   "\n\t"
   1245 //"pcmpistrm $0x00, %%xmm2, %%xmm11"   "\n\t"
   1246 //"movd %%xmm0, %%ecx" "\n\t"
   1247       "pushfq"                              "\n\t"
   1248       "popq      %%rdx"                     "\n\t"
   1249       "movq      %%rcx,  %0"                "\n\t"
   1250       "movq      %%rdx,  %1"                "\n\t"
   1251       "addq      $1024,  %%rsp"             "\n\t"
   1252       : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
   1253       : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   1254    );
   1255    return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
   1256 }
   1257 
   1258 UInt s_pcmpistri_00 ( V128* argLU, V128* argRU )
   1259 {
   1260    V128 resV;
   1261    UInt resOSZACP, resECX;
   1262    Bool ok
   1263       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
   1264                        zmask_from_V128(argLU),
   1265                        zmask_from_V128(argRU),
   1266                        0x00, False/*!isSTRM*/
   1267         );
   1268    assert(ok);
   1269    resECX = resV.uInt[0];
   1270    return (resOSZACP << 16) | resECX;
   1271 }
   1272 
   1273 void istri_00 ( void )
   1274 {
   1275    char* wot = "00";
   1276    UInt(*h)(V128*,V128*) = h_pcmpistri_00;
   1277    UInt(*s)(V128*,V128*) = s_pcmpistri_00;
   1278 
   1279    try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
   1280    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
   1281    try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
   1282    try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
   1283 
   1284    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
   1285    try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
   1286    try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
   1287    try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
   1288    try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
   1289 
   1290    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
   1291    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
   1292    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
   1293    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
   1294 
   1295    try_istri(wot,h,s, "0000000000000000", "0000000000000000");
   1296    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
   1297 
   1298    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
   1299    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
   1300    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
   1301    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
   1302 
   1303    try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
   1304 
   1305    try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
   1306    try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
   1307 }
   1308 
   1309 
   1310 //////////////////////////////////////////////////////////
   1311 //                                                      //
   1312 //                       ISTRI_38                       //
   1313 //                                                      //
   1314 //////////////////////////////////////////////////////////
   1315 
   1316 UInt h_pcmpistri_38 ( V128* argL, V128* argR )
   1317 {
   1318    V128 block[2];
   1319    memcpy(&block[0], argL, sizeof(V128));
   1320    memcpy(&block[1], argR, sizeof(V128));
   1321    ULong res, flags;
   1322    __asm__ __volatile__(
   1323       "subq      $1024,  %%rsp"             "\n\t"
   1324       "movdqu    0(%2),  %%xmm2"            "\n\t"
   1325       "movdqu    16(%2), %%xmm11"           "\n\t"
   1326       "pcmpistri $0x38,  %%xmm2, %%xmm11"   "\n\t"
   1327       "pushfq"                              "\n\t"
   1328       "popq      %%rdx"                     "\n\t"
   1329       "movq      %%rcx,  %0"                "\n\t"
   1330       "movq      %%rdx,  %1"                "\n\t"
   1331       "addq      $1024,  %%rsp"             "\n\t"
   1332       : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
   1333       : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   1334    );
   1335    return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
   1336 }
   1337 
   1338 UInt s_pcmpistri_38 ( V128* argLU, V128* argRU )
   1339 {
   1340    V128 resV;
   1341    UInt resOSZACP, resECX;
   1342    Bool ok
   1343       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
   1344                        zmask_from_V128(argLU),
   1345                        zmask_from_V128(argRU),
   1346                        0x38, False/*!isSTRM*/
   1347         );
   1348    assert(ok);
   1349    resECX = resV.uInt[0];
   1350    return (resOSZACP << 16) | resECX;
   1351 }
   1352 
   1353 void istri_38 ( void )
   1354 {
   1355    char* wot = "38";
   1356    UInt(*h)(V128*,V128*) = h_pcmpistri_38;
   1357    UInt(*s)(V128*,V128*) = s_pcmpistri_38;
   1358 
   1359    try_istri(wot,h,s, "0000000000000000", "0000000000000000");
   1360 
   1361    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
   1362    try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
   1363    try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
   1364    try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
   1365 
   1366    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
   1367    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
   1368    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
   1369 
   1370    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
   1371    try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
   1372    try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
   1373    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
   1374 
   1375    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
   1376    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
   1377    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
   1378 
   1379    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
   1380 
   1381    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
   1382    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
   1383    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
   1384 
   1385    try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
   1386    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
   1387    try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
   1388 
   1389    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
   1390    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
   1391    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
   1392 
   1393    try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
   1394    try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
   1395    try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");
   1396 
   1397    try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
   1398    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
   1399 }
   1400 
   1401 
   1402 
   1403 //////////////////////////////////////////////////////////
   1404 //                                                      //
   1405 //                       ISTRI_46                       //
   1406 //                                                      //
   1407 //////////////////////////////////////////////////////////
   1408 
   1409 UInt h_pcmpistri_46 ( V128* argL, V128* argR )
   1410 {
   1411    V128 block[2];
   1412    memcpy(&block[0], argL, sizeof(V128));
   1413    memcpy(&block[1], argR, sizeof(V128));
   1414    ULong res, flags;
   1415    __asm__ __volatile__(
   1416       "subq      $1024,  %%rsp"             "\n\t"
   1417       "movdqu    0(%2),  %%xmm2"            "\n\t"
   1418       "movdqu    16(%2), %%xmm11"           "\n\t"
   1419       "pcmpistri $0x46,  %%xmm2, %%xmm11"   "\n\t"
   1420       "pushfq"                              "\n\t"
   1421       "popq      %%rdx"                     "\n\t"
   1422       "movq      %%rcx,  %0"                "\n\t"
   1423       "movq      %%rdx,  %1"                "\n\t"
   1424       "addq      $1024,  %%rsp"             "\n\t"
   1425       : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
   1426       : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   1427    );
   1428    return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
   1429 }
   1430 
   1431 UInt s_pcmpistri_46 ( V128* argLU, V128* argRU )
   1432 {
   1433    V128 resV;
   1434    UInt resOSZACP, resECX;
   1435    Bool ok
   1436       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
   1437                        zmask_from_V128(argLU),
   1438                        zmask_from_V128(argRU),
   1439                        0x46, False/*!isSTRM*/
   1440         );
   1441    assert(ok);
   1442    resECX = resV.uInt[0];
   1443    return (resOSZACP << 16) | resECX;
   1444 }
   1445 
   1446 void istri_46 ( void )
   1447 {
   1448    char* wot = "46";
   1449    UInt(*h)(V128*,V128*) = h_pcmpistri_46;
   1450    UInt(*s)(V128*,V128*) = s_pcmpistri_46;
   1451 
   1452    try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000bc");
   1453    try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000cb");
   1454    try_istri(wot,h,s, "baaabbbbccccdddd", "00000000000000cb");
   1455    try_istri(wot,h,s, "baaabbbbccccdddc", "00000000000000cb");
   1456 
   1457    try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
   1458    try_istri(wot,h,s, "bbbbbbbb0bbbbbbb", "00000000000000cb");
   1459    try_istri(wot,h,s, "bbbbbbbbbbbbbb0b", "00000000000000cb");
   1460    try_istri(wot,h,s, "bbbbbbbbbbbbbbb0", "00000000000000cb");
   1461    try_istri(wot,h,s, "0000000000000000", "00000000000000cb");
   1462 
   1463    try_istri(wot,h,s, "0000000000000000", "0000000000000000");
   1464 
   1465    try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
   1466    try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000000b");
   1467    try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000062cb");
   1468 
   1469    try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000002cb");
   1470    try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000000cb");
   1471    try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "000000000000000b");
   1472 
   1473    try_istri(wot,h,s, "0123456789abcdef", "000000fecb975421");
   1474    try_istri(wot,h,s, "123456789abcdef1", "000000fecb975421");
   1475 
   1476    try_istri(wot,h,s, "0123456789abcdef", "00000000dca86532");
   1477    try_istri(wot,h,s, "123456789abcdef1", "00000000dca86532");
   1478 
   1479    try_istri(wot,h,s, "163887ec041a9b72", "fcd75adb9b3e895a");
   1480    try_istri(wot,h,s, "fc937cbfbf53f8e2", "0d136bcb024d3fb7");
   1481    try_istri(wot,h,s, "2ca34182c29a82ab", "302ebd646775ab54");
   1482    try_istri(wot,h,s, "3f2987608c11be6f", "a9ecb661f8e0a8cb");
   1483 }
   1484 
   1485 
   1486 //////////////////////////////////////////////////////////
   1487 //                                                      //
   1488 //                       ISTRI_30                       //
   1489 //                                                      //
   1490 //////////////////////////////////////////////////////////
   1491 
   1492 UInt h_pcmpistri_30 ( V128* argL, V128* argR )
   1493 {
   1494    V128 block[2];
   1495    memcpy(&block[0], argL, sizeof(V128));
   1496    memcpy(&block[1], argR, sizeof(V128));
   1497    ULong res, flags;
   1498    __asm__ __volatile__(
   1499       "subq      $1024,  %%rsp"             "\n\t"
   1500       "movdqu    0(%2),  %%xmm2"            "\n\t"
   1501       "movdqu    16(%2), %%xmm11"           "\n\t"
   1502       "pcmpistri $0x30,  %%xmm2, %%xmm11"   "\n\t"
   1503       "pushfq"                              "\n\t"
   1504       "popq      %%rdx"                     "\n\t"
   1505       "movq      %%rcx,  %0"                "\n\t"
   1506       "movq      %%rdx,  %1"                "\n\t"
   1507       "addq      $1024,  %%rsp"             "\n\t"
   1508       : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
   1509       : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   1510    );
   1511    return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
   1512 }
   1513 
   1514 UInt s_pcmpistri_30 ( V128* argLU, V128* argRU )
   1515 {
   1516    V128 resV;
   1517    UInt resOSZACP, resECX;
   1518    Bool ok
   1519       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
   1520                        zmask_from_V128(argLU),
   1521                        zmask_from_V128(argRU),
   1522                        0x30, False/*!isSTRM*/
   1523         );
   1524    assert(ok);
   1525    resECX = resV.uInt[0];
   1526    return (resOSZACP << 16) | resECX;
   1527 }
   1528 
   1529 void istri_30 ( void )
   1530 {
   1531    char* wot = "30";
   1532    UInt(*h)(V128*,V128*) = h_pcmpistri_30;
   1533    UInt(*s)(V128*,V128*) = s_pcmpistri_30;
   1534 
   1535    try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
   1536    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
   1537    try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
   1538    try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
   1539 
   1540    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
   1541    try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
   1542    try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
   1543    try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
   1544    try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
   1545 
   1546    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
   1547    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
   1548    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
   1549    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
   1550 
   1551    try_istri(wot,h,s, "0000000000000000", "0000000000000000");
   1552    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
   1553 
   1554    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
   1555    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
   1556    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
   1557    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
   1558 
   1559    try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
   1560 
   1561    try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
   1562    try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
   1563 }
   1564 
   1565 
   1566 //////////////////////////////////////////////////////////
   1567 //                                                      //
   1568 //                       ISTRI_40                       //
   1569 //                                                      //
   1570 //////////////////////////////////////////////////////////
   1571 
   1572 UInt h_pcmpistri_40 ( V128* argL, V128* argR )
   1573 {
   1574    V128 block[2];
   1575    memcpy(&block[0], argL, sizeof(V128));
   1576    memcpy(&block[1], argR, sizeof(V128));
   1577    ULong res, flags;
   1578    __asm__ __volatile__(
   1579       "subq      $1024,  %%rsp"             "\n\t"
   1580       "movdqu    0(%2),  %%xmm2"            "\n\t"
   1581       "movdqu    16(%2), %%xmm11"           "\n\t"
   1582       "pcmpistri $0x40,  %%xmm2, %%xmm11"   "\n\t"
   1583       "pushfq"                              "\n\t"
   1584       "popq      %%rdx"                     "\n\t"
   1585       "movq      %%rcx,  %0"                "\n\t"
   1586       "movq      %%rdx,  %1"                "\n\t"
   1587       "addq      $1024,  %%rsp"             "\n\t"
   1588       : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
   1589       : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   1590    );
   1591    return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
   1592 }
   1593 
   1594 UInt s_pcmpistri_40 ( V128* argLU, V128* argRU )
   1595 {
   1596    V128 resV;
   1597    UInt resOSZACP, resECX;
   1598    Bool ok
   1599       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
   1600                        zmask_from_V128(argLU),
   1601                        zmask_from_V128(argRU),
   1602                        0x40, False/*!isSTRM*/
   1603         );
   1604    assert(ok);
   1605    resECX = resV.uInt[0];
   1606    return (resOSZACP << 16) | resECX;
   1607 }
   1608 
   1609 void istri_40 ( void )
   1610 {
   1611    char* wot = "40";
   1612    UInt(*h)(V128*,V128*) = h_pcmpistri_40;
   1613    UInt(*s)(V128*,V128*) = s_pcmpistri_40;
   1614 
   1615    try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
   1616    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
   1617    try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
   1618    try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
   1619 
   1620    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
   1621    try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
   1622    try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
   1623    try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
   1624    try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
   1625 
   1626    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
   1627    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
   1628    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
   1629    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
   1630 
   1631    try_istri(wot,h,s, "0000000000000000", "0000000000000000");
   1632    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
   1633 
   1634    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
   1635    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
   1636    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
   1637    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
   1638 
   1639    try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
   1640 
   1641    try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
   1642    try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
   1643 }
   1644 
   1645 
   1646 //////////////////////////////////////////////////////////
   1647 //                                                      //
   1648 //                       ISTRI_42                       //
   1649 //                                                      //
   1650 //////////////////////////////////////////////////////////
   1651 
   1652 UInt h_pcmpistri_42 ( V128* argL, V128* argR )
   1653 {
   1654    V128 block[2];
   1655    memcpy(&block[0], argL, sizeof(V128));
   1656    memcpy(&block[1], argR, sizeof(V128));
   1657    ULong res, flags;
   1658    __asm__ __volatile__(
   1659       "subq      $1024,  %%rsp"             "\n\t"
   1660       "movdqu    0(%2),  %%xmm2"            "\n\t"
   1661       "movdqu    16(%2), %%xmm11"           "\n\t"
   1662       "pcmpistri $0x42,  %%xmm2, %%xmm11"   "\n\t"
   1663       "pushfq"                              "\n\t"
   1664       "popq      %%rdx"                     "\n\t"
   1665       "movq      %%rcx,  %0"                "\n\t"
   1666       "movq      %%rdx,  %1"                "\n\t"
   1667       "addq      $1024,  %%rsp"             "\n\t"
   1668       : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
   1669       : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   1670    );
   1671    return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
   1672 }
   1673 
   1674 UInt s_pcmpistri_42 ( V128* argLU, V128* argRU )
   1675 {
   1676    V128 resV;
   1677    UInt resOSZACP, resECX;
   1678    Bool ok
   1679       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
   1680                        zmask_from_V128(argLU),
   1681                        zmask_from_V128(argRU),
   1682                        0x42, False/*!isSTRM*/
   1683         );
   1684    assert(ok);
   1685    resECX = resV.uInt[0];
   1686    return (resOSZACP << 16) | resECX;
   1687 }
   1688 
   1689 void istri_42 ( void )
   1690 {
   1691    char* wot = "42";
   1692    UInt(*h)(V128*,V128*) = h_pcmpistri_42;
   1693    UInt(*s)(V128*,V128*) = s_pcmpistri_42;
   1694 
   1695    try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
   1696    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
   1697    try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
   1698    try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
   1699 
   1700    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
   1701    try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
   1702    try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
   1703    try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
   1704    try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
   1705 
   1706    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
   1707    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
   1708    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
   1709    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
   1710 
   1711    try_istri(wot,h,s, "0000000000000000", "0000000000000000");
   1712    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
   1713 
   1714    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
   1715    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
   1716    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
   1717    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
   1718 
   1719    try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
   1720 
   1721    try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
   1722    try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
   1723 }
   1724 
   1725 
   1726 //////////////////////////////////////////////////////////
   1727 //                                                      //
   1728 //                       ISTRI_0E                       //
   1729 //                                                      //
   1730 //////////////////////////////////////////////////////////
   1731 
   1732 __attribute__((noinline))
   1733 UInt h_pcmpistri_0E ( V128* argL, V128* argR )
   1734 {
   1735    V128 block[2];
   1736    memcpy(&block[0], argL, sizeof(V128));
   1737    memcpy(&block[1], argR, sizeof(V128));
   1738    ULong res = 0, flags = 0;
   1739    __asm__ __volatile__(
   1740       "movdqu    0(%2),  %%xmm2"            "\n\t"
   1741       "movdqu    16(%2), %%xmm11"           "\n\t"
   1742       "pcmpistri $0x0E,  %%xmm2, %%xmm11"   "\n\t"
   1743       "pushfq"                              "\n\t"
   1744       "popq      %%rdx"                     "\n\t"
   1745       "movq      %%rcx,  %0"                "\n\t"
   1746       "movq      %%rdx,  %1"                "\n\t"
   1747       : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
   1748       : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   1749    );
   1750    return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
   1751 }
   1752 
   1753 UInt s_pcmpistri_0E ( V128* argLU, V128* argRU )
   1754 {
   1755    V128 resV;
   1756    UInt resOSZACP, resECX;
   1757    Bool ok
   1758       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
   1759                        zmask_from_V128(argLU),
   1760                        zmask_from_V128(argRU),
   1761                        0x0E, False/*!isSTRM*/
   1762         );
   1763    assert(ok);
   1764    resECX = resV.uInt[0];
   1765    return (resOSZACP << 16) | resECX;
   1766 }
   1767 
   1768 void istri_0E ( void )
   1769 {
   1770    char* wot = "0E";
   1771    UInt(*h)(V128*,V128*) = h_pcmpistri_0E;
   1772    UInt(*s)(V128*,V128*) = s_pcmpistri_0E;
   1773 
   1774    try_istri(wot,h,s, "111111111abcde11", "00000000000abcde");
   1775 
   1776    try_istri(wot,h,s, "111111111abcde11", "0000abcde00abcde");
   1777 
   1778    try_istri(wot,h,s, "1111111111abcde1", "00000000000abcde");
   1779    try_istri(wot,h,s, "11111111111abcde", "00000000000abcde");
   1780    try_istri(wot,h,s, "111111111111abcd", "00000000000abcde");
   1781 
   1782    try_istri(wot,h,s, "111abcde1abcde11", "00000000000abcde");
   1783 
   1784    try_istri(wot,h,s, "11abcde11abcde11", "00000000000abcde");
   1785    try_istri(wot,h,s, "1abcde111abcde11", "00000000000abcde");
   1786    try_istri(wot,h,s, "abcde1111abcde11", "00000000000abcde");
   1787    try_istri(wot,h,s, "bcde11111abcde11", "00000000000abcde");
   1788    try_istri(wot,h,s, "cde111111abcde11", "00000000000abcde");
   1789 
   1790    try_istri(wot,h,s, "01abcde11abcde11", "00000000000abcde");
   1791    try_istri(wot,h,s, "00abcde11abcde11", "00000000000abcde");
   1792    try_istri(wot,h,s, "000bcde11abcde11", "00000000000abcde");
   1793 
   1794    try_istri(wot,h,s, "00abcde10abcde11", "00000000000abcde");
   1795    try_istri(wot,h,s, "00abcde100bcde11", "00000000000abcde");
   1796 
   1797    try_istri(wot,h,s, "1111111111111234", "0000000000000000");
   1798    try_istri(wot,h,s, "1111111111111234", "0000000000000001");
   1799    try_istri(wot,h,s, "1111111111111234", "0000000000000011");
   1800 
   1801    try_istri(wot,h,s, "1111111111111234", "1111111111111234");
   1802    try_istri(wot,h,s, "a111111111111111", "000000000000000a");
   1803    try_istri(wot,h,s, "b111111111111111", "000000000000000a");
   1804 
   1805    try_istri(wot,h,s, "b111111111111111", "0000000000000000");
   1806    try_istri(wot,h,s, "0000000000000000", "0000000000000000");
   1807    try_istri(wot,h,s, "123456789abcdef1", "0000000000000000");
   1808    try_istri(wot,h,s, "0000000000000000", "123456789abcdef1");
   1809 }
   1810 
   1811 
   1812 //////////////////////////////////////////////////////////
   1813 //                                                      //
   1814 //                       ISTRI_34                       //
   1815 //                                                      //
   1816 //////////////////////////////////////////////////////////
   1817 
   1818 UInt h_pcmpistri_34 ( V128* argL, V128* argR )
   1819 {
   1820    V128 block[2];
   1821    memcpy(&block[0], argL, sizeof(V128));
   1822    memcpy(&block[1], argR, sizeof(V128));
   1823    ULong res, flags;
   1824    __asm__ __volatile__(
   1825       "subq      $1024,  %%rsp"             "\n\t"
   1826       "movdqu    0(%2),  %%xmm2"            "\n\t"
   1827       "movdqu    16(%2), %%xmm11"           "\n\t"
   1828       "pcmpistri $0x34,  %%xmm2, %%xmm11"   "\n\t"
   1829       "pushfq"                              "\n\t"
   1830       "popq      %%rdx"                     "\n\t"
   1831       "movq      %%rcx,  %0"                "\n\t"
   1832       "movq      %%rdx,  %1"                "\n\t"
   1833       "addq      $1024,  %%rsp"             "\n\t"
   1834       : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
   1835       : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   1836    );
   1837    return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
   1838 }
   1839 
   1840 UInt s_pcmpistri_34 ( V128* argLU, V128* argRU )
   1841 {
   1842    V128 resV;
   1843    UInt resOSZACP, resECX;
   1844    Bool ok
   1845       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
   1846                        zmask_from_V128(argLU),
   1847                        zmask_from_V128(argRU),
   1848                        0x34, False/*!isSTRM*/
   1849         );
   1850    assert(ok);
   1851    resECX = resV.uInt[0];
   1852    return (resOSZACP << 16) | resECX;
   1853 }
   1854 
   1855 void istri_34 ( void )
   1856 {
   1857    char* wot = "34";
   1858    UInt(*h)(V128*,V128*) = h_pcmpistri_34;
   1859    UInt(*s)(V128*,V128*) = s_pcmpistri_34;
   1860 
   1861    try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000bc");
   1862    try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000cb");
   1863    try_istri(wot,h,s, "baaabbbbccccdddd", "00000000000000cb");
   1864    try_istri(wot,h,s, "baaabbbbccccdddc", "00000000000000cb");
   1865 
   1866    try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
   1867    try_istri(wot,h,s, "bbbbbbbb0bbbbbbb", "00000000000000cb");
   1868    try_istri(wot,h,s, "bbbbbbbbbbbbbb0b", "00000000000000cb");
   1869    try_istri(wot,h,s, "bbbbbbbbbbbbbbb0", "00000000000000cb");
   1870    try_istri(wot,h,s, "0000000000000000", "00000000000000cb");
   1871 
   1872    try_istri(wot,h,s, "0000000000000000", "0000000000000000");
   1873 
   1874    try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
   1875    try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000000b");
   1876    try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000062cb");
   1877 
   1878    try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000002cb");
   1879    try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000000cb");
   1880    try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "000000000000000b");
   1881 
   1882    try_istri(wot,h,s, "0123456789abcdef", "000000fecb975421");
   1883    try_istri(wot,h,s, "123456789abcdef1", "000000fecb975421");
   1884 
   1885    try_istri(wot,h,s, "0123456789abcdef", "00000000dca86532");
   1886    try_istri(wot,h,s, "123456789abcdef1", "00000000dca86532");
   1887 
   1888    try_istri(wot,h,s, "163887ec041a9b72", "fcd75adb9b3e895a");
   1889    try_istri(wot,h,s, "fc937cbfbf53f8e2", "0d136bcb024d3fb7");
   1890    try_istri(wot,h,s, "2ca34182c29a82ab", "302ebd646775ab54");
   1891    try_istri(wot,h,s, "3f2987608c11be6f", "a9ecb661f8e0a8cb");
   1892 }
   1893 
   1894 
   1895 //////////////////////////////////////////////////////////
   1896 //                                                      //
   1897 //                       ISTRI_14                       //
   1898 //                                                      //
   1899 //////////////////////////////////////////////////////////
   1900 
   1901 UInt h_pcmpistri_14 ( V128* argL, V128* argR )
   1902 {
   1903    V128 block[2];
   1904    memcpy(&block[0], argL, sizeof(V128));
   1905    memcpy(&block[1], argR, sizeof(V128));
   1906    ULong res, flags;
   1907    __asm__ __volatile__(
   1908       "subq      $1024,  %%rsp"             "\n\t"
   1909       "movdqu    0(%2),  %%xmm2"            "\n\t"
   1910       "movdqu    16(%2), %%xmm11"           "\n\t"
   1911       "pcmpistri $0x14,  %%xmm2, %%xmm11"   "\n\t"
   1912       "pushfq"                              "\n\t"
   1913       "popq      %%rdx"                     "\n\t"
   1914       "movq      %%rcx,  %0"                "\n\t"
   1915       "movq      %%rdx,  %1"                "\n\t"
   1916       "addq      $1024,  %%rsp"             "\n\t"
   1917       : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
   1918       : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   1919    );
   1920    return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
   1921 }
   1922 
   1923 UInt s_pcmpistri_14 ( V128* argLU, V128* argRU )
   1924 {
   1925    V128 resV;
   1926    UInt resOSZACP, resECX;
   1927    Bool ok
   1928       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
   1929                        zmask_from_V128(argLU),
   1930                        zmask_from_V128(argRU),
   1931                        0x14, False/*!isSTRM*/
   1932         );
   1933    assert(ok);
   1934    resECX = resV.uInt[0];
   1935    return (resOSZACP << 16) | resECX;
   1936 }
   1937 
   1938 void istri_14 ( void )
   1939 {
   1940    char* wot = "14";
   1941    UInt(*h)(V128*,V128*) = h_pcmpistri_14;
   1942    UInt(*s)(V128*,V128*) = s_pcmpistri_14;
   1943 
   1944    try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000bc");
   1945    try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000cb");
   1946    try_istri(wot,h,s, "baaabbbbccccdddd", "00000000000000cb");
   1947    try_istri(wot,h,s, "baaabbbbccccdddc", "00000000000000cb");
   1948 
   1949    try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
   1950    try_istri(wot,h,s, "bbbbbbbb0bbbbbbb", "00000000000000cb");
   1951    try_istri(wot,h,s, "bbbbbbbbbbbbbb0b", "00000000000000cb");
   1952    try_istri(wot,h,s, "bbbbbbbbbbbbbbb0", "00000000000000cb");
   1953    try_istri(wot,h,s, "0000000000000000", "00000000000000cb");
   1954 
   1955    try_istri(wot,h,s, "0000000000000000", "0000000000000000");
   1956 
   1957    try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
   1958    try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000000b");
   1959    try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000062cb");
   1960 
   1961    try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000002cb");
   1962    try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000000cb");
   1963    try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "000000000000000b");
   1964 
   1965    try_istri(wot,h,s, "0123456789abcdef", "000000fecb975421");
   1966    try_istri(wot,h,s, "123456789abcdef1", "000000fecb975421");
   1967 
   1968    try_istri(wot,h,s, "0123456789abcdef", "00000000dca86532");
   1969    try_istri(wot,h,s, "123456789abcdef1", "00000000dca86532");
   1970 
   1971    try_istri(wot,h,s, "163887ec041a9b72", "fcd75adb9b3e895a");
   1972    try_istri(wot,h,s, "fc937cbfbf53f8e2", "0d136bcb024d3fb7");
   1973    try_istri(wot,h,s, "2ca34182c29a82ab", "302ebd646775ab54");
   1974    try_istri(wot,h,s, "3f2987608c11be6f", "a9ecb661f8e0a8cb");
   1975 }
   1976 
   1977 
   1978 //////////////////////////////////////////////////////////
   1979 //                                                      //
   1980 //                       ISTRI_70                       //
   1981 //                                                      //
   1982 //////////////////////////////////////////////////////////
   1983 
   1984 UInt h_pcmpistri_70 ( V128* argL, V128* argR )
   1985 {
   1986    V128 block[2];
   1987    memcpy(&block[0], argL, sizeof(V128));
   1988    memcpy(&block[1], argR, sizeof(V128));
   1989    ULong res, flags;
   1990    __asm__ __volatile__(
   1991       "subq      $1024,  %%rsp"             "\n\t"
   1992       "movdqu    0(%2),  %%xmm2"            "\n\t"
   1993       "movdqu    16(%2), %%xmm11"           "\n\t"
   1994       "pcmpistri $0x70,  %%xmm2, %%xmm11"   "\n\t"
   1995       "pushfq"                              "\n\t"
   1996       "popq      %%rdx"                     "\n\t"
   1997       "movq      %%rcx,  %0"                "\n\t"
   1998       "movq      %%rdx,  %1"                "\n\t"
   1999       "addq      $1024,  %%rsp"             "\n\t"
   2000       : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
   2001       : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   2002    );
   2003    return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
   2004 }
   2005 
   2006 UInt s_pcmpistri_70 ( V128* argLU, V128* argRU )
   2007 {
   2008    V128 resV;
   2009    UInt resOSZACP, resECX;
   2010    Bool ok
   2011       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
   2012                        zmask_from_V128(argLU),
   2013                        zmask_from_V128(argRU),
   2014                        0x70, False/*!isSTRM*/
   2015         );
   2016    assert(ok);
   2017    resECX = resV.uInt[0];
   2018    return (resOSZACP << 16) | resECX;
   2019 }
   2020 
   2021 void istri_70 ( void )
   2022 {
   2023    char* wot = "70";
   2024    UInt(*h)(V128*,V128*) = h_pcmpistri_70;
   2025    UInt(*s)(V128*,V128*) = s_pcmpistri_70;
   2026 
   2027    try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
   2028    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
   2029    try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
   2030    try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
   2031 
   2032    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
   2033    try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
   2034    try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
   2035    try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
   2036    try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
   2037 
   2038    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
   2039    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
   2040    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
   2041    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
   2042 
   2043    try_istri(wot,h,s, "0000000000000000", "0000000000000000");
   2044    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
   2045 
   2046    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
   2047    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
   2048    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
   2049    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
   2050 
   2051    try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
   2052 
   2053    try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
   2054    try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
   2055 }
   2056 
   2057 
   2058 //////////////////////////////////////////////////////////
   2059 //                                                      //
   2060 //                       ISTRI_62                       //
   2061 //                                                      //
   2062 //////////////////////////////////////////////////////////
   2063 
   2064 UInt h_pcmpistri_62 ( V128* argL, V128* argR )
   2065 {
   2066    V128 block[2];
   2067    memcpy(&block[0], argL, sizeof(V128));
   2068    memcpy(&block[1], argR, sizeof(V128));
   2069    ULong res, flags;
   2070    __asm__ __volatile__(
   2071       "subq      $1024,  %%rsp"             "\n\t"
   2072       "movdqu    0(%2),  %%xmm2"            "\n\t"
   2073       "movdqu    16(%2), %%xmm11"           "\n\t"
   2074       "pcmpistri $0x62,  %%xmm2, %%xmm11"   "\n\t"
   2075       "pushfq"                              "\n\t"
   2076       "popq      %%rdx"                     "\n\t"
   2077       "movq      %%rcx,  %0"                "\n\t"
   2078       "movq      %%rdx,  %1"                "\n\t"
   2079       "addq      $1024,  %%rsp"             "\n\t"
   2080       : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
   2081       : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   2082    );
   2083    return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
   2084 }
   2085 
   2086 UInt s_pcmpistri_62 ( V128* argLU, V128* argRU )
   2087 {
   2088    V128 resV;
   2089    UInt resOSZACP, resECX;
   2090    Bool ok
   2091       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
   2092                        zmask_from_V128(argLU),
   2093                        zmask_from_V128(argRU),
   2094                        0x62, False/*!isSTRM*/
   2095         );
   2096    assert(ok);
   2097    resECX = resV.uInt[0];
   2098    return (resOSZACP << 16) | resECX;
   2099 }
   2100 
   2101 void istri_62 ( void )
   2102 {
   2103    char* wot = "62";
   2104    UInt(*h)(V128*,V128*) = h_pcmpistri_62;
   2105    UInt(*s)(V128*,V128*) = s_pcmpistri_62;
   2106 
   2107    try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
   2108    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
   2109    try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
   2110    try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
   2111 
   2112    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
   2113    try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
   2114    try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
   2115    try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
   2116    try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
   2117 
   2118    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
   2119    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
   2120    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
   2121    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
   2122 
   2123    try_istri(wot,h,s, "0000000000000000", "0000000000000000");
   2124    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
   2125 
   2126    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
   2127    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
   2128    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
   2129    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
   2130 
   2131    try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
   2132 
   2133    try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
   2134    try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
   2135 }
   2136 
   2137 
   2138 //////////////////////////////////////////////////////////
   2139 //                                                      //
   2140 //                       ISTRI_72                       //
   2141 //                                                      //
   2142 //////////////////////////////////////////////////////////
   2143 
   2144 UInt h_pcmpistri_72 ( V128* argL, V128* argR )
   2145 {
   2146    V128 block[2];
   2147    memcpy(&block[0], argL, sizeof(V128));
   2148    memcpy(&block[1], argR, sizeof(V128));
   2149    ULong res, flags;
   2150    __asm__ __volatile__(
   2151       "subq      $1024,  %%rsp"             "\n\t"
   2152       "movdqu    0(%2),  %%xmm2"            "\n\t"
   2153       "movdqu    16(%2), %%xmm11"           "\n\t"
   2154       "pcmpistri $0x72,  %%xmm2, %%xmm11"   "\n\t"
   2155       "pushfq"                              "\n\t"
   2156       "popq      %%rdx"                     "\n\t"
   2157       "movq      %%rcx,  %0"                "\n\t"
   2158       "movq      %%rdx,  %1"                "\n\t"
   2159       "addq      $1024,  %%rsp"             "\n\t"
   2160       : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
   2161       : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   2162    );
   2163    return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
   2164 }
   2165 
   2166 UInt s_pcmpistri_72 ( V128* argLU, V128* argRU )
   2167 {
   2168    V128 resV;
   2169    UInt resOSZACP, resECX;
   2170    Bool ok
   2171       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
   2172                        zmask_from_V128(argLU),
   2173                        zmask_from_V128(argRU),
   2174                        0x72, False/*!isSTRM*/
   2175         );
   2176    assert(ok);
   2177    resECX = resV.uInt[0];
   2178    return (resOSZACP << 16) | resECX;
   2179 }
   2180 
   2181 void istri_72 ( void )
   2182 {
   2183    char* wot = "72";
   2184    UInt(*h)(V128*,V128*) = h_pcmpistri_72;
   2185    UInt(*s)(V128*,V128*) = s_pcmpistri_72;
   2186 
   2187    try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
   2188    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
   2189    try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
   2190    try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
   2191 
   2192    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
   2193    try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
   2194    try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
   2195    try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
   2196    try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
   2197 
   2198    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
   2199    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
   2200    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
   2201    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
   2202 
   2203    try_istri(wot,h,s, "0000000000000000", "0000000000000000");
   2204    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
   2205 
   2206    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
   2207    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
   2208    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
   2209    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
   2210 
   2211    try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
   2212 
   2213    try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
   2214    try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
   2215 }
   2216 
   2217 
   2218 //////////////////////////////////////////////////////////
   2219 //                                                      //
   2220 //                       ISTRI_10                       //
   2221 //                                                      //
   2222 //////////////////////////////////////////////////////////
   2223 
   2224 UInt h_pcmpistri_10 ( V128* argL, V128* argR )
   2225 {
   2226    V128 block[2];
   2227    memcpy(&block[0], argL, sizeof(V128));
   2228    memcpy(&block[1], argR, sizeof(V128));
   2229    ULong res, flags;
   2230    __asm__ __volatile__(
   2231       "subq      $1024,  %%rsp"             "\n\t"
   2232       "movdqu    0(%2),  %%xmm2"            "\n\t"
   2233       "movdqu    16(%2), %%xmm11"           "\n\t"
   2234       "pcmpistri $0x10,  %%xmm2, %%xmm11"   "\n\t"
   2235 //"pcmpistrm $0x10, %%xmm2, %%xmm11"   "\n\t"
   2236 //"movd %%xmm0, %%ecx" "\n\t"
   2237       "pushfq"                              "\n\t"
   2238       "popq      %%rdx"                     "\n\t"
   2239       "movq      %%rcx,  %0"                "\n\t"
   2240       "movq      %%rdx,  %1"                "\n\t"
   2241       "addq      $1024,  %%rsp"             "\n\t"
   2242       : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
   2243       : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   2244    );
   2245    return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
   2246 }
   2247 
   2248 UInt s_pcmpistri_10 ( V128* argLU, V128* argRU )
   2249 {
   2250    V128 resV;
   2251    UInt resOSZACP, resECX;
   2252    Bool ok
   2253       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
   2254                        zmask_from_V128(argLU),
   2255                        zmask_from_V128(argRU),
   2256                        0x10, False/*!isSTRM*/
   2257         );
   2258    assert(ok);
   2259    resECX = resV.uInt[0];
   2260    return (resOSZACP << 16) | resECX;
   2261 }
   2262 
   2263 void istri_10 ( void )
   2264 {
   2265    char* wot = "10";
   2266    UInt(*h)(V128*,V128*) = h_pcmpistri_10;
   2267    UInt(*s)(V128*,V128*) = s_pcmpistri_10;
   2268 
   2269    try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
   2270    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
   2271    try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
   2272    try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
   2273 
   2274    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
   2275    try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
   2276    try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
   2277    try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
   2278    try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
   2279 
   2280    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
   2281    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
   2282    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
   2283    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
   2284 
   2285    try_istri(wot,h,s, "0000000000000000", "0000000000000000");
   2286    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
   2287 
   2288    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
   2289    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
   2290    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
   2291    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
   2292 
   2293    try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
   2294 
   2295    try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
   2296    try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
   2297 }
   2298 
   2299 
   2300 //////////////////////////////////////////////////////////
   2301 //                                                      //
   2302 //                         main                         //
   2303 //                                                      //
   2304 //////////////////////////////////////////////////////////
   2305 
   /* Program entry point: invokes every istri_XX suite once, in the fixed
      order below, then exits with status 0.
      NOTE(review): the order is presumably significant if the program's
      output is compared against a stored expected file -- confirm before
      reordering. */
   int main ( void )
   {
      istri_4A();  istri_3A();  istri_08();  istri_18();
      istri_1A();  istri_02();  istri_0C();  istri_12();
      istri_44();  istri_00();  istri_38();  istri_46();
      istri_30();  istri_40();  istri_42();  istri_0E();
      istri_14();  istri_34();  istri_70();  istri_62();
      istri_72();  istri_10();

      return 0;
   }
   2332