Home | History | Annotate | Download | only in amd64
      1 
      2 /* Tests e-vs-i or i-vs-m aspects for pcmp{e,i}str{i,m}.  Does not
      3    check the core arithmetic in any detail. This file checks the 16-bit
      4    character versions (w is for wide) */
      5 
      6 #include <string.h>
      7 #include <stdio.h>
      8 #include <assert.h>
      9 
     10 typedef  unsigned char  V128[16];
     11 typedef  unsigned int   UInt;
     12 typedef  signed int     Int;
     13 typedef  unsigned char  UChar;
     14 typedef  unsigned long long int ULong;
     15 typedef  UChar          Bool;
     16 #define False ((Bool)0)
     17 #define True  ((Bool)1)
     18 
     19 void show_V128 ( V128* vec )
     20 {
     21    Int i;
     22    for (i = 15; i >= 0; i--)
     23       printf("%02x", (UInt)( (*vec)[i] ));
     24 }
     25 
     26 void expand ( V128* dst, char* summary )
     27 {
     28    Int i;
     29    assert( strlen(summary) == 16 );
     30    for (i = 0; i < 16; i++) {
     31       UChar xx = 0;
     32       UChar x = summary[15-i];
     33       if      (x >= '0' && x <= '9') { xx = x - '0'; }
     34       else if (x >= 'A' && x <= 'F') { xx = x - 'A' + 10; }
     35       else if (x >= 'a' && x <= 'f') { xx = x - 'a' + 10; }
     36       else assert(0);
     37 
     38       assert(xx < 16);
     39       xx = (xx << 4) | xx;
     40       assert(xx < 256);
     41       (*dst)[i] = xx;
     42    }
     43 }
     44 
     45 void one_test ( char* summL, ULong rdxIN, char* summR, ULong raxIN )
     46 {
     47    V128 argL, argR;
     48    expand( &argL, summL );
     49    expand( &argR, summR );
     50    printf("\n");
     51    printf("rdx %016llx  argL ", rdxIN);
     52    show_V128(&argL);
     53    printf("  rax %016llx  argR ", raxIN);
     54    show_V128(&argR);
     55    printf("\n");
     56 
     57    ULong block[ 2/*in:argL*/          // 0  0
     58                 + 2/*in:argR*/        // 2  16
     59                 + 1/*in:rdx*/         // 4  32
     60                 + 1/*in:rax*/         // 5  40
     61                 + 2/*inout:xmm0*/     // 6  48
     62                 + 1/*inout:rcx*/      // 8  64
     63                 + 1/*out:rflags*/ ];  // 9  72
     64    assert(sizeof(block) == 80);
     65 
     66    UChar* blockC = (UChar*)&block[0];
     67 
     68    /* ---------------- ISTRI_4B ---------------- */
     69    memset(blockC, 0x55, 80);
     70    memcpy(blockC + 0,  &argL,  16);
     71    memcpy(blockC + 16, &argR,  16);
     72    memcpy(blockC + 24, &rdxIN, 8);
     73    memcpy(blockC + 32, &raxIN, 8);
     74    memcpy(blockC + 40, &rdxIN, 8);
     75    __asm__ __volatile__(
     76       "movupd    0(%0), %%xmm2"           "\n\t"
     77       "movupd    16(%0), %%xmm13"         "\n\t"
     78       "movq      32(%0), %%rdx"           "\n\t"
     79       "movq      40(%0), %%rax"           "\n\t"
     80       "movupd    48(%0), %%xmm0"          "\n\t"
     81       "movw      64(%0), %%cx"            "\n\t"
     82       "pcmpistri $0x4B, %%xmm2, %%xmm13"  "\n\t"
     83       "movupd    %%xmm0, 48(%0)"          "\n\t"
     84       "movw      %%cx, 64(%0)"            "\n\t"
     85       "pushfq"                            "\n\t"
     86       "popq      %%r15"                   "\n\t"
     87       "movq      %%r15, 72(%0)"           "\n\t"
     88       : /*out*/
     89       : /*in*/"r"(blockC)
     90       : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
     91    );
     92    printf("  istri $0x4B:  ");
     93    printf("    xmm0 ");
     94    show_V128( (V128*)(blockC+48) );
     95    printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
     96 
     97    /* ---------------- ISTRI_0B ---------------- */
     98    memset(blockC, 0x55, 80);
     99    memcpy(blockC + 0,  &argL,  16);
    100    memcpy(blockC + 16, &argR,  16);
    101    memcpy(blockC + 24, &rdxIN, 8);
    102    memcpy(blockC + 32, &raxIN, 8);
    103    memcpy(blockC + 40, &rdxIN, 8);
    104    __asm__ __volatile__(
    105       "movupd    0(%0), %%xmm2"           "\n\t"
    106       "movupd    16(%0), %%xmm13"         "\n\t"
    107       "movq      32(%0), %%rdx"           "\n\t"
    108       "movq      40(%0), %%rax"           "\n\t"
    109       "movupd    48(%0), %%xmm0"          "\n\t"
    110       "movw      64(%0), %%cx"            "\n\t"
    111       "pcmpistri $0x0B, %%xmm2, %%xmm13"  "\n\t"
    112       "movupd    %%xmm0, 48(%0)"          "\n\t"
    113       "movw      %%cx, 64(%0)"            "\n\t"
    114       "pushfq"                            "\n\t"
    115       "popq      %%r15"                   "\n\t"
    116       "movq      %%r15, 72(%0)"           "\n\t"
    117       : /*out*/
    118       : /*in*/"r"(blockC)
    119       : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
    120    );
    121    printf("  istri $0x0B:  ");
    122    printf("    xmm0 ");
    123    show_V128( (V128*)(blockC+48) );
    124    printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
    125 
    126    /* ---------------- ISTRM_4B ---------------- */
    127    memset(blockC, 0x55, 80);
    128    memcpy(blockC + 0,  &argL,  16);
    129    memcpy(blockC + 16, &argR,  16);
    130    memcpy(blockC + 24, &rdxIN, 8);
    131    memcpy(blockC + 32, &raxIN, 8);
    132    memcpy(blockC + 40, &rdxIN, 8);
    133    __asm__ __volatile__(
    134       "movupd    0(%0), %%xmm2"           "\n\t"
    135       "movupd    16(%0), %%xmm13"         "\n\t"
    136       "movq      32(%0), %%rdx"           "\n\t"
    137       "movq      40(%0), %%rax"           "\n\t"
    138       "movupd    48(%0), %%xmm0"          "\n\t"
    139       "movw      64(%0), %%cx"            "\n\t"
    140       "pcmpistrm $0x4B, %%xmm2, %%xmm13"  "\n\t"
    141       "movupd    %%xmm0, 48(%0)"          "\n\t"
    142       "movw      %%cx, 64(%0)"            "\n\t"
    143       "pushfq"                            "\n\t"
    144       "popq      %%r15"                   "\n\t"
    145       "movq      %%r15, 72(%0)"           "\n\t"
    146       : /*out*/
    147       : /*in*/"r"(blockC)
    148       : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
    149    );
    150    printf("  istrm $0x4B:  ");
    151    printf("    xmm0 ");
    152    show_V128( (V128*)(blockC+48) );
    153    printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
    154 
    155    /* ---------------- ISTRM_0B ---------------- */
    156    memset(blockC, 0x55, 80);
    157    memcpy(blockC + 0,  &argL,  16);
    158    memcpy(blockC + 16, &argR,  16);
    159    memcpy(blockC + 24, &rdxIN, 8);
    160    memcpy(blockC + 32, &raxIN, 8);
    161    memcpy(blockC + 40, &rdxIN, 8);
    162    __asm__ __volatile__(
    163       "movupd    0(%0), %%xmm2"           "\n\t"
    164       "movupd    16(%0), %%xmm13"         "\n\t"
    165       "movq      32(%0), %%rdx"           "\n\t"
    166       "movq      40(%0), %%rax"           "\n\t"
    167       "movupd    48(%0), %%xmm0"          "\n\t"
    168       "movw      64(%0), %%cx"            "\n\t"
    169       "pcmpistrm $0x0B, %%xmm2, %%xmm13"  "\n\t"
    170       "movupd    %%xmm0, 48(%0)"          "\n\t"
    171       "movw      %%cx, 64(%0)"            "\n\t"
    172       "pushfq"                            "\n\t"
    173       "popq      %%r15"                   "\n\t"
    174       "movq      %%r15, 72(%0)"           "\n\t"
    175       : /*out*/
    176       : /*in*/"r"(blockC)
    177       : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
    178    );
    179    printf("  istrm $0x0B:  ");
    180    printf("    xmm0 ");
    181    show_V128( (V128*)(blockC+48) );
    182    printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
    183 
    184    /* ---------------- ESTRI_4B ---------------- */
    185    memset(blockC, 0x55, 80);
    186    memcpy(blockC + 0,  &argL,  16);
    187    memcpy(blockC + 16, &argR,  16);
    188    memcpy(blockC + 24, &rdxIN, 8);
    189    memcpy(blockC + 32, &raxIN, 8);
    190    memcpy(blockC + 40, &rdxIN, 8);
    191    __asm__ __volatile__(
    192       "movupd    0(%0), %%xmm2"           "\n\t"
    193       "movupd    16(%0), %%xmm13"         "\n\t"
    194       "movq      32(%0), %%rdx"           "\n\t"
    195       "movq      40(%0), %%rax"           "\n\t"
    196       "movupd    48(%0), %%xmm0"          "\n\t"
    197       "movw      64(%0), %%cx"            "\n\t"
    198       "pcmpestri $0x4B, %%xmm2, %%xmm13"  "\n\t"
    199       "movupd    %%xmm0, 48(%0)"          "\n\t"
    200       "movw      %%cx, 64(%0)"            "\n\t"
    201       "pushfq"                            "\n\t"
    202       "popq      %%r15"                   "\n\t"
    203       "movq      %%r15, 72(%0)"           "\n\t"
    204       : /*out*/
    205       : /*in*/"r"(blockC)
    206       : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
    207    );
    208    printf("  estri $0x4B:  ");
    209    printf("    xmm0 ");
    210    show_V128( (V128*)(blockC+48) );
    211    printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
    212 
    213    /* ---------------- ESTRI_0B ---------------- */
    214    memset(blockC, 0x55, 80);
    215    memcpy(blockC + 0,  &argL,  16);
    216    memcpy(blockC + 16, &argR,  16);
    217    memcpy(blockC + 24, &rdxIN, 8);
    218    memcpy(blockC + 32, &raxIN, 8);
    219    memcpy(blockC + 40, &rdxIN, 8);
    220    __asm__ __volatile__(
    221       "movupd    0(%0), %%xmm2"           "\n\t"
    222       "movupd    16(%0), %%xmm13"         "\n\t"
    223       "movq      32(%0), %%rdx"           "\n\t"
    224       "movq      40(%0), %%rax"           "\n\t"
    225       "movupd    48(%0), %%xmm0"          "\n\t"
    226       "movw      64(%0), %%cx"            "\n\t"
    227       "pcmpestri $0x0B, %%xmm2, %%xmm13"  "\n\t"
    228       "movupd    %%xmm0, 48(%0)"          "\n\t"
    229       "movw      %%cx, 64(%0)"            "\n\t"
    230       "pushfq"                            "\n\t"
    231       "popq      %%r15"                   "\n\t"
    232       "movq      %%r15, 72(%0)"           "\n\t"
    233       : /*out*/
    234       : /*in*/"r"(blockC)
    235       : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
    236    );
    237    printf("  estri $0x0B:  ");
    238    printf("    xmm0 ");
    239    show_V128( (V128*)(blockC+48) );
    240    printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
    241 
    242    /* ---------------- ESTRM_4B ---------------- */
    243    memset(blockC, 0x55, 80);
    244    memcpy(blockC + 0,  &argL,  16);
    245    memcpy(blockC + 16, &argR,  16);
    246    memcpy(blockC + 24, &rdxIN, 8);
    247    memcpy(blockC + 32, &raxIN, 8);
    248    memcpy(blockC + 40, &rdxIN, 8);
    249    __asm__ __volatile__(
    250       "movupd    0(%0), %%xmm2"           "\n\t"
    251       "movupd    16(%0), %%xmm13"         "\n\t"
    252       "movq      32(%0), %%rdx"           "\n\t"
    253       "movq      40(%0), %%rax"           "\n\t"
    254       "movupd    48(%0), %%xmm0"          "\n\t"
    255       "movw      64(%0), %%cx"            "\n\t"
    256       "pcmpestrm $0x4B, %%xmm2, %%xmm13"  "\n\t"
    257       "movupd    %%xmm0, 48(%0)"          "\n\t"
    258       "movw      %%cx, 64(%0)"            "\n\t"
    259       "pushfq"                            "\n\t"
    260       "popq      %%r15"                   "\n\t"
    261       "movq      %%r15, 72(%0)"           "\n\t"
    262       : /*out*/
    263       : /*in*/"r"(blockC)
    264       : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
    265    );
    266    printf("  estrm $0x4B:  ");
    267    printf("    xmm0 ");
    268    show_V128( (V128*)(blockC+48) );
    269    printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
    270 
    271    /* ---------------- ESTRM_0B ---------------- */
    272    memset(blockC, 0x55, 80);
    273    memcpy(blockC + 0,  &argL,  16);
    274    memcpy(blockC + 16, &argR,  16);
    275    memcpy(blockC + 24, &rdxIN, 8);
    276    memcpy(blockC + 32, &raxIN, 8);
    277    memcpy(blockC + 40, &rdxIN, 8);
    278    __asm__ __volatile__(
    279       "movupd    0(%0), %%xmm2"           "\n\t"
    280       "movupd    16(%0), %%xmm13"         "\n\t"
    281       "movq      32(%0), %%rdx"           "\n\t"
    282       "movq      40(%0), %%rax"           "\n\t"
    283       "movupd    48(%0), %%xmm0"          "\n\t"
    284       "movw      64(%0), %%cx"            "\n\t"
    285       "pcmpestrm $0x0B, %%xmm2, %%xmm13"  "\n\t"
    286       "movupd    %%xmm0, 48(%0)"          "\n\t"
    287       "movw      %%cx, 64(%0)"            "\n\t"
    288       "pushfq"                            "\n\t"
    289       "popq      %%r15"                   "\n\t"
    290       "movq      %%r15, 72(%0)"           "\n\t"
    291       : /*out*/
    292       : /*in*/"r"(blockC)
    293       : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
    294    );
    295    printf("  estrm $0x0B:  ");
    296    printf("    xmm0 ");
    297    show_V128( (V128*)(blockC+48) );
    298    printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
    299 
    300 
    301 
    302 
    303 }
    304 
    305 int main ( void )
    306 {
    307    one_test("aaaaaaaaaaaaaaaa", 0, "aaaaaaaa00aaaaaa", 0 );
    308    one_test("0000000000000000", 0, "aaaaaaaa00aaaaaa", 0 );
    309 
    310    one_test("aaaaaaaaaaaaaaaa", 0, "aaaaaaaaaaaaaaaa", 0 );
    311    one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 0 );
    312    one_test("aaaaaaaaaaaaaaaa", 0, "aaaaaaaaaaaaaaaa", 6 );
    313 
    314    one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 6 );
    315    one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 15 );
    316    one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 16 );
    317    one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 17 );
    318 
    319    one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -6 );
    320    one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -15 );
    321    one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -16 );
    322    one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -17 );
    323 
    324    one_test("aaaaaaaaaaaaaaaa", 5,  "aaaaaaaaaaaaaaaa", 6 );
    325    one_test("aaaaaaaaaaaaaaaa", 15, "aaaaaaaaaaaaaaaa", 6 );
    326    one_test("aaaaaaaaaaaaaaaa", 16, "aaaaaaaaaaaaaaaa", 6 );
    327    one_test("aaaaaaaaaaaaaaaa", 17, "aaaaaaaaaaaaaaaa", 6 );
    328 
    329    one_test("aaaaaaaaaaaaaaaa", -5,  "aaaaaaaaaaaaaaaa", 6 );
    330    one_test("aaaaaaaaaaaaaaaa", -15, "aaaaaaaaaaaaaaaa", 6 );
    331    one_test("aaaaaaaaaaaaaaaa", -16, "aaaaaaaaaaaaaaaa", 6 );
    332    one_test("aaaaaaaaaaaaaaaa", -17, "aaaaaaaaaaaaaaaa", 6 );
    333 
    334    return 0;
    335 }
    336