Home | History | Annotate | Download | only in amd64
      1 
      2 /* Tests e-vs-i or i-vs-m aspects for pcmp{e,i}str{i,m}.  Does not
      3    check the core arithmetic in any detail.  */
      4 
      5 #include <string.h>
      6 #include <stdio.h>
      7 #include <assert.h>
      8 
      9 typedef  unsigned char  V128[16];
     10 typedef  unsigned int   UInt;
     11 typedef  signed int     Int;
     12 typedef  unsigned char  UChar;
     13 typedef  unsigned long long int ULong;
     14 typedef  UChar          Bool;
     15 #define False ((Bool)0)
     16 #define True  ((Bool)1)
     17 
     18 void show_V128 ( V128* vec )
     19 {
     20    Int i;
     21    for (i = 15; i >= 0; i--)
     22       printf("%02x", (UInt)( (*vec)[i] ));
     23 }
     24 
     25 void expand ( V128* dst, char* summary )
     26 {
     27    Int i;
     28    assert( strlen(summary) == 16 );
     29    for (i = 0; i < 16; i++) {
     30       UChar xx = 0;
     31       UChar x = summary[15-i];
     32       if      (x >= '0' && x <= '9') { xx = x - '0'; }
     33       else if (x >= 'A' && x <= 'F') { xx = x - 'A' + 10; }
     34       else if (x >= 'a' && x <= 'f') { xx = x - 'a' + 10; }
     35       else assert(0);
     36 
     37       assert(xx < 16);
     38       xx = (xx << 4) | xx;
     39       assert(xx < 256);
     40       (*dst)[i] = xx;
     41    }
     42 }
     43 
     44 void one_test ( char* summL, ULong rdxIN, char* summR, ULong raxIN )
     45 {
     46    V128 argL, argR;
     47    expand( &argL, summL );
     48    expand( &argR, summR );
     49    printf("\n");
     50    printf("rdx %016llx  argL ", rdxIN);
     51    show_V128(&argL);
     52    printf("  rax %016llx  argR ", raxIN);
     53    show_V128(&argR);
     54    printf("\n");
     55 
     56    ULong block[ 2/*in:argL*/          // 0  0
     57                 + 2/*in:argR*/        // 2  16
     58                 + 1/*in:rdx*/         // 4  32
     59                 + 1/*in:rax*/         // 5  40
     60                 + 2/*inout:xmm0*/     // 6  48
     61                 + 1/*inout:rcx*/      // 8  64
     62                 + 1/*out:rflags*/ ];  // 9  72
     63    assert(sizeof(block) == 80);
     64 
     65    UChar* blockC = (UChar*)&block[0];
     66 
     67    /* ---------------- ISTRI_4A ---------------- */
     68    memset(blockC, 0x55, 80);
     69    memcpy(blockC + 0,  &argL,  16);
     70    memcpy(blockC + 16, &argR,  16);
     71    memcpy(blockC + 24, &rdxIN, 8);
     72    memcpy(blockC + 32, &raxIN, 8);
     73    memcpy(blockC + 40, &rdxIN, 8);
     74    __asm__ __volatile__(
     75       "movupd    0(%0), %%xmm2"           "\n\t"
     76       "movupd    16(%0), %%xmm13"         "\n\t"
     77       "movq      32(%0), %%rdx"           "\n\t"
     78       "movq      40(%0), %%rax"           "\n\t"
     79       "movupd    48(%0), %%xmm0"          "\n\t"
     80       "movw      64(%0), %%cx"            "\n\t"
     81       "pcmpistri $0x4A, %%xmm2, %%xmm13"  "\n\t"
     82       "movupd    %%xmm0, 48(%0)"          "\n\t"
     83       "movw      %%cx, 64(%0)"            "\n\t"
     84       "pushfq"                            "\n\t"
     85       "popq      %%r15"                   "\n\t"
     86       "movq      %%r15, 72(%0)"           "\n\t"
     87       : /*out*/
     88       : /*in*/"r"(blockC)
     89       : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
     90    );
     91    printf("  istri $0x4A:  ");
     92    printf("    xmm0 ");
     93    show_V128( (V128*)(blockC+48) );
     94    printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
     95 
     96    /* ---------------- ISTRI_0A ---------------- */
     97    memset(blockC, 0x55, 80);
     98    memcpy(blockC + 0,  &argL,  16);
     99    memcpy(blockC + 16, &argR,  16);
    100    memcpy(blockC + 24, &rdxIN, 8);
    101    memcpy(blockC + 32, &raxIN, 8);
    102    memcpy(blockC + 40, &rdxIN, 8);
    103    __asm__ __volatile__(
    104       "movupd    0(%0), %%xmm2"           "\n\t"
    105       "movupd    16(%0), %%xmm13"         "\n\t"
    106       "movq      32(%0), %%rdx"           "\n\t"
    107       "movq      40(%0), %%rax"           "\n\t"
    108       "movupd    48(%0), %%xmm0"          "\n\t"
    109       "movw      64(%0), %%cx"            "\n\t"
    110       "pcmpistri $0x0A, %%xmm2, %%xmm13"  "\n\t"
    111       "movupd    %%xmm0, 48(%0)"          "\n\t"
    112       "movw      %%cx, 64(%0)"            "\n\t"
    113       "pushfq"                            "\n\t"
    114       "popq      %%r15"                   "\n\t"
    115       "movq      %%r15, 72(%0)"           "\n\t"
    116       : /*out*/
    117       : /*in*/"r"(blockC)
    118       : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
    119    );
    120    printf("  istri $0x0A:  ");
    121    printf("    xmm0 ");
    122    show_V128( (V128*)(blockC+48) );
    123    printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
    124 
    125    /* ---------------- ISTRM_4A ---------------- */
    126    memset(blockC, 0x55, 80);
    127    memcpy(blockC + 0,  &argL,  16);
    128    memcpy(blockC + 16, &argR,  16);
    129    memcpy(blockC + 24, &rdxIN, 8);
    130    memcpy(blockC + 32, &raxIN, 8);
    131    memcpy(blockC + 40, &rdxIN, 8);
    132    __asm__ __volatile__(
    133       "movupd    0(%0), %%xmm2"           "\n\t"
    134       "movupd    16(%0), %%xmm13"         "\n\t"
    135       "movq      32(%0), %%rdx"           "\n\t"
    136       "movq      40(%0), %%rax"           "\n\t"
    137       "movupd    48(%0), %%xmm0"          "\n\t"
    138       "movw      64(%0), %%cx"            "\n\t"
    139       "pcmpistrm $0x4A, %%xmm2, %%xmm13"  "\n\t"
    140       "movupd    %%xmm0, 48(%0)"          "\n\t"
    141       "movw      %%cx, 64(%0)"            "\n\t"
    142       "pushfq"                            "\n\t"
    143       "popq      %%r15"                   "\n\t"
    144       "movq      %%r15, 72(%0)"           "\n\t"
    145       : /*out*/
    146       : /*in*/"r"(blockC)
    147       : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
    148    );
    149    printf("  istrm $0x4A:  ");
    150    printf("    xmm0 ");
    151    show_V128( (V128*)(blockC+48) );
    152    printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
    153 
    154    /* ---------------- ISTRM_0A ---------------- */
    155    memset(blockC, 0x55, 80);
    156    memcpy(blockC + 0,  &argL,  16);
    157    memcpy(blockC + 16, &argR,  16);
    158    memcpy(blockC + 24, &rdxIN, 8);
    159    memcpy(blockC + 32, &raxIN, 8);
    160    memcpy(blockC + 40, &rdxIN, 8);
    161    __asm__ __volatile__(
    162       "movupd    0(%0), %%xmm2"           "\n\t"
    163       "movupd    16(%0), %%xmm13"         "\n\t"
    164       "movq      32(%0), %%rdx"           "\n\t"
    165       "movq      40(%0), %%rax"           "\n\t"
    166       "movupd    48(%0), %%xmm0"          "\n\t"
    167       "movw      64(%0), %%cx"            "\n\t"
    168       "pcmpistrm $0x0A, %%xmm2, %%xmm13"  "\n\t"
    169       "movupd    %%xmm0, 48(%0)"          "\n\t"
    170       "movw      %%cx, 64(%0)"            "\n\t"
    171       "pushfq"                            "\n\t"
    172       "popq      %%r15"                   "\n\t"
    173       "movq      %%r15, 72(%0)"           "\n\t"
    174       : /*out*/
    175       : /*in*/"r"(blockC)
    176       : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
    177    );
    178    printf("  istrm $0x0A:  ");
    179    printf("    xmm0 ");
    180    show_V128( (V128*)(blockC+48) );
    181    printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
    182 
    183    /* ---------------- ESTRI_4A ---------------- */
    184    memset(blockC, 0x55, 80);
    185    memcpy(blockC + 0,  &argL,  16);
    186    memcpy(blockC + 16, &argR,  16);
    187    memcpy(blockC + 24, &rdxIN, 8);
    188    memcpy(blockC + 32, &raxIN, 8);
    189    memcpy(blockC + 40, &rdxIN, 8);
    190    __asm__ __volatile__(
    191       "movupd    0(%0), %%xmm2"           "\n\t"
    192       "movupd    16(%0), %%xmm13"         "\n\t"
    193       "movq      32(%0), %%rdx"           "\n\t"
    194       "movq      40(%0), %%rax"           "\n\t"
    195       "movupd    48(%0), %%xmm0"          "\n\t"
    196       "movw      64(%0), %%cx"            "\n\t"
    197       "pcmpestri $0x4A, %%xmm2, %%xmm13"  "\n\t"
    198       "movupd    %%xmm0, 48(%0)"          "\n\t"
    199       "movw      %%cx, 64(%0)"            "\n\t"
    200       "pushfq"                            "\n\t"
    201       "popq      %%r15"                   "\n\t"
    202       "movq      %%r15, 72(%0)"           "\n\t"
    203       : /*out*/
    204       : /*in*/"r"(blockC)
    205       : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
    206    );
    207    printf("  estri $0x4A:  ");
    208    printf("    xmm0 ");
    209    show_V128( (V128*)(blockC+48) );
    210    printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
    211 
    212    /* ---------------- ESTRI_0A ---------------- */
    213    memset(blockC, 0x55, 80);
    214    memcpy(blockC + 0,  &argL,  16);
    215    memcpy(blockC + 16, &argR,  16);
    216    memcpy(blockC + 24, &rdxIN, 8);
    217    memcpy(blockC + 32, &raxIN, 8);
    218    memcpy(blockC + 40, &rdxIN, 8);
    219    __asm__ __volatile__(
    220       "movupd    0(%0), %%xmm2"           "\n\t"
    221       "movupd    16(%0), %%xmm13"         "\n\t"
    222       "movq      32(%0), %%rdx"           "\n\t"
    223       "movq      40(%0), %%rax"           "\n\t"
    224       "movupd    48(%0), %%xmm0"          "\n\t"
    225       "movw      64(%0), %%cx"            "\n\t"
    226       "pcmpestri $0x0A, %%xmm2, %%xmm13"  "\n\t"
    227       "movupd    %%xmm0, 48(%0)"          "\n\t"
    228       "movw      %%cx, 64(%0)"            "\n\t"
    229       "pushfq"                            "\n\t"
    230       "popq      %%r15"                   "\n\t"
    231       "movq      %%r15, 72(%0)"           "\n\t"
    232       : /*out*/
    233       : /*in*/"r"(blockC)
    234       : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
    235    );
    236    printf("  estri $0x0A:  ");
    237    printf("    xmm0 ");
    238    show_V128( (V128*)(blockC+48) );
    239    printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
    240 
    241    /* ---------------- ESTRM_4A ---------------- */
    242    memset(blockC, 0x55, 80);
    243    memcpy(blockC + 0,  &argL,  16);
    244    memcpy(blockC + 16, &argR,  16);
    245    memcpy(blockC + 24, &rdxIN, 8);
    246    memcpy(blockC + 32, &raxIN, 8);
    247    memcpy(blockC + 40, &rdxIN, 8);
    248    __asm__ __volatile__(
    249       "movupd    0(%0), %%xmm2"           "\n\t"
    250       "movupd    16(%0), %%xmm13"         "\n\t"
    251       "movq      32(%0), %%rdx"           "\n\t"
    252       "movq      40(%0), %%rax"           "\n\t"
    253       "movupd    48(%0), %%xmm0"          "\n\t"
    254       "movw      64(%0), %%cx"            "\n\t"
    255       "pcmpestrm $0x4A, %%xmm2, %%xmm13"  "\n\t"
    256       "movupd    %%xmm0, 48(%0)"          "\n\t"
    257       "movw      %%cx, 64(%0)"            "\n\t"
    258       "pushfq"                            "\n\t"
    259       "popq      %%r15"                   "\n\t"
    260       "movq      %%r15, 72(%0)"           "\n\t"
    261       : /*out*/
    262       : /*in*/"r"(blockC)
    263       : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
    264    );
    265    printf("  estrm $0x4A:  ");
    266    printf("    xmm0 ");
    267    show_V128( (V128*)(blockC+48) );
    268    printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
    269 
    270    /* ---------------- ESTRM_0A ---------------- */
    271    memset(blockC, 0x55, 80);
    272    memcpy(blockC + 0,  &argL,  16);
    273    memcpy(blockC + 16, &argR,  16);
    274    memcpy(blockC + 24, &rdxIN, 8);
    275    memcpy(blockC + 32, &raxIN, 8);
    276    memcpy(blockC + 40, &rdxIN, 8);
    277    __asm__ __volatile__(
    278       "movupd    0(%0), %%xmm2"           "\n\t"
    279       "movupd    16(%0), %%xmm13"         "\n\t"
    280       "movq      32(%0), %%rdx"           "\n\t"
    281       "movq      40(%0), %%rax"           "\n\t"
    282       "movupd    48(%0), %%xmm0"          "\n\t"
    283       "movw      64(%0), %%cx"            "\n\t"
    284       "pcmpestrm $0x0A, %%xmm2, %%xmm13"  "\n\t"
    285       "movupd    %%xmm0, 48(%0)"          "\n\t"
    286       "movw      %%cx, 64(%0)"            "\n\t"
    287       "pushfq"                            "\n\t"
    288       "popq      %%r15"                   "\n\t"
    289       "movq      %%r15, 72(%0)"           "\n\t"
    290       : /*out*/
    291       : /*in*/"r"(blockC)
    292       : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
    293    );
    294    printf("  estrm $0x0A:  ");
    295    printf("    xmm0 ");
    296    show_V128( (V128*)(blockC+48) );
    297    printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
    298 
    299 
    300 
    301 
    302 }
    303 
    304 int main ( void )
    305 {
    306    one_test("aaaaaaaaaaaaaaaa", 0, "aaaaaaaa0aaaaaaa", 0 );
    307    one_test("0000000000000000", 0, "aaaaaaaa0aaaaaaa", 0 );
    308 
    309    one_test("aaaaaaaaaaaaaaaa", 0, "aaaaaaaaaaaaaaaa", 0 );
    310    one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 0 );
    311    one_test("aaaaaaaaaaaaaaaa", 0, "aaaaaaaaaaaaaaaa", 6 );
    312 
    313    one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 6 );
    314    one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 15 );
    315    one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 16 );
    316    one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 17 );
    317 
    318    one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -6 );
    319    one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -15 );
    320    one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -16 );
    321    one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -17 );
    322 
    323    one_test("aaaaaaaaaaaaaaaa", 5,  "aaaaaaaaaaaaaaaa", 6 );
    324    one_test("aaaaaaaaaaaaaaaa", 15, "aaaaaaaaaaaaaaaa", 6 );
    325    one_test("aaaaaaaaaaaaaaaa", 16, "aaaaaaaaaaaaaaaa", 6 );
    326    one_test("aaaaaaaaaaaaaaaa", 17, "aaaaaaaaaaaaaaaa", 6 );
    327 
    328    one_test("aaaaaaaaaaaaaaaa", -5,  "aaaaaaaaaaaaaaaa", 6 );
    329    one_test("aaaaaaaaaaaaaaaa", -15, "aaaaaaaaaaaaaaaa", 6 );
    330    one_test("aaaaaaaaaaaaaaaa", -16, "aaaaaaaaaaaaaaaa", 6 );
    331    one_test("aaaaaaaaaaaaaaaa", -17, "aaaaaaaaaaaaaaaa", 6 );
    332 
    333    return 0;
    334 }
    335