/* amd64 XSAVE/XRSTOR state-management test. */
      1 
      2 #include <stdio.h>
      3 #include <stdlib.h>
      4 #include <assert.h>
      5 #include "tests/asm.h"
      6 #include "tests/malloc.h"
      7 #include <string.h>
      8 
/* Size in bytes of the XSAVE area this test dumps/restores: the
   512-byte legacy FXSAVE region, the 64-byte XSAVE header, plus 256
   further bytes -- presumably the 16 x 16-byte YMM upper halves at
   offset 576 (TODO confirm against the CPUID.0xD-reported layout). */
#define XSAVE_AREA_SIZE 832

typedef  unsigned char           UChar;   /* 8-bit unsigned */
typedef  unsigned int            UInt;    /* 32-bit unsigned */
typedef  unsigned long long int  ULong;   /* 64-bit unsigned */

typedef  unsigned long int       UWord;   /* host-word-sized unsigned */

/* Minimal boolean type, in the style of Valgrind's own headers. */
typedef  unsigned char  Bool;
#define  True   ((Bool)1)
#define  False  ((Bool)0)
     20 
/* Two easily-recognisable 256-bit patterns, plus an all-zero one,
   used below to fill the YMM registers before an XSAVE dump. */
const unsigned int vec0[8]
   = { 0x12345678, 0x11223344, 0x55667788, 0x87654321,
       0x15263748, 0x91929394, 0x19293949, 0x48372615 };

const unsigned int vec1[8]
   = { 0xABCDEF01, 0xAABBCCDD, 0xEEFF0011, 0x10FEDCBA,
       0xBADCFE10, 0xFFEE9988, 0x11667722, 0x01EFCDAB };

const unsigned int vecZ[8]
   = { 0, 0, 0, 0, 0, 0, 0, 0 };
     31 
     32 /* A version of memset that doesn't use XMM or YMM registers. */
     33 static __attribute__((noinline))
     34 void* my_memset(void* s, int c, size_t n)
     35 {
     36    size_t i;
     37    for (i = 0; i < n; i++) {
     38       ((unsigned char*)s)[i] = (unsigned char)(unsigned int)c;
     39       /* Defeat any attempt at autovectorisation */
     40       __asm__ __volatile__("" ::: "cc","memory");
     41    }
     42    return s;
     43 }
     44 
     45 /* Ditto for memcpy */
     46 static __attribute__((noinline))
     47 void* my_memcpy(void *dest, const void *src, size_t n)
     48 {
     49    size_t i;
     50    for (i = 0; i < n; i++) {
     51       ((unsigned char*)dest)[i] = ((unsigned char*)src)[i];
     52       __asm__ __volatile__("" ::: "cc","memory");
     53    }
     54    return dest;
     55 }
     56 
     57 static void* memalign_zeroed64(size_t size)
     58 {
     59    char* p = memalign64(size);
     60    if (p && size > 0) {
     61       my_memset(p, 0, size);
     62    }
     63    return p;
     64 }
     65 
__attribute__((noinline))
static void do_xsave ( void* p, UInt rfbm )
{
   /* Execute XSAVE with EDX:EAX (the requested-feature bitmap) set to
      0:rfbm, dumping the selected components to the 64-byte-aligned
      area at |p|.  Only bits 0..2 (x87, SSE, AVX) are meaningful for
      this test, hence the assert. */
   assert(rfbm <= 7);
   __asm__ __volatile__(
      "movq %0, %%rax;  xorq %%rdx, %%rdx;  xsave (%1)"
         : /*OUT*/ : /*IN*/ "r"((ULong)rfbm), "r"(p)
         : /*TRASH*/ "memory", "rax", "rdx"
   );
}
     76 
__attribute__((noinline))
static void do_xrstor ( void* p, UInt rfbm )
{
   /* Execute XRSTOR with EDX:EAX = 0:rfbm, restoring from the
      64-byte-aligned area at |p| the components selected jointly by
      rfbm and the area's XSTATE_BV header field. */
   assert(rfbm <= 7);
   __asm__ __volatile__(
      "movq %0, %%rax;  xorq %%rdx, %%rdx;  xrstor (%1)"
         : /*OUT*/ : /*IN*/ "r"((ULong)rfbm), "r"(p)
         : /*TRASH*/ "rax", "rdx" /* FIXME plus all X87,SSE,AVX regs */
   );
}
     87 
/* set up the FP, SSE and AVX state, and then dump it. */
static void do_setup_then_xsave ( void* p, UInt rfbm )
{
   /* Build a recognisable x87 stack: finit empties it, then seven
      pushes leave seven valid registers. */
   __asm__ __volatile__("finit");
   __asm__ __volatile__("fldpi");
   __asm__ __volatile__("fld1");
   __asm__ __volatile__("fldln2");
   __asm__ __volatile__("fldlg2");
   __asm__ __volatile__("fld %st(3)");
   __asm__ __volatile__("fld %st(3)");
   __asm__ __volatile__("fld1");
   /* Fill all 16 YMM registers from the vec0/vec1/zero patterns, so
      each register's source is identifiable in the dump. */
   __asm__ __volatile__("vmovups (%0), %%ymm0" : : "r"(&vec0[0]) : "xmm0" );
   __asm__ __volatile__("vmovups (%0), %%ymm1" : : "r"(&vec1[0]) : "xmm1" );
   __asm__ __volatile__("vxorps  %ymm2, %ymm2, %ymm2");
   __asm__ __volatile__("vmovaps %ymm0, %ymm3");
   __asm__ __volatile__("vmovaps %ymm1, %ymm4");
   __asm__ __volatile__("vmovaps %ymm2, %ymm5");
   __asm__ __volatile__("vmovaps %ymm0, %ymm6");
   __asm__ __volatile__("vmovaps %ymm1, %ymm7");
   __asm__ __volatile__("vmovaps %ymm1, %ymm8");
   __asm__ __volatile__("vmovaps %ymm2, %ymm9");
   __asm__ __volatile__("vmovaps %ymm0, %ymm10");
   __asm__ __volatile__("vmovaps %ymm1, %ymm11");
   __asm__ __volatile__("vmovaps %ymm1, %ymm12");
   __asm__ __volatile__("vmovaps %ymm2, %ymm13");
   __asm__ __volatile__("vmovaps %ymm0, %ymm14");
   __asm__ __volatile__("vmovaps %ymm1, %ymm15");
   do_xsave(p, rfbm);
}
    117 
    118 static int isFPLsbs ( int i )
    119 {
    120    int q;
    121    q = 32; if (i == q || i == q+1) return 1;
    122    q = 48; if (i == q || i == q+1) return 1;
    123    q = 64; if (i == q || i == q+1) return 1;
    124    q = 80; if (i == q || i == q+1) return 1;
    125    q = 96; if (i == q || i == q+1) return 1;
    126    q = 112; if (i == q || i == q+1) return 1;
    127    q = 128; if (i == q || i == q+1) return 1;
    128    q = 144; if (i == q || i == q+1) return 1;
    129    return 0;
    130 }
    131 
    132 static void show ( unsigned char* buf, Bool hideBits64to79 )
    133 {
    134    int i;
    135    for (i = 0; i < XSAVE_AREA_SIZE; i++) {
    136       if ((i % 16) == 0)
    137          fprintf(stderr, "%3d   ", i);
    138       if (hideBits64to79 && isFPLsbs(i))
    139 	 fprintf(stderr, "xx ");
    140       else
    141          fprintf(stderr, "%02x ", buf[i]);
    142       if (i > 0 && ((i % 16) == 15))
    143          fprintf(stderr, "\n");
    144    }
    145 }
    146 
/* Execute CPUID with EAX = index and ECX = ecx_in, returning all four
   result registers through the pointer arguments. */
static void cpuid ( UInt* eax, UInt* ebx, UInt* ecx, UInt* edx,
                    UInt index, UInt ecx_in )
{
   UInt a,b,c,d;
   asm volatile ("cpuid"
                 : "=a" (a), "=b" (b), "=c" (c), "=d" (d) \
                 : "0" (index), "2"(ecx_in) );
   *eax = a; *ebx = b; *ecx = c; *edx = d;
   //fprintf(stderr, "%08x %08x -> %08x %08x %08x %08x\n",
   //        index,ecx_in, a,b,c,d );
}
    158 
/* Execute XGETBV with ECX = ecx_in (0 selects XCR0), returning the
   low and high 32 bits of the result in *eax and *edx. */
static void xgetbv ( UInt* eax, UInt* edx, UInt ecx_in )
{
   UInt a,d;
   asm volatile ("xgetbv"
                 : "=a" (a), "=d" (d) \
                 : "c"(ecx_in) );
   *eax = a; *edx = d;
}
    167 
    168 static void check_for_xsave ( void )
    169 {
    170    UInt eax, ebx, ecx, edx;
    171    Bool ok = True;
    172 
    173    eax = ebx = ecx = edx = 0;
    174    cpuid(&eax, &ebx, &ecx, &edx, 1,0);
    175    //fprintf(stderr, "cpuid(1).ecx[26=xsave]   = %u\n", (ecx >> 26) & 1);
    176    ok = ok && (((ecx >> 26) & 1) == 1);
    177 
    178    eax = ebx = ecx = edx = 0;
    179    cpuid(&eax, &ebx, &ecx, &edx, 1,0);
    180    //fprintf(stderr, "cpuid(1).ecx[27=osxsave] = %u\n", (ecx >> 27) & 1);
    181    ok = ok && (((ecx >> 27) & 1) == 1);
    182 
    183    eax = ebx = ecx = edx = 0;
    184    xgetbv(&eax, &edx, 0);
    185    //fprintf(stderr, "xgetbv(0) = %u:%u\n", edx, eax);
    186    ok = ok && (edx == 0) && (eax == 7);
    187 
    188    if (ok) return;
    189 
    190    fprintf(stderr,
    191            "This program must be run on a CPU that supports AVX and XSAVE.\n");
    192    exit(1);
    193 }
    194 
    195 
    196 void test_xsave ( Bool hideBits64to79 )
    197 {
    198    /* Testing XSAVE:
    199 
    200       For RBFM in 0 .. 7 (that is, all combinations): set the x87, SSE
    201       and AVX registers with some values, do XSAVE to dump it, and
    202       print the resulting buffer. */
    203 
    204    UInt rfbm;
    205    for (rfbm = 0; rfbm <= 7; rfbm++) {
    206       UChar* saved_img = memalign_zeroed64(XSAVE_AREA_SIZE);
    207 
    208       my_memset(saved_img, 0xAA, XSAVE_AREA_SIZE);
    209       saved_img[512] = 0;
    210       do_setup_then_xsave(saved_img, rfbm);
    211 
    212       fprintf(stderr,
    213               "------------------ XSAVE, rfbm = %u ------------------\n", rfbm);
    214       show(saved_img, hideBits64to79);
    215       fprintf(stderr, "\n");
    216 
    217       free(saved_img);
    218    }
    219 }
    220 
    221 
void test_xrstor ( Bool hideBits64to79 )
{
   /* Testing XRSTOR is more complex than testing XSAVE, because the
      loaded value(s) depend not only on what bits are requested (by
      RFBM) but also on what bits are actually present in the image
      (defined by XSTATE_BV).  So we have to test all 64 (8 x 8)
      combinations.

      The approach is to fill a memory buffer with data, do XRSTOR
      from the buffer, then dump all components with XSAVE in a new
      buffer, and print the result.  This is complicated by the fact
      that we need to be able to see which parts of the state (in
      registers) are neither overwritten nor zeroed by the restore.
      Hence the registers must be pre-filled with values which are
      neither zero nor the data to be loaded.  We choose to use 0x55
      where possible. */

   UChar* fives = memalign_zeroed64(XSAVE_AREA_SIZE);
   my_memset(fives, 0x55, XSAVE_AREA_SIZE);
   /* Set MXCSR so that the insn doesn't fault */
   fives[24] = 0x80;   /* bytes 24..27 = MXCSR = 0x00001f80 */
   fives[25] = 0x1f;
   fives[26] = 0;
   fives[27] = 0;
   /* Ditto for the XSAVE header area.  Also set XSTATE_BV. */
   fives[512] = 7;     /* XSTATE_BV: x87, SSE and AVX all present */
   UInt i;
   /* NOTE(review): this clears header bytes 1..23 only; bytes 24..63
      remain 0x55 -- confirm XRSTOR tolerates nonzero reserved header
      bytes on the targets this test runs on. */
   for (i = 1; i <= 23; i++) fives[512+i] = 0;
   /* Fill the x87 register values with something that VEX's
      80-vs-64-bit kludging won't mess up -- an 80 bit number which is
      representable also as 64 bit: 123456789.0123 */
   for (i = 0; i <= 7; i++) {
      UChar* p = &fives[32 + 16 * i];
      p[0]=0x00; p[1]=0xf8; p[2]=0xc2; p[3]=0x64; p[4]=0xa0;
      p[5]=0xa2; p[6]=0x79; p[7]=0xeb; p[8]=0x19; p[9]=0x40;
   }
   /* And mark the tags for all 8 dumped regs as "valid". */
   fives[4/*FTW*/] = 0xFF;

   /* (1) (see comment in loop below) */
   UChar* standard_test_data = memalign_zeroed64(XSAVE_AREA_SIZE);
   do_setup_then_xsave(standard_test_data, 7);

   /* NOTE(review): |fives| and |standard_test_data| are never freed;
      harmless for a one-shot test but worth knowing. */
   UInt xstate_bv, rfbm;
   for (xstate_bv = 0; xstate_bv <= 7; xstate_bv++) {
      for (rfbm = 0; rfbm <= 7; rfbm++) {
   //{ xstate_bv = 7;
   //      { rfbm = 6;
         /* 1.  Copy the "standard test data" into registers, and dump
                it with XSAVE.  This gives us an image we can try
                restoring from.

            2.  Set the register state to all-0x55s (as far as is
                possible), so we can see which parts get overwritten
                and which parts get zeroed on the test restore.

            3.  Do the restore from the image prepared in (1).

            4.  Dump the state with XSAVE and print it.
         */

         /* (3a).  We can't use |standard_test_data| directly, since we
            need to put in the required |xstate_bv| value.  So make a
            copy and modify that instead. */
         UChar* img_to_restore_from = memalign_zeroed64(XSAVE_AREA_SIZE);
         my_memcpy(img_to_restore_from, standard_test_data, XSAVE_AREA_SIZE);
         img_to_restore_from[512] = xstate_bv;

         /* (4a) */
         UChar* saved_img = memalign_zeroed64(XSAVE_AREA_SIZE);
         my_memset(saved_img, 0xAA, XSAVE_AREA_SIZE);
         saved_img[512] = 0;

         /* (2) */
         do_xrstor(fives, 7);

         // X87, SSE, AVX state LIVE

         /* (3b) */
         /* and this is what we're actually trying to test */
         do_xrstor(img_to_restore_from, rfbm);

         // X87, SSE, AVX state LIVE

         /* (4b) */
         do_xsave(saved_img, 7);

         fprintf(stderr,
                 "---------- XRSTOR, xstate_bv = %u, rfbm = %u ---------\n",
                xstate_bv, rfbm);
         show(saved_img, hideBits64to79);
         fprintf(stderr, "\n");

         free(saved_img);
         free(img_to_restore_from);
      }
   }
}
    320 
    321 
    322 int main ( int argc, char** argv )
    323 {
    324    Bool hideBits64to79 = argc > 1;
    325    fprintf(stderr, "Re-run with any arg to suppress least-significant\n"
    326                    "   16 bits of 80-bit FP numbers\n");
    327 
    328    check_for_xsave();
    329 
    330    if (1)
    331    test_xsave(hideBits64to79);
    332 
    333    if (1)
    334    test_xrstor(hideBits64to79);
    335 
    336    return 0;
    337 }
    338