Home | History | Annotate | Download | only in amd64
      1 #include <config.h>
      2 #include <stdio.h>
      3 #include <stdlib.h>
      4 #include "tests/asm.h"
      5 #include "tests/malloc.h"
      6 #include <string.h>
      7 
      8 const unsigned int vec0[4]
      9    = { 0x12345678, 0x11223344, 0x55667788, 0x87654321 };
     10 
     11 const unsigned int vec1[4]
     12    = { 0xABCDEF01, 0xAABBCCDD, 0xEEFF0011, 0x10FEDCBA };
     13 
     14 const unsigned int vecZ[4]
     15    = { 0, 0, 0, 0 };
     16 
     17 __attribute__((noinline))
     18 void do_fxsave ( void* p, int rexw ) {
     19    if (rexw) {
     20 #ifdef HAVE_AS_AMD64_FXSAVE64
     21       asm __volatile__("fxsave64 (%0)" : : "r" (p) : "memory" );
     22 #else
     23       asm __volatile__("rex64/fxsave (%0)" : : "r" (p) : "memory" );
     24 #endif
     25    } else {
     26       asm __volatile__("fxsave (%0)" : : "r" (p) : "memory" );
     27    }
     28 }
     29 
     30 __attribute__((noinline))
     31 void do_fxrstor ( void* p, int rexw ) {
     32    if (rexw) {
     33 #ifdef HAVE_AS_AMD64_FXSAVE64
     34       asm __volatile__("fxrstor64 (%0)" : : "r" (p) : "memory" );
     35 #else
     36       asm __volatile__("rex64/fxrstor (%0)" : : "r" (p) : "memory" );
     37 #endif
     38    } else {
     39       asm __volatile__("fxrstor (%0)" : : "r" (p) : "memory" );
     40    }
     41 }
     42 
     43 void do_zeroise ( void )
     44 {
     45    asm __volatile__("finit");
     46    asm __volatile__(
     47     "fldz\n\t"
     48     "fldz\n\t"
     49     "fldz\n\t"
     50     "fldz\n\t"
     51     "fldz\n\t"
     52     "fldz\n\t"
     53     "fldz\n\t"
     54     "fldz\n\t"
     55     "finit\n");
     56 #ifndef VGP_amd64_darwin
     57    asm __volatile__("movups " VG_SYM(vecZ) ", %xmm0");
     58    asm __volatile__("movups " VG_SYM(vecZ) ", %xmm1");
     59    asm __volatile__("movups " VG_SYM(vecZ) ", %xmm2");
     60    asm __volatile__("movups " VG_SYM(vecZ) ", %xmm3");
     61    asm __volatile__("movups " VG_SYM(vecZ) ", %xmm4");
     62    asm __volatile__("movups " VG_SYM(vecZ) ", %xmm5");
     63    asm __volatile__("movups " VG_SYM(vecZ) ", %xmm6");
     64    asm __volatile__("movups " VG_SYM(vecZ) ", %xmm7");
     65    asm __volatile__("movups " VG_SYM(vecZ) ", %xmm8");
     66    asm __volatile__("movups " VG_SYM(vecZ) ", %xmm9");
     67    asm __volatile__("movups " VG_SYM(vecZ) ", %xmm10");
     68    asm __volatile__("movups " VG_SYM(vecZ) ", %xmm11");
     69    asm __volatile__("movups " VG_SYM(vecZ) ", %xmm12");
     70    asm __volatile__("movups " VG_SYM(vecZ) ", %xmm13");
     71    asm __volatile__("movups " VG_SYM(vecZ) ", %xmm14");
     72    asm __volatile__("movups " VG_SYM(vecZ) ", %xmm15");
     73 #else
     74    asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm0");
     75    asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm1");
     76    asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm2");
     77    asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm3");
     78    asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm4");
     79    asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm5");
     80    asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm6");
     81    asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm7");
     82    asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm8");
     83    asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm9");
     84    asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm10");
     85    asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm11");
     86    asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm12");
     87    asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm13");
     88    asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm14");
     89    asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm15");
     90 #endif
     91    asm __volatile__(
     92       "pushq $0\n\t"
     93       "ldmxcsr 0(%rsp)\n\t"
     94       "addq $8,%rsp\n");
     95 }
     96 
     97 /* set up the FP and SSE state, and then dump it. */
     98 void do_setup_then_fxsave ( void* p, int rexw )
     99 {
    100    asm __volatile__("finit");
    101    asm __volatile__("fldpi");
    102    asm __volatile__("fld1");
    103    asm __volatile__("fldln2");
    104    asm __volatile__("fldlg2");
    105    asm __volatile__("fld %st(3)");
    106    asm __volatile__("fld %st(3)");
    107    asm __volatile__("fld1");
    108    asm __volatile__("movups (%0), %%xmm0" : : "r"(&vec0[0]) : "xmm0" );
    109    asm __volatile__("movups (%0), %%xmm1" : : "r"(&vec1[0]) : "xmm1" );
    110    asm __volatile__("xorps  %xmm2, %xmm2");
    111    asm __volatile__("movaps %xmm0, %xmm3");
    112    asm __volatile__("movaps %xmm1, %xmm4");
    113    asm __volatile__("movaps %xmm2, %xmm5");
    114    asm __volatile__("movaps %xmm0, %xmm6");
    115    asm __volatile__("movaps %xmm1, %xmm7");
    116    asm __volatile__("movaps %xmm1, %xmm8");
    117    asm __volatile__("movaps %xmm2, %xmm9");
    118    asm __volatile__("movaps %xmm0, %xmm10");
    119    asm __volatile__("movaps %xmm1, %xmm11");
    120    asm __volatile__("movaps %xmm1, %xmm12");
    121    asm __volatile__("movaps %xmm2, %xmm13");
    122    asm __volatile__("movaps %xmm0, %xmm14");
    123    asm __volatile__("movaps %xmm1, %xmm15");
    124    do_fxsave(p, rexw);
    125 }
    126 
    127 int isFPLsbs ( int i )
    128 {
    129    int q;
    130    q = 32; if (i == q || i == q+1) return 1;
    131    q = 48; if (i == q || i == q+1) return 1;
    132    q = 64; if (i == q || i == q+1) return 1;
    133    q = 80; if (i == q || i == q+1) return 1;
    134    q = 96; if (i == q || i == q+1) return 1;
    135    q = 112; if (i == q || i == q+1) return 1;
    136    q = 128; if (i == q || i == q+1) return 1;
    137    q = 144; if (i == q || i == q+1) return 1;
    138    return 0;
    139 }
    140 
    141 void show ( unsigned char* buf, int xx )
    142 {
    143    int i;
    144    for (i = 0; i < 512; i++) {
    145       if ((i % 16) == 0)
    146          printf("%3d   ", i);
    147       if (xx && isFPLsbs(i))
    148 	 printf("xx ");
    149       else
    150          printf("%02x ", buf[i]);
    151       if (i > 0 && ((i % 16) == 15))
    152           printf("\n");
    153    }
    154 }
    155 
    156 
    157 int main ( int argc, char** argv )
    158 {
    159    unsigned char* buf1 = memalign16(512);
    160    unsigned char* buf2 = memalign16(512);
    161    unsigned char* buf3 = memalign16(512);
    162    int xx = argc > 1;
    163    printf("Re-run with any arg to suppress least-significant\n"
    164           "   16 bits of FP numbers\n");
    165 
    166    printf("\n-------- FXSAVE non-64 (REX.W == 0) --------\n");
    167 
    168    memset(buf1, 0x55, 512);
    169    memset(buf2, 0x55, 512);
    170    memset(buf3, 0x55, 512);
    171 
    172    /* Load up x87/xmm state and dump it. */
    173    do_setup_then_fxsave(buf1, 0);
    174    printf("\nBEFORE\n");
    175    show(buf1, xx);
    176 
    177    /* Zeroise x87/xmm state and dump it, to show that the
    178       regs have been cleared out. */
    179    do_zeroise();
    180    do_fxsave(buf2, 0);
    181    printf("\nZEROED\n");
    182    show(buf2, xx);
    183 
    184    /* Reload x87/xmm state from buf1 and dump it in buf3. */
    185    do_fxrstor(buf1, 0);
    186    do_fxsave(buf3, 0);
    187    printf("\nRESTORED\n");
    188    show(buf3, xx);
    189 
    190    printf("\n-------- FXSAVE 64 (REX.W == 1) --------\n\n");
    191 
    192    memset(buf1, 0x55, 512);
    193    memset(buf2, 0x55, 512);
    194    memset(buf3, 0x55, 512);
    195 
    196    /* Load up x87/xmm state and dump it. */
    197    do_setup_then_fxsave(buf1, 1);
    198    printf("\nBEFORE\n");
    199    show(buf1, xx);
    200 
    201    /* Zeroise x87/xmm state and dump it, to show that the
    202       regs have been cleared out. */
    203    do_zeroise();
    204    do_fxsave(buf2, 1);
    205    printf("\nZEROED\n");
    206    show(buf2, xx);
    207 
    208    /* Reload x87/xmm state from buf1 and dump it in buf3. */
    209    do_fxrstor(buf1, 1);
    210    do_fxsave(buf3, 1);
    211    printf("\nRESTORED\n");
    212    show(buf3, xx);
    213 
    214 
    215    free(buf1); free(buf2); free(buf3);
    216 
    217    return 0;
    218 }
    219