1 2 #include <stdio.h> 3 #include <stdlib.h> 4 #include <assert.h> 5 #include "tests/asm.h" 6 #include "tests/malloc.h" 7 #include <string.h> 8 9 #define XSAVE_AREA_SIZE 832 10 11 typedef unsigned char UChar; 12 typedef unsigned int UInt; 13 typedef unsigned long long int ULong; 14 15 typedef unsigned long int UWord; 16 17 typedef unsigned char Bool; 18 #define True ((Bool)1) 19 #define False ((Bool)0) 20 21 const unsigned int vec0[8] 22 = { 0x12345678, 0x11223344, 0x55667788, 0x87654321, 23 0x15263748, 0x91929394, 0x19293949, 0x48372615 }; 24 25 const unsigned int vec1[8] 26 = { 0xABCDEF01, 0xAABBCCDD, 0xEEFF0011, 0x10FEDCBA, 27 0xBADCFE10, 0xFFEE9988, 0x11667722, 0x01EFCDAB }; 28 29 const unsigned int vecZ[8] 30 = { 0, 0, 0, 0, 0, 0, 0, 0 }; 31 32 /* A version of memset that doesn't use XMM or YMM registers. */ 33 static __attribute__((noinline)) 34 void* my_memset(void* s, int c, size_t n) 35 { 36 size_t i; 37 for (i = 0; i < n; i++) { 38 ((unsigned char*)s)[i] = (unsigned char)(unsigned int)c; 39 /* Defeat any attempt at autovectorisation */ 40 __asm__ __volatile__("" ::: "cc","memory"); 41 } 42 return s; 43 } 44 45 /* Ditto for memcpy */ 46 static __attribute__((noinline)) 47 void* my_memcpy(void *dest, const void *src, size_t n) 48 { 49 size_t i; 50 for (i = 0; i < n; i++) { 51 ((unsigned char*)dest)[i] = ((unsigned char*)src)[i]; 52 __asm__ __volatile__("" ::: "cc","memory"); 53 } 54 return dest; 55 } 56 57 static void* memalign_zeroed64(size_t size) 58 { 59 char* p = memalign64(size); 60 if (p && size > 0) { 61 my_memset(p, 0, size); 62 } 63 return p; 64 } 65 66 __attribute__((noinline)) 67 static void do_xsave ( void* p, UInt rfbm ) 68 { 69 assert(rfbm <= 7); 70 __asm__ __volatile__( 71 "movq %0, %%rax; xorq %%rdx, %%rdx; xsave (%1)" 72 : /*OUT*/ : /*IN*/ "r"((ULong)rfbm), "r"(p) 73 : /*TRASH*/ "memory", "rax", "rdx" 74 ); 75 } 76 77 __attribute__((noinline)) 78 static void do_xrstor ( void* p, UInt rfbm ) 79 { 80 assert(rfbm <= 7); 81 __asm__ __volatile__( 82 "movq %0, %%rax; xorq %%rdx, %%rdx; xrstor (%1)" 83 : /*OUT*/ : /*IN*/ "r"((ULong)rfbm), "r"(p) 84 : /*TRASH*/ "rax", "rdx" /* FIXME plus all X87,SSE,AVX regs */ 85 ); 86 } 87 88 /* set up the FP, SSE and AVX state, and then dump it. */ 89 static void do_setup_then_xsave ( void* p, UInt rfbm ) 90 { 91 __asm__ __volatile__("finit"); 92 __asm__ __volatile__("fldpi"); 93 __asm__ __volatile__("fld1"); 94 __asm__ __volatile__("fldln2"); 95 __asm__ __volatile__("fldlg2"); 96 __asm__ __volatile__("fld %st(3)"); 97 __asm__ __volatile__("fld %st(3)"); 98 __asm__ __volatile__("fld1"); 99 __asm__ __volatile__("vmovups (%0), %%ymm0" : : "r"(&vec0[0]) : "xmm0" ); 100 __asm__ __volatile__("vmovups (%0), %%ymm1" : : "r"(&vec1[0]) : "xmm1" ); 101 __asm__ __volatile__("vxorps %ymm2, %ymm2, %ymm2"); 102 __asm__ __volatile__("vmovaps %ymm0, %ymm3"); 103 __asm__ __volatile__("vmovaps %ymm1, %ymm4"); 104 __asm__ __volatile__("vmovaps %ymm2, %ymm5"); 105 __asm__ __volatile__("vmovaps %ymm0, %ymm6"); 106 __asm__ __volatile__("vmovaps %ymm1, %ymm7"); 107 __asm__ __volatile__("vmovaps %ymm1, %ymm8"); 108 __asm__ __volatile__("vmovaps %ymm2, %ymm9"); 109 __asm__ __volatile__("vmovaps %ymm0, %ymm10"); 110 __asm__ __volatile__("vmovaps %ymm1, %ymm11"); 111 __asm__ __volatile__("vmovaps %ymm1, %ymm12"); 112 __asm__ __volatile__("vmovaps %ymm2, %ymm13"); 113 __asm__ __volatile__("vmovaps %ymm0, %ymm14"); 114 __asm__ __volatile__("vmovaps %ymm1, %ymm15"); 115 do_xsave(p, rfbm); 116 } 117 118 static int isFPLsbs ( int i ) 119 { 120 int q; 121 q = 32; if (i == q || i == q+1) return 1; 122 q = 48; if (i == q || i == q+1) return 1; 123 q = 64; if (i == q || i == q+1) return 1; 124 q = 80; if (i == q || i == q+1) return 1; 125 q = 96; if (i == q || i == q+1) return 1; 126 q = 112; if (i == q || i == q+1) return 1; 127 q = 128; if (i == q || i == q+1) return 1; 128 q = 144; if (i == q || i == q+1) return 1; 129 return 0; 130 } 131 132 static void show ( unsigned char* buf, Bool hideBits64to79 ) 133 { 134 int i; 135 for (i = 0; i < XSAVE_AREA_SIZE; i++) { 136 if ((i % 16) == 0) 137 fprintf(stderr, "%3d ", i); 138 if (hideBits64to79 && isFPLsbs(i)) 139 fprintf(stderr, "xx "); 140 else 141 fprintf(stderr, "%02x ", buf[i]); 142 if (i > 0 && ((i % 16) == 15)) 143 fprintf(stderr, "\n"); 144 } 145 } 146 147 static void cpuid ( UInt* eax, UInt* ebx, UInt* ecx, UInt* edx, 148 UInt index, UInt ecx_in ) 149 { 150 UInt a,b,c,d; 151 asm volatile ("cpuid" 152 : "=a" (a), "=b" (b), "=c" (c), "=d" (d) \ 153 : "0" (index), "2"(ecx_in) ); 154 *eax = a; *ebx = b; *ecx = c; *edx = d; 155 //fprintf(stderr, "%08x %08x -> %08x %08x %08x %08x\n", 156 // index,ecx_in, a,b,c,d ); 157 } 158 159 static void xgetbv ( UInt* eax, UInt* edx, UInt ecx_in ) 160 { 161 UInt a,d; 162 asm volatile ("xgetbv" 163 : "=a" (a), "=d" (d) \ 164 : "c"(ecx_in) ); 165 *eax = a; *edx = d; 166 } 167 168 static void check_for_xsave ( void ) 169 { 170 UInt eax, ebx, ecx, edx; 171 Bool ok = True; 172 173 eax = ebx = ecx = edx = 0; 174 cpuid(&eax, &ebx, &ecx, &edx, 1,0); 175 //fprintf(stderr, "cpuid(1).ecx[26=xsave] = %u\n", (ecx >> 26) & 1); 176 ok = ok && (((ecx >> 26) & 1) == 1); 177 178 eax = ebx = ecx = edx = 0; 179 cpuid(&eax, &ebx, &ecx, &edx, 1,0); 180 //fprintf(stderr, "cpuid(1).ecx[27=osxsave] = %u\n", (ecx >> 27) & 1); 181 ok = ok && (((ecx >> 27) & 1) == 1); 182 183 eax = ebx = ecx = edx = 0; 184 xgetbv(&eax, &edx, 0); 185 //fprintf(stderr, "xgetbv(0) = %u:%u\n", edx, eax); 186 ok = ok && (edx == 0) && (eax == 7); 187 188 if (ok) return; 189 190 fprintf(stderr, 191 "This program must be run on a CPU that supports AVX and XSAVE.\n"); 192 exit(1); 193 } 194 195 196 void test_xsave ( Bool hideBits64to79 ) 197 { 198 /* Testing XSAVE: 199 200 For RBFM in 0 .. 7 (that is, all combinations): set the x87, SSE 201 and AVX registers with some values, do XSAVE to dump it, and 202 print the resulting buffer. */ 203 204 UInt rfbm; 205 for (rfbm = 0; rfbm <= 7; rfbm++) { 206 UChar* saved_img = memalign_zeroed64(XSAVE_AREA_SIZE); 207 208 my_memset(saved_img, 0xAA, XSAVE_AREA_SIZE); 209 saved_img[512] = 0; 210 do_setup_then_xsave(saved_img, rfbm); 211 212 fprintf(stderr, 213 "------------------ XSAVE, rfbm = %u ------------------\n", rfbm); 214 show(saved_img, hideBits64to79); 215 fprintf(stderr, "\n"); 216 217 free(saved_img); 218 } 219 } 220 221 222 void test_xrstor ( Bool hideBits64to79 ) 223 { 224 /* Testing XRSTOR is more complex than testing XSAVE, because the 225 loaded value(s) depend not only on what bits are requested (by 226 RBFM) but also on what bits are actually present in the image 227 (defined by XSTATE_BV). So we have to test all 64 (8 x 8) 228 combinations. 229 230 The approach is to fill a memory buffer with data, do XRSTOR 231 from the buffer, them dump all components with XSAVE in a new 232 buffer, and print the result. This is complicated by the fact 233 that we need to be able to see which parts of the state (in 234 registers) are neither overwritten nor zeroed by the restore. 235 Hence the registers must be pre-filled with values which are 236 neither zero nor the data to be loaded. We choose to use 0x55 237 where possible. */ 238 239 UChar* fives = memalign_zeroed64(XSAVE_AREA_SIZE); 240 my_memset(fives, 0x55, XSAVE_AREA_SIZE); 241 /* Set MXCSR so that the insn doesn't fault */ 242 fives[24] = 0x80; 243 fives[25] = 0x1f; 244 fives[26] = 0; 245 fives[27] = 0; 246 /* Ditto for the XSAVE header area. Also set XSTATE_BV. */ 247 fives[512] = 7; 248 UInt i; 249 for (i = 1; i <= 23; i++) fives[512+i] = 0; 250 /* Fill the x87 register values with something that VEX's 251 80-vs-64-bit kludging won't mess up -- an 80 bit number which is 252 representable also as 64 bit: 123456789.0123 */ 253 for (i = 0; i <= 7; i++) { 254 UChar* p = &fives[32 + 16 * i]; 255 p[0]=0x00; p[1]=0xf8; p[2]=0xc2; p[3]=0x64; p[4]=0xa0; 256 p[5]=0xa2; p[6]=0x79; p[7]=0xeb; p[8]=0x19; p[9]=0x40; 257 } 258 /* And mark the tags for all 8 dumped regs as "valid". */ 259 fives[4/*FTW*/] = 0xFF; 260 261 /* (1) (see comment in loop below) */ 262 UChar* standard_test_data = memalign_zeroed64(XSAVE_AREA_SIZE); 263 do_setup_then_xsave(standard_test_data, 7); 264 265 UInt xstate_bv, rfbm; 266 for (xstate_bv = 0; xstate_bv <= 7; xstate_bv++) { 267 for (rfbm = 0; rfbm <= 7; rfbm++) { 268 //{ xstate_bv = 7; 269 // { rfbm = 6; 270 /* 1. Copy the "standard test data" into registers, and dump 271 it with XSAVE. This gives us an image we can try 272 restoring from. 273 274 2. Set the register state to all-0x55s (as far as is 275 possible), so we can see which parts get overwritten 276 and which parts get zeroed on the test restore. 277 278 3. Do the restore from the image prepared in (1). 279 280 4. Dump the state with XSAVE and print it. 281 */ 282 283 /* (3a). We can't use |standard_test_data| directly, since we 284 need to put in the required |xstate_bv| value. So make a 285 copy and modify that instead. */ 286 UChar* img_to_restore_from = memalign_zeroed64(XSAVE_AREA_SIZE); 287 my_memcpy(img_to_restore_from, standard_test_data, XSAVE_AREA_SIZE); 288 img_to_restore_from[512] = xstate_bv; 289 290 /* (4a) */ 291 UChar* saved_img = memalign_zeroed64(XSAVE_AREA_SIZE); 292 my_memset(saved_img, 0xAA, XSAVE_AREA_SIZE); 293 saved_img[512] = 0; 294 295 /* (2) */ 296 do_xrstor(fives, 7); 297 298 // X87, SSE, AVX state LIVE 299 300 /* (3b) */ 301 /* and this is what we're actually trying to test */ 302 do_xrstor(img_to_restore_from, rfbm); 303 304 // X87, SSE, AVX state LIVE 305 306 /* (4b) */ 307 do_xsave(saved_img, 7); 308 309 fprintf(stderr, 310 "---------- XRSTOR, xstate_bv = %u, rfbm = %u ---------\n", 311 xstate_bv, rfbm); 312 show(saved_img, hideBits64to79); 313 fprintf(stderr, "\n"); 314 315 free(saved_img); 316 free(img_to_restore_from); 317 } 318 } 319 } 320 321 322 int main ( int argc, char** argv ) 323 { 324 Bool hideBits64to79 = argc > 1; 325 fprintf(stderr, "Re-run with any arg to suppress least-significant\n" 326 " 16 bits of 80-bit FP numbers\n"); 327 328 check_for_xsave(); 329 330 if (1) 331 test_xsave(hideBits64to79); 332 333 if (1) 334 test_xrstor(hideBits64to79); 335 336 return 0; 337 } 338