1 // This program is a thorough test of the LOADVn/STOREVn shadow memory 2 // operations. 3 4 #include <assert.h> 5 #include <stdlib.h> 6 #include <stdio.h> 7 #include <string.h> 8 #include "tests/sys_mman.h" 9 #include "memcheck/memcheck.h" 10 11 // All the sizes here are in *bytes*, not bits. 12 13 typedef unsigned char U1; 14 typedef unsigned short U2; 15 typedef unsigned int U4; 16 typedef unsigned long long U8; 17 18 typedef float F4; 19 typedef double F8; 20 21 typedef unsigned long UWord; 22 23 #define PAGE_SIZE 4096ULL 24 25 26 // XXX: should check the error cases for SET/GET_VBITS also 27 28 // For the byte 'x', build a value of 'size' bytes from that byte, eg: 29 // size 1 --> x 30 // size 2 --> xx 31 // size 4 --> xxxx 32 // size 8 --> xxxxxxxx 33 // where the 0 bits are seen by Memcheck as defined, and the 1 bits are 34 // seen as undefined (ie. the value of each bit matches its V bit, ie. the 35 // resulting value is the same as its metavalue). 36 // 37 U8 build(int size, U1 byte) 38 { 39 int i; 40 U8 mask = 0; 41 U8 shres; 42 U8 res = 0xffffffffffffffffULL, res2; 43 (void)VALGRIND_MAKE_MEM_UNDEFINED(&res, 8); 44 assert(1 == size || 2 == size || 4 == size || 8 == size); 45 46 for (i = 0; i < size; i++) { 47 mask <<= 8; 48 mask |= (U8)byte; 49 } 50 51 res &= mask; 52 53 // res is now considered partially defined, but we know exactly what its 54 // value is (it happens to be the same as its metavalue). 55 56 (void)VALGRIND_GET_VBITS(&res, &shres, 8); 57 res2 = res; 58 (void)VALGRIND_MAKE_MEM_DEFINED(&res2, 8); // avoid the 'undefined' warning 59 assert(res2 == shres); 60 return res; 61 } 62 63 U1 make_defined ( U1 x ) 64 { 65 volatile U1 xx = x; 66 (void)VALGRIND_MAKE_MEM_DEFINED(&xx, 1); 67 return xx; 68 } 69 70 void check(U1* arr, int n, char* who) 71 { 72 int i; 73 U1* shadow = malloc(n); 74 U1 arr_i; 75 U8 sum = 0; 76 (void)VALGRIND_GET_VBITS(arr, shadow, n); 77 for (i = 0; i < n; i++) { 78 arr_i = make_defined(arr[i]); 79 if (arr_i != shadow[i]) { 80 fprintf(stderr, "\n\nFAILURE: %s, byte %d -- " 81 "is 0x%x, should be 0x%x\n\n", 82 who, i, shadow[i], arr[i]); 83 exit(1); 84 } 85 sum += (U8)arr_i; 86 } 87 free(shadow); 88 printf("test passed, sum = %llu (%9.5f per byte)\n", 89 sum, (F8)sum / (F8)n); 90 } 91 92 static inline U4 randomU4 ( void ) 93 { 94 static U4 n = 0; 95 /* From "Numerical Recipes in C" 2nd Edition */ 96 n = 1664525UL * n + 1013904223UL; 97 return n; 98 } 99 100 static inline U1 randomU1 ( void ) 101 { 102 return 0xFF & (randomU4() >> 13); 103 } 104 105 // NB! 300000 is really not enough to shake out all failures. 106 // Increasing it by a factor of 256 is, but makes the test take 107 // the best part of an hour. 108 #define N_BYTES (300000 /* * 256 */) 109 #define N_EVENTS (5 * N_BYTES) 110 111 112 void do_test_at ( U1* arr ) 113 { 114 int i; 115 116 U4 mv1 = 0, mv2 = 0, mv4 = 0, mv8 = 0, mv4f = 0, mv8f = 0; 117 118 /* Fill arr with random bytes whose shadows match them. */ 119 if (0) printf("-------- arr = %p\n", arr); 120 121 printf("initialising\n"); 122 for (i = 0; i < N_BYTES; i++) 123 arr[i] = (U1)build(1, randomU1()); 124 125 printf("post-initialisation check\n"); 126 check(arr, N_BYTES, "after initialisation"); 127 128 /* Now do huge numbers of memory copies. */ 129 printf("doing copies\n"); 130 for (i = 0; i < N_EVENTS; i++) { 131 U4 ty, src, dst; 132 ty = (randomU4() >> 13) % 5; 133 tryagain: 134 src = (randomU4() >> 1) % N_BYTES; 135 dst = (randomU4() >> 3) % N_BYTES; 136 switch (ty) { 137 case 0: { // U1 138 *(U1*)(arr+dst) = *(U1*)(arr+src); 139 mv1++; 140 break; 141 } 142 case 1: { // U2 143 if (src+2 >= N_BYTES || dst+2 >= N_BYTES) 144 goto tryagain; 145 *(U2*)(arr+dst) = *(U2*)(arr+src); 146 mv2++; 147 break; 148 } 149 case 2: { // U4 150 if (src+4 >= N_BYTES || dst+4 >= N_BYTES) 151 goto tryagain; 152 *(U4*)(arr+dst) = *(U4*)(arr+src); 153 mv4++; 154 break; 155 } 156 case 3: { // U8 157 if (src+8 >= N_BYTES || dst+8 >= N_BYTES) 158 goto tryagain; 159 *(U8*)(arr+dst) = *(U8*)(arr+src); 160 mv8++; 161 break; 162 } 163 /* Don't bother with 32-bit floats. These cause 164 horrible complications, as discussed in sh-mem.c. */ 165 /* 166 case 4: { // F4 167 if (src+4 >= N_BYTES || dst+4 >= N_BYTES) 168 goto tryagain; 169 *(F4*)(arr+dst) = *(F4*)(arr+src); 170 mv4f++; 171 break; 172 } 173 */ 174 case 4: { // F8 175 if (src+8 >= N_BYTES || dst+8 >= N_BYTES) 176 goto tryagain; 177 #if defined(__i386__) 178 /* Copying via an x87 register causes the test to fail, 179 because (I think) some obscure values that are FP 180 denormals get changed during the copy due to the FPU 181 normalising, or rounding, or whatever, them. This 182 causes them to no longer bit-for-bit match the 183 accompanying metadata. Yet we still need to do a 184 genuine 8-byte load/store to test the relevant memcheck 185 {LOADV8,STOREV8} routines. Hence use the MMX registers 186 instead, as copying through them should be 187 straightforward.. */ 188 __asm__ __volatile__( 189 "movq (%1), %%mm2\n\t" 190 "movq %%mm2, (%0)\n\t" 191 "emms" 192 : : "r"(arr+dst), "r"(arr+src) : "memory" 193 ); 194 #elif defined(__linux__) && defined(__arm__) && !defined(__aarch64__) 195 /* On arm32, many compilers generate a 64-bit float move 196 using two 32 bit integer registers, which completely 197 defeats this test. Hence force a 64-bit NEON load and 198 store. I guess this will break the build on non-NEON 199 capable targets. */ 200 __asm__ __volatile__ ( 201 "vld1.64 {d7},[%0] ; vst1.64 {d7},[%1] " 202 : : "r"(arr+src), "r"(arr+dst) : "d7","memory" 203 ); 204 #else 205 /* Straightforward. On amd64, this gives a load/store of 206 the bottom half of an xmm register. On ppc32/64 this 207 is a straighforward load/store of an FP register. */ 208 *(F8*)(arr+dst) = *(F8*)(arr+src); 209 #endif 210 mv8f++; 211 break; 212 } 213 default: 214 fprintf(stderr, "sh-mem-random: bad size\n"); 215 exit(0); 216 } 217 } 218 219 printf("final check\n"); 220 check(arr, N_BYTES, "final check"); 221 222 printf("counts 1/2/4/8/F4/F8: %d %d %d %d %d %d\n", 223 mv1, mv2, mv4, mv8, mv4f, mv8f); 224 } 225 226 227 228 int main(void) 229 { 230 U1* arr; 231 232 if (0 == RUNNING_ON_VALGRIND) { 233 fprintf(stderr, "error: this program only works when run under Valgrind\n"); 234 exit(1); 235 } 236 237 printf("-------- testing non-auxmap range --------\n"); 238 239 arr = malloc(N_BYTES); 240 assert(arr); 241 do_test_at(arr); 242 free(arr); 243 244 if (sizeof(void*) == 8) { 245 // 64-bit platform. 246 int tries; 247 int nbytes_p; 248 // (U1*)(UWord)constULL funny casting to keep gcc quiet on 249 // 32-bit platforms 250 U1* huge_addr = (U1*)(UWord)0x6600000000ULL; // 408GB 251 // Note, kernel 2.6.? on Athlon64 refuses fixed mmap requests 252 // at above 512GB. 253 254 printf("-------- testing auxmap range --------\n"); 255 256 nbytes_p = (N_BYTES + PAGE_SIZE) & ~(PAGE_SIZE-1); 257 258 for (tries = 0; tries < 10; tries++) { 259 arr = mmap(huge_addr, nbytes_p, PROT_READ|PROT_WRITE, 260 MAP_FIXED|MAP_PRIVATE|MAP_ANONYMOUS, -1,0); 261 if (arr != MAP_FAILED) 262 break; 263 // hmm. fudge the address and try again. 264 huge_addr += (randomU4() & ~(PAGE_SIZE-1)); 265 } 266 267 if (tries >= 10) { 268 fprintf(stderr, "sh-mem-random: can't mmap hi-mem\n"); 269 exit(0); 270 } 271 assert(arr != MAP_FAILED); 272 273 do_test_at(arr); 274 } 275 276 return 0; 277 278 } 279