// Tests shadow memory correctness for 16-byte/32-byte/etc. vector
// loads/stores.  Requires vector_copy() and VECTOR_BYTES to be
// specified somehow.
//
// NOTE(review): this is a Memcheck regression-test template -- its
// stderr output is compared against expected-output files, and several
// constructs below (identical switch cases, CFENCE barriers, deliberate
// out-of-bounds accesses) are intentional test machinery, not bugs.

#ifndef VECTOR_BYTES
#error "VECTOR_BYTES must be defined"
#endif

#include <assert.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "tests/malloc.h"
#include "memcheck/memcheck.h"

// What we're actually testing
// .. is vector_copy, which should be defined before this point

// All the sizes here are in *bytes*, not bits.

typedef unsigned char U1;
typedef unsigned short U2;
typedef unsigned int U4;
typedef unsigned long long U8;
typedef unsigned long int UWord;

typedef unsigned char Bool;
#define True  ((Bool)1)
#define False ((Bool)0)

// Compiler-only fence: emits no instructions, but the "cc","memory"
// clobbers stop the compiler from reordering or merging the statements
// on either side of it.
#define CFENCE __asm__ __volatile__("":::"cc","memory")

// Determine host endianness by inspecting the byte layout of a 32-bit
// constant.  Returns "little" or "big"; asserts on anything else.
// NOTE(review): if built with NDEBUG the final assert vanishes and
// control falls off the end (UB); harmless here since the test is not
// built that way, but worth knowing.
static __attribute__((noinline)) const char* get_endianness ( void )
{
   volatile U4 w32 = 0x88776655;
   volatile U1* p = (U1*)&w32;
   if (p[0] == 0x55) {
      assert(p[3] == 0x88);
      return "little";
   }
   if (p[0] == 0x88) {
      assert(p[3] == 0x55);
      return "big";
   }
   assert(0);
}

// Deterministic 32-bit linear-congruential PRNG (state starts at 0, so
// every run produces the same sequence).  Not thread-safe.
static inline U4 randomU4 ( void )
{
   static U4 n = 0;
   /* From "Numerical Recipes in C" 2nd Edition */
   n = 1664525UL * n + 1013904223UL;
   return n;
}

// Random byte taken from bits 13..20 of the LCG state (the low bits of
// an LCG are of poor quality).
static inline U1 randomU1 ( void )
{
   return 0xFF & (randomU4() >> 13);
}

#define N_BYTES  80000
#define N_EVENTS (N_BYTES * 2)

// Return x, but with its definedness bits set to be its own value bits.
// Trick: make all 8 V bits of |res| undefined, then AND with x.  Bits of
// x that are 0 force both the value bit and the V bit to 0 (defined);
// bits that are 1 keep the undefined V bit.
static inline U1 self_shadow ( U1 x )
{
   U1 res = 0xFF;
   (void) VALGRIND_MAKE_MEM_UNDEFINED(&res, 1);
   res &= x;
   return res;
}

// Read back the V (definedness) bits of x: 0 bits = defined,
// 1 bits = undefined.  VALGRIND_GET_VBITS returns 1 under Valgrind and
// 0 when running natively, hence the assert.
static inline U1 get_shadow ( U1 x )
{
   U1 res = 0;
   U4 r = VALGRIND_GET_VBITS(&x, &res, 1);
   assert(r == 1 || r == 0);
   return res;
}

// Return x with all its V bits forced to "defined".
static inline U1 make_def ( U1 x )
{
   U1 y = x;
   (void) VALGRIND_MAKE_MEM_DEFINED(&y, 1);
   return y;
}

// Return x with all its V bits forced to "undefined".
static inline U1 make_undef ( U1 x )
{
   U1 y = x;
   (void) VALGRIND_MAKE_MEM_UNDEFINED(&y, 1);
   return y;
}

// Mark the single byte at |dst| as unaddressable.
static void make_noaccess ( U1* dst )
{
   (void) VALGRIND_MAKE_MEM_NOACCESS(dst, 1);
}

// Call fn(arg1, arg2) from one of 32 syntactically identical but
// distinct call sites, selected by the low 5 bits of arg1.  All cases do
// the same thing; presumably the point is to present Memcheck with many
// different code addresses for the call, with the CFENCEs preventing the
// compiler from collapsing the cases into one -- TODO confirm intent.
static void apply ( void(*fn)(U4,Bool), U4 arg1, Bool arg2 )
{
   switch (arg1 & (32-1)) {
      case 0:  CFENCE; fn(arg1, arg2); CFENCE; break;
      case 1:  CFENCE; fn(arg1, arg2); CFENCE; break;
      case 2:  CFENCE; fn(arg1, arg2); CFENCE; break;
      case 3:  CFENCE; fn(arg1, arg2); CFENCE; break;
      case 4:  CFENCE; fn(arg1, arg2); CFENCE; break;
      case 5:  CFENCE; fn(arg1, arg2); CFENCE; break;
      case 6:  CFENCE; fn(arg1, arg2); CFENCE; break;
      case 7:  CFENCE; fn(arg1, arg2); CFENCE; break;
      case 8:  CFENCE; fn(arg1, arg2); CFENCE; break;
      case 9:  CFENCE; fn(arg1, arg2); CFENCE; break;
      case 10: CFENCE; fn(arg1, arg2); CFENCE; break;
      case 11: CFENCE; fn(arg1, arg2); CFENCE; break;
      case 12: CFENCE; fn(arg1, arg2); CFENCE; break;
      case 13: CFENCE; fn(arg1, arg2); CFENCE; break;
      case 14: CFENCE; fn(arg1, arg2); CFENCE; break;
      case 15: CFENCE; fn(arg1, arg2); CFENCE; break;
      case 16: CFENCE; fn(arg1, arg2); CFENCE; break;
      case 17: CFENCE; fn(arg1, arg2); CFENCE; break;
      case 18: CFENCE; fn(arg1, arg2); CFENCE; break;
      case 19: CFENCE; fn(arg1, arg2); CFENCE; break;
      case 20: CFENCE; fn(arg1, arg2); CFENCE; break;
      case 21: CFENCE; fn(arg1, arg2); CFENCE; break;
      case 22: CFENCE; fn(arg1, arg2); CFENCE; break;
      case 23: CFENCE; fn(arg1, arg2); CFENCE; break;
      case 24: CFENCE; fn(arg1, arg2); CFENCE; break;
      case 25: CFENCE; fn(arg1, arg2); CFENCE; break;
      case 26: CFENCE; fn(arg1, arg2); CFENCE; break;
      case 27: CFENCE; fn(arg1, arg2); CFENCE; break;
      case 28: CFENCE; fn(arg1, arg2); CFENCE; break;
      case 29: CFENCE; fn(arg1, arg2); CFENCE; break;
      case 30: CFENCE; fn(arg1, arg2); CFENCE; break;
      case 31: CFENCE; fn(arg1, arg2); CFENCE; break;
      default: CFENCE; fn(arg1, arg2); CFENCE; break;
   }
}

// Try doing some partial-loads-ok/not-ok testing.
/* Test cases:
   - load, aligned, all no-access
     ==> addr err
   - load, aligned, 1 to VECTOR_BYTES-1 initial bytes accessible,
     then at least one unaccessible byte,
     then remaining bytes in any state.
     ==> if PLO then no error, but returned V bits are undefined
           for unaccessible bytes
         else
           error; and V bits are defined for unaccessible bytes

   All of the above, but non-aligned:
   -- all return an addressing error
*/

// Run one partial-load test case: a vector load from a block whose first
// |nInitialValid| bytes are accessible+defined and whose remaining bytes
// cycle through NOACCESS/UNDEF/DEF.  Prints the resulting V bits as
// 'd' (all defined), 'U' (all undefined), or '?' (mixed).
static void do_partial_load_case ( U4 nInitialValid, Bool aligned )
{
   fprintf(stderr,
           "------ PL %s case with %u leading acc+def bytes ------\n\n",
           aligned ? "Aligned" : "Unaligned", nInitialValid);

   // 64-byte zeroed block, vector-aligned so the aligned cases really
   // are aligned; the unaligned cases offset the base pointer by one.
   void *temp;
   if (posix_memalign(&temp, VECTOR_BYTES, 64) != 0)
      abort();
   U1* block = temp;
   U4 j;
   for (j = 0; j < 64; j++) block[j] = 0;

   if (!aligned) block++;

   // Make the block have this pattern:
   //   block[0 .. i-1]            accessible and defined
   //   block[i .. VECTOR_BYTES-1] repeating NOACCESS, UNDEF, DEF
   // hence block[i], at the very least, is always NOACCESS
   U4 i = nInitialValid;
   for (j = i; j < VECTOR_BYTES; j++) {
      switch ((j-i) % 3) {
         case 0: make_noaccess(&block[j]); break;
         case 1: block[j] = make_undef(block[j]); break;
         case 2: /* already acc and def */ break;
      }
   }

   // Do the access, possibly generating an error, and show the
   // resulting V bits
   U1 dst[VECTOR_BYTES];
   vector_copy(&dst[0], block);

   U1 dst_vbits[VECTOR_BYTES];
   U4 r = VALGRIND_GET_VBITS(&dst[0], &dst_vbits[0], VECTOR_BYTES);
   assert(r == 1 || r == 0);

   fprintf(stderr, "\n");
   for (j = 0; j < VECTOR_BYTES; j++) {
      fprintf(stderr, "%c", dst_vbits[j] == 0    ? 'd'
                          : dst_vbits[j] == 0xFF ? 'U' : '?');
   }
   fprintf(stderr, "\n\n");

   // Also let's use the resulting value, to check we get an undef
   // error
   U1 sum = 0;
   for (j = 0; j < VECTOR_BYTES; j++)
      sum ^= dst[j];

   // Branching on |sum| makes Memcheck report an undefined-value error
   // if any undefined byte reached it; both arms deliberately produce
   // identical (empty) output.
   if (sum == 42) {
      CFENCE; fprintf(stderr, "%s", ""); CFENCE;
   } else {
      CFENCE; fprintf(stderr, "%s", ""); CFENCE;
   }

   fprintf(stderr, "\n");

   // Undo the unaligned offset so we free the pointer posix_memalign
   // actually returned.
   if (!aligned) block--;
   free(block);
}

int main ( void )
{
   fprintf(stderr, "sh-mem-vec%d: config: %s-endian, %d-bit word size\n",
           VECTOR_BYTES * 8, get_endianness(), (int)(8 * sizeof(void*)));

   U4 i;
   void *temp;
   if (posix_memalign(&temp, VECTOR_BYTES, N_BYTES) != 0)
      abort();
   U1* buf = temp;

   // Fill |buf| with bytes, so that zero bits have a zero shadow
   // (are defined) and one bits have a one shadow (are undefined).
   // First half: alternating 32-byte runs of 0x00/0xFF; second half:
   // random bytes.
   for (i = 0; i < N_BYTES/2; i++) {
      buf[i] = self_shadow( (i & (1<<5)) ? 0x00 : 0xFF );
   }
   for ( ; i < N_BYTES; i++) {
      buf[i] = self_shadow( randomU1() );
   }

   // Randomly copy the data around.  Once every 8 srcs/dsts, force
   // the src or dst to be aligned.  Once every 64, force both to be
   // aligned.  So as to give the fast (aligned) paths some checking.
   const U4 n_copies = N_EVENTS;
   U4 n_d_aligned = 0;
   U4 n_s_aligned = 0;
   U4 n_both_aligned = 0;
   U4 n_fails = 0;

   for (i = 0; i < n_copies; i++) {
      U4 si = randomU4() % (N_BYTES-VECTOR_BYTES);
      U4 di = randomU4() % (N_BYTES-VECTOR_BYTES);
      if (0 == (randomU1() & 7))  si &= ~(VECTOR_BYTES-1);
      if (0 == (randomU1() & 7))  di &= ~(VECTOR_BYTES-1);
      if (0 == (randomU1() & 63)) { di &= ~(VECTOR_BYTES-1); si &= ~(VECTOR_BYTES-1); }

      void* dst = &buf[di];
      void* src = &buf[si];

      if (0 == (((UWord)src) & (VECTOR_BYTES-1))) n_s_aligned++;
      if (0 == (((UWord)dst) & (VECTOR_BYTES-1))) n_d_aligned++;
      if (0 == (((UWord)src) & (VECTOR_BYTES-1)) && 0 == (((UWord)dst) & (VECTOR_BYTES-1)))
         n_both_aligned++;

      vector_copy(dst, src);
   }

   // Since every byte's V bits were set equal to its value bits, shadow
   // memory was copied correctly iff, for every byte, value == V bits.
   // Count mismatches, and histogram the values seen as a sanity check.
   U4 freq[256];
   for (i = 0; i < 256; i++)
      freq[i] = 0;

   for (i = 0; i < N_BYTES; i++) {
      //if (i > 0 && 0 == (i & 0x0F)) fprintf(stderr, "\n");
      U1 v_actual = make_def(buf[i]);   // defined copy, so the compare below is error-free
      U1 v_shadow = get_shadow(buf[i]);
      if (v_actual != v_shadow) n_fails++;
      //fprintf(stderr, "%02x:%02x ", (U4)v_actual, (U4)v_shadow);
      freq[(U4)v_actual]++;
   }

   fprintf(stderr, "\n");
   U4 totFreq = 0;
   for (i = 0; i < 256; i++) {
      totFreq += freq[i];
      if (i > 0 && (0 == (i % 16))) fprintf(stderr, "\n");
      fprintf(stderr, "%5u ", freq[i]);
   }
   assert(totFreq == N_BYTES);

   fprintf(stderr, "\n\n");
   fprintf(stderr, "%u copies, %u d_aligned, %u s_aligned, %u both_aligned\n",
           n_copies, n_d_aligned, n_s_aligned, n_both_aligned);
   fprintf(stderr, "%u %s\n", n_fails, n_fails == 0 ? "failures" : "FAILURES");

   // Check that we can detect underruns of the block.
   // (The out-of-bounds accesses below are deliberate: they are what is
   // being tested.)
   fprintf(stderr, "\nExpect 2 x no error\n" );
   vector_copy( &buf[100], &buf[0] );
   vector_copy( &buf[0],   &buf[100] );

   fprintf(stderr, "\nExpect 2 x error\n\n" );
   vector_copy( &buf[100], &buf[-1] );  // invalid rd
   vector_copy( &buf[-1],  &buf[100] ); // invalid wr

   // and overruns ..
   fprintf(stderr, "\nExpect 2 x no error\n" );
   vector_copy( &buf[200], &buf[N_BYTES-VECTOR_BYTES + 0] );
   vector_copy( &buf[N_BYTES-VECTOR_BYTES + 0], &buf[200] );

   fprintf(stderr, "\nExpect 2 x error\n\n" );
   vector_copy( &buf[200], &buf[N_BYTES-VECTOR_BYTES + 1] );
   vector_copy( &buf[N_BYTES-VECTOR_BYTES + 1], &buf[200] );

   free(buf);
   fprintf(stderr, "\n");

   // Exercise every partial-load offset, aligned and then unaligned,
   // routing each call through apply() so it issues from a distinct
   // call site.
   for (i = 0; i < VECTOR_BYTES; i++)
      apply( do_partial_load_case, i, True/*aligned*/ );

   for (i = 0; i < VECTOR_BYTES; i++)
      apply( do_partial_load_case, i, False/*not aligned*/ );

   return 0;
}