1 #include <stdio.h> 2 #include <stdlib.h> 3 #include <assert.h> 4 #include <cutils/memory.h> 5 #include <time.h> 6 7 /* 8 * All systems must implement or emulate the rdhwr instruction to read 9 * the userlocal register. Systems that emulate also return teh count register 10 * when accessing register $2 so this should work on most systems 11 */ 12 #define USE_RDHWR 13 14 #ifdef USE_RDHWR 15 #define UNITS "cycles" 16 #define SCALE 2 /* Most CPU's */ 17 static inline uint32_t 18 get_count(void) 19 { 20 uint32_t res; 21 asm volatile (".set push; .set mips32r2; rdhwr %[res],$2; .set pop" : [res] "=r" (res) : : "memory"); 22 return res; 23 } 24 #else 25 #define UNITS "ns" 26 #define SCALE 1 27 static inline uint32_t 28 get_count(void) 29 { 30 struct timespec now; 31 uint32_t res; 32 clock_gettime(CLOCK_REALTIME, &now); 33 res = (uint32_t)(now.tv_sec * 1000000000LL + now.tv_nsec); 34 // printf ("now=%d.%09d res=%d\n", (int)now.tv_sec, (int)now.tv_nsec, res); 35 return res; 36 } 37 #endif 38 39 uint32_t overhead; 40 void 41 measure_overhead(void) 42 { 43 int i; 44 uint32_t start, stop, delta; 45 for (i = 0; i < 32; i++) { 46 start = get_count(); 47 stop = get_count(); 48 delta = stop - start; 49 if (overhead == 0 || delta < overhead) 50 overhead = delta; 51 } 52 printf("overhead is %d"UNITS"\n", overhead); 53 } 54 55 uint32_t 56 timeone(void (*fn)(), void *d, uint32_t val, uint32_t bytes) 57 { 58 uint32_t start, stop, delta; 59 start = get_count(); 60 (*fn)(d, val, bytes); 61 stop = get_count(); 62 delta = stop - start - overhead; 63 // printf ("start=0x%08x stop=0x%08x delta=0x%08x\n", start, stop, delta); 64 return delta * SCALE; 65 } 66 67 /* define VERIFY to check that memset only touches the bytes it's supposed to */ 68 /*#define VERIFY*/ 69 70 /* 71 * Using a big arena means that memset will most likely miss in the cache 72 * NB Enabling verification effectively warms up the cache... 73 */ 74 #define ARENASIZE 0x1000000 75 #ifdef VERIFY 76 char arena[ARENASIZE+8]; /* Allow space for guard words */ 77 #else 78 char arena[ARENASIZE]; 79 #endif 80 81 void 82 testone(char *tag, void (*fn)(), int trials, int minbytes, int maxbytes, int size, int threshold) 83 { 84 int offset; 85 void *d; 86 void *p; 87 uint32_t v, notv = 0; 88 uint32_t n; 89 int i, units; 90 int totalunits = 0, totalbytes = 0, samples = 0; 91 92 /* Reset RNG to ensure each test uses same random values */ 93 srand(0); /* FIXME should be able to use some other seed than 0 */ 94 95 for (i = 0; i < trials; i++) { 96 n = minbytes + (rand() % (maxbytes-minbytes)); /* How many bytes to do */ 97 offset = ((rand() % (ARENASIZE-n))); /* Where to start */ 98 99 #ifdef VERIFY 100 offset += 4; /* Allow space for guard word at beginning */ 101 #endif 102 v = rand(); 103 104 /* Adjust alignment and sizes based on transfer size */ 105 switch (size) { 106 case 1: 107 v &= 0xff; 108 notv = ~v & 0xff; 109 break; 110 case 2: 111 v &= 0xffff; 112 notv = ~v & 0xffff; 113 offset &= ~1; 114 n &= ~1; 115 break; 116 case 4: 117 notv = ~v; 118 offset &= ~3; 119 n &= ~3; 120 break; 121 } 122 123 d = &arena[offset]; 124 125 #ifdef VERIFY 126 /* Initialise the area and guard words */ 127 for (p = &arena[offset-4]; p < (void *)&arena[offset+n+4]; p = (void *)((uint32_t)p + size)) { 128 if (size == 1) 129 *(uint8_t *)p = notv; 130 else if (size == 2) 131 *(uint16_t *)p = notv; 132 else if (size == 4) 133 *(uint32_t *)p = notv; 134 } 135 #endif 136 units = timeone(fn, d, v, n); 137 #ifdef VERIFY 138 /* Check the area and guard words */ 139 for (p = &arena[offset-4]; p < (void *)&arena[offset+n+4]; p = (void *)((uint32_t)p + size)) { 140 uint32_t got = 0; 141 if (size == 1) 142 got = *(uint8_t *)p; 143 else if (size == 2) 144 got = *(uint16_t *)p; 145 else if (size == 4) 146 got = *(uint32_t *)p; 147 if (p < (void *)&arena[offset]) { 148 if (got != notv) 149 printf ("%s: verify failure: preguard:%p d=%p v=%08x got=%08x n=%d\n", tag, p, d, v, got, n); 150 } 151 else if (p < (void *)&arena[offset+n]) { 152 if (got != v) 153 printf ("%s: verify failure: arena:%p d=%p v=%08x got=%08x n=%d\n", tag, p, d, v, n); 154 } 155 else { 156 if (got != notv) 157 printf ("%s: verify failure: postguard:%p d=%p v=%08x got=%08x n=%d\n", tag, p, d, v, n); 158 } 159 } 160 #endif 161 162 /* If the cycle count looks reasonable include it in the statistics */ 163 if (units < threshold) { 164 totalbytes += n; 165 totalunits += units; 166 samples++; 167 } 168 } 169 170 printf("%s: samples=%d avglen=%d avg" UNITS "=%d bp"UNITS"=%g\n", 171 tag, samples, totalbytes/samples, totalunits/samples, (double)totalbytes/(double)totalunits); 172 } 173 174 extern void android_memset32_dumb(uint32_t* dst, uint32_t value, size_t size); 175 extern void android_memset16_dumb(uint32_t* dst, uint16_t value, size_t size); 176 extern void android_memset32_test(uint32_t* dst, uint32_t value, size_t size); 177 extern void android_memset16_test(uint32_t* dst, uint16_t value, size_t size); 178 extern void memset_cmips(void* dst, int value, size_t size); 179 extern void memset_omips(void* dst, int value, size_t size); 180 181 int 182 main(int argc, char **argv) 183 { 184 int i; 185 struct { 186 char *type; 187 int trials; 188 int minbytes, maxbytes; 189 } *pp, params[] = { 190 {"small", 10000, 0, 64}, 191 {"medium", 10000, 64, 512}, 192 {"large", 10000, 512, 1280}, 193 {"varied", 10000, 0, 1280}, 194 }; 195 #define NPARAMS (sizeof(params)/sizeof(params[0])) 196 struct { 197 char *name; 198 void (*fn)(); 199 int size; 200 } *fp, functions[] = { 201 {"dmemset16", (void (*)())android_memset16_dumb, 2}, 202 {"tmemset16", (void (*)())android_memset16_test, 2}, 203 {"lmemset16", (void (*)())android_memset16, 2}, 204 205 {"dmemset32", (void (*)())android_memset32_dumb, 4}, 206 {"tmemset32", (void (*)())android_memset32_test, 4}, 207 {"lmemset32", (void (*)())android_memset32, 4}, 208 209 {"cmemset", (void (*)())memset_cmips, 1}, 210 {"omemset", (void (*)())memset_omips, 1}, 211 {"lmemset", (void (*)())memset, 1}, 212 }; 213 #define NFUNCTIONS (sizeof(functions)/sizeof(functions[0])) 214 char tag[40]; 215 int threshold; 216 217 measure_overhead(); 218 219 /* Warm up the page cache */ 220 memset(arena, 0xff, ARENASIZE); /* use 0xff now to avoid COW later */ 221 222 for (fp = functions; fp < &functions[NFUNCTIONS]; fp++) { 223 (fp->fn)(arena, 0xffffffff, ARENASIZE); /* one call to get the code into Icache */ 224 for (pp = params; pp < ¶ms[NPARAMS]; pp++) { 225 sprintf(tag, "%10s: %7s %4d-%4d", fp->name, pp->type, pp->minbytes, pp->maxbytes); 226 227 /* Set the cycle threshold */ 228 threshold = pp->maxbytes * 4 * 10; /* reasonable for cycles and ns */ 229 testone(tag, fp->fn, pp->trials, pp->minbytes, pp->maxbytes, fp->size, threshold); 230 } 231 printf ("\n"); 232 } 233 234 return 0; 235 } 236