Home | History | Annotate | Download | only in memset_mips
      1 #include <stdio.h>
      2 #include <stdlib.h>
      3 #include <assert.h>
      4 #include <cutils/memory.h>
      5 #include <time.h>
      6 
/*
 * All systems must implement or emulate the rdhwr instruction to read
 * the userlocal register. Systems that emulate it also return the count
 * register when accessing register $2, so this should work on most systems.
 */
     12 #define USE_RDHWR
     13 
     14 #ifdef USE_RDHWR
     15 #define UNITS "cycles"
     16 #define SCALE 2			/* Most CPU's */
     17 static inline uint32_t
     18 get_count(void)
     19 {
     20   uint32_t res;
     21   asm volatile (".set push; .set mips32r2; rdhwr %[res],$2; .set pop" : [res] "=r" (res) : : "memory");
     22   return res;
     23 }
     24 #else
     25 #define UNITS "ns"
     26 #define SCALE 1
     27 static inline uint32_t
     28 get_count(void)
     29 {
     30   struct timespec now;
     31   uint32_t res;
     32   clock_gettime(CLOCK_REALTIME, &now);
     33   res = (uint32_t)(now.tv_sec * 1000000000LL + now.tv_nsec);
     34   // printf ("now=%d.%09d res=%d\n", (int)now.tv_sec, (int)now.tv_nsec, res);
     35   return res;
     36 }
     37 #endif
     38 
     39 uint32_t overhead;
     40 void
     41 measure_overhead(void)
     42 {
     43   int i;
     44   uint32_t start, stop, delta;
     45   for (i = 0; i < 32; i++) {
     46     start = get_count();
     47     stop = get_count();
     48     delta = stop - start;
     49     if (overhead == 0 || delta < overhead)
     50       overhead = delta;
     51   }
     52   printf("overhead is %d"UNITS"\n", overhead);
     53 }
     54 
     55 uint32_t
     56 timeone(void (*fn)(), void *d, uint32_t val, uint32_t bytes)
     57 {
     58   uint32_t start, stop, delta;
     59   start = get_count();
     60   (*fn)(d, val, bytes);
     61   stop = get_count();
     62   delta = stop - start - overhead;
     63   // printf ("start=0x%08x stop=0x%08x delta=0x%08x\n", start, stop, delta);
     64   return delta * SCALE;
     65 }
     66 
     67 /* define VERIFY to check that memset only touches the bytes it's supposed to */
     68 /*#define VERIFY*/
     69 
     70 /*
     71  * Using a big arena means that memset will most likely miss in the cache
     72  * NB Enabling verification effectively warms up the cache...
     73  */
     74 #define ARENASIZE 0x1000000
     75 #ifdef VERIFY
     76 char arena[ARENASIZE+8];	/* Allow space for guard words */
     77 #else
     78 char arena[ARENASIZE];
     79 #endif
     80 
     81 void
     82 testone(char *tag, void (*fn)(), int trials, int minbytes, int maxbytes, int size, int threshold)
     83 {
     84   int offset;
     85   void *d;
     86   void *p;
     87   uint32_t v, notv = 0;
     88   uint32_t n;
     89   int i, units;
     90   int totalunits = 0, totalbytes = 0, samples = 0;
     91 
     92   /* Reset RNG to ensure each test uses same random values */
     93   srand(0);			/* FIXME should be able to use some other seed than 0 */
     94 
     95   for (i = 0; i < trials; i++) {
     96     n = minbytes + (rand() % (maxbytes-minbytes));	/* How many bytes to do */
     97     offset = ((rand() % (ARENASIZE-n)));		/* Where to start */
     98 
     99 #ifdef VERIFY
    100     offset += 4;		/* Allow space for guard word at beginning */
    101 #endif
    102     v = rand();
    103 
    104     /* Adjust alignment and sizes based on transfer size */
    105     switch (size) {
    106     case 1:
    107       v &= 0xff;
    108       notv = ~v & 0xff;
    109       break;
    110     case 2:
    111       v &= 0xffff;
    112       notv = ~v & 0xffff;
    113       offset &= ~1;
    114       n &= ~1;
    115       break;
    116     case 4:
    117       notv = ~v;
    118       offset &= ~3;
    119       n &= ~3;
    120       break;
    121     }
    122 
    123     d = &arena[offset];
    124 
    125 #ifdef VERIFY
    126     /* Initialise the area and guard words */
    127     for (p = &arena[offset-4]; p < (void *)&arena[offset+n+4]; p = (void *)((uint32_t)p + size)) {
    128       if (size == 1)
    129 	*(uint8_t *)p = notv;
    130       else if (size == 2)
    131 	*(uint16_t *)p = notv;
    132       else if (size == 4)
    133 	*(uint32_t *)p = notv;
    134     }
    135 #endif
    136     units = timeone(fn, d, v, n);
    137 #ifdef VERIFY
    138     /* Check the area and guard words */
    139     for (p = &arena[offset-4]; p < (void *)&arena[offset+n+4]; p = (void *)((uint32_t)p + size)) {
    140       uint32_t got = 0;
    141       if (size == 1)
    142 	got = *(uint8_t *)p;
    143       else if (size == 2)
    144 	got = *(uint16_t *)p;
    145       else if (size == 4)
    146 	got = *(uint32_t *)p;
    147       if (p < (void *)&arena[offset]) {
    148 	if (got != notv)
    149 	  printf ("%s: verify failure: preguard:%p d=%p v=%08x got=%08x n=%d\n", tag, p, d, v, got, n);
    150       }
    151       else if (p < (void *)&arena[offset+n]) {
    152 	if (got != v)
    153 	  printf ("%s: verify failure: arena:%p d=%p v=%08x got=%08x n=%d\n", tag, p, d, v, n);
    154       }
    155       else {
    156 	if (got != notv)
    157 	  printf ("%s: verify failure: postguard:%p d=%p v=%08x got=%08x n=%d\n", tag, p, d, v, n);
    158       }
    159     }
    160 #endif
    161 
    162     /* If the cycle count looks reasonable include it in the statistics */
    163     if (units < threshold) {
    164       totalbytes += n;
    165       totalunits += units;
    166       samples++;
    167     }
    168   }
    169 
    170   printf("%s: samples=%d avglen=%d avg" UNITS "=%d bp"UNITS"=%g\n",
    171 	 tag, samples, totalbytes/samples, totalunits/samples, (double)totalbytes/(double)totalunits);
    172 }
    173 
/*
 * Memset implementations benchmarked by main(); their definitions are not
 * in this file.
 *
 * NOTE(review): the two memset16 prototypes declare dst as uint32_t* while
 * taking a uint16_t value - this looks like a copy/paste from the memset32
 * lines.  main() casts every entry to void (*)() before calling it, so the
 * declared pointer type does not affect the calls made here; confirm against
 * the real definitions before changing it.
 */
extern void android_memset32_dumb(uint32_t* dst, uint32_t value, size_t size);
extern void android_memset16_dumb(uint32_t* dst, uint16_t value, size_t size);
extern void android_memset32_test(uint32_t* dst, uint32_t value, size_t size);
extern void android_memset16_test(uint32_t* dst, uint16_t value, size_t size);
extern void memset_cmips(void* dst, int value, size_t size);
extern void memset_omips(void* dst, int value, size_t size);
    180 
    181 int
    182 main(int argc, char **argv)
    183 {
    184   int i;
    185   struct {
    186     char *type;
    187     int trials;
    188     int minbytes, maxbytes;
    189   } *pp, params[] = {
    190     {"small",  10000,   0,   64},
    191     {"medium", 10000,  64,  512},
    192     {"large",  10000, 512, 1280},
    193     {"varied", 10000,   0, 1280},
    194   };
    195 #define NPARAMS (sizeof(params)/sizeof(params[0]))
    196   struct {
    197     char *name;
    198     void (*fn)();
    199     int size;
    200   } *fp, functions[] = {
    201     {"dmemset16", (void (*)())android_memset16_dumb, 2},
    202     {"tmemset16", (void (*)())android_memset16_test, 2},
    203     {"lmemset16", (void (*)())android_memset16,      2},
    204 
    205     {"dmemset32", (void (*)())android_memset32_dumb, 4},
    206     {"tmemset32", (void (*)())android_memset32_test, 4},
    207     {"lmemset32", (void (*)())android_memset32,      4},
    208 
    209     {"cmemset",    (void (*)())memset_cmips,         1},
    210     {"omemset",    (void (*)())memset_omips,         1},
    211     {"lmemset",    (void (*)())memset,               1},
    212   };
    213 #define NFUNCTIONS (sizeof(functions)/sizeof(functions[0]))
    214   char tag[40];
    215   int threshold;
    216 
    217   measure_overhead();
    218 
    219   /* Warm up the page cache */
    220   memset(arena, 0xff, ARENASIZE); /* use 0xff now to avoid COW later */
    221 
    222   for (fp = functions; fp < &functions[NFUNCTIONS]; fp++) {
    223     (fp->fn)(arena, 0xffffffff, ARENASIZE);	/* one call to get the code into Icache */
    224     for (pp = params; pp < &params[NPARAMS]; pp++) {
    225       sprintf(tag, "%10s: %7s %4d-%4d", fp->name, pp->type, pp->minbytes, pp->maxbytes);
    226 
    227       /* Set the cycle threshold */
    228       threshold = pp->maxbytes * 4 * 10;	/* reasonable for cycles and ns */
    229       testone(tag, fp->fn, pp->trials, pp->minbytes, pp->maxbytes, fp->size, threshold);
    230     }
    231     printf ("\n");
    232   }
    233 
    234   return 0;
    235 }
    236