/*
 * Microbenchmark for math functions.
 *
 * Copyright (c) 2018, Arm Limited.
 * SPDX-License-Identifier: MIT
 */
      7 
      8 #undef _GNU_SOURCE
      9 #define _GNU_SOURCE 1
     10 #include <stdint.h>
     11 #include <stdlib.h>
     12 #include <stdio.h>
     13 #include <string.h>
     14 #include <time.h>
     15 #include <math.h>
     16 #include "mathlib.h"
     17 
     18 /* Number of measurements, best result is reported.  */
     19 #define MEASURE 60
     20 /* Array size.  */
     21 #define N 8000
     22 /* Iterations over the array.  */
     23 #define ITER 125
     24 
     25 static double *Trace;
     26 static size_t trace_size;
     27 static double A[N];
     28 static float Af[N];
     29 static long measurecount = MEASURE;
     30 static long itercount = ITER;
     31 
     32 static double
     33 dummy (double x)
     34 {
     35   return x;
     36 }
     37 
     38 static float
     39 dummyf (float x)
     40 {
     41   return x;
     42 }
     43 
     44 static double
     45 xypow (double x)
     46 {
     47   return pow (x, x);
     48 }
     49 
     50 static float
     51 xypowf (float x)
     52 {
     53   return powf (x, x);
     54 }
     55 
     56 static double
     57 xpow (double x)
     58 {
     59   return pow (x, 23.4);
     60 }
     61 
     62 static float
     63 xpowf (float x)
     64 {
     65   return powf (x, 23.4f);
     66 }
     67 
     68 static double
     69 ypow (double x)
     70 {
     71   return pow (2.34, x);
     72 }
     73 
     74 static float
     75 ypowf (float x)
     76 {
     77   return powf (2.34f, x);
     78 }
     79 
     80 static float
     81 sincosf_wrap (float x)
     82 {
     83   float s, c;
     84   sincosf (x, &s, &c);
     85   return s + c;
     86 }
     87 
     88 static const struct fun
     89 {
     90   const char *name;
     91   int prec;
     92   double lo;
     93   double hi;
     94   union
     95   {
     96     double (*d) (double);
     97     float (*f) (float);
     98   } fun;
     99 } funtab[] = {
    100 #define D(func, lo, hi) {#func, 'd', lo, hi, {.d = func}},
    101 #define F(func, lo, hi) {#func, 'f', lo, hi, {.f = func}},
    102 D (dummy, 1.0, 2.0)
    103 D (exp, -9.9, 9.9)
    104 D (exp, 0.5, 1.0)
    105 D (exp2, -9.9, 9.9)
    106 D (log, 0.01, 11.1)
    107 D (log, 0.999, 1.001)
    108 D (log2, 0.01, 11.1)
    109 D (log2, 0.999, 1.001)
    110 {"pow", 'd', 0.01, 11.1, {.d = xypow}},
    111 D (xpow, 0.01, 11.1)
    112 D (ypow, -9.9, 9.9)
    113 
    114 F (dummyf, 1.0, 2.0)
    115 F (expf, -9.9, 9.9)
    116 F (exp2f, -9.9, 9.9)
    117 F (logf, 0.01, 11.1)
    118 F (log2f, 0.01, 11.1)
    119 {"powf", 'f', 0.01, 11.1, {.f = xypowf}},
    120 F (xpowf, 0.01, 11.1)
    121 F (ypowf, -9.9, 9.9)
    122 {"sincosf", 'f', 0.1, 0.7, {.f = sincosf_wrap}},
    123 {"sincosf", 'f', 0.8, 3.1, {.f = sincosf_wrap}},
    124 {"sincosf", 'f', -3.1, 3.1, {.f = sincosf_wrap}},
    125 {"sincosf", 'f', 3.3, 33.3, {.f = sincosf_wrap}},
    126 {"sincosf", 'f', 100, 1000, {.f = sincosf_wrap}},
    127 {"sincosf", 'f', 1e6, 1e32, {.f = sincosf_wrap}},
    128 F (sinf, 0.1, 0.7)
    129 F (sinf, 0.8, 3.1)
    130 F (sinf, -3.1, 3.1)
    131 F (sinf, 3.3, 33.3)
    132 F (sinf, 100, 1000)
    133 F (sinf, 1e6, 1e32)
    134 F (cosf, 0.1, 0.7)
    135 F (cosf, 0.8, 3.1)
    136 F (cosf, -3.1, 3.1)
    137 F (cosf, 3.3, 33.3)
    138 F (cosf, 100, 1000)
    139 F (cosf, 1e6, 1e32)
    140 {0},
    141 #undef F
    142 #undef D
    143 };
    144 
    145 static void
    146 gen_linear (double lo, double hi)
    147 {
    148   for (int i = 0; i < N; i++)
    149     A[i] = (lo * (N - i) + hi * i) / N;
    150 }
    151 
    152 static void
    153 genf_linear (double lo, double hi)
    154 {
    155   for (int i = 0; i < N; i++)
    156     Af[i] = (float)(lo * (N - i) + hi * i) / N;
    157 }
    158 
    159 static inline double
    160 asdouble (uint64_t i)
    161 {
    162   union
    163   {
    164     uint64_t i;
    165     double f;
    166   } u = {i};
    167   return u.f;
    168 }
    169 
    170 static uint64_t seed = 0x0123456789abcdef;
    171 
    172 static double
    173 frand (double lo, double hi)
    174 {
    175   seed = 6364136223846793005ULL * seed + 1;
    176   return lo + (hi - lo) * (asdouble (seed >> 12 | 0x3ffULL << 52) - 1.0);
    177 }
    178 
    179 static void
    180 gen_rand (double lo, double hi)
    181 {
    182   for (int i = 0; i < N; i++)
    183     A[i] = frand (lo, hi);
    184 }
    185 
    186 static void
    187 genf_rand (double lo, double hi)
    188 {
    189   for (int i = 0; i < N; i++)
    190     Af[i] = (float)frand (lo, hi);
    191 }
    192 
    193 static void
    194 gen_trace (int index)
    195 {
    196   for (int i = 0; i < N; i++)
    197     A[i] = Trace[index + i];
    198 }
    199 
    200 static void
    201 genf_trace (int index)
    202 {
    203   for (int i = 0; i < N; i++)
    204     Af[i] = (float)Trace[index + i];
    205 }
    206 
    207 static void
    208 run_thruput (double f (double))
    209 {
    210   for (int i = 0; i < N; i++)
    211     f (A[i]);
    212 }
    213 
    214 static void
    215 runf_thruput (float f (float))
    216 {
    217   for (int i = 0; i < N; i++)
    218     f (Af[i]);
    219 }
    220 
    221 volatile double zero = 0;
    222 
    223 static void
    224 run_latency (double f (double))
    225 {
    226   double z = zero;
    227   double prev = z;
    228   for (int i = 0; i < N; i++)
    229     prev = f (A[i] + prev * z);
    230 }
    231 
    232 static void
    233 runf_latency (float f (float))
    234 {
    235   float z = (float)zero;
    236   float prev = z;
    237   for (int i = 0; i < N; i++)
    238     prev = f (Af[i] + prev * z);
    239 }
    240 
    241 static uint64_t
    242 tic (void)
    243 {
    244   struct timespec ts;
    245   if (clock_gettime (CLOCK_REALTIME, &ts))
    246     abort ();
    247   return ts.tv_sec * 1000000000ULL + ts.tv_nsec;
    248 }
    249 
    250 #define TIMEIT(run, f) do { \
    251   dt = -1; \
    252   run (f); /* Warm up.  */ \
    253   for (int j = 0; j < measurecount; j++) \
    254     { \
    255       uint64_t t0 = tic (); \
    256       for (int i = 0; i < itercount; i++) \
    257 	run (f); \
    258       uint64_t t1 = tic (); \
    259       if (t1 - t0 < dt) \
    260 	dt = t1 - t0; \
    261     } \
    262 } while (0)
    263 
    264 static void
    265 bench1 (const struct fun *f, int type, double lo, double hi)
    266 {
    267   uint64_t dt = 0;
    268   uint64_t ns100;
    269   const char *s = type == 't' ? "rthruput" : "latency";
    270 
    271   if (f->prec == 'd' && type == 't')
    272     TIMEIT (run_thruput, f->fun.d);
    273   else if (f->prec == 'd' && type == 'l')
    274     TIMEIT (run_latency, f->fun.d);
    275   else if (f->prec == 'f' && type == 't')
    276     TIMEIT (runf_thruput, f->fun.f);
    277   else if (f->prec == 'f' && type == 'l')
    278     TIMEIT (runf_latency, f->fun.f);
    279 
    280   ns100 = (100 * dt + itercount * N / 2) / (itercount * N);
    281   printf ("%7s %8s: %4u.%02u ns/elem %10llu ns in [%g %g]\n", f->name, s,
    282 	  (unsigned) (ns100 / 100), (unsigned) (ns100 % 100),
    283 	  (unsigned long long) dt, lo, hi);
    284   fflush (stdout);
    285 }
    286 
    287 static void
    288 bench (const struct fun *f, double lo, double hi, int type, int gen)
    289 {
    290   if (f->prec == 'd' && gen == 'r')
    291     gen_rand (lo, hi);
    292   else if (f->prec == 'd' && gen == 'l')
    293     gen_linear (lo, hi);
    294   else if (f->prec == 'd' && gen == 't')
    295     gen_trace (0);
    296   else if (f->prec == 'f' && gen == 'r')
    297     genf_rand (lo, hi);
    298   else if (f->prec == 'f' && gen == 'l')
    299     genf_linear (lo, hi);
    300   else if (f->prec == 'f' && gen == 't')
    301     genf_trace (0);
    302 
    303   if (gen == 't')
    304     hi = trace_size / N;
    305 
    306   if (type == 'b' || type == 't')
    307     bench1 (f, 't', lo, hi);
    308 
    309   if (type == 'b' || type == 'l')
    310     bench1 (f, 'l', lo, hi);
    311 
    312   for (int i = N; i < trace_size; i += N)
    313     {
    314       if (f->prec == 'd')
    315 	gen_trace (i);
    316       else
    317 	genf_trace (i);
    318 
    319       lo = i / N;
    320       if (type == 'b' || type == 't')
    321 	bench1 (f, 't', lo, hi);
    322 
    323       if (type == 'b' || type == 'l')
    324 	bench1 (f, 'l', lo, hi);
    325     }
    326 }
    327 
    328 static void
    329 readtrace (const char *name)
    330 {
    331 	int n = 0;
    332 	FILE *f = strcmp (name, "-") == 0 ? stdin : fopen (name, "r");
    333 	if (!f)
    334 	  {
    335 	    printf ("openning \"%s\" failed: %m\n", name);
    336 	    exit (1);
    337 	  }
    338 	for (;;)
    339 	  {
    340 	    if (n >= trace_size)
    341 	      {
    342 		trace_size += N;
    343 		Trace = realloc (Trace, trace_size * sizeof (Trace[0]));
    344 		if (Trace == NULL)
    345 		  {
    346 		    printf ("out of memory\n");
    347 		    exit (1);
    348 		  }
    349 	      }
    350 	    if (fscanf (f, "%lf", Trace + n) != 1)
    351 	      break;
    352 	    n++;
    353 	  }
    354 	if (ferror (f) || n == 0)
    355 	  {
    356 	    printf ("reading \"%s\" failed: %m\n", name);
    357 	    exit (1);
    358 	  }
    359 	fclose (f);
    360 	if (n % N == 0)
    361 	  trace_size = n;
    362 	for (int i = 0; n < trace_size; n++, i++)
    363 	  Trace[n] = Trace[i];
    364 }
    365 
    366 static void
    367 usage (void)
    368 {
    369   printf ("usage: ./mathbench [-g rand|linear|trace] [-t latency|thruput|both] "
    370 	  "[-i low high] [-f tracefile] [-m measurements] [-c iterations] func "
    371 	  "[func2 ..]\n");
    372   printf ("func:\n");
    373   printf ("%7s [run all benchmarks]\n", "all");
    374   for (const struct fun *f = funtab; f->name; f++)
    375     printf ("%7s [low: %g high: %g]\n", f->name, f->lo, f->hi);
    376   exit (1);
    377 }
    378 
    379 int
    380 main (int argc, char *argv[])
    381 {
    382   int usergen = 0, gen = 'r', type = 'b', all = 0;
    383   double lo = 0, hi = 0;
    384   const char *tracefile = "-";
    385 
    386   argv++;
    387   argc--;
    388   for (;;)
    389     {
    390       if (argc <= 0)
    391 	usage ();
    392       if (argv[0][0] != '-')
    393 	break;
    394       else if (argc >= 3 && strcmp (argv[0], "-i") == 0)
    395 	{
    396 	  usergen = 1;
    397 	  lo = strtod (argv[1], 0);
    398 	  hi = strtod (argv[2], 0);
    399 	  argv += 3;
    400 	  argc -= 3;
    401 	}
    402       else if (argc >= 2 && strcmp (argv[0], "-m") == 0)
    403 	{
    404 	  measurecount = strtol (argv[1], 0, 0);
    405 	  argv += 2;
    406 	  argc -= 2;
    407 	}
    408       else if (argc >= 2 && strcmp (argv[0], "-c") == 0)
    409 	{
    410 	  itercount = strtol (argv[1], 0, 0);
    411 	  argv += 2;
    412 	  argc -= 2;
    413 	}
    414       else if (argc >= 2 && strcmp (argv[0], "-g") == 0)
    415 	{
    416 	  gen = argv[1][0];
    417 	  if (strchr ("rlt", gen) == 0)
    418 	    usage ();
    419 	  argv += 2;
    420 	  argc -= 2;
    421 	}
    422       else if (argc >= 2 && strcmp (argv[0], "-f") == 0)
    423 	{
    424 	  gen = 't';  /* -f implies -g trace.  */
    425 	  tracefile = argv[1];
    426 	  argv += 2;
    427 	  argc -= 2;
    428 	}
    429       else if (argc >= 2 && strcmp (argv[0], "-t") == 0)
    430 	{
    431 	  type = argv[1][0];
    432 	  if (strchr ("ltb", type) == 0)
    433 	    usage ();
    434 	  argv += 2;
    435 	  argc -= 2;
    436 	}
    437       else
    438 	usage ();
    439     }
    440   if (gen == 't')
    441     {
    442       readtrace (tracefile);
    443       lo = hi = 0;
    444       usergen = 1;
    445     }
    446   while (argc > 0)
    447     {
    448       int found = 0;
    449       all = strcmp (argv[0], "all") == 0;
    450       for (const struct fun *f = funtab; f->name; f++)
    451 	if (all || strcmp (argv[0], f->name) == 0)
    452 	  {
    453 	    found = 1;
    454 	    if (!usergen)
    455 	      {
    456 		lo = f->lo;
    457 		hi = f->hi;
    458 	      }
    459 	    bench (f, lo, hi, type, gen);
    460 	    if (usergen && !all)
    461 	      break;
    462 	  }
    463       if (!found)
    464 	printf ("unknown function: %s\n", argv[0]);
    465       argv++;
    466       argc--;
    467     }
    468   return 0;
    469 }
    470