1 /* 2 * Microbenchmark for math functions. 3 * 4 * Copyright (c) 2018, Arm Limited. 5 * SPDX-License-Identifier: MIT 6 */ 7 8 #undef _GNU_SOURCE 9 #define _GNU_SOURCE 1 10 #include <stdint.h> 11 #include <stdlib.h> 12 #include <stdio.h> 13 #include <string.h> 14 #include <time.h> 15 #include <math.h> 16 #include "mathlib.h" 17 18 /* Number of measurements, best result is reported. */ 19 #define MEASURE 60 20 /* Array size. */ 21 #define N 8000 22 /* Iterations over the array. */ 23 #define ITER 125 24 25 static double *Trace; 26 static size_t trace_size; 27 static double A[N]; 28 static float Af[N]; 29 static long measurecount = MEASURE; 30 static long itercount = ITER; 31 32 static double 33 dummy (double x) 34 { 35 return x; 36 } 37 38 static float 39 dummyf (float x) 40 { 41 return x; 42 } 43 44 static double 45 xypow (double x) 46 { 47 return pow (x, x); 48 } 49 50 static float 51 xypowf (float x) 52 { 53 return powf (x, x); 54 } 55 56 static double 57 xpow (double x) 58 { 59 return pow (x, 23.4); 60 } 61 62 static float 63 xpowf (float x) 64 { 65 return powf (x, 23.4f); 66 } 67 68 static double 69 ypow (double x) 70 { 71 return pow (2.34, x); 72 } 73 74 static float 75 ypowf (float x) 76 { 77 return powf (2.34f, x); 78 } 79 80 static float 81 sincosf_wrap (float x) 82 { 83 float s, c; 84 sincosf (x, &s, &c); 85 return s + c; 86 } 87 88 static const struct fun 89 { 90 const char *name; 91 int prec; 92 double lo; 93 double hi; 94 union 95 { 96 double (*d) (double); 97 float (*f) (float); 98 } fun; 99 } funtab[] = { 100 #define D(func, lo, hi) {#func, 'd', lo, hi, {.d = func}}, 101 #define F(func, lo, hi) {#func, 'f', lo, hi, {.f = func}}, 102 D (dummy, 1.0, 2.0) 103 D (exp, -9.9, 9.9) 104 D (exp, 0.5, 1.0) 105 D (exp2, -9.9, 9.9) 106 D (log, 0.01, 11.1) 107 D (log, 0.999, 1.001) 108 D (log2, 0.01, 11.1) 109 D (log2, 0.999, 1.001) 110 {"pow", 'd', 0.01, 11.1, {.d = xypow}}, 111 D (xpow, 0.01, 11.1) 112 D (ypow, -9.9, 9.9) 113 114 F (dummyf, 1.0, 2.0) 115 F (expf, -9.9, 9.9) 116 F (exp2f, -9.9, 9.9) 117 F (logf, 0.01, 11.1) 118 F (log2f, 0.01, 11.1) 119 {"powf", 'f', 0.01, 11.1, {.f = xypowf}}, 120 F (xpowf, 0.01, 11.1) 121 F (ypowf, -9.9, 9.9) 122 {"sincosf", 'f', 0.1, 0.7, {.f = sincosf_wrap}}, 123 {"sincosf", 'f', 0.8, 3.1, {.f = sincosf_wrap}}, 124 {"sincosf", 'f', -3.1, 3.1, {.f = sincosf_wrap}}, 125 {"sincosf", 'f', 3.3, 33.3, {.f = sincosf_wrap}}, 126 {"sincosf", 'f', 100, 1000, {.f = sincosf_wrap}}, 127 {"sincosf", 'f', 1e6, 1e32, {.f = sincosf_wrap}}, 128 F (sinf, 0.1, 0.7) 129 F (sinf, 0.8, 3.1) 130 F (sinf, -3.1, 3.1) 131 F (sinf, 3.3, 33.3) 132 F (sinf, 100, 1000) 133 F (sinf, 1e6, 1e32) 134 F (cosf, 0.1, 0.7) 135 F (cosf, 0.8, 3.1) 136 F (cosf, -3.1, 3.1) 137 F (cosf, 3.3, 33.3) 138 F (cosf, 100, 1000) 139 F (cosf, 1e6, 1e32) 140 {0}, 141 #undef F 142 #undef D 143 }; 144 145 static void 146 gen_linear (double lo, double hi) 147 { 148 for (int i = 0; i < N; i++) 149 A[i] = (lo * (N - i) + hi * i) / N; 150 } 151 152 static void 153 genf_linear (double lo, double hi) 154 { 155 for (int i = 0; i < N; i++) 156 Af[i] = (float)(lo * (N - i) + hi * i) / N; 157 } 158 159 static inline double 160 asdouble (uint64_t i) 161 { 162 union 163 { 164 uint64_t i; 165 double f; 166 } u = {i}; 167 return u.f; 168 } 169 170 static uint64_t seed = 0x0123456789abcdef; 171 172 static double 173 frand (double lo, double hi) 174 { 175 seed = 6364136223846793005ULL * seed + 1; 176 return lo + (hi - lo) * (asdouble (seed >> 12 | 0x3ffULL << 52) - 1.0); 177 } 178 179 static void 180 gen_rand (double lo, double hi) 181 { 182 for (int i = 0; i < N; i++) 183 A[i] = frand (lo, hi); 184 } 185 186 static void 187 genf_rand (double lo, double hi) 188 { 189 for (int i = 0; i < N; i++) 190 Af[i] = (float)frand (lo, hi); 191 } 192 193 static void 194 gen_trace (int index) 195 { 196 for (int i = 0; i < N; i++) 197 A[i] = Trace[index + i]; 198 } 199 200 static void 201 genf_trace (int index) 202 { 203 for (int i = 0; i < N; i++) 204 Af[i] = (float)Trace[index + i]; 205 } 206 207 static void 208 run_thruput (double f (double)) 209 { 210 for (int i = 0; i < N; i++) 211 f (A[i]); 212 } 213 214 static void 215 runf_thruput (float f (float)) 216 { 217 for (int i = 0; i < N; i++) 218 f (Af[i]); 219 } 220 221 volatile double zero = 0; 222 223 static void 224 run_latency (double f (double)) 225 { 226 double z = zero; 227 double prev = z; 228 for (int i = 0; i < N; i++) 229 prev = f (A[i] + prev * z); 230 } 231 232 static void 233 runf_latency (float f (float)) 234 { 235 float z = (float)zero; 236 float prev = z; 237 for (int i = 0; i < N; i++) 238 prev = f (Af[i] + prev * z); 239 } 240 241 static uint64_t 242 tic (void) 243 { 244 struct timespec ts; 245 if (clock_gettime (CLOCK_REALTIME, &ts)) 246 abort (); 247 return ts.tv_sec * 1000000000ULL + ts.tv_nsec; 248 } 249 250 #define TIMEIT(run, f) do { \ 251 dt = -1; \ 252 run (f); /* Warm up. */ \ 253 for (int j = 0; j < measurecount; j++) \ 254 { \ 255 uint64_t t0 = tic (); \ 256 for (int i = 0; i < itercount; i++) \ 257 run (f); \ 258 uint64_t t1 = tic (); \ 259 if (t1 - t0 < dt) \ 260 dt = t1 - t0; \ 261 } \ 262 } while (0) 263 264 static void 265 bench1 (const struct fun *f, int type, double lo, double hi) 266 { 267 uint64_t dt = 0; 268 uint64_t ns100; 269 const char *s = type == 't' ? "rthruput" : "latency"; 270 271 if (f->prec == 'd' && type == 't') 272 TIMEIT (run_thruput, f->fun.d); 273 else if (f->prec == 'd' && type == 'l') 274 TIMEIT (run_latency, f->fun.d); 275 else if (f->prec == 'f' && type == 't') 276 TIMEIT (runf_thruput, f->fun.f); 277 else if (f->prec == 'f' && type == 'l') 278 TIMEIT (runf_latency, f->fun.f); 279 280 ns100 = (100 * dt + itercount * N / 2) / (itercount * N); 281 printf ("%7s %8s: %4u.%02u ns/elem %10llu ns in [%g %g]\n", f->name, s, 282 (unsigned) (ns100 / 100), (unsigned) (ns100 % 100), 283 (unsigned long long) dt, lo, hi); 284 fflush (stdout); 285 } 286 287 static void 288 bench (const struct fun *f, double lo, double hi, int type, int gen) 289 { 290 if (f->prec == 'd' && gen == 'r') 291 gen_rand (lo, hi); 292 else if (f->prec == 'd' && gen == 'l') 293 gen_linear (lo, hi); 294 else if (f->prec == 'd' && gen == 't') 295 gen_trace (0); 296 else if (f->prec == 'f' && gen == 'r') 297 genf_rand (lo, hi); 298 else if (f->prec == 'f' && gen == 'l') 299 genf_linear (lo, hi); 300 else if (f->prec == 'f' && gen == 't') 301 genf_trace (0); 302 303 if (gen == 't') 304 hi = trace_size / N; 305 306 if (type == 'b' || type == 't') 307 bench1 (f, 't', lo, hi); 308 309 if (type == 'b' || type == 'l') 310 bench1 (f, 'l', lo, hi); 311 312 for (int i = N; i < trace_size; i += N) 313 { 314 if (f->prec == 'd') 315 gen_trace (i); 316 else 317 genf_trace (i); 318 319 lo = i / N; 320 if (type == 'b' || type == 't') 321 bench1 (f, 't', lo, hi); 322 323 if (type == 'b' || type == 'l') 324 bench1 (f, 'l', lo, hi); 325 } 326 } 327 328 static void 329 readtrace (const char *name) 330 { 331 int n = 0; 332 FILE *f = strcmp (name, "-") == 0 ? stdin : fopen (name, "r"); 333 if (!f) 334 { 335 printf ("openning \"%s\" failed: %m\n", name); 336 exit (1); 337 } 338 for (;;) 339 { 340 if (n >= trace_size) 341 { 342 trace_size += N; 343 Trace = realloc (Trace, trace_size * sizeof (Trace[0])); 344 if (Trace == NULL) 345 { 346 printf ("out of memory\n"); 347 exit (1); 348 } 349 } 350 if (fscanf (f, "%lf", Trace + n) != 1) 351 break; 352 n++; 353 } 354 if (ferror (f) || n == 0) 355 { 356 printf ("reading \"%s\" failed: %m\n", name); 357 exit (1); 358 } 359 fclose (f); 360 if (n % N == 0) 361 trace_size = n; 362 for (int i = 0; n < trace_size; n++, i++) 363 Trace[n] = Trace[i]; 364 } 365 366 static void 367 usage (void) 368 { 369 printf ("usage: ./mathbench [-g rand|linear|trace] [-t latency|thruput|both] " 370 "[-i low high] [-f tracefile] [-m measurements] [-c iterations] func " 371 "[func2 ..]\n"); 372 printf ("func:\n"); 373 printf ("%7s [run all benchmarks]\n", "all"); 374 for (const struct fun *f = funtab; f->name; f++) 375 printf ("%7s [low: %g high: %g]\n", f->name, f->lo, f->hi); 376 exit (1); 377 } 378 379 int 380 main (int argc, char *argv[]) 381 { 382 int usergen = 0, gen = 'r', type = 'b', all = 0; 383 double lo = 0, hi = 0; 384 const char *tracefile = "-"; 385 386 argv++; 387 argc--; 388 for (;;) 389 { 390 if (argc <= 0) 391 usage (); 392 if (argv[0][0] != '-') 393 break; 394 else if (argc >= 3 && strcmp (argv[0], "-i") == 0) 395 { 396 usergen = 1; 397 lo = strtod (argv[1], 0); 398 hi = strtod (argv[2], 0); 399 argv += 3; 400 argc -= 3; 401 } 402 else if (argc >= 2 && strcmp (argv[0], "-m") == 0) 403 { 404 measurecount = strtol (argv[1], 0, 0); 405 argv += 2; 406 argc -= 2; 407 } 408 else if (argc >= 2 && strcmp (argv[0], "-c") == 0) 409 { 410 itercount = strtol (argv[1], 0, 0); 411 argv += 2; 412 argc -= 2; 413 } 414 else if (argc >= 2 && strcmp (argv[0], "-g") == 0) 415 { 416 gen = argv[1][0]; 417 if (strchr ("rlt", gen) == 0) 418 usage (); 419 argv += 2; 420 argc -= 2; 421 } 422 else if (argc >= 2 && strcmp (argv[0], "-f") == 0) 423 { 424 gen = 't'; /* -f implies -g trace. */ 425 tracefile = argv[1]; 426 argv += 2; 427 argc -= 2; 428 } 429 else if (argc >= 2 && strcmp (argv[0], "-t") == 0) 430 { 431 type = argv[1][0]; 432 if (strchr ("ltb", type) == 0) 433 usage (); 434 argv += 2; 435 argc -= 2; 436 } 437 else 438 usage (); 439 } 440 if (gen == 't') 441 { 442 readtrace (tracefile); 443 lo = hi = 0; 444 usergen = 1; 445 } 446 while (argc > 0) 447 { 448 int found = 0; 449 all = strcmp (argv[0], "all") == 0; 450 for (const struct fun *f = funtab; f->name; f++) 451 if (all || strcmp (argv[0], f->name) == 0) 452 { 453 found = 1; 454 if (!usergen) 455 { 456 lo = f->lo; 457 hi = f->hi; 458 } 459 bench (f, lo, hi, type, gen); 460 if (usergen && !all) 461 break; 462 } 463 if (!found) 464 printf ("unknown function: %s\n", argv[0]); 465 argv++; 466 argc--; 467 } 468 return 0; 469 } 470