/*
** Copyright 2010 The Android Open Source Project
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
**
**     http://www.apache.org/licenses/LICENSE-2.0
**
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*/

/*
 * Micro-benchmarking of sleep/cpu speed/memcpy/memset/memory reads.
 */

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <ctype.h>
#include <math.h>
#include <sched.h>
#include <sys/resource.h>
#include <time.h>
#include <unistd.h>

// The default size of data that will be manipulated in each iteration of
// a memory benchmark. Can be modified with the --data_size option.
#define DEFAULT_DATA_SIZE 1000000000

// Number of nanoseconds in a second.
#define NS_PER_SEC 1000000000

// The maximum number of arguments that a benchmark will accept.
#define MAX_ARGS 2

// Use macros to compute values to try and avoid disturbing memory as much
// as possible after each iteration.

// Stores into avg_kb the throughput, in KB per second, of moving `bytes`
// bytes in `time_ns` nanoseconds.
#define COMPUTE_AVERAGE_KB(avg_kb, bytes, time_ns) \
    do { \
        (avg_kb) = ((bytes) / 1024.0) / ((double)(time_ns) / NS_PER_SEC); \
    } while (0)

// Folds sample number cur_idx (0-based) into the running mean (running_avg)
// and the running mean of squares (square_avg).
#define COMPUTE_RUNNING(avg, running_avg, square_avg, cur_idx) \
    do { \
        (running_avg) = ((running_avg) / ((cur_idx) + 1)) * (cur_idx) + \
                        (avg) / ((cur_idx) + 1); \
        (square_avg) = ((square_avg) / ((cur_idx) + 1)) * (cur_idx) + \
                       ((avg) / ((cur_idx) + 1)) * (avg); \
    } while (0)

// Computes the standard deviation from a mean and a mean of squares.
// Floating point rounding can leave the variance a hair below zero when the
// samples are (nearly) identical; clamp it so that sqrt() does not yield NaN.
static inline double stdDevFromMoments(double mean, double mean_of_squares) {
    double variance = mean_of_squares - mean * mean;
    return sqrt(variance < 0.0 ? 0.0 : variance);
}

#define GET_STD_DEV(running_avg, square_avg) \
    stdDevFromMoments((running_avg), (square_avg))

// Contains information about benchmark options.
typedef struct {
    // Print the average and standard deviation once all iterations are done.
    bool print_average;
    // Print one result line after every iteration.
    bool print_each_iter;

    // Requested alignment of the destination/source buffers (0 means use
    // whatever malloc returns).
    int dst_align;
    int src_align;

    // Value of the --lock_to_cpu option.
    int cpu_to_lock;

    // Approximate number of bytes processed per iteration of a memory
    // benchmark.
    int data_size;

    // Positional arguments of the benchmark and how many were stored.
    int args[MAX_ARGS];
    int num_args;
} command_data_t;

// Struct that contains a mapping of benchmark name to benchmark function.
typedef struct {
    const char *name;
    int (*ptr)(const command_data_t &cmd_data);
} function_t;

// Get the current time in nanoseconds.
uint64_t nanoTime() {
    struct timespec t;

    // Zeroed first so the result is well defined even if clock_gettime fails.
    t.tv_sec = t.tv_nsec = 0;
    clock_gettime(CLOCK_MONOTONIC, &t);
    return static_cast<uint64_t>(t.tv_sec) * NS_PER_SEC + t.tv_nsec;
}

// Allocate memory with a specific alignment and return that pointer.
// This function assumes an alignment value that is a power of 2.
// If the alignment is 0, then use the pointer returned by malloc.
//
// The returned pointer is offset from the start of the allocation whenever
// alignment is non-zero, so it must not be passed to free(). If base is not
// NULL, *base receives the pointer that free() expects (NULL on failure).
// Returns NULL if the allocation fails or alignment is negative.
uint8_t *allocateAlignedMemory(size_t size, int alignment, void **base = NULL) {
    if (base)
        *base = NULL;
    if (alignment < 0)
        return NULL;

    // Up to 2 * alignment bytes of slack are consumed by the adjustment below.
    void *raw = malloc(size + 2 * static_cast<size_t>(alignment));
    if (!raw)
        return NULL;
    if (base)
        *base = raw;

    uintptr_t ptr = reinterpret_cast<uintptr_t>(raw);
    if (alignment > 0) {
        // When setting the alignment, set it to exactly the alignment chosen.
        // The pointer returned will be guaranteed not to be aligned to anything
        // more than that.
        uintptr_t align = static_cast<uintptr_t>(alignment);
        // Advance to the next multiple of align (always moves forward)...
        ptr += align - (ptr & (align - 1));
        // ...then force the align bit on so that the address is an odd
        // multiple of align.
        ptr |= align;
    }

    return reinterpret_cast<uint8_t*>(ptr);
}

// Checks the NUM_BYTES argument of the memory benchmarks. A value of zero
// would divide by zero when computing the number of passes, and a negative
// value would turn into an enormous size_t.
static bool isValidByteCount(int size) {
    if (size > 0)
        return true;
    printf("The number of bytes must be a positive number.\n");
    return false;
}

// Times sleep(args[0]) for args[1] iterations (forever if args[1] is -1).
// Returns 0 on success, -1 if the sleep time is negative.
int benchmarkSleep(const command_data_t &cmd_data) {
    uint64_t time_ns;

    int delay = cmd_data.args[0];
    int iters = cmd_data.args[1];
    if (delay < 0) {
        // sleep() takes an unsigned value, a negative one would wrap around.
        printf("The time to sleep must be greater than or equal to 0.\n");
        return -1;
    }

    bool print_each_iter = cmd_data.print_each_iter;
    bool print_average = cmd_data.print_average;
    double avg, running_avg = 0.0, square_avg = 0.0;
    for (int i = 0; iters == -1 || i < iters; i++) {
        time_ns = nanoTime();
        sleep(delay);
        time_ns = nanoTime() - time_ns;

        avg = (double)time_ns / NS_PER_SEC;

        if (print_average) {
            COMPUTE_RUNNING(avg, running_avg, square_avg, i);
        }

        if (print_each_iter) {
            printf("sleep(%d) took %.06f seconds\n", delay, avg);
        }
    }

    if (print_average) {
        printf(" sleep(%d) average %.06f seconds std dev %f\n", delay,
               running_avg, GET_STD_DEV(running_avg, square_avg));
    }

    return 0;
}

// Times a fixed-length counting loop for args[1] iterations (forever if
// args[1] is -1). args[0] is not used. Always returns 0.
int benchmarkCpu(const command_data_t &cmd_data) {
    // Use volatile so that the loop is not optimized away by the compiler.
    volatile int cpu_foo;

    uint64_t time_ns;
    int iters = cmd_data.args[1];
    bool print_each_iter = cmd_data.print_each_iter;
    bool print_average = cmd_data.print_average;
    double avg, running_avg = 0.0, square_avg = 0.0;
    for (int i = 0; iters == -1 || i < iters; i++) {
        time_ns = nanoTime();
        for (cpu_foo = 0; cpu_foo < 100000000; cpu_foo++);
        time_ns = nanoTime() - time_ns;

        avg = (double)time_ns / NS_PER_SEC;

        if (print_average) {
            COMPUTE_RUNNING(avg, running_avg, square_avg, i);
        }

        if (print_each_iter) {
            printf("cpu took %.06f seconds\n", avg);
        }
    }

    if (print_average) {
        printf(" cpu average %.06f seconds std dev %f\n",
               running_avg, GET_STD_DEV(running_avg, square_avg));
    }

    return 0;
}

// Times memset() over a buffer of args[0] bytes, repeated enough times per
// iteration to touch about data_size bytes, for args[1] iterations (forever
// if args[1] is -1).
// Returns 0 on success, -1 on an invalid size or allocation failure.
int benchmarkMemset(const command_data_t &cmd_data) {
    int size = cmd_data.args[0];
    int iters = cmd_data.args[1];
    if (!isValidByteCount(size))
        return -1;

    void *dst_base;
    uint8_t *dst = allocateAlignedMemory(size, cmd_data.dst_align, &dst_base);
    if (!dst)
        return -1;

    double avg_kb, running_avg_kb = 0.0, square_avg_kb = 0.0;
    uint64_t time_ns;
    int j;
    bool print_average = cmd_data.print_average;
    bool print_each_iter = cmd_data.print_each_iter;
    int copies = cmd_data.data_size / size;
    for (int i = 0; iters == -1 || i < iters; i++) {
        time_ns = nanoTime();
        for (j = 0; j < copies; j++)
            memset(dst, 0, size);
        time_ns = nanoTime() - time_ns;

        // Compute in kb to avoid any overflows.
        COMPUTE_AVERAGE_KB(avg_kb, copies * size, time_ns);

        if (print_average) {
            COMPUTE_RUNNING(avg_kb, running_avg_kb, square_avg_kb, i);
        }

        if (print_each_iter) {
            printf("memset %dx%d bytes took %.06f seconds (%f MB/s)\n",
                   copies, size, (double)time_ns / NS_PER_SEC, avg_kb / 1024.0);
        }
    }

    if (print_average) {
        printf(" memset %dx%d bytes average %.2f MB/s std dev %.4f\n",
               copies, size, running_avg_kb / 1024.0,
               GET_STD_DEV(running_avg_kb, square_avg_kb) / 1024.0);
    }

    free(dst_base);
    return 0;
}

// Times memcpy() between two buffers of args[0] bytes, repeated enough times
// per iteration to move about data_size bytes, for args[1] iterations
// (forever if args[1] is -1).
// Returns 0 on success, -1 on an invalid size or allocation failure.
int benchmarkMemcpy(const command_data_t &cmd_data) {
    int size = cmd_data.args[0];
    int iters = cmd_data.args[1];
    if (!isValidByteCount(size))
        return -1;

    void *src_base;
    uint8_t *src = allocateAlignedMemory(size, cmd_data.src_align, &src_base);
    if (!src)
        return -1;
    void *dst_base;
    uint8_t *dst = allocateAlignedMemory(size, cmd_data.dst_align, &dst_base);
    if (!dst) {
        free(src_base);
        return -1;
    }

    uint64_t time_ns;
    double avg_kb, running_avg_kb = 0.0, square_avg_kb = 0.0;
    int j;
    bool print_average = cmd_data.print_average;
    bool print_each_iter = cmd_data.print_each_iter;
    int copies = cmd_data.data_size / size;
    for (int i = 0; iters == -1 || i < iters; i++) {
        time_ns = nanoTime();
        for (j = 0; j < copies; j++)
            memcpy(dst, src, size);
        time_ns = nanoTime() - time_ns;

        // Compute in kb to avoid any overflows.
        COMPUTE_AVERAGE_KB(avg_kb, copies * size, time_ns);

        if (print_average) {
            COMPUTE_RUNNING(avg_kb, running_avg_kb, square_avg_kb, i);
        }

        if (print_each_iter) {
            printf("memcpy %dx%d bytes took %.06f seconds (%f MB/s)\n",
                   copies, size, (double)time_ns / NS_PER_SEC, avg_kb / 1024.0);
        }
    }
    if (print_average) {
        printf(" memcpy %dx%d bytes average %.2f MB/s std dev %.4f\n",
               copies, size, running_avg_kb/1024.0,
               GET_STD_DEV(running_avg_kb, square_avg_kb) / 1024.0);
    }

    free(dst_base);
    free(src_base);
    return 0;
}

// Times int-sized reads over a buffer of args[0] bytes, repeated enough
// times per iteration to read about data_size bytes, for args[1] iterations
// (forever if args[1] is -1).
// Returns 0 on success, -1 on an invalid size or allocation failure.
int benchmarkMemread(const command_data_t &cmd_data) {
    int size = cmd_data.args[0];
    int iters = cmd_data.args[1];
    if (!isValidByteCount(size))
        return -1;

    int *src = reinterpret_cast<int*>(malloc(size));
    if (!src)
        return -1;
    // Give the buffer defined contents: reading indeterminate ints is
    // undefined behavior.
    memset(src, 0, size);

    // Use volatile so the compiler does not optimize away the reads.
    volatile int foo;
    uint64_t time_ns;
    int j, k;
    double avg_kb, running_avg_kb = 0.0, square_avg_kb = 0.0;
    bool print_average = cmd_data.print_average;
    bool print_each_iter = cmd_data.print_each_iter;
    int c = cmd_data.data_size / size;
    // Number of whole ints that fit in the buffer.
    int words = size / static_cast<int>(sizeof(int));
    for (int i = 0; iters == -1 || i < iters; i++) {
        time_ns = nanoTime();
        for (j = 0; j < c; j++)
            for (k = 0; k < words; k++)
                foo = src[k];
        time_ns = nanoTime() - time_ns;

        // Compute in kb to avoid any overflows.
        COMPUTE_AVERAGE_KB(avg_kb, c * size, time_ns);

        if (print_average) {
            COMPUTE_RUNNING(avg_kb, running_avg_kb, square_avg_kb, i);
        }

        if (print_each_iter) {
            printf("read %dx%d bytes took %.06f seconds (%f MB/s)\n",
                   c, size, (double)time_ns / NS_PER_SEC, avg_kb / 1024.0);
        }
    }

    if (print_average) {
        printf(" read %dx%d bytes average %.2f MB/s std dev %.4f\n",
               c, size, running_avg_kb/1024.0,
               GET_STD_DEV(running_avg_kb, square_avg_kb) / 1024.0);
    }

    free(src);
    return 0;
}

// Create the mapping structure.
296 function_t function_table[] = { 297 { "sleep", benchmarkSleep }, 298 { "cpu", benchmarkCpu }, 299 { "memset", benchmarkMemset }, 300 { "memcpy", benchmarkMemcpy }, 301 { "memread", benchmarkMemread }, 302 { NULL, NULL } 303 }; 304 305 void usage() { 306 printf("Usage:\n"); 307 printf(" micro_bench [--data_size DATA_BYTES] [--print_average]\n"); 308 printf(" [--no_print_each_iter] [--lock_to_cpu CORE]\n"); 309 printf(" --data_size DATA_BYTES\n"); 310 printf(" For the data benchmarks (memcpy/memset/memread) the approximate\n"); 311 printf(" size of data, in bytes, that will be manipulated in each iteration.\n"); 312 printf(" --print_average\n"); 313 printf(" Print the average and standard deviation of all iterations.\n"); 314 printf(" --no_print_each_iter\n"); 315 printf(" Do not print any values in each iteration.\n"); 316 printf(" --lock_to_cpu CORE\n"); 317 printf(" Lock to the specified CORE. The default is to use the last core found.\n"); 318 printf(" ITERS\n"); 319 printf(" The number of iterations to execute each benchmark. If not\n"); 320 printf(" passed in then run forever.\n"); 321 printf(" micro_bench sleep TIME_TO_SLEEP [ITERS]\n"); 322 printf(" TIME_TO_SLEEP\n"); 323 printf(" The time in seconds to sleep.\n"); 324 printf(" micro_bench cpu UNUSED [ITERS]\n"); 325 printf(" micro_bench [--dst_align ALIGN] memset NUM_BYTES [ITERS]\n"); 326 printf(" --dst_align ALIGN\n"); 327 printf(" Align the memset destination pointer to ALIGN. The default is to use the\n"); 328 printf(" value returned by malloc.\n"); 329 printf(" micro_bench [--src_align ALIGN] [--dst_align ALIGN] memcpy NUM_BYTES [ITERS]\n"); 330 printf(" --src_align ALIGN\n"); 331 printf(" Align the memcpy source pointer to ALIGN. The default is to use the\n"); 332 printf(" value returned by malloc.\n"); 333 printf(" --dst_align ALIGN\n"); 334 printf(" Align the memcpy destination pointer to ALIGN. 
The default is to use the\n"); 335 printf(" value returned by malloc.\n"); 336 printf(" micro_bench memread NUM_BYTES [ITERS]\n"); 337 } 338 339 function_t *processOptions(int argc, char **argv, command_data_t *cmd_data) { 340 function_t *command = NULL; 341 342 // Initialize the command_flags. 343 cmd_data->print_average = false; 344 cmd_data->print_each_iter = true; 345 cmd_data->dst_align = 0; 346 cmd_data->src_align = 0; 347 cmd_data->num_args = 0; 348 cmd_data->cpu_to_lock = -1; 349 cmd_data->data_size = DEFAULT_DATA_SIZE; 350 for (int i = 0; i < MAX_ARGS; i++) { 351 cmd_data->args[i] = -1; 352 } 353 354 for (int i = 1; i < argc; i++) { 355 if (argv[i][0] == '-') { 356 int *save_value = NULL; 357 if (strcmp(argv[i], "--print_average") == 0) { 358 cmd_data->print_average = true; 359 } else if (strcmp(argv[i], "--no_print_each_iter") == 0) { 360 cmd_data->print_each_iter = false; 361 } else if (strcmp(argv[i], "--dst_align") == 0) { 362 save_value = &cmd_data->dst_align; 363 } else if (strcmp(argv[i], "--src_align") == 0) { 364 save_value = &cmd_data->src_align; 365 } else if (strcmp(argv[i], "--lock_to_cpu") == 0) { 366 save_value = &cmd_data->cpu_to_lock; 367 } else if (strcmp(argv[i], "--data_size") == 0) { 368 save_value = &cmd_data->data_size; 369 } else { 370 printf("Unknown option %s\n", argv[i]); 371 return NULL; 372 } 373 if (save_value) { 374 // Checking both characters without a strlen() call should be 375 // safe since as long as the argument exists, one character will 376 // be present (\0). And if the first character is '-', then 377 // there will always be a second character (\0 again). 
378 if (i == argc - 1 || (argv[i + 1][0] == '-' && !isdigit(argv[i + 1][1]))) { 379 printf("The option %s requires one argument.\n", 380 argv[i]); 381 return NULL; 382 } 383 *save_value = atoi(argv[++i]); 384 } 385 } else if (!command) { 386 for (function_t *function = function_table; function->name != NULL; function++) { 387 if (strcmp(argv[i], function->name) == 0) { 388 command = function; 389 break; 390 } 391 } 392 if (!command) { 393 printf("Uknown command %s\n", argv[i]); 394 return NULL; 395 } 396 } else if (cmd_data->num_args > MAX_ARGS) { 397 printf("More than %d number arguments passed in.\n", MAX_ARGS); 398 return NULL; 399 } else { 400 cmd_data->args[cmd_data->num_args++] = atoi(argv[i]); 401 } 402 } 403 404 // Check the arguments passed in make sense. 405 if (cmd_data->num_args != 1 && cmd_data->num_args != 2) { 406 printf("Not enough arguments passed in.\n"); 407 return NULL; 408 } else if (cmd_data->dst_align < 0) { 409 printf("The --dst_align option must be greater than or equal to 0.\n"); 410 return NULL; 411 } else if (cmd_data->src_align < 0) { 412 printf("The --src_align option must be greater than or equal to 0.\n"); 413 return NULL; 414 } else if (cmd_data->data_size <= 0) { 415 printf("The --data_size option must be a positive number.\n"); 416 return NULL; 417 } else if ((cmd_data->dst_align & (cmd_data->dst_align - 1))) { 418 printf("The --dst_align option must be a power of 2.\n"); 419 return NULL; 420 } else if ((cmd_data->src_align & (cmd_data->src_align - 1))) { 421 printf("The --src_align option must be a power of 2.\n"); 422 return NULL; 423 } 424 425 return command; 426 } 427 428 bool raisePriorityAndLock(int cpu_to_lock) { 429 cpu_set_t cpuset; 430 431 if (setpriority(PRIO_PROCESS, 0, -20)) { 432 perror("Unable to raise priority of process.\n"); 433 return false; 434 } 435 436 CPU_ZERO(&cpuset); 437 if (sched_getaffinity(0, sizeof(cpuset), &cpuset) != 0) { 438 perror("sched_getaffinity failed"); 439 return false; 440 } 441 442 if 
(cpu_to_lock < 0) { 443 // Lock to the last active core we find. 444 for (int i = 0; i < CPU_SETSIZE; i++) { 445 if (CPU_ISSET(i, &cpuset)) { 446 cpu_to_lock = i; 447 } 448 } 449 } else if (!CPU_ISSET(cpu_to_lock, &cpuset)) { 450 printf("Cpu %d does not exist.\n", cpu_to_lock); 451 return false; 452 } 453 454 if (cpu_to_lock < 0) { 455 printf("Cannot find any valid cpu to lock.\n"); 456 return false; 457 } 458 459 CPU_ZERO(&cpuset); 460 CPU_SET(cpu_to_lock, &cpuset); 461 if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) { 462 perror("sched_setaffinity failed"); 463 return false; 464 } 465 466 return true; 467 } 468 469 int main(int argc, char **argv) { 470 command_data_t cmd_data; 471 472 function_t *command = processOptions(argc, argv, &cmd_data); 473 if (!command) { 474 usage(); 475 return -1; 476 } 477 478 if (!raisePriorityAndLock(cmd_data.cpu_to_lock)) { 479 return -1; 480 } 481 482 printf("%s\n", command->name); 483 return (*command->ptr)(cmd_data); 484 } 485