Home | History | Annotate | Download | only in micro_bench
      1 /*
      2 ** Copyright 2010 The Android Open Source Project
      3 **
      4 ** Licensed under the Apache License, Version 2.0 (the "License");
      5 ** you may not use this file except in compliance with the License.
      6 ** You may obtain a copy of the License at
      7 **
      8 **     http://www.apache.org/licenses/LICENSE-2.0
      9 **
     10 ** Unless required by applicable law or agreed to in writing, software
     11 ** distributed under the License is distributed on an "AS IS" BASIS,
     12 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 ** See the License for the specific language governing permissions and
     14 ** limitations under the License.
     15 */
     16 
     17 /*
     18  * Micro-benchmarking of sleep/cpu speed/memcpy/memset/memory reads.
     19  */
     20 
     #include <ctype.h>
     #include <math.h>
     #include <sched.h>
     #include <stdint.h>
     #include <stdio.h>
     #include <stdlib.h>
     #include <string.h>
     #include <sys/resource.h>
     #include <time.h>
     #include <unistd.h>
     29 
     // The default number of bytes that will be manipulated in each iteration
     // of a memory benchmark. Can be modified with the --data_size option.
     #define DEFAULT_DATA_SIZE       1000000000

     // Number of nanoseconds in a second.
     #define NS_PER_SEC              1000000000

     // The maximum number of positional arguments that a benchmark will accept.
     #define MAX_ARGS    2

     // The statistics helpers below are macros rather than functions to try and
     // avoid disturbing memory as much as possible after each iteration.
     // Each statement macro is wrapped in do { } while (0) so that it behaves
     // as exactly one statement, including inside an unbraced if/else.
     // Arguments can be evaluated more than once, so pass only expressions
     // without side effects.

     // Store in avg_kb the throughput, in KB per second, achieved by handling
     // `bytes` bytes in `time_ns` nanoseconds.
     #define COMPUTE_AVERAGE_KB(avg_kb, bytes, time_ns) \
         do { \
             (avg_kb) = ((bytes) / 1024.0) / ((double)(time_ns) / NS_PER_SEC); \
         } while (0)

     // Fold the sample `avg`, taken at zero-based iteration cur_idx, into the
     // running mean (running_avg) and the running mean of squares (square_avg).
     #define COMPUTE_RUNNING(avg, running_avg, square_avg, cur_idx) \
         do { \
             (running_avg) = ((running_avg) / ((cur_idx) + 1)) * (cur_idx) + \
                             (avg) / ((cur_idx) + 1); \
             (square_avg) = ((square_avg) / ((cur_idx) + 1)) * (cur_idx) + \
                            ((avg) / ((cur_idx) + 1)) * (avg); \
         } while (0)

     // Standard deviation computed from the running mean and the running mean
     // of squares. Floating point rounding can leave the difference slightly
     // below zero (for example when every sample is identical); that case
     // yields 0.0 instead of the NaN that sqrt() of a negative number returns.
     #define GET_STD_DEV(running_avg, square_avg) \
         (((square_avg) > (running_avg) * (running_avg)) \
              ? sqrt((square_avg) - (running_avg) * (running_avg)) \
              : 0.0)
     51 
     52 // Contains information about benchmark options.
     53 typedef struct {
     54     bool print_average;
     55     bool print_each_iter;
     56 
     57     int dst_align;
     58     int src_align;
     59 
     60     int cpu_to_lock;
     61 
     62     int data_size;
     63 
     64     int args[MAX_ARGS];
     65     int num_args;
     66 } command_data_t;
     67 
     68 // Struct that contains a mapping of benchmark name to benchmark function.
     69 typedef struct {
     70     const char *name;
     71     int (*ptr)(const command_data_t &cmd_data);
     72 } function_t;
     73 
     74 // Get the current time in nanoseconds.
     75 uint64_t nanoTime() {
     76   struct timespec t;
     77 
     78   t.tv_sec = t.tv_nsec = 0;
     79   clock_gettime(CLOCK_MONOTONIC, &t);
     80   return static_cast<uint64_t>(t.tv_sec) * NS_PER_SEC + t.tv_nsec;
     81 }
     82 
     // Allocate a buffer with at least `size` usable bytes whose start address
     // has a specific alignment, and return a pointer to that start address.
     //
     // alignment must be 0 or a power of 2. If it is 0, the pointer returned by
     // malloc is used unchanged. Otherwise the returned address is a multiple of
     // alignment but not of 2 * alignment, i.e. it is aligned to exactly the
     // requested value and nothing larger.
     //
     // Returns NULL if alignment is negative, if the padded size does not fit
     // in a size_t, or if malloc fails. For a non-zero alignment the returned
     // pointer lies inside the malloc'd region, so it must not be handed to
     // free().
     uint8_t *allocateAlignedMemory(size_t size, int alignment) {
       if (alignment < 0)
           return NULL;

       // Pad by twice the alignment: the pointer moves forward by at most one
       // alignment to reach an aligned address and by at most one more when the
       // alignment bit itself has to be set.
       const size_t align = static_cast<size_t>(alignment);
       const size_t padding = 2 * align;
       const size_t max_size = static_cast<size_t>(-1);
       if (size > max_size - padding)
           return NULL;

       // uintptr_t is the integer type meant for holding a pointer value.
       uintptr_t ptr = reinterpret_cast<uintptr_t>(malloc(size + padding));
       if (!ptr)
           return NULL;
       if (align > 0) {
           // Step forward to the next multiple of the alignment (a full step
           // when the pointer is already aligned).
           ptr += align - (ptr & (align - 1));
           // Force the alignment bit on so the address is guaranteed not to be
           // aligned to anything more than the alignment chosen.
           ptr |= align;
       }

       return reinterpret_cast<uint8_t*>(ptr);
     }
    100 
    101 int benchmarkSleep(const command_data_t &cmd_data) {
    102     uint64_t time_ns;
    103 
    104     int delay = cmd_data.args[0];
    105     int iters = cmd_data.args[1];
    106     bool print_each_iter = cmd_data.print_each_iter;
    107     bool print_average = cmd_data.print_average;
    108     double avg, running_avg = 0.0, square_avg = 0.0;
    109     for (int i = 0; iters == -1 || i < iters; i++) {
    110         time_ns = nanoTime();
    111         sleep(delay);
    112         time_ns = nanoTime() - time_ns;
    113 
    114         avg = (double)time_ns / NS_PER_SEC;
    115 
    116         if (print_average) {
    117             COMPUTE_RUNNING(avg, running_avg, square_avg, i);
    118         }
    119 
    120         if (print_each_iter) {
    121             printf("sleep(%d) took %.06f seconds\n", delay, avg);
    122         }
    123     }
    124 
    125     if (print_average) {
    126         printf("  sleep(%d) average %.06f seconds std dev %f\n", delay,
    127                running_avg, GET_STD_DEV(running_avg, square_avg));
    128     }
    129 
    130     return 0;
    131 }
    132 
    133 int benchmarkCpu(const command_data_t &cmd_data) {
    134     // Use volatile so that the loop is not optimized away by the compiler.
    135     volatile int cpu_foo;
    136 
    137     uint64_t time_ns;
    138     int iters = cmd_data.args[1];
    139     bool print_each_iter = cmd_data.print_each_iter;
    140     bool print_average = cmd_data.print_average;
    141     double avg, running_avg = 0.0, square_avg = 0.0;
    142     for (int i = 0; iters == -1 || i < iters; i++) {
    143         time_ns = nanoTime();
    144         for (cpu_foo = 0; cpu_foo < 100000000; cpu_foo++);
    145         time_ns = nanoTime() - time_ns;
    146 
    147         avg = (double)time_ns / NS_PER_SEC;
    148 
    149         if (print_average) {
    150             COMPUTE_RUNNING(avg, running_avg, square_avg, i);
    151         }
    152 
    153         if (print_each_iter) {
    154             printf("cpu took %.06f seconds\n", avg);
    155         }
    156     }
    157 
    158     if (print_average) {
    159         printf("  cpu average %.06f seconds std dev %f\n",
    160                running_avg, GET_STD_DEV(running_avg, square_avg));
    161     }
    162 
    163     return 0;
    164 }
    165 
    166 int benchmarkMemset(const command_data_t &cmd_data) {
    167     int size = cmd_data.args[0];
    168     int iters = cmd_data.args[1];
    169 
    170     uint8_t *dst = allocateAlignedMemory(size, cmd_data.dst_align);
    171     if (!dst)
    172         return -1;
    173 
    174     double avg_kb, running_avg_kb = 0.0, square_avg_kb = 0.0;
    175     uint64_t time_ns;
    176     int j;
    177     bool print_average = cmd_data.print_average;
    178     bool print_each_iter = cmd_data.print_each_iter;
    179     int copies = cmd_data.data_size/size;
    180     for (int i = 0; iters == -1 || i < iters; i++) {
    181         time_ns = nanoTime();
    182         for (j = 0; j < copies; j++)
    183             memset(dst, 0, size);
    184         time_ns = nanoTime() - time_ns;
    185 
    186         // Compute in kb to avoid any overflows.
    187         COMPUTE_AVERAGE_KB(avg_kb, copies * size, time_ns);
    188 
    189         if (print_average) {
    190             COMPUTE_RUNNING(avg_kb, running_avg_kb, square_avg_kb, i);
    191         }
    192 
    193         if (print_each_iter) {
    194             printf("memset %dx%d bytes took %.06f seconds (%f MB/s)\n",
    195                    copies, size, (double)time_ns / NS_PER_SEC, avg_kb / 1024.0);
    196         }
    197     }
    198 
    199     if (print_average) {
    200         printf("  memset %dx%d bytes average %.2f MB/s std dev %.4f\n",
    201                copies, size, running_avg_kb / 1024.0,
    202                GET_STD_DEV(running_avg_kb, square_avg_kb) / 1024.0);
    203     }
    204     return 0;
    205 }
    206 
    207 int benchmarkMemcpy(const command_data_t &cmd_data) {
    208     int size = cmd_data.args[0];
    209     int iters = cmd_data.args[1];
    210 
    211     uint8_t *src = allocateAlignedMemory(size, cmd_data.src_align);
    212     if (!src)
    213         return -1;
    214     uint8_t *dst = allocateAlignedMemory(size, cmd_data.dst_align);
    215     if (!dst)
    216         return -1;
    217 
    218     uint64_t time_ns;
    219     double avg_kb, running_avg_kb = 0.0, square_avg_kb = 0.0;
    220     int j;
    221     bool print_average = cmd_data.print_average;
    222     bool print_each_iter = cmd_data.print_each_iter;
    223     int copies = cmd_data.data_size / size;
    224     for (int i = 0; iters == -1 || i < iters; i++) {
    225         time_ns = nanoTime();
    226         for (j = 0; j < copies; j++)
    227             memcpy(dst, src, size);
    228         time_ns = nanoTime() - time_ns;
    229 
    230         // Compute in kb to avoid any overflows.
    231         COMPUTE_AVERAGE_KB(avg_kb, copies * size, time_ns);
    232 
    233         if (print_average) {
    234             COMPUTE_RUNNING(avg_kb, running_avg_kb, square_avg_kb, i);
    235         }
    236 
    237         if (print_each_iter) {
    238             printf("memcpy %dx%d bytes took %.06f seconds (%f MB/s)\n",
    239                    copies, size, (double)time_ns / NS_PER_SEC, avg_kb / 1024.0);
    240         }
    241     }
    242     if (print_average) {
    243         printf("  memcpy %dx%d bytes average %.2f MB/s std dev %.4f\n",
    244                copies, size, running_avg_kb/1024.0,
    245                GET_STD_DEV(running_avg_kb, square_avg_kb) / 1024.0);
    246     }
    247     return 0;
    248 }
    249 
    250 int benchmarkMemread(const command_data_t &cmd_data) {
    251     int size = cmd_data.args[0];
    252     int iters = cmd_data.args[1];
    253 
    254     int *src = reinterpret_cast<int*>(malloc(size));
    255     if (!src)
    256         return -1;
    257 
    258     // Use volatile so the compiler does not optimize away the reads.
    259     volatile int foo;
    260     uint64_t time_ns;
    261     int j, k;
    262     double avg_kb, running_avg_kb = 0.0, square_avg_kb = 0.0;
    263     bool print_average = cmd_data.print_average;
    264     bool print_each_iter = cmd_data.print_each_iter;
    265     int c = cmd_data.data_size / size;
    266     for (int i = 0; iters == -1 || i < iters; i++) {
    267         time_ns = nanoTime();
    268         for (j = 0; j < c; j++)
    269             for (k = 0; k < size/4; k++)
    270                 foo = src[k];
    271         time_ns = nanoTime() - time_ns;
    272 
    273         // Compute in kb to avoid any overflows.
    274         COMPUTE_AVERAGE_KB(avg_kb, c * size, time_ns);
    275 
    276         if (print_average) {
    277             COMPUTE_RUNNING(avg_kb, running_avg_kb, square_avg_kb, i);
    278         }
    279 
    280         if (print_each_iter) {
    281             printf("read %dx%d bytes took %.06f seconds (%f MB/s)\n",
    282                    c, size, (double)time_ns / NS_PER_SEC, avg_kb / 1024.0);
    283         }
    284     }
    285 
    286     if (print_average) {
    287         printf("  read %dx%d bytes average %.2f MB/s std dev %.4f\n",
    288                c, size, running_avg_kb/1024.0,
    289                GET_STD_DEV(running_avg_kb, square_avg_kb) / 1024.0);
    290     }
    291 
    292     return 0;
    293 }
    294 
    295 // Create the mapping structure.
    296 function_t function_table[] = {
    297     { "sleep", benchmarkSleep },
    298     { "cpu", benchmarkCpu },
    299     { "memset", benchmarkMemset },
    300     { "memcpy", benchmarkMemcpy },
    301     { "memread", benchmarkMemread },
    302     { NULL, NULL }
    303 };
    304 
    // Print the command line help text to stdout.
    void usage() {
        // The help text, one output line per entry, printed in order.
        static const char *const kUsageLines[] = {
            "Usage:\n",
            "  micro_bench [--data_size DATA_BYTES] [--print_average]\n",
            "              [--no_print_each_iter] [--lock_to_cpu CORE]\n",
            "    --data_size DATA_BYTES\n",
            "      For the data benchmarks (memcpy/memset/memread) the approximate\n",
            "      size of data, in bytes, that will be manipulated in each iteration.\n",
            "    --print_average\n",
            "      Print the average and standard deviation of all iterations.\n",
            "    --no_print_each_iter\n",
            "      Do not print any values in each iteration.\n",
            "    --lock_to_cpu CORE\n",
            "      Lock to the specified CORE. The default is to use the last core found.\n",
            "    ITERS\n",
            "      The number of iterations to execute each benchmark. If not\n",
            "      passed in then run forever.\n",
            "  micro_bench sleep TIME_TO_SLEEP [ITERS]\n",
            "    TIME_TO_SLEEP\n",
            "      The time in seconds to sleep.\n",
            "  micro_bench cpu UNUSED [ITERS]\n",
            "  micro_bench [--dst_align ALIGN] memset NUM_BYTES [ITERS]\n",
            "    --dst_align ALIGN\n",
            "      Align the memset destination pointer to ALIGN. The default is to use the\n",
            "      value returned by malloc.\n",
            "  micro_bench [--src_align ALIGN] [--dst_align ALIGN] memcpy NUM_BYTES [ITERS]\n",
            "    --src_align ALIGN\n",
            "      Align the memcpy source pointer to ALIGN. The default is to use the\n",
            "      value returned by malloc.\n",
            "    --dst_align ALIGN\n",
            "      Align the memcpy destination pointer to ALIGN. The default is to use the\n",
            "      value returned by malloc.\n",
            "  micro_bench memread NUM_BYTES [ITERS]\n",
        };
        const size_t line_count = sizeof(kUsageLines) / sizeof(kUsageLines[0]);

        for (size_t line = 0; line < line_count; line++) {
            fputs(kUsageLines[line], stdout);
        }
    }
    338 
    339 function_t *processOptions(int argc, char **argv, command_data_t *cmd_data) {
    340     function_t *command = NULL;
    341 
    342     // Initialize the command_flags.
    343     cmd_data->print_average = false;
    344     cmd_data->print_each_iter = true;
    345     cmd_data->dst_align = 0;
    346     cmd_data->src_align = 0;
    347     cmd_data->num_args = 0;
    348     cmd_data->cpu_to_lock = -1;
    349     cmd_data->data_size = DEFAULT_DATA_SIZE;
    350     for (int i = 0; i < MAX_ARGS; i++) {
    351         cmd_data->args[i] = -1;
    352     }
    353 
    354     for (int i = 1; i < argc; i++) {
    355         if (argv[i][0] == '-') {
    356             int *save_value = NULL;
    357             if (strcmp(argv[i], "--print_average") == 0) {
    358               cmd_data->print_average = true;
    359             } else if (strcmp(argv[i], "--no_print_each_iter") == 0) {
    360               cmd_data->print_each_iter = false;
    361             } else if (strcmp(argv[i], "--dst_align") == 0) {
    362               save_value = &cmd_data->dst_align;
    363             } else if (strcmp(argv[i], "--src_align") == 0) {
    364               save_value = &cmd_data->src_align;
    365             } else if (strcmp(argv[i], "--lock_to_cpu") == 0) {
    366               save_value = &cmd_data->cpu_to_lock;
    367             } else if (strcmp(argv[i], "--data_size") == 0) {
    368               save_value = &cmd_data->data_size;
    369             } else {
    370                 printf("Unknown option %s\n", argv[i]);
    371                 return NULL;
    372             }
    373             if (save_value) {
    374                 // Checking both characters without a strlen() call should be
    375                 // safe since as long as the argument exists, one character will
    376                 // be present (\0). And if the first character is '-', then
    377                 // there will always be a second character (\0 again).
    378                 if (i == argc - 1 || (argv[i + 1][0] == '-' && !isdigit(argv[i + 1][1]))) {
    379                     printf("The option %s requires one argument.\n",
    380                            argv[i]);
    381                     return NULL;
    382                 }
    383                 *save_value = atoi(argv[++i]);
    384             }
    385         } else if (!command) {
    386             for (function_t *function = function_table; function->name != NULL; function++) {
    387                 if (strcmp(argv[i], function->name) == 0) {
    388                     command = function;
    389                     break;
    390                 }
    391             }
    392             if (!command) {
    393                 printf("Uknown command %s\n", argv[i]);
    394                 return NULL;
    395             }
    396         } else if (cmd_data->num_args > MAX_ARGS) {
    397             printf("More than %d number arguments passed in.\n", MAX_ARGS);
    398             return NULL;
    399         } else {
    400             cmd_data->args[cmd_data->num_args++] = atoi(argv[i]);
    401         }
    402     }
    403 
    404     // Check the arguments passed in make sense.
    405     if (cmd_data->num_args != 1 && cmd_data->num_args != 2) {
    406         printf("Not enough arguments passed in.\n");
    407         return NULL;
    408     } else if (cmd_data->dst_align < 0) {
    409         printf("The --dst_align option must be greater than or equal to 0.\n");
    410         return NULL;
    411     } else if (cmd_data->src_align < 0) {
    412         printf("The --src_align option must be greater than or equal to 0.\n");
    413         return NULL;
    414     } else if (cmd_data->data_size <= 0) {
    415         printf("The --data_size option must be a positive number.\n");
    416         return NULL;
    417     } else if ((cmd_data->dst_align & (cmd_data->dst_align - 1))) {
    418         printf("The --dst_align option must be a power of 2.\n");
    419         return NULL;
    420     } else if ((cmd_data->src_align & (cmd_data->src_align - 1))) {
    421         printf("The --src_align option must be a power of 2.\n");
    422         return NULL;
    423     }
    424 
    425     return command;
    426 }
    427 
    // Raise the process priority (nice value -20) and restrict the process to
    // a single core.
    //
    // cpu_to_lock is the core to lock to. If it is negative, the highest
    // numbered core in the current affinity mask is used.
    //
    // Returns true on success. Returns false, after printing a message, if the
    // priority cannot be changed, the affinity mask cannot be read or written,
    // or the requested core is not in the current affinity mask.
    bool raisePriorityAndLock(int cpu_to_lock) {
        cpu_set_t cpuset;

        if (setpriority(PRIO_PROCESS, 0, -20)) {
            // perror() appends ": <error text>" and a newline by itself, so the
            // prefix must not end in punctuation or a newline.
            perror("Unable to raise priority of process");
            return false;
        }

        CPU_ZERO(&cpuset);
        if (sched_getaffinity(0, sizeof(cpuset), &cpuset) != 0) {
            perror("sched_getaffinity failed");
            return false;
        }

        if (cpu_to_lock < 0) {
            // Lock to the last active core we find.
            for (int i = 0; i < CPU_SETSIZE; i++) {
                if (CPU_ISSET(i, &cpuset)) {
                    cpu_to_lock = i;
                }
            }
        } else if (cpu_to_lock >= CPU_SETSIZE || !CPU_ISSET(cpu_to_lock, &cpuset)) {
            // The range check comes first so that CPU_ISSET is only asked about
            // indices a cpu_set_t can hold.
            printf("Cpu %d does not exist.\n", cpu_to_lock);
            return false;
        }

        // Still negative only when the affinity mask had no core set.
        if (cpu_to_lock < 0) {
            printf("Cannot find any valid cpu to lock.\n");
            return false;
        }

        CPU_ZERO(&cpuset);
        CPU_SET(cpu_to_lock, &cpuset);
        if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) {
            perror("sched_setaffinity failed");
            return false;
        }

        return true;
    }
    468 
    469 int main(int argc, char **argv) {
    470     command_data_t cmd_data;
    471 
    472     function_t *command = processOptions(argc, argv, &cmd_data);
    473     if (!command) {
    474       usage();
    475       return -1;
    476     }
    477 
    478     if (!raisePriorityAndLock(cmd_data.cpu_to_lock)) {
    479       return -1;
    480     }
    481 
    482     printf("%s\n", command->name);
    483     return (*command->ptr)(cmd_data);
    484 }
    485