Home | History | Annotate | Download | only in memtest
      1 /*
      2  * Copyright (C) 2013 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "bandwidth.h"
     18 
     19 #include <ctype.h>
     20 #include <pthread.h>
     21 #include <sched.h>
     22 #include <sys/resource.h>
     23 #include <sys/time.h>
     24 #include <unistd.h>
     25 
     26 #include <map>
     27 #include <vector>
     28 
     29 
     30 typedef struct {
     31     const char *name;
     32     bool int_type;
     33 } option_t;
     34 
     35 option_t bandwidth_opts[] = {
     36     { "size", true },
     37     { "num_warm_loops", true },
     38     { "num_loops", true },
     39     { "type", false },
     40     { NULL, false },
     41 };
     42 
     43 option_t per_core_opts[] = {
     44     { "size", true },
     45     { "num_warm_loops", true},
     46     { "num_loops", true },
     47     { "type", false },
     48     { NULL, false },
     49 };
     50 
     51 option_t multithread_opts[] = {
     52     { "size", true },
     53     { "num_warm_loops", true},
     54     { "num_loops", true },
     55     { "type", false },
     56     { "num_threads", true },
     57     { NULL, false },
     58 };
     59 
     60 typedef union {
     61     int int_value;
     62     const char *char_value;
     63 } arg_value_t;
     64 typedef std::map<const char*, arg_value_t> arg_t;
     65 
     66 bool processBandwidthOptions(int argc, char** argv, option_t options[],
     67                              arg_t *values) {
     68     for (int i = 1; i < argc; i++) {
     69         if (argv[i][0] == '-' && argv[i][1] == '-' && !isdigit(argv[i][2])) {
     70             char *arg = &argv[i][2];
     71 
     72             for (int j = 0; options[j].name != NULL; j++) {
     73                 if (strcmp(arg, options[j].name) == 0) {
     74                     const char *name = options[j].name;
     75                     if (i == argc - 1) {
     76                         printf("The option --%s requires an argument.\n", name);
     77                         return false;
     78                     }
     79                     if (options[j].int_type) {
     80                         (*values)[name].int_value = strtol(argv[++i], NULL, 0);
     81                     } else {
     82                         (*values)[name].char_value = argv[++i];
     83                     }
     84                 }
     85             }
     86         }
     87     }
     88 
     89     return true;
     90 }
     91 
     92 BandwidthBenchmark *createBandwidthBenchmarkObject(arg_t values) {
     93     BandwidthBenchmark *bench = NULL;
     94 
     95     const char *name = values["type"].char_value;
     96     size_t size = 0;
     97     if (values.count("size") > 0) {
     98         size = values["size"].int_value;
     99     }
    100     if (strcmp(name, "copy_ldrd_strd") == 0) {
    101         bench = new CopyLdrdStrdBenchmark();
    102     } else if (strcmp(name, "copy_ldmia_stmia") == 0) {
    103         bench = new CopyLdmiaStmiaBenchmark();
    104     } else if (strcmp(name, "copy_vld1_vst1") == 0) {
    105         bench = new CopyVld1Vst1Benchmark();
    106     } else if (strcmp(name, "copy_vldr_vstr") == 0) {
    107         bench = new CopyVldrVstrBenchmark();
    108     } else if (strcmp(name, "copy_vldmia_vstmia") == 0) {
    109         bench = new CopyVldmiaVstmiaBenchmark();
    110     } else if (strcmp(name, "memcpy") == 0) {
    111         bench = new MemcpyBenchmark();
    112     } else if (strcmp(name, "write_strd") == 0) {
    113         bench = new WriteStrdBenchmark();
    114     } else if (strcmp(name, "write_stmia") == 0) {
    115         bench = new WriteStmiaBenchmark();
    116     } else if (strcmp(name, "write_vst1") == 0) {
    117         bench = new WriteVst1Benchmark();
    118     } else if (strcmp(name, "write_vstr") == 0) {
    119         bench = new WriteVstrBenchmark();
    120     } else if (strcmp(name, "write_vstmia") == 0) {
    121         bench = new WriteVstmiaBenchmark();
    122     } else if (strcmp(name, "memset") == 0) {
    123         bench = new MemsetBenchmark();
    124     } else if (strcmp(name, "read_ldrd") == 0) {
    125         bench = new ReadLdrdBenchmark();
    126     } else if (strcmp(name, "read_ldmia") == 0) {
    127         bench = new ReadLdmiaBenchmark();
    128     } else if (strcmp(name, "read_vld1") == 0) {
    129         bench = new ReadVld1Benchmark();
    130     } else if (strcmp(name, "read_vldr") == 0) {
    131         bench = new ReadVldrBenchmark();
    132     } else if (strcmp(name, "read_vldmia") == 0) {
    133         bench = new ReadVldmiaBenchmark();
    134     } else {
    135         printf("Unknown type name %s\n", name);
    136         return NULL;
    137     }
    138 
    139     if (!bench->setSize(size)) {
    140         printf("Failed to allocate buffers for benchmark.\n");
    141         delete bench;
    142         return NULL;
    143     }
    144 
    145     if (values.count("num_warm_loops") > 0) {
    146         bench->set_num_loops(values["num_warm_loops"].int_value);
    147     }
    148     if (values.count("num_loops") > 0) {
    149         bench->set_num_loops(values["num_loops"].int_value);
    150     }
    151 
    152     return bench;
    153 }
    154 
    155 bool getAvailCpus(std::vector<int> *cpu_list) {
    156     cpu_set_t cpuset;
    157 
    158     CPU_ZERO(&cpuset);
    159     if (sched_getaffinity(0, sizeof(cpuset), &cpuset) != 0) {
    160         perror("sched_getaffinity failed.");
    161         return false;
    162     }
    163 
    164     for (int i = 0; i < CPU_SETSIZE; i++) {
    165         if (CPU_ISSET(i, &cpuset)) {
    166             cpu_list->push_back(i);
    167         }
    168     }
    169 
    170     return true;
    171 }
    172 
    173 typedef struct {
    174     int core;
    175     BandwidthBenchmark *bench;
    176     double  avg_mb;
    177     volatile bool *run;
    178 } thread_arg_t;
    179 
    180 void *runBandwidthThread(void *data) {
    181     thread_arg_t *arg = reinterpret_cast<thread_arg_t *>(data);
    182 
    183     if (arg->core >= 0) {
    184         cpu_set_t cpuset;
    185         CPU_ZERO(&cpuset);
    186         CPU_SET(arg->core, &cpuset);
    187         if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) {
    188             perror("sched_setaffinity failed");
    189             return NULL;
    190         }
    191     }
    192 
    193     // Spinloop waiting for the run variable to get set to true.
    194     while (!*arg->run) {
    195     }
    196 
    197     double avg_mb = 0;
    198     for (int run = 1; ; run++) {
    199         arg->bench->run();
    200         if (!*arg->run) {
    201             // Throw away the last data point since it's possible not
    202             // all of the threads are running at this point.
    203             break;
    204         }
    205         avg_mb = (avg_mb/run) * (run-1) + arg->bench->mb_per_sec()/run;
    206     }
    207     arg->avg_mb = avg_mb;
    208 
    209     return NULL;
    210 }
    211 
    212 bool processThreadArgs(int argc, char** argv, option_t options[],
    213                        arg_t *values) {
    214     // Use some smaller values for the number of loops.
    215     (*values)["num_warm_loops"].int_value = 1000000;
    216     (*values)["num_loops"].int_value = 10000000;
    217 
    218     if (!processBandwidthOptions(argc, argv, options, values)) {
    219         return false;
    220     }
    221     if (values->count("size") > 0 && ((*values)["size"].int_value % 64) != 0) {
    222         printf("The size values must be a multiple of 64.\n");
    223         return false;
    224     }
    225     if (values->count("type") == 0) {
    226         printf("Must specify the type value.\n");
    227         return false;
    228     }
    229 
    230     BandwidthBenchmark *bench = createBandwidthBenchmarkObject(*values);
    231     if (!bench) {
    232         return false;
    233     }
    234 
    235     if (setpriority(PRIO_PROCESS, 0, -20)) {
    236         perror("Unable to raise priority of process.");
    237         return false;
    238     }
    239 
    240     printf("Calculating optimum run time...\n");
    241     nsecs_t t = system_time();
    242     bench->run();
    243     t = system_time() - t;
    244     // Since this is only going to be running single threaded, assume that
    245     // if the number is set to ten times this value, we should get at least
    246     // a couple of samples per thread.
    247     int run_time = int((t/1000000000.0)*10 + 0.5) + 5;
    248 
    249     (*values)["run_time"].int_value = run_time;
    250     (*values)["size"].int_value = bench->size();
    251     (*values)["num_warm_loops"].int_value = bench->num_warm_loops();
    252     (*values)["num_loops"].int_value = bench->num_loops();
    253     delete bench;
    254 
    255     return true;
    256 }
    257 
    258 bool runThreadedTest(thread_arg_t args[], int num_threads, int run_time) {
    259     pthread_t threads[num_threads];
    260     volatile bool run = false;
    261 
    262     int rc;
    263     for (int i = 0; i < num_threads; i++) {
    264         args[i].run = &run;
    265         rc = pthread_create(&threads[i], NULL, runBandwidthThread,
    266                             (void*)&args[i]);
    267         if (rc != 0) {
    268             printf("Failed to launch thread %d\n", i);
    269             return false;
    270         }
    271     }
    272 
    273     // Kick start the threads.
    274     run = true;
    275 
    276     // Let the threads run.
    277     sleep(run_time);
    278 
    279     // Stop the threads.
    280     run = false;
    281 
    282     // Wait for the threads to complete.
    283     for (int i = 0; i < num_threads; i++) {
    284         rc = pthread_join(threads[i], NULL);
    285         if (rc != 0) {
    286             printf("Thread %d failed to join.\n", i);
    287             return false;
    288         }
    289         printf("Thread %d: bandwidth using %s %0.2f MB/s\n", i,
    290                args[i].bench->getName(), args[i].avg_mb);
    291     }
    292 
    293     return true;
    294 }
    295 
    296 int per_core_bandwidth(int argc, char** argv) {
    297     arg_t values;
    298     if (!processThreadArgs(argc, argv, per_core_opts, &values)) {
    299         return -1;
    300     }
    301 
    302     std::vector<int> cpu_list;
    303     if (!getAvailCpus(&cpu_list)) {
    304         printf("Failed to get available cpu list.\n");
    305         return -1;
    306     }
    307 
    308     thread_arg_t args[cpu_list.size()];
    309 
    310     int i = 0;
    311     for (std::vector<int>::iterator it = cpu_list.begin();
    312          it != cpu_list.end(); ++it, ++i) {
    313         args[i].core = *it;
    314         args[i].bench = createBandwidthBenchmarkObject(values);
    315         if (!args[i].bench) {
    316             for (int j = 0; j < i; j++)
    317                 delete args[j].bench;
    318             return -1;
    319         }
    320     }
    321 
    322     printf("Running on %d cores\n", cpu_list.size());
    323     printf("  run_time = %ds\n", values["run_time"].int_value);
    324     printf("  size = %d\n", values["size"].int_value);
    325     printf("  num_warm_loops = %d\n", values["num_warm_loops"].int_value);
    326     printf("  num_loops = %d\n", values["num_loops"].int_value);
    327     printf("\n");
    328 
    329     if (!runThreadedTest(args, cpu_list.size(), values["run_time"].int_value)) {
    330         return -1;
    331     }
    332 
    333     return 0;
    334 }
    335 
    336 int multithread_bandwidth(int argc, char** argv) {
    337     arg_t values;
    338     if (!processThreadArgs(argc, argv, multithread_opts, &values)) {
    339         return -1;
    340     }
    341     if (values.count("num_threads") == 0) {
    342         printf("Must specify the num_threads value.\n");
    343         return -1;
    344     }
    345     int num_threads = values["num_threads"].int_value;
    346 
    347     thread_arg_t args[num_threads];
    348 
    349     for (int i = 0; i < num_threads; i++) {
    350         args[i].core = -1;
    351         args[i].bench = createBandwidthBenchmarkObject(values);
    352         if (!args[i].bench) {
    353             for (int j = 0; j < i; j++)
    354                 delete args[j].bench;
    355             return -1;
    356         }
    357     }
    358 
    359     printf("Running %d threads\n", num_threads);
    360     printf("  run_time = %ds\n", values["run_time"].int_value);
    361     printf("  size = %d\n", values["size"].int_value);
    362     printf("  num_warm_loops = %d\n", values["num_warm_loops"].int_value);
    363     printf("  num_loops = %d\n", values["num_loops"].int_value);
    364     printf("\n");
    365 
    366     if (!runThreadedTest(args, num_threads, values["run_time"].int_value)) {
    367         return -1;
    368     }
    369 
    370     return 0;
    371 }
    372 
    373 bool run_bandwidth_benchmark(int argc, char** argv, const char *name,
    374                              std::vector<BandwidthBenchmark*> bench_objs) {
    375     arg_t values;
    376     values["size"].int_value = 0;
    377     values["num_warm_loops"].int_value = 0;
    378     values["num_loops"].int_value = 0;
    379     if (!processBandwidthOptions(argc, argv, bandwidth_opts, &values)) {
    380         return false;
    381     }
    382 
    383     size_t size = values["size"].int_value;
    384     if ((size % 64) != 0) {
    385         printf("The size value must be a multiple of 64.\n");
    386         return false;
    387     }
    388 
    389     if (setpriority(PRIO_PROCESS, 0, -20)) {
    390         perror("Unable to raise priority of process.");
    391         return false;
    392     }
    393 
    394     bool preamble_printed = false;
    395     size_t num_warm_loops = values["num_warm_loops"].int_value;
    396     size_t num_loops = values["num_loops"].int_value;
    397     for (std::vector<BandwidthBenchmark*>::iterator it = bench_objs.begin();
    398          it != bench_objs.end(); ++it) {
    399         if (!(*it)->canRun()) {
    400             continue;
    401         }
    402         if (!(*it)->setSize(values["size"].int_value)) {
    403             printf("Failed creating buffer for bandwidth test.\n");
    404             return false;
    405         }
    406         if (num_warm_loops) {
    407             (*it)->set_num_warm_loops(num_warm_loops);
    408         }
    409         if (num_loops) {
    410             (*it)->set_num_loops(num_loops);
    411         }
    412         if (!preamble_printed) {
    413             preamble_printed = true;
    414             printf("Benchmarking %s bandwidth\n", name);
    415             printf("  size = %d\n", (*it)->size());
    416             printf("  num_warm_loops = %d\n", (*it)->num_warm_loops());
    417             printf("  num_loops = %d\n\n", (*it)->num_loops());
    418         }
    419         (*it)->run();
    420         printf("  %s bandwidth with %s: %0.2f MB/s\n", name, (*it)->getName(),
    421                (*it)->mb_per_sec());
    422     }
    423 
    424     return true;
    425 }
    426 
    427 int copy_bandwidth(int argc, char** argv) {
    428     std::vector<BandwidthBenchmark*> bench_objs;
    429     bench_objs.push_back(new CopyLdrdStrdBenchmark());
    430     bench_objs.push_back(new CopyLdmiaStmiaBenchmark());
    431     bench_objs.push_back(new CopyVld1Vst1Benchmark());
    432     bench_objs.push_back(new CopyVldrVstrBenchmark());
    433     bench_objs.push_back(new CopyVldmiaVstmiaBenchmark());
    434     bench_objs.push_back(new MemcpyBenchmark());
    435 
    436     if (!run_bandwidth_benchmark(argc, argv, "copy", bench_objs)) {
    437         return -1;
    438     }
    439     return 0;
    440 }
    441 
    442 int write_bandwidth(int argc, char** argv) {
    443     std::vector<BandwidthBenchmark*> bench_objs;
    444     bench_objs.push_back(new WriteStrdBenchmark());
    445     bench_objs.push_back(new WriteStmiaBenchmark());
    446     bench_objs.push_back(new WriteVst1Benchmark());
    447     bench_objs.push_back(new WriteVstrBenchmark());
    448     bench_objs.push_back(new WriteVstmiaBenchmark());
    449     bench_objs.push_back(new MemsetBenchmark());
    450 
    451     if (!run_bandwidth_benchmark(argc, argv, "write", bench_objs)) {
    452         return -1;
    453     }
    454 
    455     return 0;
    456 }
    457 
    458 int read_bandwidth(int argc, char** argv) {
    459     std::vector<BandwidthBenchmark*> bench_objs;
    460     bench_objs.push_back(new ReadLdrdBenchmark());
    461     bench_objs.push_back(new ReadLdmiaBenchmark());
    462     bench_objs.push_back(new ReadVld1Benchmark());
    463     bench_objs.push_back(new ReadVldrBenchmark());
    464     bench_objs.push_back(new ReadVldmiaBenchmark());
    465 
    466     if (!run_bandwidth_benchmark(argc, argv, "read", bench_objs)) {
    467         return -1;
    468     }
    469     return 0;
    470 }
    471