Home | History | Annotate | Download | only in memtest
      1 /*
      2  * Copyright (C) 2013 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include <pthread.h>
     18 #include <sched.h>
     19 #include <sys/time.h>
     20 #include <sys/resource.h>
     21 #include <unistd.h>
     22 #include <ctype.h>
     23 
     24 #include <map>
     25 #include <vector>
     26 
     27 #include "bandwidth.h"
     28 
     29 
     // Describes one command line option that a benchmark command accepts.
     struct option_t {
         // Option name as typed on the command line, without the leading "--".
         // A NULL name marks the end of an option table.
         const char *name;
         // True when the option's argument is converted to an integer, false
         // when the argument is kept as a string.
         bool int_type;
     };
     34 
     35 option_t bandwidth_opts[] = {
     36     { "size", true },
     37     { "num_warm_loops", true },
     38     { "num_loops", true },
     39     { "type", false },
     40     { NULL, false },
     41 };
     42 
     43 option_t per_core_opts[] = {
     44     { "size", true },
     45     { "num_warm_loops", true},
     46     { "num_loops", true },
     47     { "type", false },
     48     { NULL, false },
     49 };
     50 
     51 option_t multithread_opts[] = {
     52     { "size", true },
     53     { "num_warm_loops", true},
     54     { "num_loops", true },
     55     { "type", false },
     56     { "num_threads", true },
     57     { NULL, false },
     58 };
     59 
     // Value parsed for a single option. Which member is meaningful depends on
     // the option: integer options use int_value, string options use
     // char_value.
     typedef union {
         int int_value;
         const char *char_value;
     } arg_value_t;

     // Orders option names by their text rather than by their address. With the
     // default pointer comparison a lookup such as values["size"] only finds an
     // entry if both string literals happen to share one address, which the
     // language does not guarantee.
     struct arg_name_less_t {
         bool operator()(const char *lhs, const char *rhs) const {
             return strcmp(lhs, rhs) < 0;
         }
     };

     // Parsed option values keyed by option name. The map stores only the
     // pointer, so every key must outlive the map (string literals and the
     // option tables do).
     typedef std::map<const char*, arg_value_t, arg_name_less_t> arg_t;
     65 
     66 bool processBandwidthOptions(int argc, char** argv, option_t options[],
     67                              arg_t *values) {
     68     for (int i = 1; i < argc; i++) {
     69         if (argv[i][0] == '-' && argv[i][1] == '-' && !isdigit(argv[i][2])) {
     70             char *arg = &argv[i][2];
     71 
     72             for (int j = 0; options[j].name != NULL; j++) {
     73                 if (strcmp(arg, options[j].name) == 0) {
     74                     const char *name = options[j].name;
     75                     if (i == argc - 1) {
     76                         printf("The option --%s requires an argument.\n", name);
     77                         return false;
     78                     }
     79                     if (options[j].int_type) {
     80                         (*values)[name].int_value = strtol(argv[++i], NULL, 0);
     81                     } else {
     82                         (*values)[name].char_value = argv[++i];
     83                     }
     84                 }
     85             }
     86         }
     87     }
     88 
     89     return true;
     90 }
     91 
     92 BandwidthBenchmark *createBandwidthBenchmarkObject(arg_t values) {
     93     BandwidthBenchmark *bench = NULL;
     94 
     95     const char *name = values["type"].char_value;
     96     size_t size = 0;
     97     if (values.count("size") > 0) {
     98         size = values["size"].int_value;
     99     }
    100     if (strcmp(name, "copy_ldrd_strd") == 0) {
    101         bench = new CopyLdrdStrdBenchmark();
    102     } else if (strcmp(name, "copy_ldmia_stmia") == 0) {
    103         bench = new CopyLdmiaStmiaBenchmark();
    104     } else if (strcmp(name, "copy_vld1_vst1") == 0) {
    105         bench = new CopyVld1Vst1Benchmark();
    106     } else if (strcmp(name, "copy_vldr_vstr") == 0) {
    107         bench = new CopyVldrVstrBenchmark();
    108     } else if (strcmp(name, "copy_vldmia_vstmia") == 0) {
    109         bench = new CopyVldmiaVstmiaBenchmark();
    110     } else if (strcmp(name, "memcpy") == 0) {
    111         bench = new MemcpyBenchmark();
    112     } else if (strcmp(name, "write_strd") == 0) {
    113         bench = new WriteStrdBenchmark();
    114     } else if (strcmp(name, "write_stmia") == 0) {
    115         bench = new WriteStmiaBenchmark();
    116     } else if (strcmp(name, "write_vst1") == 0) {
    117         bench = new WriteVst1Benchmark();
    118     } else if (strcmp(name, "write_vstr") == 0) {
    119         bench = new WriteVstrBenchmark();
    120     } else if (strcmp(name, "write_vstmia") == 0) {
    121         bench = new WriteVstmiaBenchmark();
    122     } else if (strcmp(name, "memset") == 0) {
    123         bench = new MemsetBenchmark();
    124     } else if (strcmp(name, "read_ldrd") == 0) {
    125         bench = new ReadLdrdBenchmark();
    126     } else if (strcmp(name, "read_ldmia") == 0) {
    127         bench = new ReadLdmiaBenchmark();
    128     } else if (strcmp(name, "read_vld1") == 0) {
    129         bench = new ReadVld1Benchmark();
    130     } else if (strcmp(name, "read_vldr") == 0) {
    131         bench = new ReadVldrBenchmark();
    132     } else if (strcmp(name, "read_vldmia") == 0) {
    133         bench = new ReadVldmiaBenchmark();
    134     } else {
    135         printf("Unknown type name %s\n", name);
    136         return NULL;
    137     }
    138 
    139     if (!bench->setSize(values["size"].int_value)) {
    140         printf("Failed to allocate buffers for benchmark.\n");
    141         return NULL;
    142     }
    143 
    144     if (values.count("num_warm_loops") > 0) {
    145         bench->set_num_loops(values["num_warm_loops"].int_value);
    146     }
    147     if (values.count("num_loops") > 0) {
    148         bench->set_num_loops(values["num_loops"].int_value);
    149     }
    150 
    151     return bench;
    152 }
    153 
    // Appends to *cpu_list, in ascending order, the index of every CPU present
    // in the affinity mask reported by sched_getaffinity() for the caller.
    // Entries already in *cpu_list are kept.
    //
    // Returns false, after reporting the error with perror(), when the affinity
    // mask cannot be read; returns true otherwise.
    bool getAvailCpus(std::vector<int> *cpu_list) {
        cpu_set_t allowed;
        CPU_ZERO(&allowed);

        const int rc = sched_getaffinity(0, sizeof(allowed), &allowed);
        if (rc != 0) {
            perror("sched_getaffinity failed.");
            return false;
        }

        int cpu = 0;
        while (cpu < CPU_SETSIZE) {
            if (CPU_ISSET(cpu, &allowed)) {
                cpu_list->push_back(cpu);
            }
            ++cpu;
        }

        return true;
    }
    171 
    172 typedef struct {
    173     int core;
    174     BandwidthBenchmark *bench;
    175     double  avg_mb;
    176     volatile bool *run;
    177 } thread_arg_t;
    178 
    179 void *runBandwidthThread(void *data) {
    180     thread_arg_t *arg = reinterpret_cast<thread_arg_t *>(data);
    181 
    182     if (arg->core >= 0) {
    183         cpu_set_t cpuset;
    184         CPU_ZERO(&cpuset);
    185         CPU_SET(arg->core, &cpuset);
    186         if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) {
    187             perror("sched_setaffinity failed");
    188             return NULL;
    189         }
    190     }
    191 
    192     // Spinloop waiting for the run variable to get set to true.
    193     while (!*arg->run) {
    194     }
    195 
    196     double avg_mb = 0;
    197     for (int run = 1; ; run++) {
    198         arg->bench->run();
    199         if (!*arg->run) {
    200             // Throw away the last data point since it's possible not
    201             // all of the threads are running at this point.
    202             break;
    203         }
    204         avg_mb = (avg_mb/run) * (run-1) + arg->bench->mb_per_sec()/run;
    205     }
    206     arg->avg_mb = avg_mb;
    207 
    208     return NULL;
    209 }
    210 
    211 bool processThreadArgs(int argc, char** argv, option_t options[],
    212                        arg_t *values) {
    213     // Use some smaller values for the number of loops.
    214     (*values)["num_warm_loops"].int_value = 1000000;
    215     (*values)["num_loops"].int_value = 10000000;
    216 
    217     if (!processBandwidthOptions(argc, argv, options, values)) {
    218         return false;
    219     }
    220     if (values->count("size") > 0 && ((*values)["size"].int_value % 64) != 0) {
    221         printf("The size values must be a multiple of 64.\n");
    222         return false;
    223     }
    224     if (values->count("type") == 0) {
    225         printf("Must specify the type value.\n");
    226         return false;
    227     }
    228 
    229     BandwidthBenchmark *bench = createBandwidthBenchmarkObject(*values);
    230     if (!bench) {
    231         return false;
    232     }
    233 
    234     if (setpriority(PRIO_PROCESS, 0, -20)) {
    235         perror("Unable to raise priority of process.");
    236         return false;
    237     }
    238 
    239     printf("Calculating optimum run time...\n");
    240     nsecs_t t = system_time();
    241     bench->run();
    242     t = system_time() - t;
    243     // Since this is only going to be running single threaded, assume that
    244     // if the number is set to ten times this value, we should get at least
    245     // a couple of samples per thread.
    246     int run_time = int((t/1000000000.0)*10 + 0.5) + 5;
    247 
    248     (*values)["run_time"].int_value = run_time;
    249     (*values)["size"].int_value = bench->size();
    250     (*values)["num_warm_loops"].int_value = bench->num_warm_loops();
    251     (*values)["num_loops"].int_value = bench->num_loops();
    252     delete bench;
    253 
    254     return true;
    255 }
    256 
    257 bool runThreadedTest(thread_arg_t args[], int num_threads, int run_time) {
    258     pthread_t threads[num_threads];
    259     volatile bool run = false;
    260 
    261     int rc;
    262     for (int i = 0; i < num_threads; i++) {
    263         args[i].run = &run;
    264         rc = pthread_create(&threads[i], NULL, runBandwidthThread,
    265                             (void*)&args[i]);
    266         if (rc != 0) {
    267             printf("Failed to launch thread %d\n", i);
    268             return false;
    269         }
    270     }
    271 
    272     // Kick start the threads.
    273     run = true;
    274 
    275     // Let the threads run.
    276     sleep(run_time);
    277 
    278     // Stop the threads.
    279     run = false;
    280 
    281     // Wait for the threads to complete.
    282     for (int i = 0; i < num_threads; i++) {
    283         rc = pthread_join(threads[i], NULL);
    284         if (rc != 0) {
    285             printf("Thread %d failed to join.\n", i);
    286             return false;
    287         }
    288         printf("Thread %d: bandwidth using %s %0.2f MB/s\n", i,
    289                args[i].bench->getName(), args[i].avg_mb);
    290     }
    291 
    292     return true;
    293 }
    294 
    295 int per_core_bandwidth(int argc, char** argv) {
    296     arg_t values;
    297     if (!processThreadArgs(argc, argv, per_core_opts, &values)) {
    298         return -1;
    299     }
    300 
    301     std::vector<int> cpu_list;
    302     if (!getAvailCpus(&cpu_list)) {
    303         printf("Failed to get available cpu list.\n");
    304         return -1;
    305     }
    306 
    307     thread_arg_t args[cpu_list.size()];
    308 
    309     int i = 0;
    310     for (std::vector<int>::iterator it = cpu_list.begin();
    311          it != cpu_list.end(); ++it, ++i) {
    312         args[i].core = *it;
    313         args[i].bench = createBandwidthBenchmarkObject(values);
    314         if (!args[i].bench) {
    315             return -1;
    316         }
    317     }
    318 
    319     printf("Running on %d cores\n", cpu_list.size());
    320     printf("  run_time = %ds\n", values["run_time"].int_value);
    321     printf("  size = %d\n", values["size"].int_value);
    322     printf("  num_warm_loops = %d\n", values["num_warm_loops"].int_value);
    323     printf("  num_loops = %d\n", values["num_loops"].int_value);
    324     printf("\n");
    325 
    326     if (!runThreadedTest(args, cpu_list.size(), values["run_time"].int_value)) {
    327         return -1;
    328     }
    329 
    330     return 0;
    331 }
    332 
    333 int multithread_bandwidth(int argc, char** argv) {
    334     arg_t values;
    335     if (!processThreadArgs(argc, argv, multithread_opts, &values)) {
    336         return -1;
    337     }
    338     if (values.count("num_threads") == 0) {
    339         printf("Must specify the num_threads value.\n");
    340         return -1;
    341     }
    342     int num_threads = values["num_threads"].int_value;
    343 
    344     thread_arg_t args[num_threads];
    345 
    346     int i = 0;
    347     for (int i = 0; i < num_threads; i++) {
    348         args[i].core = -1;
    349         args[i].bench = createBandwidthBenchmarkObject(values);
    350         if (!args[i].bench) {
    351             return -1;
    352         }
    353     }
    354 
    355     printf("Running %d threads\n", num_threads);
    356     printf("  run_time = %ds\n", values["run_time"].int_value);
    357     printf("  size = %d\n", values["size"].int_value);
    358     printf("  num_warm_loops = %d\n", values["num_warm_loops"].int_value);
    359     printf("  num_loops = %d\n", values["num_loops"].int_value);
    360     printf("\n");
    361 
    362     if (!runThreadedTest(args, num_threads, values["run_time"].int_value)) {
    363         return -1;
    364     }
    365 
    366     return 0;
    367 }
    368 
    369 bool run_bandwidth_benchmark(int argc, char** argv, const char *name,
    370                              std::vector<BandwidthBenchmark*> bench_objs) {
    371     arg_t values;
    372     values["size"].int_value = 0;
    373     values["num_warm_loops"].int_value = 0;
    374     values["num_loops"].int_value = 0;
    375     if (!processBandwidthOptions(argc, argv, bandwidth_opts, &values)) {
    376         return false;
    377     }
    378 
    379     size_t size = values["size"].int_value;
    380     if ((size % 64) != 0) {
    381         printf("The size value must be a multiple of 64.\n");
    382         return false;
    383     }
    384 
    385     if (setpriority(PRIO_PROCESS, 0, -20)) {
    386         perror("Unable to raise priority of process.");
    387         return false;
    388     }
    389 
    390     bool preamble_printed = false;
    391     size_t num_warm_loops = values["num_warm_loops"].int_value;
    392     size_t num_loops = values["num_loops"].int_value;
    393     for (std::vector<BandwidthBenchmark*>::iterator it = bench_objs.begin();
    394          it != bench_objs.end(); ++it) {
    395         if (!(*it)->canRun()) {
    396             continue;
    397         }
    398         if (!(*it)->setSize(values["num_warm_loops"].int_value)) {
    399             printf("Failed creating buffer for bandwidth test.\n");
    400             return false;
    401         }
    402         if (num_warm_loops) {
    403             (*it)->set_num_warm_loops(num_warm_loops);
    404         }
    405         if (num_loops) {
    406             (*it)->set_num_loops(num_loops);
    407         }
    408         if (!preamble_printed) {
    409             preamble_printed = true;
    410             printf("Benchmarking %s bandwidth\n", name);
    411             printf("  size = %d\n", (*it)->size());
    412             printf("  num_warm_loops = %d\n", (*it)->num_warm_loops());
    413             printf("  num_loops = %d\n\n", (*it)->num_loops());
    414         }
    415         (*it)->run();
    416         printf("  %s bandwidth with %s: %0.2f MB/s\n", name, (*it)->getName(),
    417                (*it)->mb_per_sec());
    418     }
    419 
    420     return true;
    421 }
    422 
    423 int copy_bandwidth(int argc, char** argv) {
    424     std::vector<BandwidthBenchmark*> bench_objs;
    425     bench_objs.push_back(new CopyLdrdStrdBenchmark());
    426     bench_objs.push_back(new CopyLdmiaStmiaBenchmark());
    427     bench_objs.push_back(new CopyVld1Vst1Benchmark());
    428     bench_objs.push_back(new CopyVldrVstrBenchmark());
    429     bench_objs.push_back(new CopyVldmiaVstmiaBenchmark());
    430     bench_objs.push_back(new MemcpyBenchmark());
    431 
    432     if (!run_bandwidth_benchmark(argc, argv, "copy", bench_objs)) {
    433         return -1;
    434     }
    435     return 0;
    436 }
    437 
    438 int write_bandwidth(int argc, char** argv) {
    439     std::vector<BandwidthBenchmark*> bench_objs;
    440     bench_objs.push_back(new WriteStrdBenchmark());
    441     bench_objs.push_back(new WriteStmiaBenchmark());
    442     bench_objs.push_back(new WriteVst1Benchmark());
    443     bench_objs.push_back(new WriteVstrBenchmark());
    444     bench_objs.push_back(new WriteVstmiaBenchmark());
    445     bench_objs.push_back(new MemsetBenchmark());
    446 
    447     if (!run_bandwidth_benchmark(argc, argv, "write", bench_objs)) {
    448         return -1;
    449     }
    450 
    451     return 0;
    452 }
    453 
    454 int read_bandwidth(int argc, char** argv) {
    455     std::vector<BandwidthBenchmark*> bench_objs;
    456     bench_objs.push_back(new ReadLdrdBenchmark());
    457     bench_objs.push_back(new ReadLdmiaBenchmark());
    458     bench_objs.push_back(new ReadVld1Benchmark());
    459     bench_objs.push_back(new ReadVldrBenchmark());
    460     bench_objs.push_back(new ReadVldmiaBenchmark());
    461 
    462     if (!run_bandwidth_benchmark(argc, argv, "read", bench_objs)) {
    463         return -1;
    464     }
    465     return 0;
    466 }
    467