1 /* 2 * Copyright (C) 2013 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include <pthread.h> 18 #include <sched.h> 19 #include <sys/time.h> 20 #include <sys/resource.h> 21 #include <unistd.h> 22 #include <ctype.h> 23 24 #include <map> 25 #include <vector> 26 27 #include "bandwidth.h" 28 29 30 typedef struct { 31 const char *name; 32 bool int_type; 33 } option_t; 34 35 option_t bandwidth_opts[] = { 36 { "size", true }, 37 { "num_warm_loops", true }, 38 { "num_loops", true }, 39 { "type", false }, 40 { NULL, false }, 41 }; 42 43 option_t per_core_opts[] = { 44 { "size", true }, 45 { "num_warm_loops", true}, 46 { "num_loops", true }, 47 { "type", false }, 48 { NULL, false }, 49 }; 50 51 option_t multithread_opts[] = { 52 { "size", true }, 53 { "num_warm_loops", true}, 54 { "num_loops", true }, 55 { "type", false }, 56 { "num_threads", true }, 57 { NULL, false }, 58 }; 59 60 typedef union { 61 int int_value; 62 const char *char_value; 63 } arg_value_t; 64 typedef std::map<const char*, arg_value_t> arg_t; 65 66 bool processBandwidthOptions(int argc, char** argv, option_t options[], 67 arg_t *values) { 68 for (int i = 1; i < argc; i++) { 69 if (argv[i][0] == '-' && argv[i][1] == '-' && !isdigit(argv[i][2])) { 70 char *arg = &argv[i][2]; 71 72 for (int j = 0; options[j].name != NULL; j++) { 73 if (strcmp(arg, options[j].name) == 0) { 74 const char *name = options[j].name; 75 if (i == argc - 1) { 76 printf("The option --%s requires an argument.\n", name); 77 return false; 78 } 79 if (options[j].int_type) { 80 (*values)[name].int_value = strtol(argv[++i], NULL, 0); 81 } else { 82 (*values)[name].char_value = argv[++i]; 83 } 84 } 85 } 86 } 87 } 88 89 return true; 90 } 91 92 BandwidthBenchmark *createBandwidthBenchmarkObject(arg_t values) { 93 BandwidthBenchmark *bench = NULL; 94 95 const char *name = values["type"].char_value; 96 size_t size = 0; 97 if (values.count("size") > 0) { 98 size = values["size"].int_value; 99 } 100 if (strcmp(name, "copy_ldrd_strd") == 0) { 101 bench = new CopyLdrdStrdBenchmark(); 102 } else if (strcmp(name, "copy_ldmia_stmia") == 0) { 103 bench = new CopyLdmiaStmiaBenchmark(); 104 } else if (strcmp(name, "copy_vld1_vst1") == 0) { 105 bench = new CopyVld1Vst1Benchmark(); 106 } else if (strcmp(name, "copy_vldr_vstr") == 0) { 107 bench = new CopyVldrVstrBenchmark(); 108 } else if (strcmp(name, "copy_vldmia_vstmia") == 0) { 109 bench = new CopyVldmiaVstmiaBenchmark(); 110 } else if (strcmp(name, "memcpy") == 0) { 111 bench = new MemcpyBenchmark(); 112 } else if (strcmp(name, "write_strd") == 0) { 113 bench = new WriteStrdBenchmark(); 114 } else if (strcmp(name, "write_stmia") == 0) { 115 bench = new WriteStmiaBenchmark(); 116 } else if (strcmp(name, "write_vst1") == 0) { 117 bench = new WriteVst1Benchmark(); 118 } else if (strcmp(name, "write_vstr") == 0) { 119 bench = new WriteVstrBenchmark(); 120 } else if (strcmp(name, "write_vstmia") == 0) { 121 bench = new WriteVstmiaBenchmark(); 122 } else if (strcmp(name, "memset") == 0) { 123 bench = new MemsetBenchmark(); 124 } else if (strcmp(name, "read_ldrd") == 0) { 125 bench = new ReadLdrdBenchmark(); 126 } else if (strcmp(name, "read_ldmia") == 0) { 127 bench = new ReadLdmiaBenchmark(); 128 } else if (strcmp(name, "read_vld1") == 0) { 129 bench = new ReadVld1Benchmark(); 130 } else if (strcmp(name, "read_vldr") == 0) { 131 bench = new ReadVldrBenchmark(); 132 } else if (strcmp(name, "read_vldmia") == 0) { 133 bench = new ReadVldmiaBenchmark(); 134 } else { 135 printf("Unknown type name %s\n", name); 136 return NULL; 137 } 138 139 if (!bench->setSize(values["size"].int_value)) { 140 printf("Failed to allocate buffers for benchmark.\n"); 141 return NULL; 142 } 143 144 if (values.count("num_warm_loops") > 0) { 145 bench->set_num_loops(values["num_warm_loops"].int_value); 146 } 147 if (values.count("num_loops") > 0) { 148 bench->set_num_loops(values["num_loops"].int_value); 149 } 150 151 return bench; 152 } 153 154 bool getAvailCpus(std::vector<int> *cpu_list) { 155 cpu_set_t cpuset; 156 157 CPU_ZERO(&cpuset); 158 if (sched_getaffinity(0, sizeof(cpuset), &cpuset) != 0) { 159 perror("sched_getaffinity failed."); 160 return false; 161 } 162 163 for (int i = 0; i < CPU_SETSIZE; i++) { 164 if (CPU_ISSET(i, &cpuset)) { 165 cpu_list->push_back(i); 166 } 167 } 168 169 return true; 170 } 171 172 typedef struct { 173 int core; 174 BandwidthBenchmark *bench; 175 double avg_mb; 176 volatile bool *run; 177 } thread_arg_t; 178 179 void *runBandwidthThread(void *data) { 180 thread_arg_t *arg = reinterpret_cast<thread_arg_t *>(data); 181 182 if (arg->core >= 0) { 183 cpu_set_t cpuset; 184 CPU_ZERO(&cpuset); 185 CPU_SET(arg->core, &cpuset); 186 if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) { 187 perror("sched_setaffinity failed"); 188 return NULL; 189 } 190 } 191 192 // Spinloop waiting for the run variable to get set to true. 193 while (!*arg->run) { 194 } 195 196 double avg_mb = 0; 197 for (int run = 1; ; run++) { 198 arg->bench->run(); 199 if (!*arg->run) { 200 // Throw away the last data point since it's possible not 201 // all of the threads are running at this point. 202 break; 203 } 204 avg_mb = (avg_mb/run) * (run-1) + arg->bench->mb_per_sec()/run; 205 } 206 arg->avg_mb = avg_mb; 207 208 return NULL; 209 } 210 211 bool processThreadArgs(int argc, char** argv, option_t options[], 212 arg_t *values) { 213 // Use some smaller values for the number of loops. 214 (*values)["num_warm_loops"].int_value = 1000000; 215 (*values)["num_loops"].int_value = 10000000; 216 217 if (!processBandwidthOptions(argc, argv, options, values)) { 218 return false; 219 } 220 if (values->count("size") > 0 && ((*values)["size"].int_value % 64) != 0) { 221 printf("The size values must be a multiple of 64.\n"); 222 return false; 223 } 224 if (values->count("type") == 0) { 225 printf("Must specify the type value.\n"); 226 return false; 227 } 228 229 BandwidthBenchmark *bench = createBandwidthBenchmarkObject(*values); 230 if (!bench) { 231 return false; 232 } 233 234 if (setpriority(PRIO_PROCESS, 0, -20)) { 235 perror("Unable to raise priority of process."); 236 return false; 237 } 238 239 printf("Calculating optimum run time...\n"); 240 nsecs_t t = system_time(); 241 bench->run(); 242 t = system_time() - t; 243 // Since this is only going to be running single threaded, assume that 244 // if the number is set to ten times this value, we should get at least 245 // a couple of samples per thread. 246 int run_time = int((t/1000000000.0)*10 + 0.5) + 5; 247 248 (*values)["run_time"].int_value = run_time; 249 (*values)["size"].int_value = bench->size(); 250 (*values)["num_warm_loops"].int_value = bench->num_warm_loops(); 251 (*values)["num_loops"].int_value = bench->num_loops(); 252 delete bench; 253 254 return true; 255 } 256 257 bool runThreadedTest(thread_arg_t args[], int num_threads, int run_time) { 258 pthread_t threads[num_threads]; 259 volatile bool run = false; 260 261 int rc; 262 for (int i = 0; i < num_threads; i++) { 263 args[i].run = &run; 264 rc = pthread_create(&threads[i], NULL, runBandwidthThread, 265 (void*)&args[i]); 266 if (rc != 0) { 267 printf("Failed to launch thread %d\n", i); 268 return false; 269 } 270 } 271 272 // Kick start the threads. 273 run = true; 274 275 // Let the threads run. 276 sleep(run_time); 277 278 // Stop the threads. 279 run = false; 280 281 // Wait for the threads to complete. 282 for (int i = 0; i < num_threads; i++) { 283 rc = pthread_join(threads[i], NULL); 284 if (rc != 0) { 285 printf("Thread %d failed to join.\n", i); 286 return false; 287 } 288 printf("Thread %d: bandwidth using %s %0.2f MB/s\n", i, 289 args[i].bench->getName(), args[i].avg_mb); 290 } 291 292 return true; 293 } 294 295 int per_core_bandwidth(int argc, char** argv) { 296 arg_t values; 297 if (!processThreadArgs(argc, argv, per_core_opts, &values)) { 298 return -1; 299 } 300 301 std::vector<int> cpu_list; 302 if (!getAvailCpus(&cpu_list)) { 303 printf("Failed to get available cpu list.\n"); 304 return -1; 305 } 306 307 thread_arg_t args[cpu_list.size()]; 308 309 int i = 0; 310 for (std::vector<int>::iterator it = cpu_list.begin(); 311 it != cpu_list.end(); ++it, ++i) { 312 args[i].core = *it; 313 args[i].bench = createBandwidthBenchmarkObject(values); 314 if (!args[i].bench) { 315 return -1; 316 } 317 } 318 319 printf("Running on %d cores\n", cpu_list.size()); 320 printf(" run_time = %ds\n", values["run_time"].int_value); 321 printf(" size = %d\n", values["size"].int_value); 322 printf(" num_warm_loops = %d\n", values["num_warm_loops"].int_value); 323 printf(" num_loops = %d\n", values["num_loops"].int_value); 324 printf("\n"); 325 326 if (!runThreadedTest(args, cpu_list.size(), values["run_time"].int_value)) { 327 return -1; 328 } 329 330 return 0; 331 } 332 333 int multithread_bandwidth(int argc, char** argv) { 334 arg_t values; 335 if (!processThreadArgs(argc, argv, multithread_opts, &values)) { 336 return -1; 337 } 338 if (values.count("num_threads") == 0) { 339 printf("Must specify the num_threads value.\n"); 340 return -1; 341 } 342 int num_threads = values["num_threads"].int_value; 343 344 thread_arg_t args[num_threads]; 345 346 int i = 0; 347 for (int i = 0; i < num_threads; i++) { 348 args[i].core = -1; 349 args[i].bench = createBandwidthBenchmarkObject(values); 350 if (!args[i].bench) { 351 return -1; 352 } 353 } 354 355 printf("Running %d threads\n", num_threads); 356 printf(" run_time = %ds\n", values["run_time"].int_value); 357 printf(" size = %d\n", values["size"].int_value); 358 printf(" num_warm_loops = %d\n", values["num_warm_loops"].int_value); 359 printf(" num_loops = %d\n", values["num_loops"].int_value); 360 printf("\n"); 361 362 if (!runThreadedTest(args, num_threads, values["run_time"].int_value)) { 363 return -1; 364 } 365 366 return 0; 367 } 368 369 bool run_bandwidth_benchmark(int argc, char** argv, const char *name, 370 std::vector<BandwidthBenchmark*> bench_objs) { 371 arg_t values; 372 values["size"].int_value = 0; 373 values["num_warm_loops"].int_value = 0; 374 values["num_loops"].int_value = 0; 375 if (!processBandwidthOptions(argc, argv, bandwidth_opts, &values)) { 376 return false; 377 } 378 379 size_t size = values["size"].int_value; 380 if ((size % 64) != 0) { 381 printf("The size value must be a multiple of 64.\n"); 382 return false; 383 } 384 385 if (setpriority(PRIO_PROCESS, 0, -20)) { 386 perror("Unable to raise priority of process."); 387 return false; 388 } 389 390 bool preamble_printed = false; 391 size_t num_warm_loops = values["num_warm_loops"].int_value; 392 size_t num_loops = values["num_loops"].int_value; 393 for (std::vector<BandwidthBenchmark*>::iterator it = bench_objs.begin(); 394 it != bench_objs.end(); ++it) { 395 if (!(*it)->canRun()) { 396 continue; 397 } 398 if (!(*it)->setSize(values["num_warm_loops"].int_value)) { 399 printf("Failed creating buffer for bandwidth test.\n"); 400 return false; 401 } 402 if (num_warm_loops) { 403 (*it)->set_num_warm_loops(num_warm_loops); 404 } 405 if (num_loops) { 406 (*it)->set_num_loops(num_loops); 407 } 408 if (!preamble_printed) { 409 preamble_printed = true; 410 printf("Benchmarking %s bandwidth\n", name); 411 printf(" size = %d\n", (*it)->size()); 412 printf(" num_warm_loops = %d\n", (*it)->num_warm_loops()); 413 printf(" num_loops = %d\n\n", (*it)->num_loops()); 414 } 415 (*it)->run(); 416 printf(" %s bandwidth with %s: %0.2f MB/s\n", name, (*it)->getName(), 417 (*it)->mb_per_sec()); 418 } 419 420 return true; 421 } 422 423 int copy_bandwidth(int argc, char** argv) { 424 std::vector<BandwidthBenchmark*> bench_objs; 425 bench_objs.push_back(new CopyLdrdStrdBenchmark()); 426 bench_objs.push_back(new CopyLdmiaStmiaBenchmark()); 427 bench_objs.push_back(new CopyVld1Vst1Benchmark()); 428 bench_objs.push_back(new CopyVldrVstrBenchmark()); 429 bench_objs.push_back(new CopyVldmiaVstmiaBenchmark()); 430 bench_objs.push_back(new MemcpyBenchmark()); 431 432 if (!run_bandwidth_benchmark(argc, argv, "copy", bench_objs)) { 433 return -1; 434 } 435 return 0; 436 } 437 438 int write_bandwidth(int argc, char** argv) { 439 std::vector<BandwidthBenchmark*> bench_objs; 440 bench_objs.push_back(new WriteStrdBenchmark()); 441 bench_objs.push_back(new WriteStmiaBenchmark()); 442 bench_objs.push_back(new WriteVst1Benchmark()); 443 bench_objs.push_back(new WriteVstrBenchmark()); 444 bench_objs.push_back(new WriteVstmiaBenchmark()); 445 bench_objs.push_back(new MemsetBenchmark()); 446 447 if (!run_bandwidth_benchmark(argc, argv, "write", bench_objs)) { 448 return -1; 449 } 450 451 return 0; 452 } 453 454 int read_bandwidth(int argc, char** argv) { 455 std::vector<BandwidthBenchmark*> bench_objs; 456 bench_objs.push_back(new ReadLdrdBenchmark()); 457 bench_objs.push_back(new ReadLdmiaBenchmark()); 458 bench_objs.push_back(new ReadVld1Benchmark()); 459 bench_objs.push_back(new ReadVldrBenchmark()); 460 bench_objs.push_back(new ReadVldmiaBenchmark()); 461 462 if (!run_bandwidth_benchmark(argc, argv, "read", bench_objs)) { 463 return -1; 464 } 465 return 0; 466 } 467