1 /* 2 * Copyright (C) 2013 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "bandwidth.h" 18 19 #include <ctype.h> 20 #include <pthread.h> 21 #include <sched.h> 22 #include <sys/resource.h> 23 #include <sys/time.h> 24 #include <unistd.h> 25 26 #include <map> 27 #include <vector> 28 29 30 typedef struct { 31 const char *name; 32 bool int_type; 33 } option_t; 34 35 option_t bandwidth_opts[] = { 36 { "size", true }, 37 { "num_warm_loops", true }, 38 { "num_loops", true }, 39 { "type", false }, 40 { NULL, false }, 41 }; 42 43 option_t per_core_opts[] = { 44 { "size", true }, 45 { "num_warm_loops", true}, 46 { "num_loops", true }, 47 { "type", false }, 48 { NULL, false }, 49 }; 50 51 option_t multithread_opts[] = { 52 { "size", true }, 53 { "num_warm_loops", true}, 54 { "num_loops", true }, 55 { "type", false }, 56 { "num_threads", true }, 57 { NULL, false }, 58 }; 59 60 typedef union { 61 int int_value; 62 const char *char_value; 63 } arg_value_t; 64 typedef std::map<const char*, arg_value_t> arg_t; 65 66 bool processBandwidthOptions(int argc, char** argv, option_t options[], 67 arg_t *values) { 68 for (int i = 1; i < argc; i++) { 69 if (argv[i][0] == '-' && argv[i][1] == '-' && !isdigit(argv[i][2])) { 70 char *arg = &argv[i][2]; 71 72 for (int j = 0; options[j].name != NULL; j++) { 73 if (strcmp(arg, options[j].name) == 0) { 74 const char *name = options[j].name; 75 if (i == argc - 1) { 76 printf("The option --%s requires an argument.\n", name); 77 return false; 78 } 79 if (options[j].int_type) { 80 (*values)[name].int_value = strtol(argv[++i], NULL, 0); 81 } else { 82 (*values)[name].char_value = argv[++i]; 83 } 84 } 85 } 86 } 87 } 88 89 return true; 90 } 91 92 BandwidthBenchmark *createBandwidthBenchmarkObject(arg_t values) { 93 BandwidthBenchmark *bench = NULL; 94 95 const char *name = values["type"].char_value; 96 size_t size = 0; 97 if (values.count("size") > 0) { 98 size = values["size"].int_value; 99 } 100 if (strcmp(name, "copy_ldrd_strd") == 0) { 101 bench = new CopyLdrdStrdBenchmark(); 102 } else if (strcmp(name, "copy_ldmia_stmia") == 0) { 103 bench = new CopyLdmiaStmiaBenchmark(); 104 } else if (strcmp(name, "copy_vld1_vst1") == 0) { 105 bench = new CopyVld1Vst1Benchmark(); 106 } else if (strcmp(name, "copy_vldr_vstr") == 0) { 107 bench = new CopyVldrVstrBenchmark(); 108 } else if (strcmp(name, "copy_vldmia_vstmia") == 0) { 109 bench = new CopyVldmiaVstmiaBenchmark(); 110 } else if (strcmp(name, "memcpy") == 0) { 111 bench = new MemcpyBenchmark(); 112 } else if (strcmp(name, "write_strd") == 0) { 113 bench = new WriteStrdBenchmark(); 114 } else if (strcmp(name, "write_stmia") == 0) { 115 bench = new WriteStmiaBenchmark(); 116 } else if (strcmp(name, "write_vst1") == 0) { 117 bench = new WriteVst1Benchmark(); 118 } else if (strcmp(name, "write_vstr") == 0) { 119 bench = new WriteVstrBenchmark(); 120 } else if (strcmp(name, "write_vstmia") == 0) { 121 bench = new WriteVstmiaBenchmark(); 122 } else if (strcmp(name, "memset") == 0) { 123 bench = new MemsetBenchmark(); 124 } else if (strcmp(name, "read_ldrd") == 0) { 125 bench = new ReadLdrdBenchmark(); 126 } else if (strcmp(name, "read_ldmia") == 0) { 127 bench = new ReadLdmiaBenchmark(); 128 } else if (strcmp(name, "read_vld1") == 0) { 129 bench = new ReadVld1Benchmark(); 130 } else if (strcmp(name, "read_vldr") == 0) { 131 bench = new ReadVldrBenchmark(); 132 } else if (strcmp(name, "read_vldmia") == 0) { 133 bench = new ReadVldmiaBenchmark(); 134 } else { 135 printf("Unknown type name %s\n", name); 136 return NULL; 137 } 138 139 if (!bench->setSize(size)) { 140 printf("Failed to allocate buffers for benchmark.\n"); 141 delete bench; 142 return NULL; 143 } 144 145 if (values.count("num_warm_loops") > 0) { 146 bench->set_num_loops(values["num_warm_loops"].int_value); 147 } 148 if (values.count("num_loops") > 0) { 149 bench->set_num_loops(values["num_loops"].int_value); 150 } 151 152 return bench; 153 } 154 155 bool getAvailCpus(std::vector<int> *cpu_list) { 156 cpu_set_t cpuset; 157 158 CPU_ZERO(&cpuset); 159 if (sched_getaffinity(0, sizeof(cpuset), &cpuset) != 0) { 160 perror("sched_getaffinity failed."); 161 return false; 162 } 163 164 for (int i = 0; i < CPU_SETSIZE; i++) { 165 if (CPU_ISSET(i, &cpuset)) { 166 cpu_list->push_back(i); 167 } 168 } 169 170 return true; 171 } 172 173 typedef struct { 174 int core; 175 BandwidthBenchmark *bench; 176 double avg_mb; 177 volatile bool *run; 178 } thread_arg_t; 179 180 void *runBandwidthThread(void *data) { 181 thread_arg_t *arg = reinterpret_cast<thread_arg_t *>(data); 182 183 if (arg->core >= 0) { 184 cpu_set_t cpuset; 185 CPU_ZERO(&cpuset); 186 CPU_SET(arg->core, &cpuset); 187 if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) { 188 perror("sched_setaffinity failed"); 189 return NULL; 190 } 191 } 192 193 // Spinloop waiting for the run variable to get set to true. 194 while (!*arg->run) { 195 } 196 197 double avg_mb = 0; 198 for (int run = 1; ; run++) { 199 arg->bench->run(); 200 if (!*arg->run) { 201 // Throw away the last data point since it's possible not 202 // all of the threads are running at this point. 203 break; 204 } 205 avg_mb = (avg_mb/run) * (run-1) + arg->bench->mb_per_sec()/run; 206 } 207 arg->avg_mb = avg_mb; 208 209 return NULL; 210 } 211 212 bool processThreadArgs(int argc, char** argv, option_t options[], 213 arg_t *values) { 214 // Use some smaller values for the number of loops. 215 (*values)["num_warm_loops"].int_value = 1000000; 216 (*values)["num_loops"].int_value = 10000000; 217 218 if (!processBandwidthOptions(argc, argv, options, values)) { 219 return false; 220 } 221 if (values->count("size") > 0 && ((*values)["size"].int_value % 64) != 0) { 222 printf("The size values must be a multiple of 64.\n"); 223 return false; 224 } 225 if (values->count("type") == 0) { 226 printf("Must specify the type value.\n"); 227 return false; 228 } 229 230 BandwidthBenchmark *bench = createBandwidthBenchmarkObject(*values); 231 if (!bench) { 232 return false; 233 } 234 235 if (setpriority(PRIO_PROCESS, 0, -20)) { 236 perror("Unable to raise priority of process."); 237 return false; 238 } 239 240 printf("Calculating optimum run time...\n"); 241 nsecs_t t = system_time(); 242 bench->run(); 243 t = system_time() - t; 244 // Since this is only going to be running single threaded, assume that 245 // if the number is set to ten times this value, we should get at least 246 // a couple of samples per thread. 247 int run_time = int((t/1000000000.0)*10 + 0.5) + 5; 248 249 (*values)["run_time"].int_value = run_time; 250 (*values)["size"].int_value = bench->size(); 251 (*values)["num_warm_loops"].int_value = bench->num_warm_loops(); 252 (*values)["num_loops"].int_value = bench->num_loops(); 253 delete bench; 254 255 return true; 256 } 257 258 bool runThreadedTest(thread_arg_t args[], int num_threads, int run_time) { 259 pthread_t threads[num_threads]; 260 volatile bool run = false; 261 262 int rc; 263 for (int i = 0; i < num_threads; i++) { 264 args[i].run = &run; 265 rc = pthread_create(&threads[i], NULL, runBandwidthThread, 266 (void*)&args[i]); 267 if (rc != 0) { 268 printf("Failed to launch thread %d\n", i); 269 return false; 270 } 271 } 272 273 // Kick start the threads. 274 run = true; 275 276 // Let the threads run. 277 sleep(run_time); 278 279 // Stop the threads. 280 run = false; 281 282 // Wait for the threads to complete. 283 for (int i = 0; i < num_threads; i++) { 284 rc = pthread_join(threads[i], NULL); 285 if (rc != 0) { 286 printf("Thread %d failed to join.\n", i); 287 return false; 288 } 289 printf("Thread %d: bandwidth using %s %0.2f MB/s\n", i, 290 args[i].bench->getName(), args[i].avg_mb); 291 } 292 293 return true; 294 } 295 296 int per_core_bandwidth(int argc, char** argv) { 297 arg_t values; 298 if (!processThreadArgs(argc, argv, per_core_opts, &values)) { 299 return -1; 300 } 301 302 std::vector<int> cpu_list; 303 if (!getAvailCpus(&cpu_list)) { 304 printf("Failed to get available cpu list.\n"); 305 return -1; 306 } 307 308 thread_arg_t args[cpu_list.size()]; 309 310 int i = 0; 311 for (std::vector<int>::iterator it = cpu_list.begin(); 312 it != cpu_list.end(); ++it, ++i) { 313 args[i].core = *it; 314 args[i].bench = createBandwidthBenchmarkObject(values); 315 if (!args[i].bench) { 316 for (int j = 0; j < i; j++) 317 delete args[j].bench; 318 return -1; 319 } 320 } 321 322 printf("Running on %d cores\n", cpu_list.size()); 323 printf(" run_time = %ds\n", values["run_time"].int_value); 324 printf(" size = %d\n", values["size"].int_value); 325 printf(" num_warm_loops = %d\n", values["num_warm_loops"].int_value); 326 printf(" num_loops = %d\n", values["num_loops"].int_value); 327 printf("\n"); 328 329 if (!runThreadedTest(args, cpu_list.size(), values["run_time"].int_value)) { 330 return -1; 331 } 332 333 return 0; 334 } 335 336 int multithread_bandwidth(int argc, char** argv) { 337 arg_t values; 338 if (!processThreadArgs(argc, argv, multithread_opts, &values)) { 339 return -1; 340 } 341 if (values.count("num_threads") == 0) { 342 printf("Must specify the num_threads value.\n"); 343 return -1; 344 } 345 int num_threads = values["num_threads"].int_value; 346 347 thread_arg_t args[num_threads]; 348 349 for (int i = 0; i < num_threads; i++) { 350 args[i].core = -1; 351 args[i].bench = createBandwidthBenchmarkObject(values); 352 if (!args[i].bench) { 353 for (int j = 0; j < i; j++) 354 delete args[j].bench; 355 return -1; 356 } 357 } 358 359 printf("Running %d threads\n", num_threads); 360 printf(" run_time = %ds\n", values["run_time"].int_value); 361 printf(" size = %d\n", values["size"].int_value); 362 printf(" num_warm_loops = %d\n", values["num_warm_loops"].int_value); 363 printf(" num_loops = %d\n", values["num_loops"].int_value); 364 printf("\n"); 365 366 if (!runThreadedTest(args, num_threads, values["run_time"].int_value)) { 367 return -1; 368 } 369 370 return 0; 371 } 372 373 bool run_bandwidth_benchmark(int argc, char** argv, const char *name, 374 std::vector<BandwidthBenchmark*> bench_objs) { 375 arg_t values; 376 values["size"].int_value = 0; 377 values["num_warm_loops"].int_value = 0; 378 values["num_loops"].int_value = 0; 379 if (!processBandwidthOptions(argc, argv, bandwidth_opts, &values)) { 380 return false; 381 } 382 383 size_t size = values["size"].int_value; 384 if ((size % 64) != 0) { 385 printf("The size value must be a multiple of 64.\n"); 386 return false; 387 } 388 389 if (setpriority(PRIO_PROCESS, 0, -20)) { 390 perror("Unable to raise priority of process."); 391 return false; 392 } 393 394 bool preamble_printed = false; 395 size_t num_warm_loops = values["num_warm_loops"].int_value; 396 size_t num_loops = values["num_loops"].int_value; 397 for (std::vector<BandwidthBenchmark*>::iterator it = bench_objs.begin(); 398 it != bench_objs.end(); ++it) { 399 if (!(*it)->canRun()) { 400 continue; 401 } 402 if (!(*it)->setSize(values["size"].int_value)) { 403 printf("Failed creating buffer for bandwidth test.\n"); 404 return false; 405 } 406 if (num_warm_loops) { 407 (*it)->set_num_warm_loops(num_warm_loops); 408 } 409 if (num_loops) { 410 (*it)->set_num_loops(num_loops); 411 } 412 if (!preamble_printed) { 413 preamble_printed = true; 414 printf("Benchmarking %s bandwidth\n", name); 415 printf(" size = %d\n", (*it)->size()); 416 printf(" num_warm_loops = %d\n", (*it)->num_warm_loops()); 417 printf(" num_loops = %d\n\n", (*it)->num_loops()); 418 } 419 (*it)->run(); 420 printf(" %s bandwidth with %s: %0.2f MB/s\n", name, (*it)->getName(), 421 (*it)->mb_per_sec()); 422 } 423 424 return true; 425 } 426 427 int copy_bandwidth(int argc, char** argv) { 428 std::vector<BandwidthBenchmark*> bench_objs; 429 bench_objs.push_back(new CopyLdrdStrdBenchmark()); 430 bench_objs.push_back(new CopyLdmiaStmiaBenchmark()); 431 bench_objs.push_back(new CopyVld1Vst1Benchmark()); 432 bench_objs.push_back(new CopyVldrVstrBenchmark()); 433 bench_objs.push_back(new CopyVldmiaVstmiaBenchmark()); 434 bench_objs.push_back(new MemcpyBenchmark()); 435 436 if (!run_bandwidth_benchmark(argc, argv, "copy", bench_objs)) { 437 return -1; 438 } 439 return 0; 440 } 441 442 int write_bandwidth(int argc, char** argv) { 443 std::vector<BandwidthBenchmark*> bench_objs; 444 bench_objs.push_back(new WriteStrdBenchmark()); 445 bench_objs.push_back(new WriteStmiaBenchmark()); 446 bench_objs.push_back(new WriteVst1Benchmark()); 447 bench_objs.push_back(new WriteVstrBenchmark()); 448 bench_objs.push_back(new WriteVstmiaBenchmark()); 449 bench_objs.push_back(new MemsetBenchmark()); 450 451 if (!run_bandwidth_benchmark(argc, argv, "write", bench_objs)) { 452 return -1; 453 } 454 455 return 0; 456 } 457 458 int read_bandwidth(int argc, char** argv) { 459 std::vector<BandwidthBenchmark*> bench_objs; 460 bench_objs.push_back(new ReadLdrdBenchmark()); 461 bench_objs.push_back(new ReadLdmiaBenchmark()); 462 bench_objs.push_back(new ReadVld1Benchmark()); 463 bench_objs.push_back(new ReadVldrBenchmark()); 464 bench_objs.push_back(new ReadVldmiaBenchmark()); 465 466 if (!run_bandwidth_benchmark(argc, argv, "read", bench_objs)) { 467 return -1; 468 } 469 return 0; 470 } 471