1 // This file is part of Eigen, a lightweight C++ template library 2 // for linear algebra. 3 // 4 // Copyright (C) 2015 Benoit Jacob <benoitjacob (at) google.com> 5 // 6 // This Source Code Form is subject to the terms of the Mozilla 7 // Public License v. 2.0. If a copy of the MPL was not distributed 8 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 9 10 #include <iostream> 11 #include <cstdint> 12 #include <cstdlib> 13 #include <vector> 14 #include <fstream> 15 #include <memory> 16 #include <cstdio> 17 18 bool eigen_use_specific_block_size; 19 int eigen_block_size_k, eigen_block_size_m, eigen_block_size_n; 20 #define EIGEN_TEST_SPECIFIC_BLOCKING_SIZES eigen_use_specific_block_size 21 #define EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_K eigen_block_size_k 22 #define EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_M eigen_block_size_m 23 #define EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_N eigen_block_size_n 24 #include <Eigen/Core> 25 26 #include <bench/BenchTimer.h> 27 28 using namespace Eigen; 29 using namespace std; 30 31 static BenchTimer timer; 32 33 // how many times we repeat each measurement. 34 // measurements are randomly shuffled - we're not doing 35 // all N identical measurements in a row. 36 const int measurement_repetitions = 3; 37 38 // Timings below this value are too short to be accurate, 39 // we'll repeat measurements with more iterations until 40 // we get a timing above that threshold. 41 const float min_accurate_time = 1e-2f; 42 43 // See --min-working-set-size command line parameter. 44 size_t min_working_set_size = 0; 45 46 float max_clock_speed = 0.0f; 47 48 // range of sizes that we will benchmark (in all 3 K,M,N dimensions) 49 const size_t maxsize = 2048; 50 const size_t minsize = 16; 51 52 typedef MatrixXf MatrixType; 53 typedef MatrixType::Scalar Scalar; 54 typedef internal::packet_traits<Scalar>::type Packet; 55 56 static_assert((maxsize & (maxsize - 1)) == 0, "maxsize must be a power of two"); 57 static_assert((minsize & (minsize - 1)) == 0, "minsize must be a power of two"); 58 static_assert(maxsize > minsize, "maxsize must be larger than minsize"); 59 static_assert(maxsize < (minsize << 16), "maxsize must be less than (minsize<<16)"); 60 61 // just a helper to store a triple of K,M,N sizes for matrix product 62 struct size_triple_t 63 { 64 size_t k, m, n; 65 size_triple_t() : k(0), m(0), n(0) {} 66 size_triple_t(size_t _k, size_t _m, size_t _n) : k(_k), m(_m), n(_n) {} 67 size_triple_t(const size_triple_t& o) : k(o.k), m(o.m), n(o.n) {} 68 size_triple_t(uint16_t compact) 69 { 70 k = 1 << ((compact & 0xf00) >> 8); 71 m = 1 << ((compact & 0x0f0) >> 4); 72 n = 1 << ((compact & 0x00f) >> 0); 73 } 74 }; 75 76 uint8_t log2_pot(size_t x) { 77 size_t l = 0; 78 while (x >>= 1) l++; 79 return l; 80 } 81 82 // Convert between size tripes and a compact form fitting in 12 bits 83 // where each size, which must be a POT, is encoded as its log2, on 4 bits 84 // so the largest representable size is 2^15 == 32k ... big enough. 85 uint16_t compact_size_triple(size_t k, size_t m, size_t n) 86 { 87 return (log2_pot(k) << 8) | (log2_pot(m) << 4) | log2_pot(n); 88 } 89 90 uint16_t compact_size_triple(const size_triple_t& t) 91 { 92 return compact_size_triple(t.k, t.m, t.n); 93 } 94 95 // A single benchmark. Initially only contains benchmark params. 96 // Then call run(), which stores the result in the gflops field. 97 struct benchmark_t 98 { 99 uint16_t compact_product_size; 100 uint16_t compact_block_size; 101 bool use_default_block_size; 102 float gflops; 103 benchmark_t() 104 : compact_product_size(0) 105 , compact_block_size(0) 106 , use_default_block_size(false) 107 , gflops(0) 108 { 109 } 110 benchmark_t(size_t pk, size_t pm, size_t pn, 111 size_t bk, size_t bm, size_t bn) 112 : compact_product_size(compact_size_triple(pk, pm, pn)) 113 , compact_block_size(compact_size_triple(bk, bm, bn)) 114 , use_default_block_size(false) 115 , gflops(0) 116 {} 117 benchmark_t(size_t pk, size_t pm, size_t pn) 118 : compact_product_size(compact_size_triple(pk, pm, pn)) 119 , compact_block_size(0) 120 , use_default_block_size(true) 121 , gflops(0) 122 {} 123 124 void run(); 125 }; 126 127 ostream& operator<<(ostream& s, const benchmark_t& b) 128 { 129 s << hex << b.compact_product_size << dec; 130 if (b.use_default_block_size) { 131 size_triple_t t(b.compact_product_size); 132 Index k = t.k, m = t.m, n = t.n; 133 internal::computeProductBlockingSizes<Scalar, Scalar>(k, m, n); 134 s << " default(" << k << ", " << m << ", " << n << ")"; 135 } else { 136 s << " " << hex << b.compact_block_size << dec; 137 } 138 s << " " << b.gflops; 139 return s; 140 } 141 142 // We sort first by increasing benchmark parameters, 143 // then by decreasing performance. 144 bool operator<(const benchmark_t& b1, const benchmark_t& b2) 145 { 146 return b1.compact_product_size < b2.compact_product_size || 147 (b1.compact_product_size == b2.compact_product_size && ( 148 (b1.compact_block_size < b2.compact_block_size || ( 149 b1.compact_block_size == b2.compact_block_size && 150 b1.gflops > b2.gflops)))); 151 } 152 153 void benchmark_t::run() 154 { 155 size_triple_t productsizes(compact_product_size); 156 157 if (use_default_block_size) { 158 eigen_use_specific_block_size = false; 159 } else { 160 // feed eigen with our custom blocking params 161 eigen_use_specific_block_size = true; 162 size_triple_t blocksizes(compact_block_size); 163 eigen_block_size_k = blocksizes.k; 164 eigen_block_size_m = blocksizes.m; 165 eigen_block_size_n = blocksizes.n; 166 } 167 168 // set up the matrix pool 169 170 const size_t combined_three_matrices_sizes = 171 sizeof(Scalar) * 172 (productsizes.k * productsizes.m + 173 productsizes.k * productsizes.n + 174 productsizes.m * productsizes.n); 175 176 // 64 M is large enough that nobody has a cache bigger than that, 177 // while still being small enough that everybody has this much RAM, 178 // so conveniently we don't need to special-case platforms here. 179 const size_t unlikely_large_cache_size = 64 << 20; 180 181 const size_t working_set_size = 182 min_working_set_size ? min_working_set_size : unlikely_large_cache_size; 183 184 const size_t matrix_pool_size = 185 1 + working_set_size / combined_three_matrices_sizes; 186 187 MatrixType *lhs = new MatrixType[matrix_pool_size]; 188 MatrixType *rhs = new MatrixType[matrix_pool_size]; 189 MatrixType *dst = new MatrixType[matrix_pool_size]; 190 191 for (size_t i = 0; i < matrix_pool_size; i++) { 192 lhs[i] = MatrixType::Zero(productsizes.m, productsizes.k); 193 rhs[i] = MatrixType::Zero(productsizes.k, productsizes.n); 194 dst[i] = MatrixType::Zero(productsizes.m, productsizes.n); 195 } 196 197 // main benchmark loop 198 199 int iters_at_a_time = 1; 200 float time_per_iter = 0.0f; 201 size_t matrix_index = 0; 202 while (true) { 203 204 double starttime = timer.getCpuTime(); 205 for (int i = 0; i < iters_at_a_time; i++) { 206 dst[matrix_index].noalias() = lhs[matrix_index] * rhs[matrix_index]; 207 matrix_index++; 208 if (matrix_index == matrix_pool_size) { 209 matrix_index = 0; 210 } 211 } 212 double endtime = timer.getCpuTime(); 213 214 const float timing = float(endtime - starttime); 215 216 if (timing >= min_accurate_time) { 217 time_per_iter = timing / iters_at_a_time; 218 break; 219 } 220 221 iters_at_a_time *= 2; 222 } 223 224 delete[] lhs; 225 delete[] rhs; 226 delete[] dst; 227 228 gflops = 2e-9 * productsizes.k * productsizes.m * productsizes.n / time_per_iter; 229 } 230 231 void print_cpuinfo() 232 { 233 #ifdef __linux__ 234 cout << "contents of /proc/cpuinfo:" << endl; 235 string line; 236 ifstream cpuinfo("/proc/cpuinfo"); 237 if (cpuinfo.is_open()) { 238 while (getline(cpuinfo, line)) { 239 cout << line << endl; 240 } 241 cpuinfo.close(); 242 } 243 cout << endl; 244 #elif defined __APPLE__ 245 cout << "output of sysctl hw:" << endl; 246 system("sysctl hw"); 247 cout << endl; 248 #endif 249 } 250 251 template <typename T> 252 string type_name() 253 { 254 return "unknown"; 255 } 256 257 template<> 258 string type_name<float>() 259 { 260 return "float"; 261 } 262 263 template<> 264 string type_name<double>() 265 { 266 return "double"; 267 } 268 269 struct action_t 270 { 271 virtual const char* invokation_name() const { abort(); return nullptr; } 272 virtual void run() const { abort(); } 273 virtual ~action_t() {} 274 }; 275 276 void show_usage_and_exit(int /*argc*/, char* argv[], 277 const vector<unique_ptr<action_t>>& available_actions) 278 { 279 cerr << "usage: " << argv[0] << " <action> [options...]" << endl << endl; 280 cerr << "available actions:" << endl << endl; 281 for (auto it = available_actions.begin(); it != available_actions.end(); ++it) { 282 cerr << " " << (*it)->invokation_name() << endl; 283 } 284 cerr << endl; 285 cerr << "options:" << endl << endl; 286 cerr << " --min-working-set-size=N:" << endl; 287 cerr << " Set the minimum working set size to N bytes." << endl; 288 cerr << " This is rounded up as needed to a multiple of matrix size." << endl; 289 cerr << " A larger working set lowers the chance of a warm cache." << endl; 290 cerr << " The default value 0 means use a large enough working" << endl; 291 cerr << " set to likely outsize caches." << endl; 292 cerr << " A value of 1 (that is, 1 byte) would mean don't do anything to" << endl; 293 cerr << " avoid warm caches." << endl; 294 exit(1); 295 } 296 297 float measure_clock_speed() 298 { 299 cerr << "Measuring clock speed... \r" << flush; 300 301 vector<float> all_gflops; 302 for (int i = 0; i < 8; i++) { 303 benchmark_t b(1024, 1024, 1024); 304 b.run(); 305 all_gflops.push_back(b.gflops); 306 } 307 308 sort(all_gflops.begin(), all_gflops.end()); 309 float stable_estimate = all_gflops[2] + all_gflops[3] + all_gflops[4] + all_gflops[5]; 310 311 // multiply by an arbitrary constant to discourage trying doing anything with the 312 // returned values besides just comparing them with each other. 313 float result = stable_estimate * 123.456f; 314 315 return result; 316 } 317 318 struct human_duration_t 319 { 320 int seconds; 321 human_duration_t(int s) : seconds(s) {} 322 }; 323 324 ostream& operator<<(ostream& s, const human_duration_t& d) 325 { 326 int remainder = d.seconds; 327 if (remainder > 3600) { 328 int hours = remainder / 3600; 329 s << hours << " h "; 330 remainder -= hours * 3600; 331 } 332 if (remainder > 60) { 333 int minutes = remainder / 60; 334 s << minutes << " min "; 335 remainder -= minutes * 60; 336 } 337 if (d.seconds < 600) { 338 s << remainder << " s"; 339 } 340 return s; 341 } 342 343 const char session_filename[] = "/data/local/tmp/benchmark-blocking-sizes-session.data"; 344 345 void serialize_benchmarks(const char* filename, const vector<benchmark_t>& benchmarks, size_t first_benchmark_to_run) 346 { 347 FILE* file = fopen(filename, "w"); 348 if (!file) { 349 cerr << "Could not open file " << filename << " for writing." << endl; 350 cerr << "Do you have write permissions on the current working directory?" << endl; 351 exit(1); 352 } 353 size_t benchmarks_vector_size = benchmarks.size(); 354 fwrite(&max_clock_speed, sizeof(max_clock_speed), 1, file); 355 fwrite(&benchmarks_vector_size, sizeof(benchmarks_vector_size), 1, file); 356 fwrite(&first_benchmark_to_run, sizeof(first_benchmark_to_run), 1, file); 357 fwrite(benchmarks.data(), sizeof(benchmark_t), benchmarks.size(), file); 358 fclose(file); 359 } 360 361 bool deserialize_benchmarks(const char* filename, vector<benchmark_t>& benchmarks, size_t& first_benchmark_to_run) 362 { 363 FILE* file = fopen(filename, "r"); 364 if (!file) { 365 return false; 366 } 367 if (1 != fread(&max_clock_speed, sizeof(max_clock_speed), 1, file)) { 368 return false; 369 } 370 size_t benchmarks_vector_size = 0; 371 if (1 != fread(&benchmarks_vector_size, sizeof(benchmarks_vector_size), 1, file)) { 372 return false; 373 } 374 if (1 != fread(&first_benchmark_to_run, sizeof(first_benchmark_to_run), 1, file)) { 375 return false; 376 } 377 benchmarks.resize(benchmarks_vector_size); 378 if (benchmarks.size() != fread(benchmarks.data(), sizeof(benchmark_t), benchmarks.size(), file)) { 379 return false; 380 } 381 unlink(filename); 382 return true; 383 } 384 385 void try_run_some_benchmarks( 386 vector<benchmark_t>& benchmarks, 387 double time_start, 388 size_t& first_benchmark_to_run) 389 { 390 if (first_benchmark_to_run == benchmarks.size()) { 391 return; 392 } 393 394 double time_last_progress_update = 0; 395 double time_last_clock_speed_measurement = 0; 396 double time_now = 0; 397 398 size_t benchmark_index = first_benchmark_to_run; 399 400 while (true) { 401 float ratio_done = float(benchmark_index) / benchmarks.size(); 402 time_now = timer.getRealTime(); 403 404 // We check clock speed every minute and at the end. 405 if (benchmark_index == benchmarks.size() || 406 time_now > time_last_clock_speed_measurement + 60.0f) 407 { 408 time_last_clock_speed_measurement = time_now; 409 410 // Ensure that clock speed is as expected 411 float current_clock_speed = measure_clock_speed(); 412 413 // The tolerance needs to be smaller than the relative difference between 414 // clock speeds that a device could operate under. 415 // It seems unlikely that a device would be throttling clock speeds by 416 // amounts smaller than 2%. 417 // With a value of 1%, I was getting within noise on a Sandy Bridge. 418 const float clock_speed_tolerance = 0.02f; 419 420 if (current_clock_speed > (1 + clock_speed_tolerance) * max_clock_speed) { 421 // Clock speed is now higher than we previously measured. 422 // Either our initial measurement was inaccurate, which won't happen 423 // too many times as we are keeping the best clock speed value and 424 // and allowing some tolerance; or something really weird happened, 425 // which invalidates all benchmark results collected so far. 426 // Either way, we better restart all over again now. 427 if (benchmark_index) { 428 cerr << "Restarting at " << 100.0f * ratio_done 429 << " % because clock speed increased. " << endl; 430 } 431 max_clock_speed = current_clock_speed; 432 first_benchmark_to_run = 0; 433 return; 434 } 435 436 bool rerun_last_tests = false; 437 438 if (current_clock_speed < (1 - clock_speed_tolerance) * max_clock_speed) { 439 cerr << "Measurements completed so far: " 440 << 100.0f * ratio_done 441 << " % " << endl; 442 cerr << "Clock speed seems to be only " 443 << current_clock_speed/max_clock_speed 444 << " times what it used to be." << endl; 445 446 unsigned int seconds_to_sleep_if_lower_clock_speed = 1; 447 448 while (current_clock_speed < (1 - clock_speed_tolerance) * max_clock_speed) { 449 if (seconds_to_sleep_if_lower_clock_speed > 32) { 450 cerr << "Sleeping longer probably won't make a difference." << endl; 451 cerr << "Serializing benchmarks to " << session_filename << endl; 452 serialize_benchmarks(session_filename, benchmarks, first_benchmark_to_run); 453 cerr << "Now restart this benchmark, and it should pick up where we left." << endl; 454 exit(2); 455 } 456 rerun_last_tests = true; 457 cerr << "Sleeping " 458 << seconds_to_sleep_if_lower_clock_speed 459 << " s... \r" << endl; 460 sleep(seconds_to_sleep_if_lower_clock_speed); 461 current_clock_speed = measure_clock_speed(); 462 seconds_to_sleep_if_lower_clock_speed *= 2; 463 } 464 } 465 466 if (rerun_last_tests) { 467 cerr << "Redoing the last " 468 << 100.0f * float(benchmark_index - first_benchmark_to_run) / benchmarks.size() 469 << " % because clock speed had been low. " << endl; 470 return; 471 } 472 473 // nothing wrong with the clock speed so far, so there won't be a need to rerun 474 // benchmarks run so far in case we later encounter a lower clock speed. 475 first_benchmark_to_run = benchmark_index; 476 } 477 478 if (benchmark_index == benchmarks.size()) { 479 // We're done! 480 first_benchmark_to_run = benchmarks.size(); 481 // Erase progress info 482 cerr << " " << endl; 483 return; 484 } 485 486 // Display progress info on stderr 487 if (time_now > time_last_progress_update + 1.0f) { 488 time_last_progress_update = time_now; 489 cerr << "Measurements... " << 100.0f * ratio_done 490 << " %, ETA " 491 << human_duration_t(float(time_now - time_start) * (1.0f - ratio_done) / ratio_done) 492 << " \r" << flush; 493 } 494 495 // This is where we actually run a benchmark! 496 benchmarks[benchmark_index].run(); 497 benchmark_index++; 498 } 499 } 500 501 void run_benchmarks(vector<benchmark_t>& benchmarks) 502 { 503 size_t first_benchmark_to_run; 504 vector<benchmark_t> deserialized_benchmarks; 505 bool use_deserialized_benchmarks = false; 506 if (deserialize_benchmarks(session_filename, deserialized_benchmarks, first_benchmark_to_run)) { 507 cerr << "Found serialized session with " 508 << 100.0f * first_benchmark_to_run / deserialized_benchmarks.size() 509 << " % already done" << endl; 510 if (deserialized_benchmarks.size() == benchmarks.size() && 511 first_benchmark_to_run > 0 && 512 first_benchmark_to_run < benchmarks.size()) 513 { 514 use_deserialized_benchmarks = true; 515 } 516 } 517 518 if (use_deserialized_benchmarks) { 519 benchmarks = deserialized_benchmarks; 520 } else { 521 // not using deserialized benchmarks, starting from scratch 522 first_benchmark_to_run = 0; 523 524 // Randomly shuffling benchmarks allows us to get accurate enough progress info, 525 // as now the cheap/expensive benchmarks are randomly mixed so they average out. 526 // It also means that if data is corrupted for some time span, the odds are that 527 // not all repetitions of a given benchmark will be corrupted. 528 random_shuffle(benchmarks.begin(), benchmarks.end()); 529 } 530 531 for (int i = 0; i < 4; i++) { 532 max_clock_speed = max(max_clock_speed, measure_clock_speed()); 533 } 534 535 double time_start = 0.0; 536 while (first_benchmark_to_run < benchmarks.size()) { 537 if (first_benchmark_to_run == 0) { 538 time_start = timer.getRealTime(); 539 } 540 try_run_some_benchmarks(benchmarks, 541 time_start, 542 first_benchmark_to_run); 543 } 544 545 // Sort timings by increasing benchmark parameters, and decreasing gflops. 546 // The latter is very important. It means that we can ignore all but the first 547 // benchmark with given parameters. 548 sort(benchmarks.begin(), benchmarks.end()); 549 550 // Collect best (i.e. now first) results for each parameter values. 551 vector<benchmark_t> best_benchmarks; 552 for (auto it = benchmarks.begin(); it != benchmarks.end(); ++it) { 553 if (best_benchmarks.empty() || 554 best_benchmarks.back().compact_product_size != it->compact_product_size || 555 best_benchmarks.back().compact_block_size != it->compact_block_size) 556 { 557 best_benchmarks.push_back(*it); 558 } 559 } 560 561 // keep and return only the best benchmarks 562 benchmarks = best_benchmarks; 563 } 564 565 struct measure_all_pot_sizes_action_t : action_t 566 { 567 virtual const char* invokation_name() const { return "all-pot-sizes"; } 568 virtual void run() const 569 { 570 vector<benchmark_t> benchmarks; 571 for (int repetition = 0; repetition < measurement_repetitions; repetition++) { 572 for (size_t ksize = minsize; ksize <= maxsize; ksize *= 2) { 573 for (size_t msize = minsize; msize <= maxsize; msize *= 2) { 574 for (size_t nsize = minsize; nsize <= maxsize; nsize *= 2) { 575 for (size_t kblock = minsize; kblock <= ksize; kblock *= 2) { 576 for (size_t mblock = minsize; mblock <= msize; mblock *= 2) { 577 for (size_t nblock = minsize; nblock <= nsize; nblock *= 2) { 578 benchmarks.emplace_back(ksize, msize, nsize, kblock, mblock, nblock); 579 } 580 } 581 } 582 } 583 } 584 } 585 } 586 587 run_benchmarks(benchmarks); 588 589 cout << "BEGIN MEASUREMENTS ALL POT SIZES" << endl; 590 for (auto it = benchmarks.begin(); it != benchmarks.end(); ++it) { 591 cout << *it << endl; 592 } 593 } 594 }; 595 596 struct measure_default_sizes_action_t : action_t 597 { 598 virtual const char* invokation_name() const { return "default-sizes"; } 599 virtual void run() const 600 { 601 vector<benchmark_t> benchmarks; 602 for (int repetition = 0; repetition < measurement_repetitions; repetition++) { 603 for (size_t ksize = minsize; ksize <= maxsize; ksize *= 2) { 604 for (size_t msize = minsize; msize <= maxsize; msize *= 2) { 605 for (size_t nsize = minsize; nsize <= maxsize; nsize *= 2) { 606 benchmarks.emplace_back(ksize, msize, nsize); 607 } 608 } 609 } 610 } 611 612 run_benchmarks(benchmarks); 613 614 cout << "BEGIN MEASUREMENTS DEFAULT SIZES" << endl; 615 for (auto it = benchmarks.begin(); it != benchmarks.end(); ++it) { 616 cout << *it << endl; 617 } 618 } 619 }; 620 621 int main(int argc, char* argv[]) 622 { 623 double time_start = timer.getRealTime(); 624 cout.precision(4); 625 cerr.precision(4); 626 627 vector<unique_ptr<action_t>> available_actions; 628 available_actions.emplace_back(new measure_all_pot_sizes_action_t); 629 available_actions.emplace_back(new measure_default_sizes_action_t); 630 631 auto action = available_actions.end(); 632 633 if (argc <= 1) { 634 show_usage_and_exit(argc, argv, available_actions); 635 } 636 for (auto it = available_actions.begin(); it != available_actions.end(); ++it) { 637 if (!strcmp(argv[1], (*it)->invokation_name())) { 638 action = it; 639 break; 640 } 641 } 642 643 if (action == available_actions.end()) { 644 show_usage_and_exit(argc, argv, available_actions); 645 } 646 647 for (int i = 2; i < argc; i++) { 648 if (argv[i] == strstr(argv[i], "--min-working-set-size=")) { 649 const char* equals_sign = strchr(argv[i], '='); 650 min_working_set_size = strtoul(equals_sign+1, nullptr, 10); 651 } else { 652 cerr << "unrecognized option: " << argv[i] << endl << endl; 653 show_usage_and_exit(argc, argv, available_actions); 654 } 655 } 656 657 print_cpuinfo(); 658 659 cout << "benchmark parameters:" << endl; 660 cout << "pointer size: " << 8*sizeof(void*) << " bits" << endl; 661 cout << "scalar type: " << type_name<Scalar>() << endl; 662 cout << "packet size: " << internal::packet_traits<MatrixType::Scalar>::size << endl; 663 cout << "minsize = " << minsize << endl; 664 cout << "maxsize = " << maxsize << endl; 665 cout << "measurement_repetitions = " << measurement_repetitions << endl; 666 cout << "min_accurate_time = " << min_accurate_time << endl; 667 cout << "min_working_set_size = " << min_working_set_size; 668 if (min_working_set_size == 0) { 669 cout << " (try to outsize caches)"; 670 } 671 cout << endl << endl; 672 673 (*action)->run(); 674 675 double time_end = timer.getRealTime(); 676 cerr << "Finished in " << human_duration_t(time_end - time_start) << endl; 677 } 678