// Copyright 2015 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Support for registering benchmarks for functions.

/* Example usage:
// Define a function that executes the code to be measured a
// specified number of times:
static void BM_StringCreation(benchmark::State& state) {
  for (auto _ : state)
    std::string empty_string;
}

// Register the function as a benchmark
BENCHMARK(BM_StringCreation);

// Define another benchmark
static void BM_StringCopy(benchmark::State& state) {
  std::string x = "hello";
  for (auto _ : state)
    std::string copy(x);
}
BENCHMARK(BM_StringCopy);

// Augment the main() program to invoke benchmarks if specified
// via the --benchmark_filter command line flag. E.g.,
//   my_unittest --benchmark_filter=all
//   my_unittest --benchmark_filter=BM_StringCreation
//   my_unittest --benchmark_filter=String
//   my_unittest --benchmark_filter='Copy|Creation'
int main(int argc, char** argv) {
  benchmark::Initialize(&argc, argv);
  benchmark::RunSpecifiedBenchmarks();
  return 0;
}

// Sometimes a family of microbenchmarks can be implemented with
// just one routine that takes an extra argument to specify which
// one of the family of benchmarks to run.
For example, the following
// code defines a family of microbenchmarks for measuring the speed
// of memcpy() calls of different lengths:

static void BM_memcpy(benchmark::State& state) {
  char* src = new char[state.range(0)];
  char* dst = new char[state.range(0)];
  memset(src, 'x', state.range(0));
  for (auto _ : state)
    memcpy(dst, src, state.range(0));
  state.SetBytesProcessed(int64_t(state.iterations()) *
                          int64_t(state.range(0)));
  delete[] src;
  delete[] dst;
}
BENCHMARK(BM_memcpy)->Arg(8)->Arg(64)->Arg(512)->Arg(1<<10)->Arg(8<<10);

// The preceding code is quite repetitive, and can be replaced with the
// following short-hand. The following invocation will pick a few
// appropriate arguments in the specified range and will generate a
// microbenchmark for each such argument.
BENCHMARK(BM_memcpy)->Range(8, 8<<10);

// You might have a microbenchmark that depends on two inputs. For
// example, the following code defines a family of microbenchmarks for
// measuring the speed of set insertion.
static void BM_SetInsert(benchmark::State& state) {
  set<int> data;
  for (auto _ : state) {
    state.PauseTiming();
    data = ConstructRandomSet(state.range(0));
    state.ResumeTiming();
    for (int j = 0; j < state.range(1); ++j)
      data.insert(RandomNumber());
  }
}
BENCHMARK(BM_SetInsert)
    ->Args({1<<10, 128})
    ->Args({2<<10, 128})
    ->Args({4<<10, 128})
    ->Args({8<<10, 128})
    ->Args({1<<10, 512})
    ->Args({2<<10, 512})
    ->Args({4<<10, 512})
    ->Args({8<<10, 512});

// The preceding code is quite repetitive, and can be replaced with
// the following short-hand. The following macro will pick a few
// appropriate arguments in the product of the two specified ranges
// and will generate a microbenchmark for each such pair.
BENCHMARK(BM_SetInsert)->Ranges({{1<<10, 8<<10}, {128, 512}});

// For more complex patterns of inputs, passing a custom function
// to Apply allows programmatic specification of an
// arbitrary set of arguments to run the microbenchmark on.
// The following example enumerates a dense range on
// one parameter, and a sparse range on the second.
static void CustomArguments(benchmark::internal::Benchmark* b) {
  for (int i = 0; i <= 10; ++i)
    for (int j = 32; j <= 1024*1024; j *= 8)
      b->Args({i, j});
}
BENCHMARK(BM_SetInsert)->Apply(CustomArguments);

// Templated microbenchmarks work the same way:
// Produce then consume 'size' messages 'iters' times
// Measures throughput in the absence of multiprogramming.
template <class Q> void BM_Sequential(benchmark::State& state) {
  Q q;
  typename Q::value_type v;
  for (auto _ : state) {
    for (int i = state.range(0); i--; )
      q.push(v);
    for (int e = state.range(0); e--; )
      q.Wait(&v);
  }
  // actually messages, not bytes:
  state.SetBytesProcessed(
      static_cast<int64_t>(state.iterations())*state.range(0));
}
BENCHMARK_TEMPLATE(BM_Sequential, WaitQueue<int>)->Range(1<<0, 1<<10);

Use `Benchmark::MinTime(double t)` to set the minimum time used to run the
benchmark. This option overrides the `benchmark_min_time` flag.

void BM_test(benchmark::State& state) {
  ... body ...
}
BENCHMARK(BM_test)->MinTime(2.0);  // Run for at least 2 seconds.

In a multithreaded test, it is guaranteed that none of the threads will start
until all have reached the loop start, and all will have finished before any
thread exits the loop body. As such, any global setup or teardown you want to
do can be wrapped in a check against the thread index:

static void BM_MultiThreaded(benchmark::State& state) {
  if (state.thread_index == 0) {
    // Setup code here.
146 } 147 for (auto _ : state) { 148 // Run the test as normal. 149 } 150 if (state.thread_index == 0) { 151 // Teardown code here. 152 } 153 } 154 BENCHMARK(BM_MultiThreaded)->Threads(4); 155 156 157 If a benchmark runs a few milliseconds it may be hard to visually compare the 158 measured times, since the output data is given in nanoseconds per default. In 159 order to manually set the time unit, you can specify it manually: 160 161 BENCHMARK(BM_test)->Unit(benchmark::kMillisecond); 162 */ 163 164 #ifndef BENCHMARK_BENCHMARK_H_ 165 #define BENCHMARK_BENCHMARK_H_ 166 167 // The _MSVC_LANG check should detect Visual Studio 2015 Update 3 and newer. 168 #if __cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L) 169 #define BENCHMARK_HAS_CXX11 170 #endif 171 172 #include <stdint.h> 173 174 #include <algorithm> 175 #include <cassert> 176 #include <cstddef> 177 #include <iosfwd> 178 #include <map> 179 #include <set> 180 #include <string> 181 #include <vector> 182 183 #if defined(BENCHMARK_HAS_CXX11) 184 #include <initializer_list> 185 #include <type_traits> 186 #include <utility> 187 #endif 188 189 #if defined(_MSC_VER) 190 #include <intrin.h> // for _ReadWriteBarrier 191 #endif 192 193 #ifndef BENCHMARK_HAS_CXX11 194 #define BENCHMARK_DISALLOW_COPY_AND_ASSIGN(TypeName) \ 195 TypeName(const TypeName&); \ 196 TypeName& operator=(const TypeName&) 197 #else 198 #define BENCHMARK_DISALLOW_COPY_AND_ASSIGN(TypeName) \ 199 TypeName(const TypeName&) = delete; \ 200 TypeName& operator=(const TypeName&) = delete 201 #endif 202 203 #if defined(__GNUC__) 204 #define BENCHMARK_UNUSED __attribute__((unused)) 205 #define BENCHMARK_ALWAYS_INLINE __attribute__((always_inline)) 206 #define BENCHMARK_NOEXCEPT noexcept 207 #define BENCHMARK_NOEXCEPT_OP(x) noexcept(x) 208 #elif defined(_MSC_VER) && !defined(__clang__) 209 #define BENCHMARK_UNUSED 210 #define BENCHMARK_ALWAYS_INLINE __forceinline 211 #if _MSC_VER >= 1900 212 #define BENCHMARK_NOEXCEPT noexcept 213 #define 
BENCHMARK_NOEXCEPT_OP(x) noexcept(x) 214 #else 215 #define BENCHMARK_NOEXCEPT 216 #define BENCHMARK_NOEXCEPT_OP(x) 217 #endif 218 #define __func__ __FUNCTION__ 219 #else 220 #define BENCHMARK_UNUSED 221 #define BENCHMARK_ALWAYS_INLINE 222 #define BENCHMARK_NOEXCEPT 223 #define BENCHMARK_NOEXCEPT_OP(x) 224 #endif 225 226 #define BENCHMARK_INTERNAL_TOSTRING2(x) #x 227 #define BENCHMARK_INTERNAL_TOSTRING(x) BENCHMARK_INTERNAL_TOSTRING2(x) 228 229 #if defined(__GNUC__) || defined(__clang__) 230 #define BENCHMARK_BUILTIN_EXPECT(x, y) __builtin_expect(x, y) 231 #define BENCHMARK_DEPRECATED_MSG(msg) __attribute__((deprecated(msg))) 232 #else 233 #define BENCHMARK_BUILTIN_EXPECT(x, y) x 234 #define BENCHMARK_DEPRECATED_MSG(msg) 235 #define BENCHMARK_WARNING_MSG(msg) \ 236 __pragma(message(__FILE__ "(" BENCHMARK_INTERNAL_TOSTRING( \ 237 __LINE__) ") : warning note: " msg)) 238 #endif 239 240 #if defined(__GNUC__) && !defined(__clang__) 241 #define BENCHMARK_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) 242 #endif 243 244 #ifndef __has_builtin 245 #define __has_builtin(x) 0 246 #endif 247 248 #if defined(__GNUC__) || __has_builtin(__builtin_unreachable) 249 #define BENCHMARK_UNREACHABLE() __builtin_unreachable() 250 #elif defined(_MSC_VER) 251 #define BENCHMARK_UNREACHABLE() __assume(false) 252 #else 253 #define BENCHMARK_UNREACHABLE() ((void)0) 254 #endif 255 256 namespace benchmark { 257 class BenchmarkReporter; 258 class MemoryManager; 259 260 void Initialize(int* argc, char** argv); 261 262 // Report to stdout all arguments in 'argv' as unrecognized except the first. 263 // Returns true there is at least on unrecognized argument (i.e. 'argc' > 1). 264 bool ReportUnrecognizedArguments(int argc, char** argv); 265 266 // Generate a list of benchmarks matching the specified --benchmark_filter flag 267 // and if --benchmark_list_tests is specified return after printing the name 268 // of each matching benchmark. 
Otherwise run each matching benchmark and 269 // report the results. 270 // 271 // The second and third overload use the specified 'display_reporter' and 272 // 'file_reporter' respectively. 'file_reporter' will write to the file 273 // specified 274 // by '--benchmark_output'. If '--benchmark_output' is not given the 275 // 'file_reporter' is ignored. 276 // 277 // RETURNS: The number of matching benchmarks. 278 size_t RunSpecifiedBenchmarks(); 279 size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter); 280 size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter, 281 BenchmarkReporter* file_reporter); 282 283 // Register a MemoryManager instance that will be used to collect and report 284 // allocation measurements for benchmark runs. 285 void RegisterMemoryManager(MemoryManager* memory_manager); 286 287 namespace internal { 288 class Benchmark; 289 class BenchmarkImp; 290 class BenchmarkFamilies; 291 292 void UseCharPointer(char const volatile*); 293 294 // Take ownership of the pointer and register the benchmark. Return the 295 // registered benchmark. 296 Benchmark* RegisterBenchmarkInternal(Benchmark*); 297 298 // Ensure that the standard streams are properly initialized in every TU. 299 int InitializeStreams(); 300 BENCHMARK_UNUSED static int stream_init_anchor = InitializeStreams(); 301 302 } // namespace internal 303 304 #if (!defined(__GNUC__) && !defined(__clang__)) || defined(__pnacl__) || \ 305 defined(__EMSCRIPTEN__) 306 #define BENCHMARK_HAS_NO_INLINE_ASSEMBLY 307 #endif 308 309 // The DoNotOptimize(...) function can be used to prevent a value or 310 // expression from being optimized away by the compiler. This function is 311 // intended to add little to no overhead. 
312 // See: https://youtu.be/nXaxk27zwlk?t=2441 313 #ifndef BENCHMARK_HAS_NO_INLINE_ASSEMBLY 314 template <class Tp> 315 inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) { 316 asm volatile("" : : "r,m"(value) : "memory"); 317 } 318 319 template <class Tp> 320 inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp& value) { 321 #if defined(__clang__) 322 asm volatile("" : "+r,m"(value) : : "memory"); 323 #else 324 asm volatile("" : "+m,r"(value) : : "memory"); 325 #endif 326 } 327 328 // Force the compiler to flush pending writes to global memory. Acts as an 329 // effective read/write barrier 330 inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() { 331 asm volatile("" : : : "memory"); 332 } 333 #elif defined(_MSC_VER) 334 template <class Tp> 335 inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) { 336 internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value)); 337 _ReadWriteBarrier(); 338 } 339 340 inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() { _ReadWriteBarrier(); } 341 #else 342 template <class Tp> 343 inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) { 344 internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value)); 345 } 346 // FIXME Add ClobberMemory() for non-gnu and non-msvc compilers 347 #endif 348 349 // This class is used for user-defined counters. 350 class Counter { 351 public: 352 enum Flags { 353 kDefaults = 0, 354 // Mark the counter as a rate. It will be presented divided 355 // by the duration of the benchmark. 356 kIsRate = 1U << 0U, 357 // Mark the counter as a thread-average quantity. It will be 358 // presented divided by the number of threads. 359 kAvgThreads = 1U << 1U, 360 // Mark the counter as a thread-average rate. See above. 361 kAvgThreadsRate = kIsRate | kAvgThreads, 362 // Mark the counter as a constant value, valid/same for *every* iteration. 363 // When reporting, it will be *multiplied* by the iteration count. 
364 kIsIterationInvariant = 1U << 2U, 365 // Mark the counter as a constant rate. 366 // When reporting, it will be *multiplied* by the iteration count 367 // and then divided by the duration of the benchmark. 368 kIsIterationInvariantRate = kIsRate | kIsIterationInvariant, 369 // Mark the counter as a iteration-average quantity. 370 // It will be presented divided by the number of iterations. 371 kAvgIterations = 1U << 3U, 372 // Mark the counter as a iteration-average rate. See above. 373 kAvgIterationsRate = kIsRate | kAvgIterations 374 }; 375 376 enum OneK { 377 // 1'000 items per 1k 378 kIs1000 = 1000, 379 // 1'024 items per 1k 380 kIs1024 = 1024 381 }; 382 383 double value; 384 Flags flags; 385 OneK oneK; 386 387 BENCHMARK_ALWAYS_INLINE 388 Counter(double v = 0., Flags f = kDefaults, OneK k = kIs1000) 389 : value(v), flags(f), oneK(k) {} 390 391 BENCHMARK_ALWAYS_INLINE operator double const&() const { return value; } 392 BENCHMARK_ALWAYS_INLINE operator double&() { return value; } 393 }; 394 395 // A helper for user code to create unforeseen combinations of Flags, without 396 // having to do this cast manually each time, or providing this operator. 397 Counter::Flags inline operator|(const Counter::Flags& LHS, 398 const Counter::Flags& RHS) { 399 return static_cast<Counter::Flags>(static_cast<int>(LHS) | 400 static_cast<int>(RHS)); 401 } 402 403 // This is the container for the user-defined counters. 404 typedef std::map<std::string, Counter> UserCounters; 405 406 // TimeUnit is passed to a benchmark in order to specify the order of magnitude 407 // for the measured time. 408 enum TimeUnit { kNanosecond, kMicrosecond, kMillisecond }; 409 410 // BigO is passed to a benchmark in order to specify the asymptotic 411 // computational 412 // complexity for the benchmark. In case oAuto is selected, complexity will be 413 // calculated automatically to the best fit. 
414 enum BigO { oNone, o1, oN, oNSquared, oNCubed, oLogN, oNLogN, oAuto, oLambda }; 415 416 // BigOFunc is passed to a benchmark in order to specify the asymptotic 417 // computational complexity for the benchmark. 418 typedef double(BigOFunc)(int64_t); 419 420 // StatisticsFunc is passed to a benchmark in order to compute some descriptive 421 // statistics over all the measurements of some type 422 typedef double(StatisticsFunc)(const std::vector<double>&); 423 424 struct Statistics { 425 std::string name_; 426 StatisticsFunc* compute_; 427 428 Statistics(const std::string& name, StatisticsFunc* compute) 429 : name_(name), compute_(compute) {} 430 }; 431 432 namespace internal { 433 struct BenchmarkInstance; 434 class ThreadTimer; 435 class ThreadManager; 436 437 enum AggregationReportMode 438 #if defined(BENCHMARK_HAS_CXX11) 439 : unsigned 440 #else 441 #endif 442 { 443 // The mode has not been manually specified 444 ARM_Unspecified = 0, 445 // The mode is user-specified. 446 // This may or may not be set when the following bit-flags are set. 447 ARM_Default = 1U << 0U, 448 // File reporter should only output aggregates. 449 ARM_FileReportAggregatesOnly = 1U << 1U, 450 // Display reporter should only output aggregates 451 ARM_DisplayReportAggregatesOnly = 1U << 2U, 452 // Both reporters should only display aggregates. 453 ARM_ReportAggregatesOnly = 454 ARM_FileReportAggregatesOnly | ARM_DisplayReportAggregatesOnly 455 }; 456 457 } // namespace internal 458 459 // State is passed to a running Benchmark and contains state for the 460 // benchmark to use. 461 class State { 462 public: 463 struct StateIterator; 464 friend struct StateIterator; 465 466 // Returns iterators used to run each iteration of a benchmark using a 467 // C++11 ranged-based for loop. These functions should not be called directly. 468 // 469 // REQUIRES: The benchmark has not started running yet. Neither begin nor end 470 // have been called previously. 
471 // 472 // NOTE: KeepRunning may not be used after calling either of these functions. 473 BENCHMARK_ALWAYS_INLINE StateIterator begin(); 474 BENCHMARK_ALWAYS_INLINE StateIterator end(); 475 476 // Returns true if the benchmark should continue through another iteration. 477 // NOTE: A benchmark may not return from the test until KeepRunning() has 478 // returned false. 479 bool KeepRunning(); 480 481 // Returns true iff the benchmark should run n more iterations. 482 // REQUIRES: 'n' > 0. 483 // NOTE: A benchmark must not return from the test until KeepRunningBatch() 484 // has returned false. 485 // NOTE: KeepRunningBatch() may overshoot by up to 'n' iterations. 486 // 487 // Intended usage: 488 // while (state.KeepRunningBatch(1000)) { 489 // // process 1000 elements 490 // } 491 bool KeepRunningBatch(size_t n); 492 493 // REQUIRES: timer is running and 'SkipWithError(...)' has not been called 494 // by the current thread. 495 // Stop the benchmark timer. If not called, the timer will be 496 // automatically stopped after the last iteration of the benchmark loop. 497 // 498 // For threaded benchmarks the PauseTiming() function only pauses the timing 499 // for the current thread. 500 // 501 // NOTE: The "real time" measurement is per-thread. If different threads 502 // report different measurements the largest one is reported. 503 // 504 // NOTE: PauseTiming()/ResumeTiming() are relatively 505 // heavyweight, and so their use should generally be avoided 506 // within each benchmark iteration, if possible. 507 void PauseTiming(); 508 509 // REQUIRES: timer is not running and 'SkipWithError(...)' has not been called 510 // by the current thread. 511 // Start the benchmark timer. The timer is NOT running on entrance to the 512 // benchmark function. It begins running after control flow enters the 513 // benchmark loop. 
//
  // NOTE: PauseTiming()/ResumeTiming() are relatively
  // heavyweight, and so their use should generally be avoided
  // within each benchmark iteration, if possible.
  void ResumeTiming();

  // REQUIRES: 'SkipWithError(...)' has not been called previously by the
  // current thread.
  // Report the benchmark as resulting in an error with the specified 'msg'.
  // After this call the user may explicitly 'return' from the benchmark.
  //
  // If the ranged-for style of benchmark loop is used, the user must
  // explicitly break from the loop, otherwise all future iterations will be
  // run.
  // If the 'KeepRunning()' loop is used the current thread will automatically
  // exit the loop at the end of the current iteration.
  //
  // For threaded benchmarks only the current thread stops executing and future
  // calls to `KeepRunning()` will block until all threads have completed
  // the `KeepRunning()` loop. If multiple threads report an error only the
  // first error message is used.
  //
  // NOTE: Calling 'SkipWithError(...)' does not cause the benchmark to exit
  // the current scope immediately. If the function is called from within
  // the 'KeepRunning()' loop the current iteration will finish. It is the
  // user's responsibility to exit the scope as needed.
  void SkipWithError(const char* msg);

  // REQUIRES: called exactly once per iteration of the benchmarking loop.
  // Set the manually measured time for this benchmark iteration, which
  // is used instead of automatically measured time if UseManualTime() was
  // specified.
  //
  // For threaded benchmarks the final value will be set to the largest
  // reported values.
  void SetIterationTime(double seconds);

  // Set the number of bytes processed by the current benchmark
  // execution. This routine is typically called once at the end of a
  // throughput oriented benchmark.
553 // 554 // REQUIRES: a benchmark has exited its benchmarking loop. 555 BENCHMARK_ALWAYS_INLINE 556 void SetBytesProcessed(int64_t bytes) { 557 counters["bytes_per_second"] = 558 Counter(static_cast<double>(bytes), Counter::kIsRate, Counter::kIs1024); 559 } 560 561 BENCHMARK_ALWAYS_INLINE 562 int64_t bytes_processed() const { 563 if (counters.find("bytes_per_second") != counters.end()) 564 return static_cast<int64_t>(counters.at("bytes_per_second")); 565 return 0; 566 } 567 568 // If this routine is called with complexity_n > 0 and complexity report is 569 // requested for the 570 // family benchmark, then current benchmark will be part of the computation 571 // and complexity_n will 572 // represent the length of N. 573 BENCHMARK_ALWAYS_INLINE 574 void SetComplexityN(int64_t complexity_n) { complexity_n_ = complexity_n; } 575 576 BENCHMARK_ALWAYS_INLINE 577 int64_t complexity_length_n() { return complexity_n_; } 578 579 // If this routine is called with items > 0, then an items/s 580 // label is printed on the benchmark report line for the currently 581 // executing benchmark. It is typically called at the end of a processing 582 // benchmark where a processing items/second output is desired. 583 // 584 // REQUIRES: a benchmark has exited its benchmarking loop. 585 BENCHMARK_ALWAYS_INLINE 586 void SetItemsProcessed(int64_t items) { 587 counters["items_per_second"] = 588 Counter(static_cast<double>(items), benchmark::Counter::kIsRate); 589 } 590 591 BENCHMARK_ALWAYS_INLINE 592 int64_t items_processed() const { 593 if (counters.find("items_per_second") != counters.end()) 594 return static_cast<int64_t>(counters.at("items_per_second")); 595 return 0; 596 } 597 598 // If this routine is called, the specified label is printed at the 599 // end of the benchmark report line for the currently executing 600 // benchmark. Example: 601 // static void BM_Compress(benchmark::State& state) { 602 // ... 
603 // double compress = input_size / output_size; 604 // state.SetLabel(StrFormat("compress:%.1f%%", 100.0*compression)); 605 // } 606 // Produces output that looks like: 607 // BM_Compress 50 50 14115038 compress:27.3% 608 // 609 // REQUIRES: a benchmark has exited its benchmarking loop. 610 void SetLabel(const char* label); 611 612 void BENCHMARK_ALWAYS_INLINE SetLabel(const std::string& str) { 613 this->SetLabel(str.c_str()); 614 } 615 616 // Range arguments for this run. CHECKs if the argument has been set. 617 BENCHMARK_ALWAYS_INLINE 618 int64_t range(std::size_t pos = 0) const { 619 assert(range_.size() > pos); 620 return range_[pos]; 621 } 622 623 BENCHMARK_DEPRECATED_MSG("use 'range(0)' instead") 624 int64_t range_x() const { return range(0); } 625 626 BENCHMARK_DEPRECATED_MSG("use 'range(1)' instead") 627 int64_t range_y() const { return range(1); } 628 629 BENCHMARK_ALWAYS_INLINE 630 size_t iterations() const { 631 if (BENCHMARK_BUILTIN_EXPECT(!started_, false)) { 632 return 0; 633 } 634 return max_iterations - total_iterations_ + batch_leftover_; 635 } 636 637 private 638 : // items we expect on the first cache line (ie 64 bytes of the struct) 639 // When total_iterations_ is 0, KeepRunning() and friends will return false. 640 // May be larger than max_iterations. 641 size_t total_iterations_; 642 643 // When using KeepRunningBatch(), batch_leftover_ holds the number of 644 // iterations beyond max_iters that were run. Used to track 645 // completed_iterations_ accurately. 646 size_t batch_leftover_; 647 648 public: 649 const size_t max_iterations; 650 651 private: 652 bool started_; 653 bool finished_; 654 bool error_occurred_; 655 656 private: // items we don't need on the first cache line 657 std::vector<int64_t> range_; 658 659 int64_t complexity_n_; 660 661 public: 662 // Container for user-defined counters. 663 UserCounters counters; 664 // Index of the executing thread. Values from [0, threads). 
665 const int thread_index; 666 // Number of threads concurrently executing the benchmark. 667 const int threads; 668 669 private: 670 State(size_t max_iters, const std::vector<int64_t>& ranges, int thread_i, 671 int n_threads, internal::ThreadTimer* timer, 672 internal::ThreadManager* manager); 673 674 void StartKeepRunning(); 675 // Implementation of KeepRunning() and KeepRunningBatch(). 676 // is_batch must be true unless n is 1. 677 bool KeepRunningInternal(size_t n, bool is_batch); 678 void FinishKeepRunning(); 679 internal::ThreadTimer* timer_; 680 internal::ThreadManager* manager_; 681 682 friend struct internal::BenchmarkInstance; 683 }; 684 685 inline BENCHMARK_ALWAYS_INLINE bool State::KeepRunning() { 686 return KeepRunningInternal(1, /*is_batch=*/false); 687 } 688 689 inline BENCHMARK_ALWAYS_INLINE bool State::KeepRunningBatch(size_t n) { 690 return KeepRunningInternal(n, /*is_batch=*/true); 691 } 692 693 inline BENCHMARK_ALWAYS_INLINE bool State::KeepRunningInternal(size_t n, 694 bool is_batch) { 695 // total_iterations_ is set to 0 by the constructor, and always set to a 696 // nonzero value by StartKepRunning(). 697 assert(n > 0); 698 // n must be 1 unless is_batch is true. 699 assert(is_batch || n == 1); 700 if (BENCHMARK_BUILTIN_EXPECT(total_iterations_ >= n, true)) { 701 total_iterations_ -= n; 702 return true; 703 } 704 if (!started_) { 705 StartKeepRunning(); 706 if (!error_occurred_ && total_iterations_ >= n) { 707 total_iterations_ -= n; 708 return true; 709 } 710 } 711 // For non-batch runs, total_iterations_ must be 0 by now. 
712 if (is_batch && total_iterations_ != 0) { 713 batch_leftover_ = n - total_iterations_; 714 total_iterations_ = 0; 715 return true; 716 } 717 FinishKeepRunning(); 718 return false; 719 } 720 721 struct State::StateIterator { 722 struct BENCHMARK_UNUSED Value {}; 723 typedef std::forward_iterator_tag iterator_category; 724 typedef Value value_type; 725 typedef Value reference; 726 typedef Value pointer; 727 typedef std::ptrdiff_t difference_type; 728 729 private: 730 friend class State; 731 BENCHMARK_ALWAYS_INLINE 732 StateIterator() : cached_(0), parent_() {} 733 734 BENCHMARK_ALWAYS_INLINE 735 explicit StateIterator(State* st) 736 : cached_(st->error_occurred_ ? 0 : st->max_iterations), parent_(st) {} 737 738 public: 739 BENCHMARK_ALWAYS_INLINE 740 Value operator*() const { return Value(); } 741 742 BENCHMARK_ALWAYS_INLINE 743 StateIterator& operator++() { 744 assert(cached_ > 0); 745 --cached_; 746 return *this; 747 } 748 749 BENCHMARK_ALWAYS_INLINE 750 bool operator!=(StateIterator const&) const { 751 if (BENCHMARK_BUILTIN_EXPECT(cached_ != 0, true)) return true; 752 parent_->FinishKeepRunning(); 753 return false; 754 } 755 756 private: 757 size_t cached_; 758 State* const parent_; 759 }; 760 761 inline BENCHMARK_ALWAYS_INLINE State::StateIterator State::begin() { 762 return StateIterator(this); 763 } 764 inline BENCHMARK_ALWAYS_INLINE State::StateIterator State::end() { 765 StartKeepRunning(); 766 return StateIterator(); 767 } 768 769 namespace internal { 770 771 typedef void(Function)(State&); 772 773 // ------------------------------------------------------ 774 // Benchmark registration object. The BENCHMARK() macro expands 775 // into an internal::Benchmark* object. Various methods can 776 // be called on this object to change the properties of the benchmark. 777 // Each method returns "this" so that multiple method calls can 778 // chained into one expression. 
// ------------------------------------------------------
// Registration handle for a single benchmark family. The BENCHMARK()
// macros and RegisterBenchmark() return a Benchmark*; configuration
// methods below return "this" so calls can be chained fluently.
class Benchmark {
 public:
  virtual ~Benchmark();

  // Note: the following methods all return "this" so that multiple
  // method calls can be chained together in one expression.

  // Run this benchmark once with "x" as the extra argument passed
  // to the function.
  // REQUIRES: The function passed to the constructor must accept an arg1.
  Benchmark* Arg(int64_t x);

  // Run this benchmark with the given time unit for the generated output
  // report.
  Benchmark* Unit(TimeUnit unit);

  // Run this benchmark once for a number of values picked from the
  // range [start..limit]. (start and limit are always picked.)
  // REQUIRES: The function passed to the constructor must accept an arg1.
  Benchmark* Range(int64_t start, int64_t limit);

  // Run this benchmark once for all values in the range [start..limit] with
  // specific step.
  // REQUIRES: The function passed to the constructor must accept an arg1.
  Benchmark* DenseRange(int64_t start, int64_t limit, int step = 1);

  // Run this benchmark once with "args" as the extra arguments passed
  // to the function.
  // REQUIRES: The function passed to the constructor must accept arg1, arg2 ...
  Benchmark* Args(const std::vector<int64_t>& args);

  // Equivalent to Args({x, y})
  // NOTE: This is a legacy C++03 interface provided for compatibility only.
  //   New code should use 'Args'.
  Benchmark* ArgPair(int64_t x, int64_t y) {
    std::vector<int64_t> args;
    args.push_back(x);
    args.push_back(y);
    return Args(args);
  }

  // Run this benchmark once for a number of values picked from the
  // ranges [start..limit]. (starts and limits are always picked.)
  // REQUIRES: The function passed to the constructor must accept arg1, arg2 ...
  Benchmark* Ranges(const std::vector<std::pair<int64_t, int64_t> >& ranges);

  // Equivalent to ArgNames({name})
  Benchmark* ArgName(const std::string& name);

  // Set the argument names to display in the benchmark name. If not called,
  // only argument values will be shown.
  Benchmark* ArgNames(const std::vector<std::string>& names);

  // Equivalent to Ranges({{lo1, hi1}, {lo2, hi2}}).
  // NOTE: This is a legacy C++03 interface provided for compatibility only.
  //   New code should use 'Ranges'.
  Benchmark* RangePair(int64_t lo1, int64_t hi1, int64_t lo2, int64_t hi2) {
    std::vector<std::pair<int64_t, int64_t> > ranges;
    ranges.push_back(std::make_pair(lo1, hi1));
    ranges.push_back(std::make_pair(lo2, hi2));
    return Ranges(ranges);
  }

  // Pass this benchmark object to *func, which can customize
  // the benchmark by calling various methods like Arg, Args,
  // Threads, etc.
  Benchmark* Apply(void (*func)(Benchmark* benchmark));

  // Set the range multiplier for non-dense range. If not called, the range
  // multiplier kRangeMultiplier will be used.
  Benchmark* RangeMultiplier(int multiplier);

  // Set the minimum amount of time to use when running this benchmark. This
  // option overrides the `benchmark_min_time` flag.
  // REQUIRES: `t > 0` and `Iterations` has not been called on this benchmark.
  Benchmark* MinTime(double t);

  // Specify the amount of iterations that should be run by this benchmark.
  // REQUIRES: 'n > 0' and `MinTime` has not been called on this benchmark.
  //
  // NOTE: This function should only be used when *exact* iteration control is
  //   needed and never to control or limit how long a benchmark runs, where
  //   `--benchmark_min_time=N` or `MinTime(...)` should be used instead.
  Benchmark* Iterations(size_t n);

  // Specify the amount of times to repeat this benchmark. This option
  // overrides the `benchmark_repetitions` flag.
  // REQUIRES: `n > 0`
  Benchmark* Repetitions(int n);

  // Specify if each repetition of the benchmark should be reported separately
  // or if only the final statistics should be reported. If the benchmark
  // is not repeated then the single result is always reported.
  // Applies to *ALL* reporters (display and file).
  Benchmark* ReportAggregatesOnly(bool value = true);

  // Same as ReportAggregatesOnly(), but applies to display reporter only.
  Benchmark* DisplayAggregatesOnly(bool value = true);

  // If a particular benchmark is I/O bound, runs multiple threads internally
  // or if for some reason CPU timings are not representative, call this
  // method. If called, the elapsed time will be used to control how many
  // iterations are run, and in the printing of items/second or MB/seconds
  // values. If not called, the cpu time used by the benchmark will be used.
  Benchmark* UseRealTime();

  // If a benchmark must measure time manually (e.g. if GPU execution time is
  // being measured), call this method. If called, each benchmark iteration
  // should call SetIterationTime(seconds) to report the measured time, which
  // will be used to control how many iterations are run, and in the printing
  // of items/second or MB/second values.
  Benchmark* UseManualTime();

  // Set the asymptotic computational complexity for the benchmark. If called
  // the asymptotic computational complexity will be shown on the output.
  Benchmark* Complexity(BigO complexity = benchmark::oAuto);

  // Set the asymptotic computational complexity for the benchmark. If called
  // the asymptotic computational complexity will be shown on the output.
  Benchmark* Complexity(BigOFunc* complexity);

  // Add this statistics to be computed over all the values of benchmark run.
  Benchmark* ComputeStatistics(std::string name, StatisticsFunc* statistics);

  // Support for running multiple copies of the same benchmark concurrently
  // in multiple threads. This may be useful when measuring the scaling
  // of some piece of code.

  // Run one instance of this benchmark concurrently in t threads.
  Benchmark* Threads(int t);

  // Pick a set of values T from [min_threads,max_threads].
  // min_threads and max_threads are always included in T. Run this
  // benchmark once for each value in T. The benchmark run for a
  // particular value t consists of t threads running the benchmark
  // function concurrently. For example, consider:
  //    BENCHMARK(Foo)->ThreadRange(1,16);
  // This will run the following benchmarks:
  //    Foo in 1 thread
  //    Foo in 2 threads
  //    Foo in 4 threads
  //    Foo in 8 threads
  //    Foo in 16 threads
  Benchmark* ThreadRange(int min_threads, int max_threads);

  // For each value n in the range, run this benchmark once using n threads.
  // min_threads and max_threads are always included in the range.
  // stride specifies the increment. E.g. DenseThreadRange(1, 8, 3) starts
  // a benchmark with 1, 4, 7 and 8 threads.
  Benchmark* DenseThreadRange(int min_threads, int max_threads, int stride = 1);

  // Equivalent to ThreadRange(NumCPUs(), NumCPUs())
  Benchmark* ThreadPerCpu();

  // Executes one instance of this benchmark; implemented by subclasses
  // (e.g. FunctionBenchmark, Fixture) to dispatch to the user's code.
  virtual void Run(State& state) = 0;

 protected:
  explicit Benchmark(const char* name);
  Benchmark(Benchmark const&);
  void SetName(const char* name);

  // Number of arguments each registered run carries (-1 semantics, if any,
  // are defined by the implementation — not visible from this header).
  int ArgsCnt() const;

 private:
  friend class BenchmarkFamilies;

  std::string name_;
  AggregationReportMode aggregation_report_mode_;
  std::vector<std::string> arg_names_;       // Argument names (see ArgNames).
  std::vector<std::vector<int64_t> > args_;  // Args for all benchmark runs.
  TimeUnit time_unit_;
  int range_multiplier_;
  double min_time_;
  size_t iterations_;
  int repetitions_;
  bool use_real_time_;
  bool use_manual_time_;
  BigO complexity_;
  BigOFunc* complexity_lambda_;
  std::vector<Statistics> statistics_;
  std::vector<int> thread_counts_;

  // Copy-assignment intentionally declared but not defined (non-assignable).
  Benchmark& operator=(Benchmark const&);
};

}  // namespace internal

// Create and register a benchmark with the specified 'name' that invokes
// the specified functor 'fn'.
//
// RETURNS: A pointer to the registered benchmark.
internal::Benchmark* RegisterBenchmark(const char* name,
                                       internal::Function* fn);

#if defined(BENCHMARK_HAS_CXX11)
template <class Lambda>
internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn);
#endif

// Remove all registered benchmarks. All pointers to previously registered
// benchmarks are invalidated.
void ClearRegisteredBenchmarks();

namespace internal {
// The class used to hold all Benchmarks created from static functions
// (i.e. those created using the BENCHMARK(...) macros).
class FunctionBenchmark : public Benchmark {
 public:
  FunctionBenchmark(const char* name, Function* func)
      : Benchmark(name), func_(func) {}

  // Invokes the wrapped free function with the benchmark State.
  virtual void Run(State& st);

 private:
  Function* func_;  // Non-owning pointer to the user's benchmark function.
};

#ifdef BENCHMARK_HAS_CXX11
// Wraps a callable (typically a lambda) registered via RegisterBenchmark().
// Constructible only by RegisterBenchmark (see friend declaration below).
template <class Lambda>
class LambdaBenchmark : public Benchmark {
 public:
  virtual void Run(State& st) { lambda_(st); }

 private:
  // OLambda is a separate deduced type so the callable can be perfectly
  // forwarded into the stored Lambda member.
  template <class OLambda>
  LambdaBenchmark(const char* name, OLambda&& lam)
      : Benchmark(name), lambda_(std::forward<OLambda>(lam)) {}

  LambdaBenchmark(LambdaBenchmark const&) = delete;

 private:
  // Only ::benchmark::RegisterBenchmark may construct this type.
  template <class Lam>
  friend Benchmark* ::benchmark::RegisterBenchmark(const char*, Lam&&);

  Lambda lambda_;
};
#endif

}  // namespace internal

inline internal::Benchmark* RegisterBenchmark(const char* name,
                                              internal::Function* fn) {
  return internal::RegisterBenchmarkInternal(
      ::new internal::FunctionBenchmark(name, fn));
}

#ifdef BENCHMARK_HAS_CXX11
template <class Lambda>
internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn) {
  // Decay strips references/cv so the closure is stored by value.
  using BenchType =
      internal::LambdaBenchmark<typename std::decay<Lambda>::type>;
  return internal::RegisterBenchmarkInternal(
      ::new BenchType(name, std::forward<Lambda>(fn)));
}
#endif

#if defined(BENCHMARK_HAS_CXX11) && \
    (!defined(BENCHMARK_GCC_VERSION) || BENCHMARK_GCC_VERSION >= 409)
// Variadic overload: captures extra arguments by value and forwards them to
// 'fn' on every invocation. Disabled for GCC < 4.9 (guard above).
template <class Lambda, class... Args>
internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn,
                                       Args&&... args) {
  return benchmark::RegisterBenchmark(
      name, [=](benchmark::State& st) { fn(st, args...); });
}
#else
#define BENCHMARK_HAS_NO_VARIADIC_REGISTER_BENCHMARK
#endif

// The base class for all fixture tests.
1049 class Fixture : public internal::Benchmark { 1050 public: 1051 Fixture() : internal::Benchmark("") {} 1052 1053 virtual void Run(State& st) { 1054 this->SetUp(st); 1055 this->BenchmarkCase(st); 1056 this->TearDown(st); 1057 } 1058 1059 // These will be deprecated ... 1060 virtual void SetUp(const State&) {} 1061 virtual void TearDown(const State&) {} 1062 // ... In favor of these. 1063 virtual void SetUp(State& st) { SetUp(const_cast<const State&>(st)); } 1064 virtual void TearDown(State& st) { TearDown(const_cast<const State&>(st)); } 1065 1066 protected: 1067 virtual void BenchmarkCase(State&) = 0; 1068 }; 1069 1070 } // namespace benchmark 1071 1072 // ------------------------------------------------------ 1073 // Macro to register benchmarks 1074 1075 // Check that __COUNTER__ is defined and that __COUNTER__ increases by 1 1076 // every time it is expanded. X + 1 == X + 0 is used in case X is defined to be 1077 // empty. If X is empty the expression becomes (+1 == +0). 1078 #if defined(__COUNTER__) && (__COUNTER__ + 1 == __COUNTER__ + 0) 1079 #define BENCHMARK_PRIVATE_UNIQUE_ID __COUNTER__ 1080 #else 1081 #define BENCHMARK_PRIVATE_UNIQUE_ID __LINE__ 1082 #endif 1083 1084 // Helpers for generating unique variable names 1085 #define BENCHMARK_PRIVATE_NAME(n) \ 1086 BENCHMARK_PRIVATE_CONCAT(_benchmark_, BENCHMARK_PRIVATE_UNIQUE_ID, n) 1087 #define BENCHMARK_PRIVATE_CONCAT(a, b, c) BENCHMARK_PRIVATE_CONCAT2(a, b, c) 1088 #define BENCHMARK_PRIVATE_CONCAT2(a, b, c) a##b##c 1089 1090 #define BENCHMARK_PRIVATE_DECLARE(n) \ 1091 static ::benchmark::internal::Benchmark* BENCHMARK_PRIVATE_NAME(n) \ 1092 BENCHMARK_UNUSED 1093 1094 #define BENCHMARK(n) \ 1095 BENCHMARK_PRIVATE_DECLARE(n) = \ 1096 (::benchmark::internal::RegisterBenchmarkInternal( \ 1097 new ::benchmark::internal::FunctionBenchmark(#n, n))) 1098 1099 // Old-style macros 1100 #define BENCHMARK_WITH_ARG(n, a) BENCHMARK(n)->Arg((a)) 1101 #define BENCHMARK_WITH_ARG2(n, a1, a2) BENCHMARK(n)->Args({(a1), 
(a2)}) 1102 #define BENCHMARK_WITH_UNIT(n, t) BENCHMARK(n)->Unit((t)) 1103 #define BENCHMARK_RANGE(n, lo, hi) BENCHMARK(n)->Range((lo), (hi)) 1104 #define BENCHMARK_RANGE2(n, l1, h1, l2, h2) \ 1105 BENCHMARK(n)->RangePair({{(l1), (h1)}, {(l2), (h2)}}) 1106 1107 #ifdef BENCHMARK_HAS_CXX11 1108 1109 // Register a benchmark which invokes the function specified by `func` 1110 // with the additional arguments specified by `...`. 1111 // 1112 // For example: 1113 // 1114 // template <class ...ExtraArgs>` 1115 // void BM_takes_args(benchmark::State& state, ExtraArgs&&... extra_args) { 1116 // [...] 1117 //} 1118 // /* Registers a benchmark named "BM_takes_args/int_string_test` */ 1119 // BENCHMARK_CAPTURE(BM_takes_args, int_string_test, 42, std::string("abc")); 1120 #define BENCHMARK_CAPTURE(func, test_case_name, ...) \ 1121 BENCHMARK_PRIVATE_DECLARE(func) = \ 1122 (::benchmark::internal::RegisterBenchmarkInternal( \ 1123 new ::benchmark::internal::FunctionBenchmark( \ 1124 #func "/" #test_case_name, \ 1125 [](::benchmark::State& st) { func(st, __VA_ARGS__); }))) 1126 1127 #endif // BENCHMARK_HAS_CXX11 1128 1129 // This will register a benchmark for a templatized function. For example: 1130 // 1131 // template<int arg> 1132 // void BM_Foo(int iters); 1133 // 1134 // BENCHMARK_TEMPLATE(BM_Foo, 1); 1135 // 1136 // will register BM_Foo<1> as a benchmark. 1137 #define BENCHMARK_TEMPLATE1(n, a) \ 1138 BENCHMARK_PRIVATE_DECLARE(n) = \ 1139 (::benchmark::internal::RegisterBenchmarkInternal( \ 1140 new ::benchmark::internal::FunctionBenchmark(#n "<" #a ">", n<a>))) 1141 1142 #define BENCHMARK_TEMPLATE2(n, a, b) \ 1143 BENCHMARK_PRIVATE_DECLARE(n) = \ 1144 (::benchmark::internal::RegisterBenchmarkInternal( \ 1145 new ::benchmark::internal::FunctionBenchmark(#n "<" #a "," #b ">", \ 1146 n<a, b>))) 1147 1148 #ifdef BENCHMARK_HAS_CXX11 1149 #define BENCHMARK_TEMPLATE(n, ...) 
\ 1150 BENCHMARK_PRIVATE_DECLARE(n) = \ 1151 (::benchmark::internal::RegisterBenchmarkInternal( \ 1152 new ::benchmark::internal::FunctionBenchmark( \ 1153 #n "<" #__VA_ARGS__ ">", n<__VA_ARGS__>))) 1154 #else 1155 #define BENCHMARK_TEMPLATE(n, a) BENCHMARK_TEMPLATE1(n, a) 1156 #endif 1157 1158 #define BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \ 1159 class BaseClass##_##Method##_Benchmark : public BaseClass { \ 1160 public: \ 1161 BaseClass##_##Method##_Benchmark() : BaseClass() { \ 1162 this->SetName(#BaseClass "/" #Method); \ 1163 } \ 1164 \ 1165 protected: \ 1166 virtual void BenchmarkCase(::benchmark::State&); \ 1167 }; 1168 1169 #define BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \ 1170 class BaseClass##_##Method##_Benchmark : public BaseClass<a> { \ 1171 public: \ 1172 BaseClass##_##Method##_Benchmark() : BaseClass<a>() { \ 1173 this->SetName(#BaseClass "<" #a ">/" #Method); \ 1174 } \ 1175 \ 1176 protected: \ 1177 virtual void BenchmarkCase(::benchmark::State&); \ 1178 }; 1179 1180 #define BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \ 1181 class BaseClass##_##Method##_Benchmark : public BaseClass<a, b> { \ 1182 public: \ 1183 BaseClass##_##Method##_Benchmark() : BaseClass<a, b>() { \ 1184 this->SetName(#BaseClass "<" #a "," #b ">/" #Method); \ 1185 } \ 1186 \ 1187 protected: \ 1188 virtual void BenchmarkCase(::benchmark::State&); \ 1189 }; 1190 1191 #ifdef BENCHMARK_HAS_CXX11 1192 #define BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(BaseClass, Method, ...) 
\ 1193 class BaseClass##_##Method##_Benchmark : public BaseClass<__VA_ARGS__> { \ 1194 public: \ 1195 BaseClass##_##Method##_Benchmark() : BaseClass<__VA_ARGS__>() { \ 1196 this->SetName(#BaseClass "<" #__VA_ARGS__ ">/" #Method); \ 1197 } \ 1198 \ 1199 protected: \ 1200 virtual void BenchmarkCase(::benchmark::State&); \ 1201 }; 1202 #else 1203 #define BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(n, a) \ 1204 BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(n, a) 1205 #endif 1206 1207 #define BENCHMARK_DEFINE_F(BaseClass, Method) \ 1208 BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \ 1209 void BaseClass##_##Method##_Benchmark::BenchmarkCase 1210 1211 #define BENCHMARK_TEMPLATE1_DEFINE_F(BaseClass, Method, a) \ 1212 BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \ 1213 void BaseClass##_##Method##_Benchmark::BenchmarkCase 1214 1215 #define BENCHMARK_TEMPLATE2_DEFINE_F(BaseClass, Method, a, b) \ 1216 BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \ 1217 void BaseClass##_##Method##_Benchmark::BenchmarkCase 1218 1219 #ifdef BENCHMARK_HAS_CXX11 1220 #define BENCHMARK_TEMPLATE_DEFINE_F(BaseClass, Method, ...) \ 1221 BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(BaseClass, Method, __VA_ARGS__) \ 1222 void BaseClass##_##Method##_Benchmark::BenchmarkCase 1223 #else 1224 #define BENCHMARK_TEMPLATE_DEFINE_F(BaseClass, Method, a) \ 1225 BENCHMARK_TEMPLATE1_DEFINE_F(BaseClass, Method, a) 1226 #endif 1227 1228 #define BENCHMARK_REGISTER_F(BaseClass, Method) \ 1229 BENCHMARK_PRIVATE_REGISTER_F(BaseClass##_##Method##_Benchmark) 1230 1231 #define BENCHMARK_PRIVATE_REGISTER_F(TestName) \ 1232 BENCHMARK_PRIVATE_DECLARE(TestName) = \ 1233 (::benchmark::internal::RegisterBenchmarkInternal(new TestName())) 1234 1235 // This macro will define and register a benchmark within a fixture class. 
#define BENCHMARK_F(BaseClass, Method)           \
  BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \
  BENCHMARK_REGISTER_F(BaseClass, Method);       \
  void BaseClass##_##Method##_Benchmark::BenchmarkCase

#define BENCHMARK_TEMPLATE1_F(BaseClass, Method, a)           \
  BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \
  BENCHMARK_REGISTER_F(BaseClass, Method);                    \
  void BaseClass##_##Method##_Benchmark::BenchmarkCase

#define BENCHMARK_TEMPLATE2_F(BaseClass, Method, a, b)           \
  BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \
  BENCHMARK_REGISTER_F(BaseClass, Method);                       \
  void BaseClass##_##Method##_Benchmark::BenchmarkCase

#ifdef BENCHMARK_HAS_CXX11
#define BENCHMARK_TEMPLATE_F(BaseClass, Method, ...)                   \
  BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(BaseClass, Method, __VA_ARGS__) \
  BENCHMARK_REGISTER_F(BaseClass, Method);                             \
  void BaseClass##_##Method##_Benchmark::BenchmarkCase
#else
#define BENCHMARK_TEMPLATE_F(BaseClass, Method, a) \
  BENCHMARK_TEMPLATE1_F(BaseClass, Method, a)
#endif

// Helper macro to create a main routine in a test that runs the benchmarks.
// The trailing 'int main(int, char**)' re-declaration forces callers to write
// a semicolon after BENCHMARK_MAIN().
#define BENCHMARK_MAIN()                                                \
  int main(int argc, char** argv) {                                     \
    ::benchmark::Initialize(&argc, argv);                               \
    if (::benchmark::ReportUnrecognizedArguments(argc, argv)) return 1; \
    ::benchmark::RunSpecifiedBenchmarks();                              \
  }                                                                     \
  int main(int, char**)

// ------------------------------------------------------
// Benchmark Reporters

namespace benchmark {

// Host CPU information reported in the benchmark context header.
// Populated once by the private constructor; access via Get().
struct CPUInfo {
  struct CacheInfo {
    std::string type;
    int level;
    int size;
    int num_sharing;
  };

  int num_cpus;
  double cycles_per_second;
  std::vector<CacheInfo> caches;
  bool scaling_enabled;
  std::vector<double> load_avg;

  static const CPUInfo& Get();

 private:
  CPUInfo();
  BENCHMARK_DISALLOW_COPY_AND_ASSIGN(CPUInfo);
};

// Host system information (currently just the machine name).
// Populated once by the private constructor; access via Get().
struct SystemInfo {
  std::string name;
  static const SystemInfo& Get();

 private:
  SystemInfo();
  BENCHMARK_DISALLOW_COPY_AND_ASSIGN(SystemInfo);
};

// Interface for custom benchmark result printers.
// By default, benchmark reports are printed to stdout. However an application
// can control the destination of the reports by calling
// RunSpecifiedBenchmarks and passing it a custom reporter object.
// The reporter object must implement the following interface.
class BenchmarkReporter {
 public:
  struct Context {
    CPUInfo const& cpu_info;
    SystemInfo const& sys_info;
    // The number of chars in the longest benchmark name.
    size_t name_field_width;
    static const char* executable_name;
    Context();
  };

  struct Run {
    enum RunType { RT_Iteration, RT_Aggregate };

    // NOTE(review): 'statistics' is not set by this initializer list —
    // presumably the runner assigns it before reporting; confirm before
    // relying on it in a custom reporter.
    Run()
        : run_type(RT_Iteration),
          error_occurred(false),
          iterations(1),
          time_unit(kNanosecond),
          real_accumulated_time(0),
          cpu_accumulated_time(0),
          max_heapbytes_used(0),
          complexity(oNone),
          complexity_lambda(),
          complexity_n(0),
          report_big_o(false),
          report_rms(false),
          counters(),
          has_memory_result(false),
          allocs_per_iter(0.0),
          max_bytes_used(0) {}

    std::string benchmark_name() const;
    std::string run_name;
    RunType run_type;  // is this a measurement, or an aggregate?
    std::string aggregate_name;
    std::string report_label;  // Empty if not set by benchmark.
    bool error_occurred;
    std::string error_message;

    int64_t iterations;
    TimeUnit time_unit;
    double real_accumulated_time;
    double cpu_accumulated_time;

    // Return a value representing the real time per iteration in the unit
    // specified by 'time_unit'.
    // NOTE: If 'iterations' is zero the returned value represents the
    // accumulated time.
    double GetAdjustedRealTime() const;

    // Return a value representing the cpu time per iteration in the unit
    // specified by 'time_unit'.
    // NOTE: If 'iterations' is zero the returned value represents the
    // accumulated time.
    double GetAdjustedCPUTime() const;

    // This is set to 0.0 if memory tracing is not enabled.
    double max_heapbytes_used;

    // Keep track of arguments to compute asymptotic complexity
    BigO complexity;
    BigOFunc* complexity_lambda;
    int64_t complexity_n;

    // what statistics to compute from the measurements
    const std::vector<Statistics>* statistics;

    // Inform print function whether the current run is a complexity report
    bool report_big_o;
    bool report_rms;

    UserCounters counters;

    // Memory metrics.
    bool has_memory_result;
    double allocs_per_iter;
    int64_t max_bytes_used;
  };

  // Construct a BenchmarkReporter with the output stream set to 'std::cout'
  // and the error stream set to 'std::cerr'
  BenchmarkReporter();

  // Called once for every suite of benchmarks run.
  // The parameter "context" contains information that the
  // reporter may wish to use when generating its report, for example the
  // platform under which the benchmarks are running. The benchmark run is
  // never started if this function returns false, allowing the reporter
  // to skip runs based on the context information.
  virtual bool ReportContext(const Context& context) = 0;

  // Called once for each group of benchmark runs, gives information about
  // cpu-time and heap memory usage during the benchmark run. If the group
  // of runs contained more than two entries then 'report' contains additional
  // elements representing the mean and standard deviation of those runs.
  // Additionally if this group of runs was the last in a family of benchmarks
  // 'reports' contains additional entries representing the asymptotic
  // complexity and RMS of that benchmark family.
  virtual void ReportRuns(const std::vector<Run>& report) = 0;

  // Called once and only once after every group of benchmarks is run and
  // reported.
  virtual void Finalize() {}

  // REQUIRES: The object referenced by 'out' is valid for the lifetime
  // of the reporter.
  void SetOutputStream(std::ostream* out) {
    assert(out);
    output_stream_ = out;
  }

  // REQUIRES: The object referenced by 'err' is valid for the lifetime
  // of the reporter.
  void SetErrorStream(std::ostream* err) {
    assert(err);
    error_stream_ = err;
  }

  std::ostream& GetOutputStream() const { return *output_stream_; }

  std::ostream& GetErrorStream() const { return *error_stream_; }

  virtual ~BenchmarkReporter();

  // Write a human readable string to 'out' representing the specified
  // 'context'.
  // REQUIRES: 'out' is non-null.
  static void PrintBasicContext(std::ostream* out, Context const& context);

 private:
  std::ostream* output_stream_;
  std::ostream* error_stream_;
};

// Simple reporter that outputs benchmark data to the console. This is the
// default reporter used by RunSpecifiedBenchmarks().
1447 class ConsoleReporter : public BenchmarkReporter { 1448 public: 1449 enum OutputOptions { 1450 OO_None = 0, 1451 OO_Color = 1, 1452 OO_Tabular = 2, 1453 OO_ColorTabular = OO_Color | OO_Tabular, 1454 OO_Defaults = OO_ColorTabular 1455 }; 1456 explicit ConsoleReporter(OutputOptions opts_ = OO_Defaults) 1457 : output_options_(opts_), 1458 name_field_width_(0), 1459 prev_counters_(), 1460 printed_header_(false) {} 1461 1462 virtual bool ReportContext(const Context& context); 1463 virtual void ReportRuns(const std::vector<Run>& reports); 1464 1465 protected: 1466 virtual void PrintRunData(const Run& report); 1467 virtual void PrintHeader(const Run& report); 1468 1469 OutputOptions output_options_; 1470 size_t name_field_width_; 1471 UserCounters prev_counters_; 1472 bool printed_header_; 1473 }; 1474 1475 class JSONReporter : public BenchmarkReporter { 1476 public: 1477 JSONReporter() : first_report_(true) {} 1478 virtual bool ReportContext(const Context& context); 1479 virtual void ReportRuns(const std::vector<Run>& reports); 1480 virtual void Finalize(); 1481 1482 private: 1483 void PrintRunData(const Run& report); 1484 1485 bool first_report_; 1486 }; 1487 1488 class BENCHMARK_DEPRECATED_MSG("The CSV Reporter will be removed in a future release") 1489 CSVReporter : public BenchmarkReporter { 1490 public: 1491 CSVReporter() : printed_header_(false) {} 1492 virtual bool ReportContext(const Context& context); 1493 virtual void ReportRuns(const std::vector<Run>& reports); 1494 1495 private: 1496 void PrintRunData(const Run& report); 1497 1498 bool printed_header_; 1499 std::set<std::string> user_counter_names_; 1500 }; 1501 1502 // If a MemoryManager is registered, it can be used to collect and report 1503 // allocation metrics for a run of the benchmark. 1504 class MemoryManager { 1505 public: 1506 struct Result { 1507 Result() : num_allocs(0), max_bytes_used(0) {} 1508 1509 // The number of allocations made in total between Start and Stop. 
1510 int64_t num_allocs; 1511 1512 // The peak memory use between Start and Stop. 1513 int64_t max_bytes_used; 1514 }; 1515 1516 virtual ~MemoryManager() {} 1517 1518 // Implement this to start recording allocation information. 1519 virtual void Start() = 0; 1520 1521 // Implement this to stop recording and fill out the given Result structure. 1522 virtual void Stop(Result* result) = 0; 1523 }; 1524 1525 inline const char* GetTimeUnitString(TimeUnit unit) { 1526 switch (unit) { 1527 case kMillisecond: 1528 return "ms"; 1529 case kMicrosecond: 1530 return "us"; 1531 case kNanosecond: 1532 return "ns"; 1533 } 1534 BENCHMARK_UNREACHABLE(); 1535 } 1536 1537 inline double GetTimeUnitMultiplier(TimeUnit unit) { 1538 switch (unit) { 1539 case kMillisecond: 1540 return 1e3; 1541 case kMicrosecond: 1542 return 1e6; 1543 case kNanosecond: 1544 return 1e9; 1545 } 1546 BENCHMARK_UNREACHABLE(); 1547 } 1548 1549 } // namespace benchmark 1550 1551 #endif // BENCHMARK_BENCHMARK_H_ 1552