// Support for registering benchmarks for functions.

/* Example usage:
// Define a function that executes the code to be measured a
// specified number of times:
static void BM_StringCreation(benchmark::State& state) {
  while (state.KeepRunning())
    std::string empty_string;
}

// Register the function as a benchmark
BENCHMARK(BM_StringCreation);

// Define another benchmark
static void BM_StringCopy(benchmark::State& state) {
  std::string x = "hello";
  while (state.KeepRunning())
    std::string copy(x);
}
BENCHMARK(BM_StringCopy);

// Augment the main() program to invoke benchmarks if specified
// via the --benchmark_filter command line flag.  E.g.,
//       my_unittest --benchmark_filter=all
//       my_unittest --benchmark_filter=BM_StringCreation
//       my_unittest --benchmark_filter=String
//       my_unittest --benchmark_filter='Copy|Creation'
int main(int argc, char** argv) {
  benchmark::Initialize(&argc, argv);
  benchmark::RunSpecifiedBenchmarks();
  return 0;
}

// Sometimes a family of microbenchmarks can be implemented with
// just one routine that takes an extra argument to specify which
// one of the family of benchmarks to run.  For example, the following
// code defines a family of microbenchmarks for measuring the speed
// of memcpy() calls of different lengths:

static void BM_memcpy(benchmark::State& state) {
  char* src = new char[state.range(0)]; char* dst = new char[state.range(0)];
  memset(src, 'x', state.range(0));
  while (state.KeepRunning())
    memcpy(dst, src, state.range(0));
  state.SetBytesProcessed(int64_t(state.iterations()) *
                          int64_t(state.range(0)));
  delete[] src; delete[] dst;
}
BENCHMARK(BM_memcpy)->Arg(8)->Arg(64)->Arg(512)->Arg(1<<10)->Arg(8<<10);

// The preceding code is quite repetitive, and can be replaced with the
// following short-hand.  The following invocation will pick a few
// appropriate arguments in the specified range and will generate a
// microbenchmark for each such argument.
BENCHMARK(BM_memcpy)->Range(8, 8<<10);

// You might have a microbenchmark that depends on two inputs.  For
// example, the following code defines a family of microbenchmarks for
// measuring the speed of set insertion.
static void BM_SetInsert(benchmark::State& state) {
  while (state.KeepRunning()) {
    state.PauseTiming();
    set<int> data = ConstructRandomSet(state.range(0));
    state.ResumeTiming();
    for (int j = 0; j < state.range(1); ++j)
      data.insert(RandomNumber());
  }
}
BENCHMARK(BM_SetInsert)
   ->Args({1<<10, 1})
   ->Args({1<<10, 8})
   ->Args({1<<10, 64})
   ->Args({1<<10, 512})
   ->Args({8<<10, 1})
   ->Args({8<<10, 8})
   ->Args({8<<10, 64})
   ->Args({8<<10, 512});

// The preceding code is quite repetitive, and can be replaced with
// the following short-hand.  The following invocation will pick a few
// appropriate arguments in the product of the two specified ranges
// and will generate a microbenchmark for each such pair.
BENCHMARK(BM_SetInsert)->Ranges({{1<<10, 8<<10}, {1, 512}});

// For more complex patterns of inputs, passing a custom function
// to Apply allows programmatic specification of an
// arbitrary set of arguments to run the microbenchmark on.
// The following example enumerates a dense range on
// one parameter, and a sparse range on the second.
static void CustomArguments(benchmark::internal::Benchmark* b) {
  for (int i = 0; i <= 10; ++i)
    for (int j = 32; j <= 1024*1024; j *= 8)
      b->Args({i, j});
}
BENCHMARK(BM_SetInsert)->Apply(CustomArguments);

// Templated microbenchmarks work the same way:
// Produce then consume 'size' messages 'iters' times
// Measures throughput in the absence of multiprogramming.
template <class Q> void BM_Sequential(benchmark::State& state) {
  Q q;
  typename Q::value_type v;
  while (state.KeepRunning()) {
    for (int i = state.range(0); i--; )
      q.push(v);
    for (int e = state.range(0); e--; )
      q.Wait(&v);
  }
  // actually messages, not bytes:
  state.SetBytesProcessed(
      static_cast<int64_t>(state.iterations())*state.range(0));
}
BENCHMARK_TEMPLATE(BM_Sequential, WaitQueue<int>)->Range(1<<0, 1<<10);

Use `Benchmark::MinTime(double t)` to set the minimum time used to run the
benchmark. This option overrides the `benchmark_min_time` flag.

void BM_test(benchmark::State& state) {
  ... body ...
}
BENCHMARK(BM_test)->MinTime(2.0); // Run for at least 2 seconds.

In a multithreaded test, it is guaranteed that none of the threads will start
until all have called KeepRunning, and all will have finished before KeepRunning
returns false. As such, any global setup or teardown you want to do can be
wrapped in a check against the thread index:

static void BM_MultiThreaded(benchmark::State& state) {
  if (state.thread_index == 0) {
    // Setup code here.
  }
  while (state.KeepRunning()) {
    // Run the test as normal.
  }
  if (state.thread_index == 0) {
    // Teardown code here.
  }
}
BENCHMARK(BM_MultiThreaded)->Threads(4);


If a benchmark runs only a few milliseconds it may be hard to visually compare
the measured times, since the output data is given in nanoseconds by default.
To make the report easier to read, set the time unit explicitly:

BENCHMARK(BM_test)->Unit(benchmark::kMillisecond);
*/

#ifndef BENCHMARK_BENCHMARK_API_H_
#define BENCHMARK_BENCHMARK_API_H_

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

#include <string>
#include <vector>
#include <map>

#include "macros.h"

#if defined(BENCHMARK_HAS_CXX11)
#include <type_traits>
#include <initializer_list>
#include <utility>
#endif

#if defined(_MSC_VER)
#include <intrin.h> // for _ReadWriteBarrier
#endif

namespace benchmark {
class BenchmarkReporter;

void Initialize(int* argc, char** argv);

// Report to stdout all arguments in 'argv' as unrecognized except the first.
// Returns true if there is at least one unrecognized argument (i.e. 'argc' > 1).
bool ReportUnrecognizedArguments(int argc, char** argv);

// Generate a list of benchmarks matching the specified --benchmark_filter flag
// and if --benchmark_list_tests is specified return after printing the name
// of each matching benchmark. Otherwise run each matching benchmark and
// report the results.
//
// The second and third overloads use the specified 'console_reporter' and
// 'file_reporter' respectively. 'file_reporter' will write to the file
// specified by '--benchmark_output'. If '--benchmark_output' is not given the
// 'file_reporter' is ignored.
//
// RETURNS: The number of matching benchmarks.
size_t RunSpecifiedBenchmarks();
size_t RunSpecifiedBenchmarks(BenchmarkReporter* console_reporter);
size_t RunSpecifiedBenchmarks(BenchmarkReporter* console_reporter,
                              BenchmarkReporter* file_reporter);

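// For example, a minimal sketch of the reporter overloads (here 'MyReporter'
// stands for a hypothetical user-defined BenchmarkReporter subclass; it is not
// declared in this header):
//
//   MyReporter reporter;
//   benchmark::Initialize(&argc, argv);
//   benchmark::RunSpecifiedBenchmarks(&reporter);
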
// If this routine is called, peak memory allocation past this point in the
// benchmark is reported at the end of the benchmark report line. (It is
// computed by running the benchmark once with a single iteration and a memory
// tracer.)
// TODO(dominic)
// void MemoryUsage();

namespace internal {
class Benchmark;
class BenchmarkImp;
class BenchmarkFamilies;

void UseCharPointer(char const volatile*);

// Take ownership of the pointer and register the benchmark. Return the
// registered benchmark.
Benchmark* RegisterBenchmarkInternal(Benchmark*);

// Ensure that the standard streams are properly initialized in every TU.
int InitializeStreams();
BENCHMARK_UNUSED static int stream_init_anchor = InitializeStreams();

}  // end namespace internal


#if !defined(__GNUC__) || defined(__pnacl__) || defined(EMSCRIPTEN)
# define BENCHMARK_HAS_NO_INLINE_ASSEMBLY
#endif

// The DoNotOptimize(...) function can be used to prevent a value or
// expression from being optimized away by the compiler. This function is
// intended to add little to no overhead.
// See: https://youtu.be/nXaxk27zwlk?t=2441
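//
// A usage sketch (BM_VectorAccess is a hypothetical benchmark, not part of
// this library): DoNotOptimize() keeps a result "live" and ClobberMemory()
// forces pending writes to be completed before the next statement.
//
//   static void BM_VectorAccess(benchmark::State& state) {
//     std::vector<int> v(state.range(0), 1);
//     while (state.KeepRunning()) {
//       benchmark::DoNotOptimize(v.data());  // pointer is treated as "used"
//       benchmark::ClobberMemory();          // writes to v must be finished
//     }
//   }
//   BENCHMARK(BM_VectorAccess)->Arg(1 << 10);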
#ifndef BENCHMARK_HAS_NO_INLINE_ASSEMBLY
template <class Tp>
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
  asm volatile("" : : "g"(value) : "memory");
}
// Force the compiler to flush pending writes to global memory. Acts as an
// effective read/write barrier
inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() {
  asm volatile("" : : : "memory");
}
#elif defined(_MSC_VER)
template <class Tp>
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
  internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value));
  _ReadWriteBarrier();
}

inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() {
  _ReadWriteBarrier();
}
#else
template <class Tp>
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
  internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value));
}
// FIXME Add ClobberMemory() for non-gnu and non-msvc compilers
#endif



// This class is used for user-defined counters.
class Counter {
public:

  enum Flags {
    kDefaults   = 0,
    // Mark the counter as a rate. It will be presented divided
    // by the duration of the benchmark.
    kIsRate     = 1,
    // Mark the counter as a thread-average quantity. It will be
    // presented divided by the number of threads.
    kAvgThreads = 2,
    // Mark the counter as a thread-average rate. See above.
    kAvgThreadsRate = kIsRate|kAvgThreads
  };

  double value;
  Flags  flags;

  BENCHMARK_ALWAYS_INLINE
  Counter(double v = 0., Flags f = kDefaults) : value(v), flags(f) {}

  BENCHMARK_ALWAYS_INLINE operator double const& () const { return value; }
  BENCHMARK_ALWAYS_INLINE operator double      & ()       { return value; }

};

// This is the container for the user-defined counters.
typedef std::map<std::string, Counter> UserCounters;


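// A usage sketch of user-defined counters (the counter name "FooRate" and the
// value 'num_foos' are hypothetical): inside a benchmark function, assign into
// state.counters and the value is reported alongside the timing results.
//
//   state.counters["FooRate"] =
//       benchmark::Counter(num_foos, benchmark::Counter::kIsRate);
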
// TimeUnit is passed to a benchmark in order to specify the order of magnitude
// for the measured time.
enum TimeUnit { kNanosecond, kMicrosecond, kMillisecond };

// BigO is passed to a benchmark in order to specify the asymptotic
// computational complexity for the benchmark. If oAuto is selected, the
// complexity will be computed automatically as the best fit.
enum BigO { oNone, o1, oN, oNSquared, oNCubed, oLogN, oNLogN, oAuto, oLambda };

// BigOFunc is passed to a benchmark in order to specify the asymptotic
// computational complexity for the benchmark.
typedef double(BigOFunc)(int);

namespace internal {
class ThreadTimer;
class ThreadManager;

#if defined(BENCHMARK_HAS_CXX11)
enum ReportMode : unsigned {
#else
enum ReportMode {
#endif
  RM_Unspecified,  // The mode has not been manually specified
  RM_Default,      // The mode is user-specified as default.
  RM_ReportAggregatesOnly
};
}

// State is passed to a running Benchmark and contains state for the
// benchmark to use.
class State {
 public:
  // Returns true if the benchmark should continue through another iteration.
  // NOTE: A benchmark may not return from the test until KeepRunning() has
  // returned false.
  bool KeepRunning() {
    if (BENCHMARK_BUILTIN_EXPECT(!started_, false)) {
      StartKeepRunning();
    }
    bool const res = total_iterations_++ < max_iterations;
    if (BENCHMARK_BUILTIN_EXPECT(!res, false)) {
      FinishKeepRunning();
    }
    return res;
  }

  // REQUIRES: timer is running and 'SkipWithError(...)' has not been called
  //           by the current thread.
  // Stop the benchmark timer.  If not called, the timer will be
  // automatically stopped after KeepRunning() returns false for the first time.
  //
  // For threaded benchmarks the PauseTiming() function only pauses the timing
  // for the current thread.
  //
  // NOTE: The "real time" measurement is per-thread. If different threads
  // report different measurements the largest one is reported.
  //
  // NOTE: PauseTiming()/ResumeTiming() are relatively
  // heavyweight, and so their use should generally be avoided
  // within each benchmark iteration, if possible.
  void PauseTiming();

  // REQUIRES: timer is not running and 'SkipWithError(...)' has not been called
  //           by the current thread.
  // Start the benchmark timer.  The timer is NOT running on entrance to the
  // benchmark function. It begins running after the first call to KeepRunning().
  //
  // NOTE: PauseTiming()/ResumeTiming() are relatively
  // heavyweight, and so their use should generally be avoided
  // within each benchmark iteration, if possible.
  void ResumeTiming();

  // REQUIRES: 'SkipWithError(...)' has not been called previously by the
  //            current thread.
  // Skip any future iterations of the 'KeepRunning()' loop in the current
  // thread and report an error with the specified 'msg'. After this call
  // the user may explicitly 'return' from the benchmark.
  //
  // For threaded benchmarks only the current thread stops executing and future
  // calls to `KeepRunning()` will block until all threads have completed
  // the `KeepRunning()` loop. If multiple threads report an error only the
  // first error message is used.
  //
  // NOTE: Calling 'SkipWithError(...)' does not cause the benchmark to exit
  // the current scope immediately. If the function is called from within
  // the 'KeepRunning()' loop the current iteration will finish. It is the
  // user's responsibility to exit the scope as needed.
  void SkipWithError(const char* msg);

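  // A hedged sketch of typical use (BM_ParseFile and its input file are
  // hypothetical, not part of this library):
  //
  //   static void BM_ParseFile(benchmark::State& state) {
  //     std::ifstream in("input.txt");
  //     if (!in) {
  //       state.SkipWithError("could not open input.txt");
  //       return;  // returning early is allowed once SkipWithError() was called
  //     }
  //     while (state.KeepRunning()) {
  //       // ... parse the file ...
  //     }
  //   }
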
  // REQUIRES: called exactly once per iteration of the KeepRunning loop.
  // Set the manually measured time for this benchmark iteration, which
  // is used instead of automatically measured time if UseManualTime() was
  // specified.
  //
  // For threaded benchmarks the final value will be set to the largest
  // reported value.
  void SetIterationTime(double seconds);

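  // A hedged sketch of manual timing (DoSomeGpuWork() is a hypothetical
  // workload; the benchmark must also be registered with UseManualTime()):
  //
  //   static void BM_GpuWork(benchmark::State& state) {
  //     while (state.KeepRunning()) {
  //       auto start = std::chrono::high_resolution_clock::now();
  //       DoSomeGpuWork();
  //       auto end = std::chrono::high_resolution_clock::now();
  //       state.SetIterationTime(
  //           std::chrono::duration<double>(end - start).count());
  //     }
  //   }
  //   BENCHMARK(BM_GpuWork)->UseManualTime();
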
  // Set the number of bytes processed by the current benchmark
  // execution.  This routine is typically called once at the end of a
  // throughput oriented benchmark.  If this routine is called with a
  // value > 0, the report is printed in MB/sec instead of nanoseconds
  // per iteration.
  //
  // REQUIRES: a benchmark has exited its KeepRunning loop.
  BENCHMARK_ALWAYS_INLINE
  void SetBytesProcessed(size_t bytes) { bytes_processed_ = bytes; }

  BENCHMARK_ALWAYS_INLINE
  size_t bytes_processed() const { return bytes_processed_; }

  // If this routine is called with complexity_n > 0 and a complexity report is
  // requested for the family of benchmarks, then the current benchmark will be
  // part of the computation and complexity_n will represent the length of N.
  BENCHMARK_ALWAYS_INLINE
  void SetComplexityN(int complexity_n) { complexity_n_ = complexity_n; }

  BENCHMARK_ALWAYS_INLINE
  int complexity_length_n() { return complexity_n_; }

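  // A hedged sketch of a complexity run (BM_StringCompare is a hypothetical
  // benchmark; oN is the fit expected for this workload):
  //
  //   static void BM_StringCompare(benchmark::State& state) {
  //     std::string s1(state.range(0), '-');
  //     std::string s2(state.range(0), '-');
  //     while (state.KeepRunning())
  //       benchmark::DoNotOptimize(s1.compare(s2));
  //     state.SetComplexityN(state.range(0));
  //   }
  //   BENCHMARK(BM_StringCompare)
  //       ->Range(1 << 10, 1 << 18)->Complexity(benchmark::oN);
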
  // If this routine is called with items > 0, then an items/s
  // label is printed on the benchmark report line for the currently
  // executing benchmark. It is typically called at the end of a processing
  // benchmark where a processing items/second output is desired.
  //
  // REQUIRES: a benchmark has exited its KeepRunning loop.
  BENCHMARK_ALWAYS_INLINE
  void SetItemsProcessed(size_t items) { items_processed_ = items; }

  BENCHMARK_ALWAYS_INLINE
  size_t items_processed() const { return items_processed_; }

  // If this routine is called, the specified label is printed at the
  // end of the benchmark report line for the currently executing
  // benchmark.  Example:
  //  static void BM_Compress(benchmark::State& state) {
  //    ...
  //    double compression = input_size / output_size;
  //    state.SetLabel(StringPrintf("compress:%.1f%%", 100.0*compression));
  //  }
  // Produces output that looks like:
  //  BM_Compress   50         50   14115038  compress:27.3%
  //
  // REQUIRES: a benchmark has exited its KeepRunning loop.
  void SetLabel(const char* label);

  void BENCHMARK_ALWAYS_INLINE SetLabel(const std::string& str) {
    this->SetLabel(str.c_str());
  }

  // Range arguments for this run. CHECKs if the argument has been set.
  BENCHMARK_ALWAYS_INLINE
  int range(std::size_t pos = 0) const {
    assert(range_.size() > pos);
    return range_[pos];
  }

  BENCHMARK_DEPRECATED_MSG("use 'range(0)' instead")
  int range_x() const { return range(0); }

  BENCHMARK_DEPRECATED_MSG("use 'range(1)' instead")
  int range_y() const { return range(1); }

  BENCHMARK_ALWAYS_INLINE
  size_t iterations() const { return total_iterations_; }

 private:
  bool started_;
  bool finished_;
  size_t total_iterations_;

  std::vector<int> range_;

  size_t bytes_processed_;
  size_t items_processed_;

  int complexity_n_;

  bool error_occurred_;

 public:
  // Container for user-defined counters.
  UserCounters counters;
  // Index of the executing thread. Values from [0, threads).
  const int thread_index;
  // Number of threads concurrently executing the benchmark.
  const int threads;
  const size_t max_iterations;

  // TODO make me private
  State(size_t max_iters, const std::vector<int>& ranges, int thread_i,
        int n_threads, internal::ThreadTimer* timer,
        internal::ThreadManager* manager);

 private:
  void StartKeepRunning();
  void FinishKeepRunning();
  internal::ThreadTimer* timer_;
  internal::ThreadManager* manager_;
  BENCHMARK_DISALLOW_COPY_AND_ASSIGN(State);
};

namespace internal {

typedef void(Function)(State&);

// ------------------------------------------------------
// Benchmark registration object.  The BENCHMARK() macro expands
// into an internal::Benchmark* object.  Various methods can
// be called on this object to change the properties of the benchmark.
// Each method returns "this" so that multiple method calls can be
// chained into one expression.
class Benchmark {
 public:
  virtual ~Benchmark();

  // Note: the following methods all return "this" so that multiple
  // method calls can be chained together in one expression.

  // Run this benchmark once with "x" as the extra argument passed
  // to the function.
  // REQUIRES: The function passed to the constructor must accept an arg1.
  Benchmark* Arg(int x);

  // Run this benchmark with the given time unit for the generated output report.
  Benchmark* Unit(TimeUnit unit);

  // Run this benchmark once for a number of values picked from the
  // range [start..limit].  (start and limit are always picked.)
  // REQUIRES: The function passed to the constructor must accept an arg1.
  Benchmark* Range(int start, int limit);

  // Run this benchmark once for all values in the range [start..limit] with
  // a specific step.
  // REQUIRES: The function passed to the constructor must accept an arg1.
  Benchmark* DenseRange(int start, int limit, int step = 1);

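  // For instance (a sketch reusing the BM_memcpy example from the header
  // comment above):
  //
  //   BENCHMARK(BM_memcpy)->DenseRange(0, 1024, 128);  // 0, 128, 256, ..., 1024
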
  // Run this benchmark once with "args" as the extra arguments passed
  // to the function.
  // REQUIRES: The function passed to the constructor must accept arg1, arg2 ...
  Benchmark* Args(const std::vector<int>& args);

  // Equivalent to Args({x, y})
  // NOTE: This is a legacy C++03 interface provided for compatibility only.
  //   New code should use 'Args'.
  Benchmark* ArgPair(int x, int y) {
    std::vector<int> args;
    args.push_back(x);
    args.push_back(y);
    return Args(args);
  }

  // Run this benchmark once for a number of values picked from the
  // ranges [start..limit].  (starts and limits are always picked.)
  // REQUIRES: The function passed to the constructor must accept arg1, arg2 ...
  Benchmark* Ranges(const std::vector<std::pair<int, int> >& ranges);

  // Equivalent to ArgNames({name})
  Benchmark* ArgName(const std::string& name);

  // Set the argument names to display in the benchmark name. If not called,
  // only argument values will be shown.
  Benchmark* ArgNames(const std::vector<std::string>& names);

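  // For instance (a sketch based on the BM_SetInsert example above; the
  // argument names are purely illustrative):
  //
  //   BENCHMARK(BM_SetInsert)
  //       ->Ranges({{1 << 10, 8 << 10}, {1, 512}})
  //       ->ArgNames({"set_size", "inserts"});
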
  // Equivalent to Ranges({{lo1, hi1}, {lo2, hi2}}).
  // NOTE: This is a legacy C++03 interface provided for compatibility only.
  //   New code should use 'Ranges'.
  Benchmark* RangePair(int lo1, int hi1, int lo2, int hi2) {
    std::vector<std::pair<int, int> > ranges;
    ranges.push_back(std::make_pair(lo1, hi1));
    ranges.push_back(std::make_pair(lo2, hi2));
    return Ranges(ranges);
  }

  // Pass this benchmark object to *func, which can customize
  // the benchmark by calling various methods like Arg, Args,
  // Threads, etc.
  Benchmark* Apply(void (*func)(Benchmark* benchmark));

  // Set the range multiplier for non-dense range. If not called, the range
  // multiplier kRangeMultiplier will be used.
  Benchmark* RangeMultiplier(int multiplier);

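  // For instance (a sketch; non-dense ranges otherwise grow by the default
  // kRangeMultiplier, so this switches the progression to powers of two):
  //
  //   BENCHMARK(BM_memcpy)->RangeMultiplier(2)->Range(8, 8 << 10);
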
  // Set the minimum amount of time to use when running this benchmark. This
  // option overrides the `benchmark_min_time` flag.
  // REQUIRES: `t > 0` and `Iterations` has not been called on this benchmark.
  Benchmark* MinTime(double t);

  // Specify the number of iterations that should be run by this benchmark.
  // REQUIRES: 'n > 0' and `MinTime` has not been called on this benchmark.
  //
  // NOTE: This function should only be used when *exact* iteration control is
  //   needed and never to control or limit how long a benchmark runs, where
  //   `--benchmark_min_time=N` or `MinTime(...)` should be used instead.
  Benchmark* Iterations(size_t n);

  // Specify the number of times to repeat this benchmark. This option overrides
  // the `benchmark_repetitions` flag.
  // REQUIRES: `n > 0`
  Benchmark* Repetitions(int n);

  // Specify if each repetition of the benchmark should be reported separately
  // or if only the final statistics should be reported. If the benchmark
  // is not repeated then the single result is always reported.
  Benchmark* ReportAggregatesOnly(bool v = true);

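  // For instance (a sketch; BM_test is the placeholder benchmark used in the
  // header comment above), with
  //
  //   BENCHMARK(BM_test)->Repetitions(10)->ReportAggregatesOnly();
  //
  // only the aggregate statistics of the 10 repetitions (such as the mean and
  // standard deviation) appear in the report.
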
  // If a particular benchmark is I/O bound, runs multiple threads internally or
  // if for some reason CPU timings are not representative, call this method. If
  // called, the elapsed time will be used to control how many iterations are
  // run, and in the printing of items/second or MB/second values.  If not
  // called, the CPU time used by the benchmark will be used.
  Benchmark* UseRealTime();

  // If a benchmark must measure time manually (e.g. if GPU execution time is
  // being measured), call this method. If called, each benchmark iteration
  // should call SetIterationTime(seconds) to report the measured time, which
  // will be used to control how many iterations are run, and in the printing
  // of items/second or MB/second values.
  Benchmark* UseManualTime();

  // Set the asymptotic computational complexity for the benchmark. If called,
  // the asymptotic computational complexity will be shown on the output.
  Benchmark* Complexity(BigO complexity = benchmark::oAuto);

  // Set the asymptotic computational complexity for the benchmark. If called,
  // the asymptotic computational complexity will be shown on the output.
  Benchmark* Complexity(BigOFunc* complexity);

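  // For instance (a sketch; a capture-less lambda converts to BigOFunc* and can
  // describe a fit that the predefined BigO values do not cover):
  //
  //   BENCHMARK(BM_StringCompare)
  //       ->Range(1 << 10, 1 << 18)
  //       ->Complexity([](int n) { return static_cast<double>(n); });
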
  // Support for running multiple copies of the same benchmark concurrently
  // in multiple threads.  This may be useful when measuring the scaling
  // of some piece of code.

  // Run one instance of this benchmark concurrently in t threads.
  Benchmark* Threads(int t);

  // Pick a set of values T from [min_threads,max_threads].
  // min_threads and max_threads are always included in T.  Run this
  // benchmark once for each value in T.  The benchmark run for a
  // particular value t consists of t threads running the benchmark
  // function concurrently.  For example, consider:
  //    BENCHMARK(Foo)->ThreadRange(1,16);
  // This will run the following benchmarks:
  //    Foo in 1 thread
  //    Foo in 2 threads
  //    Foo in 4 threads
  //    Foo in 8 threads
  //    Foo in 16 threads
  Benchmark* ThreadRange(int min_threads, int max_threads);

  // For each value n in the range, run this benchmark once using n threads.
  // min_threads and max_threads are always included in the range.
  // stride specifies the increment. E.g. DenseThreadRange(1, 8, 3) starts
  // a benchmark with 1, 4, 7 and 8 threads.
  Benchmark* DenseThreadRange(int min_threads, int max_threads, int stride = 1);

  // Equivalent to ThreadRange(NumCPUs(), NumCPUs())
  Benchmark* ThreadPerCpu();

  virtual void Run(State& state) = 0;

  // Used inside the benchmark implementation
  struct Instance;

 protected:
  explicit Benchmark(const char* name);
  Benchmark(Benchmark const&);
  void SetName(const char* name);

  int ArgsCnt() const;

  static void AddRange(std::vector<int>* dst, int lo, int hi, int mult);

 private:
  friend class BenchmarkFamilies;

  std::string name_;
  ReportMode report_mode_;
  std::vector<std::string> arg_names_;   // Arg names for all benchmark runs
  std::vector<std::vector<int> > args_;  // Args for all benchmark runs
  TimeUnit time_unit_;
  int range_multiplier_;
  double min_time_;
  size_t iterations_;
  int repetitions_;
  bool use_real_time_;
  bool use_manual_time_;
  BigO complexity_;
  BigOFunc* complexity_lambda_;
  std::vector<int> thread_counts_;

  Benchmark& operator=(Benchmark const&);
};

}  // namespace internal

// Create and register a benchmark with the specified 'name' that invokes
// the specified functor 'fn'.
//
// RETURNS: A pointer to the registered benchmark.
internal::Benchmark* RegisterBenchmark(const char* name,
                                       internal::Function* fn);

#if defined(BENCHMARK_HAS_CXX11)
template <class Lambda>
internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn);
#endif

namespace internal {
// The class used to hold all benchmarks created from static functions
// (i.e. those created using the BENCHMARK(...) macros).
class FunctionBenchmark : public Benchmark {
 public:
  FunctionBenchmark(const char* name, Function* func)
      : Benchmark(name), func_(func) {}

  virtual void Run(State& st);

 private:
  Function* func_;
};

#ifdef BENCHMARK_HAS_CXX11
template <class Lambda>
class LambdaBenchmark : public Benchmark {
 public:
  virtual void Run(State& st) { lambda_(st); }

 private:
  template <class OLambda>
  LambdaBenchmark(const char* name, OLambda&& lam)
      : Benchmark(name), lambda_(std::forward<OLambda>(lam)) {}

  LambdaBenchmark(LambdaBenchmark const&) = delete;

 private:
  template <class Lam>
  friend Benchmark* ::benchmark::RegisterBenchmark(const char*, Lam&&);

  Lambda lambda_;
};
#endif

}  // end namespace internal

inline internal::Benchmark* RegisterBenchmark(const char* name,
                                              internal::Function* fn) {
  return internal::RegisterBenchmarkInternal(
      ::new internal::FunctionBenchmark(name, fn));
}

#ifdef BENCHMARK_HAS_CXX11
template <class Lambda>
internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn) {
  using BenchType =
      internal::LambdaBenchmark<typename std::decay<Lambda>::type>;
  return internal::RegisterBenchmarkInternal(
      ::new BenchType(name, std::forward<Lambda>(fn)));
}
#endif

#if defined(BENCHMARK_HAS_CXX11) && \
    (!defined(BENCHMARK_GCC_VERSION) || BENCHMARK_GCC_VERSION >= 409)
template <class Lambda, class... Args>
internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn,
                                       Args&&... args) {
  return benchmark::RegisterBenchmark(
      name, [=](benchmark::State& st) { fn(st, args...); });
}
#else
#define BENCHMARK_HAS_NO_VARIADIC_REGISTER_BENCHMARK
#endif

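// A hedged sketch of runtime registration (the benchmark name and the captured
// value below are hypothetical):
//
//   int main(int argc, char** argv) {
//     benchmark::Initialize(&argc, argv);
//     int limit = 42;  // a value only known at run time
//     benchmark::RegisterBenchmark("BM_Dynamic", [limit](benchmark::State& st) {
//       while (st.KeepRunning())
//         benchmark::DoNotOptimize(limit * limit);
//     });
//     benchmark::RunSpecifiedBenchmarks();
//   }
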
// The base class for all fixture tests.
class Fixture : public internal::Benchmark {
 public:
  Fixture() : internal::Benchmark("") {}

  virtual void Run(State& st) {
    this->SetUp(st);
    this->BenchmarkCase(st);
    this->TearDown(st);
  }

  // These will be deprecated ...
  virtual void SetUp(const State&) {}
  virtual void TearDown(const State&) {}
  // ... In favor of these.
  virtual void SetUp(State& st) { SetUp(const_cast<const State&>(st)); }
  virtual void TearDown(State& st) { TearDown(const_cast<const State&>(st)); }

 protected:
  virtual void BenchmarkCase(State&) = 0;
};

}  // end namespace benchmark

// ------------------------------------------------------
// Macro to register benchmarks

// Check that __COUNTER__ is defined and that __COUNTER__ increases by 1
// every time it is expanded. X + 1 == X + 0 is used in case X is defined to be
// empty. If X is empty the expression becomes (+1 == +0).
#if defined(__COUNTER__) && (__COUNTER__ + 1 == __COUNTER__ + 0)
#define BENCHMARK_PRIVATE_UNIQUE_ID __COUNTER__
#else
#define BENCHMARK_PRIVATE_UNIQUE_ID __LINE__
#endif

// Helpers for generating unique variable names
#define BENCHMARK_PRIVATE_NAME(n) \
  BENCHMARK_PRIVATE_CONCAT(_benchmark_, BENCHMARK_PRIVATE_UNIQUE_ID, n)
#define BENCHMARK_PRIVATE_CONCAT(a, b, c) BENCHMARK_PRIVATE_CONCAT2(a, b, c)
#define BENCHMARK_PRIVATE_CONCAT2(a, b, c) a##b##c

#define BENCHMARK_PRIVATE_DECLARE(n)                                 \
  static ::benchmark::internal::Benchmark* BENCHMARK_PRIVATE_NAME(n) \
      BENCHMARK_UNUSED

#define BENCHMARK(n)                                     \
  BENCHMARK_PRIVATE_DECLARE(n) =                         \
      (::benchmark::internal::RegisterBenchmarkInternal( \
          new ::benchmark::internal::FunctionBenchmark(#n, n)))

// Old-style macros
#define BENCHMARK_WITH_ARG(n, a) BENCHMARK(n)->Arg((a))
#define BENCHMARK_WITH_ARG2(n, a1, a2) BENCHMARK(n)->Args({(a1), (a2)})
#define BENCHMARK_WITH_UNIT(n, t) BENCHMARK(n)->Unit((t))
#define BENCHMARK_RANGE(n, lo, hi) BENCHMARK(n)->Range((lo), (hi))
#define BENCHMARK_RANGE2(n, l1, h1, l2, h2) \
  BENCHMARK(n)->RangePair((l1), (h1), (l2), (h2))

#if __cplusplus >= 201103L

// Register a benchmark which invokes the function specified by `func`
// with the additional arguments specified by `...`.
//
// For example:
//
// template <class ...ExtraArgs>
// void BM_takes_args(benchmark::State& state, ExtraArgs&&... extra_args) {
//   [...]
// }
// /* Registers a benchmark named "BM_takes_args/int_string_test" */
// BENCHMARK_CAPTURE(BM_takes_args, int_string_test, 42, std::string("abc"));
#define BENCHMARK_CAPTURE(func, test_case_name, ...)     \
  BENCHMARK_PRIVATE_DECLARE(func) =                      \
      (::benchmark::internal::RegisterBenchmarkInternal( \
          new ::benchmark::internal::FunctionBenchmark(  \
              #func "/" #test_case_name,                 \
              [](::benchmark::State& st) { func(st, __VA_ARGS__); })))

#endif  // __cplusplus >= 201103L

// This will register a benchmark for a templatized function.  For example:
//
// template <int arg>
// void BM_Foo(benchmark::State& state);
//
// BENCHMARK_TEMPLATE(BM_Foo, 1);
//
// will register BM_Foo<1> as a benchmark.
#define BENCHMARK_TEMPLATE1(n, a)                        \
  BENCHMARK_PRIVATE_DECLARE(n) =                         \
      (::benchmark::internal::RegisterBenchmarkInternal( \
          new ::benchmark::internal::FunctionBenchmark(#n "<" #a ">", n<a>)))

#define BENCHMARK_TEMPLATE2(n, a, b)                                         \
  BENCHMARK_PRIVATE_DECLARE(n) =                                             \
      (::benchmark::internal::RegisterBenchmarkInternal(                     \
          new ::benchmark::internal::FunctionBenchmark(#n "<" #a "," #b ">", \
                                                       n<a, b>)))

#if __cplusplus >= 201103L
#define BENCHMARK_TEMPLATE(n, ...)                       \
  BENCHMARK_PRIVATE_DECLARE(n) =                         \
      (::benchmark::internal::RegisterBenchmarkInternal( \
          new ::benchmark::internal::FunctionBenchmark(  \
              #n "<" #__VA_ARGS__ ">", n<__VA_ARGS__>)))
#else
#define BENCHMARK_TEMPLATE(n, a) BENCHMARK_TEMPLATE1(n, a)
#endif

#define BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method)        \
  class BaseClass##_##Method##_Benchmark : public BaseClass { \
   public:                                                    \
    BaseClass##_##Method##_Benchmark() : BaseClass() {        \
      this->SetName(#BaseClass "/" #Method);                  \
    }                                                         \
                                                              \
   protected:                                                 \
    virtual void BenchmarkCase(::benchmark::State&);          \
  };

#define BENCHMARK_DEFINE_F(BaseClass, Method)    \
  BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \
  void BaseClass##_##Method##_Benchmark::BenchmarkCase

#define BENCHMARK_REGISTER_F(BaseClass, Method) \
  BENCHMARK_PRIVATE_REGISTER_F(BaseClass##_##Method##_Benchmark)

#define BENCHMARK_PRIVATE_REGISTER_F(TestName) \
  BENCHMARK_PRIVATE_DECLARE(TestName) =        \
      (::benchmark::internal::RegisterBenchmarkInternal(new TestName()))

// This macro will define and register a benchmark within a fixture class.
#define BENCHMARK_F(BaseClass, Method)           \
  BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \
  BENCHMARK_REGISTER_F(BaseClass, Method);       \
  void BaseClass##_##Method##_Benchmark::BenchmarkCase

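// A hedged sketch of fixture usage (MyFixture and the benchmark names are
// hypothetical):
//
//   class MyFixture : public benchmark::Fixture {};
//
//   BENCHMARK_F(MyFixture, FooTest)(benchmark::State& st) {
//     while (st.KeepRunning()) {
//       // ... measured code ...
//     }
//   }
//
//   BENCHMARK_DEFINE_F(MyFixture, BarTest)(benchmark::State& st) {
//     while (st.KeepRunning()) {
//       // ... measured code ...
//     }
//   }
//   BENCHMARK_REGISTER_F(MyFixture, BarTest)->Threads(2);
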
// Helper macro to create a main routine in a test that runs the benchmarks
#define BENCHMARK_MAIN()                                                \
  int main(int argc, char** argv) {                                     \
    ::benchmark::Initialize(&argc, argv);                               \
    if (::benchmark::ReportUnrecognizedArguments(argc, argv)) return 1; \
    ::benchmark::RunSpecifiedBenchmarks();                              \
  }

#endif  // BENCHMARK_BENCHMARK_API_H_