1 // Copyright 2015 Google Inc. All rights reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include "benchmark/benchmark.h" 16 #include "benchmark_api_internal.h" 17 #include "internal_macros.h" 18 19 #ifndef BENCHMARK_OS_WINDOWS 20 #include <sys/resource.h> 21 #include <sys/time.h> 22 #include <unistd.h> 23 #endif 24 25 #include <algorithm> 26 #include <atomic> 27 #include <condition_variable> 28 #include <cstdio> 29 #include <cstdlib> 30 #include <cstring> 31 #include <fstream> 32 #include <iostream> 33 #include <memory> 34 #include <thread> 35 36 #include "check.h" 37 #include "commandlineflags.h" 38 #include "complexity.h" 39 #include "log.h" 40 #include "mutex.h" 41 #include "re.h" 42 #include "stat.h" 43 #include "string_util.h" 44 #include "sysinfo.h" 45 #include "timers.h" 46 47 namespace benchmark { 48 49 namespace { 50 // For non-dense Range, intermediate values are powers of kRangeMultiplier. 51 static const int kRangeMultiplier = 8; 52 // The size of a benchmark family determines is the number of inputs to repeat 53 // the benchmark on. If this is "large" then warn the user during configuration. 54 static const size_t kMaxFamilySize = 100; 55 } // end namespace 56 57 namespace internal { 58 59 //=============================================================================// 60 // BenchmarkFamilies 61 //=============================================================================// 62 63 // Class for managing registered benchmarks. Note that each registered 64 // benchmark identifies a family of related benchmarks to run. 65 class BenchmarkFamilies { 66 public: 67 static BenchmarkFamilies* GetInstance(); 68 69 // Registers a benchmark family and returns the index assigned to it. 70 size_t AddBenchmark(std::unique_ptr<Benchmark> family); 71 72 // Extract the list of benchmark instances that match the specified 73 // regular expression. 74 bool FindBenchmarks(const std::string& re, 75 std::vector<Benchmark::Instance>* benchmarks, 76 std::ostream* Err); 77 78 private: 79 BenchmarkFamilies() {} 80 81 std::vector<std::unique_ptr<Benchmark>> families_; 82 Mutex mutex_; 83 }; 84 85 BenchmarkFamilies* BenchmarkFamilies::GetInstance() { 86 static BenchmarkFamilies instance; 87 return &instance; 88 } 89 90 size_t BenchmarkFamilies::AddBenchmark(std::unique_ptr<Benchmark> family) { 91 MutexLock l(mutex_); 92 size_t index = families_.size(); 93 families_.push_back(std::move(family)); 94 return index; 95 } 96 97 bool BenchmarkFamilies::FindBenchmarks( 98 const std::string& spec, std::vector<Benchmark::Instance>* benchmarks, 99 std::ostream* ErrStream) { 100 CHECK(ErrStream); 101 auto& Err = *ErrStream; 102 // Make regular expression out of command-line flag 103 std::string error_msg; 104 Regex re; 105 if (!re.Init(spec, &error_msg)) { 106 Err << "Could not compile benchmark re: " << error_msg << std::endl; 107 return false; 108 } 109 110 // Special list of thread counts to use when none are specified 111 const std::vector<int> one_thread = {1}; 112 113 MutexLock l(mutex_); 114 for (std::unique_ptr<Benchmark>& family : families_) { 115 // Family was deleted or benchmark doesn't match 116 if (!family) continue; 117 118 if (family->ArgsCnt() == -1) { 119 family->Args({}); 120 } 121 const std::vector<int>* thread_counts = 122 (family->thread_counts_.empty() 123 ? &one_thread 124 : &static_cast<const std::vector<int>&>(family->thread_counts_)); 125 const size_t family_size = family->args_.size() * thread_counts->size(); 126 // The benchmark will be run at least 'family_size' different inputs. 127 // If 'family_size' is very large warn the user. 128 if (family_size > kMaxFamilySize) { 129 Err << "The number of inputs is very large. " << family->name_ 130 << " will be repeated at least " << family_size << " times.\n"; 131 } 132 // reserve in the special case the regex ".", since we know the final 133 // family size. 134 if (spec == ".") benchmarks->reserve(family_size); 135 136 for (auto const& args : family->args_) { 137 for (int num_threads : *thread_counts) { 138 Benchmark::Instance instance; 139 instance.name = family->name_; 140 instance.benchmark = family.get(); 141 instance.report_mode = family->report_mode_; 142 instance.arg = args; 143 instance.time_unit = family->time_unit_; 144 instance.range_multiplier = family->range_multiplier_; 145 instance.min_time = family->min_time_; 146 instance.iterations = family->iterations_; 147 instance.repetitions = family->repetitions_; 148 instance.use_real_time = family->use_real_time_; 149 instance.use_manual_time = family->use_manual_time_; 150 instance.complexity = family->complexity_; 151 instance.complexity_lambda = family->complexity_lambda_; 152 instance.threads = num_threads; 153 154 // Add arguments to instance name 155 size_t arg_i = 0; 156 for (auto const& arg : args) { 157 instance.name += "/"; 158 159 if (arg_i < family->arg_names_.size()) { 160 const auto& arg_name = family->arg_names_[arg_i]; 161 if (!arg_name.empty()) { 162 instance.name += 163 StringPrintF("%s:", family->arg_names_[arg_i].c_str()); 164 } 165 } 166 167 instance.name += std::to_string(arg); 168 ++arg_i; 169 } 170 171 if (!IsZero(family->min_time_)) 172 instance.name += StringPrintF("/min_time:%0.3f", family->min_time_); 173 if (family->iterations_ != 0) 174 instance.name += StringPrintF("/iterations:%d", family->iterations_); 175 if (family->repetitions_ != 0) 176 instance.name += StringPrintF("/repeats:%d", family->repetitions_); 177 178 if (family->use_manual_time_) { 179 instance.name += "/manual_time"; 180 } else if (family->use_real_time_) { 181 instance.name += "/real_time"; 182 } 183 184 // Add the number of threads used to the name 185 if (!family->thread_counts_.empty()) { 186 instance.name += StringPrintF("/threads:%d", instance.threads); 187 } 188 189 if (re.Match(instance.name)) { 190 instance.last_benchmark_instance = (&args == &family->args_.back()); 191 benchmarks->push_back(std::move(instance)); 192 } 193 } 194 } 195 } 196 return true; 197 } 198 199 Benchmark* RegisterBenchmarkInternal(Benchmark* bench) { 200 std::unique_ptr<Benchmark> bench_ptr(bench); 201 BenchmarkFamilies* families = BenchmarkFamilies::GetInstance(); 202 families->AddBenchmark(std::move(bench_ptr)); 203 return bench; 204 } 205 206 // FIXME: This function is a hack so that benchmark.cc can access 207 // `BenchmarkFamilies` 208 bool FindBenchmarksInternal(const std::string& re, 209 std::vector<Benchmark::Instance>* benchmarks, 210 std::ostream* Err) { 211 return BenchmarkFamilies::GetInstance()->FindBenchmarks(re, benchmarks, Err); 212 } 213 214 //=============================================================================// 215 // Benchmark 216 //=============================================================================// 217 218 Benchmark::Benchmark(const char* name) 219 : name_(name), 220 report_mode_(RM_Unspecified), 221 time_unit_(kNanosecond), 222 range_multiplier_(kRangeMultiplier), 223 min_time_(0), 224 iterations_(0), 225 repetitions_(0), 226 use_real_time_(false), 227 use_manual_time_(false), 228 complexity_(oNone), 229 complexity_lambda_(nullptr) {} 230 231 Benchmark::~Benchmark() {} 232 233 void Benchmark::AddRange(std::vector<int>* dst, int lo, int hi, int mult) { 234 CHECK_GE(lo, 0); 235 CHECK_GE(hi, lo); 236 CHECK_GE(mult, 2); 237 238 // Add "lo" 239 dst->push_back(lo); 240 241 static const int kint32max = std::numeric_limits<int32_t>::max(); 242 243 // Now space out the benchmarks in multiples of "mult" 244 for (int32_t i = 1; i < kint32max / mult; i *= mult) { 245 if (i >= hi) break; 246 if (i > lo) { 247 dst->push_back(i); 248 } 249 } 250 // Add "hi" (if different from "lo") 251 if (hi != lo) { 252 dst->push_back(hi); 253 } 254 } 255 256 Benchmark* Benchmark::Arg(int x) { 257 CHECK(ArgsCnt() == -1 || ArgsCnt() == 1); 258 args_.push_back({x}); 259 return this; 260 } 261 262 Benchmark* Benchmark::Unit(TimeUnit unit) { 263 time_unit_ = unit; 264 return this; 265 } 266 267 Benchmark* Benchmark::Range(int start, int limit) { 268 CHECK(ArgsCnt() == -1 || ArgsCnt() == 1); 269 std::vector<int> arglist; 270 AddRange(&arglist, start, limit, range_multiplier_); 271 272 for (int i : arglist) { 273 args_.push_back({i}); 274 } 275 return this; 276 } 277 278 Benchmark* Benchmark::Ranges(const std::vector<std::pair<int, int>>& ranges) { 279 CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(ranges.size())); 280 std::vector<std::vector<int>> arglists(ranges.size()); 281 std::size_t total = 1; 282 for (std::size_t i = 0; i < ranges.size(); i++) { 283 AddRange(&arglists[i], ranges[i].first, ranges[i].second, 284 range_multiplier_); 285 total *= arglists[i].size(); 286 } 287 288 std::vector<std::size_t> ctr(arglists.size(), 0); 289 290 for (std::size_t i = 0; i < total; i++) { 291 std::vector<int> tmp; 292 tmp.reserve(arglists.size()); 293 294 for (std::size_t j = 0; j < arglists.size(); j++) { 295 tmp.push_back(arglists[j].at(ctr[j])); 296 } 297 298 args_.push_back(std::move(tmp)); 299 300 for (std::size_t j = 0; j < arglists.size(); j++) { 301 if (ctr[j] + 1 < arglists[j].size()) { 302 ++ctr[j]; 303 break; 304 } 305 ctr[j] = 0; 306 } 307 } 308 return this; 309 } 310 311 Benchmark* Benchmark::ArgName(const std::string& name) { 312 CHECK(ArgsCnt() == -1 || ArgsCnt() == 1); 313 arg_names_ = {name}; 314 return this; 315 } 316 317 Benchmark* Benchmark::ArgNames(const std::vector<std::string>& names) { 318 CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(names.size())); 319 arg_names_ = names; 320 return this; 321 } 322 323 Benchmark* Benchmark::DenseRange(int start, int limit, int step) { 324 CHECK(ArgsCnt() == -1 || ArgsCnt() == 1); 325 CHECK_GE(start, 0); 326 CHECK_LE(start, limit); 327 for (int arg = start; arg <= limit; arg += step) { 328 args_.push_back({arg}); 329 } 330 return this; 331 } 332 333 Benchmark* Benchmark::Args(const std::vector<int>& args) { 334 CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(args.size())); 335 args_.push_back(args); 336 return this; 337 } 338 339 Benchmark* Benchmark::Apply(void (*custom_arguments)(Benchmark* benchmark)) { 340 custom_arguments(this); 341 return this; 342 } 343 344 Benchmark* Benchmark::RangeMultiplier(int multiplier) { 345 CHECK(multiplier > 1); 346 range_multiplier_ = multiplier; 347 return this; 348 } 349 350 351 Benchmark* Benchmark::MinTime(double t) { 352 CHECK(t > 0.0); 353 CHECK(iterations_ == 0); 354 min_time_ = t; 355 return this; 356 } 357 358 359 Benchmark* Benchmark::Iterations(size_t n) { 360 CHECK(n > 0); 361 CHECK(IsZero(min_time_)); 362 iterations_ = n; 363 return this; 364 } 365 366 Benchmark* Benchmark::Repetitions(int n) { 367 CHECK(n > 0); 368 repetitions_ = n; 369 return this; 370 } 371 372 Benchmark* Benchmark::ReportAggregatesOnly(bool value) { 373 report_mode_ = value ? RM_ReportAggregatesOnly : RM_Default; 374 return this; 375 } 376 377 Benchmark* Benchmark::UseRealTime() { 378 CHECK(!use_manual_time_) 379 << "Cannot set UseRealTime and UseManualTime simultaneously."; 380 use_real_time_ = true; 381 return this; 382 } 383 384 Benchmark* Benchmark::UseManualTime() { 385 CHECK(!use_real_time_) 386 << "Cannot set UseRealTime and UseManualTime simultaneously."; 387 use_manual_time_ = true; 388 return this; 389 } 390 391 Benchmark* Benchmark::Complexity(BigO complexity) { 392 complexity_ = complexity; 393 return this; 394 } 395 396 Benchmark* Benchmark::Complexity(BigOFunc* complexity) { 397 complexity_lambda_ = complexity; 398 complexity_ = oLambda; 399 return this; 400 } 401 402 Benchmark* Benchmark::Threads(int t) { 403 CHECK_GT(t, 0); 404 thread_counts_.push_back(t); 405 return this; 406 } 407 408 Benchmark* Benchmark::ThreadRange(int min_threads, int max_threads) { 409 CHECK_GT(min_threads, 0); 410 CHECK_GE(max_threads, min_threads); 411 412 AddRange(&thread_counts_, min_threads, max_threads, 2); 413 return this; 414 } 415 416 Benchmark* Benchmark::DenseThreadRange(int min_threads, int max_threads, 417 int stride) { 418 CHECK_GT(min_threads, 0); 419 CHECK_GE(max_threads, min_threads); 420 CHECK_GE(stride, 1); 421 422 for (auto i = min_threads; i < max_threads; i += stride) { 423 thread_counts_.push_back(i); 424 } 425 thread_counts_.push_back(max_threads); 426 return this; 427 } 428 429 Benchmark* Benchmark::ThreadPerCpu() { 430 static int num_cpus = NumCPUs(); 431 thread_counts_.push_back(num_cpus); 432 return this; 433 } 434 435 void Benchmark::SetName(const char* name) { name_ = name; } 436 437 int Benchmark::ArgsCnt() const { 438 if (args_.empty()) { 439 if (arg_names_.empty()) return -1; 440 return static_cast<int>(arg_names_.size()); 441 } 442 return static_cast<int>(args_.front().size()); 443 } 444 445 //=============================================================================// 446 // FunctionBenchmark 447 //=============================================================================// 448 449 void FunctionBenchmark::Run(State& st) { func_(st); } 450 451 } // end namespace internal 452 } // end namespace benchmark 453