1 // Copyright 2015 Google Inc. All rights reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include "benchmark/benchmark.h" 16 #include "benchmark_api_internal.h" 17 #include "internal_macros.h" 18 19 #ifndef BENCHMARK_OS_WINDOWS 20 #include <sys/resource.h> 21 #include <sys/time.h> 22 #include <unistd.h> 23 #endif 24 25 #include <algorithm> 26 #include <atomic> 27 #include <condition_variable> 28 #include <cstdio> 29 #include <cstdlib> 30 #include <cstring> 31 #include <fstream> 32 #include <iostream> 33 #include <memory> 34 #include <thread> 35 36 #include "check.h" 37 #include "commandlineflags.h" 38 #include "complexity.h" 39 #include "log.h" 40 #include "mutex.h" 41 #include "re.h" 42 #include "stat.h" 43 #include "string_util.h" 44 #include "sysinfo.h" 45 #include "timers.h" 46 47 namespace benchmark { 48 49 namespace { 50 // For non-dense Range, intermediate values are powers of kRangeMultiplier. 51 static const int kRangeMultiplier = 8; 52 // The size of a benchmark family determines is the number of inputs to repeat 53 // the benchmark on. If this is "large" then warn the user during configuration. 54 static const size_t kMaxFamilySize = 100; 55 } // end namespace 56 57 namespace internal { 58 59 //=============================================================================// 60 // BenchmarkFamilies 61 //=============================================================================// 62 63 // Class for managing registered benchmarks. Note that each registered 64 // benchmark identifies a family of related benchmarks to run. 65 class BenchmarkFamilies { 66 public: 67 static BenchmarkFamilies* GetInstance(); 68 69 // Registers a benchmark family and returns the index assigned to it. 70 size_t AddBenchmark(std::unique_ptr<Benchmark> family); 71 72 // Extract the list of benchmark instances that match the specified 73 // regular expression. 74 bool FindBenchmarks(const std::string& re, 75 std::vector<Benchmark::Instance>* benchmarks, 76 std::ostream* Err); 77 78 private: 79 BenchmarkFamilies() {} 80 81 std::vector<std::unique_ptr<Benchmark>> families_; 82 Mutex mutex_; 83 }; 84 85 BenchmarkFamilies* BenchmarkFamilies::GetInstance() { 86 static BenchmarkFamilies instance; 87 return &instance; 88 } 89 90 size_t BenchmarkFamilies::AddBenchmark(std::unique_ptr<Benchmark> family) { 91 MutexLock l(mutex_); 92 size_t index = families_.size(); 93 families_.push_back(std::move(family)); 94 return index; 95 } 96 97 bool BenchmarkFamilies::FindBenchmarks( 98 const std::string& spec, std::vector<Benchmark::Instance>* benchmarks, 99 std::ostream* ErrStream) { 100 CHECK(ErrStream); 101 auto& Err = *ErrStream; 102 // Make regular expression out of command-line flag 103 std::string error_msg; 104 Regex re; 105 if (!re.Init(spec, &error_msg)) { 106 Err << "Could not compile benchmark re: " << error_msg << std::endl; 107 return false; 108 } 109 110 // Special list of thread counts to use when none are specified 111 const std::vector<int> one_thread = {1}; 112 113 MutexLock l(mutex_); 114 for (std::unique_ptr<Benchmark>& family : families_) { 115 // Family was deleted or benchmark doesn't match 116 if (!family) continue; 117 118 if (family->ArgsCnt() == -1) { 119 family->Args({}); 120 } 121 const std::vector<int>* thread_counts = 122 (family->thread_counts_.empty() 123 ? &one_thread 124 : &static_cast<const std::vector<int>&>(family->thread_counts_)); 125 const size_t family_size = family->args_.size() * thread_counts->size(); 126 // The benchmark will be run at least 'family_size' different inputs. 127 // If 'family_size' is very large warn the user. 128 if (family_size > kMaxFamilySize) { 129 Err << "The number of inputs is very large. " << family->name_ 130 << " will be repeated at least " << family_size << " times.\n"; 131 } 132 // reserve in the special case the regex ".", since we know the final 133 // family size. 134 if (spec == ".") benchmarks->reserve(family_size); 135 136 for (auto const& args : family->args_) { 137 for (int num_threads : *thread_counts) { 138 Benchmark::Instance instance; 139 instance.name = family->name_; 140 instance.benchmark = family.get(); 141 instance.report_mode = family->report_mode_; 142 instance.arg = args; 143 instance.time_unit = family->time_unit_; 144 instance.range_multiplier = family->range_multiplier_; 145 instance.min_time = family->min_time_; 146 instance.repetitions = family->repetitions_; 147 instance.use_real_time = family->use_real_time_; 148 instance.use_manual_time = family->use_manual_time_; 149 instance.complexity = family->complexity_; 150 instance.complexity_lambda = family->complexity_lambda_; 151 instance.threads = num_threads; 152 153 // Add arguments to instance name 154 size_t arg_i = 0; 155 for (auto const& arg : args) { 156 instance.name += "/"; 157 158 if (arg_i < family->arg_names_.size()) { 159 const auto& arg_name = family->arg_names_[arg_i]; 160 if (!arg_name.empty()) { 161 instance.name += 162 StringPrintF("%s:", family->arg_names_[arg_i].c_str()); 163 } 164 } 165 166 AppendHumanReadable(arg, &instance.name); 167 ++arg_i; 168 } 169 170 if (!IsZero(family->min_time_)) { 171 instance.name += StringPrintF("/min_time:%0.3f", family->min_time_); 172 } 173 if (family->repetitions_ != 0) { 174 instance.name += StringPrintF("/repeats:%d", family->repetitions_); 175 } 176 if (family->use_manual_time_) { 177 instance.name += "/manual_time"; 178 } else if (family->use_real_time_) { 179 instance.name += "/real_time"; 180 } 181 182 // Add the number of threads used to the name 183 if (!family->thread_counts_.empty()) { 184 instance.name += StringPrintF("/threads:%d", instance.threads); 185 } 186 187 if (re.Match(instance.name)) { 188 instance.last_benchmark_instance = (&args == &family->args_.back()); 189 benchmarks->push_back(std::move(instance)); 190 } 191 } 192 } 193 } 194 return true; 195 } 196 197 Benchmark* RegisterBenchmarkInternal(Benchmark* bench) { 198 std::unique_ptr<Benchmark> bench_ptr(bench); 199 BenchmarkFamilies* families = BenchmarkFamilies::GetInstance(); 200 families->AddBenchmark(std::move(bench_ptr)); 201 return bench; 202 } 203 204 // FIXME: This function is a hack so that benchmark.cc can access 205 // `BenchmarkFamilies` 206 bool FindBenchmarksInternal(const std::string& re, 207 std::vector<Benchmark::Instance>* benchmarks, 208 std::ostream* Err) { 209 return BenchmarkFamilies::GetInstance()->FindBenchmarks(re, benchmarks, Err); 210 } 211 212 //=============================================================================// 213 // Benchmark 214 //=============================================================================// 215 216 Benchmark::Benchmark(const char* name) 217 : name_(name), 218 report_mode_(RM_Unspecified), 219 time_unit_(kNanosecond), 220 range_multiplier_(kRangeMultiplier), 221 min_time_(0), 222 repetitions_(0), 223 use_real_time_(false), 224 use_manual_time_(false), 225 complexity_(oNone), 226 complexity_lambda_(nullptr) {} 227 228 Benchmark::~Benchmark() {} 229 230 void Benchmark::AddRange(std::vector<int>* dst, int lo, int hi, int mult) { 231 CHECK_GE(lo, 0); 232 CHECK_GE(hi, lo); 233 CHECK_GE(mult, 2); 234 235 // Add "lo" 236 dst->push_back(lo); 237 238 static const int kint32max = std::numeric_limits<int32_t>::max(); 239 240 // Now space out the benchmarks in multiples of "mult" 241 for (int32_t i = 1; i < kint32max / mult; i *= mult) { 242 if (i >= hi) break; 243 if (i > lo) { 244 dst->push_back(i); 245 } 246 } 247 // Add "hi" (if different from "lo") 248 if (hi != lo) { 249 dst->push_back(hi); 250 } 251 } 252 253 Benchmark* Benchmark::Arg(int x) { 254 CHECK(ArgsCnt() == -1 || ArgsCnt() == 1); 255 args_.push_back({x}); 256 return this; 257 } 258 259 Benchmark* Benchmark::Unit(TimeUnit unit) { 260 time_unit_ = unit; 261 return this; 262 } 263 264 Benchmark* Benchmark::Range(int start, int limit) { 265 CHECK(ArgsCnt() == -1 || ArgsCnt() == 1); 266 std::vector<int> arglist; 267 AddRange(&arglist, start, limit, range_multiplier_); 268 269 for (int i : arglist) { 270 args_.push_back({i}); 271 } 272 return this; 273 } 274 275 Benchmark* Benchmark::Ranges(const std::vector<std::pair<int, int>>& ranges) { 276 CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(ranges.size())); 277 std::vector<std::vector<int>> arglists(ranges.size()); 278 std::size_t total = 1; 279 for (std::size_t i = 0; i < ranges.size(); i++) { 280 AddRange(&arglists[i], ranges[i].first, ranges[i].second, 281 range_multiplier_); 282 total *= arglists[i].size(); 283 } 284 285 std::vector<std::size_t> ctr(arglists.size(), 0); 286 287 for (std::size_t i = 0; i < total; i++) { 288 std::vector<int> tmp; 289 tmp.reserve(arglists.size()); 290 291 for (std::size_t j = 0; j < arglists.size(); j++) { 292 tmp.push_back(arglists[j].at(ctr[j])); 293 } 294 295 args_.push_back(std::move(tmp)); 296 297 for (std::size_t j = 0; j < arglists.size(); j++) { 298 if (ctr[j] + 1 < arglists[j].size()) { 299 ++ctr[j]; 300 break; 301 } 302 ctr[j] = 0; 303 } 304 } 305 return this; 306 } 307 308 Benchmark* Benchmark::ArgName(const std::string& name) { 309 CHECK(ArgsCnt() == -1 || ArgsCnt() == 1); 310 arg_names_ = {name}; 311 return this; 312 } 313 314 Benchmark* Benchmark::ArgNames(const std::vector<std::string>& names) { 315 CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(names.size())); 316 arg_names_ = names; 317 return this; 318 } 319 320 Benchmark* Benchmark::DenseRange(int start, int limit, int step) { 321 CHECK(ArgsCnt() == -1 || ArgsCnt() == 1); 322 CHECK_GE(start, 0); 323 CHECK_LE(start, limit); 324 for (int arg = start; arg <= limit; arg += step) { 325 args_.push_back({arg}); 326 } 327 return this; 328 } 329 330 Benchmark* Benchmark::Args(const std::vector<int>& args) { 331 CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(args.size())); 332 args_.push_back(args); 333 return this; 334 } 335 336 Benchmark* Benchmark::Apply(void (*custom_arguments)(Benchmark* benchmark)) { 337 custom_arguments(this); 338 return this; 339 } 340 341 Benchmark* Benchmark::RangeMultiplier(int multiplier) { 342 CHECK(multiplier > 1); 343 range_multiplier_ = multiplier; 344 return this; 345 } 346 347 Benchmark* Benchmark::Repetitions(int n) { 348 CHECK(n > 0); 349 repetitions_ = n; 350 return this; 351 } 352 353 Benchmark* Benchmark::ReportAggregatesOnly(bool value) { 354 report_mode_ = value ? RM_ReportAggregatesOnly : RM_Default; 355 return this; 356 } 357 358 Benchmark* Benchmark::MinTime(double t) { 359 CHECK(t > 0.0); 360 min_time_ = t; 361 return this; 362 } 363 364 Benchmark* Benchmark::UseRealTime() { 365 CHECK(!use_manual_time_) 366 << "Cannot set UseRealTime and UseManualTime simultaneously."; 367 use_real_time_ = true; 368 return this; 369 } 370 371 Benchmark* Benchmark::UseManualTime() { 372 CHECK(!use_real_time_) 373 << "Cannot set UseRealTime and UseManualTime simultaneously."; 374 use_manual_time_ = true; 375 return this; 376 } 377 378 Benchmark* Benchmark::Complexity(BigO complexity) { 379 complexity_ = complexity; 380 return this; 381 } 382 383 Benchmark* Benchmark::Complexity(BigOFunc* complexity) { 384 complexity_lambda_ = complexity; 385 complexity_ = oLambda; 386 return this; 387 } 388 389 Benchmark* Benchmark::Threads(int t) { 390 CHECK_GT(t, 0); 391 thread_counts_.push_back(t); 392 return this; 393 } 394 395 Benchmark* Benchmark::ThreadRange(int min_threads, int max_threads) { 396 CHECK_GT(min_threads, 0); 397 CHECK_GE(max_threads, min_threads); 398 399 AddRange(&thread_counts_, min_threads, max_threads, 2); 400 return this; 401 } 402 403 Benchmark* Benchmark::DenseThreadRange(int min_threads, int max_threads, 404 int stride) { 405 CHECK_GT(min_threads, 0); 406 CHECK_GE(max_threads, min_threads); 407 CHECK_GE(stride, 1); 408 409 for (auto i = min_threads; i < max_threads; i += stride) { 410 thread_counts_.push_back(i); 411 } 412 thread_counts_.push_back(max_threads); 413 return this; 414 } 415 416 Benchmark* Benchmark::ThreadPerCpu() { 417 static int num_cpus = NumCPUs(); 418 thread_counts_.push_back(num_cpus); 419 return this; 420 } 421 422 void Benchmark::SetName(const char* name) { name_ = name; } 423 424 int Benchmark::ArgsCnt() const { 425 if (args_.empty()) { 426 if (arg_names_.empty()) return -1; 427 return static_cast<int>(arg_names_.size()); 428 } 429 return static_cast<int>(args_.front().size()); 430 } 431 432 //=============================================================================// 433 // FunctionBenchmark 434 //=============================================================================// 435 436 void FunctionBenchmark::Run(State& st) { func_(st); } 437 438 } // end namespace internal 439 } // end namespace benchmark 440