1 // Copyright 2015 Google Inc. All rights reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include "benchmark_runner.h" 16 #include "benchmark/benchmark.h" 17 #include "benchmark_api_internal.h" 18 #include "internal_macros.h" 19 20 #ifndef BENCHMARK_OS_WINDOWS 21 #ifndef BENCHMARK_OS_FUCHSIA 22 #include <sys/resource.h> 23 #endif 24 #include <sys/time.h> 25 #include <unistd.h> 26 #endif 27 28 #include <algorithm> 29 #include <atomic> 30 #include <condition_variable> 31 #include <cstdio> 32 #include <cstdlib> 33 #include <fstream> 34 #include <iostream> 35 #include <memory> 36 #include <string> 37 #include <thread> 38 #include <utility> 39 40 #include "check.h" 41 #include "colorprint.h" 42 #include "commandlineflags.h" 43 #include "complexity.h" 44 #include "counter.h" 45 #include "internal_macros.h" 46 #include "log.h" 47 #include "mutex.h" 48 #include "re.h" 49 #include "statistics.h" 50 #include "string_util.h" 51 #include "thread_manager.h" 52 #include "thread_timer.h" 53 54 namespace benchmark { 55 56 namespace internal { 57 58 MemoryManager* memory_manager = nullptr; 59 60 namespace { 61 62 static const size_t kMaxIterations = 1000000000; 63 64 BenchmarkReporter::Run CreateRunReport( 65 const benchmark::internal::BenchmarkInstance& b, 66 const internal::ThreadManager::Result& results, size_t memory_iterations, 67 const MemoryManager::Result& memory_result, double seconds) { 68 // Create report about this benchmark run. 69 BenchmarkReporter::Run report; 70 71 report.run_name = b.name; 72 report.error_occurred = results.has_error_; 73 report.error_message = results.error_message_; 74 report.report_label = results.report_label_; 75 // This is the total iterations across all threads. 76 report.iterations = results.iterations; 77 report.time_unit = b.time_unit; 78 79 if (!report.error_occurred) { 80 if (b.use_manual_time) { 81 report.real_accumulated_time = results.manual_time_used; 82 } else { 83 report.real_accumulated_time = results.real_time_used; 84 } 85 report.cpu_accumulated_time = results.cpu_time_used; 86 report.complexity_n = results.complexity_n; 87 report.complexity = b.complexity; 88 report.complexity_lambda = b.complexity_lambda; 89 report.statistics = b.statistics; 90 report.counters = results.counters; 91 92 if (memory_iterations > 0) { 93 report.has_memory_result = true; 94 report.allocs_per_iter = 95 memory_iterations ? static_cast<double>(memory_result.num_allocs) / 96 memory_iterations 97 : 0; 98 report.max_bytes_used = memory_result.max_bytes_used; 99 } 100 101 internal::Finish(&report.counters, results.iterations, seconds, b.threads); 102 } 103 return report; 104 } 105 106 // Execute one thread of benchmark b for the specified number of iterations. 107 // Adds the stats collected for the thread into *total. 108 void RunInThread(const BenchmarkInstance* b, size_t iters, int thread_id, 109 ThreadManager* manager) { 110 internal::ThreadTimer timer; 111 State st = b->Run(iters, thread_id, &timer, manager); 112 CHECK(st.iterations() >= st.max_iterations) 113 << "Benchmark returned before State::KeepRunning() returned false!"; 114 { 115 MutexLock l(manager->GetBenchmarkMutex()); 116 internal::ThreadManager::Result& results = manager->results; 117 results.iterations += st.iterations(); 118 results.cpu_time_used += timer.cpu_time_used(); 119 results.real_time_used += timer.real_time_used(); 120 results.manual_time_used += timer.manual_time_used(); 121 results.complexity_n += st.complexity_length_n(); 122 internal::Increment(&results.counters, st.counters); 123 } 124 manager->NotifyThreadComplete(); 125 } 126 127 class BenchmarkRunner { 128 public: 129 BenchmarkRunner(const benchmark::internal::BenchmarkInstance& b_, 130 std::vector<BenchmarkReporter::Run>* complexity_reports_) 131 : b(b_), 132 complexity_reports(*complexity_reports_), 133 min_time(!IsZero(b.min_time) ? b.min_time : FLAGS_benchmark_min_time), 134 repeats(b.repetitions != 0 ? b.repetitions 135 : FLAGS_benchmark_repetitions), 136 has_explicit_iteration_count(b.iterations != 0), 137 pool(b.threads - 1), 138 iters(has_explicit_iteration_count ? b.iterations : 1) { 139 run_results.display_report_aggregates_only = 140 (FLAGS_benchmark_report_aggregates_only || 141 FLAGS_benchmark_display_aggregates_only); 142 run_results.file_report_aggregates_only = 143 FLAGS_benchmark_report_aggregates_only; 144 if (b.aggregation_report_mode != internal::ARM_Unspecified) { 145 run_results.display_report_aggregates_only = 146 (b.aggregation_report_mode & 147 internal::ARM_DisplayReportAggregatesOnly); 148 run_results.file_report_aggregates_only = 149 (b.aggregation_report_mode & internal::ARM_FileReportAggregatesOnly); 150 } 151 152 for (int repetition_num = 0; repetition_num < repeats; repetition_num++) { 153 const bool is_the_first_repetition = repetition_num == 0; 154 DoOneRepetition(is_the_first_repetition); 155 } 156 157 // Calculate additional statistics 158 run_results.aggregates_only = ComputeStats(run_results.non_aggregates); 159 160 // Maybe calculate complexity report 161 if ((b.complexity != oNone) && b.last_benchmark_instance) { 162 auto additional_run_stats = ComputeBigO(complexity_reports); 163 run_results.aggregates_only.insert(run_results.aggregates_only.end(), 164 additional_run_stats.begin(), 165 additional_run_stats.end()); 166 complexity_reports.clear(); 167 } 168 } 169 170 RunResults&& get_results() { return std::move(run_results); } 171 172 private: 173 RunResults run_results; 174 175 const benchmark::internal::BenchmarkInstance& b; 176 std::vector<BenchmarkReporter::Run>& complexity_reports; 177 178 const double min_time; 179 const int repeats; 180 const bool has_explicit_iteration_count; 181 182 std::vector<std::thread> pool; 183 184 size_t iters; // preserved between repetitions! 185 // So only the first repetition has to find/calculate it, 186 // the other repetitions will just use that precomputed iteration count. 187 188 struct IterationResults { 189 internal::ThreadManager::Result results; 190 size_t iters; 191 double seconds; 192 }; 193 IterationResults DoNIterations() { 194 VLOG(2) << "Running " << b.name << " for " << iters << "\n"; 195 196 std::unique_ptr<internal::ThreadManager> manager; 197 manager.reset(new internal::ThreadManager(b.threads)); 198 199 // Run all but one thread in separate threads 200 for (std::size_t ti = 0; ti < pool.size(); ++ti) { 201 pool[ti] = std::thread(&RunInThread, &b, iters, static_cast<int>(ti + 1), 202 manager.get()); 203 } 204 // And run one thread here directly. 205 // (If we were asked to run just one thread, we don't create new threads.) 206 // Yes, we need to do this here *after* we start the separate threads. 207 RunInThread(&b, iters, 0, manager.get()); 208 209 // The main thread has finished. Now let's wait for the other threads. 210 manager->WaitForAllThreads(); 211 for (std::thread& thread : pool) thread.join(); 212 213 IterationResults i; 214 // Acquire the measurements/counters from the manager, UNDER THE LOCK! 215 { 216 MutexLock l(manager->GetBenchmarkMutex()); 217 i.results = manager->results; 218 } 219 220 // And get rid of the manager. 221 manager.reset(); 222 223 // Adjust real/manual time stats since they were reported per thread. 224 i.results.real_time_used /= b.threads; 225 i.results.manual_time_used /= b.threads; 226 227 VLOG(2) << "Ran in " << i.results.cpu_time_used << "/" 228 << i.results.real_time_used << "\n"; 229 230 // So for how long were we running? 231 i.iters = iters; 232 // Base decisions off of real time if requested by this benchmark. 233 i.seconds = i.results.cpu_time_used; 234 if (b.use_manual_time) { 235 i.seconds = i.results.manual_time_used; 236 } else if (b.use_real_time) { 237 i.seconds = i.results.real_time_used; 238 } 239 240 return i; 241 } 242 243 size_t PredictNumItersNeeded(const IterationResults& i) const { 244 // See how much iterations should be increased by. 245 // Note: Avoid division by zero with max(seconds, 1ns). 246 double multiplier = min_time * 1.4 / std::max(i.seconds, 1e-9); 247 // If our last run was at least 10% of FLAGS_benchmark_min_time then we 248 // use the multiplier directly. 249 // Otherwise we use at most 10 times expansion. 250 // NOTE: When the last run was at least 10% of the min time the max 251 // expansion should be 14x. 252 bool is_significant = (i.seconds / min_time) > 0.1; 253 multiplier = is_significant ? multiplier : std::min(10.0, multiplier); 254 if (multiplier <= 1.0) multiplier = 2.0; 255 256 // So what seems to be the sufficiently-large iteration count? Round up. 257 const size_t max_next_iters = 258 0.5 + std::max(multiplier * i.iters, i.iters + 1.0); 259 // But we do have *some* sanity limits though.. 260 const size_t next_iters = std::min(max_next_iters, kMaxIterations); 261 262 VLOG(3) << "Next iters: " << next_iters << ", " << multiplier << "\n"; 263 return next_iters; // round up before conversion to integer. 264 } 265 266 bool ShouldReportIterationResults(const IterationResults& i) const { 267 // Determine if this run should be reported; 268 // Either it has run for a sufficient amount of time 269 // or because an error was reported. 270 return i.results.has_error_ || 271 i.iters >= kMaxIterations || // Too many iterations already. 272 i.seconds >= min_time || // The elapsed time is large enough. 273 // CPU time is specified but the elapsed real time greatly exceeds 274 // the minimum time. 275 // Note that user provided timers are except from this sanity check. 276 ((i.results.real_time_used >= 5 * min_time) && !b.use_manual_time); 277 } 278 279 void DoOneRepetition(bool is_the_first_repetition) { 280 IterationResults i; 281 282 // We *may* be gradually increasing the length (iteration count) 283 // of the benchmark until we decide the results are significant. 284 // And once we do, we report those last results and exit. 285 // Please do note that the if there are repetitions, the iteration count 286 // is *only* calculated for the *first* repetition, and other repetitions 287 // simply use that precomputed iteration count. 288 for (;;) { 289 i = DoNIterations(); 290 291 // Do we consider the results to be significant? 292 // If we are doing repetitions, and the first repetition was already done, 293 // it has calculated the correct iteration time, so we have run that very 294 // iteration count just now. No need to calculate anything. Just report. 295 // Else, the normal rules apply. 296 const bool results_are_significant = !is_the_first_repetition || 297 has_explicit_iteration_count || 298 ShouldReportIterationResults(i); 299 300 if (results_are_significant) break; // Good, let's report them! 301 302 // Nope, bad iteration. Let's re-estimate the hopefully-sufficient 303 // iteration count, and run the benchmark again... 304 305 iters = PredictNumItersNeeded(i); 306 assert(iters > i.iters && 307 "if we did more iterations than we want to do the next time, " 308 "then we should have accepted the current iteration run."); 309 } 310 311 // Oh, one last thing, we need to also produce the 'memory measurements'.. 312 MemoryManager::Result memory_result; 313 size_t memory_iterations = 0; 314 if (memory_manager != nullptr) { 315 // Only run a few iterations to reduce the impact of one-time 316 // allocations in benchmarks that are not properly managed. 317 memory_iterations = std::min<size_t>(16, iters); 318 memory_manager->Start(); 319 std::unique_ptr<internal::ThreadManager> manager; 320 manager.reset(new internal::ThreadManager(1)); 321 RunInThread(&b, memory_iterations, 0, manager.get()); 322 manager->WaitForAllThreads(); 323 manager.reset(); 324 325 memory_manager->Stop(&memory_result); 326 } 327 328 // Ok, now actualy report. 329 BenchmarkReporter::Run report = CreateRunReport( 330 b, i.results, memory_iterations, memory_result, i.seconds); 331 332 if (!report.error_occurred && b.complexity != oNone) 333 complexity_reports.push_back(report); 334 335 run_results.non_aggregates.push_back(report); 336 } 337 }; 338 339 } // end namespace 340 341 RunResults RunBenchmark( 342 const benchmark::internal::BenchmarkInstance& b, 343 std::vector<BenchmarkReporter::Run>* complexity_reports) { 344 internal::BenchmarkRunner r(b, complexity_reports); 345 return r.get_results(); 346 } 347 348 } // end namespace internal 349 350 } // end namespace benchmark 351