// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "base/tracked_objects.h"

#include <limits.h>
#include <stdlib.h>

#include "base/atomicops.h"
#include "base/base_switches.h"
#include "base/command_line.h"
#include "base/compiler_specific.h"
#include "base/debug/leak_annotations.h"
#include "base/logging.h"
#include "base/process/process_handle.h"
#include "base/strings/stringprintf.h"
#include "base/third_party/valgrind/memcheck.h"
#include "base/threading/worker_pool.h"
#include "base/tracking_info.h"
#include "build/build_config.h"

using base::TimeDelta;

namespace base {
class TimeDelta;
}

namespace tracked_objects {

namespace {
// When ThreadData is first initialized, should we start in an ACTIVE state to
// record all of the startup-time tasks, or should we start up DEACTIVATED, so
// that we only record after parsing the command line flag --enable-tracking.
// Note that the flag may force either state, so this really controls only the
// period of time up until that flag is parsed. If there is no flag seen, then
// this state may prevail for much or all of the process lifetime.
const ThreadData::Status kInitialStartupState = ThreadData::PROFILING_ACTIVE;

// Possible states of the profiler timing enabledness.
enum {
  UNDEFINED_TIMING,
  ENABLED_TIMING,
  DISABLED_TIMING,
};

// State of the profiler timing enabledness.
base::subtle::Atomic32 g_profiler_timing_enabled = UNDEFINED_TIMING;

// Returns whether profiler timing is enabled. The default is true, but this
// may be overridden by a command-line flag. Some platforms may
// programmatically set this command-line flag to the "off" value if it's not
// specified.
// This in turn can be overridden by explicitly calling
// ThreadData::EnableProfilerTiming, say, based on a field trial.
inline bool IsProfilerTimingEnabled() {
  // Reading |g_profiler_timing_enabled| is done without barrier because
  // multiple initialization is not an issue while the barrier can be
  // relatively costly given that this method is sometimes called in a tight
  // loop.
  base::subtle::Atomic32 current_timing_enabled =
      base::subtle::NoBarrier_Load(&g_profiler_timing_enabled);
  if (current_timing_enabled == UNDEFINED_TIMING) {
    if (!base::CommandLine::InitializedForCurrentProcess())
      return true;
    current_timing_enabled =
        (base::CommandLine::ForCurrentProcess()->GetSwitchValueASCII(
             switches::kProfilerTiming) ==
         switches::kProfilerTimingDisabledValue)
            ? DISABLED_TIMING
            : ENABLED_TIMING;
    base::subtle::NoBarrier_Store(&g_profiler_timing_enabled,
                                  current_timing_enabled);
  }
  return current_timing_enabled == ENABLED_TIMING;
}

}  // namespace

//------------------------------------------------------------------------------
// DeathData tallies durations when a death takes place.
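// A DeathData instance is updated on the thread that records the death and
// may be read concurrently by a snapshotting thread, so its fields are
// written with NoBarrier atomic stores and small races are tolerated (see
// OnProfilingPhaseCompleted below).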

DeathData::DeathData()
    : count_(0),
      sample_probability_count_(0),
      run_duration_sum_(0),
      queue_duration_sum_(0),
      run_duration_max_(0),
      queue_duration_max_(0),
      run_duration_sample_(0),
      queue_duration_sample_(0),
      last_phase_snapshot_(nullptr) {
}

DeathData::DeathData(const DeathData& other)
    : count_(other.count_),
      sample_probability_count_(other.sample_probability_count_),
      run_duration_sum_(other.run_duration_sum_),
      queue_duration_sum_(other.queue_duration_sum_),
      run_duration_max_(other.run_duration_max_),
      queue_duration_max_(other.queue_duration_max_),
      run_duration_sample_(other.run_duration_sample_),
      queue_duration_sample_(other.queue_duration_sample_),
      last_phase_snapshot_(nullptr) {
  // This constructor will be used by std::map when adding new DeathData values
  // to the map. At that point, last_phase_snapshot_ is still NULL, so we don't
  // need to worry about ownership transfer.
  DCHECK(other.last_phase_snapshot_ == nullptr);
}

DeathData::~DeathData() {
  while (last_phase_snapshot_) {
    const DeathDataPhaseSnapshot* snapshot = last_phase_snapshot_;
    last_phase_snapshot_ = snapshot->prev;
    delete snapshot;
  }
}

// TODO(jar): I need to see if this macro to optimize branching is worth using.
//
// This macro has no branching, so it is surely fast, and is equivalent to:
//             if (assign_it)
//               target = source;
// We use a macro rather than a template to force this to inline.
// Related code for calculating max is discussed on the web.
#define CONDITIONAL_ASSIGN(assign_it, target, source) \
  ((target) ^= ((target) ^ (source)) & -static_cast<int32_t>(assign_it))

void DeathData::RecordDeath(const int32_t queue_duration,
                            const int32_t run_duration,
                            const uint32_t random_number) {
  // We'll just clamp at INT_MAX, but we should note this in the UI as such.
  if (count_ < INT_MAX)
    base::subtle::NoBarrier_Store(&count_, count_ + 1);

  int sample_probability_count =
      base::subtle::NoBarrier_Load(&sample_probability_count_);
  if (sample_probability_count < INT_MAX)
    ++sample_probability_count;
  base::subtle::NoBarrier_Store(&sample_probability_count_,
                                sample_probability_count);

  base::subtle::NoBarrier_Store(&queue_duration_sum_,
                                queue_duration_sum_ + queue_duration);
  base::subtle::NoBarrier_Store(&run_duration_sum_,
                                run_duration_sum_ + run_duration);

  if (queue_duration_max() < queue_duration)
    base::subtle::NoBarrier_Store(&queue_duration_max_, queue_duration);
  if (run_duration_max() < run_duration)
    base::subtle::NoBarrier_Store(&run_duration_max_, run_duration);

  // Take a uniformly distributed sample over all durations ever supplied
  // during the current profiling phase.
  // The probability that we (instead) use this new sample is
  // 1/sample_probability_count_. This results in a completely uniform
  // selection of the sample (at least when we don't clamp
  // sample_probability_count_... but that should be inconsequentially likely).
  // We ignore the fact that we correlated our selection of a sample to the run
  // and queue times (i.e., we used them to generate random_number).
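  // This is reservoir sampling with a reservoir of size one: the k-th
  // recording in a phase replaces the stored sample with probability 1/k, so
  // after N recordings each duration remains selected with probability 1/N.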
  CHECK_GT(sample_probability_count, 0);
  if (0 == (random_number % sample_probability_count)) {
    base::subtle::NoBarrier_Store(&queue_duration_sample_, queue_duration);
    base::subtle::NoBarrier_Store(&run_duration_sample_, run_duration);
  }
}

void DeathData::OnProfilingPhaseCompleted(int profiling_phase) {
  // Snapshotting and storing current state.
  last_phase_snapshot_ = new DeathDataPhaseSnapshot(
      profiling_phase, count(), run_duration_sum(), run_duration_max(),
      run_duration_sample(), queue_duration_sum(), queue_duration_max(),
      queue_duration_sample(), last_phase_snapshot_);

  // Not touching fields for which a delta can be computed by comparing with a
  // snapshot from the previous phase. Resetting other fields. Sample values
  // will be reset upon next death recording because sample_probability_count_
  // is set to 0.
  // We avoid resetting to 0 in favor of deltas whenever possible. The reason
  // is that for incrementable fields, resetting to 0 from the snapshot thread
  // potentially in parallel with incrementing in the death thread may result
  // in significant data corruption that has a potential to grow with time.
  // Not resetting incrementable fields and using deltas will cause any
  // off-by-little corruptions to be likely fixed at the next snapshot.
  // The max values are not incrementable, and cannot be deduced using deltas
  // for a given phase. Hence, we have to reset them to 0. But the potential
  // damage is limited to getting the previous phase's max to apply for the
  // next phase, and the error doesn't have a potential to keep growing with
  // new resets.
  // sample_probability_count_ is incrementable, but must be reset to 0 at the
  // phase end, so that we start a new uniformly randomized sample selection
  // after the reset. These fields are updated using atomics. However, race
  // conditions are possible since these are updated individually and not
  // together atomically, resulting in the values being mutually inconsistent.
  // The damage is limited to selecting a wrong sample, which is not something
  // that can cause accumulating or cascading effects.
  // If there were no inconsistencies caused by race conditions, we never send
  // a sample for the previous phase in the next phase's snapshot because
  // ThreadData::SnapshotExecutedTasks doesn't send deltas with 0 count.
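  // In short: only the fields that cannot be recovered via deltas are reset
  // here -- the sample-selection counter and the per-phase maxima.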
  base::subtle::NoBarrier_Store(&sample_probability_count_, 0);
  base::subtle::NoBarrier_Store(&run_duration_max_, 0);
  base::subtle::NoBarrier_Store(&queue_duration_max_, 0);
}

//------------------------------------------------------------------------------
DeathDataSnapshot::DeathDataSnapshot()
    : count(-1),
      run_duration_sum(-1),
      run_duration_max(-1),
      run_duration_sample(-1),
      queue_duration_sum(-1),
      queue_duration_max(-1),
      queue_duration_sample(-1) {
}

DeathDataSnapshot::DeathDataSnapshot(int count,
                                     int32_t run_duration_sum,
                                     int32_t run_duration_max,
                                     int32_t run_duration_sample,
                                     int32_t queue_duration_sum,
                                     int32_t queue_duration_max,
                                     int32_t queue_duration_sample)
    : count(count),
      run_duration_sum(run_duration_sum),
      run_duration_max(run_duration_max),
      run_duration_sample(run_duration_sample),
      queue_duration_sum(queue_duration_sum),
      queue_duration_max(queue_duration_max),
      queue_duration_sample(queue_duration_sample) {}

DeathDataSnapshot::~DeathDataSnapshot() {
}

DeathDataSnapshot DeathDataSnapshot::Delta(
    const DeathDataSnapshot& older) const {
  return DeathDataSnapshot(count - older.count,
                           run_duration_sum - older.run_duration_sum,
                           run_duration_max, run_duration_sample,
                           queue_duration_sum - older.queue_duration_sum,
                           queue_duration_max, queue_duration_sample);
}

//------------------------------------------------------------------------------
BirthOnThread::BirthOnThread(const Location& location,
                             const ThreadData& current)
    : location_(location),
      birth_thread_(&current) {
}

//------------------------------------------------------------------------------
BirthOnThreadSnapshot::BirthOnThreadSnapshot() {
}

BirthOnThreadSnapshot::BirthOnThreadSnapshot(const BirthOnThread& birth)
    : location(birth.location()),
      thread_name(birth.birth_thread()->thread_name()) {
}

BirthOnThreadSnapshot::~BirthOnThreadSnapshot() {
}

//------------------------------------------------------------------------------
Births::Births(const Location& location, const ThreadData& current)
    : BirthOnThread(location, current),
      birth_count_(1) { }

int Births::birth_count() const { return birth_count_; }

void Births::RecordBirth() { ++birth_count_; }

//------------------------------------------------------------------------------
// ThreadData maintains the central data for all births and deaths on a single
// thread.

// TODO(jar): We should pull all these static vars together, into a struct, and
// optimize layout so that we benefit from locality of reference during
// accesses to them.

// static
ThreadData::NowFunction* ThreadData::now_function_for_testing_ = NULL;

// A TLS slot which points to the ThreadData instance for the current thread.
// We do a fake initialization here (zeroing out data), and then the real
// in-place construction happens when we call tls_index_.Initialize().
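// The destructor registered with this slot (OnThreadTermination) must not
// allocate, since it may run while the thread's allocator is being torn down.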
// static
base::ThreadLocalStorage::StaticSlot ThreadData::tls_index_ = TLS_INITIALIZER;

// static
int ThreadData::worker_thread_data_creation_count_ = 0;

// static
int ThreadData::cleanup_count_ = 0;

// static
int ThreadData::incarnation_counter_ = 0;

// static
ThreadData* ThreadData::all_thread_data_list_head_ = NULL;

// static
ThreadData* ThreadData::first_retired_worker_ = NULL;

// static
base::LazyInstance<base::Lock>::Leaky
    ThreadData::list_lock_ = LAZY_INSTANCE_INITIALIZER;

// static
base::subtle::Atomic32 ThreadData::status_ = ThreadData::UNINITIALIZED;

ThreadData::ThreadData(const std::string& suggested_name)
    : next_(NULL),
      next_retired_worker_(NULL),
      worker_thread_number_(0),
      incarnation_count_for_pool_(-1),
      current_stopwatch_(NULL) {
  DCHECK_GE(suggested_name.size(), 0u);
  thread_name_ = suggested_name;
  PushToHeadOfList();  // Which sets real incarnation_count_for_pool_.
}

ThreadData::ThreadData(int thread_number)
    : next_(NULL),
      next_retired_worker_(NULL),
      worker_thread_number_(thread_number),
      incarnation_count_for_pool_(-1),
      current_stopwatch_(NULL) {
  CHECK_GT(thread_number, 0);
  base::StringAppendF(&thread_name_, "WorkerThread-%d", thread_number);
  PushToHeadOfList();  // Which sets real incarnation_count_for_pool_.
}

ThreadData::~ThreadData() {
}

void ThreadData::PushToHeadOfList() {
  // Toss in a hint of randomness (atop the uninitialized value).
  (void)VALGRIND_MAKE_MEM_DEFINED_IF_ADDRESSABLE(&random_number_,
                                                 sizeof(random_number_));
  MSAN_UNPOISON(&random_number_, sizeof(random_number_));
  random_number_ += static_cast<uint32_t>(this - static_cast<ThreadData*>(0));
  random_number_ ^= (Now() - TrackedTime()).InMilliseconds();

  DCHECK(!next_);
  base::AutoLock lock(*list_lock_.Pointer());
  incarnation_count_for_pool_ = incarnation_counter_;
  next_ = all_thread_data_list_head_;
  all_thread_data_list_head_ = this;
}

// static
ThreadData* ThreadData::first() {
  base::AutoLock lock(*list_lock_.Pointer());
  return all_thread_data_list_head_;
}

ThreadData* ThreadData::next() const { return next_; }

// static
void ThreadData::InitializeThreadContext(const std::string& suggested_name) {
  if (base::WorkerPool::RunsTasksOnCurrentThread())
    return;
  EnsureTlsInitialization();
  ThreadData* current_thread_data =
      reinterpret_cast<ThreadData*>(tls_index_.Get());
  if (current_thread_data)
    return;  // Browser tests instigate this.
  current_thread_data = new ThreadData(suggested_name);
  tls_index_.Set(current_thread_data);
}

// static
ThreadData* ThreadData::Get() {
  if (!tls_index_.initialized())
    return NULL;  // For unittests only.
  ThreadData* registered = reinterpret_cast<ThreadData*>(tls_index_.Get());
  if (registered)
    return registered;

  // We must be a worker thread, since we didn't pre-register.
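  // Reuse a retired worker ThreadData if one is available; otherwise claim a
  // new worker thread number under the lock and construct an instance below.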
  ThreadData* worker_thread_data = NULL;
  int worker_thread_number = 0;
  {
    base::AutoLock lock(*list_lock_.Pointer());
    if (first_retired_worker_) {
      worker_thread_data = first_retired_worker_;
      first_retired_worker_ = first_retired_worker_->next_retired_worker_;
      worker_thread_data->next_retired_worker_ = NULL;
    } else {
      worker_thread_number = ++worker_thread_data_creation_count_;
    }
  }

  // If we can't find a previously used instance, then we have to create one.
  if (!worker_thread_data) {
    DCHECK_GT(worker_thread_number, 0);
    worker_thread_data = new ThreadData(worker_thread_number);
  }
  DCHECK_GT(worker_thread_data->worker_thread_number_, 0);

  tls_index_.Set(worker_thread_data);
  return worker_thread_data;
}

// static
void ThreadData::OnThreadTermination(void* thread_data) {
  DCHECK(thread_data);  // TLS should *never* call us with a NULL.
  // We must NOT do any allocations during this callback. There is a chance
  // that the allocator is no longer active on this thread.
  reinterpret_cast<ThreadData*>(thread_data)->OnThreadTerminationCleanup();
}

void ThreadData::OnThreadTerminationCleanup() {
  // The list_lock_ was created when we registered the callback, so it won't be
  // allocated here despite the lazy reference.
  base::AutoLock lock(*list_lock_.Pointer());
  if (incarnation_counter_ != incarnation_count_for_pool_)
    return;  // ThreadData was constructed in an earlier unit test.
  ++cleanup_count_;
  // Only worker threads need to be retired and reused.
  if (!worker_thread_number_) {
    return;
  }
  // We must NOT do any allocations during this callback.
  // Using the simple linked lists avoids all allocations.
  DCHECK_EQ(this->next_retired_worker_, reinterpret_cast<ThreadData*>(NULL));
  this->next_retired_worker_ = first_retired_worker_;
  first_retired_worker_ = this;
}

// static
void ThreadData::Snapshot(int current_profiling_phase,
                          ProcessDataSnapshot* process_data_snapshot) {
  // Get an unchanging copy of a ThreadData list.
  ThreadData* my_list = ThreadData::first();

  // Gather data serially.
  // This hackish approach *can* get some slightly corrupt tallies, as we are
  // grabbing values without the protection of a lock, but it has the advantage
  // of working even with threads that don't have message loops. If a user
  // sees any strangeness, they can always just run their stats gathering a
  // second time.
  BirthCountMap birth_counts;
  for (ThreadData* thread_data = my_list; thread_data;
       thread_data = thread_data->next()) {
    thread_data->SnapshotExecutedTasks(current_profiling_phase,
                                       &process_data_snapshot->phased_snapshots,
                                       &birth_counts);
  }

  // Add births that are still active -- i.e. objects that have tallied a
  // birth, but have not yet tallied a matching death, and hence must be either
  // running, queued up, or being held in limbo for future posting.
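  // Such tasks are reported under the current phase with the synthetic death
  // thread name "Still_Alive" and zeroed durations.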
  auto* current_phase_tasks =
      &process_data_snapshot->phased_snapshots[current_profiling_phase].tasks;
  for (const auto& birth_count : birth_counts) {
    if (birth_count.second > 0) {
      current_phase_tasks->push_back(
          TaskSnapshot(BirthOnThreadSnapshot(*birth_count.first),
                       DeathDataSnapshot(birth_count.second, 0, 0, 0, 0, 0, 0),
                       "Still_Alive"));
    }
  }
}

// static
void ThreadData::OnProfilingPhaseCompleted(int profiling_phase) {
  // Get an unchanging copy of a ThreadData list.
  ThreadData* my_list = ThreadData::first();

  // Add snapshots for all instances of death data in all threads serially.
  // This hackish approach *can* get some slightly corrupt tallies, as we are
  // grabbing values without the protection of a lock, but it has the advantage
  // of working even with threads that don't have message loops. Any corruption
  // shouldn't cause "cascading damage" to anything else (in later phases).
  for (ThreadData* thread_data = my_list; thread_data;
       thread_data = thread_data->next()) {
    thread_data->OnProfilingPhaseCompletedOnThread(profiling_phase);
  }
}

Births* ThreadData::TallyABirth(const Location& location) {
  BirthMap::iterator it = birth_map_.find(location);
  Births* child;
  if (it != birth_map_.end()) {
    child = it->second;
    child->RecordBirth();
  } else {
    child = new Births(location, *this);  // Leak this.
    // Lock since the map may get relocated now, and other threads sometimes
    // snapshot it (but they lock before copying it).
    base::AutoLock lock(map_lock_);
    birth_map_[location] = child;
  }

  return child;
}

void ThreadData::TallyADeath(const Births& births,
                             int32_t queue_duration,
                             const TaskStopwatch& stopwatch) {
  int32_t run_duration = stopwatch.RunDurationMs();

  // Stir in some randomness, plus add constant in case durations are zero.
  const uint32_t kSomePrimeNumber = 2147483647;
  random_number_ += queue_duration + run_duration + kSomePrimeNumber;
  // An address is going to have some randomness to it as well ;-).
  random_number_ ^=
      static_cast<uint32_t>(&births - reinterpret_cast<Births*>(0));

  DeathMap::iterator it = death_map_.find(&births);
  DeathData* death_data;
  if (it != death_map_.end()) {
    death_data = &it->second;
  } else {
    base::AutoLock lock(map_lock_);  // Lock as the map may get relocated now.
    death_data = &death_map_[&births];
  }  // Release lock ASAP.
  death_data->RecordDeath(queue_duration, run_duration, random_number_);
}

// static
Births* ThreadData::TallyABirthIfActive(const Location& location) {
  if (!TrackingStatus())
    return NULL;
  ThreadData* current_thread_data = Get();
  if (!current_thread_data)
    return NULL;
  return current_thread_data->TallyABirth(location);
}

// static
void ThreadData::TallyRunOnNamedThreadIfTracking(
    const base::TrackingInfo& completed_task,
    const TaskStopwatch& stopwatch) {
  // Even if we have been DEACTIVATED, we will process any pending births so
  // that our data structures (which counted the outstanding births) remain
  // consistent.
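  // A null |birth_tally| means no birth was recorded when the task was posted
  // (e.g. tracking was inactive at that time), so there is nothing to match a
  // death against.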
  const Births* births = completed_task.birth_tally;
  if (!births)
    return;
  ThreadData* current_thread_data = stopwatch.GetThreadData();
  if (!current_thread_data)
    return;

  // Watch out for a race where status_ is changing, and hence one or both
  // of start_of_run or end_of_run is zero. In that case, we didn't bother to
  // get a time value since we "weren't tracking" and we were trying to be
  // efficient by not calling for a genuine time value. For simplicity, we'll
  // use a default zero duration when we can't calculate a true value.
  TrackedTime start_of_run = stopwatch.StartTime();
  int32_t queue_duration = 0;
  if (!start_of_run.is_null()) {
    queue_duration = (start_of_run - completed_task.EffectiveTimePosted())
                         .InMilliseconds();
  }
  current_thread_data->TallyADeath(*births, queue_duration, stopwatch);
}

// static
void ThreadData::TallyRunOnWorkerThreadIfTracking(
    const Births* births,
    const TrackedTime& time_posted,
    const TaskStopwatch& stopwatch) {
  // Even if we have been DEACTIVATED, we will process any pending births so
  // that our data structures (which counted the outstanding births) remain
  // consistent.
  if (!births)
    return;

  // TODO(jar): Support the option to coalesce all worker-thread activity under
  // one ThreadData instance that uses locks to protect *all* access. This
  // will reduce memory (making it provably bounded), but run incrementally
  // slower (since we'll use locks on TallyABirth and TallyADeath). The good
  // news is that the locks on TallyADeath will be *after* the worker thread
  // has run, and hence nothing will be waiting for the completion (... besides
  // some other thread that might like to run). Also, the worker threads'
  // tasks are generally longer, and hence the cost of the lock may perchance
  // be amortized over the long task's lifetime.
  ThreadData* current_thread_data = stopwatch.GetThreadData();
  if (!current_thread_data)
    return;

  TrackedTime start_of_run = stopwatch.StartTime();
  int32_t queue_duration = 0;
  if (!start_of_run.is_null()) {
    queue_duration = (start_of_run - time_posted).InMilliseconds();
  }
  current_thread_data->TallyADeath(*births, queue_duration, stopwatch);
}

// static
void ThreadData::TallyRunInAScopedRegionIfTracking(
    const Births* births,
    const TaskStopwatch& stopwatch) {
  // Even if we have been DEACTIVATED, we will process any pending births so
  // that our data structures (which counted the outstanding births) remain
  // consistent.
  if (!births)
    return;

  ThreadData* current_thread_data = stopwatch.GetThreadData();
  if (!current_thread_data)
    return;

  int32_t queue_duration = 0;
  current_thread_data->TallyADeath(*births, queue_duration, stopwatch);
}

void ThreadData::SnapshotExecutedTasks(
    int current_profiling_phase,
    PhasedProcessDataSnapshotMap* phased_snapshots,
    BirthCountMap* birth_counts) {
  // Get a copy of the data, so that it will not change during the iterations
  // and processing.
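  // SnapshotMaps() copies both maps under |map_lock_|, so the loops below
  // iterate over a stable copy even while the owning thread keeps tallying.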
  BirthMap birth_map;
  DeathsSnapshot deaths;
  SnapshotMaps(current_profiling_phase, &birth_map, &deaths);

  for (const auto& birth : birth_map) {
    (*birth_counts)[birth.second] += birth.second->birth_count();
  }

  for (const auto& death : deaths) {
    (*birth_counts)[death.first] -= death.first->birth_count();

    // For the current death data, walk through all its snapshots, starting
    // from the current one, then from the previous profiling phase etc., and
    // for each snapshot calculate the delta between the snapshot and the
    // previous phase, if any. Store the deltas in the result.
    for (const DeathDataPhaseSnapshot* phase = &death.second; phase;
         phase = phase->prev) {
      const DeathDataSnapshot& death_data =
          phase->prev ? phase->death_data.Delta(phase->prev->death_data)
                      : phase->death_data;

      if (death_data.count > 0) {
        (*phased_snapshots)[phase->profiling_phase].tasks.push_back(
            TaskSnapshot(BirthOnThreadSnapshot(*death.first), death_data,
                         thread_name()));
      }
    }
  }
}

// This may be called from another thread.
void ThreadData::SnapshotMaps(int profiling_phase,
                              BirthMap* birth_map,
                              DeathsSnapshot* deaths) {
  base::AutoLock lock(map_lock_);

  for (const auto& birth : birth_map_)
    (*birth_map)[birth.first] = birth.second;

  for (const auto& death : death_map_) {
    deaths->push_back(std::make_pair(
        death.first,
        DeathDataPhaseSnapshot(profiling_phase, death.second.count(),
                               death.second.run_duration_sum(),
                               death.second.run_duration_max(),
                               death.second.run_duration_sample(),
                               death.second.queue_duration_sum(),
                               death.second.queue_duration_max(),
                               death.second.queue_duration_sample(),
                               death.second.last_phase_snapshot())));
  }
}

void ThreadData::OnProfilingPhaseCompletedOnThread(int profiling_phase) {
  base::AutoLock lock(map_lock_);

  for (auto& death : death_map_) {
    death.second.OnProfilingPhaseCompleted(profiling_phase);
  }
}

void ThreadData::EnsureTlsInitialization() {
  if (base::subtle::Acquire_Load(&status_) >= DEACTIVATED)
    return;  // Someone else did the initialization.
  // Due to racy lazy initialization in tests, we'll need to recheck status_
  // after we acquire the lock.

  // Ensure that we don't double initialize tls. We are called when single
  // threaded in the product, but some tests may be racy and lazy about our
  // initialization.
  base::AutoLock lock(*list_lock_.Pointer());
  if (base::subtle::Acquire_Load(&status_) >= DEACTIVATED)
    return;  // Someone raced in here and beat us.

  // Perform the "real" TLS initialization now, and leave it intact through
  // process termination.
  if (!tls_index_.initialized()) {  // Testing may have initialized this.
    DCHECK_EQ(base::subtle::NoBarrier_Load(&status_), UNINITIALIZED);
    tls_index_.Initialize(&ThreadData::OnThreadTermination);
    DCHECK(tls_index_.initialized());
  } else {
    // TLS was initialized for us earlier.
    DCHECK_EQ(base::subtle::NoBarrier_Load(&status_), DORMANT_DURING_TESTS);
  }

  // Incarnation counter is only significant to testing, as it otherwise will
  // never again change in this process.
  ++incarnation_counter_;

  // The lock is not critical for setting status_, but it doesn't hurt. It
  // also ensures that if we have a racy initialization, that we'll bail as
  // soon as we get the lock earlier in this method.
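  // This Release_Store pairs with the Acquire_Load in status() and at the top
  // of this method, so readers that observe the new status also see the
  // completed TLS initialization.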
  base::subtle::Release_Store(&status_, kInitialStartupState);
  DCHECK(base::subtle::NoBarrier_Load(&status_) != UNINITIALIZED);
}

// static
void ThreadData::InitializeAndSetTrackingStatus(Status status) {
  DCHECK_GE(status, DEACTIVATED);
  DCHECK_LE(status, PROFILING_ACTIVE);

  EnsureTlsInitialization();  // No-op if already initialized.

  if (status > DEACTIVATED)
    status = PROFILING_ACTIVE;
  base::subtle::Release_Store(&status_, status);
}

// static
ThreadData::Status ThreadData::status() {
  return static_cast<ThreadData::Status>(base::subtle::Acquire_Load(&status_));
}

// static
bool ThreadData::TrackingStatus() {
  return base::subtle::Acquire_Load(&status_) > DEACTIVATED;
}

// static
void ThreadData::EnableProfilerTiming() {
  base::subtle::NoBarrier_Store(&g_profiler_timing_enabled, ENABLED_TIMING);
}

// static
TrackedTime ThreadData::Now() {
  if (now_function_for_testing_)
    return TrackedTime::FromMilliseconds((*now_function_for_testing_)());
  if (IsProfilerTimingEnabled() && TrackingStatus())
    return TrackedTime::Now();
  return TrackedTime();  // Super fast when disabled, or not compiled.
}

// static
void ThreadData::EnsureCleanupWasCalled(int /*major_threads_shutdown_count*/) {
  base::AutoLock lock(*list_lock_.Pointer());
  if (worker_thread_data_creation_count_ == 0)
    return;  // We haven't really run much, and couldn't have leaked.

  // TODO(jar): until this is working on XP, don't run the real test.
#if 0
  // Verify that we've at least shut down/cleaned up the major named threads.
  // The caller should tell us how many thread shutdowns should have taken
  // place by now.
  CHECK_GT(cleanup_count_, major_threads_shutdown_count);
#endif
}

// static
void ThreadData::ShutdownSingleThreadedCleanup(bool leak) {
  // This is only called from test code, where we need to clean up so that
  // additional tests can be run.
  // We must be single threaded... but be careful anyway.
  InitializeAndSetTrackingStatus(DEACTIVATED);

  ThreadData* thread_data_list;
  {
    base::AutoLock lock(*list_lock_.Pointer());
    thread_data_list = all_thread_data_list_head_;
    all_thread_data_list_head_ = NULL;
    ++incarnation_counter_;
    // To be clean, break apart the retired worker list (though we leak them).
    while (first_retired_worker_) {
      ThreadData* worker = first_retired_worker_;
      CHECK_GT(worker->worker_thread_number_, 0);
      first_retired_worker_ = worker->next_retired_worker_;
      worker->next_retired_worker_ = NULL;
    }
  }

  // Put most global statics back in pristine shape.
  worker_thread_data_creation_count_ = 0;
  cleanup_count_ = 0;
  tls_index_.Set(NULL);
  // Almost UNINITIALIZED.
  base::subtle::Release_Store(&status_, DORMANT_DURING_TESTS);

  // To avoid any chance of racing in unit tests, which is the only place we
  // call this function, we may sometimes leak all the data structures we
  // recovered, as they may still be in use on threads from prior tests!
  if (leak) {
    ThreadData* thread_data = thread_data_list;
    while (thread_data) {
      ANNOTATE_LEAKING_OBJECT_PTR(thread_data);
      thread_data = thread_data->next();
    }
    return;
  }

  // When we want to clean up (on a single thread), here is what we do.

  // Do actual recursive delete in all ThreadData instances.
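  // Births objects were heap-allocated (and intentionally leaked) by
  // TallyABirth, so they are deleted explicitly; DeathData values live inside
  // |death_map_| and go away with their ThreadData.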
  while (thread_data_list) {
    ThreadData* next_thread_data = thread_data_list;
    thread_data_list = thread_data_list->next();

    for (BirthMap::iterator it = next_thread_data->birth_map_.begin();
         next_thread_data->birth_map_.end() != it; ++it)
      delete it->second;  // Delete the Birth Records.
    delete next_thread_data;  // Includes all Death Records.
  }
}

//------------------------------------------------------------------------------
TaskStopwatch::TaskStopwatch()
    : wallclock_duration_ms_(0),
      current_thread_data_(NULL),
      excluded_duration_ms_(0),
      parent_(NULL) {
#if DCHECK_IS_ON()
  state_ = CREATED;
  child_ = NULL;
#endif
}

TaskStopwatch::~TaskStopwatch() {
#if DCHECK_IS_ON()
  DCHECK(state_ != RUNNING);
  DCHECK(child_ == NULL);
#endif
}

void TaskStopwatch::Start() {
#if DCHECK_IS_ON()
  DCHECK(state_ == CREATED);
  state_ = RUNNING;
#endif

  start_time_ = ThreadData::Now();

  current_thread_data_ = ThreadData::Get();
  if (!current_thread_data_)
    return;

  parent_ = current_thread_data_->current_stopwatch_;
#if DCHECK_IS_ON()
  if (parent_) {
    DCHECK(parent_->state_ == RUNNING);
    DCHECK(parent_->child_ == NULL);
    parent_->child_ = this;
  }
#endif
  current_thread_data_->current_stopwatch_ = this;
}

void TaskStopwatch::Stop() {
  const TrackedTime end_time = ThreadData::Now();
#if DCHECK_IS_ON()
  DCHECK(state_ == RUNNING);
  state_ = STOPPED;
  DCHECK(child_ == NULL);
#endif

  if (!start_time_.is_null() && !end_time.is_null()) {
    wallclock_duration_ms_ = (end_time - start_time_).InMilliseconds();
  }

  if (!current_thread_data_)
    return;

  DCHECK(current_thread_data_->current_stopwatch_ == this);
  current_thread_data_->current_stopwatch_ = parent_;
  if (!parent_)
    return;

#if DCHECK_IS_ON()
  DCHECK(parent_->state_ == RUNNING);
  DCHECK(parent_->child_ == this);
  parent_->child_ = NULL;
#endif
  parent_->excluded_duration_ms_ += wallclock_duration_ms_;
  parent_ = NULL;
}

TrackedTime TaskStopwatch::StartTime() const {
#if DCHECK_IS_ON()
  DCHECK(state_ != CREATED);
#endif

  return start_time_;
}

int32_t TaskStopwatch::RunDurationMs() const {
#if DCHECK_IS_ON()
  DCHECK(state_ == STOPPED);
#endif

  return wallclock_duration_ms_ - excluded_duration_ms_;
}

ThreadData* TaskStopwatch::GetThreadData() const {
#if DCHECK_IS_ON()
  DCHECK(state_ != CREATED);
#endif

  return current_thread_data_;
}

//------------------------------------------------------------------------------
// DeathDataPhaseSnapshot

DeathDataPhaseSnapshot::DeathDataPhaseSnapshot(
    int profiling_phase,
    int count,
    int32_t run_duration_sum,
    int32_t run_duration_max,
    int32_t run_duration_sample,
    int32_t queue_duration_sum,
    int32_t queue_duration_max,
    int32_t queue_duration_sample,
    const DeathDataPhaseSnapshot* prev)
    : profiling_phase(profiling_phase),
      death_data(count,
                 run_duration_sum,
                 run_duration_max,
                 run_duration_sample,
                 queue_duration_sum,
                 queue_duration_max,
                 queue_duration_sample),
      prev(prev) {}

//------------------------------------------------------------------------------
// TaskSnapshot

TaskSnapshot::TaskSnapshot() {
}

TaskSnapshot::TaskSnapshot(const BirthOnThreadSnapshot& birth,
                           const DeathDataSnapshot& death_data,
                           const std::string& death_thread_name)
    : birth(birth),
      death_data(death_data),
      death_thread_name(death_thread_name) {
}

TaskSnapshot::~TaskSnapshot() {
}

//------------------------------------------------------------------------------
// ProcessDataPhaseSnapshot

ProcessDataPhaseSnapshot::ProcessDataPhaseSnapshot() {
}

ProcessDataPhaseSnapshot::ProcessDataPhaseSnapshot(
    const ProcessDataPhaseSnapshot& other) = default;

ProcessDataPhaseSnapshot::~ProcessDataPhaseSnapshot() {
}

//------------------------------------------------------------------------------
// ProcessDataSnapshot

ProcessDataSnapshot::ProcessDataSnapshot()
#if !defined(OS_NACL)
    : process_id(base::GetCurrentProcId()) {
#else
    : process_id(base::kNullProcessId) {
#endif
}

ProcessDataSnapshot::ProcessDataSnapshot(const ProcessDataSnapshot& other) =
    default;

ProcessDataSnapshot::~ProcessDataSnapshot() {
}

}  // namespace tracked_objects