// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "base/tracked_objects.h"

#include <limits.h>
#include <stdlib.h>

#include "base/atomicops.h"
#include "base/base_switches.h"
#include "base/command_line.h"
#include "base/compiler_specific.h"
#include "base/logging.h"
#include "base/process/process_handle.h"
#include "base/profiler/alternate_timer.h"
#include "base/strings/stringprintf.h"
#include "base/tracking_info.h"
#include "build/build_config.h"
#include "third_party/valgrind/memcheck.h"

using base::TimeDelta;

namespace base {
class TimeDelta;
}

namespace tracked_objects {

namespace {
// When ThreadData is first initialized, should we start in an ACTIVE state to
// record all of the startup-time tasks, or should we start up DEACTIVATED, so
// that we only record after parsing the command line flag --enable-tracking.
// Note that the flag may force either state, so this really controls only the
// period of time up until that flag is parsed. If there is no flag seen, then
// this state may prevail for much or all of the process lifetime.
const ThreadData::Status kInitialStartupState = ThreadData::PROFILING_ACTIVE;

// Control whether an alternate time source (Now() function) is supported by
// the ThreadData class. This compile time flag should be set to true if we
// want other modules (such as a memory allocator, or a thread-specific CPU time
// clock) to be able to provide a thread-specific Now() function. Without this
// compile-time flag, the code will only support the wall-clock time. This flag
// can be flipped to efficiently disable this path (if there is a performance
// problem with its presence).
static const bool kAllowAlternateTimeSourceHandling = true;

// Possible states of the profiler timing enabledness.
enum {
  UNDEFINED_TIMING,
  ENABLED_TIMING,
  DISABLED_TIMING,
};

// State of the profiler timing enabledness.
base::subtle::Atomic32 g_profiler_timing_enabled = UNDEFINED_TIMING;

// Returns whether profiler timing is enabled. The default is true, but this
// may be overridden by a command-line flag. Some platforms may
// programmatically set this command-line flag to the "off" value if it's not
// specified.
// This in turn can be overridden by explicitly calling
// ThreadData::EnableProfilerTiming, say, based on a field trial.
inline bool IsProfilerTimingEnabled() {
  // Reading |g_profiler_timing_enabled| is done without barrier because
  // multiple initialization is not an issue while the barrier can be
  // relatively costly given that this method is sometimes called in a tight
  // loop.
  base::subtle::Atomic32 current_timing_enabled =
      base::subtle::NoBarrier_Load(&g_profiler_timing_enabled);
  if (current_timing_enabled == UNDEFINED_TIMING) {
    if (!base::CommandLine::InitializedForCurrentProcess())
      return true;
    current_timing_enabled =
        (base::CommandLine::ForCurrentProcess()->GetSwitchValueASCII(
             switches::kProfilerTiming) ==
         switches::kProfilerTimingDisabledValue)
            ?
                DISABLED_TIMING
                : ENABLED_TIMING;
    base::subtle::NoBarrier_Store(&g_profiler_timing_enabled,
                                  current_timing_enabled);
  }
  return current_timing_enabled == ENABLED_TIMING;
}

}  // namespace

//------------------------------------------------------------------------------
// DeathData tallies durations when a death takes place.

DeathData::DeathData()
    : count_(0),
      sample_probability_count_(0),
      run_duration_sum_(0),
      queue_duration_sum_(0),
      run_duration_max_(0),
      queue_duration_max_(0),
      run_duration_sample_(0),
      queue_duration_sample_(0),
      last_phase_snapshot_(nullptr) {
}

DeathData::DeathData(const DeathData& other)
    : count_(other.count_),
      sample_probability_count_(other.sample_probability_count_),
      run_duration_sum_(other.run_duration_sum_),
      queue_duration_sum_(other.queue_duration_sum_),
      run_duration_max_(other.run_duration_max_),
      queue_duration_max_(other.queue_duration_max_),
      run_duration_sample_(other.run_duration_sample_),
      queue_duration_sample_(other.queue_duration_sample_),
      last_phase_snapshot_(nullptr) {
  // This constructor will be used by std::map when adding new DeathData values
  // to the map. At that point, last_phase_snapshot_ is still NULL, so we don't
  // need to worry about ownership transfer.
  DCHECK(other.last_phase_snapshot_ == nullptr);
}

DeathData::~DeathData() {
  while (last_phase_snapshot_) {
    const DeathDataPhaseSnapshot* snapshot = last_phase_snapshot_;
    last_phase_snapshot_ = snapshot->prev;
    delete snapshot;
  }
}

// TODO(jar): I need to see if this macro to optimize branching is worth using.
//
// This macro has no branching, so it is surely fast, and is equivalent to:
//   if (assign_it)
//     target = source;
// We use a macro rather than a template to force this to inline.
// Related code for calculating max is discussed on the web.
#define CONDITIONAL_ASSIGN(assign_it, target, source) \
  ((target) ^= ((target) ^ (source)) & -static_cast<int32_t>(assign_it))

void DeathData::RecordDeath(const int32_t queue_duration,
                            const int32_t run_duration,
                            const uint32_t random_number) {
  // We'll just clamp at INT_MAX, but we should note this in the UI as such.
  if (count_ < INT_MAX)
    base::subtle::NoBarrier_Store(&count_, count_ + 1);

  int sample_probability_count =
      base::subtle::NoBarrier_Load(&sample_probability_count_);
  if (sample_probability_count < INT_MAX)
    ++sample_probability_count;
  base::subtle::NoBarrier_Store(&sample_probability_count_,
                                sample_probability_count);

  base::subtle::NoBarrier_Store(&queue_duration_sum_,
                                queue_duration_sum_ + queue_duration);
  base::subtle::NoBarrier_Store(&run_duration_sum_,
                                run_duration_sum_ + run_duration);

  if (queue_duration_max() < queue_duration)
    base::subtle::NoBarrier_Store(&queue_duration_max_, queue_duration);
  if (run_duration_max() < run_duration)
    base::subtle::NoBarrier_Store(&run_duration_max_, run_duration);

  // Take a uniformly distributed sample over all durations ever supplied
  // during the current profiling phase.
  // The probability that we (instead) use this new sample is
  // 1/sample_probability_count_. This results in a completely uniform
  // selection of the sample (at least when we don't clamp
  // sample_probability_count_... but that should be inconsequentially likely).
  // We ignore the fact that we correlated our selection of a sample to the run
  // and queue times (i.e., we used them to generate random_number).
  CHECK_GT(sample_probability_count, 0);
  if (0 == (random_number % sample_probability_count)) {
    base::subtle::NoBarrier_Store(&queue_duration_sample_, queue_duration);
    base::subtle::NoBarrier_Store(&run_duration_sample_, run_duration);
  }
}

void DeathData::OnProfilingPhaseCompleted(int profiling_phase) {
  // Snapshotting and storing current state.
  last_phase_snapshot_ = new DeathDataPhaseSnapshot(
      profiling_phase, count(), run_duration_sum(), run_duration_max(),
      run_duration_sample(), queue_duration_sum(), queue_duration_max(),
      queue_duration_sample(), last_phase_snapshot_);

  // Not touching fields for which a delta can be computed by comparing with a
  // snapshot from the previous phase. Resetting other fields. Sample values
  // will be reset upon next death recording because sample_probability_count_
  // is set to 0.
  // We avoid resetting to 0 in favor of deltas whenever possible. The reason
  // is that for incrementable fields, resetting to 0 from the snapshot thread
  // potentially in parallel with incrementing in the death thread may result in
  // significant data corruption that has a potential to grow with time. Not
  // resetting incrementable fields and using deltas will cause any
  // off-by-little corruptions to be likely fixed at the next snapshot.
  // The max values are not incrementable, and cannot be deduced using deltas
  // for a given phase. Hence, we have to reset them to 0. But the potential
  // damage is limited to getting the previous phase's max to apply for the next
  // phase, and the error doesn't have a potential to keep growing with new
  // resets.
  // sample_probability_count_ is incrementable, but must be reset to 0 at the
  // phase end, so that we start a new uniformly randomized sample selection
  // after the reset. These fields are updated using atomics. However, race
  // conditions are possible since these are updated individually and not
  // together atomically, resulting in the values being mutually inconsistent.
  // The damage is limited to selecting a wrong sample, which is not something
  // that can cause accumulating or cascading effects.
  // If there were no inconsistencies caused by race conditions, we never send a
  // sample for the previous phase in the next phase's snapshot because
  // ThreadData::SnapshotExecutedTasks doesn't send deltas with 0 count.
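  // Below, only sample_probability_count_ and the two max fields are reset;
  // counts and sums are left intact so later phases can be reported as deltas
  // against the snapshot taken above.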
  base::subtle::NoBarrier_Store(&sample_probability_count_, 0);
  base::subtle::NoBarrier_Store(&run_duration_max_, 0);
  base::subtle::NoBarrier_Store(&queue_duration_max_, 0);
}

//------------------------------------------------------------------------------
DeathDataSnapshot::DeathDataSnapshot()
    : count(-1),
      run_duration_sum(-1),
      run_duration_max(-1),
      run_duration_sample(-1),
      queue_duration_sum(-1),
      queue_duration_max(-1),
      queue_duration_sample(-1) {
}

DeathDataSnapshot::DeathDataSnapshot(int count,
                                     int32_t run_duration_sum,
                                     int32_t run_duration_max,
                                     int32_t run_duration_sample,
                                     int32_t queue_duration_sum,
                                     int32_t queue_duration_max,
                                     int32_t queue_duration_sample)
    : count(count),
      run_duration_sum(run_duration_sum),
      run_duration_max(run_duration_max),
      run_duration_sample(run_duration_sample),
      queue_duration_sum(queue_duration_sum),
      queue_duration_max(queue_duration_max),
      queue_duration_sample(queue_duration_sample) {}

DeathDataSnapshot::~DeathDataSnapshot() {
}

DeathDataSnapshot DeathDataSnapshot::Delta(
    const DeathDataSnapshot& older) const {
  return DeathDataSnapshot(count - older.count,
                           run_duration_sum - older.run_duration_sum,
                           run_duration_max, run_duration_sample,
                           queue_duration_sum - older.queue_duration_sum,
                           queue_duration_max, queue_duration_sample);
}

//------------------------------------------------------------------------------
BirthOnThread::BirthOnThread(const Location& location,
                             const ThreadData& current)
    : location_(location),
      birth_thread_(&current) {
}

//------------------------------------------------------------------------------
BirthOnThreadSnapshot::BirthOnThreadSnapshot() {
}

BirthOnThreadSnapshot::BirthOnThreadSnapshot(const BirthOnThread& birth)
    : location(birth.location()),
      thread_name(birth.birth_thread()->thread_name()) {
}

BirthOnThreadSnapshot::~BirthOnThreadSnapshot() {
}

//------------------------------------------------------------------------------
Births::Births(const Location& location, const ThreadData& current)
    : BirthOnThread(location, current),
      birth_count_(1) { }

int Births::birth_count() const { return birth_count_; }

void Births::RecordBirth() { ++birth_count_; }

//------------------------------------------------------------------------------
// ThreadData maintains the central data for all births and deaths on a single
// thread.

// TODO(jar): We should pull all these static vars together, into a struct, and
// optimize layout so that we benefit from locality of reference during accesses
// to them.

// static
NowFunction* ThreadData::now_function_ = NULL;

// static
bool ThreadData::now_function_is_time_ = false;

// A TLS slot which points to the ThreadData instance for the current thread.
// We do a fake initialization here (zeroing out data), and then the real
// in-place construction happens when we call tls_index_.Initialize().
// static
base::ThreadLocalStorage::StaticSlot ThreadData::tls_index_ = TLS_INITIALIZER;

// static
int ThreadData::worker_thread_data_creation_count_ = 0;

// static
int ThreadData::cleanup_count_ = 0;

// static
int ThreadData::incarnation_counter_ = 0;

// static
ThreadData* ThreadData::all_thread_data_list_head_ = NULL;

// static
ThreadData* ThreadData::first_retired_worker_ = NULL;

// static
base::LazyInstance<base::Lock>::Leaky
    ThreadData::list_lock_ = LAZY_INSTANCE_INITIALIZER;

// static
base::subtle::Atomic32 ThreadData::status_ = ThreadData::UNINITIALIZED;

ThreadData::ThreadData(const std::string& suggested_name)
    : next_(NULL),
      next_retired_worker_(NULL),
      worker_thread_number_(0),
      incarnation_count_for_pool_(-1),
      current_stopwatch_(NULL) {
  DCHECK_GE(suggested_name.size(), 0u);
  thread_name_ = suggested_name;
  PushToHeadOfList();  // Which sets real incarnation_count_for_pool_.
}

ThreadData::ThreadData(int thread_number)
    : next_(NULL),
      next_retired_worker_(NULL),
      worker_thread_number_(thread_number),
      incarnation_count_for_pool_(-1),
      current_stopwatch_(NULL) {
  CHECK_GT(thread_number, 0);
  base::StringAppendF(&thread_name_, "WorkerThread-%d", thread_number);
  PushToHeadOfList();  // Which sets real incarnation_count_for_pool_.
}

ThreadData::~ThreadData() {
}

void ThreadData::PushToHeadOfList() {
  // Toss in a hint of randomness (atop the uninitialized value).
  (void)VALGRIND_MAKE_MEM_DEFINED_IF_ADDRESSABLE(&random_number_,
                                                 sizeof(random_number_));
  MSAN_UNPOISON(&random_number_, sizeof(random_number_));
  random_number_ += static_cast<uint32_t>(this - static_cast<ThreadData*>(0));
  random_number_ ^= (Now() - TrackedTime()).InMilliseconds();

  DCHECK(!next_);
  base::AutoLock lock(*list_lock_.Pointer());
  incarnation_count_for_pool_ = incarnation_counter_;
  next_ = all_thread_data_list_head_;
  all_thread_data_list_head_ = this;
}

// static
ThreadData* ThreadData::first() {
  base::AutoLock lock(*list_lock_.Pointer());
  return all_thread_data_list_head_;
}

ThreadData* ThreadData::next() const { return next_; }

// static
void ThreadData::InitializeThreadContext(const std::string& suggested_name) {
  Initialize();
  ThreadData* current_thread_data =
      reinterpret_cast<ThreadData*>(tls_index_.Get());
  if (current_thread_data)
    return;  // Browser tests instigate this.
  current_thread_data = new ThreadData(suggested_name);
  tls_index_.Set(current_thread_data);
}

// static
ThreadData* ThreadData::Get() {
  if (!tls_index_.initialized())
    return NULL;  // For unittests only.
  ThreadData* registered = reinterpret_cast<ThreadData*>(tls_index_.Get());
  if (registered)
    return registered;

  // We must be a worker thread, since we didn't pre-register.
  ThreadData* worker_thread_data = NULL;
  int worker_thread_number = 0;
  {
    base::AutoLock lock(*list_lock_.Pointer());
    if (first_retired_worker_) {
      worker_thread_data = first_retired_worker_;
      first_retired_worker_ = first_retired_worker_->next_retired_worker_;
      worker_thread_data->next_retired_worker_ = NULL;
    } else {
      worker_thread_number = ++worker_thread_data_creation_count_;
    }
  }

  // If we can't find a previously used instance, then we have to create one.
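  // The worker_thread_number reserved under the lock above gives the new
  // instance a unique "WorkerThread-N" name.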
  if (!worker_thread_data) {
    DCHECK_GT(worker_thread_number, 0);
    worker_thread_data = new ThreadData(worker_thread_number);
  }
  DCHECK_GT(worker_thread_data->worker_thread_number_, 0);

  tls_index_.Set(worker_thread_data);
  return worker_thread_data;
}

// static
void ThreadData::OnThreadTermination(void* thread_data) {
  DCHECK(thread_data);  // TLS should *never* call us with a NULL.
  // We must NOT do any allocations during this callback. There is a chance
  // that the allocator is no longer active on this thread.
  reinterpret_cast<ThreadData*>(thread_data)->OnThreadTerminationCleanup();
}

void ThreadData::OnThreadTerminationCleanup() {
  // The list_lock_ was created when we registered the callback, so it won't be
  // allocated here despite the lazy reference.
  base::AutoLock lock(*list_lock_.Pointer());
  if (incarnation_counter_ != incarnation_count_for_pool_)
    return;  // ThreadData was constructed in an earlier unit test.
  ++cleanup_count_;
  // Only worker threads need to be retired and reused.
  if (!worker_thread_number_) {
    return;
  }
  // We must NOT do any allocations during this callback.
  // Using the simple linked lists avoids all allocations.
  DCHECK_EQ(this->next_retired_worker_, reinterpret_cast<ThreadData*>(NULL));
  this->next_retired_worker_ = first_retired_worker_;
  first_retired_worker_ = this;
}

// static
void ThreadData::Snapshot(int current_profiling_phase,
                          ProcessDataSnapshot* process_data_snapshot) {
  // Get an unchanging copy of a ThreadData list.
  ThreadData* my_list = ThreadData::first();

  // Gather data serially.
  // This hackish approach *can* get some slightly corrupt tallies, as we are
  // grabbing values without the protection of a lock, but it has the advantage
  // of working even with threads that don't have message loops. If a user
  // sees any strangeness, they can always just run their stats gathering a
  // second time.
  BirthCountMap birth_counts;
  for (ThreadData* thread_data = my_list; thread_data;
       thread_data = thread_data->next()) {
    thread_data->SnapshotExecutedTasks(current_profiling_phase,
                                       &process_data_snapshot->phased_snapshots,
                                       &birth_counts);
  }

  // Add births that are still active -- i.e. objects that have tallied a birth,
  // but have not yet tallied a matching death, and hence must be either
  // running, queued up, or being held in limbo for future posting.
  auto* current_phase_tasks =
      &process_data_snapshot->phased_snapshots[current_profiling_phase].tasks;
  for (const auto& birth_count : birth_counts) {
    if (birth_count.second > 0) {
      current_phase_tasks->push_back(
          TaskSnapshot(BirthOnThreadSnapshot(*birth_count.first),
                       DeathDataSnapshot(birth_count.second, 0, 0, 0, 0, 0, 0),
                       "Still_Alive"));
    }
  }
}

// static
void ThreadData::OnProfilingPhaseCompleted(int profiling_phase) {
  // Get an unchanging copy of a ThreadData list.
  ThreadData* my_list = ThreadData::first();

  // Add snapshots for all instances of death data in all threads serially.
  // This hackish approach *can* get some slightly corrupt tallies, as we are
  // grabbing values without the protection of a lock, but it has the advantage
  // of working even with threads that don't have message loops. Any corruption
  // shouldn't cause "cascading damage" to anything else (in later phases).
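  // Each call below takes that thread's map_lock_, snapshots each DeathData
  // for the completed phase, and resets its max/sample state.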
  for (ThreadData* thread_data = my_list; thread_data;
       thread_data = thread_data->next()) {
    thread_data->OnProfilingPhaseCompletedOnThread(profiling_phase);
  }
}

Births* ThreadData::TallyABirth(const Location& location) {
  BirthMap::iterator it = birth_map_.find(location);
  Births* child;
  if (it != birth_map_.end()) {
    child = it->second;
    child->RecordBirth();
  } else {
    child = new Births(location, *this);  // Leak this.
    // Lock since the map may get relocated now, and other threads sometimes
    // snapshot it (but they lock before copying it).
    base::AutoLock lock(map_lock_);
    birth_map_[location] = child;
  }

  return child;
}

void ThreadData::TallyADeath(const Births& births,
                             int32_t queue_duration,
                             const TaskStopwatch& stopwatch) {
  int32_t run_duration = stopwatch.RunDurationMs();

  // Stir in some randomness, plus add constant in case durations are zero.
  const uint32_t kSomePrimeNumber = 2147483647;
  random_number_ += queue_duration + run_duration + kSomePrimeNumber;
  // An address is going to have some randomness to it as well ;-).
  random_number_ ^=
      static_cast<uint32_t>(&births - reinterpret_cast<Births*>(0));

  // We don't have queue durations without OS timer. OS timer is automatically
  // used for task-post-timing, so the use of an alternate timer implies all
  // queue times are invalid, unless it was explicitly said that we can trust
  // the alternate timer.
  if (kAllowAlternateTimeSourceHandling &&
      now_function_ &&
      !now_function_is_time_) {
    queue_duration = 0;
  }

  DeathMap::iterator it = death_map_.find(&births);
  DeathData* death_data;
  if (it != death_map_.end()) {
    death_data = &it->second;
  } else {
    base::AutoLock lock(map_lock_);  // Lock as the map may get relocated now.
    death_data = &death_map_[&births];
  }  // Release lock ASAP.
  death_data->RecordDeath(queue_duration, run_duration, random_number_);
}

// static
Births* ThreadData::TallyABirthIfActive(const Location& location) {
  if (!TrackingStatus())
    return NULL;
  ThreadData* current_thread_data = Get();
  if (!current_thread_data)
    return NULL;
  return current_thread_data->TallyABirth(location);
}

// static
void ThreadData::TallyRunOnNamedThreadIfTracking(
    const base::TrackingInfo& completed_task,
    const TaskStopwatch& stopwatch) {
  // Even if we have been DEACTIVATED, we will process any pending births so
  // that our data structures (which counted the outstanding births) remain
  // consistent.
  const Births* births = completed_task.birth_tally;
  if (!births)
    return;
  ThreadData* current_thread_data = stopwatch.GetThreadData();
  if (!current_thread_data)
    return;

  // Watch out for a race where status_ is changing, and hence one or both
  // of start_of_run or end_of_run is zero. In that case, we didn't bother to
  // get a time value since we "weren't tracking" and we were trying to be
  // efficient by not calling for a genuine time value. For simplicity, we'll
  // use a default zero duration when we can't calculate a true value.
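  // For example, if start_of_run is null because tracking was off when the
  // task started, queue_duration below simply stays 0.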
  TrackedTime start_of_run = stopwatch.StartTime();
  int32_t queue_duration = 0;
  if (!start_of_run.is_null()) {
    queue_duration = (start_of_run - completed_task.EffectiveTimePosted())
        .InMilliseconds();
  }
  current_thread_data->TallyADeath(*births, queue_duration, stopwatch);
}

// static
void ThreadData::TallyRunOnWorkerThreadIfTracking(
    const Births* births,
    const TrackedTime& time_posted,
    const TaskStopwatch& stopwatch) {
  // Even if we have been DEACTIVATED, we will process any pending births so
  // that our data structures (which counted the outstanding births) remain
  // consistent.
  if (!births)
    return;

  // TODO(jar): Support the option to coalesce all worker-thread activity under
  // one ThreadData instance that uses locks to protect *all* access. This will
  // reduce memory (making it provably bounded), but run incrementally slower
  // (since we'll use locks on TallyABirth and TallyADeath). The good news is
  // that the locks on TallyADeath will be *after* the worker thread has run,
  // and hence nothing will be waiting for the completion (... besides some
  // other thread that might like to run). Also, the worker threads' tasks are
  // generally longer, and hence the cost of the lock may perchance be amortized
  // over the long task's lifetime.
  ThreadData* current_thread_data = stopwatch.GetThreadData();
  if (!current_thread_data)
    return;

  TrackedTime start_of_run = stopwatch.StartTime();
  int32_t queue_duration = 0;
  if (!start_of_run.is_null()) {
    queue_duration = (start_of_run - time_posted).InMilliseconds();
  }
  current_thread_data->TallyADeath(*births, queue_duration, stopwatch);
}

// static
void ThreadData::TallyRunInAScopedRegionIfTracking(
    const Births* births,
    const TaskStopwatch& stopwatch) {
  // Even if we have been DEACTIVATED, we will process any pending births so
  // that our data structures (which counted the outstanding births) remain
  // consistent.
  if (!births)
    return;

  ThreadData* current_thread_data = stopwatch.GetThreadData();
  if (!current_thread_data)
    return;

  int32_t queue_duration = 0;
  current_thread_data->TallyADeath(*births, queue_duration, stopwatch);
}

void ThreadData::SnapshotExecutedTasks(
    int current_profiling_phase,
    PhasedProcessDataSnapshotMap* phased_snapshots,
    BirthCountMap* birth_counts) {
  // Get copy of data, so that the data will not change during the iterations
  // and processing.
  BirthMap birth_map;
  DeathsSnapshot deaths;
  SnapshotMaps(current_profiling_phase, &birth_map, &deaths);

  for (const auto& birth : birth_map) {
    (*birth_counts)[birth.second] += birth.second->birth_count();
  }

  for (const auto& death : deaths) {
    (*birth_counts)[death.first] -= death.first->birth_count();

    // For the current death data, walk through all its snapshots, starting from
    // the current one, then from the previous profiling phase etc., and for
    // each snapshot calculate the delta between the snapshot and the previous
    // phase, if any. Store the deltas in the result.
    for (const DeathDataPhaseSnapshot* phase = &death.second; phase;
         phase = phase->prev) {
      const DeathDataSnapshot& death_data =
          phase->prev ?
              phase->death_data.Delta(phase->prev->death_data)
              : phase->death_data;

      if (death_data.count > 0) {
        (*phased_snapshots)[phase->profiling_phase].tasks.push_back(
            TaskSnapshot(BirthOnThreadSnapshot(*death.first), death_data,
                         thread_name()));
      }
    }
  }
}

// This may be called from another thread.
void ThreadData::SnapshotMaps(int profiling_phase,
                              BirthMap* birth_map,
                              DeathsSnapshot* deaths) {
  base::AutoLock lock(map_lock_);

  for (const auto& birth : birth_map_)
    (*birth_map)[birth.first] = birth.second;

  for (const auto& death : death_map_) {
    deaths->push_back(std::make_pair(
        death.first,
        DeathDataPhaseSnapshot(profiling_phase, death.second.count(),
                               death.second.run_duration_sum(),
                               death.second.run_duration_max(),
                               death.second.run_duration_sample(),
                               death.second.queue_duration_sum(),
                               death.second.queue_duration_max(),
                               death.second.queue_duration_sample(),
                               death.second.last_phase_snapshot())));
  }
}

void ThreadData::OnProfilingPhaseCompletedOnThread(int profiling_phase) {
  base::AutoLock lock(map_lock_);

  for (auto& death : death_map_) {
    death.second.OnProfilingPhaseCompleted(profiling_phase);
  }
}

static void OptionallyInitializeAlternateTimer() {
  NowFunction* alternate_time_source = GetAlternateTimeSource();
  if (alternate_time_source)
    ThreadData::SetAlternateTimeSource(alternate_time_source);
}

void ThreadData::Initialize() {
  if (base::subtle::Acquire_Load(&status_) >= DEACTIVATED)
    return;  // Someone else did the initialization.
  // Due to racy lazy initialization in tests, we'll need to recheck status_
  // after we acquire the lock.

  // Ensure that we don't double initialize tls. We are called when single
  // threaded in the product, but some tests may be racy and lazy about our
  // initialization.
  base::AutoLock lock(*list_lock_.Pointer());
  if (base::subtle::Acquire_Load(&status_) >= DEACTIVATED)
    return;  // Someone raced in here and beat us.

  // Put an alternate timer in place if the environment calls for it, such as
  // for tracking TCMalloc allocations. This insertion is idempotent, so we
  // don't mind if there is a race, and we'd prefer not to be in a lock while
  // doing this work.
  if (kAllowAlternateTimeSourceHandling)
    OptionallyInitializeAlternateTimer();

  // Perform the "real" TLS initialization now, and leave it intact through
  // process termination.
  if (!tls_index_.initialized()) {  // Testing may have initialized this.
    DCHECK_EQ(base::subtle::NoBarrier_Load(&status_), UNINITIALIZED);
    tls_index_.Initialize(&ThreadData::OnThreadTermination);
    DCHECK(tls_index_.initialized());
  } else {
    // TLS was initialized for us earlier.
    DCHECK_EQ(base::subtle::NoBarrier_Load(&status_), DORMANT_DURING_TESTS);
  }

  // Incarnation counter is only significant to testing, as it otherwise will
  // never again change in this process.
  ++incarnation_counter_;

  // The lock is not critical for setting status_, but it doesn't hurt. It also
  // ensures that if we have a racy initialization, that we'll bail as soon as
  // we get the lock earlier in this method.
  base::subtle::Release_Store(&status_, kInitialStartupState);
  DCHECK(base::subtle::NoBarrier_Load(&status_) != UNINITIALIZED);
}

// static
void ThreadData::InitializeAndSetTrackingStatus(Status status) {
  DCHECK_GE(status, DEACTIVATED);
  DCHECK_LE(status, PROFILING_ACTIVE);

  Initialize();  // No-op if already initialized.

  if (status > DEACTIVATED)
    status = PROFILING_ACTIVE;
  base::subtle::Release_Store(&status_, status);
}

// static
ThreadData::Status ThreadData::status() {
  return static_cast<ThreadData::Status>(base::subtle::Acquire_Load(&status_));
}

// static
bool ThreadData::TrackingStatus() {
  return base::subtle::Acquire_Load(&status_) > DEACTIVATED;
}

// static
void ThreadData::SetAlternateTimeSource(NowFunction* now_function) {
  DCHECK(now_function);
  if (kAllowAlternateTimeSourceHandling)
    now_function_ = now_function;
}

// static
void ThreadData::EnableProfilerTiming() {
  base::subtle::NoBarrier_Store(&g_profiler_timing_enabled, ENABLED_TIMING);
}

// static
TrackedTime ThreadData::Now() {
  if (kAllowAlternateTimeSourceHandling && now_function_)
    return TrackedTime::FromMilliseconds((*now_function_)());
  if (IsProfilerTimingEnabled() && TrackingStatus())
    return TrackedTime::Now();
  return TrackedTime();  // Super fast when disabled, or not compiled.
}

// static
void ThreadData::EnsureCleanupWasCalled(
    int /* major_threads_shutdown_count */) {
  base::AutoLock lock(*list_lock_.Pointer());
  if (worker_thread_data_creation_count_ == 0)
    return;  // We haven't really run much, and couldn't have leaked.

  // TODO(jar): until this is working on XP, don't run the real test.
#if 0
  // Verify that we've at least shutdown/cleanup the major named threads. The
  // caller should tell us how many thread shutdowns should have taken place by
  // now.
  CHECK_GT(cleanup_count_, major_threads_shutdown_count);
#endif
}

// static
void ThreadData::ShutdownSingleThreadedCleanup(bool leak) {
  // This is only called from test code, where we need to cleanup so that
  // additional tests can be run.
  // We must be single threaded... but be careful anyway.
  InitializeAndSetTrackingStatus(DEACTIVATED);

  ThreadData* thread_data_list;
  {
    base::AutoLock lock(*list_lock_.Pointer());
    thread_data_list = all_thread_data_list_head_;
    all_thread_data_list_head_ = NULL;
    ++incarnation_counter_;
    // To be clean, break apart the retired worker list (though we leak them).
    while (first_retired_worker_) {
      ThreadData* worker = first_retired_worker_;
      CHECK_GT(worker->worker_thread_number_, 0);
      first_retired_worker_ = worker->next_retired_worker_;
      worker->next_retired_worker_ = NULL;
    }
  }

  // Put most global statics back in pristine shape.
  worker_thread_data_creation_count_ = 0;
  cleanup_count_ = 0;
  tls_index_.Set(NULL);
  // Almost UNINITIALIZED.
  base::subtle::Release_Store(&status_, DORMANT_DURING_TESTS);

  // To avoid any chance of racing in unit tests, which is the only place we
  // call this function, we may sometimes leak all the data structures we
  // recovered, as they may still be in use on threads from prior tests!
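  // Note that the leak path below only walks the list; nothing is deleted on
  // that path.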
  if (leak) {
    ThreadData* thread_data = thread_data_list;
    while (thread_data) {
      thread_data = thread_data->next();
    }
    return;
  }

  // When we want to cleanup (on a single thread), here is what we do.

  // Do actual recursive delete in all ThreadData instances.
  while (thread_data_list) {
    ThreadData* next_thread_data = thread_data_list;
    thread_data_list = thread_data_list->next();

    for (BirthMap::iterator it = next_thread_data->birth_map_.begin();
         next_thread_data->birth_map_.end() != it; ++it)
      delete it->second;  // Delete the Birth Records.
    delete next_thread_data;  // Includes all Death Records.
  }
}

//------------------------------------------------------------------------------
TaskStopwatch::TaskStopwatch()
    : wallclock_duration_ms_(0),
      current_thread_data_(NULL),
      excluded_duration_ms_(0),
      parent_(NULL) {
#if DCHECK_IS_ON()
  state_ = CREATED;
  child_ = NULL;
#endif
}

TaskStopwatch::~TaskStopwatch() {
#if DCHECK_IS_ON()
  DCHECK(state_ != RUNNING);
  DCHECK(child_ == NULL);
#endif
}

void TaskStopwatch::Start() {
#if DCHECK_IS_ON()
  DCHECK(state_ == CREATED);
  state_ = RUNNING;
#endif

  start_time_ = ThreadData::Now();

  current_thread_data_ = ThreadData::Get();
  if (!current_thread_data_)
    return;

  parent_ = current_thread_data_->current_stopwatch_;
#if DCHECK_IS_ON()
  if (parent_) {
    DCHECK(parent_->state_ == RUNNING);
    DCHECK(parent_->child_ == NULL);
    parent_->child_ = this;
  }
#endif
  current_thread_data_->current_stopwatch_ = this;
}

void TaskStopwatch::Stop() {
  const TrackedTime end_time = ThreadData::Now();
#if DCHECK_IS_ON()
  DCHECK(state_ == RUNNING);
  state_ = STOPPED;
  DCHECK(child_ == NULL);
#endif

  if (!start_time_.is_null() && !end_time.is_null()) {
    wallclock_duration_ms_ = (end_time - start_time_).InMilliseconds();
  }

  if (!current_thread_data_)
    return;

  DCHECK(current_thread_data_->current_stopwatch_ == this);
  current_thread_data_->current_stopwatch_ = parent_;
  if (!parent_)
    return;

#if DCHECK_IS_ON()
  DCHECK(parent_->state_ == RUNNING);
  DCHECK(parent_->child_ == this);
  parent_->child_ = NULL;
#endif
  parent_->excluded_duration_ms_ += wallclock_duration_ms_;
  parent_ = NULL;
}

TrackedTime TaskStopwatch::StartTime() const {
#if DCHECK_IS_ON()
  DCHECK(state_ != CREATED);
#endif

  return start_time_;
}

int32_t TaskStopwatch::RunDurationMs() const {
#if DCHECK_IS_ON()
  DCHECK(state_ == STOPPED);
#endif

  return wallclock_duration_ms_ - excluded_duration_ms_;
}

ThreadData* TaskStopwatch::GetThreadData() const {
#if DCHECK_IS_ON()
  DCHECK(state_ != CREATED);
#endif

  return current_thread_data_;
}

//------------------------------------------------------------------------------
// DeathDataPhaseSnapshot

DeathDataPhaseSnapshot::DeathDataPhaseSnapshot(
    int profiling_phase,
    int count,
    int32_t run_duration_sum,
    int32_t run_duration_max,
    int32_t run_duration_sample,
    int32_t queue_duration_sum,
    int32_t queue_duration_max,
    int32_t queue_duration_sample,
    const DeathDataPhaseSnapshot* prev)
    : profiling_phase(profiling_phase),
      death_data(count,
                 run_duration_sum,
                 run_duration_max,
                 run_duration_sample,
                 queue_duration_sum,
                 queue_duration_max,
                 queue_duration_sample),
      prev(prev) {}

//------------------------------------------------------------------------------
// TaskSnapshot

TaskSnapshot::TaskSnapshot() {
}

TaskSnapshot::TaskSnapshot(const BirthOnThreadSnapshot& birth,
                           const DeathDataSnapshot& death_data,
                           const std::string& death_thread_name)
    : birth(birth),
      death_data(death_data),
      death_thread_name(death_thread_name) {
}

TaskSnapshot::~TaskSnapshot() {
}

//------------------------------------------------------------------------------
// ProcessDataPhaseSnapshot

ProcessDataPhaseSnapshot::ProcessDataPhaseSnapshot() {
}

ProcessDataPhaseSnapshot::~ProcessDataPhaseSnapshot() {
}

//------------------------------------------------------------------------------
// ProcessDataSnapshot

ProcessDataSnapshot::ProcessDataSnapshot()
#if !defined(OS_NACL)
    : process_id(base::GetCurrentProcId()) {
#else
    : process_id(base::kNullProcessId) {
#endif
}

ProcessDataSnapshot::~ProcessDataSnapshot() {
}

}  // namespace tracked_objects