// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "base/tracked_objects.h"

#include <ctype.h>
#include <limits.h>
#include <stdlib.h>

#include "base/atomicops.h"
#include "base/base_switches.h"
#include "base/command_line.h"
#include "base/compiler_specific.h"
#include "base/debug/leak_annotations.h"
#include "base/logging.h"
#include "base/metrics/histogram_macros.h"
#include "base/numerics/safe_conversions.h"
#include "base/numerics/safe_math.h"
#include "base/process/process_handle.h"
#include "base/third_party/valgrind/memcheck.h"
#include "base/threading/worker_pool.h"
#include "base/tracking_info.h"
#include "build/build_config.h"

using base::TimeDelta;

namespace base {
class TimeDelta;
}

namespace tracked_objects {

namespace {

constexpr char kWorkerThreadSanitizedName[] = "WorkerThread-*";

// When ThreadData is first initialized, should we start in an ACTIVE state to
// record all of the startup-time tasks, or should we start up DEACTIVATED, so
// that we only record after parsing the command line flag --enable-tracking.
// Note that the flag may force either state, so this really controls only the
// period of time up until that flag is parsed. If there is no flag seen, then
// this state may prevail for much or all of the process lifetime.
const ThreadData::Status kInitialStartupState = ThreadData::PROFILING_ACTIVE;

// Possible states of the profiler timing enabledness.
enum {
  UNDEFINED_TIMING,
  ENABLED_TIMING,
  DISABLED_TIMING,
};

// State of the profiler timing enabledness.
base::subtle::Atomic32 g_profiler_timing_enabled = UNDEFINED_TIMING;

// Returns whether profiler timing is enabled. The default is true, but this
// may be overridden by a command-line flag. Some platforms may
// programmatically set this command-line flag to the "off" value if it's not
// specified.
// This in turn can be overridden by explicitly calling
// ThreadData::EnableProfilerTiming, say, based on a field trial.
inline bool IsProfilerTimingEnabled() {
  // Reading |g_profiler_timing_enabled| is done without a barrier because
  // multiple initialization is not an issue, while the barrier can be
  // relatively costly given that this method is sometimes called in a tight
  // loop.
  base::subtle::Atomic32 current_timing_enabled =
      base::subtle::NoBarrier_Load(&g_profiler_timing_enabled);
  if (current_timing_enabled == UNDEFINED_TIMING) {
    if (!base::CommandLine::InitializedForCurrentProcess())
      return true;
    current_timing_enabled =
        (base::CommandLine::ForCurrentProcess()->GetSwitchValueASCII(
             switches::kProfilerTiming) ==
         switches::kProfilerTimingDisabledValue)
            ? DISABLED_TIMING
            : ENABLED_TIMING;
    base::subtle::NoBarrier_Store(&g_profiler_timing_enabled,
                                  current_timing_enabled);
  }
  return current_timing_enabled == ENABLED_TIMING;
}

// Sanitize a thread name by replacing a trailing sequence of digits with "*".
// Examples:
// 1. "BrowserBlockingWorker1/23857" => "BrowserBlockingWorker1/*"
// 2. "Chrome_IOThread" => "Chrome_IOThread"
std::string SanitizeThreadName(const std::string& thread_name) {
  size_t i = thread_name.length();

  while (i > 0 && isdigit(thread_name[i - 1]))
    --i;

  if (i == thread_name.length())
    return thread_name;

  return thread_name.substr(0, i) + '*';
}

}  // namespace

//------------------------------------------------------------------------------
// DeathData tallies durations when a death takes place.

DeathData::DeathData()
    : count_(0),
      sample_probability_count_(0),
      run_duration_sum_(0),
      queue_duration_sum_(0),
      run_duration_max_(0),
      queue_duration_max_(0),
      alloc_ops_(0),
      free_ops_(0),
      allocated_bytes_(0),
      freed_bytes_(0),
      alloc_overhead_bytes_(0),
      max_allocated_bytes_(0),
      run_duration_sample_(0),
      queue_duration_sample_(0),
      last_phase_snapshot_(nullptr) {}

DeathData::DeathData(const DeathData& other)
    : count_(other.count_),
      sample_probability_count_(other.sample_probability_count_),
      run_duration_sum_(other.run_duration_sum_),
      queue_duration_sum_(other.queue_duration_sum_),
      run_duration_max_(other.run_duration_max_),
      queue_duration_max_(other.queue_duration_max_),
      alloc_ops_(other.alloc_ops_),
      free_ops_(other.free_ops_),
      allocated_bytes_(other.allocated_bytes_),
      freed_bytes_(other.freed_bytes_),
      alloc_overhead_bytes_(other.alloc_overhead_bytes_),
      max_allocated_bytes_(other.max_allocated_bytes_),
      run_duration_sample_(other.run_duration_sample_),
      queue_duration_sample_(other.queue_duration_sample_),
      last_phase_snapshot_(nullptr) {
  // This constructor will be used by std::map when adding new DeathData values
  // to the map. At that point, last_phase_snapshot_ is still NULL, so we don't
  // need to worry about ownership transfer.
  DCHECK(other.last_phase_snapshot_ == nullptr);
}

DeathData::~DeathData() {
  while (last_phase_snapshot_) {
    const DeathDataPhaseSnapshot* snapshot = last_phase_snapshot_;
    last_phase_snapshot_ = snapshot->prev;
    delete snapshot;
  }
}

// TODO(jar): I need to see if this macro to optimize branching is worth using.
//
// This macro has no branching, so it is surely fast, and is equivalent to:
//   if (assign_it)
//     target = source;
// We use a macro rather than a template to force this to inline.
// Related code for calculating max is discussed on the web.
#define CONDITIONAL_ASSIGN(assign_it, target, source) \
  ((target) ^= ((target) ^ (source)) & -static_cast<int32_t>(assign_it))

void DeathData::RecordDurations(const int32_t queue_duration,
                                const int32_t run_duration,
                                const uint32_t random_number) {
  // We'll just clamp at INT_MAX, but we should note this in the UI as such.
  if (count_ < INT_MAX)
    base::subtle::NoBarrier_Store(&count_, count_ + 1);

  int sample_probability_count =
      base::subtle::NoBarrier_Load(&sample_probability_count_);
  if (sample_probability_count < INT_MAX)
    ++sample_probability_count;
  base::subtle::NoBarrier_Store(&sample_probability_count_,
                                sample_probability_count);

  base::subtle::NoBarrier_Store(&queue_duration_sum_,
                                queue_duration_sum_ + queue_duration);
  base::subtle::NoBarrier_Store(&run_duration_sum_,
                                run_duration_sum_ + run_duration);

  if (queue_duration_max() < queue_duration)
    base::subtle::NoBarrier_Store(&queue_duration_max_, queue_duration);
  if (run_duration_max() < run_duration)
    base::subtle::NoBarrier_Store(&run_duration_max_, run_duration);

  // Take a uniformly distributed sample over all durations ever supplied
  // during the current profiling phase.
  // The probability that we (instead) use this new sample is
  // 1/sample_probability_count_. This results in a completely uniform
  // selection of the sample (at least when we don't clamp
  // sample_probability_count_... but that should be inconsequentially likely).
  // We ignore the fact that we correlated our selection of a sample to the run
  // and queue times (i.e., we used them to generate random_number).
  CHECK_GT(sample_probability_count, 0);
  if (0 == (random_number % sample_probability_count)) {
    base::subtle::NoBarrier_Store(&queue_duration_sample_, queue_duration);
    base::subtle::NoBarrier_Store(&run_duration_sample_, run_duration);
  }
}

void DeathData::RecordAllocations(const uint32_t alloc_ops,
                                  const uint32_t free_ops,
                                  const uint32_t allocated_bytes,
                                  const uint32_t freed_bytes,
                                  const uint32_t alloc_overhead_bytes,
                                  const uint32_t max_allocated_bytes) {
  // Use saturating arithmetic.
  SaturatingMemberAdd(alloc_ops, &alloc_ops_);
  SaturatingMemberAdd(free_ops, &free_ops_);
  SaturatingMemberAdd(allocated_bytes, &allocated_bytes_);
  SaturatingMemberAdd(freed_bytes, &freed_bytes_);
  SaturatingMemberAdd(alloc_overhead_bytes, &alloc_overhead_bytes_);

  int32_t max = base::saturated_cast<int32_t>(max_allocated_bytes);
  if (max > max_allocated_bytes_)
    base::subtle::NoBarrier_Store(&max_allocated_bytes_, max);
}

void DeathData::OnProfilingPhaseCompleted(int profiling_phase) {
  // Snapshotting and storing current state.
  last_phase_snapshot_ =
      new DeathDataPhaseSnapshot(profiling_phase, *this, last_phase_snapshot_);

  // Not touching fields for which a delta can be computed by comparing with a
  // snapshot from the previous phase. Resetting other fields. Sample values
  // will be reset upon next death recording because sample_probability_count_
  // is set to 0.
  // We avoid resetting to 0 in favor of deltas whenever possible. The reason
  // is that for incrementable fields, resetting to 0 from the snapshot thread
  // potentially in parallel with incrementing in the death thread may result
  // in significant data corruption that has a potential to grow with time. Not
  // resetting incrementable fields and using deltas will cause any
  // off-by-little corruptions to be likely fixed at the next snapshot.
  // The max values are not incrementable, and cannot be deduced using deltas
  // for a given phase. Hence, we have to reset them to 0. But the potential
  // damage is limited to getting the previous phase's max to apply for the
  // next phase, and the error doesn't have a potential to keep growing with
  // new resets.
  // sample_probability_count_ is incrementable, but must be reset to 0 at the
  // phase end, so that we start a new uniformly randomized sample selection
  // after the reset. These fields are updated using atomics. However, race
  // conditions are possible since these are updated individually and not
  // together atomically, resulting in the values being mutually inconsistent.
  // The damage is limited to selecting a wrong sample, which is not something
  // that can cause accumulating or cascading effects.
  // If there were no inconsistencies caused by race conditions, we never send
  // a sample for the previous phase in the next phase's snapshot because
  // ThreadData::SnapshotExecutedTasks doesn't send deltas with 0 count.
  base::subtle::NoBarrier_Store(&sample_probability_count_, 0);
  base::subtle::NoBarrier_Store(&run_duration_max_, 0);
  base::subtle::NoBarrier_Store(&queue_duration_max_, 0);
}

void DeathData::SaturatingMemberAdd(const uint32_t addend,
                                    base::subtle::Atomic32* sum) {
  // Bail quick if no work or already saturated.
  if (addend == 0U || *sum == INT_MAX)
    return;

  base::CheckedNumeric<int32_t> new_sum = *sum;
  new_sum += addend;
  base::subtle::NoBarrier_Store(sum, new_sum.ValueOrDefault(INT_MAX));
}

//------------------------------------------------------------------------------
DeathDataSnapshot::DeathDataSnapshot()
    : count(-1),
      run_duration_sum(-1),
      run_duration_max(-1),
      run_duration_sample(-1),
      queue_duration_sum(-1),
      queue_duration_max(-1),
      queue_duration_sample(-1),
      alloc_ops(-1),
      free_ops(-1),
      allocated_bytes(-1),
      freed_bytes(-1),
      alloc_overhead_bytes(-1),
      max_allocated_bytes(-1) {}

DeathDataSnapshot::DeathDataSnapshot(int count,
                                     int32_t run_duration_sum,
                                     int32_t run_duration_max,
                                     int32_t run_duration_sample,
                                     int32_t queue_duration_sum,
                                     int32_t queue_duration_max,
                                     int32_t queue_duration_sample,
                                     int32_t alloc_ops,
                                     int32_t free_ops,
                                     int32_t allocated_bytes,
                                     int32_t freed_bytes,
                                     int32_t alloc_overhead_bytes,
                                     int32_t max_allocated_bytes)
    : count(count),
      run_duration_sum(run_duration_sum),
      run_duration_max(run_duration_max),
      run_duration_sample(run_duration_sample),
      queue_duration_sum(queue_duration_sum),
      queue_duration_max(queue_duration_max),
      queue_duration_sample(queue_duration_sample),
      alloc_ops(alloc_ops),
      free_ops(free_ops),
      allocated_bytes(allocated_bytes),
      freed_bytes(freed_bytes),
      alloc_overhead_bytes(alloc_overhead_bytes),
      max_allocated_bytes(max_allocated_bytes) {}

DeathDataSnapshot::DeathDataSnapshot(const DeathData& death_data)
    : count(death_data.count()),
      run_duration_sum(death_data.run_duration_sum()),
      run_duration_max(death_data.run_duration_max()),
      run_duration_sample(death_data.run_duration_sample()),
      queue_duration_sum(death_data.queue_duration_sum()),
      queue_duration_max(death_data.queue_duration_max()),
      queue_duration_sample(death_data.queue_duration_sample()),
      alloc_ops(death_data.alloc_ops()),
      free_ops(death_data.free_ops()),
      allocated_bytes(death_data.allocated_bytes()),
      freed_bytes(death_data.freed_bytes()),
      alloc_overhead_bytes(death_data.alloc_overhead_bytes()),
      max_allocated_bytes(death_data.max_allocated_bytes()) {}

DeathDataSnapshot::DeathDataSnapshot(const DeathDataSnapshot& death_data) =
    default;

DeathDataSnapshot::~DeathDataSnapshot() {
}

DeathDataSnapshot DeathDataSnapshot::Delta(
    const DeathDataSnapshot& older) const {
  return DeathDataSnapshot(
      count - older.count, run_duration_sum - older.run_duration_sum,
      run_duration_max, run_duration_sample,
      queue_duration_sum - older.queue_duration_sum, queue_duration_max,
      queue_duration_sample, alloc_ops - older.alloc_ops,
      free_ops - older.free_ops, allocated_bytes - older.allocated_bytes,
      freed_bytes - older.freed_bytes,
      alloc_overhead_bytes - older.alloc_overhead_bytes, max_allocated_bytes);
}

//------------------------------------------------------------------------------
BirthOnThread::BirthOnThread(const Location& location,
                             const ThreadData& current)
    : location_(location),
      birth_thread_(&current) {
}

//------------------------------------------------------------------------------
BirthOnThreadSnapshot::BirthOnThreadSnapshot() {
}

BirthOnThreadSnapshot::BirthOnThreadSnapshot(const BirthOnThread& birth)
    : location(birth.location()),
      sanitized_thread_name(birth.birth_thread()->sanitized_thread_name()) {}

BirthOnThreadSnapshot::~BirthOnThreadSnapshot() {
}

//------------------------------------------------------------------------------
Births::Births(const Location& location, const ThreadData& current)
    : BirthOnThread(location, current),
      birth_count_(1) { }

int Births::birth_count() const { return birth_count_; }

void Births::RecordBirth() { ++birth_count_; }

//------------------------------------------------------------------------------
// ThreadData maintains the central data for all births and deaths on a single
// thread.

// TODO(jar): We should pull all these static vars together, into a struct, and
// optimize layout so that we benefit from locality of reference during
// accesses to them.

// static
ThreadData::NowFunction* ThreadData::now_function_for_testing_ = NULL;

// A TLS slot which points to the ThreadData instance for the current thread.
// We do a fake initialization here (zeroing out data), and then the real
// in-place construction happens when we call tls_index_.Initialize().
// static
base::ThreadLocalStorage::StaticSlot ThreadData::tls_index_ = TLS_INITIALIZER;

// static
int ThreadData::cleanup_count_ = 0;

// static
int ThreadData::incarnation_counter_ = 0;

// static
ThreadData* ThreadData::all_thread_data_list_head_ = NULL;

// static
ThreadData* ThreadData::first_retired_thread_data_ = NULL;

// static
base::LazyInstance<base::Lock>::Leaky
    ThreadData::list_lock_ = LAZY_INSTANCE_INITIALIZER;

// static
base::subtle::Atomic32 ThreadData::status_ = ThreadData::UNINITIALIZED;

ThreadData::ThreadData(const std::string& sanitized_thread_name)
    : next_(NULL),
      next_retired_thread_data_(NULL),
      sanitized_thread_name_(sanitized_thread_name),
      incarnation_count_for_pool_(-1),
      current_stopwatch_(NULL) {
  DCHECK(sanitized_thread_name_.empty() ||
         !isdigit(sanitized_thread_name_.back()));
  PushToHeadOfList();  // Which sets real incarnation_count_for_pool_.
}

ThreadData::~ThreadData() {
}

void ThreadData::PushToHeadOfList() {
  // Toss in a hint of randomness (atop the uninitialized value).
  (void)VALGRIND_MAKE_MEM_DEFINED_IF_ADDRESSABLE(&random_number_,
                                                 sizeof(random_number_));
  MSAN_UNPOISON(&random_number_, sizeof(random_number_));
  random_number_ += static_cast<uint32_t>(this - static_cast<ThreadData*>(0));
  random_number_ ^= (Now() - TrackedTime()).InMilliseconds();

  DCHECK(!next_);
  base::AutoLock lock(*list_lock_.Pointer());
  incarnation_count_for_pool_ = incarnation_counter_;
  next_ = all_thread_data_list_head_;
  all_thread_data_list_head_ = this;
}

// static
ThreadData* ThreadData::first() {
  base::AutoLock lock(*list_lock_.Pointer());
  return all_thread_data_list_head_;
}

ThreadData* ThreadData::next() const { return next_; }

// static
void ThreadData::InitializeThreadContext(const std::string& thread_name) {
  if (base::WorkerPool::RunsTasksOnCurrentThread())
    return;
  DCHECK_NE(thread_name, kWorkerThreadSanitizedName);
  EnsureTlsInitialization();
  ThreadData* current_thread_data =
      reinterpret_cast<ThreadData*>(tls_index_.Get());
  if (current_thread_data)
    return;  // Browser tests instigate this.
  current_thread_data =
      GetRetiredOrCreateThreadData(SanitizeThreadName(thread_name));
  tls_index_.Set(current_thread_data);
}

// static
ThreadData* ThreadData::Get() {
  if (!tls_index_.initialized())
    return NULL;  // For unittests only.
  ThreadData* registered = reinterpret_cast<ThreadData*>(tls_index_.Get());
  if (registered)
    return registered;

  // We must be a worker thread, since we didn't pre-register.
  ThreadData* worker_thread_data =
      GetRetiredOrCreateThreadData(kWorkerThreadSanitizedName);
  tls_index_.Set(worker_thread_data);
  return worker_thread_data;
}

// static
void ThreadData::OnThreadTermination(void* thread_data) {
  DCHECK(thread_data);  // TLS should *never* call us with a NULL.
  // We must NOT do any allocations during this callback. There is a chance
  // that the allocator is no longer active on this thread.
  reinterpret_cast<ThreadData*>(thread_data)->OnThreadTerminationCleanup();
}

void ThreadData::OnThreadTerminationCleanup() {
  // We must NOT do any allocations during this callback. There is a chance
  // that the allocator is no longer active on this thread.

  // The list_lock_ was created when we registered the callback, so it won't be
  // allocated here despite the lazy reference.
  base::AutoLock lock(*list_lock_.Pointer());
  if (incarnation_counter_ != incarnation_count_for_pool_)
    return;  // ThreadData was constructed in an earlier unit test.
  ++cleanup_count_;

  // Add this ThreadData to a retired list so that it can be reused by a thread
  // with the same sanitized name in the future.
  // |next_retired_thread_data_| is expected to be nullptr for a ThreadData
  // associated with an active thread.
  DCHECK(!next_retired_thread_data_);
  next_retired_thread_data_ = first_retired_thread_data_;
  first_retired_thread_data_ = this;
}

// static
void ThreadData::Snapshot(int current_profiling_phase,
                          ProcessDataSnapshot* process_data_snapshot) {
  // Get an unchanging copy of a ThreadData list.
  ThreadData* my_list = ThreadData::first();

  // Gather data serially.
  // This hackish approach *can* get some slightly corrupt tallies, as we are
  // grabbing values without the protection of a lock, but it has the advantage
  // of working even with threads that don't have message loops. If a user
  // sees any strangeness, they can always just run their stats gathering a
  // second time.
  BirthCountMap birth_counts;
  for (ThreadData* thread_data = my_list; thread_data;
       thread_data = thread_data->next()) {
    thread_data->SnapshotExecutedTasks(current_profiling_phase,
                                       &process_data_snapshot->phased_snapshots,
                                       &birth_counts);
  }

  // Add births that are still active -- i.e. objects that have tallied a
  // birth, but have not yet tallied a matching death, and hence must be either
  // running, queued up, or being held in limbo for future posting.
  auto* current_phase_tasks =
      &process_data_snapshot->phased_snapshots[current_profiling_phase].tasks;
  for (const auto& birth_count : birth_counts) {
    if (birth_count.second > 0) {
      current_phase_tasks->push_back(
          TaskSnapshot(BirthOnThreadSnapshot(*birth_count.first),
                       DeathDataSnapshot(birth_count.second, 0, 0, 0, 0, 0, 0,
                                         0, 0, 0, 0, 0, 0),
                       "Still_Alive"));
    }
  }
}

// static
void ThreadData::OnProfilingPhaseCompleted(int profiling_phase) {
  // Get an unchanging copy of a ThreadData list.
  ThreadData* my_list = ThreadData::first();

  // Add snapshots for all instances of death data in all threads serially.
  // This hackish approach *can* get some slightly corrupt tallies, as we are
  // grabbing values without the protection of a lock, but it has the advantage
  // of working even with threads that don't have message loops. Any corruption
  // shouldn't cause "cascading damage" to anything else (in later phases).
  for (ThreadData* thread_data = my_list; thread_data;
       thread_data = thread_data->next()) {
    thread_data->OnProfilingPhaseCompletedOnThread(profiling_phase);
  }
}

Births* ThreadData::TallyABirth(const Location& location) {
  BirthMap::iterator it = birth_map_.find(location);
  Births* child;
  if (it != birth_map_.end()) {
    child = it->second;
    child->RecordBirth();
  } else {
    child = new Births(location, *this);  // Leak this.
    // Lock since the map may get relocated now, and other threads sometimes
    // snapshot it (but they lock before copying it).
    base::AutoLock lock(map_lock_);
    birth_map_[location] = child;
  }

  return child;
}

void ThreadData::TallyADeath(const Births& births,
                             int32_t queue_duration,
                             const TaskStopwatch& stopwatch) {
  int32_t run_duration = stopwatch.RunDurationMs();

  // Stir in some randomness, plus add a constant in case durations are zero.
  const uint32_t kSomePrimeNumber = 2147483647;
  random_number_ += queue_duration + run_duration + kSomePrimeNumber;
  // An address is going to have some randomness to it as well ;-).
  random_number_ ^=
      static_cast<uint32_t>(&births - reinterpret_cast<Births*>(0));

  DeathMap::iterator it = death_map_.find(&births);
  DeathData* death_data;
  if (it != death_map_.end()) {
    death_data = &it->second;
  } else {
    base::AutoLock lock(map_lock_);  // Lock as the map may get relocated now.
    death_data = &death_map_[&births];
  }  // Release lock ASAP.
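  // Record this death. RecordDurations() also retains one uniformly selected
  // (queue, run) duration pair per profiling phase (reservoir sampling with a
  // reservoir of one), using |random_number_| as its source of randomness.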
  death_data->RecordDurations(queue_duration, run_duration, random_number_);

#if BUILDFLAG(ENABLE_MEMORY_TASK_PROFILER)
  if (stopwatch.heap_tracking_enabled()) {
    base::debug::ThreadHeapUsage heap_usage = stopwatch.heap_usage().usage();
    // Saturate the 64 bit counts on conversion to 32 bit storage.
    death_data->RecordAllocations(
        base::saturated_cast<int32_t>(heap_usage.alloc_ops),
        base::saturated_cast<int32_t>(heap_usage.free_ops),
        base::saturated_cast<int32_t>(heap_usage.alloc_bytes),
        base::saturated_cast<int32_t>(heap_usage.free_bytes),
        base::saturated_cast<int32_t>(heap_usage.alloc_overhead_bytes),
        base::saturated_cast<int32_t>(heap_usage.max_allocated_bytes));
  }
#endif
}

// static
Births* ThreadData::TallyABirthIfActive(const Location& location) {
  if (!TrackingStatus())
    return NULL;
  ThreadData* current_thread_data = Get();
  if (!current_thread_data)
    return NULL;
  return current_thread_data->TallyABirth(location);
}

// static
void ThreadData::TallyRunOnNamedThreadIfTracking(
    const base::TrackingInfo& completed_task,
    const TaskStopwatch& stopwatch) {
  // Even if we have been DEACTIVATED, we will process any pending births so
  // that our data structures (which counted the outstanding births) remain
  // consistent.
  const Births* births = completed_task.birth_tally;
  if (!births)
    return;
  ThreadData* current_thread_data = stopwatch.GetThreadData();
  if (!current_thread_data)
    return;

  // Watch out for a race where status_ is changing, and hence one or both
  // of start_of_run or end_of_run is zero. In that case, we didn't bother to
  // get a time value since we "weren't tracking" and we were trying to be
  // efficient by not calling for a genuine time value. For simplicity, we'll
  // use a default zero duration when we can't calculate a true value.
  TrackedTime start_of_run = stopwatch.StartTime();
  int32_t queue_duration = 0;
  if (!start_of_run.is_null()) {
    queue_duration = (start_of_run - completed_task.EffectiveTimePosted())
                         .InMilliseconds();
  }
  current_thread_data->TallyADeath(*births, queue_duration, stopwatch);
}

// static
void ThreadData::TallyRunOnWorkerThreadIfTracking(
    const Births* births,
    const TrackedTime& time_posted,
    const TaskStopwatch& stopwatch) {
  // Even if we have been DEACTIVATED, we will process any pending births so
  // that our data structures (which counted the outstanding births) remain
  // consistent.
  if (!births)
    return;

  // TODO(jar): Support the option to coalesce all worker-thread activity under
  // one ThreadData instance that uses locks to protect *all* access. This will
  // reduce memory (making it provably bounded), but run incrementally slower
  // (since we'll use locks on TallyABirth and TallyADeath). The good news is
  // that the locks on TallyADeath will be *after* the worker thread has run,
  // and hence nothing will be waiting for the completion (... besides some
  // other thread that might like to run). Also, the worker threads' tasks are
  // generally longer, and hence the cost of the lock may perchance be
  // amortized over the long task's lifetime.
  ThreadData* current_thread_data = stopwatch.GetThreadData();
  if (!current_thread_data)
    return;

  TrackedTime start_of_run = stopwatch.StartTime();
  int32_t queue_duration = 0;
  if (!start_of_run.is_null()) {
    queue_duration = (start_of_run - time_posted).InMilliseconds();
  }
  current_thread_data->TallyADeath(*births, queue_duration, stopwatch);
}

// static
void ThreadData::TallyRunInAScopedRegionIfTracking(
    const Births* births,
    const TaskStopwatch& stopwatch) {
  // Even if we have been DEACTIVATED, we will process any pending births so
  // that our data structures (which counted the outstanding births) remain
  // consistent.
  if (!births)
    return;

  ThreadData* current_thread_data = stopwatch.GetThreadData();
  if (!current_thread_data)
    return;

  int32_t queue_duration = 0;
  current_thread_data->TallyADeath(*births, queue_duration, stopwatch);
}

void ThreadData::SnapshotExecutedTasks(
    int current_profiling_phase,
    PhasedProcessDataSnapshotMap* phased_snapshots,
    BirthCountMap* birth_counts) {
  // Get a copy of the data, so that the data will not change during the
  // iterations and processing.
  BirthMap birth_map;
  DeathsSnapshot deaths;
  SnapshotMaps(current_profiling_phase, &birth_map, &deaths);

  for (const auto& birth : birth_map) {
    (*birth_counts)[birth.second] += birth.second->birth_count();
  }

  for (const auto& death : deaths) {
    (*birth_counts)[death.first] -= death.first->birth_count();

    // For the current death data, walk through all its snapshots, starting
    // from the current one, then from the previous profiling phase etc., and
    // for each snapshot calculate the delta between the snapshot and the
    // previous phase, if any. Store the deltas in the result.
    for (const DeathDataPhaseSnapshot* phase = &death.second; phase;
         phase = phase->prev) {
      const DeathDataSnapshot& death_data =
          phase->prev ? phase->death_data.Delta(phase->prev->death_data)
                      : phase->death_data;

      if (death_data.count > 0) {
        (*phased_snapshots)[phase->profiling_phase].tasks.push_back(
            TaskSnapshot(BirthOnThreadSnapshot(*death.first), death_data,
                         sanitized_thread_name()));
      }
    }
  }
}

// This may be called from another thread.
void ThreadData::SnapshotMaps(int profiling_phase,
                              BirthMap* birth_map,
                              DeathsSnapshot* deaths) {
  base::AutoLock lock(map_lock_);

  for (const auto& birth : birth_map_)
    (*birth_map)[birth.first] = birth.second;

  for (const auto& death : death_map_) {
    deaths->push_back(std::make_pair(
        death.first,
        DeathDataPhaseSnapshot(profiling_phase, death.second,
                               death.second.last_phase_snapshot())));
  }
}

void ThreadData::OnProfilingPhaseCompletedOnThread(int profiling_phase) {
  base::AutoLock lock(map_lock_);

  for (auto& death : death_map_) {
    death.second.OnProfilingPhaseCompleted(profiling_phase);
  }
}

void ThreadData::EnsureTlsInitialization() {
  if (base::subtle::Acquire_Load(&status_) >= DEACTIVATED)
    return;  // Someone else did the initialization.
  // Due to racy lazy initialization in tests, we'll need to recheck status_
  // after we acquire the lock.

  // Ensure that we don't double initialize TLS. We are called when single
  // threaded in the product, but some tests may be racy and lazy about our
  // initialization.
  base::AutoLock lock(*list_lock_.Pointer());
  if (base::subtle::Acquire_Load(&status_) >= DEACTIVATED)
    return;  // Someone raced in here and beat us.

  // Perform the "real" TLS initialization now, and leave it intact through
  // process termination.
  if (!tls_index_.initialized()) {  // Testing may have initialized this.
    DCHECK_EQ(base::subtle::NoBarrier_Load(&status_), UNINITIALIZED);
    tls_index_.Initialize(&ThreadData::OnThreadTermination);
    DCHECK(tls_index_.initialized());
  } else {
    // TLS was initialized for us earlier.
    DCHECK_EQ(base::subtle::NoBarrier_Load(&status_), DORMANT_DURING_TESTS);
  }

  // Incarnation counter is only significant to testing, as it otherwise will
  // never again change in this process.
  ++incarnation_counter_;

  // The lock is not critical for setting status_, but it doesn't hurt. It also
  // ensures that if we have a racy initialization, we'll bail as soon as
  // we get the lock earlier in this method.
  base::subtle::Release_Store(&status_, kInitialStartupState);
  DCHECK(base::subtle::NoBarrier_Load(&status_) != UNINITIALIZED);
}

// static
void ThreadData::InitializeAndSetTrackingStatus(Status status) {
  DCHECK_GE(status, DEACTIVATED);
  DCHECK_LE(status, PROFILING_ACTIVE);

  EnsureTlsInitialization();  // No-op if already initialized.

  if (status > DEACTIVATED)
    status = PROFILING_ACTIVE;

  base::subtle::Release_Store(&status_, status);
}

// static
ThreadData::Status ThreadData::status() {
  return static_cast<ThreadData::Status>(base::subtle::Acquire_Load(&status_));
}

// static
bool ThreadData::TrackingStatus() {
  return base::subtle::Acquire_Load(&status_) > DEACTIVATED;
}

// static
void ThreadData::EnableProfilerTiming() {
  base::subtle::NoBarrier_Store(&g_profiler_timing_enabled, ENABLED_TIMING);
}

// static
TrackedTime ThreadData::Now() {
  if (now_function_for_testing_)
    return TrackedTime::FromMilliseconds((*now_function_for_testing_)());
  if (IsProfilerTimingEnabled() && TrackingStatus())
    return TrackedTime::Now();
  return TrackedTime();  // Super fast when disabled, or not compiled.
}

// static
void ThreadData::EnsureCleanupWasCalled(int major_threads_shutdown_count) {
  base::AutoLock lock(*list_lock_.Pointer());

  // TODO(jar): until this is working on XP, don't run the real test.
#if 0
  // Verify that we've at least shut down/cleaned up the major named threads.
  // The caller should tell us how many thread shutdowns should have taken
  // place by now.
  CHECK_GT(cleanup_count_, major_threads_shutdown_count);
#endif
}

// static
void ThreadData::ShutdownSingleThreadedCleanup(bool leak) {
  // This is only called from test code, where we need to clean up so that
  // additional tests can be run.
  // We must be single threaded... but be careful anyway.
  InitializeAndSetTrackingStatus(DEACTIVATED);

  ThreadData* thread_data_list;
  {
    base::AutoLock lock(*list_lock_.Pointer());
    thread_data_list = all_thread_data_list_head_;
    all_thread_data_list_head_ = NULL;
    ++incarnation_counter_;
    // To be clean, break apart the retired worker list (though we leak them).
    while (first_retired_thread_data_) {
      ThreadData* thread_data = first_retired_thread_data_;
      first_retired_thread_data_ = thread_data->next_retired_thread_data_;
      thread_data->next_retired_thread_data_ = nullptr;
    }
  }

  // Put most global statics back in pristine shape.
  cleanup_count_ = 0;
  tls_index_.Set(NULL);
  // Almost UNINITIALIZED.
  base::subtle::Release_Store(&status_, DORMANT_DURING_TESTS);

  // To avoid any chance of racing in unit tests, which is the only place we
  // call this function, we may sometimes leak all the data structures we
  // recovered, as they may still be in use on threads from prior tests!
  if (leak) {
    ThreadData* thread_data = thread_data_list;
    while (thread_data) {
      ANNOTATE_LEAKING_OBJECT_PTR(thread_data);
      thread_data = thread_data->next();
    }
    return;
  }

  // When we want to cleanup (on a single thread), here is what we do.

  // Do actual recursive delete in all ThreadData instances.
  while (thread_data_list) {
    ThreadData* next_thread_data = thread_data_list;
    thread_data_list = thread_data_list->next();

    for (BirthMap::iterator it = next_thread_data->birth_map_.begin();
         next_thread_data->birth_map_.end() != it; ++it)
      delete it->second;  // Delete the Birth Records.
    delete next_thread_data;  // Includes all Death Records.
  }
}

// static
ThreadData* ThreadData::GetRetiredOrCreateThreadData(
    const std::string& sanitized_thread_name) {
  SCOPED_UMA_HISTOGRAM_TIMER("TrackedObjects.GetRetiredOrCreateThreadData");

  {
    base::AutoLock lock(*list_lock_.Pointer());
    ThreadData** pcursor = &first_retired_thread_data_;
    ThreadData* cursor = first_retired_thread_data_;

    // Assuming that there aren't more than a few tens of retired ThreadData
    // instances, this lookup should be quick compared to the thread creation
    // time. Retired ThreadData instances cannot be stored in a map because
    // insertions are done from OnThreadTerminationCleanup() where allocations
    // are not allowed.
    //
    // Note: Test processes may have more than a few tens of retired ThreadData
    // instances.
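    // Walk the singly linked retired list; on a name match, unlink the entry
    // in place through |pcursor| (which points at the previous link) and hand
    // it back for reuse.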
    while (cursor) {
      if (cursor->sanitized_thread_name() == sanitized_thread_name) {
        DCHECK_EQ(*pcursor, cursor);
        *pcursor = cursor->next_retired_thread_data_;
        cursor->next_retired_thread_data_ = nullptr;
        return cursor;
      }
      pcursor = &cursor->next_retired_thread_data_;
      cursor = cursor->next_retired_thread_data_;
    }
  }

  return new ThreadData(sanitized_thread_name);
}

//------------------------------------------------------------------------------
TaskStopwatch::TaskStopwatch()
    : wallclock_duration_ms_(0),
      current_thread_data_(NULL),
      excluded_duration_ms_(0),
      parent_(NULL) {
#if DCHECK_IS_ON()
  state_ = CREATED;
  child_ = NULL;
#endif
#if BUILDFLAG(ENABLE_MEMORY_TASK_PROFILER)
  heap_tracking_enabled_ =
      base::debug::ThreadHeapUsageTracker::IsHeapTrackingEnabled();
#endif
}

TaskStopwatch::~TaskStopwatch() {
#if DCHECK_IS_ON()
  DCHECK(state_ != RUNNING);
  DCHECK(child_ == NULL);
#endif
}

void TaskStopwatch::Start() {
#if DCHECK_IS_ON()
  DCHECK(state_ == CREATED);
  state_ = RUNNING;
#endif

  start_time_ = ThreadData::Now();
#if BUILDFLAG(ENABLE_MEMORY_TASK_PROFILER)
  if (heap_tracking_enabled_)
    heap_usage_.Start();
#endif

  current_thread_data_ = ThreadData::Get();
  if (!current_thread_data_)
    return;

  parent_ = current_thread_data_->current_stopwatch_;
#if DCHECK_IS_ON()
  if (parent_) {
    DCHECK(parent_->state_ == RUNNING);
    DCHECK(parent_->child_ == NULL);
    parent_->child_ = this;
  }
#endif
  current_thread_data_->current_stopwatch_ = this;
}

void TaskStopwatch::Stop() {
  const TrackedTime end_time = ThreadData::Now();
#if DCHECK_IS_ON()
  DCHECK(state_ == RUNNING);
  state_ = STOPPED;
  DCHECK(child_ == NULL);
#endif
#if BUILDFLAG(ENABLE_MEMORY_TASK_PROFILER)
  if (heap_tracking_enabled_)
    heap_usage_.Stop(true);
#endif

  if (!start_time_.is_null() && !end_time.is_null()) {
    wallclock_duration_ms_ = (end_time - start_time_).InMilliseconds();
  }

  if (!current_thread_data_)
    return;

  DCHECK(current_thread_data_->current_stopwatch_ == this);
  current_thread_data_->current_stopwatch_ = parent_;
  if (!parent_)
    return;

#if DCHECK_IS_ON()
  DCHECK(parent_->state_ == RUNNING);
  DCHECK(parent_->child_ == this);
  parent_->child_ = NULL;
#endif
  parent_->excluded_duration_ms_ += wallclock_duration_ms_;
  parent_ = NULL;
}

TrackedTime TaskStopwatch::StartTime() const {
#if DCHECK_IS_ON()
  DCHECK(state_ != CREATED);
#endif

  return start_time_;
}

int32_t TaskStopwatch::RunDurationMs() const {
#if DCHECK_IS_ON()
  DCHECK(state_ == STOPPED);
#endif

  return wallclock_duration_ms_ - excluded_duration_ms_;
}

ThreadData* TaskStopwatch::GetThreadData() const {
#if DCHECK_IS_ON()
  DCHECK(state_ != CREATED);
#endif

  return current_thread_data_;
}

//------------------------------------------------------------------------------
// DeathDataPhaseSnapshot

DeathDataPhaseSnapshot::DeathDataPhaseSnapshot(
    int profiling_phase,
    const DeathData& death,
    const DeathDataPhaseSnapshot* prev)
    : profiling_phase(profiling_phase), death_data(death), prev(prev) {}

//------------------------------------------------------------------------------
// TaskSnapshot

TaskSnapshot::TaskSnapshot() {
}

TaskSnapshot::TaskSnapshot(const BirthOnThreadSnapshot& birth,
                           const DeathDataSnapshot& death_data,
                           const std::string& death_sanitized_thread_name)
    : birth(birth),
      death_data(death_data),
      death_sanitized_thread_name(death_sanitized_thread_name) {}

TaskSnapshot::~TaskSnapshot() {
}

//------------------------------------------------------------------------------
// ProcessDataPhaseSnapshot

ProcessDataPhaseSnapshot::ProcessDataPhaseSnapshot() {
}

ProcessDataPhaseSnapshot::ProcessDataPhaseSnapshot(
    const ProcessDataPhaseSnapshot& other) = default;

ProcessDataPhaseSnapshot::~ProcessDataPhaseSnapshot() {
}

//------------------------------------------------------------------------------
// ProcessDataSnapshot

ProcessDataSnapshot::ProcessDataSnapshot()
#if !defined(OS_NACL)
    : process_id(base::GetCurrentProcId()) {
#else
    : process_id(base::kNullProcessId) {
#endif
}

ProcessDataSnapshot::ProcessDataSnapshot(const ProcessDataSnapshot& other) =
    default;

ProcessDataSnapshot::~ProcessDataSnapshot() {
}

}  // namespace tracked_objects