      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "base/tracked_objects.h"
      6 
      7 #include <ctype.h>
      8 #include <limits.h>
      9 #include <stdlib.h>
     10 
     11 #include "base/atomicops.h"
     12 #include "base/base_switches.h"
     13 #include "base/command_line.h"
     14 #include "base/compiler_specific.h"
     15 #include "base/debug/leak_annotations.h"
     16 #include "base/logging.h"
     17 #include "base/metrics/histogram_macros.h"
     18 #include "base/numerics/safe_conversions.h"
     19 #include "base/numerics/safe_math.h"
     20 #include "base/process/process_handle.h"
     21 #include "base/third_party/valgrind/memcheck.h"
     22 #include "base/threading/worker_pool.h"
     23 #include "base/tracking_info.h"
     24 #include "build/build_config.h"
     25 
     26 using base::TimeDelta;
     27 
     28 namespace base {
     29 class TimeDelta;
     30 }
     31 
     32 namespace tracked_objects {
     33 
     34 namespace {
     35 
     36 constexpr char kWorkerThreadSanitizedName[] = "WorkerThread-*";
     37 
     38 // When ThreadData is first initialized, should we start in an ACTIVE state to
     39 // record all of the startup-time tasks, or should we start up DEACTIVATED, so
      40 // that we only record after parsing the command-line flag --enable-tracking?
     41 // Note that the flag may force either state, so this really controls only the
     42 // period of time up until that flag is parsed.  If there is no flag seen, then
     43 // this state may prevail for much or all of the process lifetime.
     44 const ThreadData::Status kInitialStartupState = ThreadData::PROFILING_ACTIVE;
     45 
     46 // Possible states of the profiler timing enabledness.
     47 enum {
     48   UNDEFINED_TIMING,
     49   ENABLED_TIMING,
     50   DISABLED_TIMING,
     51 };
     52 
     53 // State of the profiler timing enabledness.
     54 base::subtle::Atomic32 g_profiler_timing_enabled = UNDEFINED_TIMING;
     55 
     56 // Returns whether profiler timing is enabled.  The default is true, but this
     57 // may be overridden by a command-line flag.  Some platforms may
     58 // programmatically set this command-line flag to the "off" value if it's not
     59 // specified.
     60 // This in turn can be overridden by explicitly calling
     61 // ThreadData::EnableProfilerTiming, say, based on a field trial.
     62 inline bool IsProfilerTimingEnabled() {
      63   // Reading |g_profiler_timing_enabled| is done without a barrier because
      64   // multiple initialization is not an issue, while a barrier would be relatively
      65   // costly given that this method is sometimes called in a tight loop.
     66   base::subtle::Atomic32 current_timing_enabled =
     67       base::subtle::NoBarrier_Load(&g_profiler_timing_enabled);
     68   if (current_timing_enabled == UNDEFINED_TIMING) {
     69     if (!base::CommandLine::InitializedForCurrentProcess())
     70       return true;
     71     current_timing_enabled =
     72         (base::CommandLine::ForCurrentProcess()->GetSwitchValueASCII(
     73              switches::kProfilerTiming) ==
     74          switches::kProfilerTimingDisabledValue)
     75             ? DISABLED_TIMING
     76             : ENABLED_TIMING;
     77     base::subtle::NoBarrier_Store(&g_profiler_timing_enabled,
     78                                   current_timing_enabled);
     79   }
     80   return current_timing_enabled == ENABLED_TIMING;
     81 }
     82 
     83 // Sanitize a thread name by replacing trailing sequence of digits with "*".
     84 // Examples:
     85 // 1. "BrowserBlockingWorker1/23857" => "BrowserBlockingWorker1/*"
     86 // 2. "Chrome_IOThread" => "Chrome_IOThread"
     87 std::string SanitizeThreadName(const std::string& thread_name) {
     88   size_t i = thread_name.length();
     89 
     90   while (i > 0 && isdigit(thread_name[i - 1]))
     91     --i;
     92 
     93   if (i == thread_name.length())
     94     return thread_name;
     95 
     96   return thread_name.substr(0, i) + '*';
     97 }
     98 
     99 }  // namespace
    100 
    101 //------------------------------------------------------------------------------
    102 // DeathData tallies durations when a death takes place.
    103 
    104 DeathData::DeathData()
    105     : count_(0),
    106       sample_probability_count_(0),
    107       run_duration_sum_(0),
    108       queue_duration_sum_(0),
    109       run_duration_max_(0),
    110       queue_duration_max_(0),
    111       alloc_ops_(0),
    112       free_ops_(0),
    113       allocated_bytes_(0),
    114       freed_bytes_(0),
    115       alloc_overhead_bytes_(0),
    116       max_allocated_bytes_(0),
    117       run_duration_sample_(0),
    118       queue_duration_sample_(0),
    119       last_phase_snapshot_(nullptr) {}
    120 
    121 DeathData::DeathData(const DeathData& other)
    122     : count_(other.count_),
    123       sample_probability_count_(other.sample_probability_count_),
    124       run_duration_sum_(other.run_duration_sum_),
    125       queue_duration_sum_(other.queue_duration_sum_),
    126       run_duration_max_(other.run_duration_max_),
    127       queue_duration_max_(other.queue_duration_max_),
    128       alloc_ops_(other.alloc_ops_),
    129       free_ops_(other.free_ops_),
    130       allocated_bytes_(other.allocated_bytes_),
    131       freed_bytes_(other.freed_bytes_),
    132       alloc_overhead_bytes_(other.alloc_overhead_bytes_),
    133       max_allocated_bytes_(other.max_allocated_bytes_),
    134       run_duration_sample_(other.run_duration_sample_),
    135       queue_duration_sample_(other.queue_duration_sample_),
    136       last_phase_snapshot_(nullptr) {
    137   // This constructor will be used by std::map when adding new DeathData values
    138   // to the map.  At that point, last_phase_snapshot_ is still NULL, so we don't
    139   // need to worry about ownership transfer.
    140   DCHECK(other.last_phase_snapshot_ == nullptr);
    141 }
    142 
    143 DeathData::~DeathData() {
    144   while (last_phase_snapshot_) {
    145     const DeathDataPhaseSnapshot* snapshot = last_phase_snapshot_;
    146     last_phase_snapshot_ = snapshot->prev;
    147     delete snapshot;
    148   }
    149 }
    150 
    151 // TODO(jar): I need to see if this macro to optimize branching is worth using.
    152 //
    153 // This macro has no branching, so it is surely fast, and is equivalent to:
    154 //             if (assign_it)
    155 //               target = source;
    156 // We use a macro rather than a template to force this to inline.
    157 // Related code for calculating max is discussed on the web.
    158 #define CONDITIONAL_ASSIGN(assign_it, target, source) \
    159   ((target) ^= ((target) ^ (source)) & -static_cast<int32_t>(assign_it))
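         // Illustrative note (added for clarity, not original documentation): when
         // |assign_it| is 1, -static_cast<int32_t>(assign_it) is an all-ones mask, so
         // the expression reduces to target ^= (target ^ source), i.e. target = source.
         // When |assign_it| is 0 the mask is 0 and target is left unchanged.  A
         // hypothetical use, had the macro been adopted for the max updates in
         // RecordDurations() below:
         //
         //   CONDITIONAL_ASSIGN(run_duration_max_ < run_duration,
         //                      run_duration_max_, run_duration);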
    160 
    161 void DeathData::RecordDurations(const int32_t queue_duration,
    162                                 const int32_t run_duration,
    163                                 const uint32_t random_number) {
    164   // We'll just clamp at INT_MAX, but we should note this in the UI as such.
    165   if (count_ < INT_MAX)
    166     base::subtle::NoBarrier_Store(&count_, count_ + 1);
    167 
    168   int sample_probability_count =
    169       base::subtle::NoBarrier_Load(&sample_probability_count_);
    170   if (sample_probability_count < INT_MAX)
    171     ++sample_probability_count;
    172   base::subtle::NoBarrier_Store(&sample_probability_count_,
    173                                 sample_probability_count);
    174 
    175   base::subtle::NoBarrier_Store(&queue_duration_sum_,
    176                                 queue_duration_sum_ + queue_duration);
    177   base::subtle::NoBarrier_Store(&run_duration_sum_,
    178                                 run_duration_sum_ + run_duration);
    179 
    180   if (queue_duration_max() < queue_duration)
    181     base::subtle::NoBarrier_Store(&queue_duration_max_, queue_duration);
    182   if (run_duration_max() < run_duration)
    183     base::subtle::NoBarrier_Store(&run_duration_max_, run_duration);
    184 
    185   // Take a uniformly distributed sample over all durations ever supplied during
    186   // the current profiling phase.
    187   // The probability that we (instead) use this new sample is
    188   // 1/sample_probability_count_. This results in a completely uniform selection
    189   // of the sample (at least when we don't clamp sample_probability_count_...
    190   // but that should be inconsequentially likely).  We ignore the fact that we
    191   // correlated our selection of a sample to the run and queue times (i.e., we
    192   // used them to generate random_number).
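           // Illustrative note: this is reservoir sampling with a reservoir of size
           // one.  For example, after three recorded deaths the third duration is kept
           // with probability 1/3, the second with (1/2)*(2/3) = 1/3, and the first
           // with (1/1)*(1/2)*(2/3) = 1/3, so every observed duration is equally
           // likely to be the retained sample.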
    193   CHECK_GT(sample_probability_count, 0);
    194   if (0 == (random_number % sample_probability_count)) {
    195     base::subtle::NoBarrier_Store(&queue_duration_sample_, queue_duration);
    196     base::subtle::NoBarrier_Store(&run_duration_sample_, run_duration);
    197   }
    198 }
    199 
    200 void DeathData::RecordAllocations(const uint32_t alloc_ops,
    201                                   const uint32_t free_ops,
    202                                   const uint32_t allocated_bytes,
    203                                   const uint32_t freed_bytes,
    204                                   const uint32_t alloc_overhead_bytes,
    205                                   const uint32_t max_allocated_bytes) {
    206   // Use saturating arithmetic.
    207   SaturatingMemberAdd(alloc_ops, &alloc_ops_);
    208   SaturatingMemberAdd(free_ops, &free_ops_);
    209   SaturatingMemberAdd(allocated_bytes, &allocated_bytes_);
    210   SaturatingMemberAdd(freed_bytes, &freed_bytes_);
    211   SaturatingMemberAdd(alloc_overhead_bytes, &alloc_overhead_bytes_);
    212 
    213   int32_t max = base::saturated_cast<int32_t>(max_allocated_bytes);
    214   if (max > max_allocated_bytes_)
    215     base::subtle::NoBarrier_Store(&max_allocated_bytes_, max);
    216 }
    217 
    218 void DeathData::OnProfilingPhaseCompleted(int profiling_phase) {
    219   // Snapshotting and storing current state.
    220   last_phase_snapshot_ =
    221       new DeathDataPhaseSnapshot(profiling_phase, *this, last_phase_snapshot_);
    222 
    223   // Not touching fields for which a delta can be computed by comparing with a
    224   // snapshot from the previous phase. Resetting other fields.  Sample values
    225   // will be reset upon next death recording because sample_probability_count_
    226   // is set to 0.
    227   // We avoid resetting to 0 in favor of deltas whenever possible.  The reason
    228   // is that for incrementable fields, resetting to 0 from the snapshot thread
    229   // potentially in parallel with incrementing in the death thread may result in
    230   // significant data corruption that has a potential to grow with time.  Not
    231   // resetting incrementable fields and using deltas will cause any
    232   // off-by-little corruptions to be likely fixed at the next snapshot.
    233   // The max values are not incrementable, and cannot be deduced using deltas
    234   // for a given phase. Hence, we have to reset them to 0.  But the potential
    235   // damage is limited to getting the previous phase's max to apply for the next
    236   // phase, and the error doesn't have a potential to keep growing with new
    237   // resets.
    238   // sample_probability_count_ is incrementable, but must be reset to 0 at the
    239   // phase end, so that we start a new uniformly randomized sample selection
    240   // after the reset. These fields are updated using atomics. However, race
    241   // conditions are possible since these are updated individually and not
    242   // together atomically, resulting in the values being mutually inconsistent.
    243   // The damage is limited to selecting a wrong sample, which is not something
    244   // that can cause accumulating or cascading effects.
    245   // If there were no inconsistencies caused by race conditions, we never send a
    246   // sample for the previous phase in the next phase's snapshot because
    247   // ThreadData::SnapshotExecutedTasks doesn't send deltas with 0 count.
    248   base::subtle::NoBarrier_Store(&sample_probability_count_, 0);
    249   base::subtle::NoBarrier_Store(&run_duration_max_, 0);
    250   base::subtle::NoBarrier_Store(&queue_duration_max_, 0);
    251 }
    252 
    253 void DeathData::SaturatingMemberAdd(const uint32_t addend,
    254                                     base::subtle::Atomic32* sum) {
    255   // Bail quick if no work or already saturated.
     256   // Bail quickly if no work or already saturated.
    257     return;
    258 
    259   base::CheckedNumeric<int32_t> new_sum = *sum;
    260   new_sum += addend;
    261   base::subtle::NoBarrier_Store(sum, new_sum.ValueOrDefault(INT_MAX));
    262 }
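         // Illustrative note: the accumulation saturates rather than wraps.  For
         // example, adding 10 to a member currently holding INT_MAX - 3 stores
         // INT_MAX, because CheckedNumeric flags the overflow and
         // ValueOrDefault(INT_MAX) substitutes the saturated value.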
    263 
    264 //------------------------------------------------------------------------------
    265 DeathDataSnapshot::DeathDataSnapshot()
    266     : count(-1),
    267       run_duration_sum(-1),
    268       run_duration_max(-1),
    269       run_duration_sample(-1),
    270       queue_duration_sum(-1),
    271       queue_duration_max(-1),
    272       queue_duration_sample(-1),
    273       alloc_ops(-1),
    274       free_ops(-1),
    275       allocated_bytes(-1),
    276       freed_bytes(-1),
    277       alloc_overhead_bytes(-1),
    278       max_allocated_bytes(-1) {}
    279 
    280 DeathDataSnapshot::DeathDataSnapshot(int count,
    281                                      int32_t run_duration_sum,
    282                                      int32_t run_duration_max,
    283                                      int32_t run_duration_sample,
    284                                      int32_t queue_duration_sum,
    285                                      int32_t queue_duration_max,
    286                                      int32_t queue_duration_sample,
    287                                      int32_t alloc_ops,
    288                                      int32_t free_ops,
    289                                      int32_t allocated_bytes,
    290                                      int32_t freed_bytes,
    291                                      int32_t alloc_overhead_bytes,
    292                                      int32_t max_allocated_bytes)
    293     : count(count),
    294       run_duration_sum(run_duration_sum),
    295       run_duration_max(run_duration_max),
    296       run_duration_sample(run_duration_sample),
    297       queue_duration_sum(queue_duration_sum),
    298       queue_duration_max(queue_duration_max),
    299       queue_duration_sample(queue_duration_sample),
    300       alloc_ops(alloc_ops),
    301       free_ops(free_ops),
    302       allocated_bytes(allocated_bytes),
    303       freed_bytes(freed_bytes),
    304       alloc_overhead_bytes(alloc_overhead_bytes),
    305       max_allocated_bytes(max_allocated_bytes) {}
    306 
    307 DeathDataSnapshot::DeathDataSnapshot(const DeathData& death_data)
    308     : count(death_data.count()),
    309       run_duration_sum(death_data.run_duration_sum()),
    310       run_duration_max(death_data.run_duration_max()),
    311       run_duration_sample(death_data.run_duration_sample()),
    312       queue_duration_sum(death_data.queue_duration_sum()),
    313       queue_duration_max(death_data.queue_duration_max()),
    314       queue_duration_sample(death_data.queue_duration_sample()),
    315       alloc_ops(death_data.alloc_ops()),
    316       free_ops(death_data.free_ops()),
    317       allocated_bytes(death_data.allocated_bytes()),
    318       freed_bytes(death_data.freed_bytes()),
    319       alloc_overhead_bytes(death_data.alloc_overhead_bytes()),
    320       max_allocated_bytes(death_data.max_allocated_bytes()) {}
    321 
    322 DeathDataSnapshot::DeathDataSnapshot(const DeathDataSnapshot& death_data) =
    323     default;
    324 
    325 DeathDataSnapshot::~DeathDataSnapshot() {
    326 }
    327 
    328 DeathDataSnapshot DeathDataSnapshot::Delta(
    329     const DeathDataSnapshot& older) const {
    330   return DeathDataSnapshot(
    331       count - older.count, run_duration_sum - older.run_duration_sum,
    332       run_duration_max, run_duration_sample,
    333       queue_duration_sum - older.queue_duration_sum, queue_duration_max,
    334       queue_duration_sample, alloc_ops - older.alloc_ops,
    335       free_ops - older.free_ops, allocated_bytes - older.allocated_bytes,
    336       freed_bytes - older.freed_bytes,
    337       alloc_overhead_bytes - older.alloc_overhead_bytes, max_allocated_bytes);
    338 }
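         // Illustrative note: only the monotonically accumulated fields (count, the
         // duration sums, the op counts and byte totals) are differenced against the
         // older snapshot.  run_duration_max, queue_duration_max and the *_sample
         // values are reported as-is because they are reset or re-selected at each
         // phase boundary, and max_allocated_bytes is likewise carried through rather
         // than differenced.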
    339 
    340 //------------------------------------------------------------------------------
    341 BirthOnThread::BirthOnThread(const Location& location,
    342                              const ThreadData& current)
    343     : location_(location),
    344       birth_thread_(&current) {
    345 }
    346 
    347 //------------------------------------------------------------------------------
    348 BirthOnThreadSnapshot::BirthOnThreadSnapshot() {
    349 }
    350 
    351 BirthOnThreadSnapshot::BirthOnThreadSnapshot(const BirthOnThread& birth)
    352     : location(birth.location()),
    353       sanitized_thread_name(birth.birth_thread()->sanitized_thread_name()) {}
    354 
    355 BirthOnThreadSnapshot::~BirthOnThreadSnapshot() {
    356 }
    357 
    358 //------------------------------------------------------------------------------
    359 Births::Births(const Location& location, const ThreadData& current)
    360     : BirthOnThread(location, current),
    361       birth_count_(1) { }
    362 
    363 int Births::birth_count() const { return birth_count_; }
    364 
    365 void Births::RecordBirth() { ++birth_count_; }
    366 
    367 //------------------------------------------------------------------------------
    368 // ThreadData maintains the central data for all births and deaths on a single
    369 // thread.
    370 
    371 // TODO(jar): We should pull all these static vars together, into a struct, and
    372 // optimize layout so that we benefit from locality of reference during accesses
    373 // to them.
    374 
    375 // static
    376 ThreadData::NowFunction* ThreadData::now_function_for_testing_ = NULL;
    377 
    378 // A TLS slot which points to the ThreadData instance for the current thread.
    379 // We do a fake initialization here (zeroing out data), and then the real
    380 // in-place construction happens when we call tls_index_.Initialize().
    381 // static
    382 base::ThreadLocalStorage::StaticSlot ThreadData::tls_index_ = TLS_INITIALIZER;
    383 
    384 // static
    385 int ThreadData::cleanup_count_ = 0;
    386 
    387 // static
    388 int ThreadData::incarnation_counter_ = 0;
    389 
    390 // static
    391 ThreadData* ThreadData::all_thread_data_list_head_ = NULL;
    392 
    393 // static
    394 ThreadData* ThreadData::first_retired_thread_data_ = NULL;
    395 
    396 // static
    397 base::LazyInstance<base::Lock>::Leaky
    398     ThreadData::list_lock_ = LAZY_INSTANCE_INITIALIZER;
    399 
    400 // static
    401 base::subtle::Atomic32 ThreadData::status_ = ThreadData::UNINITIALIZED;
    402 
    403 ThreadData::ThreadData(const std::string& sanitized_thread_name)
    404     : next_(NULL),
    405       next_retired_thread_data_(NULL),
    406       sanitized_thread_name_(sanitized_thread_name),
    407       incarnation_count_for_pool_(-1),
    408       current_stopwatch_(NULL) {
    409   DCHECK(sanitized_thread_name_.empty() ||
    410          !isdigit(sanitized_thread_name_.back()));
    411   PushToHeadOfList();  // Which sets real incarnation_count_for_pool_.
    412 }
    413 
    414 ThreadData::~ThreadData() {
    415 }
    416 
    417 void ThreadData::PushToHeadOfList() {
     418   // Toss in a hint of randomness (atop the uninitialized value).
    419   (void)VALGRIND_MAKE_MEM_DEFINED_IF_ADDRESSABLE(&random_number_,
    420                                                  sizeof(random_number_));
    421   MSAN_UNPOISON(&random_number_, sizeof(random_number_));
    422   random_number_ += static_cast<uint32_t>(this - static_cast<ThreadData*>(0));
    423   random_number_ ^= (Now() - TrackedTime()).InMilliseconds();
    424 
    425   DCHECK(!next_);
    426   base::AutoLock lock(*list_lock_.Pointer());
    427   incarnation_count_for_pool_ = incarnation_counter_;
    428   next_ = all_thread_data_list_head_;
    429   all_thread_data_list_head_ = this;
    430 }
    431 
    432 // static
    433 ThreadData* ThreadData::first() {
    434   base::AutoLock lock(*list_lock_.Pointer());
    435   return all_thread_data_list_head_;
    436 }
    437 
    438 ThreadData* ThreadData::next() const { return next_; }
    439 
    440 // static
    441 void ThreadData::InitializeThreadContext(const std::string& thread_name) {
    442   if (base::WorkerPool::RunsTasksOnCurrentThread())
    443     return;
    444   DCHECK_NE(thread_name, kWorkerThreadSanitizedName);
    445   EnsureTlsInitialization();
    446   ThreadData* current_thread_data =
    447       reinterpret_cast<ThreadData*>(tls_index_.Get());
    448   if (current_thread_data)
    449     return;  // Browser tests instigate this.
    450   current_thread_data =
    451       GetRetiredOrCreateThreadData(SanitizeThreadName(thread_name));
    452   tls_index_.Set(current_thread_data);
    453 }
    454 
    455 // static
    456 ThreadData* ThreadData::Get() {
    457   if (!tls_index_.initialized())
    458     return NULL;  // For unittests only.
    459   ThreadData* registered = reinterpret_cast<ThreadData*>(tls_index_.Get());
    460   if (registered)
    461     return registered;
    462 
    463   // We must be a worker thread, since we didn't pre-register.
    464   ThreadData* worker_thread_data =
    465       GetRetiredOrCreateThreadData(kWorkerThreadSanitizedName);
    466   tls_index_.Set(worker_thread_data);
    467   return worker_thread_data;
    468 }
    469 
    470 // static
    471 void ThreadData::OnThreadTermination(void* thread_data) {
    472   DCHECK(thread_data);  // TLS should *never* call us with a NULL.
    473   // We must NOT do any allocations during this callback.  There is a chance
    474   // that the allocator is no longer active on this thread.
    475   reinterpret_cast<ThreadData*>(thread_data)->OnThreadTerminationCleanup();
    476 }
    477 
    478 void ThreadData::OnThreadTerminationCleanup() {
    479   // We must NOT do any allocations during this callback. There is a chance that
    480   // the allocator is no longer active on this thread.
    481 
    482   // The list_lock_ was created when we registered the callback, so it won't be
    483   // allocated here despite the lazy reference.
    484   base::AutoLock lock(*list_lock_.Pointer());
    485   if (incarnation_counter_ != incarnation_count_for_pool_)
    486     return;  // ThreadData was constructed in an earlier unit test.
    487   ++cleanup_count_;
    488 
    489   // Add this ThreadData to a retired list so that it can be reused by a thread
     490   // with the same sanitized name in the future.
    491   // |next_retired_thread_data_| is expected to be nullptr for a ThreadData
    492   // associated with an active thread.
    493   DCHECK(!next_retired_thread_data_);
    494   next_retired_thread_data_ = first_retired_thread_data_;
    495   first_retired_thread_data_ = this;
    496 }
    497 
    498 // static
    499 void ThreadData::Snapshot(int current_profiling_phase,
    500                           ProcessDataSnapshot* process_data_snapshot) {
    501   // Get an unchanging copy of a ThreadData list.
    502   ThreadData* my_list = ThreadData::first();
    503 
    504   // Gather data serially.
    505   // This hackish approach *can* get some slightly corrupt tallies, as we are
    506   // grabbing values without the protection of a lock, but it has the advantage
    507   // of working even with threads that don't have message loops.  If a user
    508   // sees any strangeness, they can always just run their stats gathering a
    509   // second time.
    510   BirthCountMap birth_counts;
    511   for (ThreadData* thread_data = my_list; thread_data;
    512        thread_data = thread_data->next()) {
    513     thread_data->SnapshotExecutedTasks(current_profiling_phase,
    514                                        &process_data_snapshot->phased_snapshots,
    515                                        &birth_counts);
    516   }
    517 
    518   // Add births that are still active -- i.e. objects that have tallied a birth,
    519   // but have not yet tallied a matching death, and hence must be either
    520   // running, queued up, or being held in limbo for future posting.
    521   auto* current_phase_tasks =
    522       &process_data_snapshot->phased_snapshots[current_profiling_phase].tasks;
    523   for (const auto& birth_count : birth_counts) {
    524     if (birth_count.second > 0) {
    525       current_phase_tasks->push_back(
    526           TaskSnapshot(BirthOnThreadSnapshot(*birth_count.first),
    527                        DeathDataSnapshot(birth_count.second, 0, 0, 0, 0, 0, 0,
    528                                          0, 0, 0, 0, 0, 0),
    529                        "Still_Alive"));
    530     }
    531   }
    532 }
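         // Rough usage sketch, for illustration only (not taken from an actual call
         // site):
         //
         //   ProcessDataSnapshot snapshot;
         //   ThreadData::Snapshot(/* current_profiling_phase = */ 0, &snapshot);
         //   // snapshot.phased_snapshots[0].tasks now holds the per-phase
         //   // TaskSnapshots, including "Still_Alive" entries for births that have
         //   // not yet been matched by a death.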
    533 
    534 // static
    535 void ThreadData::OnProfilingPhaseCompleted(int profiling_phase) {
    536   // Get an unchanging copy of a ThreadData list.
    537   ThreadData* my_list = ThreadData::first();
    538 
    539   // Add snapshots for all instances of death data in all threads serially.
    540   // This hackish approach *can* get some slightly corrupt tallies, as we are
    541   // grabbing values without the protection of a lock, but it has the advantage
    542   // of working even with threads that don't have message loops.  Any corruption
    543   // shouldn't cause "cascading damage" to anything else (in later phases).
    544   for (ThreadData* thread_data = my_list; thread_data;
    545        thread_data = thread_data->next()) {
    546     thread_data->OnProfilingPhaseCompletedOnThread(profiling_phase);
    547   }
    548 }
    549 
    550 Births* ThreadData::TallyABirth(const Location& location) {
    551   BirthMap::iterator it = birth_map_.find(location);
    552   Births* child;
    553   if (it != birth_map_.end()) {
     554     child = it->second;
    555     child->RecordBirth();
    556   } else {
    557     child = new Births(location, *this);  // Leak this.
    558     // Lock since the map may get relocated now, and other threads sometimes
    559     // snapshot it (but they lock before copying it).
    560     base::AutoLock lock(map_lock_);
    561     birth_map_[location] = child;
    562   }
    563 
    564   return child;
    565 }
    566 
    567 void ThreadData::TallyADeath(const Births& births,
    568                              int32_t queue_duration,
    569                              const TaskStopwatch& stopwatch) {
    570   int32_t run_duration = stopwatch.RunDurationMs();
    571 
     572   // Stir in some randomness, plus add a constant in case durations are zero.
    573   const uint32_t kSomePrimeNumber = 2147483647;
    574   random_number_ += queue_duration + run_duration + kSomePrimeNumber;
    575   // An address is going to have some randomness to it as well ;-).
    576   random_number_ ^=
    577       static_cast<uint32_t>(&births - reinterpret_cast<Births*>(0));
    578 
    579   DeathMap::iterator it = death_map_.find(&births);
    580   DeathData* death_data;
    581   if (it != death_map_.end()) {
    582     death_data = &it->second;
    583   } else {
    584     base::AutoLock lock(map_lock_);  // Lock as the map may get relocated now.
    585     death_data = &death_map_[&births];
    586   }  // Release lock ASAP.
    587   death_data->RecordDurations(queue_duration, run_duration, random_number_);
    588 
    589 #if BUILDFLAG(ENABLE_MEMORY_TASK_PROFILER)
    590   if (stopwatch.heap_tracking_enabled()) {
    591     base::debug::ThreadHeapUsage heap_usage = stopwatch.heap_usage().usage();
    592     // Saturate the 64 bit counts on conversion to 32 bit storage.
    593     death_data->RecordAllocations(
    594         base::saturated_cast<int32_t>(heap_usage.alloc_ops),
    595         base::saturated_cast<int32_t>(heap_usage.free_ops),
    596         base::saturated_cast<int32_t>(heap_usage.alloc_bytes),
    597         base::saturated_cast<int32_t>(heap_usage.free_bytes),
    598         base::saturated_cast<int32_t>(heap_usage.alloc_overhead_bytes),
    599         base::saturated_cast<int32_t>(heap_usage.max_allocated_bytes));
    600   }
    601 #endif
    602 }
    603 
    604 // static
    605 Births* ThreadData::TallyABirthIfActive(const Location& location) {
    606   if (!TrackingStatus())
    607     return NULL;
    608   ThreadData* current_thread_data = Get();
    609   if (!current_thread_data)
    610     return NULL;
    611   return current_thread_data->TallyABirth(location);
    612 }
    613 
    614 // static
    615 void ThreadData::TallyRunOnNamedThreadIfTracking(
    616     const base::TrackingInfo& completed_task,
    617     const TaskStopwatch& stopwatch) {
    618   // Even if we have been DEACTIVATED, we will process any pending births so
    619   // that our data structures (which counted the outstanding births) remain
    620   // consistent.
    621   const Births* births = completed_task.birth_tally;
    622   if (!births)
    623     return;
    624   ThreadData* current_thread_data = stopwatch.GetThreadData();
    625   if (!current_thread_data)
    626     return;
    627 
    628   // Watch out for a race where status_ is changing, and hence one or both
    629   // of start_of_run or end_of_run is zero.  In that case, we didn't bother to
    630   // get a time value since we "weren't tracking" and we were trying to be
    631   // efficient by not calling for a genuine time value.  For simplicity, we'll
    632   // use a default zero duration when we can't calculate a true value.
    633   TrackedTime start_of_run = stopwatch.StartTime();
    634   int32_t queue_duration = 0;
    635   if (!start_of_run.is_null()) {
    636     queue_duration = (start_of_run - completed_task.EffectiveTimePosted())
    637         .InMilliseconds();
    638   }
    639   current_thread_data->TallyADeath(*births, queue_duration, stopwatch);
    640 }
    641 
    642 // static
    643 void ThreadData::TallyRunOnWorkerThreadIfTracking(
    644     const Births* births,
    645     const TrackedTime& time_posted,
    646     const TaskStopwatch& stopwatch) {
    647   // Even if we have been DEACTIVATED, we will process any pending births so
    648   // that our data structures (which counted the outstanding births) remain
    649   // consistent.
    650   if (!births)
    651     return;
    652 
    653   // TODO(jar): Support the option to coalesce all worker-thread activity under
    654   // one ThreadData instance that uses locks to protect *all* access.  This will
    655   // reduce memory (making it provably bounded), but run incrementally slower
    656   // (since we'll use locks on TallyABirth and TallyADeath).  The good news is
    657   // that the locks on TallyADeath will be *after* the worker thread has run,
    658   // and hence nothing will be waiting for the completion (...  besides some
    659   // other thread that might like to run).  Also, the worker threads tasks are
    660   // generally longer, and hence the cost of the lock may perchance be amortized
    661   // over the long task's lifetime.
    662   ThreadData* current_thread_data = stopwatch.GetThreadData();
    663   if (!current_thread_data)
    664     return;
    665 
    666   TrackedTime start_of_run = stopwatch.StartTime();
    667   int32_t queue_duration = 0;
    668   if (!start_of_run.is_null()) {
    669     queue_duration = (start_of_run - time_posted).InMilliseconds();
    670   }
    671   current_thread_data->TallyADeath(*births, queue_duration, stopwatch);
    672 }
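         // Rough end-to-end sketch of the tally flow, for illustration only (the real
         // call sites live in the task posting and running machinery):
         //
         //   // At post time, on the posting thread:
         //   Births* births = ThreadData::TallyABirthIfActive(FROM_HERE);
         //
         //   // Around execution, on the thread that runs the task:
         //   TaskStopwatch stopwatch;
         //   stopwatch.Start();
         //   // ... run the task ...
         //   stopwatch.Stop();
         //   ThreadData::TallyRunOnWorkerThreadIfTracking(births, time_posted,
         //                                                stopwatch);
         //
         //   (|time_posted| stands for the TrackedTime captured when the task was
         //   posted.)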
    673 
    674 // static
    675 void ThreadData::TallyRunInAScopedRegionIfTracking(
    676     const Births* births,
    677     const TaskStopwatch& stopwatch) {
    678   // Even if we have been DEACTIVATED, we will process any pending births so
    679   // that our data structures (which counted the outstanding births) remain
    680   // consistent.
    681   if (!births)
    682     return;
    683 
    684   ThreadData* current_thread_data = stopwatch.GetThreadData();
    685   if (!current_thread_data)
    686     return;
    687 
    688   int32_t queue_duration = 0;
    689   current_thread_data->TallyADeath(*births, queue_duration, stopwatch);
    690 }
    691 
    692 void ThreadData::SnapshotExecutedTasks(
    693     int current_profiling_phase,
    694     PhasedProcessDataSnapshotMap* phased_snapshots,
    695     BirthCountMap* birth_counts) {
    696   // Get copy of data, so that the data will not change during the iterations
    697   // and processing.
    698   BirthMap birth_map;
    699   DeathsSnapshot deaths;
    700   SnapshotMaps(current_profiling_phase, &birth_map, &deaths);
    701 
    702   for (const auto& birth : birth_map) {
    703     (*birth_counts)[birth.second] += birth.second->birth_count();
    704   }
    705 
    706   for (const auto& death : deaths) {
    707     (*birth_counts)[death.first] -= death.first->birth_count();
    708 
    709     // For the current death data, walk through all its snapshots, starting from
    710     // the current one, then from the previous profiling phase etc., and for
    711     // each snapshot calculate the delta between the snapshot and the previous
    712     // phase, if any.  Store the deltas in the result.
    713     for (const DeathDataPhaseSnapshot* phase = &death.second; phase;
    714          phase = phase->prev) {
    715       const DeathDataSnapshot& death_data =
    716           phase->prev ? phase->death_data.Delta(phase->prev->death_data)
    717                       : phase->death_data;
    718 
    719       if (death_data.count > 0) {
    720         (*phased_snapshots)[phase->profiling_phase].tasks.push_back(
    721             TaskSnapshot(BirthOnThreadSnapshot(*death.first), death_data,
    722                          sanitized_thread_name()));
    723       }
    724     }
    725   }
    726 }
    727 
    728 // This may be called from another thread.
    729 void ThreadData::SnapshotMaps(int profiling_phase,
    730                               BirthMap* birth_map,
    731                               DeathsSnapshot* deaths) {
    732   base::AutoLock lock(map_lock_);
    733 
    734   for (const auto& birth : birth_map_)
    735     (*birth_map)[birth.first] = birth.second;
    736 
    737   for (const auto& death : death_map_) {
    738     deaths->push_back(std::make_pair(
    739         death.first,
    740         DeathDataPhaseSnapshot(profiling_phase, death.second,
    741                                death.second.last_phase_snapshot())));
    742   }
    743 }
    744 
    745 void ThreadData::OnProfilingPhaseCompletedOnThread(int profiling_phase) {
    746   base::AutoLock lock(map_lock_);
    747 
    748   for (auto& death : death_map_) {
    749     death.second.OnProfilingPhaseCompleted(profiling_phase);
    750   }
    751 }
    752 
    753 void ThreadData::EnsureTlsInitialization() {
    754   if (base::subtle::Acquire_Load(&status_) >= DEACTIVATED)
    755     return;  // Someone else did the initialization.
    756   // Due to racy lazy initialization in tests, we'll need to recheck status_
    757   // after we acquire the lock.
    758 
     759   // Ensure that we don't double-initialize TLS.  We are called when single
    760   // threaded in the product, but some tests may be racy and lazy about our
    761   // initialization.
    762   base::AutoLock lock(*list_lock_.Pointer());
    763   if (base::subtle::Acquire_Load(&status_) >= DEACTIVATED)
    764     return;  // Someone raced in here and beat us.
    765 
    766   // Perform the "real" TLS initialization now, and leave it intact through
    767   // process termination.
    768   if (!tls_index_.initialized()) {  // Testing may have initialized this.
    769     DCHECK_EQ(base::subtle::NoBarrier_Load(&status_), UNINITIALIZED);
    770     tls_index_.Initialize(&ThreadData::OnThreadTermination);
    771     DCHECK(tls_index_.initialized());
    772   } else {
     773     // TLS was initialized for us earlier.
    774     DCHECK_EQ(base::subtle::NoBarrier_Load(&status_), DORMANT_DURING_TESTS);
    775   }
    776 
    777   // Incarnation counter is only significant to testing, as it otherwise will
    778   // never again change in this process.
    779   ++incarnation_counter_;
    780 
    781   // The lock is not critical for setting status_, but it doesn't hurt.  It also
    782   // ensures that if we have a racy initialization, that we'll bail as soon as
    783   // we get the lock earlier in this method.
    784   base::subtle::Release_Store(&status_, kInitialStartupState);
    785   DCHECK(base::subtle::NoBarrier_Load(&status_) != UNINITIALIZED);
    786 }
    787 
    788 // static
    789 void ThreadData::InitializeAndSetTrackingStatus(Status status) {
    790   DCHECK_GE(status, DEACTIVATED);
    791   DCHECK_LE(status, PROFILING_ACTIVE);
    792 
    793   EnsureTlsInitialization();  // No-op if already initialized.
    794 
    795   if (status > DEACTIVATED)
    796     status = PROFILING_ACTIVE;
    797 
    798   base::subtle::Release_Store(&status_, status);
    799 }
    800 
    801 // static
    802 ThreadData::Status ThreadData::status() {
    803   return static_cast<ThreadData::Status>(base::subtle::Acquire_Load(&status_));
    804 }
    805 
    806 // static
    807 bool ThreadData::TrackingStatus() {
    808   return base::subtle::Acquire_Load(&status_) > DEACTIVATED;
    809 }
    810 
    811 // static
    812 void ThreadData::EnableProfilerTiming() {
    813   base::subtle::NoBarrier_Store(&g_profiler_timing_enabled, ENABLED_TIMING);
    814 }
    815 
    816 // static
    817 TrackedTime ThreadData::Now() {
    818   if (now_function_for_testing_)
    819     return TrackedTime::FromMilliseconds((*now_function_for_testing_)());
    820   if (IsProfilerTimingEnabled() && TrackingStatus())
    821     return TrackedTime::Now();
    822   return TrackedTime();  // Super fast when disabled, or not compiled.
    823 }
    824 
    825 // static
    826 void ThreadData::EnsureCleanupWasCalled(int major_threads_shutdown_count) {
    827   base::AutoLock lock(*list_lock_.Pointer());
    828 
    829   // TODO(jar): until this is working on XP, don't run the real test.
    830 #if 0
     831   // Verify that we've at least shut down/cleaned up the major named threads.  The
    832   // caller should tell us how many thread shutdowns should have taken place by
    833   // now.
    834   CHECK_GT(cleanup_count_, major_threads_shutdown_count);
    835 #endif
    836 }
    837 
    838 // static
    839 void ThreadData::ShutdownSingleThreadedCleanup(bool leak) {
    840   // This is only called from test code, where we need to cleanup so that
    841   // additional tests can be run.
    842   // We must be single threaded... but be careful anyway.
    843   InitializeAndSetTrackingStatus(DEACTIVATED);
    844 
    845   ThreadData* thread_data_list;
    846   {
    847     base::AutoLock lock(*list_lock_.Pointer());
    848     thread_data_list = all_thread_data_list_head_;
    849     all_thread_data_list_head_ = NULL;
    850     ++incarnation_counter_;
    851     // To be clean, break apart the retired worker list (though we leak them).
    852     while (first_retired_thread_data_) {
    853       ThreadData* thread_data = first_retired_thread_data_;
    854       first_retired_thread_data_ = thread_data->next_retired_thread_data_;
    855       thread_data->next_retired_thread_data_ = nullptr;
    856     }
    857   }
    858 
     859   // Put most global statics back in pristine shape.
    860   cleanup_count_ = 0;
    861   tls_index_.Set(NULL);
    862   // Almost UNINITIALIZED.
    863   base::subtle::Release_Store(&status_, DORMANT_DURING_TESTS);
    864 
    865   // To avoid any chance of racing in unit tests, which is the only place we
    866   // call this function, we may sometimes leak all the data structures we
    867   // recovered, as they may still be in use on threads from prior tests!
    868   if (leak) {
    869     ThreadData* thread_data = thread_data_list;
    870     while (thread_data) {
    871       ANNOTATE_LEAKING_OBJECT_PTR(thread_data);
    872       thread_data = thread_data->next();
    873     }
    874     return;
    875   }
    876 
    877   // When we want to cleanup (on a single thread), here is what we do.
    878 
    879   // Do actual recursive delete in all ThreadData instances.
    880   while (thread_data_list) {
    881     ThreadData* next_thread_data = thread_data_list;
    882     thread_data_list = thread_data_list->next();
    883 
    884     for (BirthMap::iterator it = next_thread_data->birth_map_.begin();
    885          next_thread_data->birth_map_.end() != it; ++it)
    886       delete it->second;  // Delete the Birth Records.
    887     delete next_thread_data;  // Includes all Death Records.
    888   }
    889 }
    890 
    891 // static
    892 ThreadData* ThreadData::GetRetiredOrCreateThreadData(
    893     const std::string& sanitized_thread_name) {
    894   SCOPED_UMA_HISTOGRAM_TIMER("TrackedObjects.GetRetiredOrCreateThreadData");
    895 
    896   {
    897     base::AutoLock lock(*list_lock_.Pointer());
    898     ThreadData** pcursor = &first_retired_thread_data_;
    899     ThreadData* cursor = first_retired_thread_data_;
    900 
    901     // Assuming that there aren't more than a few tens of retired ThreadData
    902     // instances, this lookup should be quick compared to the thread creation
    903     // time. Retired ThreadData instances cannot be stored in a map because
    904     // insertions are done from OnThreadTerminationCleanup() where allocations
    905     // are not allowed.
    906     //
    907     // Note: Test processes may have more than a few tens of retired ThreadData
    908     // instances.
    909     while (cursor) {
    910       if (cursor->sanitized_thread_name() == sanitized_thread_name) {
    911         DCHECK_EQ(*pcursor, cursor);
    912         *pcursor = cursor->next_retired_thread_data_;
    913         cursor->next_retired_thread_data_ = nullptr;
    914         return cursor;
    915       }
    916       pcursor = &cursor->next_retired_thread_data_;
    917       cursor = cursor->next_retired_thread_data_;
    918     }
    919   }
    920 
    921   return new ThreadData(sanitized_thread_name);
    922 }
    923 
    924 //------------------------------------------------------------------------------
    925 TaskStopwatch::TaskStopwatch()
    926     : wallclock_duration_ms_(0),
    927       current_thread_data_(NULL),
    928       excluded_duration_ms_(0),
    929       parent_(NULL) {
    930 #if DCHECK_IS_ON()
    931   state_ = CREATED;
    932   child_ = NULL;
    933 #endif
    934 #if BUILDFLAG(ENABLE_MEMORY_TASK_PROFILER)
    935   heap_tracking_enabled_ =
    936       base::debug::ThreadHeapUsageTracker::IsHeapTrackingEnabled();
    937 #endif
    938 }
    939 
    940 TaskStopwatch::~TaskStopwatch() {
    941 #if DCHECK_IS_ON()
    942   DCHECK(state_ != RUNNING);
    943   DCHECK(child_ == NULL);
    944 #endif
    945 }
    946 
    947 void TaskStopwatch::Start() {
    948 #if DCHECK_IS_ON()
    949   DCHECK(state_ == CREATED);
    950   state_ = RUNNING;
    951 #endif
    952 
    953   start_time_ = ThreadData::Now();
    954 #if BUILDFLAG(ENABLE_MEMORY_TASK_PROFILER)
    955   if (heap_tracking_enabled_)
    956     heap_usage_.Start();
    957 #endif
    958 
    959   current_thread_data_ = ThreadData::Get();
    960   if (!current_thread_data_)
    961     return;
    962 
    963   parent_ = current_thread_data_->current_stopwatch_;
    964 #if DCHECK_IS_ON()
    965   if (parent_) {
    966     DCHECK(parent_->state_ == RUNNING);
    967     DCHECK(parent_->child_ == NULL);
    968     parent_->child_ = this;
    969   }
    970 #endif
    971   current_thread_data_->current_stopwatch_ = this;
    972 }
    973 
    974 void TaskStopwatch::Stop() {
    975   const TrackedTime end_time = ThreadData::Now();
    976 #if DCHECK_IS_ON()
    977   DCHECK(state_ == RUNNING);
    978   state_ = STOPPED;
    979   DCHECK(child_ == NULL);
    980 #endif
    981 #if BUILDFLAG(ENABLE_MEMORY_TASK_PROFILER)
    982   if (heap_tracking_enabled_)
    983     heap_usage_.Stop(true);
    984 #endif
    985 
    986   if (!start_time_.is_null() && !end_time.is_null()) {
    987     wallclock_duration_ms_ = (end_time - start_time_).InMilliseconds();
    988   }
    989 
    990   if (!current_thread_data_)
    991     return;
    992 
    993   DCHECK(current_thread_data_->current_stopwatch_ == this);
    994   current_thread_data_->current_stopwatch_ = parent_;
    995   if (!parent_)
    996     return;
    997 
    998 #if DCHECK_IS_ON()
    999   DCHECK(parent_->state_ == RUNNING);
   1000   DCHECK(parent_->child_ == this);
   1001   parent_->child_ = NULL;
   1002 #endif
   1003   parent_->excluded_duration_ms_ += wallclock_duration_ms_;
   1004   parent_ = NULL;
   1005 }
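         // Illustrative note: stopwatches nest per thread.  When a nested stopwatch
         // stops, its wallclock time is added to the parent's excluded_duration_ms_,
         // so the parent's RunDurationMs() reports only time spent in its own task.
         // Roughly:
         //
         //   TaskStopwatch outer;
         //   outer.Start();
         //   {
         //     TaskStopwatch inner;  // e.g. a nested task or scoped region
         //     inner.Start();
         //     // ... nested work ...
         //     inner.Stop();         // charged to |outer|'s excluded duration
         //   }
         //   outer.Stop();
         //   // outer.RunDurationMs() ~= outer wallclock minus inner wallclock.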
   1006 
   1007 TrackedTime TaskStopwatch::StartTime() const {
   1008 #if DCHECK_IS_ON()
   1009   DCHECK(state_ != CREATED);
   1010 #endif
   1011 
   1012   return start_time_;
   1013 }
   1014 
   1015 int32_t TaskStopwatch::RunDurationMs() const {
   1016 #if DCHECK_IS_ON()
   1017   DCHECK(state_ == STOPPED);
   1018 #endif
   1019 
   1020   return wallclock_duration_ms_ - excluded_duration_ms_;
   1021 }
   1022 
   1023 ThreadData* TaskStopwatch::GetThreadData() const {
   1024 #if DCHECK_IS_ON()
   1025   DCHECK(state_ != CREATED);
   1026 #endif
   1027 
   1028   return current_thread_data_;
   1029 }
   1030 
   1031 //------------------------------------------------------------------------------
   1032 // DeathDataPhaseSnapshot
   1033 
   1034 DeathDataPhaseSnapshot::DeathDataPhaseSnapshot(
   1035     int profiling_phase,
   1036     const DeathData& death,
   1037     const DeathDataPhaseSnapshot* prev)
   1038     : profiling_phase(profiling_phase), death_data(death), prev(prev) {}
   1039 
   1040 //------------------------------------------------------------------------------
   1041 // TaskSnapshot
   1042 
   1043 TaskSnapshot::TaskSnapshot() {
   1044 }
   1045 
   1046 TaskSnapshot::TaskSnapshot(const BirthOnThreadSnapshot& birth,
   1047                            const DeathDataSnapshot& death_data,
   1048                            const std::string& death_sanitized_thread_name)
   1049     : birth(birth),
   1050       death_data(death_data),
   1051       death_sanitized_thread_name(death_sanitized_thread_name) {}
   1052 
   1053 TaskSnapshot::~TaskSnapshot() {
   1054 }
   1055 
   1056 //------------------------------------------------------------------------------
   1057 // ProcessDataPhaseSnapshot
   1058 
   1059 ProcessDataPhaseSnapshot::ProcessDataPhaseSnapshot() {
   1060 }
   1061 
   1062 ProcessDataPhaseSnapshot::ProcessDataPhaseSnapshot(
   1063     const ProcessDataPhaseSnapshot& other) = default;
   1064 
   1065 ProcessDataPhaseSnapshot::~ProcessDataPhaseSnapshot() {
   1066 }
   1067 
   1068 //------------------------------------------------------------------------------
   1069 // ProcessDataPhaseSnapshot
   1070 
   1071 ProcessDataSnapshot::ProcessDataSnapshot()
   1072 #if !defined(OS_NACL)
   1073     : process_id(base::GetCurrentProcId()) {
   1074 #else
   1075     : process_id(base::kNullProcessId) {
   1076 #endif
   1077 }
   1078 
   1079 ProcessDataSnapshot::ProcessDataSnapshot(const ProcessDataSnapshot& other) =
   1080     default;
   1081 
   1082 ProcessDataSnapshot::~ProcessDataSnapshot() {
   1083 }
   1084 
   1085 }  // namespace tracked_objects
   1086