      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "base/tracked_objects.h"
      6 
      7 #include <limits.h>
      8 #include <stdlib.h>
      9 
     10 #include "base/atomicops.h"
     11 #include "base/base_switches.h"
     12 #include "base/command_line.h"
     13 #include "base/compiler_specific.h"
     14 #include "base/logging.h"
     15 #include "base/process/process_handle.h"
     16 #include "base/profiler/alternate_timer.h"
     17 #include "base/strings/stringprintf.h"
     18 #include "base/tracking_info.h"
     19 #include "build/build_config.h"
     20 #include "third_party/valgrind/memcheck.h"
     21 
     22 using base::TimeDelta;
     23 
     24 namespace base {
     25 class TimeDelta;
     26 }
     27 
     28 namespace tracked_objects {
     29 
     30 namespace {
     31 // When ThreadData is first initialized, should we start in an ACTIVE state to
     32 // record all of the startup-time tasks, or should we start up DEACTIVATED, so
     33 // that we only record after parsing the command line flag --enable-tracking.
     34 // Note that the flag may force either state, so this really controls only the
     35 // period of time up until that flag is parsed.  If there is no flag seen, then
     36 // this state may prevail for much or all of the process lifetime.
     37 const ThreadData::Status kInitialStartupState = ThreadData::PROFILING_ACTIVE;
     38 
     39 // Control whether an alternate time source (Now() function) is supported by
     40 // the ThreadData class.  This compile time flag should be set to true if we
     41 // want other modules (such as a memory allocator, or a thread-specific CPU time
     42 // clock) to be able to provide a thread-specific Now() function.  Without this
     43 // compile-time flag, the code will only support the wall-clock time.  This flag
     44 // can be flipped to efficiently disable this path (if there is a performance
     45 // problem with its presence).
     46 static const bool kAllowAlternateTimeSourceHandling = true;
     47 
     48 // Possible states of profiler timing.
     49 enum {
     50   UNDEFINED_TIMING,
     51   ENABLED_TIMING,
     52   DISABLED_TIMING,
     53 };
     54 
     55 // Current state of profiler timing.
     56 base::subtle::Atomic32 g_profiler_timing_enabled = UNDEFINED_TIMING;
     57 
     58 // Returns whether profiler timing is enabled.  The default is true, but this
     59 // may be overridden by a command-line flag.  Some platforms may
     60 // programmatically set this command-line flag to the "off" value if it's not
     61 // specified.
     62 // This in turn can be overridden by explicitly calling
     63 // ThreadData::EnableProfilerTiming, say, based on a field trial.
     64 inline bool IsProfilerTimingEnabled() {
     65   // Reading |g_profiler_timing_enabled| is done without a barrier because
     66   // multiple initialization is not an issue, and a barrier would be relatively
     67   // costly given that this method is sometimes called in a tight loop.
     68   base::subtle::Atomic32 current_timing_enabled =
     69       base::subtle::NoBarrier_Load(&g_profiler_timing_enabled);
     70   if (current_timing_enabled == UNDEFINED_TIMING) {
     71     if (!base::CommandLine::InitializedForCurrentProcess())
     72       return true;
     73     current_timing_enabled =
     74         (base::CommandLine::ForCurrentProcess()->GetSwitchValueASCII(
     75              switches::kProfilerTiming) ==
     76          switches::kProfilerTimingDisabledValue)
     77             ? DISABLED_TIMING
     78             : ENABLED_TIMING;
     79     base::subtle::NoBarrier_Store(&g_profiler_timing_enabled,
     80                                   current_timing_enabled);
     81   }
     82   return current_timing_enabled == ENABLED_TIMING;
     83 }
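        // Illustrative note (added, not part of the original source): with the
        // scheme above, timing stays enabled unless the process was launched
        // with the kProfilerTiming switch set to kProfilerTimingDisabledValue
        // (the exact switch strings live in base/base_switches.cc), roughly
        // --profiler-timing=<disabled value> on the command line.  A later call
        // to ThreadData::EnableProfilerTiming() can still flip the cached state
        // back to ENABLED_TIMING.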
     84 
     85 }  // namespace
     86 
     87 //------------------------------------------------------------------------------
     88 // DeathData tallies durations when a death takes place.
     89 
     90 DeathData::DeathData()
     91     : count_(0),
     92       sample_probability_count_(0),
     93       run_duration_sum_(0),
     94       queue_duration_sum_(0),
     95       run_duration_max_(0),
     96       queue_duration_max_(0),
     97       run_duration_sample_(0),
     98       queue_duration_sample_(0),
     99       last_phase_snapshot_(nullptr) {
    100 }
    101 
    102 DeathData::DeathData(const DeathData& other)
    103     : count_(other.count_),
    104       sample_probability_count_(other.sample_probability_count_),
    105       run_duration_sum_(other.run_duration_sum_),
    106       queue_duration_sum_(other.queue_duration_sum_),
    107       run_duration_max_(other.run_duration_max_),
    108       queue_duration_max_(other.queue_duration_max_),
    109       run_duration_sample_(other.run_duration_sample_),
    110       queue_duration_sample_(other.queue_duration_sample_),
    111       last_phase_snapshot_(nullptr) {
    112   // This constructor will be used by std::map when adding new DeathData values
    113   // to the map.  At that point, last_phase_snapshot_ is still NULL, so we don't
    114   // need to worry about ownership transfer.
    115   DCHECK(other.last_phase_snapshot_ == nullptr);
    116 }
    117 
    118 DeathData::~DeathData() {
    119   while (last_phase_snapshot_) {
    120     const DeathDataPhaseSnapshot* snapshot = last_phase_snapshot_;
    121     last_phase_snapshot_ = snapshot->prev;
    122     delete snapshot;
    123   }
    124 }
    125 
    126 // TODO(jar): I need to see if this macro to optimize branching is worth using.
    127 //
    128 // This macro has no branching, so it is surely fast, and is equivalent to:
    129 //             if (assign_it)
    130 //               target = source;
    131 // We use a macro rather than a template to force this to inline.
    132 // Related code for calculating max is discussed on the web.
    133 #define CONDITIONAL_ASSIGN(assign_it, target, source) \
    134   ((target) ^= ((target) ^ (source)) & -static_cast<int32_t>(assign_it))
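        // Worked example of the branchless trick above (added for exposition,
        // not part of the original source).  When assign_it is true, the mask
        // -static_cast<int32_t>(assign_it) is -1 (all bits set), so
        //   target ^= (target ^ source) & ~0   ==>   target becomes source;
        // when assign_it is false, the mask is 0, so
        //   target ^= (target ^ source) & 0    ==>   target is unchanged.
        // For instance, with target == 3 and source == 7:
        //   CONDITIONAL_ASSIGN(1, target, 7):  3 ^ ((3 ^ 7) & -1) == 3 ^ 4 == 7
        //   CONDITIONAL_ASSIGN(0, target, 7):  3 ^ ((3 ^ 7) &  0) == 3 ^ 0 == 3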
    135 
    136 void DeathData::RecordDeath(const int32_t queue_duration,
    137                             const int32_t run_duration,
    138                             const uint32_t random_number) {
    139   // We'll just clamp at INT_MAX; the UI should note when the count has saturated.
    140   if (count_ < INT_MAX)
    141     base::subtle::NoBarrier_Store(&count_, count_ + 1);
    142 
    143   int sample_probability_count =
    144       base::subtle::NoBarrier_Load(&sample_probability_count_);
    145   if (sample_probability_count < INT_MAX)
    146     ++sample_probability_count;
    147   base::subtle::NoBarrier_Store(&sample_probability_count_,
    148                                 sample_probability_count);
    149 
    150   base::subtle::NoBarrier_Store(&queue_duration_sum_,
    151                                 queue_duration_sum_ + queue_duration);
    152   base::subtle::NoBarrier_Store(&run_duration_sum_,
    153                                 run_duration_sum_ + run_duration);
    154 
    155   if (queue_duration_max() < queue_duration)
    156     base::subtle::NoBarrier_Store(&queue_duration_max_, queue_duration);
    157   if (run_duration_max() < run_duration)
    158     base::subtle::NoBarrier_Store(&run_duration_max_, run_duration);
    159 
    160   // Take a uniformly distributed sample over all durations ever supplied during
    161   // the current profiling phase.
    162   // The probability that we (instead) use this new sample is
    163   // 1/sample_probability_count_. This results in a completely uniform selection
    164   // of the sample (at least when we don't clamp sample_probability_count_...
    165   // but that should be inconsequentially likely).  We ignore the fact that we
    166   // correlated our selection of a sample to the run and queue times (i.e., we
    167   // used them to generate random_number).
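          // Sketch of why this is a uniform (size-one "reservoir") sample,
          // added for exposition and assuming sample_probability_count_ never
          // clamps: the Nth recorded death replaces the stored sample with
          // probability 1/N, and a value kept at step i survives steps
          // i+1 .. N with probability (i/(i+1)) * ((i+1)/(i+2)) * ... *
          // ((N-1)/N) == i/N, so each of the N durations ends the phase as the
          // stored sample with probability exactly (1/i) * (i/N) == 1/N.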
    168   CHECK_GT(sample_probability_count, 0);
    169   if (0 == (random_number % sample_probability_count)) {
    170     base::subtle::NoBarrier_Store(&queue_duration_sample_, queue_duration);
    171     base::subtle::NoBarrier_Store(&run_duration_sample_, run_duration);
    172   }
    173 }
    174 
    175 void DeathData::OnProfilingPhaseCompleted(int profiling_phase) {
    176   // Snapshotting and storing current state.
    177   last_phase_snapshot_ = new DeathDataPhaseSnapshot(
    178       profiling_phase, count(), run_duration_sum(), run_duration_max(),
    179       run_duration_sample(), queue_duration_sum(), queue_duration_max(),
    180       queue_duration_sample(), last_phase_snapshot_);
    181 
    182   // Not touching fields for which a delta can be computed by comparing with a
    183   // snapshot from the previous phase. Resetting other fields.  Sample values
    184   // will be reset upon next death recording because sample_probability_count_
    185   // is set to 0.
    186   // We avoid resetting to 0 in favor of deltas whenever possible.  The reason
    187   // is that for incrementable fields, resetting to 0 from the snapshot thread
    188   // potentially in parallel with incrementing in the death thread may result in
    189   // significant data corruption that has a potential to grow with time.  Not
    190   // resetting incrementable fields and using deltas instead means that any
    191   // small corruption is likely to be corrected at the next snapshot.
    192   // The max values are not incrementable, and cannot be deduced using deltas
    193   // for a given phase. Hence, we have to reset them to 0.  But the potential
    194   // damage is limited to getting the previous phase's max to apply for the next
    195   // phase, and the error doesn't have a potential to keep growing with new
    196   // resets.
    197   // sample_probability_count_ is incrementable, but must be reset to 0 at the
    198   // phase end, so that we start a new uniformly randomized sample selection
    199   // after the reset. These fields are updated using atomics. However, race
    200   // conditions are possible since these are updated individually and not
    201   // together atomically, resulting in the values being mutually inconsistent.
    202   // The damage is limited to selecting a wrong sample, which is not something
    203   // that can cause accumulating or cascading effects.
    204   // If there were no inconsistencies caused by race conditions, we never send a
    205   // sample for the previous phase in the next phase's snapshot because
    206   // ThreadData::SnapshotExecutedTasks doesn't send deltas with 0 count.
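          // Concrete illustration of the delta approach (added, with purely
          // illustrative numbers): if run_duration_sum_ reads 500 in the phase
          // 0 snapshot and 800 in the phase 1 snapshot, phase 1's contribution
          // is recovered later as 800 - 500 = 300, so the accumulating counter
          // is never reset here and the reset race described above cannot
          // touch it.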
    207   base::subtle::NoBarrier_Store(&sample_probability_count_, 0);
    208   base::subtle::NoBarrier_Store(&run_duration_max_, 0);
    209   base::subtle::NoBarrier_Store(&queue_duration_max_, 0);
    210 }
    211 
    212 //------------------------------------------------------------------------------
    213 DeathDataSnapshot::DeathDataSnapshot()
    214     : count(-1),
    215       run_duration_sum(-1),
    216       run_duration_max(-1),
    217       run_duration_sample(-1),
    218       queue_duration_sum(-1),
    219       queue_duration_max(-1),
    220       queue_duration_sample(-1) {
    221 }
    222 
    223 DeathDataSnapshot::DeathDataSnapshot(int count,
    224                                      int32_t run_duration_sum,
    225                                      int32_t run_duration_max,
    226                                      int32_t run_duration_sample,
    227                                      int32_t queue_duration_sum,
    228                                      int32_t queue_duration_max,
    229                                      int32_t queue_duration_sample)
    230     : count(count),
    231       run_duration_sum(run_duration_sum),
    232       run_duration_max(run_duration_max),
    233       run_duration_sample(run_duration_sample),
    234       queue_duration_sum(queue_duration_sum),
    235       queue_duration_max(queue_duration_max),
    236       queue_duration_sample(queue_duration_sample) {}
    237 
    238 DeathDataSnapshot::~DeathDataSnapshot() {
    239 }
    240 
    241 DeathDataSnapshot DeathDataSnapshot::Delta(
    242     const DeathDataSnapshot& older) const {
    243   return DeathDataSnapshot(count - older.count,
    244                            run_duration_sum - older.run_duration_sum,
    245                            run_duration_max, run_duration_sample,
    246                            queue_duration_sum - older.queue_duration_sum,
    247                            queue_duration_max, queue_duration_sample);
    248 }
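        // Worked example (added, with purely illustrative numbers): if |older|
        // has count == 10 and run_duration_sum == 500 while this snapshot has
        // count == 12 and run_duration_sum == 800, the delta reports count == 2
        // and run_duration_sum == 300 for the phase.  run_duration_max and the
        // sampled durations are passed through unchanged because maxima and
        // samples cannot be meaningfully subtracted.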
    249 
    250 //------------------------------------------------------------------------------
    251 BirthOnThread::BirthOnThread(const Location& location,
    252                              const ThreadData& current)
    253     : location_(location),
    254       birth_thread_(&current) {
    255 }
    256 
    257 //------------------------------------------------------------------------------
    258 BirthOnThreadSnapshot::BirthOnThreadSnapshot() {
    259 }
    260 
    261 BirthOnThreadSnapshot::BirthOnThreadSnapshot(const BirthOnThread& birth)
    262     : location(birth.location()),
    263       thread_name(birth.birth_thread()->thread_name()) {
    264 }
    265 
    266 BirthOnThreadSnapshot::~BirthOnThreadSnapshot() {
    267 }
    268 
    269 //------------------------------------------------------------------------------
    270 Births::Births(const Location& location, const ThreadData& current)
    271     : BirthOnThread(location, current),
    272       birth_count_(1) { }
    273 
    274 int Births::birth_count() const { return birth_count_; }
    275 
    276 void Births::RecordBirth() { ++birth_count_; }
    277 
    278 //------------------------------------------------------------------------------
    279 // ThreadData maintains the central data for all births and deaths on a single
    280 // thread.
    281 
    282 // TODO(jar): We should pull all these static vars together, into a struct, and
    283 // optimize layout so that we benefit from locality of reference during accesses
    284 // to them.
    285 
    286 // static
    287 NowFunction* ThreadData::now_function_ = NULL;
    288 
    289 // static
    290 bool ThreadData::now_function_is_time_ = false;
    291 
    292 // A TLS slot which points to the ThreadData instance for the current thread.
    293 // We do a fake initialization here (zeroing out data), and then the real
    294 // in-place construction happens when we call tls_index_.Initialize().
    295 // static
    296 base::ThreadLocalStorage::StaticSlot ThreadData::tls_index_ = TLS_INITIALIZER;
    297 
    298 // static
    299 int ThreadData::worker_thread_data_creation_count_ = 0;
    300 
    301 // static
    302 int ThreadData::cleanup_count_ = 0;
    303 
    304 // static
    305 int ThreadData::incarnation_counter_ = 0;
    306 
    307 // static
    308 ThreadData* ThreadData::all_thread_data_list_head_ = NULL;
    309 
    310 // static
    311 ThreadData* ThreadData::first_retired_worker_ = NULL;
    312 
    313 // static
    314 base::LazyInstance<base::Lock>::Leaky
    315     ThreadData::list_lock_ = LAZY_INSTANCE_INITIALIZER;
    316 
    317 // static
    318 base::subtle::Atomic32 ThreadData::status_ = ThreadData::UNINITIALIZED;
    319 
    320 ThreadData::ThreadData(const std::string& suggested_name)
    321     : next_(NULL),
    322       next_retired_worker_(NULL),
    323       worker_thread_number_(0),
    324       incarnation_count_for_pool_(-1),
    325       current_stopwatch_(NULL) {
    326   DCHECK_GE(suggested_name.size(), 0u);
    327   thread_name_ = suggested_name;
    328   PushToHeadOfList();  // Which sets real incarnation_count_for_pool_.
    329 }
    330 
    331 ThreadData::ThreadData(int thread_number)
    332     : next_(NULL),
    333       next_retired_worker_(NULL),
    334       worker_thread_number_(thread_number),
    335       incarnation_count_for_pool_(-1),
    336       current_stopwatch_(NULL) {
    337   CHECK_GT(thread_number, 0);
    338   base::StringAppendF(&thread_name_, "WorkerThread-%d", thread_number);
    339   PushToHeadOfList();  // Which sets real incarnation_count_for_pool_.
    340 }
    341 
    342 ThreadData::~ThreadData() {
    343 }
    344 
    345 void ThreadData::PushToHeadOfList() {
    346   // Toss in a hint of randomness (atop the uninitialized value).
    347   (void)VALGRIND_MAKE_MEM_DEFINED_IF_ADDRESSABLE(&random_number_,
    348                                                  sizeof(random_number_));
    349   MSAN_UNPOISON(&random_number_, sizeof(random_number_));
    350   random_number_ += static_cast<uint32_t>(this - static_cast<ThreadData*>(0));
    351   random_number_ ^= (Now() - TrackedTime()).InMilliseconds();
    352 
    353   DCHECK(!next_);
    354   base::AutoLock lock(*list_lock_.Pointer());
    355   incarnation_count_for_pool_ = incarnation_counter_;
    356   next_ = all_thread_data_list_head_;
    357   all_thread_data_list_head_ = this;
    358 }
    359 
    360 // static
    361 ThreadData* ThreadData::first() {
    362   base::AutoLock lock(*list_lock_.Pointer());
    363   return all_thread_data_list_head_;
    364 }
    365 
    366 ThreadData* ThreadData::next() const { return next_; }
    367 
    368 // static
    369 void ThreadData::InitializeThreadContext(const std::string& suggested_name) {
    370   Initialize();
    371   ThreadData* current_thread_data =
    372       reinterpret_cast<ThreadData*>(tls_index_.Get());
    373   if (current_thread_data)
    374     return;  // Browser tests instigate this.
    375   current_thread_data = new ThreadData(suggested_name);
    376   tls_index_.Set(current_thread_data);
    377 }
    378 
    379 // static
    380 ThreadData* ThreadData::Get() {
    381   if (!tls_index_.initialized())
    382     return NULL;  // For unittests only.
    383   ThreadData* registered = reinterpret_cast<ThreadData*>(tls_index_.Get());
    384   if (registered)
    385     return registered;
    386 
    387   // We must be a worker thread, since we didn't pre-register.
    388   ThreadData* worker_thread_data = NULL;
    389   int worker_thread_number = 0;
    390   {
    391     base::AutoLock lock(*list_lock_.Pointer());
    392     if (first_retired_worker_) {
    393       worker_thread_data = first_retired_worker_;
    394       first_retired_worker_ = first_retired_worker_->next_retired_worker_;
    395       worker_thread_data->next_retired_worker_ = NULL;
    396     } else {
    397       worker_thread_number = ++worker_thread_data_creation_count_;
    398     }
    399   }
    400 
    401   // If we can't find a previously used instance, then we have to create one.
    402   if (!worker_thread_data) {
    403     DCHECK_GT(worker_thread_number, 0);
    404     worker_thread_data = new ThreadData(worker_thread_number);
    405   }
    406   DCHECK_GT(worker_thread_data->worker_thread_number_, 0);
    407 
    408   tls_index_.Set(worker_thread_data);
    409   return worker_thread_data;
    410 }
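        // Illustrative flow (added commentary): the first Get() on an
        // unregistered worker thread either recycles a ThreadData parked on
        // first_retired_worker_ by a worker that already exited, or creates a
        // fresh one named "WorkerThread-N"; either way it is stored in TLS so
        // subsequent calls on that thread are a plain TLS read with no lock.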
    411 
    412 // static
    413 void ThreadData::OnThreadTermination(void* thread_data) {
    414   DCHECK(thread_data);  // TLS should *never* call us with a NULL.
    415   // We must NOT do any allocations during this callback.  There is a chance
    416   // that the allocator is no longer active on this thread.
    417   reinterpret_cast<ThreadData*>(thread_data)->OnThreadTerminationCleanup();
    418 }
    419 
    420 void ThreadData::OnThreadTerminationCleanup() {
    421   // The list_lock_ was created when we registered the callback, so it won't be
    422   // allocated here despite the lazy reference.
    423   base::AutoLock lock(*list_lock_.Pointer());
    424   if (incarnation_counter_ != incarnation_count_for_pool_)
    425     return;  // ThreadData was constructed in an earlier unit test.
    426   ++cleanup_count_;
    427   // Only worker threads need to be retired and reused.
    428   if (!worker_thread_number_) {
    429     return;
    430   }
    431   // We must NOT do any allocations during this callback.
    432   // Using the simple linked lists avoids all allocations.
    433   DCHECK_EQ(this->next_retired_worker_, reinterpret_cast<ThreadData*>(NULL));
    434   this->next_retired_worker_ = first_retired_worker_;
    435   first_retired_worker_ = this;
    436 }
    437 
    438 // static
    439 void ThreadData::Snapshot(int current_profiling_phase,
    440                           ProcessDataSnapshot* process_data_snapshot) {
    441   // Get an unchanging copy of a ThreadData list.
    442   ThreadData* my_list = ThreadData::first();
    443 
    444   // Gather data serially.
    445   // This hackish approach *can* get some slightly corrupt tallies, as we are
    446   // grabbing values without the protection of a lock, but it has the advantage
    447   // of working even with threads that don't have message loops.  If a user
    448   // sees any strangeness, they can always just run their stats gathering a
    449   // second time.
    450   BirthCountMap birth_counts;
    451   for (ThreadData* thread_data = my_list; thread_data;
    452        thread_data = thread_data->next()) {
    453     thread_data->SnapshotExecutedTasks(current_profiling_phase,
    454                                        &process_data_snapshot->phased_snapshots,
    455                                        &birth_counts);
    456   }
    457 
    458   // Add births that are still active -- i.e. objects that have tallied a birth,
    459   // but have not yet tallied a matching death, and hence must be either
    460   // running, queued up, or being held in limbo for future posting.
    461   auto* current_phase_tasks =
    462       &process_data_snapshot->phased_snapshots[current_profiling_phase].tasks;
    463   for (const auto& birth_count : birth_counts) {
    464     if (birth_count.second > 0) {
    465       current_phase_tasks->push_back(
    466           TaskSnapshot(BirthOnThreadSnapshot(*birth_count.first),
    467                        DeathDataSnapshot(birth_count.second, 0, 0, 0, 0, 0, 0),
    468                        "Still_Alive"));
    469     }
    470   }
    471 }
    472 
    473 // static
    474 void ThreadData::OnProfilingPhaseCompleted(int profiling_phase) {
    475   // Get an unchanging copy of a ThreadData list.
    476   ThreadData* my_list = ThreadData::first();
    477 
    478   // Add snapshots for all instances of death data in all threads serially.
    479   // This hackish approach *can* get some slightly corrupt tallies, as we are
    480   // grabbing values without the protection of a lock, but it has the advantage
    481   // of working even with threads that don't have message loops.  Any corruption
    482   // shouldn't cause "cascading damage" to anything else (in later phases).
    483   for (ThreadData* thread_data = my_list; thread_data;
    484        thread_data = thread_data->next()) {
    485     thread_data->OnProfilingPhaseCompletedOnThread(profiling_phase);
    486   }
    487 }
    488 
    489 Births* ThreadData::TallyABirth(const Location& location) {
    490   BirthMap::iterator it = birth_map_.find(location);
    491   Births* child;
    492   if (it != birth_map_.end()) {
    493     child = it->second;
    494     child->RecordBirth();
    495   } else {
    496     child = new Births(location, *this);  // Leak this.
    497     // Lock since the map may get relocated now, and other threads sometimes
    498     // snapshot it (but they lock before copying it).
    499     base::AutoLock lock(map_lock_);
    500     birth_map_[location] = child;
    501   }
    502 
    503   return child;
    504 }
    505 
    506 void ThreadData::TallyADeath(const Births& births,
    507                              int32_t queue_duration,
    508                              const TaskStopwatch& stopwatch) {
    509   int32_t run_duration = stopwatch.RunDurationMs();
    510 
    511   // Stir in some randomness, plus add a constant in case durations are zero.
    512   const uint32_t kSomePrimeNumber = 2147483647;
    513   random_number_ += queue_duration + run_duration + kSomePrimeNumber;
    514   // An address is going to have some randomness to it as well ;-).
    515   random_number_ ^=
    516       static_cast<uint32_t>(&births - reinterpret_cast<Births*>(0));
    517 
    518   // We don't have queue durations without the OS timer.  The OS timer is
    519   // automatically used for task-post timing, so the use of an alternate timer
    520   // implies all queue times are invalid, unless it was explicitly stated that
    521   // we can trust the alternate timer.
    522   if (kAllowAlternateTimeSourceHandling &&
    523       now_function_ &&
    524       !now_function_is_time_) {
    525     queue_duration = 0;
    526   }
    527 
    528   DeathMap::iterator it = death_map_.find(&births);
    529   DeathData* death_data;
    530   if (it != death_map_.end()) {
    531     death_data = &it->second;
    532   } else {
    533     base::AutoLock lock(map_lock_);  // Lock as the map may get relocated now.
    534     death_data = &death_map_[&births];
    535   }  // Release lock ASAP.
    536   death_data->RecordDeath(queue_duration, run_duration, random_number_);
    537 }
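        // Side note (added): kSomePrimeNumber above is 2^31 - 1, a Mersenne
        // prime, so random_number_ is perturbed on every death even when both
        // durations are zero, which is what the sampling in
        // DeathData::RecordDeath() relies on.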
    538 
    539 // static
    540 Births* ThreadData::TallyABirthIfActive(const Location& location) {
    541   if (!TrackingStatus())
    542     return NULL;
    543   ThreadData* current_thread_data = Get();
    544   if (!current_thread_data)
    545     return NULL;
    546   return current_thread_data->TallyABirth(location);
    547 }
    548 
    549 // static
    550 void ThreadData::TallyRunOnNamedThreadIfTracking(
    551     const base::TrackingInfo& completed_task,
    552     const TaskStopwatch& stopwatch) {
    553   // Even if we have been DEACTIVATED, we will process any pending births so
    554   // that our data structures (which counted the outstanding births) remain
    555   // consistent.
    556   const Births* births = completed_task.birth_tally;
    557   if (!births)
    558     return;
    559   ThreadData* current_thread_data = stopwatch.GetThreadData();
    560   if (!current_thread_data)
    561     return;
    562 
    563   // Watch out for a race where status_ is changing, and hence one or both
    564   // of start_of_run or end_of_run is zero.  In that case, we didn't bother to
    565   // get a time value since we "weren't tracking" and we were trying to be
    566   // efficient by not calling for a genuine time value.  For simplicity, we'll
    567   // use a default zero duration when we can't calculate a true value.
    568   TrackedTime start_of_run = stopwatch.StartTime();
    569   int32_t queue_duration = 0;
    570   if (!start_of_run.is_null()) {
    571     queue_duration = (start_of_run - completed_task.EffectiveTimePosted())
    572         .InMilliseconds();
    573   }
    574   current_thread_data->TallyADeath(*births, queue_duration, stopwatch);
    575 }
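        // Illustrative timeline for the queue_duration computed above (added,
        // hypothetical numbers): if the task's effective post time is
        // T = 100 ms and the stopwatch saw start_of_run at T = 130 ms, then
        // queue_duration == 30 ms, and that is what TallyADeath() folds into
        // the DeathData for the task's birth location.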
    576 
    577 // static
    578 void ThreadData::TallyRunOnWorkerThreadIfTracking(
    579     const Births* births,
    580     const TrackedTime& time_posted,
    581     const TaskStopwatch& stopwatch) {
    582   // Even if we have been DEACTIVATED, we will process any pending births so
    583   // that our data structures (which counted the outstanding births) remain
    584   // consistent.
    585   if (!births)
    586     return;
    587 
    588   // TODO(jar): Support the option to coalesce all worker-thread activity under
    589   // one ThreadData instance that uses locks to protect *all* access.  This will
    590   // reduce memory (making it provably bounded), but run incrementally slower
    591   // (since we'll use locks on TallyABirth and TallyADeath).  The good news is
    592   // that the locks on TallyADeath will be *after* the worker thread has run,
    593   // and hence nothing will be waiting for the completion (...  besides some
    594   // other thread that might like to run).  Also, the worker threads' tasks are
    595   // generally longer, and hence the cost of the lock may perchance be amortized
    596   // over the long task's lifetime.
    597   ThreadData* current_thread_data = stopwatch.GetThreadData();
    598   if (!current_thread_data)
    599     return;
    600 
    601   TrackedTime start_of_run = stopwatch.StartTime();
    602   int32_t queue_duration = 0;
    603   if (!start_of_run.is_null()) {
    604     queue_duration = (start_of_run - time_posted).InMilliseconds();
    605   }
    606   current_thread_data->TallyADeath(*births, queue_duration, stopwatch);
    607 }
    608 
    609 // static
    610 void ThreadData::TallyRunInAScopedRegionIfTracking(
    611     const Births* births,
    612     const TaskStopwatch& stopwatch) {
    613   // Even if we have been DEACTIVATED, we will process any pending births so
    614   // that our data structures (which counted the outstanding births) remain
    615   // consistent.
    616   if (!births)
    617     return;
    618 
    619   ThreadData* current_thread_data = stopwatch.GetThreadData();
    620   if (!current_thread_data)
    621     return;
    622 
    623   int32_t queue_duration = 0;
    624   current_thread_data->TallyADeath(*births, queue_duration, stopwatch);
    625 }
    626 
    627 void ThreadData::SnapshotExecutedTasks(
    628     int current_profiling_phase,
    629     PhasedProcessDataSnapshotMap* phased_snapshots,
    630     BirthCountMap* birth_counts) {
    631   // Get a copy of the data, so that it will not change during the iterations
    632   // and processing.
    633   BirthMap birth_map;
    634   DeathsSnapshot deaths;
    635   SnapshotMaps(current_profiling_phase, &birth_map, &deaths);
    636 
    637   for (const auto& birth : birth_map) {
    638     (*birth_counts)[birth.second] += birth.second->birth_count();
    639   }
    640 
    641   for (const auto& death : deaths) {
    642     (*birth_counts)[death.first] -= death.first->birth_count();
    643 
    644     // For the current death data, walk through all its snapshots, starting from
    645     // the current one, then from the previous profiling phase etc., and for
    646     // each snapshot calculate the delta between the snapshot and the previous
    647     // phase, if any.  Store the deltas in the result.
    648     for (const DeathDataPhaseSnapshot* phase = &death.second; phase;
    649          phase = phase->prev) {
    650       const DeathDataSnapshot& death_data =
    651           phase->prev ? phase->death_data.Delta(phase->prev->death_data)
    652                       : phase->death_data;
    653 
    654       if (death_data.count > 0) {
    655         (*phased_snapshots)[phase->profiling_phase].tasks.push_back(
    656             TaskSnapshot(BirthOnThreadSnapshot(*death.first), death_data,
    657                          thread_name()));
    658       }
    659     }
    660   }
    661 }
    662 
    663 // This may be called from another thread.
    664 void ThreadData::SnapshotMaps(int profiling_phase,
    665                               BirthMap* birth_map,
    666                               DeathsSnapshot* deaths) {
    667   base::AutoLock lock(map_lock_);
    668 
    669   for (const auto& birth : birth_map_)
    670     (*birth_map)[birth.first] = birth.second;
    671 
    672   for (const auto& death : death_map_) {
    673     deaths->push_back(std::make_pair(
    674         death.first,
    675         DeathDataPhaseSnapshot(profiling_phase, death.second.count(),
    676                                death.second.run_duration_sum(),
    677                                death.second.run_duration_max(),
    678                                death.second.run_duration_sample(),
    679                                death.second.queue_duration_sum(),
    680                                death.second.queue_duration_max(),
    681                                death.second.queue_duration_sample(),
    682                                death.second.last_phase_snapshot())));
    683   }
    684 }
    685 
    686 void ThreadData::OnProfilingPhaseCompletedOnThread(int profiling_phase) {
    687   base::AutoLock lock(map_lock_);
    688 
    689   for (auto& death : death_map_) {
    690     death.second.OnProfilingPhaseCompleted(profiling_phase);
    691   }
    692 }
    693 
    694 static void OptionallyInitializeAlternateTimer() {
    695   NowFunction* alternate_time_source = GetAlternateTimeSource();
    696   if (alternate_time_source)
    697     ThreadData::SetAlternateTimeSource(alternate_time_source);
    698 }
    699 
    700 void ThreadData::Initialize() {
    701   if (base::subtle::Acquire_Load(&status_) >= DEACTIVATED)
    702     return;  // Someone else did the initialization.
    703   // Due to racy lazy initialization in tests, we'll need to recheck status_
    704   // after we acquire the lock.
    705 
    706   // Ensure that we don't double-initialize TLS.  We are called when single
    707   // threaded in the product, but some tests may be racy and lazy about our
    708   // initialization.
    709   base::AutoLock lock(*list_lock_.Pointer());
    710   if (base::subtle::Acquire_Load(&status_) >= DEACTIVATED)
    711     return;  // Someone raced in here and beat us.
    712 
    713   // Put an alternate timer in place if the environment calls for it, such as
    714   // for tracking TCMalloc allocations.  This insertion is idempotent, so we
    715   // don't mind if there is a race, and we'd prefer not to be in a lock while
    716   // doing this work.
    717   if (kAllowAlternateTimeSourceHandling)
    718     OptionallyInitializeAlternateTimer();
    719 
    720   // Perform the "real" TLS initialization now, and leave it intact through
    721   // process termination.
    722   if (!tls_index_.initialized()) {  // Testing may have initialized this.
    723     DCHECK_EQ(base::subtle::NoBarrier_Load(&status_), UNINITIALIZED);
    724     tls_index_.Initialize(&ThreadData::OnThreadTermination);
    725     DCHECK(tls_index_.initialized());
    726   } else {
    727     // TLS was initialized for us earlier.
    728     DCHECK_EQ(base::subtle::NoBarrier_Load(&status_), DORMANT_DURING_TESTS);
    729   }
    730 
    731   // Incarnation counter is only significant to testing, as it otherwise will
    732   // never again change in this process.
    733   ++incarnation_counter_;
    734 
    735   // The lock is not critical for setting status_, but it doesn't hurt.  It also
    736   // ensures that if we have a racy initialization, we'll bail out as soon as
    737   // we acquire the lock earlier in this method.
    738   base::subtle::Release_Store(&status_, kInitialStartupState);
    739   DCHECK(base::subtle::NoBarrier_Load(&status_) != UNINITIALIZED);
    740 }
    741 
    742 // static
    743 void ThreadData::InitializeAndSetTrackingStatus(Status status) {
    744   DCHECK_GE(status, DEACTIVATED);
    745   DCHECK_LE(status, PROFILING_ACTIVE);
    746 
    747   Initialize();  // No-op if already initialized.
    748 
    749   if (status > DEACTIVATED)
    750     status = PROFILING_ACTIVE;
    751   base::subtle::Release_Store(&status_, status);
    752 }
    753 
    754 // static
    755 ThreadData::Status ThreadData::status() {
    756   return static_cast<ThreadData::Status>(base::subtle::Acquire_Load(&status_));
    757 }
    758 
    759 // static
    760 bool ThreadData::TrackingStatus() {
    761   return base::subtle::Acquire_Load(&status_) > DEACTIVATED;
    762 }
    763 
    764 // static
    765 void ThreadData::SetAlternateTimeSource(NowFunction* now_function) {
    766   DCHECK(now_function);
    767   if (kAllowAlternateTimeSourceHandling)
    768     now_function_ = now_function;
    769 }
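        // Hypothetical usage sketch (added; MyThreadNow is an invented name):
        // a module such as a TCMalloc-hooked profiler could install a
        // thread-specific clock once during startup with
        //
        //   tracked_objects::ThreadData::SetAlternateTimeSource(&MyThreadNow);
        //
        // where MyThreadNow matches the NowFunction typedef from
        // base/profiler/alternate_timer.h; ThreadData::Now() (below) then
        // routes through it instead of the wall clock.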
    770 
    771 // static
    772 void ThreadData::EnableProfilerTiming() {
    773   base::subtle::NoBarrier_Store(&g_profiler_timing_enabled, ENABLED_TIMING);
    774 }
    775 
    776 // static
    777 TrackedTime ThreadData::Now() {
    778   if (kAllowAlternateTimeSourceHandling && now_function_)
    779     return TrackedTime::FromMilliseconds((*now_function_)());
    780   if (IsProfilerTimingEnabled() && TrackingStatus())
    781     return TrackedTime::Now();
    782   return TrackedTime();  // Super fast when disabled, or not compiled.
    783 }
    784 
    785 // static
    786 void ThreadData::EnsureCleanupWasCalled(
    787     int /* major_threads_shutdown_count */) {
    788   base::AutoLock lock(*list_lock_.Pointer());
    789   if (worker_thread_data_creation_count_ == 0)
    790     return;  // We haven't really run much, and couldn't have leaked.
    791 
    792   // TODO(jar): until this is working on XP, don't run the real test.
    793 #if 0
    794   // Verify that we've at least shut down/cleaned up the major named threads.  The
    795   // caller should tell us how many thread shutdowns should have taken place by
    796   // now.
    797   CHECK_GT(cleanup_count_, major_threads_shutdown_count);
    798 #endif
    799 }
    800 
    801 // static
    802 void ThreadData::ShutdownSingleThreadedCleanup(bool leak) {
    803   // This is only called from test code, where we need to cleanup so that
    804   // additional tests can be run.
    805   // We must be single threaded... but be careful anyway.
    806   InitializeAndSetTrackingStatus(DEACTIVATED);
    807 
    808   ThreadData* thread_data_list;
    809   {
    810     base::AutoLock lock(*list_lock_.Pointer());
    811     thread_data_list = all_thread_data_list_head_;
    812     all_thread_data_list_head_ = NULL;
    813     ++incarnation_counter_;
    814     // To be clean, break apart the retired worker list (though we leak them).
    815     while (first_retired_worker_) {
    816       ThreadData* worker = first_retired_worker_;
    817       CHECK_GT(worker->worker_thread_number_, 0);
    818       first_retired_worker_ = worker->next_retired_worker_;
    819       worker->next_retired_worker_ = NULL;
    820     }
    821   }
    822 
    823   // Put most global statics back in pristine shape.
    824   worker_thread_data_creation_count_ = 0;
    825   cleanup_count_ = 0;
    826   tls_index_.Set(NULL);
    827   // Almost UNINITIALIZED.
    828   base::subtle::Release_Store(&status_, DORMANT_DURING_TESTS);
    829 
    830   // To avoid any chance of racing in unit tests, which is the only place we
    831   // call this function, we may sometimes leak all the data structures we
    832   // recovered, as they may still be in use on threads from prior tests!
    833   if (leak) {
    834     ThreadData* thread_data = thread_data_list;
    835     while (thread_data) {
    836       thread_data = thread_data->next();
    837     }
    838     return;
    839   }
    840 
    841   // When we want to clean up (on a single thread), here is what we do.
    842 
    843   // Do actual recursive delete in all ThreadData instances.
    844   while (thread_data_list) {
    845     ThreadData* next_thread_data = thread_data_list;
    846     thread_data_list = thread_data_list->next();
    847 
    848     for (BirthMap::iterator it = next_thread_data->birth_map_.begin();
    849          next_thread_data->birth_map_.end() != it; ++it)
    850       delete it->second;  // Delete the Birth Records.
    851     delete next_thread_data;  // Includes all Death Records.
    852   }
    853 }
    854 
    855 //------------------------------------------------------------------------------
    856 TaskStopwatch::TaskStopwatch()
    857     : wallclock_duration_ms_(0),
    858       current_thread_data_(NULL),
    859       excluded_duration_ms_(0),
    860       parent_(NULL) {
    861 #if DCHECK_IS_ON()
    862   state_ = CREATED;
    863   child_ = NULL;
    864 #endif
    865 }
    866 
    867 TaskStopwatch::~TaskStopwatch() {
    868 #if DCHECK_IS_ON()
    869   DCHECK(state_ != RUNNING);
    870   DCHECK(child_ == NULL);
    871 #endif
    872 }
    873 
    874 void TaskStopwatch::Start() {
    875 #if DCHECK_IS_ON()
    876   DCHECK(state_ == CREATED);
    877   state_ = RUNNING;
    878 #endif
    879 
    880   start_time_ = ThreadData::Now();
    881 
    882   current_thread_data_ = ThreadData::Get();
    883   if (!current_thread_data_)
    884     return;
    885 
    886   parent_ = current_thread_data_->current_stopwatch_;
    887 #if DCHECK_IS_ON()
    888   if (parent_) {
    889     DCHECK(parent_->state_ == RUNNING);
    890     DCHECK(parent_->child_ == NULL);
    891     parent_->child_ = this;
    892   }
    893 #endif
    894   current_thread_data_->current_stopwatch_ = this;
    895 }
    896 
    897 void TaskStopwatch::Stop() {
    898   const TrackedTime end_time = ThreadData::Now();
    899 #if DCHECK_IS_ON()
    900   DCHECK(state_ == RUNNING);
    901   state_ = STOPPED;
    902   DCHECK(child_ == NULL);
    903 #endif
    904 
    905   if (!start_time_.is_null() && !end_time.is_null()) {
    906     wallclock_duration_ms_ = (end_time - start_time_).InMilliseconds();
    907   }
    908 
    909   if (!current_thread_data_)
    910     return;
    911 
    912   DCHECK(current_thread_data_->current_stopwatch_ == this);
    913   current_thread_data_->current_stopwatch_ = parent_;
    914   if (!parent_)
    915     return;
    916 
    917 #if DCHECK_IS_ON()
    918   DCHECK(parent_->state_ == RUNNING);
    919   DCHECK(parent_->child_ == this);
    920   parent_->child_ = NULL;
    921 #endif
    922   parent_->excluded_duration_ms_ += wallclock_duration_ms_;
    923   parent_ = NULL;
    924 }
    925 
    926 TrackedTime TaskStopwatch::StartTime() const {
    927 #if DCHECK_IS_ON()
    928   DCHECK(state_ != CREATED);
    929 #endif
    930 
    931   return start_time_;
    932 }
    933 
    934 int32_t TaskStopwatch::RunDurationMs() const {
    935 #if DCHECK_IS_ON()
    936   DCHECK(state_ == STOPPED);
    937 #endif
    938 
    939   return wallclock_duration_ms_ - excluded_duration_ms_;
    940 }
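        // Illustrative example of the exclusion accounting (added, hypothetical
        // numbers): if a parent task spans 10 ms of wall-clock time and a
        // nested TaskStopwatch measures a 4 ms child task inside it, the
        // child's Stop() adds 4 ms to the parent's excluded_duration_ms_, so
        // the parent's RunDurationMs() reports 6 ms -- only the time spent in
        // its own code.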
    941 
    942 ThreadData* TaskStopwatch::GetThreadData() const {
    943 #if DCHECK_IS_ON()
    944   DCHECK(state_ != CREATED);
    945 #endif
    946 
    947   return current_thread_data_;
    948 }
    949 
    950 //------------------------------------------------------------------------------
    951 // DeathDataPhaseSnapshot
    952 
    953 DeathDataPhaseSnapshot::DeathDataPhaseSnapshot(
    954     int profiling_phase,
    955     int count,
    956     int32_t run_duration_sum,
    957     int32_t run_duration_max,
    958     int32_t run_duration_sample,
    959     int32_t queue_duration_sum,
    960     int32_t queue_duration_max,
    961     int32_t queue_duration_sample,
    962     const DeathDataPhaseSnapshot* prev)
    963     : profiling_phase(profiling_phase),
    964       death_data(count,
    965                  run_duration_sum,
    966                  run_duration_max,
    967                  run_duration_sample,
    968                  queue_duration_sum,
    969                  queue_duration_max,
    970                  queue_duration_sample),
    971       prev(prev) {}
    972 
    973 //------------------------------------------------------------------------------
    974 // TaskSnapshot
    975 
    976 TaskSnapshot::TaskSnapshot() {
    977 }
    978 
    979 TaskSnapshot::TaskSnapshot(const BirthOnThreadSnapshot& birth,
    980                            const DeathDataSnapshot& death_data,
    981                            const std::string& death_thread_name)
    982     : birth(birth),
    983       death_data(death_data),
    984       death_thread_name(death_thread_name) {
    985 }
    986 
    987 TaskSnapshot::~TaskSnapshot() {
    988 }
    989 
    990 //------------------------------------------------------------------------------
    991 // ProcessDataPhaseSnapshot
    992 
    993 ProcessDataPhaseSnapshot::ProcessDataPhaseSnapshot() {
    994 }
    995 
    996 ProcessDataPhaseSnapshot::~ProcessDataPhaseSnapshot() {
    997 }
    998 
    999 //------------------------------------------------------------------------------
   1000 // ProcessDataSnapshot
   1001 
   1002 ProcessDataSnapshot::ProcessDataSnapshot()
   1003 #if !defined(OS_NACL)
   1004     : process_id(base::GetCurrentProcId()) {
   1005 #else
   1006     : process_id(base::kNullProcessId) {
   1007 #endif
   1008 }
   1009 
   1010 ProcessDataSnapshot::~ProcessDataSnapshot() {
   1011 }
   1012 
   1013 }  // namespace tracked_objects
   1014