// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "base/tracked_objects.h"

#include <limits.h>
#include <stdlib.h>

#include "base/atomicops.h"
#include "base/base_switches.h"
#include "base/command_line.h"
#include "base/compiler_specific.h"
#include "base/debug/leak_annotations.h"
#include "base/logging.h"
#include "base/process/process_handle.h"
#include "base/strings/stringprintf.h"
#include "base/third_party/valgrind/memcheck.h"
#include "base/threading/worker_pool.h"
#include "base/tracking_info.h"
#include "build/build_config.h"

using base::TimeDelta;

namespace base {
class TimeDelta;
}

namespace tracked_objects {

namespace {
// When ThreadData is first initialized, should we start in an ACTIVE state to
// record all of the startup-time tasks, or should we start up DEACTIVATED, so
// that we only record after parsing the command line flag --enable-tracking.
// Note that the flag may force either state, so this really controls only the
// period of time up until that flag is parsed.  If there is no flag seen, then
// this state may prevail for much or all of the process lifetime.
const ThreadData::Status kInitialStartupState = ThreadData::PROFILING_ACTIVE;

// Possible states of the profiler timing enabledness.
enum {
  UNDEFINED_TIMING,
  ENABLED_TIMING,
  DISABLED_TIMING,
};

// State of the profiler timing enabledness.
base::subtle::Atomic32 g_profiler_timing_enabled = UNDEFINED_TIMING;

// Returns whether profiler timing is enabled.  The default is true, but this
// may be overridden by a command-line flag.  Some platforms may
// programmatically set this command-line flag to the "off" value if it's not
// specified.
// This in turn can be overridden by explicitly calling
// ThreadData::EnableProfilerTiming, say, based on a field trial.
inline bool IsProfilerTimingEnabled() {
  // Reading |g_profiler_timing_enabled| is done without a barrier because
  // multiple initialization is not an issue, while a barrier would be
  // relatively costly given that this method is sometimes called in a tight
  // loop.
  base::subtle::Atomic32 current_timing_enabled =
      base::subtle::NoBarrier_Load(&g_profiler_timing_enabled);
  if (current_timing_enabled == UNDEFINED_TIMING) {
    if (!base::CommandLine::InitializedForCurrentProcess())
      return true;
    current_timing_enabled =
        (base::CommandLine::ForCurrentProcess()->GetSwitchValueASCII(
             switches::kProfilerTiming) ==
         switches::kProfilerTimingDisabledValue)
            ? DISABLED_TIMING
            : ENABLED_TIMING;
    base::subtle::NoBarrier_Store(&g_profiler_timing_enabled,
                                  current_timing_enabled);
  }
  return current_timing_enabled == ENABLED_TIMING;
}
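
// Usage sketch (the actual switch name and value strings live in
// base/base_switches.cc as switches::kProfilerTiming and
// switches::kProfilerTimingDisabledValue): timing can be turned off at
// process launch via the command line, e.g.
//   my_app --profiler-timing=<kProfilerTimingDisabledValue>
// and later re-enabled programmatically, say from a field-trial check:
//   tracked_objects::ThreadData::EnableProfilerTiming();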

}  // namespace

//------------------------------------------------------------------------------
// DeathData tallies durations when a death takes place.

DeathData::DeathData()
    : count_(0),
      sample_probability_count_(0),
      run_duration_sum_(0),
      queue_duration_sum_(0),
      run_duration_max_(0),
      queue_duration_max_(0),
      run_duration_sample_(0),
      queue_duration_sample_(0),
      last_phase_snapshot_(nullptr) {
}

DeathData::DeathData(const DeathData& other)
    : count_(other.count_),
      sample_probability_count_(other.sample_probability_count_),
      run_duration_sum_(other.run_duration_sum_),
      queue_duration_sum_(other.queue_duration_sum_),
      run_duration_max_(other.run_duration_max_),
      queue_duration_max_(other.queue_duration_max_),
      run_duration_sample_(other.run_duration_sample_),
      queue_duration_sample_(other.queue_duration_sample_),
      last_phase_snapshot_(nullptr) {
  // This constructor will be used by std::map when adding new DeathData values
  // to the map.  At that point, last_phase_snapshot_ is still NULL, so we don't
  // need to worry about ownership transfer.
  DCHECK(other.last_phase_snapshot_ == nullptr);
}

DeathData::~DeathData() {
  while (last_phase_snapshot_) {
    const DeathDataPhaseSnapshot* snapshot = last_phase_snapshot_;
    last_phase_snapshot_ = snapshot->prev;
    delete snapshot;
  }
}

// TODO(jar): I need to see if this macro to optimize branching is worth using.
//
// This macro has no branching, so it is surely fast, and is equivalent to:
//             if (assign_it)
//               target = source;
// We use a macro rather than a template to force this to inline.
// Related code for calculating max is discussed on the web.
#define CONDITIONAL_ASSIGN(assign_it, target, source) \
  ((target) ^= ((target) ^ (source)) & -static_cast<int32_t>(assign_it))
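
// For example, CONDITIONAL_ASSIGN(run_duration > max, max, run_duration)
// expands to
//   max ^= (max ^ run_duration) & -static_cast<int32_t>(run_duration > max);
// When the condition is true, -1 yields an all-ones mask and the XORs reduce
// to max = run_duration; when false, the mask is 0 and max is unchanged.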

void DeathData::RecordDeath(const int32_t queue_duration,
                            const int32_t run_duration,
                            const uint32_t random_number) {
  // We'll just clamp at INT_MAX, but we should note this in the UI as such.
  if (count_ < INT_MAX)
    base::subtle::NoBarrier_Store(&count_, count_ + 1);

  int sample_probability_count =
      base::subtle::NoBarrier_Load(&sample_probability_count_);
  if (sample_probability_count < INT_MAX)
    ++sample_probability_count;
  base::subtle::NoBarrier_Store(&sample_probability_count_,
                                sample_probability_count);

  base::subtle::NoBarrier_Store(&queue_duration_sum_,
                                queue_duration_sum_ + queue_duration);
  base::subtle::NoBarrier_Store(&run_duration_sum_,
                                run_duration_sum_ + run_duration);

  if (queue_duration_max() < queue_duration)
    base::subtle::NoBarrier_Store(&queue_duration_max_, queue_duration);
  if (run_duration_max() < run_duration)
    base::subtle::NoBarrier_Store(&run_duration_max_, run_duration);

  // Take a uniformly distributed sample over all durations ever supplied during
  // the current profiling phase.
  // The probability that we (instead) use this new sample is
  // 1/sample_probability_count_. This results in a completely uniform selection
  // of the sample (at least when we don't clamp sample_probability_count_...
  // but that should be inconsequentially likely).  We ignore the fact that we
  // correlated our selection of a sample to the run and queue times (i.e., we
  // used them to generate random_number).
  CHECK_GT(sample_probability_count, 0);
  if (0 == (random_number % sample_probability_count)) {
    base::subtle::NoBarrier_Store(&queue_duration_sample_, queue_duration);
    base::subtle::NoBarrier_Store(&run_duration_sample_, run_duration);
  }
}
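
// The sampling above is reservoir sampling with a reservoir of size one: the
// i-th value replaces the stored sample with probability 1/i, which leaves
// every value seen so far equally likely to be the one retained.  A minimal
// stand-alone sketch of the same scheme (hypothetical names):
//   int count = 0;
//   int32_t sample = 0;
//   void Offer(int32_t value, uint32_t random) {
//     ++count;
//     if (random % count == 0)  // Accept with probability 1/count.
//       sample = value;
//   }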

void DeathData::OnProfilingPhaseCompleted(int profiling_phase) {
  // Snapshotting and storing current state.
  last_phase_snapshot_ = new DeathDataPhaseSnapshot(
      profiling_phase, count(), run_duration_sum(), run_duration_max(),
      run_duration_sample(), queue_duration_sum(), queue_duration_max(),
      queue_duration_sample(), last_phase_snapshot_);

  // Not touching fields for which a delta can be computed by comparing with a
  // snapshot from the previous phase. Resetting other fields.  Sample values
  // will be reset upon next death recording because sample_probability_count_
  // is set to 0.
  //
  // We avoid resetting to 0 in favor of deltas whenever possible.  The reason
  // is that for incrementable fields, resetting to 0 from the snapshot thread
  // potentially in parallel with incrementing in the death thread may result in
  // significant data corruption that has a potential to grow with time.  Not
  // resetting incrementable fields and using deltas will cause any
  // off-by-little corruptions to be likely fixed at the next snapshot.
  //
  // The max values are not incrementable, and cannot be deduced using deltas
  // for a given phase. Hence, we have to reset them to 0.  But the potential
  // damage is limited to getting the previous phase's max to apply for the next
  // phase, and the error doesn't have a potential to keep growing with new
  // resets.
  //
  // sample_probability_count_ is incrementable, but must be reset to 0 at the
  // phase end, so that we start a new uniformly randomized sample selection
  // after the reset.  These fields are updated using atomics. However, race
  // conditions are possible since these are updated individually and not
  // together atomically, resulting in the values being mutually inconsistent.
  // The damage is limited to selecting a wrong sample, which is not something
  // that can cause accumulating or cascading effects.
  //
  // If there were no inconsistencies caused by race conditions, we never send a
  // sample for the previous phase in the next phase's snapshot because
  // ThreadData::SnapshotExecutedTasks doesn't send deltas with 0 count.
  base::subtle::NoBarrier_Store(&sample_probability_count_, 0);
  base::subtle::NoBarrier_Store(&run_duration_max_, 0);
  base::subtle::NoBarrier_Store(&queue_duration_max_, 0);
}

//------------------------------------------------------------------------------
DeathDataSnapshot::DeathDataSnapshot()
    : count(-1),
      run_duration_sum(-1),
      run_duration_max(-1),
      run_duration_sample(-1),
      queue_duration_sum(-1),
      queue_duration_max(-1),
      queue_duration_sample(-1) {
}

DeathDataSnapshot::DeathDataSnapshot(int count,
                                     int32_t run_duration_sum,
                                     int32_t run_duration_max,
                                     int32_t run_duration_sample,
                                     int32_t queue_duration_sum,
                                     int32_t queue_duration_max,
                                     int32_t queue_duration_sample)
    : count(count),
      run_duration_sum(run_duration_sum),
      run_duration_max(run_duration_max),
      run_duration_sample(run_duration_sample),
      queue_duration_sum(queue_duration_sum),
      queue_duration_max(queue_duration_max),
      queue_duration_sample(queue_duration_sample) {}

DeathDataSnapshot::~DeathDataSnapshot() {
}

DeathDataSnapshot DeathDataSnapshot::Delta(
    const DeathDataSnapshot& older) const {
  return DeathDataSnapshot(count - older.count,
                           run_duration_sum - older.run_duration_sum,
                           run_duration_max, run_duration_sample,
                           queue_duration_sum - older.queue_duration_sum,
                           queue_duration_max, queue_duration_sample);
}
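
// Note that only the count and the duration sums are differenced; the max and
// sample fields are passed through unchanged.  The max fields are zeroed at
// each phase boundary, and the samples are restarted by zeroing
// sample_probability_count_ (see DeathData::OnProfilingPhaseCompleted), so the
// newer snapshot's values already describe only the current phase.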

//------------------------------------------------------------------------------
BirthOnThread::BirthOnThread(const Location& location,
                             const ThreadData& current)
    : location_(location),
      birth_thread_(&current) {
}

//------------------------------------------------------------------------------
BirthOnThreadSnapshot::BirthOnThreadSnapshot() {
}

BirthOnThreadSnapshot::BirthOnThreadSnapshot(const BirthOnThread& birth)
    : location(birth.location()),
      thread_name(birth.birth_thread()->thread_name()) {
}

BirthOnThreadSnapshot::~BirthOnThreadSnapshot() {
}

//------------------------------------------------------------------------------
Births::Births(const Location& location, const ThreadData& current)
    : BirthOnThread(location, current),
      birth_count_(1) { }

int Births::birth_count() const { return birth_count_; }

void Births::RecordBirth() { ++birth_count_; }

//------------------------------------------------------------------------------
// ThreadData maintains the central data for all births and deaths on a single
// thread.

// TODO(jar): We should pull all these static vars together, into a struct, and
// optimize layout so that we benefit from locality of reference during accesses
// to them.

// static
ThreadData::NowFunction* ThreadData::now_function_for_testing_ = NULL;

// A TLS slot which points to the ThreadData instance for the current thread.
// We do a fake initialization here (zeroing out data), and then the real
// in-place construction happens when we call tls_index_.Initialize().
// static
base::ThreadLocalStorage::StaticSlot ThreadData::tls_index_ = TLS_INITIALIZER;

// static
int ThreadData::worker_thread_data_creation_count_ = 0;

// static
int ThreadData::cleanup_count_ = 0;

// static
int ThreadData::incarnation_counter_ = 0;

// static
ThreadData* ThreadData::all_thread_data_list_head_ = NULL;

// static
ThreadData* ThreadData::first_retired_worker_ = NULL;

// static
base::LazyInstance<base::Lock>::Leaky
    ThreadData::list_lock_ = LAZY_INSTANCE_INITIALIZER;

// static
base::subtle::Atomic32 ThreadData::status_ = ThreadData::UNINITIALIZED;

ThreadData::ThreadData(const std::string& suggested_name)
    : next_(NULL),
      next_retired_worker_(NULL),
      worker_thread_number_(0),
      incarnation_count_for_pool_(-1),
      current_stopwatch_(NULL) {
  DCHECK_GE(suggested_name.size(), 0u);
  thread_name_ = suggested_name;
  PushToHeadOfList();  // Which sets real incarnation_count_for_pool_.
}

ThreadData::ThreadData(int thread_number)
    : next_(NULL),
      next_retired_worker_(NULL),
      worker_thread_number_(thread_number),
      incarnation_count_for_pool_(-1),
      current_stopwatch_(NULL) {
  CHECK_GT(thread_number, 0);
  base::StringAppendF(&thread_name_, "WorkerThread-%d", thread_number);
  PushToHeadOfList();  // Which sets real incarnation_count_for_pool_.
}

ThreadData::~ThreadData() {
}

void ThreadData::PushToHeadOfList() {
  // Toss in a hint of randomness (atop the uninitialized value).
  (void)VALGRIND_MAKE_MEM_DEFINED_IF_ADDRESSABLE(&random_number_,
                                                 sizeof(random_number_));
  MSAN_UNPOISON(&random_number_, sizeof(random_number_));
  random_number_ += static_cast<uint32_t>(this - static_cast<ThreadData*>(0));
  random_number_ ^= (Now() - TrackedTime()).InMilliseconds();

  DCHECK(!next_);
  base::AutoLock lock(*list_lock_.Pointer());
  incarnation_count_for_pool_ = incarnation_counter_;
  next_ = all_thread_data_list_head_;
  all_thread_data_list_head_ = this;
}

// static
ThreadData* ThreadData::first() {
  base::AutoLock lock(*list_lock_.Pointer());
  return all_thread_data_list_head_;
}

ThreadData* ThreadData::next() const { return next_; }

// static
void ThreadData::InitializeThreadContext(const std::string& suggested_name) {
  if (base::WorkerPool::RunsTasksOnCurrentThread())
    return;
  EnsureTlsInitialization();
  ThreadData* current_thread_data =
      reinterpret_cast<ThreadData*>(tls_index_.Get());
  if (current_thread_data)
    return;  // Browser tests instigate this.
  current_thread_data = new ThreadData(suggested_name);
  tls_index_.Set(current_thread_data);
}

// static
ThreadData* ThreadData::Get() {
  if (!tls_index_.initialized())
    return NULL;  // For unittests only.
  ThreadData* registered = reinterpret_cast<ThreadData*>(tls_index_.Get());
  if (registered)
    return registered;

  // We must be a worker thread, since we didn't pre-register.
  ThreadData* worker_thread_data = NULL;
  int worker_thread_number = 0;
  {
    base::AutoLock lock(*list_lock_.Pointer());
    if (first_retired_worker_) {
      worker_thread_data = first_retired_worker_;
      first_retired_worker_ = first_retired_worker_->next_retired_worker_;
      worker_thread_data->next_retired_worker_ = NULL;
    } else {
      worker_thread_number = ++worker_thread_data_creation_count_;
    }
  }

  // If we can't find a previously used instance, then we have to create one.
  if (!worker_thread_data) {
    DCHECK_GT(worker_thread_number, 0);
    worker_thread_data = new ThreadData(worker_thread_number);
  }
  DCHECK_GT(worker_thread_data->worker_thread_number_, 0);

  tls_index_.Set(worker_thread_data);
  return worker_thread_data;
}

// static
void ThreadData::OnThreadTermination(void* thread_data) {
  DCHECK(thread_data);  // TLS should *never* call us with a NULL.
  // We must NOT do any allocations during this callback.  There is a chance
  // that the allocator is no longer active on this thread.
  reinterpret_cast<ThreadData*>(thread_data)->OnThreadTerminationCleanup();
}

void ThreadData::OnThreadTerminationCleanup() {
  // The list_lock_ was created when we registered the callback, so it won't be
  // allocated here despite the lazy reference.
  base::AutoLock lock(*list_lock_.Pointer());
  if (incarnation_counter_ != incarnation_count_for_pool_)
    return;  // ThreadData was constructed in an earlier unit test.
  ++cleanup_count_;
  // Only worker threads need to be retired and reused.
  if (!worker_thread_number_) {
    return;
  }
  // We must NOT do any allocations during this callback.
  // Using the simple linked lists avoids all allocations.
  DCHECK_EQ(this->next_retired_worker_, reinterpret_cast<ThreadData*>(NULL));
  this->next_retired_worker_ = first_retired_worker_;
  first_retired_worker_ = this;
}

// static
void ThreadData::Snapshot(int current_profiling_phase,
                          ProcessDataSnapshot* process_data_snapshot) {
  // Get an unchanging copy of a ThreadData list.
  ThreadData* my_list = ThreadData::first();

  // Gather data serially.
  // This hackish approach *can* get some slightly corrupt tallies, as we are
  // grabbing values without the protection of a lock, but it has the advantage
  // of working even with threads that don't have message loops.  If a user
  // sees any strangeness, they can always just run their stats gathering a
  // second time.
  BirthCountMap birth_counts;
  for (ThreadData* thread_data = my_list; thread_data;
       thread_data = thread_data->next()) {
    thread_data->SnapshotExecutedTasks(current_profiling_phase,
                                       &process_data_snapshot->phased_snapshots,
                                       &birth_counts);
  }

  // Add births that are still active -- i.e. objects that have tallied a birth,
  // but have not yet tallied a matching death, and hence must be either
  // running, queued up, or being held in limbo for future posting.
  auto* current_phase_tasks =
      &process_data_snapshot->phased_snapshots[current_profiling_phase].tasks;
  for (const auto& birth_count : birth_counts) {
    if (birth_count.second > 0) {
      current_phase_tasks->push_back(
          TaskSnapshot(BirthOnThreadSnapshot(*birth_count.first),
                       DeathDataSnapshot(birth_count.second, 0, 0, 0, 0, 0, 0),
                       "Still_Alive"));
    }
  }
}

// static
void ThreadData::OnProfilingPhaseCompleted(int profiling_phase) {
  // Get an unchanging copy of a ThreadData list.
  ThreadData* my_list = ThreadData::first();

  // Add snapshots for all instances of death data in all threads serially.
  // This hackish approach *can* get some slightly corrupt tallies, as we are
  // grabbing values without the protection of a lock, but it has the advantage
  // of working even with threads that don't have message loops.  Any corruption
  // shouldn't cause "cascading damage" to anything else (in later phases).
  for (ThreadData* thread_data = my_list; thread_data;
       thread_data = thread_data->next()) {
    thread_data->OnProfilingPhaseCompletedOnThread(profiling_phase);
  }
}

Births* ThreadData::TallyABirth(const Location& location) {
  BirthMap::iterator it = birth_map_.find(location);
  Births* child;
  if (it != birth_map_.end()) {
    child = it->second;
    child->RecordBirth();
  } else {
    child = new Births(location, *this);  // Leak this.
    // Lock since the map may get relocated now, and other threads sometimes
    // snapshot it (but they lock before copying it).
    base::AutoLock lock(map_lock_);
    birth_map_[location] = child;
  }

  return child;
}

void ThreadData::TallyADeath(const Births& births,
                             int32_t queue_duration,
                             const TaskStopwatch& stopwatch) {
  int32_t run_duration = stopwatch.RunDurationMs();

  // Stir in some randomness, plus add constant in case durations are zero.
  const uint32_t kSomePrimeNumber = 2147483647;
  random_number_ += queue_duration + run_duration + kSomePrimeNumber;
  // An address is going to have some randomness to it as well ;-).
  random_number_ ^=
      static_cast<uint32_t>(&births - reinterpret_cast<Births*>(0));

  DeathMap::iterator it = death_map_.find(&births);
  DeathData* death_data;
  if (it != death_map_.end()) {
    death_data = &it->second;
  } else {
    base::AutoLock lock(map_lock_);  // Lock as the map may get relocated now.
    death_data = &death_map_[&births];
  }  // Release lock ASAP.
  death_data->RecordDeath(queue_duration, run_duration, random_number_);
}
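
// Note on the constants above: kSomePrimeNumber is 2^31 - 1, a Mersenne
// prime, and the pointer-difference expression reduces to the address of
// |births| divided by sizeof(Births), i.e. a per-object-distinct integer.
// Both are mixed in as cheap extra sources of entropy for the reservoir
// sampling in DeathData::RecordDeath.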

// static
Births* ThreadData::TallyABirthIfActive(const Location& location) {
  if (!TrackingStatus())
    return NULL;
  ThreadData* current_thread_data = Get();
  if (!current_thread_data)
    return NULL;
  return current_thread_data->TallyABirth(location);
}

// static
void ThreadData::TallyRunOnNamedThreadIfTracking(
    const base::TrackingInfo& completed_task,
    const TaskStopwatch& stopwatch) {
  // Even if we have been DEACTIVATED, we will process any pending births so
  // that our data structures (which counted the outstanding births) remain
  // consistent.
  const Births* births = completed_task.birth_tally;
  if (!births)
    return;
  ThreadData* current_thread_data = stopwatch.GetThreadData();
  if (!current_thread_data)
    return;

  // Watch out for a race where status_ is changing, and hence one or both
  // of start_of_run or end_of_run is zero.  In that case, we didn't bother to
  // get a time value since we "weren't tracking" and we were trying to be
  // efficient by not calling for a genuine time value.  For simplicity, we'll
  // use a default zero duration when we can't calculate a true value.
  TrackedTime start_of_run = stopwatch.StartTime();
  int32_t queue_duration = 0;
  if (!start_of_run.is_null()) {
    queue_duration = (start_of_run - completed_task.EffectiveTimePosted())
        .InMilliseconds();
  }
  current_thread_data->TallyADeath(*births, queue_duration, stopwatch);
}

// static
void ThreadData::TallyRunOnWorkerThreadIfTracking(
    const Births* births,
    const TrackedTime& time_posted,
    const TaskStopwatch& stopwatch) {
  // Even if we have been DEACTIVATED, we will process any pending births so
  // that our data structures (which counted the outstanding births) remain
  // consistent.
  if (!births)
    return;

  // TODO(jar): Support the option to coalesce all worker-thread activity under
  // one ThreadData instance that uses locks to protect *all* access.  This will
  // reduce memory (making it provably bounded), but run incrementally slower
  // (since we'll use locks on TallyABirth and TallyADeath).  The good news is
  // that the locks on TallyADeath will be *after* the worker thread has run,
  // and hence nothing will be waiting for the completion (...  besides some
  // other thread that might like to run).  Also, the worker threads tasks are
  // generally longer, and hence the cost of the lock may perchance be amortized
  // over the long task's lifetime.
  ThreadData* current_thread_data = stopwatch.GetThreadData();
  if (!current_thread_data)
    return;

  TrackedTime start_of_run = stopwatch.StartTime();
  int32_t queue_duration = 0;
  if (!start_of_run.is_null()) {
    queue_duration = (start_of_run - time_posted).InMilliseconds();
  }
  current_thread_data->TallyADeath(*births, queue_duration, stopwatch);
}

// static
void ThreadData::TallyRunInAScopedRegionIfTracking(
    const Births* births,
    const TaskStopwatch& stopwatch) {
  // Even if we have been DEACTIVATED, we will process any pending births so
  // that our data structures (which counted the outstanding births) remain
  // consistent.
  if (!births)
    return;

  ThreadData* current_thread_data = stopwatch.GetThreadData();
  if (!current_thread_data)
    return;

  int32_t queue_duration = 0;
  current_thread_data->TallyADeath(*births, queue_duration, stopwatch);
}

void ThreadData::SnapshotExecutedTasks(
    int current_profiling_phase,
    PhasedProcessDataSnapshotMap* phased_snapshots,
    BirthCountMap* birth_counts) {
  // Get copy of data, so that the data will not change during the iterations
  // and processing.
  BirthMap birth_map;
  DeathsSnapshot deaths;
  SnapshotMaps(current_profiling_phase, &birth_map, &deaths);

  for (const auto& birth : birth_map) {
    (*birth_counts)[birth.second] += birth.second->birth_count();
  }

  for (const auto& death : deaths) {
    (*birth_counts)[death.first] -= death.first->birth_count();

    // For the current death data, walk through all its snapshots, starting from
    // the current one, then from the previous profiling phase etc., and for
    // each snapshot calculate the delta between the snapshot and the previous
    // phase, if any.  Store the deltas in the result.
    for (const DeathDataPhaseSnapshot* phase = &death.second; phase;
         phase = phase->prev) {
      const DeathDataSnapshot& death_data =
          phase->prev ? phase->death_data.Delta(phase->prev->death_data)
                      : phase->death_data;

      if (death_data.count > 0) {
        (*phased_snapshots)[phase->profiling_phase].tasks.push_back(
            TaskSnapshot(BirthOnThreadSnapshot(*death.first), death_data,
                         thread_name()));
      }
    }
  }
}
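
// Concretely (hypothetical numbers): suppose a task had count 10 in the
// snapshot stored when phase 0 completed, and has count 25 now, during
// phase 1.  The walk above reports a delta of 15 deaths for phase 1 (the
// current data minus the phase-0 snapshot) and the full 10 for phase 0,
// since the oldest snapshot has no predecessor and is reported as-is.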

// This may be called from another thread.
void ThreadData::SnapshotMaps(int profiling_phase,
                              BirthMap* birth_map,
                              DeathsSnapshot* deaths) {
  base::AutoLock lock(map_lock_);

  for (const auto& birth : birth_map_)
    (*birth_map)[birth.first] = birth.second;

  for (const auto& death : death_map_) {
    deaths->push_back(std::make_pair(
        death.first,
        DeathDataPhaseSnapshot(profiling_phase, death.second.count(),
                               death.second.run_duration_sum(),
                               death.second.run_duration_max(),
                               death.second.run_duration_sample(),
                               death.second.queue_duration_sum(),
                               death.second.queue_duration_max(),
                               death.second.queue_duration_sample(),
                               death.second.last_phase_snapshot())));
  }
}

void ThreadData::OnProfilingPhaseCompletedOnThread(int profiling_phase) {
  base::AutoLock lock(map_lock_);

  for (auto& death : death_map_) {
    death.second.OnProfilingPhaseCompleted(profiling_phase);
  }
}

void ThreadData::EnsureTlsInitialization() {
  if (base::subtle::Acquire_Load(&status_) >= DEACTIVATED)
    return;  // Someone else did the initialization.
  // Due to racy lazy initialization in tests, we'll need to recheck status_
  // after we acquire the lock.

  // Ensure that we don't double initialize tls.  We are called when single
  // threaded in the product, but some tests may be racy and lazy about our
  // initialization.
  base::AutoLock lock(*list_lock_.Pointer());
  if (base::subtle::Acquire_Load(&status_) >= DEACTIVATED)
    return;  // Someone raced in here and beat us.

  // Perform the "real" TLS initialization now, and leave it intact through
  // process termination.
  if (!tls_index_.initialized()) {  // Testing may have initialized this.
    DCHECK_EQ(base::subtle::NoBarrier_Load(&status_), UNINITIALIZED);
    tls_index_.Initialize(&ThreadData::OnThreadTermination);
    DCHECK(tls_index_.initialized());
  } else {
    // TLS was initialized for us earlier.
    DCHECK_EQ(base::subtle::NoBarrier_Load(&status_), DORMANT_DURING_TESTS);
  }

  // Incarnation counter is only significant to testing, as it otherwise will
  // never again change in this process.
  ++incarnation_counter_;

  // The lock is not critical for setting status_, but it doesn't hurt.  It also
  // ensures that if we have a racy initialization, that we'll bail as soon as
  // we get the lock earlier in this method.
  base::subtle::Release_Store(&status_, kInitialStartupState);
  DCHECK(base::subtle::NoBarrier_Load(&status_) != UNINITIALIZED);
}
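
// The check/lock/recheck sequence above is the classic double-checked locking
// pattern; as a sketch:
//   if (initialized)            // Fast path: acquire-load, no lock taken.
//     return;
//   lock();
//   if (initialized) return;    // Recheck: another thread won the race.
//   ...one-time setup...
//   initialized = true;         // Release-store publishes the setup.
//   unlock();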

// static
void ThreadData::InitializeAndSetTrackingStatus(Status status) {
  DCHECK_GE(status, DEACTIVATED);
  DCHECK_LE(status, PROFILING_ACTIVE);

  EnsureTlsInitialization();  // No-op if already initialized.

  if (status > DEACTIVATED)
    status = PROFILING_ACTIVE;
  base::subtle::Release_Store(&status_, status);
}

// static
ThreadData::Status ThreadData::status() {
  return static_cast<ThreadData::Status>(base::subtle::Acquire_Load(&status_));
}

// static
bool ThreadData::TrackingStatus() {
  return base::subtle::Acquire_Load(&status_) > DEACTIVATED;
}

// static
void ThreadData::EnableProfilerTiming() {
  base::subtle::NoBarrier_Store(&g_profiler_timing_enabled, ENABLED_TIMING);
}

// static
TrackedTime ThreadData::Now() {
  if (now_function_for_testing_)
    return TrackedTime::FromMilliseconds((*now_function_for_testing_)());
  if (IsProfilerTimingEnabled() && TrackingStatus())
    return TrackedTime::Now();
  return TrackedTime();  // Super fast when disabled, or not compiled.
}

// static
void ThreadData::EnsureCleanupWasCalled(int /*major_threads_shutdown_count*/) {
  base::AutoLock lock(*list_lock_.Pointer());
  if (worker_thread_data_creation_count_ == 0)
    return;  // We haven't really run much, and couldn't have leaked.

  // TODO(jar): until this is working on XP, don't run the real test.
#if 0
  // Verify that we've at least shut down/cleaned up the major named threads.
  // The caller should tell us how many thread shutdowns should have taken
  // place by now.
  CHECK_GT(cleanup_count_, major_threads_shutdown_count);
#endif
}

// static
void ThreadData::ShutdownSingleThreadedCleanup(bool leak) {
  // This is only called from test code, where we need to cleanup so that
  // additional tests can be run.
  // We must be single threaded... but be careful anyway.
  InitializeAndSetTrackingStatus(DEACTIVATED);

  ThreadData* thread_data_list;
  {
    base::AutoLock lock(*list_lock_.Pointer());
    thread_data_list = all_thread_data_list_head_;
    all_thread_data_list_head_ = NULL;
    ++incarnation_counter_;
    // To be clean, break apart the retired worker list (though we leak them).
    while (first_retired_worker_) {
      ThreadData* worker = first_retired_worker_;
      CHECK_GT(worker->worker_thread_number_, 0);
      first_retired_worker_ = worker->next_retired_worker_;
      worker->next_retired_worker_ = NULL;
    }
  }

  // Put most global statics back in pristine shape.
  worker_thread_data_creation_count_ = 0;
  cleanup_count_ = 0;
  tls_index_.Set(NULL);
  // Almost UNINITIALIZED.
  base::subtle::Release_Store(&status_, DORMANT_DURING_TESTS);

  // To avoid any chance of racing in unit tests, which is the only place we
  // call this function, we may sometimes leak all the data structures we
  // recovered, as they may still be in use on threads from prior tests!
  if (leak) {
    ThreadData* thread_data = thread_data_list;
    while (thread_data) {
      ANNOTATE_LEAKING_OBJECT_PTR(thread_data);
      thread_data = thread_data->next();
    }
    return;
  }

  // When we want to cleanup (on a single thread), here is what we do.

  // Do actual recursive delete in all ThreadData instances.
  while (thread_data_list) {
    ThreadData* next_thread_data = thread_data_list;
    thread_data_list = thread_data_list->next();

    for (BirthMap::iterator it = next_thread_data->birth_map_.begin();
         next_thread_data->birth_map_.end() != it; ++it)
      delete it->second;  // Delete the Birth Records.
    delete next_thread_data;  // Includes all Death Records.
  }
}

//------------------------------------------------------------------------------
TaskStopwatch::TaskStopwatch()
    : wallclock_duration_ms_(0),
      current_thread_data_(NULL),
      excluded_duration_ms_(0),
      parent_(NULL) {
#if DCHECK_IS_ON()
  state_ = CREATED;
  child_ = NULL;
#endif
}

TaskStopwatch::~TaskStopwatch() {
#if DCHECK_IS_ON()
  DCHECK(state_ != RUNNING);
  DCHECK(child_ == NULL);
#endif
}

void TaskStopwatch::Start() {
#if DCHECK_IS_ON()
  DCHECK(state_ == CREATED);
  state_ = RUNNING;
#endif

  start_time_ = ThreadData::Now();

  current_thread_data_ = ThreadData::Get();
  if (!current_thread_data_)
    return;

  parent_ = current_thread_data_->current_stopwatch_;
#if DCHECK_IS_ON()
  if (parent_) {
    DCHECK(parent_->state_ == RUNNING);
    DCHECK(parent_->child_ == NULL);
    parent_->child_ = this;
  }
#endif
  current_thread_data_->current_stopwatch_ = this;
}

void TaskStopwatch::Stop() {
  const TrackedTime end_time = ThreadData::Now();
#if DCHECK_IS_ON()
  DCHECK(state_ == RUNNING);
  state_ = STOPPED;
  DCHECK(child_ == NULL);
#endif

  if (!start_time_.is_null() && !end_time.is_null()) {
    wallclock_duration_ms_ = (end_time - start_time_).InMilliseconds();
  }

  if (!current_thread_data_)
    return;

  DCHECK(current_thread_data_->current_stopwatch_ == this);
  current_thread_data_->current_stopwatch_ = parent_;
  if (!parent_)
    return;

#if DCHECK_IS_ON()
  DCHECK(parent_->state_ == RUNNING);
  DCHECK(parent_->child_ == this);
  parent_->child_ = NULL;
#endif
  parent_->excluded_duration_ms_ += wallclock_duration_ms_;
  parent_ = NULL;
}
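
// A worked example of the nesting accounting (hypothetical numbers): if task A
// runs for 10 ms of wall-clock time and, while it runs, a nested task B runs
// for 4 ms under its own stopwatch, then B's Stop() adds 4 ms to A's
// excluded_duration_ms_, and A's RunDurationMs() reports 10 - 4 = 6 ms of
// exclusive run time.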

TrackedTime TaskStopwatch::StartTime() const {
#if DCHECK_IS_ON()
  DCHECK(state_ != CREATED);
#endif

  return start_time_;
}

int32_t TaskStopwatch::RunDurationMs() const {
#if DCHECK_IS_ON()
  DCHECK(state_ == STOPPED);
#endif

  return wallclock_duration_ms_ - excluded_duration_ms_;
}

ThreadData* TaskStopwatch::GetThreadData() const {
#if DCHECK_IS_ON()
  DCHECK(state_ != CREATED);
#endif

  return current_thread_data_;
}

//------------------------------------------------------------------------------
// DeathDataPhaseSnapshot

DeathDataPhaseSnapshot::DeathDataPhaseSnapshot(
    int profiling_phase,
    int count,
    int32_t run_duration_sum,
    int32_t run_duration_max,
    int32_t run_duration_sample,
    int32_t queue_duration_sum,
    int32_t queue_duration_max,
    int32_t queue_duration_sample,
    const DeathDataPhaseSnapshot* prev)
    : profiling_phase(profiling_phase),
      death_data(count,
                 run_duration_sum,
                 run_duration_max,
                 run_duration_sample,
                 queue_duration_sum,
                 queue_duration_max,
                 queue_duration_sample),
      prev(prev) {}

//------------------------------------------------------------------------------
// TaskSnapshot

TaskSnapshot::TaskSnapshot() {
}

TaskSnapshot::TaskSnapshot(const BirthOnThreadSnapshot& birth,
                           const DeathDataSnapshot& death_data,
                           const std::string& death_thread_name)
    : birth(birth),
      death_data(death_data),
      death_thread_name(death_thread_name) {
}

TaskSnapshot::~TaskSnapshot() {
}

//------------------------------------------------------------------------------
// ProcessDataPhaseSnapshot

ProcessDataPhaseSnapshot::ProcessDataPhaseSnapshot() {
}

ProcessDataPhaseSnapshot::ProcessDataPhaseSnapshot(
    const ProcessDataPhaseSnapshot& other) = default;

ProcessDataPhaseSnapshot::~ProcessDataPhaseSnapshot() {
}

//------------------------------------------------------------------------------
// ProcessDataSnapshot

ProcessDataSnapshot::ProcessDataSnapshot()
#if !defined(OS_NACL)
    : process_id(base::GetCurrentProcId()) {
#else
    : process_id(base::kNullProcessId) {
#endif
}

ProcessDataSnapshot::ProcessDataSnapshot(const ProcessDataSnapshot& other) =
    default;

ProcessDataSnapshot::~ProcessDataSnapshot() {
}

}  // namespace tracked_objects