Home | History | Annotate | Download | only in metrics
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "chrome/browser/metrics/thread_watcher.h"
      6 
      7 #include <math.h>  // ceil
      8 
      9 #include "base/bind.h"
     10 #include "base/compiler_specific.h"
     11 #include "base/debug/alias.h"
     12 #include "base/lazy_instance.h"
     13 #include "base/strings/string_number_conversions.h"
     14 #include "base/strings/string_split.h"
     15 #include "base/strings/string_tokenizer.h"
     16 #include "base/strings/stringprintf.h"
     17 #include "base/threading/thread_restrictions.h"
     18 #include "build/build_config.h"
     19 #include "chrome/browser/metrics/metrics_service.h"
     20 #include "chrome/common/chrome_switches.h"
     21 #include "chrome/common/chrome_version_info.h"
     22 #include "chrome/common/dump_without_crashing.h"
     23 #include "chrome/common/logging_chrome.h"
     24 
     25 #if defined(OS_WIN)
     26 #include "base/win/windows_version.h"
     27 #endif
     28 
     29 using content::BrowserThread;
     30 
     31 namespace {
     32 
     33 // The following are unique function names for forcing the crash when a thread
     34 // is unresponsive. This makes it possible to tell from the callstack alone what
     35 // thread was unresponsive.
     36 //
     37 // We disable optimizations for this block of functions so the compiler doesn't
     38 // merge them all together.
     39 MSVC_DISABLE_OPTIMIZE()
     40 MSVC_PUSH_DISABLE_WARNING(4748)
     41 
     // Returns a null int pointer. The crash helper in this file obtains the
     // address it writes through by calling this function.
     int* NullPointer() {
       int* const null_address = reinterpret_cast<int*>(NULL);
       return null_address;
     }
     45 
     46 void NullPointerCrash(int line_number) {
     47 #ifndef NDEBUG
     48   *NullPointer() = line_number;  // Crash.
     49 #else
     50   logging::DumpWithoutCrashing();
     51 #endif
     52 }
     53 
     54 NOINLINE void ShutdownCrash() {
     55   NullPointerCrash(__LINE__);
     56 }
     57 
     58 NOINLINE void ThreadUnresponsive_UI() {
     59   NullPointerCrash(__LINE__);
     60 }
     61 
     62 NOINLINE void ThreadUnresponsive_DB() {
     63   NullPointerCrash(__LINE__);
     64 }
     65 
     66 NOINLINE void ThreadUnresponsive_FILE() {
     67   NullPointerCrash(__LINE__);
     68 }
     69 
     70 NOINLINE void ThreadUnresponsive_FILE_USER_BLOCKING() {
     71   NullPointerCrash(__LINE__);
     72 }
     73 
     74 NOINLINE void ThreadUnresponsive_PROCESS_LAUNCHER() {
     75   NullPointerCrash(__LINE__);
     76 }
     77 
     78 NOINLINE void ThreadUnresponsive_CACHE() {
     79   NullPointerCrash(__LINE__);
     80 }
     81 
     82 NOINLINE void ThreadUnresponsive_IO() {
     83   NullPointerCrash(__LINE__);
     84 }
     85 
     86 MSVC_POP_WARNING()
     87 MSVC_ENABLE_OPTIMIZE();
     88 
     89 void CrashBecauseThreadWasUnresponsive(BrowserThread::ID thread_id) {
     90   base::debug::Alias(&thread_id);
     91 
     92   switch (thread_id) {
     93     case BrowserThread::UI:
     94       return ThreadUnresponsive_UI();
     95     case BrowserThread::DB:
     96       return ThreadUnresponsive_DB();
     97     case BrowserThread::FILE:
     98       return ThreadUnresponsive_FILE();
     99     case BrowserThread::FILE_USER_BLOCKING:
    100       return ThreadUnresponsive_FILE_USER_BLOCKING();
    101     case BrowserThread::PROCESS_LAUNCHER:
    102       return ThreadUnresponsive_PROCESS_LAUNCHER();
    103     case BrowserThread::CACHE:
    104       return ThreadUnresponsive_CACHE();
    105     case BrowserThread::IO:
    106       return ThreadUnresponsive_IO();
    107     case BrowserThread::ID_COUNT:
    108       CHECK(false);  // This shouldn't actually be reached!
    109       break;
    110 
    111     // Omission of the default hander is intentional -- that way the compiler
    112     // should warn if our switch becomes outdated.
    113   }
    114 
    115   CHECK(false) << "Unknown thread was unresponsive.";  // Shouldn't be reached.
    116 }
    117 
    118 }  // namespace
    119 
    120 // ThreadWatcher methods and members.
    121 ThreadWatcher::ThreadWatcher(const WatchingParams& params)
    122     : thread_id_(params.thread_id),
    123       thread_name_(params.thread_name),
    124       watched_loop_(
    125           BrowserThread::GetMessageLoopProxyForThread(params.thread_id)),
    126       sleep_time_(params.sleep_time),
    127       unresponsive_time_(params.unresponsive_time),
    128       ping_time_(base::TimeTicks::Now()),
    129       pong_time_(ping_time_),
    130       ping_sequence_number_(0),
    131       active_(false),
    132       ping_count_(params.unresponsive_threshold),
    133       response_time_histogram_(NULL),
    134       unresponsive_time_histogram_(NULL),
    135       unresponsive_count_(0),
    136       hung_processing_complete_(false),
    137       unresponsive_threshold_(params.unresponsive_threshold),
    138       crash_on_hang_(params.crash_on_hang),
    139       live_threads_threshold_(params.live_threads_threshold),
    140       weak_ptr_factory_(this) {
    141   DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
    142   Initialize();
    143 }
    144 
    145 ThreadWatcher::~ThreadWatcher() {}
    146 
    147 // static
    148 void ThreadWatcher::StartWatching(const WatchingParams& params) {
    149   DCHECK_GE(params.sleep_time.InMilliseconds(), 0);
    150   DCHECK_GE(params.unresponsive_time.InMilliseconds(),
    151             params.sleep_time.InMilliseconds());
    152 
    153   // If we are not on WatchDogThread, then post a task to call StartWatching on
    154   // WatchDogThread.
    155   if (!WatchDogThread::CurrentlyOnWatchDogThread()) {
    156     WatchDogThread::PostTask(
    157         FROM_HERE,
    158         base::Bind(&ThreadWatcher::StartWatching, params));
    159     return;
    160   }
    161 
    162   DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
    163 
    164   // Create a new thread watcher object for the given thread and activate it.
    165   ThreadWatcher* watcher = new ThreadWatcher(params);
    166 
    167   DCHECK(watcher);
    168   // If we couldn't register the thread watcher object, we are shutting down,
    169   // then don't activate thread watching.
    170   if (!ThreadWatcherList::IsRegistered(params.thread_id))
    171     return;
    172   watcher->ActivateThreadWatching();
    173 }
    174 
    175 void ThreadWatcher::ActivateThreadWatching() {
    176   DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
    177   if (active_) return;
    178   active_ = true;
    179   ping_count_ = unresponsive_threshold_;
    180   ResetHangCounters();
    181   base::MessageLoop::current()->PostTask(
    182       FROM_HERE,
    183       base::Bind(&ThreadWatcher::PostPingMessage,
    184                  weak_ptr_factory_.GetWeakPtr()));
    185 }
    186 
    187 void ThreadWatcher::DeActivateThreadWatching() {
    188   DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
    189   active_ = false;
    190   ping_count_ = 0;
    191   weak_ptr_factory_.InvalidateWeakPtrs();
    192 }
    193 
    194 void ThreadWatcher::WakeUp() {
    195   DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
    196   // There is some user activity, PostPingMessage task of thread watcher if
    197   // needed.
    198   if (!active_) return;
    199 
    200   // Throw away the previous |unresponsive_count_| and start over again. Just
    201   // before going to sleep, |unresponsive_count_| could be very close to
    202   // |unresponsive_threshold_| and when user becomes active,
    203   // |unresponsive_count_| can go over |unresponsive_threshold_| if there was no
    204   // response for ping messages. Reset |unresponsive_count_| to start measuring
    205   // the unresponsiveness of the threads when system becomes active.
    206   unresponsive_count_ = 0;
    207 
    208   if (ping_count_ <= 0) {
    209     ping_count_ = unresponsive_threshold_;
    210     ResetHangCounters();
    211     PostPingMessage();
    212   } else {
    213     ping_count_ = unresponsive_threshold_;
    214   }
    215 }
    216 
    217 void ThreadWatcher::PostPingMessage() {
    218   DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
    219   // If we have stopped watching or if the user is idle, then stop sending
    220   // ping messages.
    221   if (!active_ || ping_count_ <= 0)
    222     return;
    223 
    224   // Save the current time when we have sent ping message.
    225   ping_time_ = base::TimeTicks::Now();
    226 
    227   // Send a ping message to the watched thread. Callback will be called on
    228   // the WatchDogThread.
    229   base::Closure callback(
    230       base::Bind(&ThreadWatcher::OnPongMessage, weak_ptr_factory_.GetWeakPtr(),
    231                  ping_sequence_number_));
    232   if (watched_loop_->PostTask(
    233           FROM_HERE,
    234           base::Bind(&ThreadWatcher::OnPingMessage, thread_id_,
    235                      callback))) {
    236       // Post a task to check the responsiveness of watched thread.
    237       base::MessageLoop::current()->PostDelayedTask(
    238           FROM_HERE,
    239           base::Bind(&ThreadWatcher::OnCheckResponsiveness,
    240                      weak_ptr_factory_.GetWeakPtr(), ping_sequence_number_),
    241           unresponsive_time_);
    242   } else {
    243     // Watched thread might have gone away, stop watching it.
    244     DeActivateThreadWatching();
    245   }
    246 }
    247 
    248 void ThreadWatcher::OnPongMessage(uint64 ping_sequence_number) {
    249   DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
    250 
    251   // Record watched thread's response time.
    252   base::TimeTicks now = base::TimeTicks::Now();
    253   base::TimeDelta response_time = now - ping_time_;
    254   response_time_histogram_->AddTime(response_time);
    255 
    256   // Save the current time when we have got pong message.
    257   pong_time_ = now;
    258 
    259   // Check if there are any extra pings in flight.
    260   DCHECK_EQ(ping_sequence_number_, ping_sequence_number);
    261   if (ping_sequence_number_ != ping_sequence_number)
    262     return;
    263 
    264   // Increment sequence number for the next ping message to indicate watched
    265   // thread is responsive.
    266   ++ping_sequence_number_;
    267 
    268   // If we have stopped watching or if the user is idle, then stop sending
    269   // ping messages.
    270   if (!active_ || --ping_count_ <= 0)
    271     return;
    272 
    273   base::MessageLoop::current()->PostDelayedTask(
    274       FROM_HERE,
    275       base::Bind(&ThreadWatcher::PostPingMessage,
    276                  weak_ptr_factory_.GetWeakPtr()),
    277       sleep_time_);
    278 }
    279 
    280 void ThreadWatcher::OnCheckResponsiveness(uint64 ping_sequence_number) {
    281   DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
    282   // If we have stopped watching then consider thread as responding.
    283   if (!active_) {
    284     responsive_ = true;
    285     return;
    286   }
    287   // If the latest ping_sequence_number_ is not same as the ping_sequence_number
    288   // that is passed in, then we can assume OnPongMessage was called.
    289   // OnPongMessage increments ping_sequence_number_.
    290   if (ping_sequence_number_ != ping_sequence_number) {
    291     // Reset unresponsive_count_ to zero because we got a response from the
    292     // watched thread.
    293     ResetHangCounters();
    294 
    295     responsive_ = true;
    296     return;
    297   }
    298   // Record that we got no response from watched thread.
    299   GotNoResponse();
    300 
    301   // Post a task to check the responsiveness of watched thread.
    302   base::MessageLoop::current()->PostDelayedTask(
    303       FROM_HERE,
    304       base::Bind(&ThreadWatcher::OnCheckResponsiveness,
    305                  weak_ptr_factory_.GetWeakPtr(), ping_sequence_number_),
    306       unresponsive_time_);
    307   responsive_ = false;
    308 }
    309 
    310 void ThreadWatcher::Initialize() {
    311   DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
    312   ThreadWatcherList::Register(this);
    313 
    314   const std::string response_time_histogram_name =
    315       "ThreadWatcher.ResponseTime." + thread_name_;
    316   response_time_histogram_ = base::Histogram::FactoryTimeGet(
    317       response_time_histogram_name,
    318       base::TimeDelta::FromMilliseconds(1),
    319       base::TimeDelta::FromSeconds(100), 50,
    320       base::Histogram::kUmaTargetedHistogramFlag);
    321 
    322   const std::string unresponsive_time_histogram_name =
    323       "ThreadWatcher.Unresponsive." + thread_name_;
    324   unresponsive_time_histogram_ = base::Histogram::FactoryTimeGet(
    325       unresponsive_time_histogram_name,
    326       base::TimeDelta::FromMilliseconds(1),
    327       base::TimeDelta::FromSeconds(100), 50,
    328       base::Histogram::kUmaTargetedHistogramFlag);
    329 
    330   const std::string responsive_count_histogram_name =
    331       "ThreadWatcher.ResponsiveThreads." + thread_name_;
    332   responsive_count_histogram_ = base::LinearHistogram::FactoryGet(
    333       responsive_count_histogram_name, 1, 10, 11,
    334       base::Histogram::kUmaTargetedHistogramFlag);
    335 
    336   const std::string unresponsive_count_histogram_name =
    337       "ThreadWatcher.UnresponsiveThreads." + thread_name_;
    338   unresponsive_count_histogram_ = base::LinearHistogram::FactoryGet(
    339       unresponsive_count_histogram_name, 1, 10, 11,
    340       base::Histogram::kUmaTargetedHistogramFlag);
    341 }
    342 
    343 // static
    344 void ThreadWatcher::OnPingMessage(const BrowserThread::ID& thread_id,
    345                                   const base::Closure& callback_task) {
    346   // This method is called on watched thread.
    347   DCHECK(BrowserThread::CurrentlyOn(thread_id));
    348   WatchDogThread::PostTask(FROM_HERE, callback_task);
    349 }
    350 
    351 void ThreadWatcher::ResetHangCounters() {
    352   DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
    353   unresponsive_count_ = 0;
    354   hung_processing_complete_ = false;
    355 }
    356 
    357 void ThreadWatcher::GotNoResponse() {
    358   DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
    359 
    360   ++unresponsive_count_;
    361   if (!IsVeryUnresponsive())
    362     return;
    363 
    364   // Record total unresponsive_time since last pong message.
    365   base::TimeDelta unresponse_time = base::TimeTicks::Now() - pong_time_;
    366   unresponsive_time_histogram_->AddTime(unresponse_time);
    367 
    368   // We have already collected stats for the non-responding watched thread.
    369   if (hung_processing_complete_)
    370     return;
    371 
    372   // Record how other threads are responding.
    373   uint32 responding_thread_count = 0;
    374   uint32 unresponding_thread_count = 0;
    375   ThreadWatcherList::GetStatusOfThreads(&responding_thread_count,
    376                                         &unresponding_thread_count);
    377 
    378   // Record how many watched threads are responding.
    379   responsive_count_histogram_->Add(responding_thread_count);
    380 
    381   // Record how many watched threads are not responding.
    382   unresponsive_count_histogram_->Add(unresponding_thread_count);
    383 
    384   // Crash the browser if the watched thread is to be crashed on hang and if the
    385   // number of other threads responding is less than or equal to
    386   // live_threads_threshold_ and at least one other thread is responding.
    387   if (crash_on_hang_ &&
    388       responding_thread_count > 0 &&
    389       responding_thread_count <= live_threads_threshold_) {
    390     static bool crashed_once = false;
    391     if (!crashed_once) {
    392       crashed_once = true;
    393       CrashBecauseThreadWasUnresponsive(thread_id_);
    394     }
    395   }
    396 
    397   hung_processing_complete_ = true;
    398 }
    399 
    400 bool ThreadWatcher::IsVeryUnresponsive() {
    401   DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
    402   return unresponsive_count_ >= unresponsive_threshold_;
    403 }
    404 
    405 // ThreadWatcherList methods and members.
    406 //
    407 // static
    408 ThreadWatcherList* ThreadWatcherList::g_thread_watcher_list_ = NULL;
    409 // static
    410 const int ThreadWatcherList::kSleepSeconds = 1;
    411 // static
    412 const int ThreadWatcherList::kUnresponsiveSeconds = 2;
    413 // static
    414 const int ThreadWatcherList::kUnresponsiveCount = 9;
    415 // static
    416 const int ThreadWatcherList::kLiveThreadsThreshold = 2;
    417 
    418 ThreadWatcherList::CrashDataThresholds::CrashDataThresholds(
    419     uint32 live_threads_threshold,
    420     uint32 unresponsive_threshold)
    421     : live_threads_threshold(live_threads_threshold),
    422       unresponsive_threshold(unresponsive_threshold) {
    423 }
    424 
    425 ThreadWatcherList::CrashDataThresholds::CrashDataThresholds()
    426     : live_threads_threshold(kLiveThreadsThreshold),
    427       unresponsive_threshold(kUnresponsiveCount) {
    428 }
    429 
    430 // static
    431 void ThreadWatcherList::StartWatchingAll(const CommandLine& command_line) {
    432   uint32 unresponsive_threshold;
    433   CrashOnHangThreadMap crash_on_hang_threads;
    434   ParseCommandLine(command_line,
    435                    &unresponsive_threshold,
    436                    &crash_on_hang_threads);
    437 
    438   ThreadWatcherObserver::SetupNotifications(
    439       base::TimeDelta::FromSeconds(kSleepSeconds * unresponsive_threshold));
    440 
    441   WatchDogThread::PostDelayedTask(
    442       FROM_HERE,
    443       base::Bind(&ThreadWatcherList::InitializeAndStartWatching,
    444                  unresponsive_threshold,
    445                  crash_on_hang_threads),
    446       base::TimeDelta::FromSeconds(120));
    447 }
    448 
    449 // static
    450 void ThreadWatcherList::StopWatchingAll() {
    451   ThreadWatcherObserver::RemoveNotifications();
    452   DeleteAll();
    453 }
    454 
    455 // static
    456 void ThreadWatcherList::Register(ThreadWatcher* watcher) {
    457   DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
    458   if (!g_thread_watcher_list_)
    459     return;
    460   DCHECK(!g_thread_watcher_list_->Find(watcher->thread_id()));
    461   g_thread_watcher_list_->registered_[watcher->thread_id()] = watcher;
    462 }
    463 
    464 // static
    465 bool ThreadWatcherList::IsRegistered(const BrowserThread::ID thread_id) {
    466   DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
    467   return NULL != ThreadWatcherList::Find(thread_id);
    468 }
    469 
    470 // static
    471 void ThreadWatcherList::GetStatusOfThreads(uint32* responding_thread_count,
    472                                            uint32* unresponding_thread_count) {
    473   DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
    474   *responding_thread_count = 0;
    475   *unresponding_thread_count = 0;
    476   if (!g_thread_watcher_list_)
    477     return;
    478 
    479   for (RegistrationList::iterator it =
    480            g_thread_watcher_list_->registered_.begin();
    481        g_thread_watcher_list_->registered_.end() != it;
    482        ++it) {
    483     if (it->second->IsVeryUnresponsive())
    484       ++(*unresponding_thread_count);
    485     else
    486       ++(*responding_thread_count);
    487   }
    488 }
    489 
    490 // static
    491 void ThreadWatcherList::WakeUpAll() {
    492   DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
    493   if (!g_thread_watcher_list_)
    494     return;
    495 
    496   for (RegistrationList::iterator it =
    497            g_thread_watcher_list_->registered_.begin();
    498        g_thread_watcher_list_->registered_.end() != it;
    499        ++it)
    500     it->second->WakeUp();
    501 }
    502 
    503 ThreadWatcherList::ThreadWatcherList() {
    504   DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
    505   CHECK(!g_thread_watcher_list_);
    506   g_thread_watcher_list_ = this;
    507 }
    508 
    509 ThreadWatcherList::~ThreadWatcherList() {
    510   DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
    511   DCHECK(this == g_thread_watcher_list_);
    512   g_thread_watcher_list_ = NULL;
    513 }
    514 
    515 // static
    516 void ThreadWatcherList::ParseCommandLine(
    517     const CommandLine& command_line,
    518     uint32* unresponsive_threshold,
    519     CrashOnHangThreadMap* crash_on_hang_threads) {
    520   // Initialize |unresponsive_threshold| to a default value.
    521   *unresponsive_threshold = kUnresponsiveCount;
    522 
    523   // Increase the unresponsive_threshold on the Stable and Beta channels to
    524   // reduce the number of crashes due to ThreadWatcher.
    525   chrome::VersionInfo::Channel channel = chrome::VersionInfo::GetChannel();
    526   if (channel == chrome::VersionInfo::CHANNEL_STABLE) {
    527     *unresponsive_threshold *= 4;
    528   } else if (channel == chrome::VersionInfo::CHANNEL_BETA) {
    529     *unresponsive_threshold *= 2;
    530   }
    531 
    532 #if defined(OS_WIN)
    533   // For Windows XP (old systems), double the unresponsive_threshold to give
    534   // the OS a chance to schedule UI/IO threads a time slice to respond with a
    535   // pong message (to get around limitations with the OS).
    536   if (base::win::GetVersion() <= base::win::VERSION_XP)
    537     *unresponsive_threshold *= 2;
    538 #endif
    539 
    540   uint32 crash_seconds = *unresponsive_threshold * kUnresponsiveSeconds;
    541   std::string crash_on_hang_thread_names;
    542   bool has_command_line_overwrite = false;
    543   if (command_line.HasSwitch(switches::kCrashOnHangThreads)) {
    544     crash_on_hang_thread_names =
    545         command_line.GetSwitchValueASCII(switches::kCrashOnHangThreads);
    546     has_command_line_overwrite = true;
    547   } else if (channel != chrome::VersionInfo::CHANNEL_STABLE) {
    548     // Default to crashing the browser if UI or IO or FILE threads are not
    549     // responsive except in stable channel.
    550     crash_on_hang_thread_names = base::StringPrintf(
    551         "UI:%d:%d,IO:%d:%d,FILE:%d:%d",
    552         kLiveThreadsThreshold, crash_seconds,
    553         kLiveThreadsThreshold, crash_seconds,
    554         kLiveThreadsThreshold, crash_seconds * 5);
    555   }
    556 
    557   ParseCommandLineCrashOnHangThreads(crash_on_hang_thread_names,
    558                                      kLiveThreadsThreshold,
    559                                      crash_seconds,
    560                                      crash_on_hang_threads);
    561 
    562   if (channel != chrome::VersionInfo::CHANNEL_CANARY ||
    563       has_command_line_overwrite) {
    564     return;
    565   }
    566 
    567   // Set up a field trial for 100% of the users to crash if either UI or IO
    568   // thread is not responsive for 30 seconds (or 15 pings).
    569   scoped_refptr<base::FieldTrial> field_trial(
    570       base::FieldTrialList::FactoryGetFieldTrial(
    571           "ThreadWatcher", 100, "default_hung_threads",
    572           2013, 10, 30, base::FieldTrial::SESSION_RANDOMIZED, NULL));
    573   int hung_thread_group = field_trial->AppendGroup("hung_thread", 100);
    574   if (field_trial->group() == hung_thread_group) {
    575     for (CrashOnHangThreadMap::iterator it = crash_on_hang_threads->begin();
    576          crash_on_hang_threads->end() != it;
    577          ++it) {
    578       if (it->first != "IO")
    579         continue;
    580       it->second.live_threads_threshold = INT_MAX;
    581       it->second.unresponsive_threshold = 15;
    582     }
    583   }
    584 }
    585 
    586 // static
    587 void ThreadWatcherList::ParseCommandLineCrashOnHangThreads(
    588     const std::string& crash_on_hang_thread_names,
    589     uint32 default_live_threads_threshold,
    590     uint32 default_crash_seconds,
    591     CrashOnHangThreadMap* crash_on_hang_threads) {
    592   base::StringTokenizer tokens(crash_on_hang_thread_names, ",");
    593   std::vector<std::string> values;
    594   while (tokens.GetNext()) {
    595     const std::string& token = tokens.token();
    596     base::SplitString(token, ':', &values);
    597     std::string thread_name = values[0];
    598 
    599     uint32 live_threads_threshold = default_live_threads_threshold;
    600     uint32 crash_seconds = default_crash_seconds;
    601     if (values.size() >= 2 &&
    602         (!base::StringToUint(values[1], &live_threads_threshold))) {
    603       continue;
    604     }
    605     if (values.size() >= 3 &&
    606         (!base::StringToUint(values[2], &crash_seconds))) {
    607       continue;
    608     }
    609     uint32 unresponsive_threshold = static_cast<uint32>(
    610         ceil(static_cast<float>(crash_seconds) / kUnresponsiveSeconds));
    611 
    612     CrashDataThresholds crash_data(live_threads_threshold,
    613                                    unresponsive_threshold);
    614     // Use the last specifier.
    615     (*crash_on_hang_threads)[thread_name] = crash_data;
    616   }
    617 }
    618 
    619 // static
    620 void ThreadWatcherList::InitializeAndStartWatching(
    621     uint32 unresponsive_threshold,
    622     const CrashOnHangThreadMap& crash_on_hang_threads) {
    623   DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
    624 
    625   ThreadWatcherList* thread_watcher_list = new ThreadWatcherList();
    626   CHECK(thread_watcher_list);
    627 
    628   BrowserThread::PostTask(
    629       BrowserThread::UI,
    630       FROM_HERE,
    631       base::Bind(&StartupTimeBomb::DisarmStartupTimeBomb));
    632 
    633   const base::TimeDelta kSleepTime =
    634       base::TimeDelta::FromSeconds(kSleepSeconds);
    635   const base::TimeDelta kUnresponsiveTime =
    636       base::TimeDelta::FromSeconds(kUnresponsiveSeconds);
    637 
    638   StartWatching(BrowserThread::UI, "UI", kSleepTime, kUnresponsiveTime,
    639                 unresponsive_threshold, crash_on_hang_threads);
    640   StartWatching(BrowserThread::IO, "IO", kSleepTime, kUnresponsiveTime,
    641                 unresponsive_threshold, crash_on_hang_threads);
    642   StartWatching(BrowserThread::DB, "DB", kSleepTime, kUnresponsiveTime,
    643                 unresponsive_threshold, crash_on_hang_threads);
    644   StartWatching(BrowserThread::FILE, "FILE", kSleepTime, kUnresponsiveTime,
    645                 unresponsive_threshold, crash_on_hang_threads);
    646   StartWatching(BrowserThread::CACHE, "CACHE", kSleepTime, kUnresponsiveTime,
    647                 unresponsive_threshold, crash_on_hang_threads);
    648 }
    649 
    650 // static
    651 void ThreadWatcherList::StartWatching(
    652     const BrowserThread::ID& thread_id,
    653     const std::string& thread_name,
    654     const base::TimeDelta& sleep_time,
    655     const base::TimeDelta& unresponsive_time,
    656     uint32 unresponsive_threshold,
    657     const CrashOnHangThreadMap& crash_on_hang_threads) {
    658   DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
    659 
    660   CrashOnHangThreadMap::const_iterator it =
    661       crash_on_hang_threads.find(thread_name);
    662   bool crash_on_hang = false;
    663   uint32 live_threads_threshold = 0;
    664   if (it != crash_on_hang_threads.end()) {
    665     crash_on_hang = true;
    666     live_threads_threshold = it->second.live_threads_threshold;
    667     unresponsive_threshold = it->second.unresponsive_threshold;
    668   }
    669 
    670   ThreadWatcher::StartWatching(
    671       ThreadWatcher::WatchingParams(thread_id,
    672                                     thread_name,
    673                                     sleep_time,
    674                                     unresponsive_time,
    675                                     unresponsive_threshold,
    676                                     crash_on_hang,
    677                                     live_threads_threshold));
    678 }
    679 
    680 // static
    681 void ThreadWatcherList::DeleteAll() {
    682   if (!WatchDogThread::CurrentlyOnWatchDogThread()) {
    683     WatchDogThread::PostTask(
    684         FROM_HERE,
    685         base::Bind(&ThreadWatcherList::DeleteAll));
    686     return;
    687   }
    688 
    689   DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
    690   if (!g_thread_watcher_list_)
    691     return;
    692 
    693   // Delete all thread watcher objects.
    694   while (!g_thread_watcher_list_->registered_.empty()) {
    695     RegistrationList::iterator it = g_thread_watcher_list_->registered_.begin();
    696     delete it->second;
    697     g_thread_watcher_list_->registered_.erase(it);
    698   }
    699 
    700   delete g_thread_watcher_list_;
    701 }
    702 
    703 // static
    704 ThreadWatcher* ThreadWatcherList::Find(const BrowserThread::ID& thread_id) {
    705   DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
    706   if (!g_thread_watcher_list_)
    707     return NULL;
    708   RegistrationList::iterator it =
    709       g_thread_watcher_list_->registered_.find(thread_id);
    710   if (g_thread_watcher_list_->registered_.end() == it)
    711     return NULL;
    712   return it->second;
    713 }
    714 
    715 // ThreadWatcherObserver methods and members.
    716 //
    717 // static
    718 ThreadWatcherObserver* ThreadWatcherObserver::g_thread_watcher_observer_ = NULL;
    719 
    720 ThreadWatcherObserver::ThreadWatcherObserver(
    721     const base::TimeDelta& wakeup_interval)
    722     : last_wakeup_time_(base::TimeTicks::Now()),
    723       wakeup_interval_(wakeup_interval) {
    724   CHECK(!g_thread_watcher_observer_);
    725   g_thread_watcher_observer_ = this;
    726 }
    727 
    728 ThreadWatcherObserver::~ThreadWatcherObserver() {
    729   DCHECK(this == g_thread_watcher_observer_);
    730   g_thread_watcher_observer_ = NULL;
    731 }
    732 
    733 // static
    734 void ThreadWatcherObserver::SetupNotifications(
    735     const base::TimeDelta& wakeup_interval) {
    736   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    737   ThreadWatcherObserver* observer = new ThreadWatcherObserver(wakeup_interval);
    738   MetricsService::SetUpNotifications(&observer->registrar_, observer);
    739 }
    740 
    741 // static
    742 void ThreadWatcherObserver::RemoveNotifications() {
    743   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    744   if (!g_thread_watcher_observer_)
    745     return;
    746   g_thread_watcher_observer_->registrar_.RemoveAll();
    747   delete g_thread_watcher_observer_;
    748 }
    749 
    750 void ThreadWatcherObserver::Observe(
    751     int type,
    752     const content::NotificationSource& source,
    753     const content::NotificationDetails& details) {
    754   // There is some user activity, see if thread watchers are to be awakened.
    755   base::TimeTicks now = base::TimeTicks::Now();
    756   if ((now - last_wakeup_time_) < wakeup_interval_)
    757     return;
    758   last_wakeup_time_ = now;
    759   WatchDogThread::PostTask(
    760       FROM_HERE,
    761       base::Bind(&ThreadWatcherList::WakeUpAll));
    762 }
    763 
// WatchDogThread methods and members.

// This lock protects g_watchdog_thread. It is acquired by every function in
// this file that reads or writes that pointer.
static base::LazyInstance<base::Lock>::Leaky
    g_watchdog_lock = LAZY_INSTANCE_INITIALIZER;

// The singleton of this class. Set in WatchDogThread::Init() and reset to NULL
// in WatchDogThread::CleanUp().
static WatchDogThread* g_watchdog_thread = NULL;
    772 
// Constructs the base Thread with the string "BrowserWatchdog" (presumably the
// thread's name). The singleton pointer is not set here; see Init().
WatchDogThread::WatchDogThread() : Thread("BrowserWatchdog") {
}
    775 
// Calls Stop() on the thread as part of destruction.
WatchDogThread::~WatchDogThread() {
  Stop();
}
    779 
    780 // static
    781 bool WatchDogThread::CurrentlyOnWatchDogThread() {
    782   base::AutoLock lock(g_watchdog_lock.Get());
    783   return g_watchdog_thread &&
    784       g_watchdog_thread->message_loop() == base::MessageLoop::current();
    785 }
    786 
    787 // static
    788 bool WatchDogThread::PostTask(const tracked_objects::Location& from_here,
    789                               const base::Closure& task) {
    790   return PostTaskHelper(from_here, task, base::TimeDelta());
    791 }
    792 
// Posts |task| to the watchdog thread to run after |delay|. Forwards to
// PostTaskHelper() and returns its result.
//
// static
bool WatchDogThread::PostDelayedTask(const tracked_objects::Location& from_here,
                                     const base::Closure& task,
                                     base::TimeDelta delay) {
  return PostTaskHelper(from_here, task, delay);
}
    799 
    800 // static
    801 bool WatchDogThread::PostTaskHelper(
    802     const tracked_objects::Location& from_here,
    803     const base::Closure& task,
    804     base::TimeDelta delay) {
    805   {
    806     base::AutoLock lock(g_watchdog_lock.Get());
    807 
    808     base::MessageLoop* message_loop = g_watchdog_thread ?
    809         g_watchdog_thread->message_loop() : NULL;
    810     if (message_loop) {
    811       message_loop->PostDelayedTask(from_here, task, delay);
    812       return true;
    813     }
    814   }
    815 
    816   return false;
    817 }
    818 
// Disallows blocking I/O via ThreadRestrictions and then registers |this| as
// the singleton while holding |g_watchdog_lock|.
void WatchDogThread::Init() {
  // This thread shouldn't be allowed to perform any blocking disk I/O.
  base::ThreadRestrictions::SetIOAllowed(false);

  base::AutoLock lock(g_watchdog_lock.Get());
  // Only one WatchDogThread may be registered at a time.
  CHECK(!g_watchdog_thread);
  g_watchdog_thread = this;
}
    827 
// Clears the singleton pointer while holding |g_watchdog_lock|, so later
// PostTask()/PostDelayedTask() calls return false.
void WatchDogThread::CleanUp() {
  base::AutoLock lock(g_watchdog_lock.Get());
  g_watchdog_thread = NULL;
}
    832 
    833 namespace {
    834 
    835 // StartupWatchDogThread methods and members.
    836 //
    837 // Class for detecting hangs during startup.
    838 class StartupWatchDogThread : public base::Watchdog {
    839  public:
    840   // Constructor specifies how long the StartupWatchDogThread will wait before
    841   // alarming.
    842   explicit StartupWatchDogThread(const base::TimeDelta& duration)
    843       : base::Watchdog(duration, "Startup watchdog thread", true) {
    844   }
    845 
    846   // Alarm is called if the time expires after an Arm() without someone calling
    847   // Disarm(). When Alarm goes off, in release mode we get the crash dump
    848   // without crashing and in debug mode we break into the debugger.
    849   virtual void Alarm() OVERRIDE {
    850 #ifndef NDEBUG
    851     DCHECK(false);
    852 #else
    853     logging::DumpWithoutCrashing();
    854 #endif
    855   }
    856 
    857   DISALLOW_COPY_AND_ASSIGN(StartupWatchDogThread);
    858 };
    859 
// ShutdownWatchDogThread methods and members.
//
// Class for detecting hangs during shutdown. Unlike StartupWatchDogThread, its
// alarm calls ShutdownCrash() in every build configuration.
class ShutdownWatchDogThread : public base::Watchdog {
 public:
  // Constructor specifies how long the ShutdownWatchDogThread will wait before
  // alarming.
  explicit ShutdownWatchDogThread(const base::TimeDelta& duration)
      : base::Watchdog(duration, "Shutdown watchdog thread", true) {
  }

  // Alarm is called if the time expires after an Arm() without someone calling
  // Disarm(). We crash the browser if this method is called.
  virtual void Alarm() OVERRIDE {
    ShutdownCrash();
  }

  DISALLOW_COPY_AND_ASSIGN(ShutdownWatchDogThread);
};
    879 }  // namespace
    880 
// StartupTimeBomb methods and members.
//
// Pointer to the single StartupTimeBomb instance. Set by the constructor,
// reset to NULL by the destructor, and read by DisarmStartupTimeBomb().
//
// static
StartupTimeBomb* StartupTimeBomb::g_startup_timebomb_ = NULL;
    885 
// Starts with no watchdog, remembers the constructing thread's id (the other
// methods DCHECK that they run on that same thread), and registers this object
// as the singleton instance.
StartupTimeBomb::StartupTimeBomb()
    : startup_watchdog_(NULL),
      thread_id_(base::PlatformThread::CurrentId()) {
  // Only one StartupTimeBomb may exist at a time.
  CHECK(!g_startup_timebomb_);
  g_startup_timebomb_ = this;
}
    892 
// Disarms any watchdog that is still present and unregisters this object as
// the singleton instance. Expected to run on the constructing thread.
StartupTimeBomb::~StartupTimeBomb() {
  DCHECK(this == g_startup_timebomb_);
  DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId());
  if (startup_watchdog_)
    Disarm();
  g_startup_timebomb_ = NULL;
}
    900 
    901 void StartupTimeBomb::Arm(const base::TimeDelta& duration) {
    902   DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId());
    903   DCHECK(!startup_watchdog_);
    904   // TODO(rtenneti): http://crbug.com/112970. Don't arm the startup timebomb
    905   // until we fix breakpad code not to crash in logging::DumpWithoutCrashing().
    906   // startup_watchdog_ = new StartupWatchDogThread(duration);
    907   // startup_watchdog_->Arm();
    908   return;
    909 }
    910 
    911 void StartupTimeBomb::Disarm() {
    912   DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId());
    913   if (startup_watchdog_) {
    914     startup_watchdog_->Disarm();
    915     startup_watchdog_->Cleanup();
    916     DeleteStartupWatchdog();
    917   }
    918 }
    919 
    920 void StartupTimeBomb::DeleteStartupWatchdog() {
    921   DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId());
    922   if (startup_watchdog_->IsJoinable()) {
    923     // Allow the watchdog thread to shutdown on UI. Watchdog thread shutdowns
    924     // very fast.
    925     base::ThreadRestrictions::SetIOAllowed(true);
    926     delete startup_watchdog_;
    927     startup_watchdog_ = NULL;
    928     return;
    929   }
    930   base::MessageLoop::current()->PostDelayedTask(
    931       FROM_HERE,
    932       base::Bind(&StartupTimeBomb::DeleteStartupWatchdog,
    933                  base::Unretained(this)),
    934       base::TimeDelta::FromSeconds(10));
    935 }
    936 
    937 // static
    938 void StartupTimeBomb::DisarmStartupTimeBomb() {
    939   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    940   if (g_startup_timebomb_)
    941     g_startup_timebomb_->Disarm();
    942 }
    943 
// ShutdownWatcherHelper methods and members.
//
// ShutdownWatcherHelper is a wrapper class for detecting hangs during
// shutdown.
//
// Starts with no watchdog and remembers the constructing thread's id; Arm()
// and the destructor DCHECK that they run on that same thread.
ShutdownWatcherHelper::ShutdownWatcherHelper()
    : shutdown_watchdog_(NULL),
      thread_id_(base::PlatformThread::CurrentId()) {
}
    952 
    953 ShutdownWatcherHelper::~ShutdownWatcherHelper() {
    954   DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId());
    955   if (shutdown_watchdog_) {
    956     shutdown_watchdog_->Disarm();
    957     delete shutdown_watchdog_;
    958     shutdown_watchdog_ = NULL;
    959   }
    960 }
    961 
    962 void ShutdownWatcherHelper::Arm(const base::TimeDelta& duration) {
    963   DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId());
    964   DCHECK(!shutdown_watchdog_);
    965   base::TimeDelta actual_duration = duration;
    966 
    967   chrome::VersionInfo::Channel channel = chrome::VersionInfo::GetChannel();
    968   if (channel == chrome::VersionInfo::CHANNEL_STABLE) {
    969     actual_duration *= 20;
    970   } else if (channel == chrome::VersionInfo::CHANNEL_BETA ||
    971              channel == chrome::VersionInfo::CHANNEL_DEV) {
    972     actual_duration *= 10;
    973   }
    974 
    975 #if defined(OS_WIN)
    976   // On Windows XP, give twice the time for shutdown.
    977   if (base::win::GetVersion() <= base::win::VERSION_XP)
    978     actual_duration *= 2;
    979 #endif
    980 
    981   shutdown_watchdog_ = new ShutdownWatchDogThread(actual_duration);
    982   shutdown_watchdog_->Arm();
    983 }
    984