Home | History | Annotate | Download | only in metrics
      1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // This file defines a WatchDog thread that monitors the responsiveness of other
      6 // browser threads like UI, IO, DB, FILE and CACHED threads. It also defines
      7 // ThreadWatcher class which performs health check on threads that would like to
      8 // be watched. This file also defines ThreadWatcherList class that has list of
      9 // all active ThreadWatcher objects.
     10 //
     11 // ThreadWatcher class sends ping message to the watched thread and the watched
     12 // thread responds back with a pong message. It uploads response time
     13 // (difference between ping and pong times) as a histogram.
     14 //
     15 // TODO(raman): ThreadWatcher can detect hung threads. If a hung thread is
     16 // detected, we should probably just crash, and allow the crash system to gather
     17 // the stack trace.
     18 //
     19 // Example Usage:
     20 //
     21 //   The following is an example for watching responsiveness of IO thread.
     22 //   sleep_time specifies how often ping messages have to be sent to IO thread.
     23 //   unresponsive_time is the wait time after ping message is sent, to check if
     24 //   we have received pong message or not.
     25 //
     26 //   base::TimeDelta sleep_time = base::TimeDelta::FromSeconds(5);
     27 //   base::TimeDelta unresponsive_time = base::TimeDelta::FromSeconds(10);
     28 //   ThreadWatcher::StartWatching(BrowserThread::IO, "IO", sleep_time,
     29 //                                unresponsive_time);
     30 
     31 #ifndef CHROME_BROWSER_METRICS_THREAD_WATCHER_H_
     32 #define CHROME_BROWSER_METRICS_THREAD_WATCHER_H_
     33 
     34 #include <map>
     35 #include <string>
     36 #include <vector>
     37 
     38 #include "base/basictypes.h"
     39 #include "base/gtest_prod_util.h"
     40 #include "base/memory/ref_counted.h"
     41 #include "base/memory/scoped_ptr.h"
     42 #include "base/message_loop.h"
     43 #include "base/metrics/histogram.h"
     44 #include "base/synchronization/lock.h"
     45 #include "base/task.h"
     46 #include "base/threading/thread.h"
     47 #include "base/time.h"
     48 #include "content/browser/browser_thread.h"
     49 #include "content/common/notification_observer.h"
     50 #include "content/common/notification_registrar.h"
     51 
     52 class CustomThreadWatcher;
     53 class ThreadWatcherList;
     54 
     55 // This class performs health check on threads that would like to be watched.
     56 class ThreadWatcher {
     57  public:
     58   // This method starts performing health check on the given thread_id. It will
     59   // create ThreadWatcher object for the given thread_id, thread_name,
     60   // sleep_time and unresponsive_time. sleep_time is the wait time between ping
     61   // messages. unresponsive_time is the wait time after ping message is sent,
     62   // to check if we have received pong message or not. It will register that
     63   // ThreadWatcher object and activate the thread watching of the given
     64   // thread_id.
     65   static void StartWatching(const BrowserThread::ID& thread_id,
     66                             const std::string& thread_name,
     67                             const base::TimeDelta& sleep_time,
     68                             const base::TimeDelta& unresponsive_time);
     69 
     70   // Return the thread_id of the thread being watched.
     71   BrowserThread::ID thread_id() const { return thread_id_; }
     72 
     73   // Return the name of the thread being watched (returned by value, i.e. a copy).
     74   std::string thread_name() const { return thread_name_; }
     75 
     76   // Return the sleep time between ping messages to be sent to the thread.
     77   base::TimeDelta sleep_time() const { return sleep_time_; }
     78 
     79   // Return the wait time to check the responsiveness of the thread.
     80   base::TimeDelta unresponsive_time() const { return unresponsive_time_; }
     81 
     82   // Returns true if we are monitoring the thread.
     83   bool active() const { return active_; }
     84 
     85   // Returns ping_time_ (used by unit tests).
     86   base::TimeTicks ping_time() const { return ping_time_; }
     87 
     88   // Returns ping_sequence_number_ (used by unit tests).
     89   uint64 ping_sequence_number() const { return ping_sequence_number_; }
     90 
     91  protected:
     92   // Construct a ThreadWatcher for the given thread_id. sleep_time is the
     93   // wait time between ping messages. unresponsive_time is the wait time after
     94   // ping message is sent, to check if we have received pong message or not.
     95   ThreadWatcher(const BrowserThread::ID& thread_id,
     96                 const std::string& thread_name,
     97                 const base::TimeDelta& sleep_time,
     98                 const base::TimeDelta& unresponsive_time);
     99   virtual ~ThreadWatcher();
    100 
    101   // This method activates the thread watching which starts ping/pong messaging.
    102   virtual void ActivateThreadWatching();
    103 
    104   // This method de-activates the thread watching and revokes all tasks.
    105   virtual void DeActivateThreadWatching();
    106 
    107   // This will ensure that the watching is actively taking place, and awaken
    108   // (i.e., post a PostPingMessage) if the watcher has stopped pinging due to
    109   // lack of user activity. It will also reset ping_count_ to kPingCount.
    110   virtual void WakeUp();
    111 
    112   // This method records when ping message was sent and it will Post a task
    113   // (OnPingMessage) to the watched thread that does nothing but respond with
    114   // OnPongMessage. It also posts a task (OnCheckResponsiveness) to check
    115   // responsiveness of monitored thread that would be called after waiting
    116   // unresponsive_time_.
    117   // This method is accessible on WatchDogThread.
    118   virtual void PostPingMessage();
    119 
    120   // This method handles a Pong Message from watched thread. It will track the
    121   // response time (pong time minus ping time) via histograms. It posts a
    122   // PostPingMessage task that would be called after waiting sleep_time_.  It
    123   // increments ping_sequence_number_ by 1.
    124   // This method is accessible on WatchDogThread.
    125   virtual void OnPongMessage(uint64 ping_sequence_number);
    126 
    127   // Determines whether the watched thread is responsive. If the latest
    128   // ping_sequence_number_ differs from the ping_sequence_number passed in, we
    129   // can assume the watched thread has responded with a pong message.
    130   // NOTE(review): the bool return presumably reports responsiveness; confirm.
    131   // This method is accessible on WatchDogThread.
    132   virtual bool OnCheckResponsiveness(uint64 ping_sequence_number);
    133 
    134  private:
    135   friend class ThreadWatcherList;
    136 
    137   // Allow tests to access our innards for testing purposes.
    138   FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, Registration);
    139   FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, ThreadResponding);
    140   FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, ThreadNotResponding);
    141   FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, MultipleThreadsResponding);
    142   FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, MultipleThreadsNotResponding);
    143 
    144   // Post constructor initialization.
    145   void Initialize();
    146 
    147   // Watched thread does nothing except post callback_task to the WATCHDOG
    148   // Thread. This method is called on watched thread.
    149   static void OnPingMessage(const BrowserThread::ID& thread_id,
    150                             Task* callback_task);
    151 
    152   // This is the number of ping messages to be sent when the user is idle.
    153   // ping_count_ will be initialized to kPingCount whenever user becomes active.
    154   static const int kPingCount;
    155 
    156   // The thread_id of the thread being watched. Only one instance can exist for
    157   // the given thread_id of the thread being watched.
    158   const BrowserThread::ID thread_id_;
    159 
    160   // The name of the thread being watched.
    161   const std::string thread_name_;
    162 
    163   // It is the sleep time between the receipt of a pong message back, and the
    164   // sending of another ping message.
    165   const base::TimeDelta sleep_time_;
    166 
    167   // It is the duration from sending a ping message, until we check status to be
    168   // sure a pong message has been returned.
    169   const base::TimeDelta unresponsive_time_;
    170 
    171   // This is the last time when ping message was sent.
    172   base::TimeTicks ping_time_;
    173 
    174   // This is the sequence number of the next ping for which there is no pong. If
    175   // the instance is sleeping, then it will be the sequence number for the next
    176   // ping.
    177   uint64 ping_sequence_number_;
    178 
    179   // This is set to true if thread watcher is watching.
    180   bool active_;
    181 
    182   // The counter tracks the least number of ping messages that will be sent to
    183   // the watched thread before the ping-pong mechanism goes into an extended
    184   // sleep. If this value is zero, then the mechanism is in an extended sleep,
    185   // and awaiting some observed user action before continuing.
    186   int ping_count_;
    187 
    188   // Histogram that keeps track of response times for the watched thread.
    189   base::Histogram* histogram_;
    190 
    191   // We use this factory to create callback tasks for ThreadWatcher object. We
    192   // use this during ping-pong messaging between WatchDog thread and watched
    193   // thread.
    194   ScopedRunnableMethodFactory<ThreadWatcher> method_factory_;
    195 
    196   DISALLOW_COPY_AND_ASSIGN(ThreadWatcher);
    197 };
    198 
    199 // Class with a list of all active thread watchers.  A thread watcher is active
    200 // if it has been registered, which includes determining the histogram name.
    201 // This class provides utility functions to start and stop watching all browser
    202 // threads. Only one instance of this class exists.
    203 class ThreadWatcherList : public NotificationObserver {
    204  public:
    205   // A map from BrowserThread to the actual instances.
    206   typedef std::map<BrowserThread::ID, ThreadWatcher*> RegistrationList;
    207 
    208   // This singleton holds the global list of registered ThreadWatchers.
    209   ThreadWatcherList();
    210   // Destructor deletes all registered ThreadWatcher instances.
    211   virtual ~ThreadWatcherList();
    212 
    213   // Register() stores a pointer to the given ThreadWatcher in a global map.
    214   static void Register(ThreadWatcher* watcher);
    215 
    216   // This method returns true if the ThreadWatcher object is registered.
    217   static bool IsRegistered(const BrowserThread::ID thread_id);
    218 
    219   // This method posts a task on WatchDogThread to start watching all browser
    220   // threads.
    221   // This method is accessible on UI thread.
    222   static void StartWatchingAll();
    223 
    224   // This method posts a task on WatchDogThread to RevokeAll tasks and to
    225   // deactivate thread watching of other threads and tell NotificationService
    226   // to stop calling Observe.
    227   // This method is accessible on UI thread.
    228   static void StopWatchingAll();
    229 
    230   // RemoveAll NotificationTypes that are being observed.
    231   // This method is accessible on UI thread.
    232   static void RemoveNotifications();
    233 
    234  private:
    235   // Allow tests to access our innards for testing purposes.
    236   FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, Registration);
    237 
    238   // Delete all thread watcher objects and remove them from global map.
    239   // This method is accessible on WatchDogThread.
    240   void DeleteAll();
    241 
    242   // This will ensure that the watching is actively taking place. It will wakeup
    243   // all thread watchers every 2 seconds. This is the implementation of
    244   // NotificationObserver. When a matching notification is posted to the
    245   // notification service, this method is called.
    246   // This method is accessible on UI thread.
    247   virtual void Observe(NotificationType type,
    248                        const NotificationSource& source,
    249                        const NotificationDetails& details);
    250 
    251   // This will ensure that the watching is actively taking place, and awaken
    252   // all thread watchers that are registered.
    253   // This method is accessible on WatchDogThread.
    254   virtual void WakeUpAll();
    255 
    256   // The Find() method can be used to test to see if a given ThreadWatcher was
    257   // already registered, or to retrieve a pointer to it from the global map.
    258   static ThreadWatcher* Find(const BrowserThread::ID& thread_id);
    259 
    260   // Helper function should be called only while holding lock_.
    261   ThreadWatcher* PreLockedFind(const BrowserThread::ID& thread_id);
    262 
    263   static ThreadWatcherList* global_;  // The singleton of this class.
    264 
    265   // Lock for access to registered_.
    266   base::Lock lock_;
    267 
    268   // Map of all registered watched threads, from thread_id to ThreadWatcher.
    269   RegistrationList registered_;
    270 
    271   // The registrar that holds NotificationTypes to be observed.
    272   NotificationRegistrar registrar_;
    273 
    274   // This is the last time when all thread watchers were woken up.
    275   base::TimeTicks last_wakeup_time_;
    276 
    277   DISALLOW_COPY_AND_ASSIGN(ThreadWatcherList);
    278 };
    279 
    280 // WatchDogThread class. In its Init method, we start watching the UI, IO,
    281 // DB, FILE and CACHED threads.
    282 class WatchDogThread : public base::Thread {
    283  public:
    284   // Constructor.
    285   WatchDogThread();
    286 
    287   // Destroys the thread and stops the thread.
    288   virtual ~WatchDogThread();
    289 
    290   // Callable on any thread.  Returns whether you're currently on the
    291   // watchdog_thread_.
    292   static bool CurrentlyOnWatchDogThread();
    293 
    294   // These are the same methods as in message_loop.h, but are guaranteed to
    295   // either get posted to the MessageLoop if it's still alive, or be deleted
    296   // otherwise. They return true iff the watchdog thread existed and the task
    297   // was posted. Note that even if the task is posted, there's no guarantee that
    298   // it will run, since the target thread may already have a Quit message queued.
    299   static bool PostTask(const tracked_objects::Location& from_here, Task* task);
    300   static bool PostDelayedTask(const tracked_objects::Location& from_here,
    301                               Task* task,
    302                               int64 delay_ms);
    303 
    304  protected:
    305   virtual void Init();
    306   virtual void CleanUp();
    307   virtual void CleanUpAfterMessageLoopDestruction();
    308 
    309  private:
    310   static bool PostTaskHelper(
    311       const tracked_objects::Location& from_here,
    312       Task* task,
    313       int64 delay_ms);
    314 
    315   // This lock protects watchdog_thread_.
    316   static base::Lock lock_;
    317 
    318   static WatchDogThread* watchdog_thread_;  // The singleton of this class.
    319 
    320   DISALLOW_COPY_AND_ASSIGN(WatchDogThread);
    321 };
    322 
    323 // DISABLE_RUNNABLE_METHOD_REFCOUNT is a convenience macro that disables
    324 // refcounting of the ThreadWatcher and ThreadWatcherList classes.
    325 DISABLE_RUNNABLE_METHOD_REFCOUNT(ThreadWatcher);
    326 DISABLE_RUNNABLE_METHOD_REFCOUNT(ThreadWatcherList);
    327 
    328 #endif  // CHROME_BROWSER_METRICS_THREAD_WATCHER_H_
    329