// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// This file defines a WatchDog thread that monitors the responsiveness of other
// browser threads like UI, IO, DB, FILE and CACHE threads. It also defines
// ThreadWatcher class which performs health check on threads that would like to
// be watched. This file also defines ThreadWatcherList class that has list of
// all active ThreadWatcher objects.
//
// ThreadWatcher class sends ping message to the watched thread and the watched
// thread responds back with a pong message. It uploads response time
// (difference between ping and pong times) as a histogram.
//
// TODO(raman): ThreadWatcher can detect hung threads. If a hung thread is
// detected, we should probably just crash, and allow the crash system to gather
// the stack trace.
//
// Example Usage:
//
//   The following is an example for watching responsiveness of watched (IO)
//   thread. |sleep_time| specifies how often ping messages have to be sent to
//   watched (IO) thread. |unresponsive_time| is the wait time after ping
//   message is sent, to check if we have received pong message or not.
//   |unresponsive_threshold| specifies the number of unanswered ping messages
//   after which watched (IO) thread is considered as not responsive.
//   |crash_on_hang| specifies if we want to crash the browser when the watched
//   (IO) thread has become sufficiently unresponsive, while other threads are
//   sufficiently responsive. |live_threads_threshold| specifies the number of
//   browser threads that are to be responsive when we want to crash the browser
//   because of hung watched (IO) thread.
//
//   base::TimeDelta sleep_time = base::TimeDelta::FromSeconds(5);
//   base::TimeDelta unresponsive_time = base::TimeDelta::FromSeconds(10);
//   uint32 unresponsive_threshold = ThreadWatcherList::kUnresponsiveCount;
//   bool crash_on_hang = false;
//   uint32 live_threads_threshold = ThreadWatcherList::kLiveThreadsThreshold;
//   ThreadWatcher::StartWatching(
//       ThreadWatcher::WatchingParams(
//           BrowserThread::IO, "IO", sleep_time, unresponsive_time,
//           unresponsive_threshold, crash_on_hang, live_threads_threshold));

#ifndef CHROME_BROWSER_METRICS_THREAD_WATCHER_H_
#define CHROME_BROWSER_METRICS_THREAD_WATCHER_H_

#include <map>
#include <string>
#include <vector>

#include "base/basictypes.h"
#include "base/command_line.h"
#include "base/gtest_prod_util.h"
#include "base/memory/ref_counted.h"
#include "base/memory/weak_ptr.h"
#include "base/message_loop/message_loop.h"
#include "base/metrics/histogram.h"
#include "base/synchronization/lock.h"
#include "base/threading/platform_thread.h"
#include "base/threading/thread.h"
#include "base/threading/watchdog.h"
#include "base/time/time.h"
#include "content/public/browser/browser_thread.h"
#include "content/public/browser/notification_observer.h"
#include "content/public/browser/notification_registrar.h"

class CustomThreadWatcher;
class StartupTimeBomb;
class ThreadWatcherList;
class ThreadWatcherObserver;

// This class performs health check on threads that would like to be watched.
class ThreadWatcher {
 public:
  // base::Bind supports methods with up to 6 parameters. WatchingParams is used
  // as a workaround for that limitation when invoking
  // ThreadWatcher::StartWatching: it bundles all of the arguments into a single
  // object.
75 struct WatchingParams { 76 const content::BrowserThread::ID& thread_id; 77 const std::string& thread_name; 78 const base::TimeDelta& sleep_time; 79 const base::TimeDelta& unresponsive_time; 80 uint32 unresponsive_threshold; 81 bool crash_on_hang; 82 uint32 live_threads_threshold; 83 84 WatchingParams(const content::BrowserThread::ID& thread_id_in, 85 const std::string& thread_name_in, 86 const base::TimeDelta& sleep_time_in, 87 const base::TimeDelta& unresponsive_time_in, 88 uint32 unresponsive_threshold_in, 89 bool crash_on_hang_in, 90 uint32 live_threads_threshold_in) 91 : thread_id(thread_id_in), 92 thread_name(thread_name_in), 93 sleep_time(sleep_time_in), 94 unresponsive_time(unresponsive_time_in), 95 unresponsive_threshold(unresponsive_threshold_in), 96 crash_on_hang(crash_on_hang_in), 97 live_threads_threshold(live_threads_threshold_in) { 98 } 99 }; 100 101 // This method starts performing health check on the given |thread_id|. It 102 // will create ThreadWatcher object for the given |thread_id|, |thread_name|. 103 // |sleep_time| is the wait time between ping messages. |unresponsive_time| is 104 // the wait time after ping message is sent, to check if we have received pong 105 // message or not. |unresponsive_threshold| is used to determine if the thread 106 // is responsive or not. The watched thread is considered unresponsive if it 107 // hasn't responded with a pong message for |unresponsive_threshold| number of 108 // ping messages. |crash_on_hang| specifies if browser should be crashed when 109 // the watched thread is unresponsive. |live_threads_threshold| specifies the 110 // number of browser threads that are to be responsive when we want to crash 111 // the browser and watched thread has become sufficiently unresponsive. It 112 // will register that ThreadWatcher object and activate the thread watching of 113 // the given thread_id. 
static void StartWatching(const WatchingParams& params);

  // Return the |thread_id_| of the thread being watched.
  content::BrowserThread::ID thread_id() const { return thread_id_; }

  // Return the name of the thread being watched (returned by value, i.e. as a
  // copy of |thread_name_|).
  std::string thread_name() const { return thread_name_; }

  // Return the sleep time between ping messages to be sent to the thread.
  base::TimeDelta sleep_time() const { return sleep_time_; }

  // Return the wait time to check the responsiveness of the thread.
  base::TimeDelta unresponsive_time() const { return unresponsive_time_; }

  // Returns true if we are monitoring the thread.
  bool active() const { return active_; }

  // Returns |ping_time_| (used by unit tests).
  base::TimeTicks ping_time() const { return ping_time_; }

  // Returns |ping_sequence_number_| (used by unit tests).
  uint64 ping_sequence_number() const { return ping_sequence_number_; }

 protected:
  // Construct a ThreadWatcher for the thread described by |params|.
  // |params.sleep_time| is the wait time between ping messages.
  // |params.unresponsive_time| is the wait time after ping message is sent, to
  // check if we have received pong message or not.
  explicit ThreadWatcher(const WatchingParams& params);

  virtual ~ThreadWatcher();

  // This method activates the thread watching which starts ping/pong messaging.
  virtual void ActivateThreadWatching();

  // This method de-activates the thread watching and revokes all tasks.
  virtual void DeActivateThreadWatching();

  // This will ensure that the watching is actively taking place, and awaken
  // (i.e., post a PostPingMessage()) if the watcher has stopped pinging due to
  // lack of user activity. It will also reset |ping_count_| to
  // |unresponsive_threshold_|.
virtual void WakeUp();

  // This method records when ping message was sent and it will post a task
  // (OnPingMessage()) to the watched thread that does nothing but respond with
  // OnPongMessage(). It also posts a task (OnCheckResponsiveness()) to check
  // responsiveness of monitored thread that would be called after waiting
  // |unresponsive_time_|.
  // This method is accessible on WatchDogThread.
  virtual void PostPingMessage();

  // This method handles a Pong Message from watched thread. It will track the
  // response time (pong time minus ping time) via histograms. It posts a
  // PostPingMessage() task that would be called after waiting |sleep_time_|. It
  // increments |ping_sequence_number_| by 1.
  // This method is accessible on WatchDogThread.
  virtual void OnPongMessage(uint64 ping_sequence_number);

  // This method will determine if the watched thread is responsive or not. If
  // the latest |ping_sequence_number_| is not the same as the
  // |ping_sequence_number| that is passed in, then we can assume that watched
  // thread has responded with a pong message.
  // This method is accessible on WatchDogThread.
  virtual void OnCheckResponsiveness(uint64 ping_sequence_number);

  // Set by OnCheckResponsiveness when it determines if the watched thread is
  // responsive or not.
  bool responsive_;

 private:
  friend class ThreadWatcherList;
  friend class CustomThreadWatcher;

  // Allow tests to access our innards for testing purposes.
  FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, Registration);
  FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, ThreadResponding);
  FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, ThreadNotResponding);
  FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, MultipleThreadsResponding);
  FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, MultipleThreadsNotResponding);

  // Post-constructor initialization.
void Initialize();

  // Watched thread does nothing except post |callback_task| to the WatchDog
  // thread. This method is called on watched thread.
  static void OnPingMessage(const content::BrowserThread::ID& thread_id,
                            const base::Closure& callback_task);

  // This method resets |unresponsive_count_| to zero because watched thread is
  // responding to the ping message with a pong message.
  void ResetHangCounters();

  // This method records that the watched thread is not responding to the ping
  // message. It increments |unresponsive_count_| by 1.
  void GotNoResponse();

  // This method returns true if the watched thread has not responded with a
  // pong message for |unresponsive_threshold_| number of ping messages.
  bool IsVeryUnresponsive();

  // The |thread_id_| of the thread being watched. Only one instance can exist
  // for the given |thread_id_| of the thread being watched.
  const content::BrowserThread::ID thread_id_;

  // The name of the thread being watched.
  const std::string thread_name_;

  // Used to post messages to watched thread.
  scoped_refptr<base::MessageLoopProxy> watched_loop_;

  // It is the sleep time between the receipt of a pong message back, and the
  // sending of another ping message.
  const base::TimeDelta sleep_time_;

  // It is the duration from sending a ping message, until we check status to be
  // sure a pong message has been returned.
  const base::TimeDelta unresponsive_time_;

  // This is the last time when ping message was sent.
  base::TimeTicks ping_time_;

  // This is the last time when we got pong message.
  base::TimeTicks pong_time_;

  // This is the sequence number of the next ping for which there is no pong. If
  // the instance is sleeping, then it will be the sequence number for the next
  // ping.
  uint64 ping_sequence_number_;

  // This is set to true if thread watcher is watching.
bool active_;

  // The counter tracks the least number of ping messages that will be sent to
  // watched thread before the ping-pong mechanism will go into an extended
  // sleep. If this value is zero, then the mechanism is in an extended sleep,
  // and awaiting some observed user action before continuing.
  int ping_count_;

  // Histogram that keeps track of response times for the watched thread.
  base::HistogramBase* response_time_histogram_;

  // Histogram that keeps track of unresponsive time since the last pong message
  // when we got no response (GotNoResponse()) from the watched thread.
  base::HistogramBase* unresponsive_time_histogram_;

  // Histogram that keeps track of how many threads are responding when we got
  // no response (GotNoResponse()) from the watched thread.
  base::HistogramBase* responsive_count_histogram_;

  // Histogram that keeps track of how many threads are not responding when we
  // got no response (GotNoResponse()) from the watched thread. Count includes
  // the thread that got no response.
  base::HistogramBase* unresponsive_count_histogram_;

  // This counter tracks the unresponsiveness of watched thread. If this value
  // is zero then watched thread has responded with a pong message. This is
  // incremented by 1 when we got no response (GotNoResponse()) from the watched
  // thread.
  uint32 unresponsive_count_;

  // This is set to true when we would have crashed the browser because the
  // watched thread hasn't responded at least |unresponsive_threshold_| times.
  // It is reset to false when watched thread responds with a pong message.
  bool hung_processing_complete_;

  // This is used to determine if the watched thread is responsive or not. If
  // watched thread's |unresponsive_count_| is greater than or equal to
  // |unresponsive_threshold_| then we would consider it as unresponsive.
uint32 unresponsive_threshold_;

  // This is set to true if we want to crash the browser when the watched thread
  // has become sufficiently unresponsive, while other threads are sufficiently
  // responsive.
  bool crash_on_hang_;

  // This specifies the number of browser threads that are to be responsive when
  // we want to crash the browser because watched thread has become sufficiently
  // unresponsive.
  uint32 live_threads_threshold_;

  // We use this factory to create callback tasks for ThreadWatcher object. We
  // use this during ping-pong messaging between WatchDog thread and watched
  // thread.
  base::WeakPtrFactory<ThreadWatcher> weak_ptr_factory_;

  DISALLOW_COPY_AND_ASSIGN(ThreadWatcher);
};

// Class with a list of all active thread watchers. A thread watcher is active
// if it has been registered, which includes determining the histogram name.
// This class provides utility functions to start and stop watching all browser
// threads. Only one instance of this class exists.
class ThreadWatcherList {
 public:
  // A map from BrowserThread to the actual instances.
  typedef std::map<content::BrowserThread::ID, ThreadWatcher*> RegistrationList;

  // A map from thread names (UI, IO, etc) to |CrashDataThresholds|.
  // |live_threads_threshold| specifies the maximum number of browser threads
  // that have to be responsive when we want to crash the browser because of
  // hung watched thread. This threshold allows us to either look for a system
  // deadlock, or look for a solo hung thread. A small live_threads_threshold
  // looks for a broad deadlock (few browser threads left running), and a large
  // threshold looks for a single hung thread (this is only appropriate for a
  // thread that *should* never have much jank, such as the IO).
//
  // |unresponsive_threshold| specifies the number of unanswered ping messages
  // after which watched (UI, IO, etc) thread is considered as not responsive.
  // We translate "time" (given in seconds) into a number of pings. As a result,
  // we only declare a thread unresponsive when a lot of "time" has passed (many
  // pings), and yet our pinging thread has continued to process messages (so we
  // know the entire PC is not hung). Set this number higher to crash less
  // often, and lower to crash more often.
  //
  // The map lists all threads (by name) that can induce a crash by hanging. It
  // is populated from the command line, or given a default list. See
  // InitializeAndStartWatching() for the separate list of all threads that are
  // watched, as they provide the system context of how hung *other* threads
  // are.
  //
  // ThreadWatcher monitors five browser threads (i.e., UI, IO, DB, FILE,
  // and CACHE). Out of the 5 threads, any subset may be watched for hangs that
  // can cause a crash. The following example's command line selects exactly 3
  // such threads.
  //
  // The example command line argument consists of "UI:3:18,IO:3:18,FILE:5:90".
  // In that string, the first parameter specifies the thread_id: UI, IO or
  // FILE. The second parameter specifies |live_threads_threshold|. For UI and
  // IO threads, we would crash if the number of threads responding is less than
  // or equal to 3. The third parameter specifies the unresponsive threshold
  // seconds. This number is used to calculate |unresponsive_threshold|. In this
  // example for UI and IO threads, we would crash if those threads don't
  // respond for 18 seconds (or 9 unanswered ping messages) and for FILE thread,
  // crash_seconds is set to 90 seconds (or 45 unanswered ping messages).
  //
  // The following examples explain how the data in |CrashDataThresholds|
  // controls the crashes.
//
  // Example 1: If the |live_threads_threshold| value for "IO" was 3 and
  // unresponsive threshold seconds is 18 (or |unresponsive_threshold| is 9),
  // then we would crash if the IO thread was hung (9 unanswered ping messages)
  // and if at least one thread is responding and total responding threads is
  // less than or equal to 3 (this thread, plus at least one other thread is
  // unresponsive). We would not crash if none of the threads are responding, as
  // we'd assume such large hang counts mean that the system is generally
  // unresponsive.
  // Example 2: If the |live_threads_threshold| value for "UI" was any number
  // higher than 6 and unresponsive threshold seconds is 18 (or
  // |unresponsive_threshold| is 9), then we would always crash if the UI thread
  // was hung (9 unanswered ping messages), no matter what the other threads are
  // doing.
  // Example 3: If the |live_threads_threshold| value of "FILE" was 5 and
  // unresponsive threshold seconds is 90 (or |unresponsive_threshold| is 45),
  // then we would only crash if the FILE thread was the ONLY hung thread
  // (because we watch 6 threads). If there was another unresponsive thread, we
  // would not consider this a problem worth crashing for. FILE thread would be
  // considered as hung if it didn't respond for 45 ping messages.
  //
  // NOTE(review): Examples 2 and 3 assume 6 watched threads, while the text
  // earlier in this comment says five browser threads are monitored. Confirm
  // the actual count against InitializeAndStartWatching().
  struct CrashDataThresholds {
    CrashDataThresholds(uint32 live_threads_threshold,
                        uint32 unresponsive_threshold);
    CrashDataThresholds();

    // Maximum number of browser threads that have to be responsive when we want
    // to crash the browser because of the hung watched thread.
    uint32 live_threads_threshold;

    // Number of unanswered ping messages after which the watched thread is
    // considered as not responsive.
    uint32 unresponsive_threshold;
  };
  typedef std::map<std::string, CrashDataThresholds> CrashOnHangThreadMap;

  // This method posts a task on WatchDogThread to start watching all browser
  // threads.
  // This method is accessible on UI thread.
static void StartWatchingAll(const base::CommandLine& command_line);

  // This method posts a task on WatchDogThread to RevokeAll tasks and to
  // deactivate thread watching of other threads and tell NotificationService to
  // stop calling Observe.
  // This method is accessible on UI thread.
  static void StopWatchingAll();

  // Register() stores a pointer to the given ThreadWatcher in a global map.
  static void Register(ThreadWatcher* watcher);

  // This method returns true if the ThreadWatcher object is registered.
  static bool IsRegistered(const content::BrowserThread::ID thread_id);

  // This method returns number of responsive and unresponsive watched threads
  // through the |responding_thread_count| and |unresponding_thread_count|
  // output parameters.
  static void GetStatusOfThreads(uint32* responding_thread_count,
                                 uint32* unresponding_thread_count);

  // This will ensure that the watching is actively taking place, and awaken
  // all thread watchers that are registered.
  static void WakeUpAll();

 private:
  // Allow tests to access our innards for testing purposes.
  friend class CustomThreadWatcher;
  friend class ThreadWatcherListTest;
  friend class ThreadWatcherTest;
  FRIEND_TEST_ALL_PREFIXES(ThreadWatcherAndroidTest,
                           ApplicationStatusNotification);
  FRIEND_TEST_ALL_PREFIXES(ThreadWatcherListTest, Restart);
  FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, ThreadNamesOnlyArgs);
  FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, ThreadNamesAndLiveThresholdArgs);
  FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, CrashOnHangThreadsAllArgs);

  // This singleton holds the global list of registered ThreadWatchers.
  ThreadWatcherList();

  // Destructor deletes all registered ThreadWatcher instances.
  virtual ~ThreadWatcherList();

  // Parses the command line to get |crash_on_hang_threads| map from
  // switches::kCrashOnHangThreads. |crash_on_hang_threads| is a map of
  // |crash_on_hang| thread's names to |CrashDataThresholds|.
static void ParseCommandLine(
      const base::CommandLine& command_line,
      uint32* unresponsive_threshold,  // NOTE(review): presumably an output
                                       // parameter as well - confirm in the
                                       // implementation.
      CrashOnHangThreadMap* crash_on_hang_threads);

  // Parses the argument |crash_on_hang_thread_names| and creates
  // |crash_on_hang_threads| map of |crash_on_hang| thread's names to
  // |CrashDataThresholds|. If |crash_on_hang_thread_names| doesn't specify
  // |live_threads_threshold|, then it uses |default_live_threads_threshold| as
  // the value. If |crash_on_hang_thread_names| doesn't specify |crash_seconds|,
  // then it uses |default_crash_seconds| as the value.
  static void ParseCommandLineCrashOnHangThreads(
      const std::string& crash_on_hang_thread_names,
      uint32 default_live_threads_threshold,
      uint32 default_crash_seconds,
      CrashOnHangThreadMap* crash_on_hang_threads);

  // This constructs the |ThreadWatcherList| singleton and starts watching
  // browser threads by calling StartWatching() on each browser thread that is
  // watched. It disarms StartupTimeBomb.
  static void InitializeAndStartWatching(
      uint32 unresponsive_threshold,
      const CrashOnHangThreadMap& crash_on_hang_threads);

  // This method calls ThreadWatcher::StartWatching() to perform health check on
  // the given |thread_id|.
  // NOTE(review): |crash_on_hang| and |live_threads_threshold| are not passed
  // in here; presumably they are derived from |crash_on_hang_threads| for
  // |thread_name| - confirm in the implementation.
  static void StartWatching(
      const content::BrowserThread::ID& thread_id,
      const std::string& thread_name,
      const base::TimeDelta& sleep_time,
      const base::TimeDelta& unresponsive_time,
      uint32 unresponsive_threshold,
      const CrashOnHangThreadMap& crash_on_hang_threads);

  // Delete all thread watcher objects and remove them from global map. It also
  // deletes |g_thread_watcher_list_|.
  static void DeleteAll();

  // The Find() method can be used to test to see if a given ThreadWatcher was
  // already registered, or to retrieve a pointer to it from the global map.
static ThreadWatcher* Find(const content::BrowserThread::ID& thread_id);

  // Sets |g_stopped_| on the WatchDogThread. This is necessary to reflect the
  // state between the delayed |StartWatchingAll| and the immediate
  // |StopWatchingAll|.
  static void SetStopped(bool stopped);

  // The singleton of this class, used to keep track of information about
  // threads that are being watched.
  static ThreadWatcherList* g_thread_watcher_list_;

  // StartWatchingAll() is delayed in relation to StopWatchingAll(), so if
  // a Stop comes first, prevent further initialization.
  static bool g_stopped_;

  // This is the wait time between ping messages.
  static const int kSleepSeconds;

  // This is the wait time after ping message is sent, to check if we have
  // received pong message or not.
  static const int kUnresponsiveSeconds;

  // Default value for |unresponsive_threshold|.
  static const int kUnresponsiveCount;

  // Default value for |live_threads_threshold|.
  static const int kLiveThreadsThreshold;

  // Default value for the delay until |InitializeAndStartWatching| is called.
  // Non-const for tests.
  static int g_initialize_delay_seconds;

  // Map of all registered watched threads, from thread_id to ThreadWatcher.
  RegistrationList registered_;

  DISALLOW_COPY_AND_ASSIGN(ThreadWatcherList);
};

// This class ensures that the thread watching is actively taking place. Only
// one instance of this class exists.
class ThreadWatcherObserver : public content::NotificationObserver {
 public:
  // Registers |g_thread_watcher_observer_| as the Notifications observer.
  // |wakeup_interval| specifies how often to wake up thread watchers. This
  // method is accessible on UI thread.
static void SetupNotifications(const base::TimeDelta& wakeup_interval);

  // Removes all ints from |registrar_| and deletes
  // |g_thread_watcher_observer_|. This method is accessible on UI thread.
  static void RemoveNotifications();

 private:
  // Constructor of |g_thread_watcher_observer_| singleton.
  explicit ThreadWatcherObserver(const base::TimeDelta& wakeup_interval);

  // Destructor of |g_thread_watcher_observer_| singleton.
  virtual ~ThreadWatcherObserver();

  // This ensures all thread watchers are active because there is some user
  // activity. It will wake up all thread watchers every |wakeup_interval_|
  // seconds. This is the implementation of content::NotificationObserver. When
  // a matching notification is posted to the notification service, this method
  // is called.
  virtual void Observe(int type,
                       const content::NotificationSource& source,
                       const content::NotificationDetails& details) OVERRIDE;

  // The singleton of this class.
  static ThreadWatcherObserver* g_thread_watcher_observer_;

  // The registrar that holds ints to be observed.
  content::NotificationRegistrar registrar_;

  // This is the last time when we woke all thread watchers up.
  base::TimeTicks last_wakeup_time_;

  // It is the time interval between wake up calls to thread watchers.
  const base::TimeDelta wakeup_interval_;

  DISALLOW_COPY_AND_ASSIGN(ThreadWatcherObserver);
};

// Class for WatchDogThread and in its Init method, we start watching UI, IO,
// DB, FILE, CACHE threads.
class WatchDogThread : public base::Thread {
 public:
  // Constructor.
  WatchDogThread();

  // Destroys the thread and stops the thread.
  virtual ~WatchDogThread();

  // Callable on any thread. Returns whether you're currently on a
  // WatchDogThread.
static bool CurrentlyOnWatchDogThread();

  // These are the same methods in message_loop.h, but are guaranteed to either
  // get posted to the MessageLoop if it's still alive, or be deleted otherwise.
  // They return true iff the watchdog thread existed and the task was posted.
  // Note that even if the task is posted, there's no guarantee that it will
  // run, since the target thread may already have a Quit message in its queue.
  static bool PostTask(const tracked_objects::Location& from_here,
                       const base::Closure& task);
  static bool PostDelayedTask(const tracked_objects::Location& from_here,
                              const base::Closure& task,
                              base::TimeDelta delay);

 protected:
  // Overrides of the base::Thread hooks.
  virtual void Init() OVERRIDE;
  virtual void CleanUp() OVERRIDE;

 private:
  // NOTE(review): presumably the shared implementation behind PostTask() and
  // PostDelayedTask() - confirm in the implementation.
  static bool PostTaskHelper(
      const tracked_objects::Location& from_here,
      const base::Closure& task,
      base::TimeDelta delay);

  DISALLOW_COPY_AND_ASSIGN(WatchDogThread);
};

// This is a wrapper class for getting the crash dumps of the hangs during
// startup.
class StartupTimeBomb {
 public:
  // This singleton is instantiated when the browser process is launched.
  StartupTimeBomb();

  // Destructor disarms |startup_watchdog_| (if it is armed) so that alarm
  // doesn't go off.
  ~StartupTimeBomb();

  // Constructs |startup_watchdog_| which spawns a thread and starts timer.
  // |duration| specifies how long |startup_watchdog_| will wait before it
  // calls alarm.
  void Arm(const base::TimeDelta& duration);

  // Disarms |startup_watchdog_| thread and then deletes it which stops the
  // Watchdog thread.
  void Disarm();

  // Disarms |g_startup_timebomb_|.
  static void DisarmStartupTimeBomb();

 private:
  // Deletes |startup_watchdog_| if it is joinable. If |startup_watchdog_| is
  // not joinable, then it will post a delayed task to try again.
void DeleteStartupWatchdog();

  // The singleton of this class.
  static StartupTimeBomb* g_startup_timebomb_;

  // Watches for hangs during startup until it is disarmed.
  base::Watchdog* startup_watchdog_;

  // The |thread_id_| on which this object is constructed.
  const base::PlatformThreadId thread_id_;

  DISALLOW_COPY_AND_ASSIGN(StartupTimeBomb);
};

// This is a wrapper class for detecting hangs during shutdown.
class ShutdownWatcherHelper {
 public:
  // Create an empty holder for |shutdown_watchdog_|.
  ShutdownWatcherHelper();

  // Destructor disarms |shutdown_watchdog_| so that alarm doesn't go off.
  ~ShutdownWatcherHelper();

  // Constructs ShutdownWatchDogThread which spawns a thread and starts timer.
  // |duration| specifies how long it will wait before it calls alarm.
  void Arm(const base::TimeDelta& duration);

 private:
  // |shutdown_watchdog_| watches for hangs during shutdown.
  base::Watchdog* shutdown_watchdog_;

  // The |thread_id_| on which this object is constructed.
  const base::PlatformThreadId thread_id_;

  DISALLOW_COPY_AND_ASSIGN(ShutdownWatcherHelper);
};

#endif  // CHROME_BROWSER_METRICS_THREAD_WATCHER_H_