// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "chrome/browser/metrics/thread_watcher.h"

#include <math.h>  // ceil

#include "base/bind.h"
#include "base/compiler_specific.h"
#include "base/debug/alias.h"
#include "base/lazy_instance.h"
#include "base/strings/string_number_conversions.h"
#include "base/strings/string_split.h"
#include "base/strings/string_tokenizer.h"
#include "base/strings/stringprintf.h"
#include "base/threading/thread_restrictions.h"
#include "build/build_config.h"
#include "chrome/browser/metrics/metrics_service.h"
#include "chrome/common/chrome_switches.h"
#include "chrome/common/chrome_version_info.h"
#include "chrome/common/dump_without_crashing.h"
#include "chrome/common/logging_chrome.h"

#if defined(OS_WIN)
#include "base/win/windows_version.h"
#endif

using content::BrowserThread;

namespace {

// The following are unique function names for forcing the crash when a thread
// is unresponsive. This makes it possible to tell from the callstack alone what
// thread was unresponsive.
//
// We disable optimizations for this block of functions so the compiler doesn't
// merge them all together.
MSVC_DISABLE_OPTIMIZE()
MSVC_PUSH_DISABLE_WARNING(4748)

// Returns a null int pointer. Kept as a separate function so that the write in
// NullPointerCrash() goes through a call result rather than a literal NULL.
int* NullPointer() {
  return reinterpret_cast<int*>(NULL);
}

// Reports a hang. When NDEBUG is not defined this writes |line_number| through
// a null pointer (which crashes the process); when NDEBUG is defined it calls
// logging::DumpWithoutCrashing() instead and returns normally.
void NullPointerCrash(int line_number) {
#ifndef NDEBUG
  *NullPointer() = line_number;  // Crash.
#else
  logging::DumpWithoutCrashing();
#endif
}

// One NOINLINE wrapper per reason/thread. Each passes its own __LINE__ to
// NullPointerCrash(), so both the function name on the stack and the value
// being written identify which wrapper was taken.

// Used by ShutdownWatchDogThread::Alarm() when shutdown takes too long.
NOINLINE void ShutdownCrash() {
  NullPointerCrash(__LINE__);
}

NOINLINE void ThreadUnresponsive_UI() {
  NullPointerCrash(__LINE__);
}

NOINLINE void ThreadUnresponsive_DB() {
  NullPointerCrash(__LINE__);
}

NOINLINE void ThreadUnresponsive_FILE() {
  NullPointerCrash(__LINE__);
}

NOINLINE void ThreadUnresponsive_FILE_USER_BLOCKING() {
  NullPointerCrash(__LINE__);
}

NOINLINE void ThreadUnresponsive_PROCESS_LAUNCHER() {
  NullPointerCrash(__LINE__);
}

NOINLINE void ThreadUnresponsive_CACHE() {
  NullPointerCrash(__LINE__);
}

NOINLINE void ThreadUnresponsive_IO() {
  NullPointerCrash(__LINE__);
}

MSVC_POP_WARNING()
MSVC_ENABLE_OPTIMIZE();

// Dispatches to the ThreadUnresponsive_* wrapper that matches |thread_id|.
// |thread_id| is passed to base::debug::Alias() first so that its value is
// kept available to a debugger / crash dump.
void CrashBecauseThreadWasUnresponsive(BrowserThread::ID thread_id) {
  base::debug::Alias(&thread_id);

  switch (thread_id) {
    case BrowserThread::UI:
      return ThreadUnresponsive_UI();
    case BrowserThread::DB:
      return ThreadUnresponsive_DB();
    case BrowserThread::FILE:
      return ThreadUnresponsive_FILE();
    case BrowserThread::FILE_USER_BLOCKING:
      return ThreadUnresponsive_FILE_USER_BLOCKING();
    case BrowserThread::PROCESS_LAUNCHER:
      return ThreadUnresponsive_PROCESS_LAUNCHER();
    case BrowserThread::CACHE:
      return ThreadUnresponsive_CACHE();
    case BrowserThread::IO:
      return ThreadUnresponsive_IO();
    case BrowserThread::ID_COUNT:
      CHECK(false);  // This shouldn't actually be reached!
      break;

    // Omission of the default handler is intentional -- that way the compiler
    // should warn if our switch becomes outdated.
  }

  CHECK(false) << "Unknown thread was unresponsive.";  // Shouldn't be reached.
}

}  // namespace

// ThreadWatcher methods and members.
121 ThreadWatcher::ThreadWatcher(const WatchingParams& params) 122 : thread_id_(params.thread_id), 123 thread_name_(params.thread_name), 124 watched_loop_( 125 BrowserThread::GetMessageLoopProxyForThread(params.thread_id)), 126 sleep_time_(params.sleep_time), 127 unresponsive_time_(params.unresponsive_time), 128 ping_time_(base::TimeTicks::Now()), 129 pong_time_(ping_time_), 130 ping_sequence_number_(0), 131 active_(false), 132 ping_count_(params.unresponsive_threshold), 133 response_time_histogram_(NULL), 134 unresponsive_time_histogram_(NULL), 135 unresponsive_count_(0), 136 hung_processing_complete_(false), 137 unresponsive_threshold_(params.unresponsive_threshold), 138 crash_on_hang_(params.crash_on_hang), 139 live_threads_threshold_(params.live_threads_threshold), 140 weak_ptr_factory_(this) { 141 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 142 Initialize(); 143 } 144 145 ThreadWatcher::~ThreadWatcher() {} 146 147 // static 148 void ThreadWatcher::StartWatching(const WatchingParams& params) { 149 DCHECK_GE(params.sleep_time.InMilliseconds(), 0); 150 DCHECK_GE(params.unresponsive_time.InMilliseconds(), 151 params.sleep_time.InMilliseconds()); 152 153 // If we are not on WatchDogThread, then post a task to call StartWatching on 154 // WatchDogThread. 155 if (!WatchDogThread::CurrentlyOnWatchDogThread()) { 156 WatchDogThread::PostTask( 157 FROM_HERE, 158 base::Bind(&ThreadWatcher::StartWatching, params)); 159 return; 160 } 161 162 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 163 164 // Create a new thread watcher object for the given thread and activate it. 165 ThreadWatcher* watcher = new ThreadWatcher(params); 166 167 DCHECK(watcher); 168 // If we couldn't register the thread watcher object, we are shutting down, 169 // then don't activate thread watching. 
170 if (!ThreadWatcherList::IsRegistered(params.thread_id)) 171 return; 172 watcher->ActivateThreadWatching(); 173 } 174 175 void ThreadWatcher::ActivateThreadWatching() { 176 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 177 if (active_) return; 178 active_ = true; 179 ping_count_ = unresponsive_threshold_; 180 ResetHangCounters(); 181 base::MessageLoop::current()->PostTask( 182 FROM_HERE, 183 base::Bind(&ThreadWatcher::PostPingMessage, 184 weak_ptr_factory_.GetWeakPtr())); 185 } 186 187 void ThreadWatcher::DeActivateThreadWatching() { 188 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 189 active_ = false; 190 ping_count_ = 0; 191 weak_ptr_factory_.InvalidateWeakPtrs(); 192 } 193 194 void ThreadWatcher::WakeUp() { 195 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 196 // There is some user activity, PostPingMessage task of thread watcher if 197 // needed. 198 if (!active_) return; 199 200 // Throw away the previous |unresponsive_count_| and start over again. Just 201 // before going to sleep, |unresponsive_count_| could be very close to 202 // |unresponsive_threshold_| and when user becomes active, 203 // |unresponsive_count_| can go over |unresponsive_threshold_| if there was no 204 // response for ping messages. Reset |unresponsive_count_| to start measuring 205 // the unresponsiveness of the threads when system becomes active. 206 unresponsive_count_ = 0; 207 208 if (ping_count_ <= 0) { 209 ping_count_ = unresponsive_threshold_; 210 ResetHangCounters(); 211 PostPingMessage(); 212 } else { 213 ping_count_ = unresponsive_threshold_; 214 } 215 } 216 217 void ThreadWatcher::PostPingMessage() { 218 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 219 // If we have stopped watching or if the user is idle, then stop sending 220 // ping messages. 221 if (!active_ || ping_count_ <= 0) 222 return; 223 224 // Save the current time when we have sent ping message. 
225 ping_time_ = base::TimeTicks::Now(); 226 227 // Send a ping message to the watched thread. Callback will be called on 228 // the WatchDogThread. 229 base::Closure callback( 230 base::Bind(&ThreadWatcher::OnPongMessage, weak_ptr_factory_.GetWeakPtr(), 231 ping_sequence_number_)); 232 if (watched_loop_->PostTask( 233 FROM_HERE, 234 base::Bind(&ThreadWatcher::OnPingMessage, thread_id_, 235 callback))) { 236 // Post a task to check the responsiveness of watched thread. 237 base::MessageLoop::current()->PostDelayedTask( 238 FROM_HERE, 239 base::Bind(&ThreadWatcher::OnCheckResponsiveness, 240 weak_ptr_factory_.GetWeakPtr(), ping_sequence_number_), 241 unresponsive_time_); 242 } else { 243 // Watched thread might have gone away, stop watching it. 244 DeActivateThreadWatching(); 245 } 246 } 247 248 void ThreadWatcher::OnPongMessage(uint64 ping_sequence_number) { 249 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 250 251 // Record watched thread's response time. 252 base::TimeTicks now = base::TimeTicks::Now(); 253 base::TimeDelta response_time = now - ping_time_; 254 response_time_histogram_->AddTime(response_time); 255 256 // Save the current time when we have got pong message. 257 pong_time_ = now; 258 259 // Check if there are any extra pings in flight. 260 DCHECK_EQ(ping_sequence_number_, ping_sequence_number); 261 if (ping_sequence_number_ != ping_sequence_number) 262 return; 263 264 // Increment sequence number for the next ping message to indicate watched 265 // thread is responsive. 266 ++ping_sequence_number_; 267 268 // If we have stopped watching or if the user is idle, then stop sending 269 // ping messages. 
270 if (!active_ || --ping_count_ <= 0) 271 return; 272 273 base::MessageLoop::current()->PostDelayedTask( 274 FROM_HERE, 275 base::Bind(&ThreadWatcher::PostPingMessage, 276 weak_ptr_factory_.GetWeakPtr()), 277 sleep_time_); 278 } 279 280 void ThreadWatcher::OnCheckResponsiveness(uint64 ping_sequence_number) { 281 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 282 // If we have stopped watching then consider thread as responding. 283 if (!active_) { 284 responsive_ = true; 285 return; 286 } 287 // If the latest ping_sequence_number_ is not same as the ping_sequence_number 288 // that is passed in, then we can assume OnPongMessage was called. 289 // OnPongMessage increments ping_sequence_number_. 290 if (ping_sequence_number_ != ping_sequence_number) { 291 // Reset unresponsive_count_ to zero because we got a response from the 292 // watched thread. 293 ResetHangCounters(); 294 295 responsive_ = true; 296 return; 297 } 298 // Record that we got no response from watched thread. 299 GotNoResponse(); 300 301 // Post a task to check the responsiveness of watched thread. 302 base::MessageLoop::current()->PostDelayedTask( 303 FROM_HERE, 304 base::Bind(&ThreadWatcher::OnCheckResponsiveness, 305 weak_ptr_factory_.GetWeakPtr(), ping_sequence_number_), 306 unresponsive_time_); 307 responsive_ = false; 308 } 309 310 void ThreadWatcher::Initialize() { 311 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 312 ThreadWatcherList::Register(this); 313 314 const std::string response_time_histogram_name = 315 "ThreadWatcher.ResponseTime." + thread_name_; 316 response_time_histogram_ = base::Histogram::FactoryTimeGet( 317 response_time_histogram_name, 318 base::TimeDelta::FromMilliseconds(1), 319 base::TimeDelta::FromSeconds(100), 50, 320 base::Histogram::kUmaTargetedHistogramFlag); 321 322 const std::string unresponsive_time_histogram_name = 323 "ThreadWatcher.Unresponsive." 
+ thread_name_; 324 unresponsive_time_histogram_ = base::Histogram::FactoryTimeGet( 325 unresponsive_time_histogram_name, 326 base::TimeDelta::FromMilliseconds(1), 327 base::TimeDelta::FromSeconds(100), 50, 328 base::Histogram::kUmaTargetedHistogramFlag); 329 330 const std::string responsive_count_histogram_name = 331 "ThreadWatcher.ResponsiveThreads." + thread_name_; 332 responsive_count_histogram_ = base::LinearHistogram::FactoryGet( 333 responsive_count_histogram_name, 1, 10, 11, 334 base::Histogram::kUmaTargetedHistogramFlag); 335 336 const std::string unresponsive_count_histogram_name = 337 "ThreadWatcher.UnresponsiveThreads." + thread_name_; 338 unresponsive_count_histogram_ = base::LinearHistogram::FactoryGet( 339 unresponsive_count_histogram_name, 1, 10, 11, 340 base::Histogram::kUmaTargetedHistogramFlag); 341 } 342 343 // static 344 void ThreadWatcher::OnPingMessage(const BrowserThread::ID& thread_id, 345 const base::Closure& callback_task) { 346 // This method is called on watched thread. 347 DCHECK(BrowserThread::CurrentlyOn(thread_id)); 348 WatchDogThread::PostTask(FROM_HERE, callback_task); 349 } 350 351 void ThreadWatcher::ResetHangCounters() { 352 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 353 unresponsive_count_ = 0; 354 hung_processing_complete_ = false; 355 } 356 357 void ThreadWatcher::GotNoResponse() { 358 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 359 360 ++unresponsive_count_; 361 if (!IsVeryUnresponsive()) 362 return; 363 364 // Record total unresponsive_time since last pong message. 365 base::TimeDelta unresponse_time = base::TimeTicks::Now() - pong_time_; 366 unresponsive_time_histogram_->AddTime(unresponse_time); 367 368 // We have already collected stats for the non-responding watched thread. 369 if (hung_processing_complete_) 370 return; 371 372 // Record how other threads are responding. 
373 uint32 responding_thread_count = 0; 374 uint32 unresponding_thread_count = 0; 375 ThreadWatcherList::GetStatusOfThreads(&responding_thread_count, 376 &unresponding_thread_count); 377 378 // Record how many watched threads are responding. 379 responsive_count_histogram_->Add(responding_thread_count); 380 381 // Record how many watched threads are not responding. 382 unresponsive_count_histogram_->Add(unresponding_thread_count); 383 384 // Crash the browser if the watched thread is to be crashed on hang and if the 385 // number of other threads responding is less than or equal to 386 // live_threads_threshold_ and at least one other thread is responding. 387 if (crash_on_hang_ && 388 responding_thread_count > 0 && 389 responding_thread_count <= live_threads_threshold_) { 390 static bool crashed_once = false; 391 if (!crashed_once) { 392 crashed_once = true; 393 CrashBecauseThreadWasUnresponsive(thread_id_); 394 } 395 } 396 397 hung_processing_complete_ = true; 398 } 399 400 bool ThreadWatcher::IsVeryUnresponsive() { 401 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 402 return unresponsive_count_ >= unresponsive_threshold_; 403 } 404 405 // ThreadWatcherList methods and members. 
406 // 407 // static 408 ThreadWatcherList* ThreadWatcherList::g_thread_watcher_list_ = NULL; 409 // static 410 const int ThreadWatcherList::kSleepSeconds = 1; 411 // static 412 const int ThreadWatcherList::kUnresponsiveSeconds = 2; 413 // static 414 const int ThreadWatcherList::kUnresponsiveCount = 9; 415 // static 416 const int ThreadWatcherList::kLiveThreadsThreshold = 2; 417 418 ThreadWatcherList::CrashDataThresholds::CrashDataThresholds( 419 uint32 live_threads_threshold, 420 uint32 unresponsive_threshold) 421 : live_threads_threshold(live_threads_threshold), 422 unresponsive_threshold(unresponsive_threshold) { 423 } 424 425 ThreadWatcherList::CrashDataThresholds::CrashDataThresholds() 426 : live_threads_threshold(kLiveThreadsThreshold), 427 unresponsive_threshold(kUnresponsiveCount) { 428 } 429 430 // static 431 void ThreadWatcherList::StartWatchingAll(const CommandLine& command_line) { 432 uint32 unresponsive_threshold; 433 CrashOnHangThreadMap crash_on_hang_threads; 434 ParseCommandLine(command_line, 435 &unresponsive_threshold, 436 &crash_on_hang_threads); 437 438 ThreadWatcherObserver::SetupNotifications( 439 base::TimeDelta::FromSeconds(kSleepSeconds * unresponsive_threshold)); 440 441 WatchDogThread::PostDelayedTask( 442 FROM_HERE, 443 base::Bind(&ThreadWatcherList::InitializeAndStartWatching, 444 unresponsive_threshold, 445 crash_on_hang_threads), 446 base::TimeDelta::FromSeconds(120)); 447 } 448 449 // static 450 void ThreadWatcherList::StopWatchingAll() { 451 ThreadWatcherObserver::RemoveNotifications(); 452 DeleteAll(); 453 } 454 455 // static 456 void ThreadWatcherList::Register(ThreadWatcher* watcher) { 457 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 458 if (!g_thread_watcher_list_) 459 return; 460 DCHECK(!g_thread_watcher_list_->Find(watcher->thread_id())); 461 g_thread_watcher_list_->registered_[watcher->thread_id()] = watcher; 462 } 463 464 // static 465 bool ThreadWatcherList::IsRegistered(const BrowserThread::ID thread_id) { 466 
DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 467 return NULL != ThreadWatcherList::Find(thread_id); 468 } 469 470 // static 471 void ThreadWatcherList::GetStatusOfThreads(uint32* responding_thread_count, 472 uint32* unresponding_thread_count) { 473 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 474 *responding_thread_count = 0; 475 *unresponding_thread_count = 0; 476 if (!g_thread_watcher_list_) 477 return; 478 479 for (RegistrationList::iterator it = 480 g_thread_watcher_list_->registered_.begin(); 481 g_thread_watcher_list_->registered_.end() != it; 482 ++it) { 483 if (it->second->IsVeryUnresponsive()) 484 ++(*unresponding_thread_count); 485 else 486 ++(*responding_thread_count); 487 } 488 } 489 490 // static 491 void ThreadWatcherList::WakeUpAll() { 492 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 493 if (!g_thread_watcher_list_) 494 return; 495 496 for (RegistrationList::iterator it = 497 g_thread_watcher_list_->registered_.begin(); 498 g_thread_watcher_list_->registered_.end() != it; 499 ++it) 500 it->second->WakeUp(); 501 } 502 503 ThreadWatcherList::ThreadWatcherList() { 504 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 505 CHECK(!g_thread_watcher_list_); 506 g_thread_watcher_list_ = this; 507 } 508 509 ThreadWatcherList::~ThreadWatcherList() { 510 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 511 DCHECK(this == g_thread_watcher_list_); 512 g_thread_watcher_list_ = NULL; 513 } 514 515 // static 516 void ThreadWatcherList::ParseCommandLine( 517 const CommandLine& command_line, 518 uint32* unresponsive_threshold, 519 CrashOnHangThreadMap* crash_on_hang_threads) { 520 // Initialize |unresponsive_threshold| to a default value. 521 *unresponsive_threshold = kUnresponsiveCount; 522 523 // Increase the unresponsive_threshold on the Stable and Beta channels to 524 // reduce the number of crashes due to ThreadWatcher. 
525 chrome::VersionInfo::Channel channel = chrome::VersionInfo::GetChannel(); 526 if (channel == chrome::VersionInfo::CHANNEL_STABLE) { 527 *unresponsive_threshold *= 4; 528 } else if (channel == chrome::VersionInfo::CHANNEL_BETA) { 529 *unresponsive_threshold *= 2; 530 } 531 532 #if defined(OS_WIN) 533 // For Windows XP (old systems), double the unresponsive_threshold to give 534 // the OS a chance to schedule UI/IO threads a time slice to respond with a 535 // pong message (to get around limitations with the OS). 536 if (base::win::GetVersion() <= base::win::VERSION_XP) 537 *unresponsive_threshold *= 2; 538 #endif 539 540 uint32 crash_seconds = *unresponsive_threshold * kUnresponsiveSeconds; 541 std::string crash_on_hang_thread_names; 542 bool has_command_line_overwrite = false; 543 if (command_line.HasSwitch(switches::kCrashOnHangThreads)) { 544 crash_on_hang_thread_names = 545 command_line.GetSwitchValueASCII(switches::kCrashOnHangThreads); 546 has_command_line_overwrite = true; 547 } else if (channel != chrome::VersionInfo::CHANNEL_STABLE) { 548 // Default to crashing the browser if UI or IO or FILE threads are not 549 // responsive except in stable channel. 550 crash_on_hang_thread_names = base::StringPrintf( 551 "UI:%d:%d,IO:%d:%d,FILE:%d:%d", 552 kLiveThreadsThreshold, crash_seconds, 553 kLiveThreadsThreshold, crash_seconds, 554 kLiveThreadsThreshold, crash_seconds * 5); 555 } 556 557 ParseCommandLineCrashOnHangThreads(crash_on_hang_thread_names, 558 kLiveThreadsThreshold, 559 crash_seconds, 560 crash_on_hang_threads); 561 562 if (channel != chrome::VersionInfo::CHANNEL_CANARY || 563 has_command_line_overwrite) { 564 return; 565 } 566 567 // Set up a field trial for 100% of the users to crash if either UI or IO 568 // thread is not responsive for 30 seconds (or 15 pings). 
569 scoped_refptr<base::FieldTrial> field_trial( 570 base::FieldTrialList::FactoryGetFieldTrial( 571 "ThreadWatcher", 100, "default_hung_threads", 572 2013, 10, 30, base::FieldTrial::SESSION_RANDOMIZED, NULL)); 573 int hung_thread_group = field_trial->AppendGroup("hung_thread", 100); 574 if (field_trial->group() == hung_thread_group) { 575 for (CrashOnHangThreadMap::iterator it = crash_on_hang_threads->begin(); 576 crash_on_hang_threads->end() != it; 577 ++it) { 578 if (it->first != "IO") 579 continue; 580 it->second.live_threads_threshold = INT_MAX; 581 it->second.unresponsive_threshold = 15; 582 } 583 } 584 } 585 586 // static 587 void ThreadWatcherList::ParseCommandLineCrashOnHangThreads( 588 const std::string& crash_on_hang_thread_names, 589 uint32 default_live_threads_threshold, 590 uint32 default_crash_seconds, 591 CrashOnHangThreadMap* crash_on_hang_threads) { 592 base::StringTokenizer tokens(crash_on_hang_thread_names, ","); 593 std::vector<std::string> values; 594 while (tokens.GetNext()) { 595 const std::string& token = tokens.token(); 596 base::SplitString(token, ':', &values); 597 std::string thread_name = values[0]; 598 599 uint32 live_threads_threshold = default_live_threads_threshold; 600 uint32 crash_seconds = default_crash_seconds; 601 if (values.size() >= 2 && 602 (!base::StringToUint(values[1], &live_threads_threshold))) { 603 continue; 604 } 605 if (values.size() >= 3 && 606 (!base::StringToUint(values[2], &crash_seconds))) { 607 continue; 608 } 609 uint32 unresponsive_threshold = static_cast<uint32>( 610 ceil(static_cast<float>(crash_seconds) / kUnresponsiveSeconds)); 611 612 CrashDataThresholds crash_data(live_threads_threshold, 613 unresponsive_threshold); 614 // Use the last specifier. 
615 (*crash_on_hang_threads)[thread_name] = crash_data; 616 } 617 } 618 619 // static 620 void ThreadWatcherList::InitializeAndStartWatching( 621 uint32 unresponsive_threshold, 622 const CrashOnHangThreadMap& crash_on_hang_threads) { 623 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 624 625 ThreadWatcherList* thread_watcher_list = new ThreadWatcherList(); 626 CHECK(thread_watcher_list); 627 628 BrowserThread::PostTask( 629 BrowserThread::UI, 630 FROM_HERE, 631 base::Bind(&StartupTimeBomb::DisarmStartupTimeBomb)); 632 633 const base::TimeDelta kSleepTime = 634 base::TimeDelta::FromSeconds(kSleepSeconds); 635 const base::TimeDelta kUnresponsiveTime = 636 base::TimeDelta::FromSeconds(kUnresponsiveSeconds); 637 638 StartWatching(BrowserThread::UI, "UI", kSleepTime, kUnresponsiveTime, 639 unresponsive_threshold, crash_on_hang_threads); 640 StartWatching(BrowserThread::IO, "IO", kSleepTime, kUnresponsiveTime, 641 unresponsive_threshold, crash_on_hang_threads); 642 StartWatching(BrowserThread::DB, "DB", kSleepTime, kUnresponsiveTime, 643 unresponsive_threshold, crash_on_hang_threads); 644 StartWatching(BrowserThread::FILE, "FILE", kSleepTime, kUnresponsiveTime, 645 unresponsive_threshold, crash_on_hang_threads); 646 StartWatching(BrowserThread::CACHE, "CACHE", kSleepTime, kUnresponsiveTime, 647 unresponsive_threshold, crash_on_hang_threads); 648 } 649 650 // static 651 void ThreadWatcherList::StartWatching( 652 const BrowserThread::ID& thread_id, 653 const std::string& thread_name, 654 const base::TimeDelta& sleep_time, 655 const base::TimeDelta& unresponsive_time, 656 uint32 unresponsive_threshold, 657 const CrashOnHangThreadMap& crash_on_hang_threads) { 658 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 659 660 CrashOnHangThreadMap::const_iterator it = 661 crash_on_hang_threads.find(thread_name); 662 bool crash_on_hang = false; 663 uint32 live_threads_threshold = 0; 664 if (it != crash_on_hang_threads.end()) { 665 crash_on_hang = true; 666 
live_threads_threshold = it->second.live_threads_threshold; 667 unresponsive_threshold = it->second.unresponsive_threshold; 668 } 669 670 ThreadWatcher::StartWatching( 671 ThreadWatcher::WatchingParams(thread_id, 672 thread_name, 673 sleep_time, 674 unresponsive_time, 675 unresponsive_threshold, 676 crash_on_hang, 677 live_threads_threshold)); 678 } 679 680 // static 681 void ThreadWatcherList::DeleteAll() { 682 if (!WatchDogThread::CurrentlyOnWatchDogThread()) { 683 WatchDogThread::PostTask( 684 FROM_HERE, 685 base::Bind(&ThreadWatcherList::DeleteAll)); 686 return; 687 } 688 689 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 690 if (!g_thread_watcher_list_) 691 return; 692 693 // Delete all thread watcher objects. 694 while (!g_thread_watcher_list_->registered_.empty()) { 695 RegistrationList::iterator it = g_thread_watcher_list_->registered_.begin(); 696 delete it->second; 697 g_thread_watcher_list_->registered_.erase(it); 698 } 699 700 delete g_thread_watcher_list_; 701 } 702 703 // static 704 ThreadWatcher* ThreadWatcherList::Find(const BrowserThread::ID& thread_id) { 705 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 706 if (!g_thread_watcher_list_) 707 return NULL; 708 RegistrationList::iterator it = 709 g_thread_watcher_list_->registered_.find(thread_id); 710 if (g_thread_watcher_list_->registered_.end() == it) 711 return NULL; 712 return it->second; 713 } 714 715 // ThreadWatcherObserver methods and members. 
716 // 717 // static 718 ThreadWatcherObserver* ThreadWatcherObserver::g_thread_watcher_observer_ = NULL; 719 720 ThreadWatcherObserver::ThreadWatcherObserver( 721 const base::TimeDelta& wakeup_interval) 722 : last_wakeup_time_(base::TimeTicks::Now()), 723 wakeup_interval_(wakeup_interval) { 724 CHECK(!g_thread_watcher_observer_); 725 g_thread_watcher_observer_ = this; 726 } 727 728 ThreadWatcherObserver::~ThreadWatcherObserver() { 729 DCHECK(this == g_thread_watcher_observer_); 730 g_thread_watcher_observer_ = NULL; 731 } 732 733 // static 734 void ThreadWatcherObserver::SetupNotifications( 735 const base::TimeDelta& wakeup_interval) { 736 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 737 ThreadWatcherObserver* observer = new ThreadWatcherObserver(wakeup_interval); 738 MetricsService::SetUpNotifications(&observer->registrar_, observer); 739 } 740 741 // static 742 void ThreadWatcherObserver::RemoveNotifications() { 743 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 744 if (!g_thread_watcher_observer_) 745 return; 746 g_thread_watcher_observer_->registrar_.RemoveAll(); 747 delete g_thread_watcher_observer_; 748 } 749 750 void ThreadWatcherObserver::Observe( 751 int type, 752 const content::NotificationSource& source, 753 const content::NotificationDetails& details) { 754 // There is some user activity, see if thread watchers are to be awakened. 755 base::TimeTicks now = base::TimeTicks::Now(); 756 if ((now - last_wakeup_time_) < wakeup_interval_) 757 return; 758 last_wakeup_time_ = now; 759 WatchDogThread::PostTask( 760 FROM_HERE, 761 base::Bind(&ThreadWatcherList::WakeUpAll)); 762 } 763 764 // WatchDogThread methods and members. 765 766 // This lock protects g_watchdog_thread. 767 static base::LazyInstance<base::Lock>::Leaky 768 g_watchdog_lock = LAZY_INSTANCE_INITIALIZER; 769 770 // The singleton of this class. 
771 static WatchDogThread* g_watchdog_thread = NULL; 772 773 WatchDogThread::WatchDogThread() : Thread("BrowserWatchdog") { 774 } 775 776 WatchDogThread::~WatchDogThread() { 777 Stop(); 778 } 779 780 // static 781 bool WatchDogThread::CurrentlyOnWatchDogThread() { 782 base::AutoLock lock(g_watchdog_lock.Get()); 783 return g_watchdog_thread && 784 g_watchdog_thread->message_loop() == base::MessageLoop::current(); 785 } 786 787 // static 788 bool WatchDogThread::PostTask(const tracked_objects::Location& from_here, 789 const base::Closure& task) { 790 return PostTaskHelper(from_here, task, base::TimeDelta()); 791 } 792 793 // static 794 bool WatchDogThread::PostDelayedTask(const tracked_objects::Location& from_here, 795 const base::Closure& task, 796 base::TimeDelta delay) { 797 return PostTaskHelper(from_here, task, delay); 798 } 799 800 // static 801 bool WatchDogThread::PostTaskHelper( 802 const tracked_objects::Location& from_here, 803 const base::Closure& task, 804 base::TimeDelta delay) { 805 { 806 base::AutoLock lock(g_watchdog_lock.Get()); 807 808 base::MessageLoop* message_loop = g_watchdog_thread ? 809 g_watchdog_thread->message_loop() : NULL; 810 if (message_loop) { 811 message_loop->PostDelayedTask(from_here, task, delay); 812 return true; 813 } 814 } 815 816 return false; 817 } 818 819 void WatchDogThread::Init() { 820 // This thread shouldn't be allowed to perform any blocking disk I/O. 821 base::ThreadRestrictions::SetIOAllowed(false); 822 823 base::AutoLock lock(g_watchdog_lock.Get()); 824 CHECK(!g_watchdog_thread); 825 g_watchdog_thread = this; 826 } 827 828 void WatchDogThread::CleanUp() { 829 base::AutoLock lock(g_watchdog_lock.Get()); 830 g_watchdog_thread = NULL; 831 } 832 833 namespace { 834 835 // StartupWatchDogThread methods and members. 836 // 837 // Class for detecting hangs during startup. 
838 class StartupWatchDogThread : public base::Watchdog { 839 public: 840 // Constructor specifies how long the StartupWatchDogThread will wait before 841 // alarming. 842 explicit StartupWatchDogThread(const base::TimeDelta& duration) 843 : base::Watchdog(duration, "Startup watchdog thread", true) { 844 } 845 846 // Alarm is called if the time expires after an Arm() without someone calling 847 // Disarm(). When Alarm goes off, in release mode we get the crash dump 848 // without crashing and in debug mode we break into the debugger. 849 virtual void Alarm() OVERRIDE { 850 #ifndef NDEBUG 851 DCHECK(false); 852 #else 853 logging::DumpWithoutCrashing(); 854 #endif 855 } 856 857 DISALLOW_COPY_AND_ASSIGN(StartupWatchDogThread); 858 }; 859 860 // ShutdownWatchDogThread methods and members. 861 // 862 // Class for detecting hangs during shutdown. 863 class ShutdownWatchDogThread : public base::Watchdog { 864 public: 865 // Constructor specifies how long the ShutdownWatchDogThread will wait before 866 // alarming. 867 explicit ShutdownWatchDogThread(const base::TimeDelta& duration) 868 : base::Watchdog(duration, "Shutdown watchdog thread", true) { 869 } 870 871 // Alarm is called if the time expires after an Arm() without someone calling 872 // Disarm(). We crash the browser if this method is called. 873 virtual void Alarm() OVERRIDE { 874 ShutdownCrash(); 875 } 876 877 DISALLOW_COPY_AND_ASSIGN(ShutdownWatchDogThread); 878 }; 879 } // namespace 880 881 // StartupTimeBomb methods and members. 
882 // 883 // static 884 StartupTimeBomb* StartupTimeBomb::g_startup_timebomb_ = NULL; 885 886 StartupTimeBomb::StartupTimeBomb() 887 : startup_watchdog_(NULL), 888 thread_id_(base::PlatformThread::CurrentId()) { 889 CHECK(!g_startup_timebomb_); 890 g_startup_timebomb_ = this; 891 } 892 893 StartupTimeBomb::~StartupTimeBomb() { 894 DCHECK(this == g_startup_timebomb_); 895 DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId()); 896 if (startup_watchdog_) 897 Disarm(); 898 g_startup_timebomb_ = NULL; 899 } 900 901 void StartupTimeBomb::Arm(const base::TimeDelta& duration) { 902 DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId()); 903 DCHECK(!startup_watchdog_); 904 // TODO(rtenneti): http://crbug.com/112970. Don't arm the startup timebomb 905 // until we fix breakpad code not to crash in logging::DumpWithoutCrashing(). 906 // startup_watchdog_ = new StartupWatchDogThread(duration); 907 // startup_watchdog_->Arm(); 908 return; 909 } 910 911 void StartupTimeBomb::Disarm() { 912 DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId()); 913 if (startup_watchdog_) { 914 startup_watchdog_->Disarm(); 915 startup_watchdog_->Cleanup(); 916 DeleteStartupWatchdog(); 917 } 918 } 919 920 void StartupTimeBomb::DeleteStartupWatchdog() { 921 DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId()); 922 if (startup_watchdog_->IsJoinable()) { 923 // Allow the watchdog thread to shutdown on UI. Watchdog thread shutdowns 924 // very fast. 
925 base::ThreadRestrictions::SetIOAllowed(true); 926 delete startup_watchdog_; 927 startup_watchdog_ = NULL; 928 return; 929 } 930 base::MessageLoop::current()->PostDelayedTask( 931 FROM_HERE, 932 base::Bind(&StartupTimeBomb::DeleteStartupWatchdog, 933 base::Unretained(this)), 934 base::TimeDelta::FromSeconds(10)); 935 } 936 937 // static 938 void StartupTimeBomb::DisarmStartupTimeBomb() { 939 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 940 if (g_startup_timebomb_) 941 g_startup_timebomb_->Disarm(); 942 } 943 944 // ShutdownWatcherHelper methods and members. 945 // 946 // ShutdownWatcherHelper is a wrapper class for detecting hangs during 947 // shutdown. 948 ShutdownWatcherHelper::ShutdownWatcherHelper() 949 : shutdown_watchdog_(NULL), 950 thread_id_(base::PlatformThread::CurrentId()) { 951 } 952 953 ShutdownWatcherHelper::~ShutdownWatcherHelper() { 954 DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId()); 955 if (shutdown_watchdog_) { 956 shutdown_watchdog_->Disarm(); 957 delete shutdown_watchdog_; 958 shutdown_watchdog_ = NULL; 959 } 960 } 961 962 void ShutdownWatcherHelper::Arm(const base::TimeDelta& duration) { 963 DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId()); 964 DCHECK(!shutdown_watchdog_); 965 base::TimeDelta actual_duration = duration; 966 967 chrome::VersionInfo::Channel channel = chrome::VersionInfo::GetChannel(); 968 if (channel == chrome::VersionInfo::CHANNEL_STABLE) { 969 actual_duration *= 20; 970 } else if (channel == chrome::VersionInfo::CHANNEL_BETA || 971 channel == chrome::VersionInfo::CHANNEL_DEV) { 972 actual_duration *= 10; 973 } 974 975 #if defined(OS_WIN) 976 // On Windows XP, give twice the time for shutdown. 977 if (base::win::GetVersion() <= base::win::VERSION_XP) 978 actual_duration *= 2; 979 #endif 980 981 shutdown_watchdog_ = new ShutdownWatchDogThread(actual_duration); 982 shutdown_watchdog_->Arm(); 983 } 984