// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "chrome/browser/metrics/thread_watcher.h"

#include <math.h>  // ceil

#include "base/bind.h"
#include "base/compiler_specific.h"
#include "base/debug/alias.h"
#include "base/lazy_instance.h"
#include "base/strings/string_number_conversions.h"
#include "base/strings/string_split.h"
#include "base/strings/string_tokenizer.h"
#include "base/strings/stringprintf.h"
#include "base/threading/thread_restrictions.h"
#include "build/build_config.h"
#include "chrome/browser/metrics/metrics_service.h"
#include "chrome/common/chrome_switches.h"
#include "chrome/common/chrome_version_info.h"
#include "chrome/common/dump_without_crashing.h"
#include "chrome/common/logging_chrome.h"

#if defined(OS_WIN)
#include "base/win/windows_version.h"
#endif

using content::BrowserThread;

namespace {

// The following are unique function names for forcing the crash when a thread
// is unresponsive. This makes it possible to tell from the callstack alone what
// thread was unresponsive.
//
// We disable optimizations for this block of functions so the compiler doesn't
// merge them all together.
MSVC_DISABLE_OPTIMIZE()
MSVC_PUSH_DISABLE_WARNING(4748)

#ifndef NDEBUG
// Debug builds only: returns a null int pointer for NullPointerCrash() to
// write through.
int* NullPointer() {
  return reinterpret_cast<int*>(NULL);
}
#endif

// Debug builds: writes |line_number| through a null pointer, which crashes.
// Release builds: calls logging::DumpWithoutCrashing() and returns, so the
// caller continues running.
void NullPointerCrash(int line_number) {
#ifndef NDEBUG
  *NullPointer() = line_number;  // Crash.
#else
  logging::DumpWithoutCrashing();
#endif
}

// Each function below passes its own __LINE__ to NullPointerCrash(), so the
// bodies differ from one another. Called from ShutdownWatchDogThread::Alarm().
NOINLINE void ShutdownCrash() {
  NullPointerCrash(__LINE__);
}

// One function per BrowserThread::ID; dispatched to from
// CrashBecauseThreadWasUnresponsive() below.
NOINLINE void ThreadUnresponsive_UI() {
  NullPointerCrash(__LINE__);
}

NOINLINE void ThreadUnresponsive_DB() {
  NullPointerCrash(__LINE__);
}

NOINLINE void ThreadUnresponsive_FILE() {
  NullPointerCrash(__LINE__);
}

NOINLINE void ThreadUnresponsive_FILE_USER_BLOCKING() {
  NullPointerCrash(__LINE__);
}

NOINLINE void ThreadUnresponsive_PROCESS_LAUNCHER() {
  NullPointerCrash(__LINE__);
}

NOINLINE void ThreadUnresponsive_CACHE() {
  NullPointerCrash(__LINE__);
}

NOINLINE void ThreadUnresponsive_IO() {
  NullPointerCrash(__LINE__);
}

MSVC_POP_WARNING()
MSVC_ENABLE_OPTIMIZE();

// Calls the ThreadUnresponsive_* function that corresponds to |thread_id|.
// In release builds those functions return (see NullPointerCrash()), and so
// does this function via the |return| in each case.
void CrashBecauseThreadWasUnresponsive(BrowserThread::ID thread_id) {
  // NOTE(review): presumably keeps |thread_id| from being optimized away so
  // it is visible in the dump -- confirm against base/debug/alias.h.
  base::debug::Alias(&thread_id);

  switch (thread_id) {
    case BrowserThread::UI:
      return ThreadUnresponsive_UI();
    case BrowserThread::DB:
      return ThreadUnresponsive_DB();
    case BrowserThread::FILE:
      return ThreadUnresponsive_FILE();
    case BrowserThread::FILE_USER_BLOCKING:
      return ThreadUnresponsive_FILE_USER_BLOCKING();
    case BrowserThread::PROCESS_LAUNCHER:
      return ThreadUnresponsive_PROCESS_LAUNCHER();
    case BrowserThread::CACHE:
      return ThreadUnresponsive_CACHE();
    case BrowserThread::IO:
      return ThreadUnresponsive_IO();
    case BrowserThread::ID_COUNT:
      CHECK(false);  // This shouldn't actually be reached!
      break;

    // Omission of the default handler is intentional -- that way the compiler
    // should warn if our switch becomes outdated.
  }

  CHECK(false) << "Unknown thread was unresponsive.";  // Shouldn't be reached.
}

}  // namespace

// ThreadWatcher methods and members.
123 ThreadWatcher::ThreadWatcher(const WatchingParams& params) 124 : thread_id_(params.thread_id), 125 thread_name_(params.thread_name), 126 watched_loop_( 127 BrowserThread::GetMessageLoopProxyForThread(params.thread_id)), 128 sleep_time_(params.sleep_time), 129 unresponsive_time_(params.unresponsive_time), 130 ping_time_(base::TimeTicks::Now()), 131 pong_time_(ping_time_), 132 ping_sequence_number_(0), 133 active_(false), 134 ping_count_(params.unresponsive_threshold), 135 response_time_histogram_(NULL), 136 unresponsive_time_histogram_(NULL), 137 unresponsive_count_(0), 138 hung_processing_complete_(false), 139 unresponsive_threshold_(params.unresponsive_threshold), 140 crash_on_hang_(params.crash_on_hang), 141 live_threads_threshold_(params.live_threads_threshold), 142 weak_ptr_factory_(this) { 143 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 144 Initialize(); 145 } 146 147 ThreadWatcher::~ThreadWatcher() {} 148 149 // static 150 void ThreadWatcher::StartWatching(const WatchingParams& params) { 151 DCHECK_GE(params.sleep_time.InMilliseconds(), 0); 152 DCHECK_GE(params.unresponsive_time.InMilliseconds(), 153 params.sleep_time.InMilliseconds()); 154 155 // If we are not on WatchDogThread, then post a task to call StartWatching on 156 // WatchDogThread. 157 if (!WatchDogThread::CurrentlyOnWatchDogThread()) { 158 WatchDogThread::PostTask( 159 FROM_HERE, 160 base::Bind(&ThreadWatcher::StartWatching, params)); 161 return; 162 } 163 164 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 165 166 // Create a new thread watcher object for the given thread and activate it. 167 ThreadWatcher* watcher = new ThreadWatcher(params); 168 169 DCHECK(watcher); 170 // If we couldn't register the thread watcher object, we are shutting down, 171 // then don't activate thread watching. 
172 if (!ThreadWatcherList::IsRegistered(params.thread_id)) 173 return; 174 watcher->ActivateThreadWatching(); 175 } 176 177 void ThreadWatcher::ActivateThreadWatching() { 178 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 179 if (active_) return; 180 active_ = true; 181 ping_count_ = unresponsive_threshold_; 182 ResetHangCounters(); 183 base::MessageLoop::current()->PostTask( 184 FROM_HERE, 185 base::Bind(&ThreadWatcher::PostPingMessage, 186 weak_ptr_factory_.GetWeakPtr())); 187 } 188 189 void ThreadWatcher::DeActivateThreadWatching() { 190 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 191 active_ = false; 192 ping_count_ = 0; 193 weak_ptr_factory_.InvalidateWeakPtrs(); 194 } 195 196 void ThreadWatcher::WakeUp() { 197 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 198 // There is some user activity, PostPingMessage task of thread watcher if 199 // needed. 200 if (!active_) return; 201 202 // Throw away the previous |unresponsive_count_| and start over again. Just 203 // before going to sleep, |unresponsive_count_| could be very close to 204 // |unresponsive_threshold_| and when user becomes active, 205 // |unresponsive_count_| can go over |unresponsive_threshold_| if there was no 206 // response for ping messages. Reset |unresponsive_count_| to start measuring 207 // the unresponsiveness of the threads when system becomes active. 208 unresponsive_count_ = 0; 209 210 if (ping_count_ <= 0) { 211 ping_count_ = unresponsive_threshold_; 212 ResetHangCounters(); 213 PostPingMessage(); 214 } else { 215 ping_count_ = unresponsive_threshold_; 216 } 217 } 218 219 void ThreadWatcher::PostPingMessage() { 220 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 221 // If we have stopped watching or if the user is idle, then stop sending 222 // ping messages. 223 if (!active_ || ping_count_ <= 0) 224 return; 225 226 // Save the current time when we have sent ping message. 
227 ping_time_ = base::TimeTicks::Now(); 228 229 // Send a ping message to the watched thread. Callback will be called on 230 // the WatchDogThread. 231 base::Closure callback( 232 base::Bind(&ThreadWatcher::OnPongMessage, weak_ptr_factory_.GetWeakPtr(), 233 ping_sequence_number_)); 234 if (watched_loop_->PostTask( 235 FROM_HERE, 236 base::Bind(&ThreadWatcher::OnPingMessage, thread_id_, 237 callback))) { 238 // Post a task to check the responsiveness of watched thread. 239 base::MessageLoop::current()->PostDelayedTask( 240 FROM_HERE, 241 base::Bind(&ThreadWatcher::OnCheckResponsiveness, 242 weak_ptr_factory_.GetWeakPtr(), ping_sequence_number_), 243 unresponsive_time_); 244 } else { 245 // Watched thread might have gone away, stop watching it. 246 DeActivateThreadWatching(); 247 } 248 } 249 250 void ThreadWatcher::OnPongMessage(uint64 ping_sequence_number) { 251 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 252 253 // Record watched thread's response time. 254 base::TimeTicks now = base::TimeTicks::Now(); 255 base::TimeDelta response_time = now - ping_time_; 256 response_time_histogram_->AddTime(response_time); 257 258 // Save the current time when we have got pong message. 259 pong_time_ = now; 260 261 // Check if there are any extra pings in flight. 262 DCHECK_EQ(ping_sequence_number_, ping_sequence_number); 263 if (ping_sequence_number_ != ping_sequence_number) 264 return; 265 266 // Increment sequence number for the next ping message to indicate watched 267 // thread is responsive. 268 ++ping_sequence_number_; 269 270 // If we have stopped watching or if the user is idle, then stop sending 271 // ping messages. 
272 if (!active_ || --ping_count_ <= 0) 273 return; 274 275 base::MessageLoop::current()->PostDelayedTask( 276 FROM_HERE, 277 base::Bind(&ThreadWatcher::PostPingMessage, 278 weak_ptr_factory_.GetWeakPtr()), 279 sleep_time_); 280 } 281 282 void ThreadWatcher::OnCheckResponsiveness(uint64 ping_sequence_number) { 283 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 284 // If we have stopped watching then consider thread as responding. 285 if (!active_) { 286 responsive_ = true; 287 return; 288 } 289 // If the latest ping_sequence_number_ is not same as the ping_sequence_number 290 // that is passed in, then we can assume OnPongMessage was called. 291 // OnPongMessage increments ping_sequence_number_. 292 if (ping_sequence_number_ != ping_sequence_number) { 293 // Reset unresponsive_count_ to zero because we got a response from the 294 // watched thread. 295 ResetHangCounters(); 296 297 responsive_ = true; 298 return; 299 } 300 // Record that we got no response from watched thread. 301 GotNoResponse(); 302 303 // Post a task to check the responsiveness of watched thread. 304 base::MessageLoop::current()->PostDelayedTask( 305 FROM_HERE, 306 base::Bind(&ThreadWatcher::OnCheckResponsiveness, 307 weak_ptr_factory_.GetWeakPtr(), ping_sequence_number_), 308 unresponsive_time_); 309 responsive_ = false; 310 } 311 312 void ThreadWatcher::Initialize() { 313 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 314 ThreadWatcherList::Register(this); 315 316 const std::string response_time_histogram_name = 317 "ThreadWatcher.ResponseTime." + thread_name_; 318 response_time_histogram_ = base::Histogram::FactoryTimeGet( 319 response_time_histogram_name, 320 base::TimeDelta::FromMilliseconds(1), 321 base::TimeDelta::FromSeconds(100), 50, 322 base::Histogram::kUmaTargetedHistogramFlag); 323 324 const std::string unresponsive_time_histogram_name = 325 "ThreadWatcher.Unresponsive." 
+ thread_name_; 326 unresponsive_time_histogram_ = base::Histogram::FactoryTimeGet( 327 unresponsive_time_histogram_name, 328 base::TimeDelta::FromMilliseconds(1), 329 base::TimeDelta::FromSeconds(100), 50, 330 base::Histogram::kUmaTargetedHistogramFlag); 331 332 const std::string responsive_count_histogram_name = 333 "ThreadWatcher.ResponsiveThreads." + thread_name_; 334 responsive_count_histogram_ = base::LinearHistogram::FactoryGet( 335 responsive_count_histogram_name, 1, 10, 11, 336 base::Histogram::kUmaTargetedHistogramFlag); 337 338 const std::string unresponsive_count_histogram_name = 339 "ThreadWatcher.UnresponsiveThreads." + thread_name_; 340 unresponsive_count_histogram_ = base::LinearHistogram::FactoryGet( 341 unresponsive_count_histogram_name, 1, 10, 11, 342 base::Histogram::kUmaTargetedHistogramFlag); 343 } 344 345 // static 346 void ThreadWatcher::OnPingMessage(const BrowserThread::ID& thread_id, 347 const base::Closure& callback_task) { 348 // This method is called on watched thread. 349 DCHECK(BrowserThread::CurrentlyOn(thread_id)); 350 WatchDogThread::PostTask(FROM_HERE, callback_task); 351 } 352 353 void ThreadWatcher::ResetHangCounters() { 354 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 355 unresponsive_count_ = 0; 356 hung_processing_complete_ = false; 357 } 358 359 void ThreadWatcher::GotNoResponse() { 360 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 361 362 ++unresponsive_count_; 363 if (!IsVeryUnresponsive()) 364 return; 365 366 // Record total unresponsive_time since last pong message. 367 base::TimeDelta unresponse_time = base::TimeTicks::Now() - pong_time_; 368 unresponsive_time_histogram_->AddTime(unresponse_time); 369 370 // We have already collected stats for the non-responding watched thread. 371 if (hung_processing_complete_) 372 return; 373 374 // Record how other threads are responding. 
375 uint32 responding_thread_count = 0; 376 uint32 unresponding_thread_count = 0; 377 ThreadWatcherList::GetStatusOfThreads(&responding_thread_count, 378 &unresponding_thread_count); 379 380 // Record how many watched threads are responding. 381 responsive_count_histogram_->Add(responding_thread_count); 382 383 // Record how many watched threads are not responding. 384 unresponsive_count_histogram_->Add(unresponding_thread_count); 385 386 // Crash the browser if the watched thread is to be crashed on hang and if the 387 // number of other threads responding is less than or equal to 388 // live_threads_threshold_ and at least one other thread is responding. 389 if (crash_on_hang_ && 390 responding_thread_count > 0 && 391 responding_thread_count <= live_threads_threshold_) { 392 static bool crashed_once = false; 393 if (!crashed_once) { 394 crashed_once = true; 395 CrashBecauseThreadWasUnresponsive(thread_id_); 396 } 397 } 398 399 hung_processing_complete_ = true; 400 } 401 402 bool ThreadWatcher::IsVeryUnresponsive() { 403 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 404 return unresponsive_count_ >= unresponsive_threshold_; 405 } 406 407 // ThreadWatcherList methods and members. 
408 // 409 // static 410 ThreadWatcherList* ThreadWatcherList::g_thread_watcher_list_ = NULL; 411 // static 412 const int ThreadWatcherList::kSleepSeconds = 1; 413 // static 414 const int ThreadWatcherList::kUnresponsiveSeconds = 2; 415 // static 416 const int ThreadWatcherList::kUnresponsiveCount = 9; 417 // static 418 const int ThreadWatcherList::kLiveThreadsThreshold = 2; 419 420 ThreadWatcherList::CrashDataThresholds::CrashDataThresholds( 421 uint32 live_threads_threshold, 422 uint32 unresponsive_threshold) 423 : live_threads_threshold(live_threads_threshold), 424 unresponsive_threshold(unresponsive_threshold) { 425 } 426 427 ThreadWatcherList::CrashDataThresholds::CrashDataThresholds() 428 : live_threads_threshold(kLiveThreadsThreshold), 429 unresponsive_threshold(kUnresponsiveCount) { 430 } 431 432 // static 433 void ThreadWatcherList::StartWatchingAll(const CommandLine& command_line) { 434 // TODO(rtenneti): Enable ThreadWatcher. 435 uint32 unresponsive_threshold; 436 CrashOnHangThreadMap crash_on_hang_threads; 437 ParseCommandLine(command_line, 438 &unresponsive_threshold, 439 &crash_on_hang_threads); 440 441 ThreadWatcherObserver::SetupNotifications( 442 base::TimeDelta::FromSeconds(kSleepSeconds * unresponsive_threshold)); 443 444 WatchDogThread::PostDelayedTask( 445 FROM_HERE, 446 base::Bind(&ThreadWatcherList::InitializeAndStartWatching, 447 unresponsive_threshold, 448 crash_on_hang_threads), 449 base::TimeDelta::FromSeconds(120)); 450 } 451 452 // static 453 void ThreadWatcherList::StopWatchingAll() { 454 // TODO(rtenneti): Enable ThreadWatcher. 
455 ThreadWatcherObserver::RemoveNotifications(); 456 DeleteAll(); 457 } 458 459 // static 460 void ThreadWatcherList::Register(ThreadWatcher* watcher) { 461 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 462 if (!g_thread_watcher_list_) 463 return; 464 DCHECK(!g_thread_watcher_list_->Find(watcher->thread_id())); 465 g_thread_watcher_list_->registered_[watcher->thread_id()] = watcher; 466 } 467 468 // static 469 bool ThreadWatcherList::IsRegistered(const BrowserThread::ID thread_id) { 470 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 471 return NULL != ThreadWatcherList::Find(thread_id); 472 } 473 474 // static 475 void ThreadWatcherList::GetStatusOfThreads(uint32* responding_thread_count, 476 uint32* unresponding_thread_count) { 477 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 478 *responding_thread_count = 0; 479 *unresponding_thread_count = 0; 480 if (!g_thread_watcher_list_) 481 return; 482 483 for (RegistrationList::iterator it = 484 g_thread_watcher_list_->registered_.begin(); 485 g_thread_watcher_list_->registered_.end() != it; 486 ++it) { 487 if (it->second->IsVeryUnresponsive()) 488 ++(*unresponding_thread_count); 489 else 490 ++(*responding_thread_count); 491 } 492 } 493 494 // static 495 void ThreadWatcherList::WakeUpAll() { 496 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 497 if (!g_thread_watcher_list_) 498 return; 499 500 for (RegistrationList::iterator it = 501 g_thread_watcher_list_->registered_.begin(); 502 g_thread_watcher_list_->registered_.end() != it; 503 ++it) 504 it->second->WakeUp(); 505 } 506 507 ThreadWatcherList::ThreadWatcherList() { 508 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 509 CHECK(!g_thread_watcher_list_); 510 g_thread_watcher_list_ = this; 511 } 512 513 ThreadWatcherList::~ThreadWatcherList() { 514 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 515 DCHECK(this == g_thread_watcher_list_); 516 g_thread_watcher_list_ = NULL; 517 } 518 519 // static 520 void 
ThreadWatcherList::ParseCommandLine( 521 const CommandLine& command_line, 522 uint32* unresponsive_threshold, 523 CrashOnHangThreadMap* crash_on_hang_threads) { 524 // Initialize |unresponsive_threshold| to a default value. 525 *unresponsive_threshold = kUnresponsiveCount; 526 527 // Increase the unresponsive_threshold on the Stable and Beta channels to 528 // reduce the number of crashes due to ThreadWatcher. 529 chrome::VersionInfo::Channel channel = chrome::VersionInfo::GetChannel(); 530 if (channel == chrome::VersionInfo::CHANNEL_STABLE) { 531 *unresponsive_threshold *= 4; 532 } else if (channel == chrome::VersionInfo::CHANNEL_BETA) { 533 *unresponsive_threshold *= 2; 534 } 535 536 #if defined(OS_WIN) 537 // For Windows XP (old systems), double the unresponsive_threshold to give 538 // the OS a chance to schedule UI/IO threads a time slice to respond with a 539 // pong message (to get around limitations with the OS). 540 if (base::win::GetVersion() <= base::win::VERSION_XP) 541 *unresponsive_threshold *= 2; 542 #endif 543 544 uint32 crash_seconds = *unresponsive_threshold * kUnresponsiveSeconds; 545 std::string crash_on_hang_thread_names; 546 bool has_command_line_overwrite = false; 547 if (command_line.HasSwitch(switches::kCrashOnHangThreads)) { 548 crash_on_hang_thread_names = 549 command_line.GetSwitchValueASCII(switches::kCrashOnHangThreads); 550 has_command_line_overwrite = true; 551 } else if (channel != chrome::VersionInfo::CHANNEL_STABLE) { 552 // Default to crashing the browser if UI or IO or FILE threads are not 553 // responsive except in stable channel. 
554 crash_on_hang_thread_names = base::StringPrintf( 555 "UI:%d:%d,IO:%d:%d,FILE:%d:%d", 556 kLiveThreadsThreshold, crash_seconds, 557 kLiveThreadsThreshold, crash_seconds, 558 kLiveThreadsThreshold, crash_seconds * 5); 559 } 560 561 ParseCommandLineCrashOnHangThreads(crash_on_hang_thread_names, 562 kLiveThreadsThreshold, 563 crash_seconds, 564 crash_on_hang_threads); 565 566 if (channel != chrome::VersionInfo::CHANNEL_CANARY || 567 has_command_line_overwrite) { 568 return; 569 } 570 571 // Set up a field trial for 100% of the users to crash if either UI or IO 572 // thread is not responsive for 30 seconds (or 15 pings). 573 scoped_refptr<base::FieldTrial> field_trial( 574 base::FieldTrialList::FactoryGetFieldTrial( 575 "ThreadWatcher", 100, "default_hung_threads", 576 2014, 10, 30, base::FieldTrial::SESSION_RANDOMIZED, NULL)); 577 int hung_thread_group = field_trial->AppendGroup("hung_thread", 100); 578 if (field_trial->group() == hung_thread_group) { 579 for (CrashOnHangThreadMap::iterator it = crash_on_hang_threads->begin(); 580 crash_on_hang_threads->end() != it; 581 ++it) { 582 if (it->first == "FILE") 583 continue; 584 it->second.live_threads_threshold = INT_MAX; 585 if (it->first == "UI") { 586 // TODO(rtenneti): set unresponsive threshold to 120 seconds to catch 587 // the worst UI hangs and for fewer crashes due to ThreadWatcher. Reduce 588 // it to a more reasonable time ala IO thread. 
589 it->second.unresponsive_threshold = 60; 590 } else { 591 it->second.unresponsive_threshold = 15; 592 } 593 } 594 } 595 } 596 597 // static 598 void ThreadWatcherList::ParseCommandLineCrashOnHangThreads( 599 const std::string& crash_on_hang_thread_names, 600 uint32 default_live_threads_threshold, 601 uint32 default_crash_seconds, 602 CrashOnHangThreadMap* crash_on_hang_threads) { 603 base::StringTokenizer tokens(crash_on_hang_thread_names, ","); 604 std::vector<std::string> values; 605 while (tokens.GetNext()) { 606 const std::string& token = tokens.token(); 607 base::SplitString(token, ':', &values); 608 std::string thread_name = values[0]; 609 610 uint32 live_threads_threshold = default_live_threads_threshold; 611 uint32 crash_seconds = default_crash_seconds; 612 if (values.size() >= 2 && 613 (!base::StringToUint(values[1], &live_threads_threshold))) { 614 continue; 615 } 616 if (values.size() >= 3 && 617 (!base::StringToUint(values[2], &crash_seconds))) { 618 continue; 619 } 620 uint32 unresponsive_threshold = static_cast<uint32>( 621 ceil(static_cast<float>(crash_seconds) / kUnresponsiveSeconds)); 622 623 CrashDataThresholds crash_data(live_threads_threshold, 624 unresponsive_threshold); 625 // Use the last specifier. 
626 (*crash_on_hang_threads)[thread_name] = crash_data; 627 } 628 } 629 630 // static 631 void ThreadWatcherList::InitializeAndStartWatching( 632 uint32 unresponsive_threshold, 633 const CrashOnHangThreadMap& crash_on_hang_threads) { 634 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 635 636 ThreadWatcherList* thread_watcher_list = new ThreadWatcherList(); 637 CHECK(thread_watcher_list); 638 639 BrowserThread::PostTask( 640 BrowserThread::UI, 641 FROM_HERE, 642 base::Bind(&StartupTimeBomb::DisarmStartupTimeBomb)); 643 644 const base::TimeDelta kSleepTime = 645 base::TimeDelta::FromSeconds(kSleepSeconds); 646 const base::TimeDelta kUnresponsiveTime = 647 base::TimeDelta::FromSeconds(kUnresponsiveSeconds); 648 649 StartWatching(BrowserThread::UI, "UI", kSleepTime, kUnresponsiveTime, 650 unresponsive_threshold, crash_on_hang_threads); 651 StartWatching(BrowserThread::IO, "IO", kSleepTime, kUnresponsiveTime, 652 unresponsive_threshold, crash_on_hang_threads); 653 StartWatching(BrowserThread::DB, "DB", kSleepTime, kUnresponsiveTime, 654 unresponsive_threshold, crash_on_hang_threads); 655 StartWatching(BrowserThread::FILE, "FILE", kSleepTime, kUnresponsiveTime, 656 unresponsive_threshold, crash_on_hang_threads); 657 StartWatching(BrowserThread::CACHE, "CACHE", kSleepTime, kUnresponsiveTime, 658 unresponsive_threshold, crash_on_hang_threads); 659 } 660 661 // static 662 void ThreadWatcherList::StartWatching( 663 const BrowserThread::ID& thread_id, 664 const std::string& thread_name, 665 const base::TimeDelta& sleep_time, 666 const base::TimeDelta& unresponsive_time, 667 uint32 unresponsive_threshold, 668 const CrashOnHangThreadMap& crash_on_hang_threads) { 669 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 670 671 CrashOnHangThreadMap::const_iterator it = 672 crash_on_hang_threads.find(thread_name); 673 bool crash_on_hang = false; 674 uint32 live_threads_threshold = 0; 675 if (it != crash_on_hang_threads.end()) { 676 crash_on_hang = true; 677 
live_threads_threshold = it->second.live_threads_threshold; 678 unresponsive_threshold = it->second.unresponsive_threshold; 679 } 680 681 ThreadWatcher::StartWatching( 682 ThreadWatcher::WatchingParams(thread_id, 683 thread_name, 684 sleep_time, 685 unresponsive_time, 686 unresponsive_threshold, 687 crash_on_hang, 688 live_threads_threshold)); 689 } 690 691 // static 692 void ThreadWatcherList::DeleteAll() { 693 if (!WatchDogThread::CurrentlyOnWatchDogThread()) { 694 WatchDogThread::PostTask( 695 FROM_HERE, 696 base::Bind(&ThreadWatcherList::DeleteAll)); 697 return; 698 } 699 700 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 701 if (!g_thread_watcher_list_) 702 return; 703 704 // Delete all thread watcher objects. 705 while (!g_thread_watcher_list_->registered_.empty()) { 706 RegistrationList::iterator it = g_thread_watcher_list_->registered_.begin(); 707 delete it->second; 708 g_thread_watcher_list_->registered_.erase(it); 709 } 710 711 delete g_thread_watcher_list_; 712 } 713 714 // static 715 ThreadWatcher* ThreadWatcherList::Find(const BrowserThread::ID& thread_id) { 716 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 717 if (!g_thread_watcher_list_) 718 return NULL; 719 RegistrationList::iterator it = 720 g_thread_watcher_list_->registered_.find(thread_id); 721 if (g_thread_watcher_list_->registered_.end() == it) 722 return NULL; 723 return it->second; 724 } 725 726 // ThreadWatcherObserver methods and members. 
727 // 728 // static 729 ThreadWatcherObserver* ThreadWatcherObserver::g_thread_watcher_observer_ = NULL; 730 731 ThreadWatcherObserver::ThreadWatcherObserver( 732 const base::TimeDelta& wakeup_interval) 733 : last_wakeup_time_(base::TimeTicks::Now()), 734 wakeup_interval_(wakeup_interval) { 735 CHECK(!g_thread_watcher_observer_); 736 g_thread_watcher_observer_ = this; 737 } 738 739 ThreadWatcherObserver::~ThreadWatcherObserver() { 740 DCHECK(this == g_thread_watcher_observer_); 741 g_thread_watcher_observer_ = NULL; 742 } 743 744 // static 745 void ThreadWatcherObserver::SetupNotifications( 746 const base::TimeDelta& wakeup_interval) { 747 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 748 ThreadWatcherObserver* observer = new ThreadWatcherObserver(wakeup_interval); 749 MetricsService::SetUpNotifications(&observer->registrar_, observer); 750 } 751 752 // static 753 void ThreadWatcherObserver::RemoveNotifications() { 754 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 755 if (!g_thread_watcher_observer_) 756 return; 757 g_thread_watcher_observer_->registrar_.RemoveAll(); 758 delete g_thread_watcher_observer_; 759 } 760 761 void ThreadWatcherObserver::Observe( 762 int type, 763 const content::NotificationSource& source, 764 const content::NotificationDetails& details) { 765 // There is some user activity, see if thread watchers are to be awakened. 766 base::TimeTicks now = base::TimeTicks::Now(); 767 if ((now - last_wakeup_time_) < wakeup_interval_) 768 return; 769 last_wakeup_time_ = now; 770 WatchDogThread::PostTask( 771 FROM_HERE, 772 base::Bind(&ThreadWatcherList::WakeUpAll)); 773 } 774 775 // WatchDogThread methods and members. 776 777 // This lock protects g_watchdog_thread. 778 static base::LazyInstance<base::Lock>::Leaky 779 g_watchdog_lock = LAZY_INSTANCE_INITIALIZER; 780 781 // The singleton of this class. 
782 static WatchDogThread* g_watchdog_thread = NULL; 783 784 WatchDogThread::WatchDogThread() : Thread("BrowserWatchdog") { 785 } 786 787 WatchDogThread::~WatchDogThread() { 788 Stop(); 789 } 790 791 // static 792 bool WatchDogThread::CurrentlyOnWatchDogThread() { 793 base::AutoLock lock(g_watchdog_lock.Get()); 794 return g_watchdog_thread && 795 g_watchdog_thread->message_loop() == base::MessageLoop::current(); 796 } 797 798 // static 799 bool WatchDogThread::PostTask(const tracked_objects::Location& from_here, 800 const base::Closure& task) { 801 return PostTaskHelper(from_here, task, base::TimeDelta()); 802 } 803 804 // static 805 bool WatchDogThread::PostDelayedTask(const tracked_objects::Location& from_here, 806 const base::Closure& task, 807 base::TimeDelta delay) { 808 return PostTaskHelper(from_here, task, delay); 809 } 810 811 // static 812 bool WatchDogThread::PostTaskHelper( 813 const tracked_objects::Location& from_here, 814 const base::Closure& task, 815 base::TimeDelta delay) { 816 { 817 base::AutoLock lock(g_watchdog_lock.Get()); 818 819 base::MessageLoop* message_loop = g_watchdog_thread ? 820 g_watchdog_thread->message_loop() : NULL; 821 if (message_loop) { 822 message_loop->PostDelayedTask(from_here, task, delay); 823 return true; 824 } 825 } 826 827 return false; 828 } 829 830 void WatchDogThread::Init() { 831 // This thread shouldn't be allowed to perform any blocking disk I/O. 832 base::ThreadRestrictions::SetIOAllowed(false); 833 834 base::AutoLock lock(g_watchdog_lock.Get()); 835 CHECK(!g_watchdog_thread); 836 g_watchdog_thread = this; 837 } 838 839 void WatchDogThread::CleanUp() { 840 base::AutoLock lock(g_watchdog_lock.Get()); 841 g_watchdog_thread = NULL; 842 } 843 844 namespace { 845 846 // StartupWatchDogThread methods and members. 847 // 848 // Class for detecting hangs during startup. 
849 class StartupWatchDogThread : public base::Watchdog { 850 public: 851 // Constructor specifies how long the StartupWatchDogThread will wait before 852 // alarming. 853 explicit StartupWatchDogThread(const base::TimeDelta& duration) 854 : base::Watchdog(duration, "Startup watchdog thread", true) { 855 } 856 857 // Alarm is called if the time expires after an Arm() without someone calling 858 // Disarm(). When Alarm goes off, in release mode we get the crash dump 859 // without crashing and in debug mode we break into the debugger. 860 virtual void Alarm() OVERRIDE { 861 #ifndef NDEBUG 862 DCHECK(false); 863 #else 864 logging::DumpWithoutCrashing(); 865 #endif 866 } 867 868 DISALLOW_COPY_AND_ASSIGN(StartupWatchDogThread); 869 }; 870 871 // ShutdownWatchDogThread methods and members. 872 // 873 // Class for detecting hangs during shutdown. 874 class ShutdownWatchDogThread : public base::Watchdog { 875 public: 876 // Constructor specifies how long the ShutdownWatchDogThread will wait before 877 // alarming. 878 explicit ShutdownWatchDogThread(const base::TimeDelta& duration) 879 : base::Watchdog(duration, "Shutdown watchdog thread", true) { 880 } 881 882 // Alarm is called if the time expires after an Arm() without someone calling 883 // Disarm(). We crash the browser if this method is called. 884 virtual void Alarm() OVERRIDE { 885 ShutdownCrash(); 886 } 887 888 DISALLOW_COPY_AND_ASSIGN(ShutdownWatchDogThread); 889 }; 890 } // namespace 891 892 // StartupTimeBomb methods and members. 
893 // 894 // static 895 StartupTimeBomb* StartupTimeBomb::g_startup_timebomb_ = NULL; 896 897 StartupTimeBomb::StartupTimeBomb() 898 : startup_watchdog_(NULL), 899 thread_id_(base::PlatformThread::CurrentId()) { 900 CHECK(!g_startup_timebomb_); 901 g_startup_timebomb_ = this; 902 } 903 904 StartupTimeBomb::~StartupTimeBomb() { 905 DCHECK(this == g_startup_timebomb_); 906 DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId()); 907 if (startup_watchdog_) 908 Disarm(); 909 g_startup_timebomb_ = NULL; 910 } 911 912 void StartupTimeBomb::Arm(const base::TimeDelta& duration) { 913 DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId()); 914 DCHECK(!startup_watchdog_); 915 startup_watchdog_ = new StartupWatchDogThread(duration); 916 startup_watchdog_->Arm(); 917 return; 918 } 919 920 void StartupTimeBomb::Disarm() { 921 DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId()); 922 if (startup_watchdog_) { 923 startup_watchdog_->Disarm(); 924 startup_watchdog_->Cleanup(); 925 DeleteStartupWatchdog(); 926 } 927 } 928 929 void StartupTimeBomb::DeleteStartupWatchdog() { 930 DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId()); 931 if (startup_watchdog_->IsJoinable()) { 932 // Allow the watchdog thread to shutdown on UI. Watchdog thread shutdowns 933 // very fast. 934 base::ThreadRestrictions::SetIOAllowed(true); 935 delete startup_watchdog_; 936 startup_watchdog_ = NULL; 937 return; 938 } 939 base::MessageLoop::current()->PostDelayedTask( 940 FROM_HERE, 941 base::Bind(&StartupTimeBomb::DeleteStartupWatchdog, 942 base::Unretained(this)), 943 base::TimeDelta::FromSeconds(10)); 944 } 945 946 // static 947 void StartupTimeBomb::DisarmStartupTimeBomb() { 948 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 949 if (g_startup_timebomb_) 950 g_startup_timebomb_->Disarm(); 951 } 952 953 // ShutdownWatcherHelper methods and members. 954 // 955 // ShutdownWatcherHelper is a wrapper class for detecting hangs during 956 // shutdown. 
957 ShutdownWatcherHelper::ShutdownWatcherHelper() 958 : shutdown_watchdog_(NULL), 959 thread_id_(base::PlatformThread::CurrentId()) { 960 } 961 962 ShutdownWatcherHelper::~ShutdownWatcherHelper() { 963 DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId()); 964 if (shutdown_watchdog_) { 965 shutdown_watchdog_->Disarm(); 966 delete shutdown_watchdog_; 967 shutdown_watchdog_ = NULL; 968 } 969 } 970 971 void ShutdownWatcherHelper::Arm(const base::TimeDelta& duration) { 972 DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId()); 973 DCHECK(!shutdown_watchdog_); 974 base::TimeDelta actual_duration = duration; 975 976 chrome::VersionInfo::Channel channel = chrome::VersionInfo::GetChannel(); 977 if (channel == chrome::VersionInfo::CHANNEL_STABLE) { 978 actual_duration *= 20; 979 } else if (channel == chrome::VersionInfo::CHANNEL_BETA || 980 channel == chrome::VersionInfo::CHANNEL_DEV) { 981 actual_duration *= 10; 982 } 983 984 #if defined(OS_WIN) 985 // On Windows XP, give twice the time for shutdown. 986 if (base::win::GetVersion() <= base::win::VERSION_XP) 987 actual_duration *= 2; 988 #endif 989 990 shutdown_watchdog_ = new ShutdownWatchDogThread(actual_duration); 991 shutdown_watchdog_->Arm(); 992 } 993