1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "chrome/browser/metrics/thread_watcher.h" 6 7 #include <math.h> // ceil 8 9 #include "base/bind.h" 10 #include "base/compiler_specific.h" 11 #include "base/debug/alias.h" 12 #include "base/debug/debugger.h" 13 #include "base/debug/dump_without_crashing.h" 14 #include "base/lazy_instance.h" 15 #include "base/metrics/field_trial.h" 16 #include "base/strings/string_number_conversions.h" 17 #include "base/strings/string_split.h" 18 #include "base/strings/string_tokenizer.h" 19 #include "base/strings/stringprintf.h" 20 #include "base/threading/thread_restrictions.h" 21 #include "build/build_config.h" 22 #include "chrome/browser/chrome_notification_types.h" 23 #include "chrome/common/chrome_switches.h" 24 #include "chrome/common/chrome_version_info.h" 25 #include "chrome/common/logging_chrome.h" 26 #include "content/public/browser/notification_service.h" 27 28 #if defined(OS_WIN) 29 #include "base/win/windows_version.h" 30 #endif 31 32 using content::BrowserThread; 33 34 namespace { 35 36 // The following are unique function names for forcing the crash when a thread 37 // is unresponsive. This makes it possible to tell from the callstack alone what 38 // thread was unresponsive. 39 // 40 // We disable optimizations for this block of functions so the compiler doesn't 41 // merge them all together. 42 MSVC_DISABLE_OPTIMIZE() 43 MSVC_PUSH_DISABLE_WARNING(4748) 44 45 void ReportThreadHang() { 46 #if defined(NDEBUG) 47 base::debug::DumpWithoutCrashing(); 48 #else 49 base::debug::BreakDebugger(); 50 #endif 51 } 52 53 #if !defined(OS_ANDROID) || !defined(NDEBUG) 54 // TODO(rtenneti): Enabled crashing, after getting data. 55 NOINLINE void StartupHang() { 56 ReportThreadHang(); 57 } 58 #endif // OS_ANDROID 59 60 NOINLINE void ShutdownHang() { 61 ReportThreadHang(); 62 } 63 64 NOINLINE void ThreadUnresponsive_UI() { 65 ReportThreadHang(); 66 } 67 68 NOINLINE void ThreadUnresponsive_DB() { 69 ReportThreadHang(); 70 } 71 72 NOINLINE void ThreadUnresponsive_FILE() { 73 ReportThreadHang(); 74 } 75 76 NOINLINE void ThreadUnresponsive_FILE_USER_BLOCKING() { 77 ReportThreadHang(); 78 } 79 80 NOINLINE void ThreadUnresponsive_PROCESS_LAUNCHER() { 81 ReportThreadHang(); 82 } 83 84 NOINLINE void ThreadUnresponsive_CACHE() { 85 ReportThreadHang(); 86 } 87 88 NOINLINE void ThreadUnresponsive_IO() { 89 ReportThreadHang(); 90 } 91 92 MSVC_POP_WARNING() 93 MSVC_ENABLE_OPTIMIZE(); 94 95 void CrashBecauseThreadWasUnresponsive(BrowserThread::ID thread_id) { 96 base::debug::Alias(&thread_id); 97 98 switch (thread_id) { 99 case BrowserThread::UI: 100 return ThreadUnresponsive_UI(); 101 case BrowserThread::DB: 102 return ThreadUnresponsive_DB(); 103 case BrowserThread::FILE: 104 return ThreadUnresponsive_FILE(); 105 case BrowserThread::FILE_USER_BLOCKING: 106 return ThreadUnresponsive_FILE_USER_BLOCKING(); 107 case BrowserThread::PROCESS_LAUNCHER: 108 return ThreadUnresponsive_PROCESS_LAUNCHER(); 109 case BrowserThread::CACHE: 110 return ThreadUnresponsive_CACHE(); 111 case BrowserThread::IO: 112 return ThreadUnresponsive_IO(); 113 case BrowserThread::ID_COUNT: 114 CHECK(false); // This shouldn't actually be reached! 115 break; 116 117 // Omission of the default hander is intentional -- that way the compiler 118 // should warn if our switch becomes outdated. 119 } 120 121 CHECK(false) << "Unknown thread was unresponsive."; // Shouldn't be reached. 122 } 123 124 } // namespace 125 126 // ThreadWatcher methods and members. 127 ThreadWatcher::ThreadWatcher(const WatchingParams& params) 128 : thread_id_(params.thread_id), 129 thread_name_(params.thread_name), 130 watched_loop_( 131 BrowserThread::GetMessageLoopProxyForThread(params.thread_id)), 132 sleep_time_(params.sleep_time), 133 unresponsive_time_(params.unresponsive_time), 134 ping_time_(base::TimeTicks::Now()), 135 pong_time_(ping_time_), 136 ping_sequence_number_(0), 137 active_(false), 138 ping_count_(params.unresponsive_threshold), 139 response_time_histogram_(NULL), 140 unresponsive_time_histogram_(NULL), 141 unresponsive_count_(0), 142 hung_processing_complete_(false), 143 unresponsive_threshold_(params.unresponsive_threshold), 144 crash_on_hang_(params.crash_on_hang), 145 live_threads_threshold_(params.live_threads_threshold), 146 weak_ptr_factory_(this) { 147 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 148 Initialize(); 149 } 150 151 ThreadWatcher::~ThreadWatcher() {} 152 153 // static 154 void ThreadWatcher::StartWatching(const WatchingParams& params) { 155 DCHECK_GE(params.sleep_time.InMilliseconds(), 0); 156 DCHECK_GE(params.unresponsive_time.InMilliseconds(), 157 params.sleep_time.InMilliseconds()); 158 159 // If we are not on WatchDogThread, then post a task to call StartWatching on 160 // WatchDogThread. 161 if (!WatchDogThread::CurrentlyOnWatchDogThread()) { 162 WatchDogThread::PostTask( 163 FROM_HERE, 164 base::Bind(&ThreadWatcher::StartWatching, params)); 165 return; 166 } 167 168 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 169 170 // Create a new thread watcher object for the given thread and activate it. 171 ThreadWatcher* watcher = new ThreadWatcher(params); 172 173 DCHECK(watcher); 174 // If we couldn't register the thread watcher object, we are shutting down, 175 // then don't activate thread watching. 176 if (!ThreadWatcherList::IsRegistered(params.thread_id)) 177 return; 178 watcher->ActivateThreadWatching(); 179 } 180 181 void ThreadWatcher::ActivateThreadWatching() { 182 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 183 if (active_) return; 184 active_ = true; 185 ping_count_ = unresponsive_threshold_; 186 ResetHangCounters(); 187 base::MessageLoop::current()->PostTask( 188 FROM_HERE, 189 base::Bind(&ThreadWatcher::PostPingMessage, 190 weak_ptr_factory_.GetWeakPtr())); 191 } 192 193 void ThreadWatcher::DeActivateThreadWatching() { 194 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 195 active_ = false; 196 ping_count_ = 0; 197 weak_ptr_factory_.InvalidateWeakPtrs(); 198 } 199 200 void ThreadWatcher::WakeUp() { 201 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 202 // There is some user activity, PostPingMessage task of thread watcher if 203 // needed. 204 if (!active_) return; 205 206 // Throw away the previous |unresponsive_count_| and start over again. Just 207 // before going to sleep, |unresponsive_count_| could be very close to 208 // |unresponsive_threshold_| and when user becomes active, 209 // |unresponsive_count_| can go over |unresponsive_threshold_| if there was no 210 // response for ping messages. Reset |unresponsive_count_| to start measuring 211 // the unresponsiveness of the threads when system becomes active. 212 unresponsive_count_ = 0; 213 214 if (ping_count_ <= 0) { 215 ping_count_ = unresponsive_threshold_; 216 ResetHangCounters(); 217 PostPingMessage(); 218 } else { 219 ping_count_ = unresponsive_threshold_; 220 } 221 } 222 223 void ThreadWatcher::PostPingMessage() { 224 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 225 // If we have stopped watching or if the user is idle, then stop sending 226 // ping messages. 227 if (!active_ || ping_count_ <= 0) 228 return; 229 230 // Save the current time when we have sent ping message. 231 ping_time_ = base::TimeTicks::Now(); 232 233 // Send a ping message to the watched thread. Callback will be called on 234 // the WatchDogThread. 235 base::Closure callback( 236 base::Bind(&ThreadWatcher::OnPongMessage, weak_ptr_factory_.GetWeakPtr(), 237 ping_sequence_number_)); 238 if (watched_loop_->PostTask( 239 FROM_HERE, 240 base::Bind(&ThreadWatcher::OnPingMessage, thread_id_, 241 callback))) { 242 // Post a task to check the responsiveness of watched thread. 243 base::MessageLoop::current()->PostDelayedTask( 244 FROM_HERE, 245 base::Bind(&ThreadWatcher::OnCheckResponsiveness, 246 weak_ptr_factory_.GetWeakPtr(), ping_sequence_number_), 247 unresponsive_time_); 248 } else { 249 // Watched thread might have gone away, stop watching it. 250 DeActivateThreadWatching(); 251 } 252 } 253 254 void ThreadWatcher::OnPongMessage(uint64 ping_sequence_number) { 255 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 256 257 // Record watched thread's response time. 258 base::TimeTicks now = base::TimeTicks::Now(); 259 base::TimeDelta response_time = now - ping_time_; 260 response_time_histogram_->AddTime(response_time); 261 262 // Save the current time when we have got pong message. 263 pong_time_ = now; 264 265 // Check if there are any extra pings in flight. 266 DCHECK_EQ(ping_sequence_number_, ping_sequence_number); 267 if (ping_sequence_number_ != ping_sequence_number) 268 return; 269 270 // Increment sequence number for the next ping message to indicate watched 271 // thread is responsive. 272 ++ping_sequence_number_; 273 274 // If we have stopped watching or if the user is idle, then stop sending 275 // ping messages. 276 if (!active_ || --ping_count_ <= 0) 277 return; 278 279 base::MessageLoop::current()->PostDelayedTask( 280 FROM_HERE, 281 base::Bind(&ThreadWatcher::PostPingMessage, 282 weak_ptr_factory_.GetWeakPtr()), 283 sleep_time_); 284 } 285 286 void ThreadWatcher::OnCheckResponsiveness(uint64 ping_sequence_number) { 287 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 288 // If we have stopped watching then consider thread as responding. 289 if (!active_) { 290 responsive_ = true; 291 return; 292 } 293 // If the latest ping_sequence_number_ is not same as the ping_sequence_number 294 // that is passed in, then we can assume OnPongMessage was called. 295 // OnPongMessage increments ping_sequence_number_. 296 if (ping_sequence_number_ != ping_sequence_number) { 297 // Reset unresponsive_count_ to zero because we got a response from the 298 // watched thread. 299 ResetHangCounters(); 300 301 responsive_ = true; 302 return; 303 } 304 // Record that we got no response from watched thread. 305 GotNoResponse(); 306 307 // Post a task to check the responsiveness of watched thread. 308 base::MessageLoop::current()->PostDelayedTask( 309 FROM_HERE, 310 base::Bind(&ThreadWatcher::OnCheckResponsiveness, 311 weak_ptr_factory_.GetWeakPtr(), ping_sequence_number_), 312 unresponsive_time_); 313 responsive_ = false; 314 } 315 316 void ThreadWatcher::Initialize() { 317 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 318 ThreadWatcherList::Register(this); 319 320 const std::string response_time_histogram_name = 321 "ThreadWatcher.ResponseTime." + thread_name_; 322 response_time_histogram_ = base::Histogram::FactoryTimeGet( 323 response_time_histogram_name, 324 base::TimeDelta::FromMilliseconds(1), 325 base::TimeDelta::FromSeconds(100), 50, 326 base::Histogram::kUmaTargetedHistogramFlag); 327 328 const std::string unresponsive_time_histogram_name = 329 "ThreadWatcher.Unresponsive." + thread_name_; 330 unresponsive_time_histogram_ = base::Histogram::FactoryTimeGet( 331 unresponsive_time_histogram_name, 332 base::TimeDelta::FromMilliseconds(1), 333 base::TimeDelta::FromSeconds(100), 50, 334 base::Histogram::kUmaTargetedHistogramFlag); 335 336 const std::string responsive_count_histogram_name = 337 "ThreadWatcher.ResponsiveThreads." + thread_name_; 338 responsive_count_histogram_ = base::LinearHistogram::FactoryGet( 339 responsive_count_histogram_name, 1, 10, 11, 340 base::Histogram::kUmaTargetedHistogramFlag); 341 342 const std::string unresponsive_count_histogram_name = 343 "ThreadWatcher.UnresponsiveThreads." + thread_name_; 344 unresponsive_count_histogram_ = base::LinearHistogram::FactoryGet( 345 unresponsive_count_histogram_name, 1, 10, 11, 346 base::Histogram::kUmaTargetedHistogramFlag); 347 } 348 349 // static 350 void ThreadWatcher::OnPingMessage(const BrowserThread::ID& thread_id, 351 const base::Closure& callback_task) { 352 // This method is called on watched thread. 353 DCHECK(BrowserThread::CurrentlyOn(thread_id)); 354 WatchDogThread::PostTask(FROM_HERE, callback_task); 355 } 356 357 void ThreadWatcher::ResetHangCounters() { 358 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 359 unresponsive_count_ = 0; 360 hung_processing_complete_ = false; 361 } 362 363 void ThreadWatcher::GotNoResponse() { 364 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 365 366 ++unresponsive_count_; 367 if (!IsVeryUnresponsive()) 368 return; 369 370 // Record total unresponsive_time since last pong message. 371 base::TimeDelta unresponse_time = base::TimeTicks::Now() - pong_time_; 372 unresponsive_time_histogram_->AddTime(unresponse_time); 373 374 // We have already collected stats for the non-responding watched thread. 375 if (hung_processing_complete_) 376 return; 377 378 // Record how other threads are responding. 379 uint32 responding_thread_count = 0; 380 uint32 unresponding_thread_count = 0; 381 ThreadWatcherList::GetStatusOfThreads(&responding_thread_count, 382 &unresponding_thread_count); 383 384 // Record how many watched threads are responding. 385 responsive_count_histogram_->Add(responding_thread_count); 386 387 // Record how many watched threads are not responding. 388 unresponsive_count_histogram_->Add(unresponding_thread_count); 389 390 // Crash the browser if the watched thread is to be crashed on hang and if the 391 // number of other threads responding is less than or equal to 392 // live_threads_threshold_ and at least one other thread is responding. 393 if (crash_on_hang_ && 394 responding_thread_count > 0 && 395 responding_thread_count <= live_threads_threshold_) { 396 static bool crashed_once = false; 397 if (!crashed_once) { 398 crashed_once = true; 399 CrashBecauseThreadWasUnresponsive(thread_id_); 400 } 401 } 402 403 hung_processing_complete_ = true; 404 } 405 406 bool ThreadWatcher::IsVeryUnresponsive() { 407 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 408 return unresponsive_count_ >= unresponsive_threshold_; 409 } 410 411 // ThreadWatcherList methods and members. 412 // 413 // static 414 ThreadWatcherList* ThreadWatcherList::g_thread_watcher_list_ = NULL; 415 // static 416 bool ThreadWatcherList::g_stopped_ = false; 417 // static 418 const int ThreadWatcherList::kSleepSeconds = 1; 419 // static 420 const int ThreadWatcherList::kUnresponsiveSeconds = 2; 421 // static 422 const int ThreadWatcherList::kUnresponsiveCount = 9; 423 // static 424 const int ThreadWatcherList::kLiveThreadsThreshold = 2; 425 // static, non-const for tests. 426 int ThreadWatcherList::g_initialize_delay_seconds = 120; 427 428 ThreadWatcherList::CrashDataThresholds::CrashDataThresholds( 429 uint32 live_threads_threshold, 430 uint32 unresponsive_threshold) 431 : live_threads_threshold(live_threads_threshold), 432 unresponsive_threshold(unresponsive_threshold) { 433 } 434 435 ThreadWatcherList::CrashDataThresholds::CrashDataThresholds() 436 : live_threads_threshold(kLiveThreadsThreshold), 437 unresponsive_threshold(kUnresponsiveCount) { 438 } 439 440 // static 441 void ThreadWatcherList::StartWatchingAll(const CommandLine& command_line) { 442 // TODO(rtenneti): Enable ThreadWatcher. 443 uint32 unresponsive_threshold; 444 CrashOnHangThreadMap crash_on_hang_threads; 445 ParseCommandLine(command_line, 446 &unresponsive_threshold, 447 &crash_on_hang_threads); 448 449 ThreadWatcherObserver::SetupNotifications( 450 base::TimeDelta::FromSeconds(kSleepSeconds * unresponsive_threshold)); 451 452 WatchDogThread::PostTask( 453 FROM_HERE, 454 base::Bind(&ThreadWatcherList::SetStopped, false)); 455 456 WatchDogThread::PostDelayedTask( 457 FROM_HERE, 458 base::Bind(&ThreadWatcherList::InitializeAndStartWatching, 459 unresponsive_threshold, 460 crash_on_hang_threads), 461 base::TimeDelta::FromSeconds(g_initialize_delay_seconds)); 462 } 463 464 // static 465 void ThreadWatcherList::StopWatchingAll() { 466 // TODO(rtenneti): Enable ThreadWatcher. 467 ThreadWatcherObserver::RemoveNotifications(); 468 DeleteAll(); 469 } 470 471 // static 472 void ThreadWatcherList::Register(ThreadWatcher* watcher) { 473 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 474 if (!g_thread_watcher_list_) 475 return; 476 DCHECK(!g_thread_watcher_list_->Find(watcher->thread_id())); 477 g_thread_watcher_list_->registered_[watcher->thread_id()] = watcher; 478 } 479 480 // static 481 bool ThreadWatcherList::IsRegistered(const BrowserThread::ID thread_id) { 482 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 483 return NULL != ThreadWatcherList::Find(thread_id); 484 } 485 486 // static 487 void ThreadWatcherList::GetStatusOfThreads(uint32* responding_thread_count, 488 uint32* unresponding_thread_count) { 489 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 490 *responding_thread_count = 0; 491 *unresponding_thread_count = 0; 492 if (!g_thread_watcher_list_) 493 return; 494 495 for (RegistrationList::iterator it = 496 g_thread_watcher_list_->registered_.begin(); 497 g_thread_watcher_list_->registered_.end() != it; 498 ++it) { 499 if (it->second->IsVeryUnresponsive()) 500 ++(*unresponding_thread_count); 501 else 502 ++(*responding_thread_count); 503 } 504 } 505 506 // static 507 void ThreadWatcherList::WakeUpAll() { 508 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 509 if (!g_thread_watcher_list_) 510 return; 511 512 for (RegistrationList::iterator it = 513 g_thread_watcher_list_->registered_.begin(); 514 g_thread_watcher_list_->registered_.end() != it; 515 ++it) 516 it->second->WakeUp(); 517 } 518 519 ThreadWatcherList::ThreadWatcherList() { 520 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 521 CHECK(!g_thread_watcher_list_); 522 g_thread_watcher_list_ = this; 523 } 524 525 ThreadWatcherList::~ThreadWatcherList() { 526 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 527 DCHECK(this == g_thread_watcher_list_); 528 g_thread_watcher_list_ = NULL; 529 } 530 531 // static 532 void ThreadWatcherList::ParseCommandLine( 533 const CommandLine& command_line, 534 uint32* unresponsive_threshold, 535 CrashOnHangThreadMap* crash_on_hang_threads) { 536 // Initialize |unresponsive_threshold| to a default value. 537 // TODO(rtenneti): Changed the default value to 4 times, until we can triage 538 // hangs automatically (and to reduce the crash dumps). 539 *unresponsive_threshold = kUnresponsiveCount * 4; 540 541 // Increase the unresponsive_threshold on the Stable and Beta channels to 542 // reduce the number of crashes due to ThreadWatcher. 543 chrome::VersionInfo::Channel channel = chrome::VersionInfo::GetChannel(); 544 if (channel == chrome::VersionInfo::CHANNEL_STABLE) { 545 *unresponsive_threshold *= 4; 546 } else if (channel == chrome::VersionInfo::CHANNEL_BETA) { 547 *unresponsive_threshold *= 2; 548 } 549 550 #if defined(OS_WIN) 551 // For Windows XP (old systems), double the unresponsive_threshold to give 552 // the OS a chance to schedule UI/IO threads a time slice to respond with a 553 // pong message (to get around limitations with the OS). 554 if (base::win::GetVersion() <= base::win::VERSION_XP) 555 *unresponsive_threshold *= 2; 556 #endif 557 558 uint32 crash_seconds = *unresponsive_threshold * kUnresponsiveSeconds; 559 std::string crash_on_hang_thread_names; 560 bool has_command_line_overwrite = false; 561 if (command_line.HasSwitch(switches::kCrashOnHangThreads)) { 562 crash_on_hang_thread_names = 563 command_line.GetSwitchValueASCII(switches::kCrashOnHangThreads); 564 has_command_line_overwrite = true; 565 } else if (channel != chrome::VersionInfo::CHANNEL_STABLE) { 566 // Default to crashing the browser if UI or IO or FILE threads are not 567 // responsive except in stable channel. 568 crash_on_hang_thread_names = base::StringPrintf( 569 "UI:%d:%d,IO:%d:%d,FILE:%d:%d", 570 kLiveThreadsThreshold, crash_seconds, 571 kLiveThreadsThreshold, crash_seconds, 572 kLiveThreadsThreshold, crash_seconds * 5); 573 } 574 575 ParseCommandLineCrashOnHangThreads(crash_on_hang_thread_names, 576 kLiveThreadsThreshold, 577 crash_seconds, 578 crash_on_hang_threads); 579 580 if (channel != chrome::VersionInfo::CHANNEL_CANARY || 581 has_command_line_overwrite) { 582 return; 583 } 584 585 const char* kFieldTrialName = "ThreadWatcher"; 586 587 // Nothing else to be done if the trial has already been set (i.e., when 588 // StartWatchingAll() has been already called once). 589 if (base::FieldTrialList::TrialExists(kFieldTrialName)) 590 return; 591 592 // Set up a field trial for 100% of the users to crash if either UI or IO 593 // thread is not responsive for 30 seconds (or 15 pings). 594 scoped_refptr<base::FieldTrial> field_trial( 595 base::FieldTrialList::FactoryGetFieldTrial( 596 kFieldTrialName, 100, "default_hung_threads", 597 2014, 10, 30, base::FieldTrial::SESSION_RANDOMIZED, NULL)); 598 int hung_thread_group = field_trial->AppendGroup("hung_thread", 100); 599 if (field_trial->group() == hung_thread_group) { 600 for (CrashOnHangThreadMap::iterator it = crash_on_hang_threads->begin(); 601 crash_on_hang_threads->end() != it; 602 ++it) { 603 if (it->first == "FILE") 604 continue; 605 it->second.live_threads_threshold = INT_MAX; 606 if (it->first == "UI") { 607 // TODO(rtenneti): set unresponsive threshold to 120 seconds to catch 608 // the worst UI hangs and for fewer crashes due to ThreadWatcher. Reduce 609 // it to a more reasonable time ala IO thread. 610 it->second.unresponsive_threshold = 60; 611 } else { 612 it->second.unresponsive_threshold = 15; 613 } 614 } 615 } 616 } 617 618 // static 619 void ThreadWatcherList::ParseCommandLineCrashOnHangThreads( 620 const std::string& crash_on_hang_thread_names, 621 uint32 default_live_threads_threshold, 622 uint32 default_crash_seconds, 623 CrashOnHangThreadMap* crash_on_hang_threads) { 624 base::StringTokenizer tokens(crash_on_hang_thread_names, ","); 625 std::vector<std::string> values; 626 while (tokens.GetNext()) { 627 const std::string& token = tokens.token(); 628 base::SplitString(token, ':', &values); 629 std::string thread_name = values[0]; 630 631 uint32 live_threads_threshold = default_live_threads_threshold; 632 uint32 crash_seconds = default_crash_seconds; 633 if (values.size() >= 2 && 634 (!base::StringToUint(values[1], &live_threads_threshold))) { 635 continue; 636 } 637 if (values.size() >= 3 && 638 (!base::StringToUint(values[2], &crash_seconds))) { 639 continue; 640 } 641 uint32 unresponsive_threshold = static_cast<uint32>( 642 ceil(static_cast<float>(crash_seconds) / kUnresponsiveSeconds)); 643 644 CrashDataThresholds crash_data(live_threads_threshold, 645 unresponsive_threshold); 646 // Use the last specifier. 647 (*crash_on_hang_threads)[thread_name] = crash_data; 648 } 649 } 650 651 // static 652 void ThreadWatcherList::InitializeAndStartWatching( 653 uint32 unresponsive_threshold, 654 const CrashOnHangThreadMap& crash_on_hang_threads) { 655 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 656 657 // Disarm the startup timebomb, even if stop has been called. 658 BrowserThread::PostTask( 659 BrowserThread::UI, 660 FROM_HERE, 661 base::Bind(&StartupTimeBomb::DisarmStartupTimeBomb)); 662 663 // This method is deferred in relationship to its StopWatchingAll() 664 // counterpart. If a previous initialization has already happened, or if 665 // stop has been called, there's nothing left to do here. 666 if (g_thread_watcher_list_ || g_stopped_) 667 return; 668 669 ThreadWatcherList* thread_watcher_list = new ThreadWatcherList(); 670 CHECK(thread_watcher_list); 671 672 const base::TimeDelta kSleepTime = 673 base::TimeDelta::FromSeconds(kSleepSeconds); 674 const base::TimeDelta kUnresponsiveTime = 675 base::TimeDelta::FromSeconds(kUnresponsiveSeconds); 676 677 StartWatching(BrowserThread::UI, "UI", kSleepTime, kUnresponsiveTime, 678 unresponsive_threshold, crash_on_hang_threads); 679 StartWatching(BrowserThread::IO, "IO", kSleepTime, kUnresponsiveTime, 680 unresponsive_threshold, crash_on_hang_threads); 681 StartWatching(BrowserThread::DB, "DB", kSleepTime, kUnresponsiveTime, 682 unresponsive_threshold, crash_on_hang_threads); 683 StartWatching(BrowserThread::FILE, "FILE", kSleepTime, kUnresponsiveTime, 684 unresponsive_threshold, crash_on_hang_threads); 685 StartWatching(BrowserThread::CACHE, "CACHE", kSleepTime, kUnresponsiveTime, 686 unresponsive_threshold, crash_on_hang_threads); 687 } 688 689 // static 690 void ThreadWatcherList::StartWatching( 691 const BrowserThread::ID& thread_id, 692 const std::string& thread_name, 693 const base::TimeDelta& sleep_time, 694 const base::TimeDelta& unresponsive_time, 695 uint32 unresponsive_threshold, 696 const CrashOnHangThreadMap& crash_on_hang_threads) { 697 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 698 699 CrashOnHangThreadMap::const_iterator it = 700 crash_on_hang_threads.find(thread_name); 701 bool crash_on_hang = false; 702 uint32 live_threads_threshold = 0; 703 if (it != crash_on_hang_threads.end()) { 704 crash_on_hang = true; 705 live_threads_threshold = it->second.live_threads_threshold; 706 unresponsive_threshold = it->second.unresponsive_threshold; 707 } 708 709 ThreadWatcher::StartWatching( 710 ThreadWatcher::WatchingParams(thread_id, 711 thread_name, 712 sleep_time, 713 unresponsive_time, 714 unresponsive_threshold, 715 crash_on_hang, 716 live_threads_threshold)); 717 } 718 719 // static 720 void ThreadWatcherList::DeleteAll() { 721 if (!WatchDogThread::CurrentlyOnWatchDogThread()) { 722 WatchDogThread::PostTask( 723 FROM_HERE, 724 base::Bind(&ThreadWatcherList::DeleteAll)); 725 return; 726 } 727 728 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 729 730 SetStopped(true); 731 732 if (!g_thread_watcher_list_) 733 return; 734 735 // Delete all thread watcher objects. 736 while (!g_thread_watcher_list_->registered_.empty()) { 737 RegistrationList::iterator it = g_thread_watcher_list_->registered_.begin(); 738 delete it->second; 739 g_thread_watcher_list_->registered_.erase(it); 740 } 741 742 delete g_thread_watcher_list_; 743 } 744 745 // static 746 ThreadWatcher* ThreadWatcherList::Find(const BrowserThread::ID& thread_id) { 747 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 748 if (!g_thread_watcher_list_) 749 return NULL; 750 RegistrationList::iterator it = 751 g_thread_watcher_list_->registered_.find(thread_id); 752 if (g_thread_watcher_list_->registered_.end() == it) 753 return NULL; 754 return it->second; 755 } 756 757 // static 758 void ThreadWatcherList::SetStopped(bool stopped) { 759 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 760 g_stopped_ = stopped; 761 } 762 763 // ThreadWatcherObserver methods and members. 764 // 765 // static 766 ThreadWatcherObserver* ThreadWatcherObserver::g_thread_watcher_observer_ = NULL; 767 768 ThreadWatcherObserver::ThreadWatcherObserver( 769 const base::TimeDelta& wakeup_interval) 770 : last_wakeup_time_(base::TimeTicks::Now()), 771 wakeup_interval_(wakeup_interval) { 772 CHECK(!g_thread_watcher_observer_); 773 g_thread_watcher_observer_ = this; 774 } 775 776 ThreadWatcherObserver::~ThreadWatcherObserver() { 777 DCHECK(this == g_thread_watcher_observer_); 778 g_thread_watcher_observer_ = NULL; 779 } 780 781 // static 782 void ThreadWatcherObserver::SetupNotifications( 783 const base::TimeDelta& wakeup_interval) { 784 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 785 ThreadWatcherObserver* observer = new ThreadWatcherObserver(wakeup_interval); 786 observer->registrar_.Add( 787 observer, 788 chrome::NOTIFICATION_BROWSER_OPENED, 789 content::NotificationService::AllBrowserContextsAndSources()); 790 observer->registrar_.Add(observer, 791 chrome::NOTIFICATION_BROWSER_CLOSED, 792 content::NotificationService::AllSources()); 793 observer->registrar_.Add(observer, 794 chrome::NOTIFICATION_TAB_PARENTED, 795 content::NotificationService::AllSources()); 796 observer->registrar_.Add(observer, 797 chrome::NOTIFICATION_TAB_CLOSING, 798 content::NotificationService::AllSources()); 799 observer->registrar_.Add(observer, 800 content::NOTIFICATION_LOAD_START, 801 content::NotificationService::AllSources()); 802 observer->registrar_.Add(observer, 803 content::NOTIFICATION_LOAD_STOP, 804 content::NotificationService::AllSources()); 805 observer->registrar_.Add(observer, 806 content::NOTIFICATION_RENDERER_PROCESS_CLOSED, 807 content::NotificationService::AllSources()); 808 observer->registrar_.Add(observer, 809 content::NOTIFICATION_RENDER_WIDGET_HOST_HANG, 810 content::NotificationService::AllSources()); 811 observer->registrar_.Add(observer, 812 chrome::NOTIFICATION_OMNIBOX_OPENED_URL, 813 content::NotificationService::AllSources()); 814 } 815 816 // static 817 void ThreadWatcherObserver::RemoveNotifications() { 818 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 819 if (!g_thread_watcher_observer_) 820 return; 821 g_thread_watcher_observer_->registrar_.RemoveAll(); 822 delete g_thread_watcher_observer_; 823 } 824 825 void ThreadWatcherObserver::Observe( 826 int type, 827 const content::NotificationSource& source, 828 const content::NotificationDetails& details) { 829 // There is some user activity, see if thread watchers are to be awakened. 830 base::TimeTicks now = base::TimeTicks::Now(); 831 if ((now - last_wakeup_time_) < wakeup_interval_) 832 return; 833 last_wakeup_time_ = now; 834 WatchDogThread::PostTask( 835 FROM_HERE, 836 base::Bind(&ThreadWatcherList::WakeUpAll)); 837 } 838 839 // WatchDogThread methods and members. 840 841 // This lock protects g_watchdog_thread. 842 static base::LazyInstance<base::Lock>::Leaky 843 g_watchdog_lock = LAZY_INSTANCE_INITIALIZER; 844 845 // The singleton of this class. 846 static WatchDogThread* g_watchdog_thread = NULL; 847 848 WatchDogThread::WatchDogThread() : Thread("BrowserWatchdog") { 849 } 850 851 WatchDogThread::~WatchDogThread() { 852 Stop(); 853 } 854 855 // static 856 bool WatchDogThread::CurrentlyOnWatchDogThread() { 857 base::AutoLock lock(g_watchdog_lock.Get()); 858 return g_watchdog_thread && 859 g_watchdog_thread->message_loop() == base::MessageLoop::current(); 860 } 861 862 // static 863 bool WatchDogThread::PostTask(const tracked_objects::Location& from_here, 864 const base::Closure& task) { 865 return PostTaskHelper(from_here, task, base::TimeDelta()); 866 } 867 868 // static 869 bool WatchDogThread::PostDelayedTask(const tracked_objects::Location& from_here, 870 const base::Closure& task, 871 base::TimeDelta delay) { 872 return PostTaskHelper(from_here, task, delay); 873 } 874 875 // static 876 bool WatchDogThread::PostTaskHelper( 877 const tracked_objects::Location& from_here, 878 const base::Closure& task, 879 base::TimeDelta delay) { 880 { 881 base::AutoLock lock(g_watchdog_lock.Get()); 882 883 base::MessageLoop* message_loop = g_watchdog_thread ? 884 g_watchdog_thread->message_loop() : NULL; 885 if (message_loop) { 886 message_loop->PostDelayedTask(from_here, task, delay); 887 return true; 888 } 889 } 890 891 return false; 892 } 893 894 void WatchDogThread::Init() { 895 // This thread shouldn't be allowed to perform any blocking disk I/O. 896 base::ThreadRestrictions::SetIOAllowed(false); 897 898 base::AutoLock lock(g_watchdog_lock.Get()); 899 CHECK(!g_watchdog_thread); 900 g_watchdog_thread = this; 901 } 902 903 void WatchDogThread::CleanUp() { 904 base::AutoLock lock(g_watchdog_lock.Get()); 905 g_watchdog_thread = NULL; 906 } 907 908 namespace { 909 910 // StartupWatchDogThread methods and members. 911 // 912 // Class for detecting hangs during startup. 913 class StartupWatchDogThread : public base::Watchdog { 914 public: 915 // Constructor specifies how long the StartupWatchDogThread will wait before 916 // alarming. 917 explicit StartupWatchDogThread(const base::TimeDelta& duration) 918 : base::Watchdog(duration, "Startup watchdog thread", true) { 919 #if defined(OS_ANDROID) 920 // TODO(rtenneti): Delete this code, after getting data. 921 start_time_clock_= base::Time::Now(); 922 start_time_monotonic_ = base::TimeTicks::Now(); 923 start_time_thread_now_ = base::TimeTicks::IsThreadNowSupported() 924 ? base::TimeTicks::ThreadNow() : base::TimeTicks::Now(); 925 #endif // OS_ANDROID 926 } 927 928 // Alarm is called if the time expires after an Arm() without someone calling 929 // Disarm(). When Alarm goes off, in release mode we get the crash dump 930 // without crashing and in debug mode we break into the debugger. 931 virtual void Alarm() OVERRIDE { 932 #if !defined(NDEBUG) 933 StartupHang(); 934 return; 935 #elif !defined(OS_ANDROID) 936 WatchDogThread::PostTask(FROM_HERE, base::Bind(&StartupHang)); 937 return; 938 #else // Android release: gather stats to figure out when to crash. 939 // TODO(rtenneti): Delete this code, after getting data. 940 UMA_HISTOGRAM_TIMES("StartupTimeBomb.Alarm.TimeDuration", 941 base::Time::Now() - start_time_clock_); 942 UMA_HISTOGRAM_TIMES("StartupTimeBomb.Alarm.TimeTicksDuration", 943 base::TimeTicks::Now() - start_time_monotonic_); 944 if (base::TimeTicks::IsThreadNowSupported()) { 945 UMA_HISTOGRAM_TIMES( 946 "StartupTimeBomb.Alarm.ThreadNowDuration", 947 base::TimeTicks::ThreadNow() - start_time_thread_now_); 948 } 949 return; 950 #endif // OS_ANDROID 951 } 952 953 private: 954 #if defined(OS_ANDROID) 955 // TODO(rtenneti): Delete this code, after getting data. 956 base::Time start_time_clock_; 957 base::TimeTicks start_time_monotonic_; 958 base::TimeTicks start_time_thread_now_; 959 #endif // OS_ANDROID 960 961 DISALLOW_COPY_AND_ASSIGN(StartupWatchDogThread); 962 }; 963 964 // ShutdownWatchDogThread methods and members. 965 // 966 // Class for detecting hangs during shutdown. 967 class ShutdownWatchDogThread : public base::Watchdog { 968 public: 969 // Constructor specifies how long the ShutdownWatchDogThread will wait before 970 // alarming. 971 explicit ShutdownWatchDogThread(const base::TimeDelta& duration) 972 : base::Watchdog(duration, "Shutdown watchdog thread", true) { 973 } 974 975 // Alarm is called if the time expires after an Arm() without someone calling 976 // Disarm(). We crash the browser if this method is called. 977 virtual void Alarm() OVERRIDE { 978 ShutdownHang(); 979 } 980 981 private: 982 DISALLOW_COPY_AND_ASSIGN(ShutdownWatchDogThread); 983 }; 984 } // namespace 985 986 // StartupTimeBomb methods and members. 987 // 988 // static 989 StartupTimeBomb* StartupTimeBomb::g_startup_timebomb_ = NULL; 990 991 StartupTimeBomb::StartupTimeBomb() 992 : startup_watchdog_(NULL), 993 thread_id_(base::PlatformThread::CurrentId()) { 994 CHECK(!g_startup_timebomb_); 995 g_startup_timebomb_ = this; 996 } 997 998 StartupTimeBomb::~StartupTimeBomb() { 999 DCHECK(this == g_startup_timebomb_); 1000 DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId()); 1001 if (startup_watchdog_) 1002 Disarm(); 1003 g_startup_timebomb_ = NULL; 1004 } 1005 1006 void StartupTimeBomb::Arm(const base::TimeDelta& duration) { 1007 DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId()); 1008 DCHECK(!startup_watchdog_); 1009 startup_watchdog_ = new StartupWatchDogThread(duration); 1010 startup_watchdog_->Arm(); 1011 return; 1012 } 1013 1014 void StartupTimeBomb::Disarm() { 1015 DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId()); 1016 if (startup_watchdog_) { 1017 startup_watchdog_->Disarm(); 1018 startup_watchdog_->Cleanup(); 1019 DeleteStartupWatchdog(); 1020 } 1021 } 1022 1023 void StartupTimeBomb::DeleteStartupWatchdog() { 1024 DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId()); 1025 if (startup_watchdog_->IsJoinable()) { 1026 // Allow the watchdog thread to shutdown on UI. Watchdog thread shutdowns 1027 // very fast. 1028 base::ThreadRestrictions::SetIOAllowed(true); 1029 delete startup_watchdog_; 1030 startup_watchdog_ = NULL; 1031 return; 1032 } 1033 base::MessageLoop::current()->PostDelayedTask( 1034 FROM_HERE, 1035 base::Bind(&StartupTimeBomb::DeleteStartupWatchdog, 1036 base::Unretained(this)), 1037 base::TimeDelta::FromSeconds(10)); 1038 } 1039 1040 // static 1041 void StartupTimeBomb::DisarmStartupTimeBomb() { 1042 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 1043 if (g_startup_timebomb_) 1044 g_startup_timebomb_->Disarm(); 1045 } 1046 1047 // ShutdownWatcherHelper methods and members. 1048 // 1049 // ShutdownWatcherHelper is a wrapper class for detecting hangs during 1050 // shutdown. 1051 ShutdownWatcherHelper::ShutdownWatcherHelper() 1052 : shutdown_watchdog_(NULL), 1053 thread_id_(base::PlatformThread::CurrentId()) { 1054 } 1055 1056 ShutdownWatcherHelper::~ShutdownWatcherHelper() { 1057 DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId()); 1058 if (shutdown_watchdog_) { 1059 shutdown_watchdog_->Disarm(); 1060 delete shutdown_watchdog_; 1061 shutdown_watchdog_ = NULL; 1062 } 1063 } 1064 1065 void ShutdownWatcherHelper::Arm(const base::TimeDelta& duration) { 1066 DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId()); 1067 DCHECK(!shutdown_watchdog_); 1068 base::TimeDelta actual_duration = duration; 1069 1070 chrome::VersionInfo::Channel channel = chrome::VersionInfo::GetChannel(); 1071 if (channel == chrome::VersionInfo::CHANNEL_STABLE) { 1072 actual_duration *= 20; 1073 } else if (channel == chrome::VersionInfo::CHANNEL_BETA || 1074 channel == chrome::VersionInfo::CHANNEL_DEV) { 1075 actual_duration *= 10; 1076 } 1077 1078 #if defined(OS_WIN) 1079 // On Windows XP, give twice the time for shutdown. 1080 if (base::win::GetVersion() <= base::win::VERSION_XP) 1081 actual_duration *= 2; 1082 #endif 1083 1084 shutdown_watchdog_ = new ShutdownWatchDogThread(actual_duration); 1085 shutdown_watchdog_->Arm(); 1086 } 1087