Home | History | Annotate | Download | only in gpu
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #if defined(OS_WIN)
      6 #include <windows.h>
      7 #endif
      8 
      9 #include "content/gpu/gpu_watchdog_thread.h"
     10 
     11 #include "base/bind.h"
     12 #include "base/bind_helpers.h"
     13 #include "base/command_line.h"
     14 #include "base/compiler_specific.h"
     15 #include "base/power_monitor/power_monitor.h"
     16 #include "base/process/process.h"
     17 #include "build/build_config.h"
     18 #include "content/public/common/content_switches.h"
     19 #include "content/public/common/result_codes.h"
     20 
     21 namespace content {
     22 namespace {
     23 const int64 kCheckPeriodMs = 2000;
     24 }  // namespace
     25 
     26 GpuWatchdogThread::GpuWatchdogThread(int timeout)
     27     : base::Thread("Watchdog"),
     28       watched_message_loop_(base::MessageLoop::current()),
     29       timeout_(base::TimeDelta::FromMilliseconds(timeout)),
     30       armed_(false),
     31 #if defined(OS_WIN)
     32       watched_thread_handle_(0),
     33       arm_cpu_time_(),
     34 #endif
     35       task_observer_(this),
     36       weak_factory_(this),
     37       suspended_(false) {
     38   DCHECK(timeout >= 0);
     39 
     40 #if defined(OS_WIN)
     41   // GetCurrentThread returns a pseudo-handle that cannot be used by one thread
     42   // to identify another. DuplicateHandle creates a "real" handle that can be
     43   // used for this purpose.
     44   BOOL result = DuplicateHandle(GetCurrentProcess(),
     45                                 GetCurrentThread(),
     46                                 GetCurrentProcess(),
     47                                 &watched_thread_handle_,
     48                                 THREAD_QUERY_INFORMATION,
     49                                 FALSE,
     50                                 0);
     51   DCHECK(result);
     52 #endif
     53 
     54   watched_message_loop_->AddTaskObserver(&task_observer_);
     55 }
     56 
     57 void GpuWatchdogThread::PostAcknowledge() {
     58   // Called on the monitored thread. Responds with OnAcknowledge. Cannot use
     59   // the method factory. Rely on reference counting instead.
     60   message_loop()->PostTask(
     61       FROM_HERE,
     62       base::Bind(&GpuWatchdogThread::OnAcknowledge, this));
     63 }
     64 
     65 void GpuWatchdogThread::CheckArmed() {
     66   // Acknowledge the watchdog if it has armed itself. The watchdog will not
     67   // change its armed state until it is acknowledged.
     68   if (armed()) {
     69     PostAcknowledge();
     70   }
     71 }
     72 
     73 void GpuWatchdogThread::Init() {
     74   // Schedule the first check.
     75   OnCheck(false);
     76 }
     77 
     78 void GpuWatchdogThread::CleanUp() {
     79   weak_factory_.InvalidateWeakPtrs();
     80 }
     81 
     82 GpuWatchdogThread::GpuWatchdogTaskObserver::GpuWatchdogTaskObserver(
     83     GpuWatchdogThread* watchdog)
     84     : watchdog_(watchdog) {
     85 }
     86 
     87 GpuWatchdogThread::GpuWatchdogTaskObserver::~GpuWatchdogTaskObserver() {
     88 }
     89 
     90 void GpuWatchdogThread::GpuWatchdogTaskObserver::WillProcessTask(
     91     const base::PendingTask& pending_task) {
     92   watchdog_->CheckArmed();
     93 }
     94 
     95 void GpuWatchdogThread::GpuWatchdogTaskObserver::DidProcessTask(
     96     const base::PendingTask& pending_task) {
     97   watchdog_->CheckArmed();
     98 }
     99 
    100 GpuWatchdogThread::~GpuWatchdogThread() {
    101   // Verify that the thread was explicitly stopped. If the thread is stopped
    102   // implicitly by the destructor, CleanUp() will not be called.
    103   DCHECK(!weak_factory_.HasWeakPtrs());
    104 
    105 #if defined(OS_WIN)
    106   CloseHandle(watched_thread_handle_);
    107 #endif
    108 
    109   base::PowerMonitor* power_monitor = base::PowerMonitor::Get();
    110   if (power_monitor)
    111     power_monitor->RemoveObserver(this);
    112 
    113   watched_message_loop_->RemoveTaskObserver(&task_observer_);
    114 }
    115 
    116 void GpuWatchdogThread::OnAcknowledge() {
    117   CHECK(base::PlatformThread::CurrentId() == thread_id());
    118 
    119   // The check has already been acknowledged and another has already been
    120   // scheduled by a previous call to OnAcknowledge. It is normal for a
    121   // watched thread to see armed_ being true multiple times before
    122   // the OnAcknowledge task is run on the watchdog thread.
    123   if (!armed_)
    124     return;
    125 
    126   // Revoke any pending hang termination.
    127   weak_factory_.InvalidateWeakPtrs();
    128   armed_ = false;
    129 
    130   if (suspended_)
    131     return;
    132 
    133   // If it took a long time for the acknowledgement, assume the computer was
    134   // recently suspended.
    135   bool was_suspended = (base::Time::Now() > suspension_timeout_);
    136 
    137   // The monitored thread has responded. Post a task to check it again.
    138   message_loop()->PostDelayedTask(
    139       FROM_HERE,
    140       base::Bind(&GpuWatchdogThread::OnCheck, weak_factory_.GetWeakPtr(),
    141           was_suspended),
    142       base::TimeDelta::FromMilliseconds(kCheckPeriodMs));
    143 }
    144 
    145 void GpuWatchdogThread::OnCheck(bool after_suspend) {
    146   CHECK(base::PlatformThread::CurrentId() == thread_id());
    147 
    148   // Do not create any new termination tasks if one has already been created
    149   // or the system is suspended.
    150   if (armed_ || suspended_)
    151     return;
    152 
    153   // Must set armed before posting the task. This task might be the only task
    154   // that will activate the TaskObserver on the watched thread and it must not
    155   // miss the false -> true transition.
    156   armed_ = true;
    157 
    158 #if defined(OS_WIN)
    159   arm_cpu_time_ = GetWatchedThreadTime();
    160 #endif
    161 
    162   // Immediately after the computer is woken up from being suspended it might
    163   // be pretty sluggish, so allow some extra time before the next timeout.
    164   base::TimeDelta timeout = timeout_ * (after_suspend ? 3 : 1);
    165   suspension_timeout_ = base::Time::Now() + timeout * 2;
    166 
    167   // Post a task to the monitored thread that does nothing but wake up the
    168   // TaskObserver. Any other tasks that are pending on the watched thread will
    169   // also wake up the observer. This simply ensures there is at least one.
    170   watched_message_loop_->PostTask(
    171       FROM_HERE,
    172       base::Bind(&base::DoNothing));
    173 
    174   // Post a task to the watchdog thread to exit if the monitored thread does
    175   // not respond in time.
    176   message_loop()->PostDelayedTask(
    177       FROM_HERE,
    178       base::Bind(
    179           &GpuWatchdogThread::DeliberatelyTerminateToRecoverFromHang,
    180           weak_factory_.GetWeakPtr()),
    181       timeout);
    182 }
    183 
    184 // Use the --disable-gpu-watchdog command line switch to disable this.
    185 void GpuWatchdogThread::DeliberatelyTerminateToRecoverFromHang() {
    186   // Should not get here while the system is suspended.
    187   DCHECK(!suspended_);
    188 
    189 #if defined(OS_WIN)
    190   // Defer termination until a certain amount of CPU time has elapsed on the
    191   // watched thread.
    192   base::TimeDelta time_since_arm = GetWatchedThreadTime() - arm_cpu_time_;
    193   if (time_since_arm < timeout_) {
    194     message_loop()->PostDelayedTask(
    195         FROM_HERE,
    196         base::Bind(
    197             &GpuWatchdogThread::DeliberatelyTerminateToRecoverFromHang,
    198             weak_factory_.GetWeakPtr()),
    199         timeout_ - time_since_arm);
    200     return;
    201   }
    202 #endif
    203 
    204   // If the watchdog woke up significantly behind schedule, disarm and reset
    205   // the watchdog check. This is to prevent the watchdog thread from terminating
    206   // when a machine wakes up from sleep or hibernation, which would otherwise
    207   // appear to be a hang.
    208   if (base::Time::Now() > suspension_timeout_) {
    209     armed_ = false;
    210     OnCheck(true);
    211     return;
    212   }
    213 
    214   // For minimal developer annoyance, don't keep terminating. You need to skip
    215   // the call to base::Process::Terminate below in a debugger for this to be
    216   // useful.
    217   static bool terminated = false;
    218   if (terminated)
    219     return;
    220 
    221 #if defined(OS_WIN)
    222   if (IsDebuggerPresent())
    223     return;
    224 #endif
    225 
    226   LOG(ERROR) << "The GPU process hung. Terminating after "
    227              << timeout_.InMilliseconds() << " ms.";
    228 
    229   // Deliberately crash the process to create a crash dump.
    230   *((volatile int*)0) = 0x1337;
    231 
    232   terminated = true;
    233 }
    234 
    235 void GpuWatchdogThread::AddPowerObserver() {
    236   message_loop()->PostTask(
    237       FROM_HERE,
    238       base::Bind(&GpuWatchdogThread::OnAddPowerObserver, this));
    239 }
    240 
    241 void GpuWatchdogThread::OnAddPowerObserver() {
    242   base::PowerMonitor* power_monitor = base::PowerMonitor::Get();
    243   DCHECK(power_monitor);
    244   power_monitor->AddObserver(this);
    245 }
    246 
    247 void GpuWatchdogThread::OnSuspend() {
    248   suspended_ = true;
    249 
    250   // When suspending force an acknowledgement to cancel any pending termination
    251   // tasks.
    252   OnAcknowledge();
    253 }
    254 
    255 void GpuWatchdogThread::OnResume() {
    256   suspended_ = false;
    257 
    258   // After resuming jump-start the watchdog again.
    259   armed_ = false;
    260   OnCheck(true);
    261 }
    262 
    263 #if defined(OS_WIN)
    264 base::TimeDelta GpuWatchdogThread::GetWatchedThreadTime() {
    265   FILETIME creation_time;
    266   FILETIME exit_time;
    267   FILETIME user_time;
    268   FILETIME kernel_time;
    269   BOOL result = GetThreadTimes(watched_thread_handle_,
    270                                &creation_time,
    271                                &exit_time,
    272                                &kernel_time,
    273                                &user_time);
    274   DCHECK(result);
    275 
    276   ULARGE_INTEGER user_time64;
    277   user_time64.HighPart = user_time.dwHighDateTime;
    278   user_time64.LowPart = user_time.dwLowDateTime;
    279 
    280   ULARGE_INTEGER kernel_time64;
    281   kernel_time64.HighPart = kernel_time.dwHighDateTime;
    282   kernel_time64.LowPart = kernel_time.dwLowDateTime;
    283 
    284   // Time is reported in units of 100 nanoseconds. Kernel and user time are
    285   // summed to deal with to kinds of hangs. One is where the GPU process is
    286   // stuck in user level, never calling into the kernel and kernel time is
    287   // not increasing. The other is where either the kernel hangs and never
    288   // returns to user level or where user level code
    289   // calls into kernel level repeatedly, giving up its quanta before it is
    290   // tracked, for example a loop that repeatedly Sleeps.
    291   return base::TimeDelta::FromMilliseconds(static_cast<int64>(
    292       (user_time64.QuadPart + kernel_time64.QuadPart) / 10000));
    293 }
    294 #endif
    295 
    296 }  // namespace content
    297