1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #if defined(OS_WIN) 6 #include <windows.h> 7 #endif 8 9 #include "content/gpu/gpu_watchdog_thread.h" 10 11 #include "base/bind.h" 12 #include "base/bind_helpers.h" 13 #include "base/command_line.h" 14 #include "base/compiler_specific.h" 15 #include "base/power_monitor/power_monitor.h" 16 #include "base/process/process.h" 17 #include "build/build_config.h" 18 #include "content/public/common/content_switches.h" 19 #include "content/public/common/result_codes.h" 20 21 namespace content { 22 namespace { 23 const int64 kCheckPeriodMs = 2000; 24 } // namespace 25 26 GpuWatchdogThread::GpuWatchdogThread(int timeout) 27 : base::Thread("Watchdog"), 28 watched_message_loop_(base::MessageLoop::current()), 29 timeout_(base::TimeDelta::FromMilliseconds(timeout)), 30 armed_(false), 31 #if defined(OS_WIN) 32 watched_thread_handle_(0), 33 arm_cpu_time_(), 34 #endif 35 task_observer_(this), 36 weak_factory_(this), 37 suspended_(false) { 38 DCHECK(timeout >= 0); 39 40 #if defined(OS_WIN) 41 // GetCurrentThread returns a pseudo-handle that cannot be used by one thread 42 // to identify another. DuplicateHandle creates a "real" handle that can be 43 // used for this purpose. 44 BOOL result = DuplicateHandle(GetCurrentProcess(), 45 GetCurrentThread(), 46 GetCurrentProcess(), 47 &watched_thread_handle_, 48 THREAD_QUERY_INFORMATION, 49 FALSE, 50 0); 51 DCHECK(result); 52 #endif 53 54 watched_message_loop_->AddTaskObserver(&task_observer_); 55 } 56 57 void GpuWatchdogThread::PostAcknowledge() { 58 // Called on the monitored thread. Responds with OnAcknowledge. Cannot use 59 // the method factory. Rely on reference counting instead. 60 message_loop()->PostTask( 61 FROM_HERE, 62 base::Bind(&GpuWatchdogThread::OnAcknowledge, this)); 63 } 64 65 void GpuWatchdogThread::CheckArmed() { 66 // Acknowledge the watchdog if it has armed itself. The watchdog will not 67 // change its armed state until it is acknowledged. 68 if (armed()) { 69 PostAcknowledge(); 70 } 71 } 72 73 void GpuWatchdogThread::Init() { 74 // Schedule the first check. 75 OnCheck(false); 76 } 77 78 void GpuWatchdogThread::CleanUp() { 79 weak_factory_.InvalidateWeakPtrs(); 80 } 81 82 GpuWatchdogThread::GpuWatchdogTaskObserver::GpuWatchdogTaskObserver( 83 GpuWatchdogThread* watchdog) 84 : watchdog_(watchdog) { 85 } 86 87 GpuWatchdogThread::GpuWatchdogTaskObserver::~GpuWatchdogTaskObserver() { 88 } 89 90 void GpuWatchdogThread::GpuWatchdogTaskObserver::WillProcessTask( 91 const base::PendingTask& pending_task) { 92 watchdog_->CheckArmed(); 93 } 94 95 void GpuWatchdogThread::GpuWatchdogTaskObserver::DidProcessTask( 96 const base::PendingTask& pending_task) { 97 watchdog_->CheckArmed(); 98 } 99 100 GpuWatchdogThread::~GpuWatchdogThread() { 101 // Verify that the thread was explicitly stopped. If the thread is stopped 102 // implicitly by the destructor, CleanUp() will not be called. 103 DCHECK(!weak_factory_.HasWeakPtrs()); 104 105 #if defined(OS_WIN) 106 CloseHandle(watched_thread_handle_); 107 #endif 108 109 base::PowerMonitor* power_monitor = base::PowerMonitor::Get(); 110 if (power_monitor) 111 power_monitor->RemoveObserver(this); 112 113 watched_message_loop_->RemoveTaskObserver(&task_observer_); 114 } 115 116 void GpuWatchdogThread::OnAcknowledge() { 117 CHECK(base::PlatformThread::CurrentId() == thread_id()); 118 119 // The check has already been acknowledged and another has already been 120 // scheduled by a previous call to OnAcknowledge. It is normal for a 121 // watched thread to see armed_ being true multiple times before 122 // the OnAcknowledge task is run on the watchdog thread. 123 if (!armed_) 124 return; 125 126 // Revoke any pending hang termination. 127 weak_factory_.InvalidateWeakPtrs(); 128 armed_ = false; 129 130 if (suspended_) 131 return; 132 133 // If it took a long time for the acknowledgement, assume the computer was 134 // recently suspended. 135 bool was_suspended = (base::Time::Now() > suspension_timeout_); 136 137 // The monitored thread has responded. Post a task to check it again. 138 message_loop()->PostDelayedTask( 139 FROM_HERE, 140 base::Bind(&GpuWatchdogThread::OnCheck, weak_factory_.GetWeakPtr(), 141 was_suspended), 142 base::TimeDelta::FromMilliseconds(kCheckPeriodMs)); 143 } 144 145 void GpuWatchdogThread::OnCheck(bool after_suspend) { 146 CHECK(base::PlatformThread::CurrentId() == thread_id()); 147 148 // Do not create any new termination tasks if one has already been created 149 // or the system is suspended. 150 if (armed_ || suspended_) 151 return; 152 153 // Must set armed before posting the task. This task might be the only task 154 // that will activate the TaskObserver on the watched thread and it must not 155 // miss the false -> true transition. 156 armed_ = true; 157 158 #if defined(OS_WIN) 159 arm_cpu_time_ = GetWatchedThreadTime(); 160 #endif 161 162 // Immediately after the computer is woken up from being suspended it might 163 // be pretty sluggish, so allow some extra time before the next timeout. 164 base::TimeDelta timeout = timeout_ * (after_suspend ? 3 : 1); 165 suspension_timeout_ = base::Time::Now() + timeout * 2; 166 167 // Post a task to the monitored thread that does nothing but wake up the 168 // TaskObserver. Any other tasks that are pending on the watched thread will 169 // also wake up the observer. This simply ensures there is at least one. 170 watched_message_loop_->PostTask( 171 FROM_HERE, 172 base::Bind(&base::DoNothing)); 173 174 // Post a task to the watchdog thread to exit if the monitored thread does 175 // not respond in time. 176 message_loop()->PostDelayedTask( 177 FROM_HERE, 178 base::Bind( 179 &GpuWatchdogThread::DeliberatelyTerminateToRecoverFromHang, 180 weak_factory_.GetWeakPtr()), 181 timeout); 182 } 183 184 // Use the --disable-gpu-watchdog command line switch to disable this. 185 void GpuWatchdogThread::DeliberatelyTerminateToRecoverFromHang() { 186 // Should not get here while the system is suspended. 187 DCHECK(!suspended_); 188 189 #if defined(OS_WIN) 190 // Defer termination until a certain amount of CPU time has elapsed on the 191 // watched thread. 192 base::TimeDelta time_since_arm = GetWatchedThreadTime() - arm_cpu_time_; 193 if (time_since_arm < timeout_) { 194 message_loop()->PostDelayedTask( 195 FROM_HERE, 196 base::Bind( 197 &GpuWatchdogThread::DeliberatelyTerminateToRecoverFromHang, 198 weak_factory_.GetWeakPtr()), 199 timeout_ - time_since_arm); 200 return; 201 } 202 #endif 203 204 // If the watchdog woke up significantly behind schedule, disarm and reset 205 // the watchdog check. This is to prevent the watchdog thread from terminating 206 // when a machine wakes up from sleep or hibernation, which would otherwise 207 // appear to be a hang. 208 if (base::Time::Now() > suspension_timeout_) { 209 armed_ = false; 210 OnCheck(true); 211 return; 212 } 213 214 // For minimal developer annoyance, don't keep terminating. You need to skip 215 // the call to base::Process::Terminate below in a debugger for this to be 216 // useful. 217 static bool terminated = false; 218 if (terminated) 219 return; 220 221 #if defined(OS_WIN) 222 if (IsDebuggerPresent()) 223 return; 224 #endif 225 226 LOG(ERROR) << "The GPU process hung. Terminating after " 227 << timeout_.InMilliseconds() << " ms."; 228 229 // Deliberately crash the process to create a crash dump. 230 *((volatile int*)0) = 0x1337; 231 232 terminated = true; 233 } 234 235 void GpuWatchdogThread::AddPowerObserver() { 236 message_loop()->PostTask( 237 FROM_HERE, 238 base::Bind(&GpuWatchdogThread::OnAddPowerObserver, this)); 239 } 240 241 void GpuWatchdogThread::OnAddPowerObserver() { 242 base::PowerMonitor* power_monitor = base::PowerMonitor::Get(); 243 DCHECK(power_monitor); 244 power_monitor->AddObserver(this); 245 } 246 247 void GpuWatchdogThread::OnSuspend() { 248 suspended_ = true; 249 250 // When suspending force an acknowledgement to cancel any pending termination 251 // tasks. 252 OnAcknowledge(); 253 } 254 255 void GpuWatchdogThread::OnResume() { 256 suspended_ = false; 257 258 // After resuming jump-start the watchdog again. 259 armed_ = false; 260 OnCheck(true); 261 } 262 263 #if defined(OS_WIN) 264 base::TimeDelta GpuWatchdogThread::GetWatchedThreadTime() { 265 FILETIME creation_time; 266 FILETIME exit_time; 267 FILETIME user_time; 268 FILETIME kernel_time; 269 BOOL result = GetThreadTimes(watched_thread_handle_, 270 &creation_time, 271 &exit_time, 272 &kernel_time, 273 &user_time); 274 DCHECK(result); 275 276 ULARGE_INTEGER user_time64; 277 user_time64.HighPart = user_time.dwHighDateTime; 278 user_time64.LowPart = user_time.dwLowDateTime; 279 280 ULARGE_INTEGER kernel_time64; 281 kernel_time64.HighPart = kernel_time.dwHighDateTime; 282 kernel_time64.LowPart = kernel_time.dwLowDateTime; 283 284 // Time is reported in units of 100 nanoseconds. Kernel and user time are 285 // summed to deal with to kinds of hangs. One is where the GPU process is 286 // stuck in user level, never calling into the kernel and kernel time is 287 // not increasing. The other is where either the kernel hangs and never 288 // returns to user level or where user level code 289 // calls into kernel level repeatedly, giving up its quanta before it is 290 // tracked, for example a loop that repeatedly Sleeps. 291 return base::TimeDelta::FromMilliseconds(static_cast<int64>( 292 (user_time64.QuadPart + kernel_time64.QuadPart) / 10000)); 293 } 294 #endif 295 296 } // namespace content 297