1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #if defined(OS_WIN) 6 #include <windows.h> 7 #endif 8 9 #include "content/gpu/gpu_watchdog_thread.h" 10 11 #include "base/bind.h" 12 #include "base/bind_helpers.h" 13 #include "base/command_line.h" 14 #include "base/compiler_specific.h" 15 #include "base/files/file_util.h" 16 #include "base/power_monitor/power_monitor.h" 17 #include "base/process/process.h" 18 #include "build/build_config.h" 19 #include "content/public/common/content_switches.h" 20 #include "content/public/common/result_codes.h" 21 22 namespace content { 23 namespace { 24 const int64 kCheckPeriodMs = 2000; 25 #if defined(OS_CHROMEOS) 26 const base::FilePath::CharType 27 kTtyFilePath[] = FILE_PATH_LITERAL("/sys/class/tty/tty0/active"); 28 #endif 29 } // namespace 30 31 GpuWatchdogThread::GpuWatchdogThread(int timeout) 32 : base::Thread("Watchdog"), 33 watched_message_loop_(base::MessageLoop::current()), 34 timeout_(base::TimeDelta::FromMilliseconds(timeout)), 35 armed_(false), 36 #if defined(OS_WIN) 37 watched_thread_handle_(0), 38 arm_cpu_time_(), 39 #endif 40 task_observer_(this), 41 suspended_(false), 42 weak_factory_(this) { 43 DCHECK(timeout >= 0); 44 45 #if defined(OS_WIN) 46 // GetCurrentThread returns a pseudo-handle that cannot be used by one thread 47 // to identify another. DuplicateHandle creates a "real" handle that can be 48 // used for this purpose. 49 BOOL result = DuplicateHandle(GetCurrentProcess(), 50 GetCurrentThread(), 51 GetCurrentProcess(), 52 &watched_thread_handle_, 53 THREAD_QUERY_INFORMATION, 54 FALSE, 55 0); 56 DCHECK(result); 57 #endif 58 59 #if defined(OS_CHROMEOS) 60 tty_file_ = base::OpenFile(base::FilePath(kTtyFilePath), "r"); 61 #endif 62 watched_message_loop_->AddTaskObserver(&task_observer_); 63 } 64 65 void GpuWatchdogThread::PostAcknowledge() { 66 // Called on the monitored thread. Responds with OnAcknowledge. Cannot use 67 // the method factory. Rely on reference counting instead. 68 message_loop()->PostTask( 69 FROM_HERE, 70 base::Bind(&GpuWatchdogThread::OnAcknowledge, this)); 71 } 72 73 void GpuWatchdogThread::CheckArmed() { 74 // Acknowledge the watchdog if it has armed itself. The watchdog will not 75 // change its armed state until it is acknowledged. 76 if (armed()) { 77 PostAcknowledge(); 78 } 79 } 80 81 void GpuWatchdogThread::Init() { 82 // Schedule the first check. 83 OnCheck(false); 84 } 85 86 void GpuWatchdogThread::CleanUp() { 87 weak_factory_.InvalidateWeakPtrs(); 88 } 89 90 GpuWatchdogThread::GpuWatchdogTaskObserver::GpuWatchdogTaskObserver( 91 GpuWatchdogThread* watchdog) 92 : watchdog_(watchdog) { 93 } 94 95 GpuWatchdogThread::GpuWatchdogTaskObserver::~GpuWatchdogTaskObserver() { 96 } 97 98 void GpuWatchdogThread::GpuWatchdogTaskObserver::WillProcessTask( 99 const base::PendingTask& pending_task) { 100 watchdog_->CheckArmed(); 101 } 102 103 void GpuWatchdogThread::GpuWatchdogTaskObserver::DidProcessTask( 104 const base::PendingTask& pending_task) { 105 watchdog_->CheckArmed(); 106 } 107 108 GpuWatchdogThread::~GpuWatchdogThread() { 109 // Verify that the thread was explicitly stopped. If the thread is stopped 110 // implicitly by the destructor, CleanUp() will not be called. 111 DCHECK(!weak_factory_.HasWeakPtrs()); 112 113 #if defined(OS_WIN) 114 CloseHandle(watched_thread_handle_); 115 #endif 116 117 base::PowerMonitor* power_monitor = base::PowerMonitor::Get(); 118 if (power_monitor) 119 power_monitor->RemoveObserver(this); 120 121 #if defined(OS_CHROMEOS) 122 if (tty_file_) 123 fclose(tty_file_); 124 #endif 125 126 watched_message_loop_->RemoveTaskObserver(&task_observer_); 127 } 128 129 void GpuWatchdogThread::OnAcknowledge() { 130 CHECK(base::PlatformThread::CurrentId() == thread_id()); 131 132 // The check has already been acknowledged and another has already been 133 // scheduled by a previous call to OnAcknowledge. It is normal for a 134 // watched thread to see armed_ being true multiple times before 135 // the OnAcknowledge task is run on the watchdog thread. 136 if (!armed_) 137 return; 138 139 // Revoke any pending hang termination. 140 weak_factory_.InvalidateWeakPtrs(); 141 armed_ = false; 142 143 if (suspended_) 144 return; 145 146 // If it took a long time for the acknowledgement, assume the computer was 147 // recently suspended. 148 bool was_suspended = (base::Time::Now() > suspension_timeout_); 149 150 // The monitored thread has responded. Post a task to check it again. 151 message_loop()->PostDelayedTask( 152 FROM_HERE, 153 base::Bind(&GpuWatchdogThread::OnCheck, weak_factory_.GetWeakPtr(), 154 was_suspended), 155 base::TimeDelta::FromMilliseconds(kCheckPeriodMs)); 156 } 157 158 void GpuWatchdogThread::OnCheck(bool after_suspend) { 159 CHECK(base::PlatformThread::CurrentId() == thread_id()); 160 161 // Do not create any new termination tasks if one has already been created 162 // or the system is suspended. 163 if (armed_ || suspended_) 164 return; 165 166 // Must set armed before posting the task. This task might be the only task 167 // that will activate the TaskObserver on the watched thread and it must not 168 // miss the false -> true transition. 169 armed_ = true; 170 171 #if defined(OS_WIN) 172 arm_cpu_time_ = GetWatchedThreadTime(); 173 #endif 174 175 // Immediately after the computer is woken up from being suspended it might 176 // be pretty sluggish, so allow some extra time before the next timeout. 177 base::TimeDelta timeout = timeout_ * (after_suspend ? 3 : 1); 178 suspension_timeout_ = base::Time::Now() + timeout * 2; 179 180 // Post a task to the monitored thread that does nothing but wake up the 181 // TaskObserver. Any other tasks that are pending on the watched thread will 182 // also wake up the observer. This simply ensures there is at least one. 183 watched_message_loop_->PostTask( 184 FROM_HERE, 185 base::Bind(&base::DoNothing)); 186 187 // Post a task to the watchdog thread to exit if the monitored thread does 188 // not respond in time. 189 message_loop()->PostDelayedTask( 190 FROM_HERE, 191 base::Bind( 192 &GpuWatchdogThread::DeliberatelyTerminateToRecoverFromHang, 193 weak_factory_.GetWeakPtr()), 194 timeout); 195 } 196 197 // Use the --disable-gpu-watchdog command line switch to disable this. 198 void GpuWatchdogThread::DeliberatelyTerminateToRecoverFromHang() { 199 // Should not get here while the system is suspended. 200 DCHECK(!suspended_); 201 202 #if defined(OS_WIN) 203 // Defer termination until a certain amount of CPU time has elapsed on the 204 // watched thread. 205 base::TimeDelta time_since_arm = GetWatchedThreadTime() - arm_cpu_time_; 206 if (time_since_arm < timeout_) { 207 message_loop()->PostDelayedTask( 208 FROM_HERE, 209 base::Bind( 210 &GpuWatchdogThread::DeliberatelyTerminateToRecoverFromHang, 211 weak_factory_.GetWeakPtr()), 212 timeout_ - time_since_arm); 213 return; 214 } 215 #endif 216 217 // If the watchdog woke up significantly behind schedule, disarm and reset 218 // the watchdog check. This is to prevent the watchdog thread from terminating 219 // when a machine wakes up from sleep or hibernation, which would otherwise 220 // appear to be a hang. 221 if (base::Time::Now() > suspension_timeout_) { 222 armed_ = false; 223 OnCheck(true); 224 return; 225 } 226 227 // For minimal developer annoyance, don't keep terminating. You need to skip 228 // the call to base::Process::Terminate below in a debugger for this to be 229 // useful. 230 static bool terminated = false; 231 if (terminated) 232 return; 233 234 #if defined(OS_WIN) 235 if (IsDebuggerPresent()) 236 return; 237 #endif 238 239 #if defined(OS_CHROMEOS) 240 // Don't crash if we're not on tty1. This avoids noise in the GPU process 241 // crashes caused by people who use VT2 but still enable crash reporting. 242 char tty_string[8] = {0}; 243 if (tty_file_ && 244 !fseek(tty_file_, 0, SEEK_SET) && 245 fread(tty_string, 1, 7, tty_file_)) { 246 int tty_number = -1; 247 int num_res = sscanf(tty_string, "tty%d", &tty_number); 248 if (num_res == 1 && tty_number != 1) 249 return; 250 } 251 #endif 252 253 LOG(ERROR) << "The GPU process hung. Terminating after " 254 << timeout_.InMilliseconds() << " ms."; 255 256 // Deliberately crash the process to create a crash dump. 257 *((volatile int*)0) = 0x1337; 258 259 terminated = true; 260 } 261 262 void GpuWatchdogThread::AddPowerObserver() { 263 message_loop()->PostTask( 264 FROM_HERE, 265 base::Bind(&GpuWatchdogThread::OnAddPowerObserver, this)); 266 } 267 268 void GpuWatchdogThread::OnAddPowerObserver() { 269 base::PowerMonitor* power_monitor = base::PowerMonitor::Get(); 270 DCHECK(power_monitor); 271 power_monitor->AddObserver(this); 272 } 273 274 void GpuWatchdogThread::OnSuspend() { 275 suspended_ = true; 276 277 // When suspending force an acknowledgement to cancel any pending termination 278 // tasks. 279 OnAcknowledge(); 280 } 281 282 void GpuWatchdogThread::OnResume() { 283 suspended_ = false; 284 285 // After resuming jump-start the watchdog again. 286 armed_ = false; 287 OnCheck(true); 288 } 289 290 #if defined(OS_WIN) 291 base::TimeDelta GpuWatchdogThread::GetWatchedThreadTime() { 292 FILETIME creation_time; 293 FILETIME exit_time; 294 FILETIME user_time; 295 FILETIME kernel_time; 296 BOOL result = GetThreadTimes(watched_thread_handle_, 297 &creation_time, 298 &exit_time, 299 &kernel_time, 300 &user_time); 301 DCHECK(result); 302 303 ULARGE_INTEGER user_time64; 304 user_time64.HighPart = user_time.dwHighDateTime; 305 user_time64.LowPart = user_time.dwLowDateTime; 306 307 ULARGE_INTEGER kernel_time64; 308 kernel_time64.HighPart = kernel_time.dwHighDateTime; 309 kernel_time64.LowPart = kernel_time.dwLowDateTime; 310 311 // Time is reported in units of 100 nanoseconds. Kernel and user time are 312 // summed to deal with to kinds of hangs. One is where the GPU process is 313 // stuck in user level, never calling into the kernel and kernel time is 314 // not increasing. The other is where either the kernel hangs and never 315 // returns to user level or where user level code 316 // calls into kernel level repeatedly, giving up its quanta before it is 317 // tracked, for example a loop that repeatedly Sleeps. 318 return base::TimeDelta::FromMilliseconds(static_cast<int64>( 319 (user_time64.QuadPart + kernel_time64.QuadPart) / 10000)); 320 } 321 #endif 322 323 } // namespace content 324