/*
 * Copyright (C) 2008 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.server;

import android.app.IActivityController;
import android.os.Binder;
import android.os.RemoteException;
import com.android.server.am.ActivityManagerService;
import com.android.server.power.PowerManagerService;

import android.app.AlarmManager;
import android.app.PendingIntent;
import android.content.BroadcastReceiver;
import android.content.ContentResolver;
import android.content.Context;
import android.content.Intent;
import android.content.IntentFilter;
import android.os.BatteryManager;
import android.os.Debug;
import android.os.Handler;
import android.os.Looper;
import android.os.Message;
import android.os.Process;
import android.os.ServiceManager;
import android.os.SystemClock;
import android.os.SystemProperties;
import android.util.EventLog;
import android.util.Log;
import android.util.Slog;

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Calendar;

/**
 * Thread that repeatedly posts a {@link #MONITOR} message to the main looper and waits
 * {@link #TIME_TO_WAIT} milliseconds of uptime for every registered {@link Monitor} to return.
 * If two consecutive waits pass without the monitors completing, it dumps stack traces and
 * kills this process (unless a debugger is attached, restarts are disallowed, or the
 * activity controller asks to keep waiting). It also schedules the optional periodic reboot.
 */
public class Watchdog extends Thread {
    static final String TAG = "Watchdog";
    static final boolean localLOGV = false || false;

    // Set this to true to use debug default values.
    static final boolean DB = false;

    // Set this to true to have the watchdog record kernel thread stacks when it fires
    static final boolean RECORD_KERNEL_THREADS = true;

    /** Message code handled by {@link HeartbeatHandler} to run all monitors. */
    static final int MONITOR = 2718;

    /** Total time (ms) the monitors may stay stuck before the process is killed. */
    static final int TIME_TO_RESTART = DB ? 15*1000 : 60*1000;
    /** Length (ms) of a single wait in {@link #run}; half of {@link #TIME_TO_RESTART}. */
    static final int TIME_TO_WAIT = TIME_TO_RESTART / 2;

    // The MEMCHECK_* and REBOOT_* defaults below are expressed in seconds, except
    // REBOOT_DEFAULT_INTERVAL which checkReboot() treats as a number of days.
    static final int MEMCHECK_DEFAULT_MIN_SCREEN_OFF = DB ? 1*60 : 5*60;   // 5 minutes
    static final int MEMCHECK_DEFAULT_MIN_ALARM = DB ? 1*60 : 3*60;        // 3 minutes
    static final int MEMCHECK_DEFAULT_RECHECK_INTERVAL = DB ? 1*60 : 5*60; // 5 minutes

    static final int REBOOT_DEFAULT_INTERVAL = DB ? 1 : 0;                 // never force reboot
    static final int REBOOT_DEFAULT_START_TIME = 3*60*60;                  // 3:00am
    static final int REBOOT_DEFAULT_WINDOW = 60*60;                        // within 1 hour

    static final String REBOOT_ACTION = "com.android.service.Watchdog.REBOOT";

    /** Native processes whose stacks are requested alongside the Java stack dumps. */
    static final String[] NATIVE_STACKS_OF_INTEREST = new String[] {
        "/system/bin/mediaserver",
        "/system/bin/sdcard",
        "/system/bin/surfaceflinger"
    };

    static Watchdog sWatchdog;

    /* This handler will be used to post message back onto the main thread */
    final Handler mHandler;
    final ArrayList<Monitor> mMonitors = new ArrayList<Monitor>();
    ContentResolver mResolver;
    BatteryService mBattery;
    PowerManagerService mPower;
    AlarmManagerService mAlarm;
    ActivityManagerService mActivity;
    /** Set to true by the handler once every monitor has returned; accessed under the lock. */
    boolean mCompleted;
    /** Monitor currently being invoked by the handler, or null when none is running. */
    Monitor mCurrentMonitor;

    int mPhonePid;
    IActivityController mController;
    boolean mAllowRestart = true;

    final Calendar mCalendar = Calendar.getInstance();
    int mMinScreenOff = MEMCHECK_DEFAULT_MIN_SCREEN_OFF;
    int mMinAlarm = MEMCHECK_DEFAULT_MIN_ALARM;
    boolean mNeedScheduledCheck;
    PendingIntent mCheckupIntent;
    PendingIntent mRebootIntent;

    long mBootTime;
    int mRebootInterval;

    boolean mReqRebootNoWait;     // should wait for one interval before reboot?
    int mReqRebootInterval = -1;  // >= 0 if a reboot has been requested
    int mReqRebootStartTime = -1; // >= 0 if a specific start time has been requested
    int mReqRebootWindow = -1;    // >= 0 if a specific window has been requested
    int mReqMinScreenOff = -1;    // >= 0 if a specific screen off time has been requested
    int mReqMinNextAlarm = -1;    // >= 0 if specific time to next alarm has been requested
    int mReqRecheckInterval= -1;  // >= 0 if a specific recheck interval has been requested

    /**
     * Used for scheduling monitor callbacks and checking memory usage.
     */
    final class HeartbeatHandler extends Handler {
        HeartbeatHandler(Looper looper) {
            super(looper);
        }

        /**
         * On {@link #MONITOR}: re-evaluates the reboot schedule if the effective interval
         * changed, then calls every registered monitor in order and finally marks the
         * pass as completed.
         */
        @Override
        public void handleMessage(Message msg) {
            switch (msg.what) {
                case MONITOR: {
                    // See if we should force a reboot.
                    int rebootInterval = mReqRebootInterval >= 0
                            ? mReqRebootInterval : REBOOT_DEFAULT_INTERVAL;
                    if (mRebootInterval != rebootInterval) {
                        mRebootInterval = rebootInterval;
                        // We have been running long enough that a reboot can
                        // be considered...
                        checkReboot(false);
                    }

                    final int size = mMonitors.size();
                    for (int i = 0 ; i < size ; i++) {
                        // Publish the monitor under the lock so run() can name it if it hangs.
                        synchronized (Watchdog.this) {
                            mCurrentMonitor = mMonitors.get(i);
                        }
                        mCurrentMonitor.monitor();
                    }

                    synchronized (Watchdog.this) {
                        mCompleted = true;
                        mCurrentMonitor = null;
                    }
                } break;
            }
        }
    }

    /** Receives the {@link #REBOOT_ACTION} alarm broadcast and re-runs the reboot check. */
    final class RebootReceiver extends BroadcastReceiver {
        @Override
        public void onReceive(Context c, Intent intent) {
            if (localLOGV) Slog.v(TAG, "Alarm went off, checking reboot.");
            checkReboot(true);
        }
    }

    /**
     * Receives {@link Intent#ACTION_REBOOT} requests, copies the requested parameters from
     * the intent extras into the {@code mReq*} fields, logs them, and re-runs the reboot check.
     */
    final class RebootRequestReceiver extends BroadcastReceiver {
        @Override
        public void onReceive(Context c, Intent intent) {
            mReqRebootNoWait = intent.getIntExtra("nowait", 0) != 0;
            mReqRebootInterval = intent.getIntExtra("interval", -1);
            mReqRebootStartTime = intent.getIntExtra("startTime", -1);
            mReqRebootWindow = intent.getIntExtra("window", -1);
            mReqMinScreenOff = intent.getIntExtra("minScreenOff", -1);
            mReqMinNextAlarm = intent.getIntExtra("minNextAlarm", -1);
            mReqRecheckInterval = intent.getIntExtra("recheckInterval", -1);
            EventLog.writeEvent(EventLogTags.WATCHDOG_REQUESTED_REBOOT,
                    mReqRebootNoWait ? 1 : 0, mReqRebootInterval,
                            mReqRecheckInterval, mReqRebootStartTime,
                    mReqRebootWindow, mReqMinScreenOff, mReqMinNextAlarm);
            checkReboot(true);
        }
    }

    /** Callback invoked on the handler thread during every watchdog pass. */
    public interface Monitor {
        void monitor();
    }

    /** Returns the shared instance, creating it on first use. */
    public static Watchdog getInstance() {
        if (sWatchdog == null) {
            sWatchdog = new Watchdog();
        }

        return sWatchdog;
    }

    private Watchdog() {
        super("watchdog");
        // Explicitly bind the HeartbeatHandler to run on the ServerThread, so
        // that it can't get accidentally bound to another thread.
        mHandler = new HeartbeatHandler(Looper.getMainLooper());
    }

    /**
     * Stores the services this watchdog consults, registers the reboot alarm and
     * reboot-request receivers, and records the current wall-clock time as boot time.
     */
    public void init(Context context, BatteryService battery,
            PowerManagerService power, AlarmManagerService alarm,
            ActivityManagerService activity) {
        mResolver = context.getContentResolver();
        mBattery = battery;
        mPower = power;
        mAlarm = alarm;
        mActivity = activity;

        context.registerReceiver(new RebootReceiver(),
                new IntentFilter(REBOOT_ACTION));
        mRebootIntent = PendingIntent.getBroadcast(context,
                0, new Intent(REBOOT_ACTION), 0);

        context.registerReceiver(new RebootRequestReceiver(),
                new IntentFilter(Intent.ACTION_REBOOT),
                android.Manifest.permission.REBOOT, null);

        mBootTime = System.currentTimeMillis();
    }

    /** Remembers the pid of the phone process so its stacks are dumped when the watchdog fires. */
    public void processStarted(String name, int pid) {
        synchronized (this) {
            if ("com.android.phone".equals(name)) {
                mPhonePid = pid;
            }
        }
    }

    /** Sets the controller that is consulted before the system process is killed. */
    public void setActivityController(IActivityController controller) {
        synchronized (this) {
            mController = controller;
        }
    }

    /** Controls whether the watchdog is allowed to kill the system process. */
    public void setAllowRestart(boolean allowRestart) {
        synchronized (this) {
            mAllowRestart = allowRestart;
        }
    }

    /**
     * Registers a monitor to be called on every pass.
     *
     * @throws RuntimeException if the watchdog thread is already running.
     */
    public void addMonitor(Monitor monitor) {
        synchronized (this) {
            if (isAlive()) {
                throw new RuntimeException("Monitors can't be added while the Watchdog is running");
            }
            mMonitors.add(monitor);
        }
    }

    /**
     * Decides whether to reboot now or (re)schedule the reboot alarm.
     *
     * @param fromAlarm true when invoked from one of the broadcast receivers; only then
     *                  is an immediate reboot performed when no reboot window is configured.
     */
    void checkReboot(boolean fromAlarm) {
        int rebootInterval = mReqRebootInterval >= 0 ? mReqRebootInterval
                : REBOOT_DEFAULT_INTERVAL;
        mRebootInterval = rebootInterval;
        if (rebootInterval <= 0) {
            // No reboot interval requested.
            if (localLOGV) Slog.v(TAG, "No need to schedule a reboot alarm!");
            mAlarm.remove(mRebootIntent);
            return;
        }

        long rebootStartTime = mReqRebootStartTime >= 0 ? mReqRebootStartTime
                : REBOOT_DEFAULT_START_TIME;
        // Multiply in long: the requested values come from intent extras and an int
        // product would silently wrap for large inputs.
        long rebootWindowMillis = (mReqRebootWindow >= 0 ? mReqRebootWindow
                : REBOOT_DEFAULT_WINDOW) * 1000L;
        long recheckInterval = (mReqRecheckInterval >= 0 ? mReqRecheckInterval
                : MEMCHECK_DEFAULT_RECHECK_INTERVAL) * 1000L;

        retrieveBrutalityAmount();

        long realStartTime;
        long now;

        synchronized (this) {
            now = System.currentTimeMillis();
            realStartTime = computeCalendarTime(mCalendar, now,
                    rebootStartTime);

            // Days -> milliseconds. Computed in long; the int product overflows once the
            // interval reaches 25 days (25 * 86,400,000 > Integer.MAX_VALUE).
            long rebootIntervalMillis = rebootInterval * 24L * 60 * 60 * 1000;
            if (DB || mReqRebootNoWait ||
                    (now-mBootTime) >= (rebootIntervalMillis-rebootWindowMillis)) {
                if (fromAlarm && rebootWindowMillis <= 0) {
                    // No reboot window -- just immediately reboot.
                    EventLog.writeEvent(EventLogTags.WATCHDOG_SCHEDULED_REBOOT, now,
                            (int)rebootIntervalMillis, (int)rebootStartTime*1000,
                            (int)rebootWindowMillis, "");
                    rebootSystem("Checkin scheduled forced");
                    return;
                }

                // Are we within the reboot window?
                if (now < realStartTime) {
                    // Schedule alarm for next check interval.
                    realStartTime = computeCalendarTime(mCalendar,
                            now, rebootStartTime);
                } else if (now < (realStartTime+rebootWindowMillis)) {
                    String doit = shouldWeBeBrutalLocked(now);
                    EventLog.writeEvent(EventLogTags.WATCHDOG_SCHEDULED_REBOOT, now,
                            (int)rebootInterval, (int)rebootStartTime*1000,
                            (int)rebootWindowMillis, doit != null ? doit : "");
                    if (doit == null) {
                        rebootSystem("Checked scheduled range");
                        return;
                    }

                    // Schedule next alarm either within the window or in the
                    // next interval.
                    if ((now+recheckInterval) >= (realStartTime+rebootWindowMillis)) {
                        realStartTime = computeCalendarTime(mCalendar,
                                now + rebootIntervalMillis, rebootStartTime);
                    } else {
                        realStartTime = now + recheckInterval;
                    }
                } else {
                    // Schedule alarm for next check interval.
                    realStartTime = computeCalendarTime(mCalendar,
                            now + rebootIntervalMillis, rebootStartTime);
                }
            }
        }

        if (localLOGV) Slog.v(TAG, "Scheduling next reboot alarm for "
                + ((realStartTime-now)/1000/60) + "m from now");
        mAlarm.remove(mRebootIntent);
        mAlarm.set(AlarmManager.RTC_WAKEUP, realStartTime, mRebootIntent);
    }

    /**
     * Perform a full reboot of the system.
     */
    void rebootSystem(String reason) {
        Slog.i(TAG, "Rebooting system because: " + reason);
        PowerManagerService pms = (PowerManagerService) ServiceManager.getService("power");
        pms.reboot(false, reason, false);
    }

    /**
     * Refresh the thresholds (in milliseconds) that
     * {@link #shouldWeBeBrutalLocked} compares against, using the requested
     * values when present and the compiled-in defaults otherwise.
     * Must not be called with the lock held.
     */
    void retrieveBrutalityAmount() {
        mMinScreenOff = (mReqMinScreenOff >= 0 ? mReqMinScreenOff
                : MEMCHECK_DEFAULT_MIN_SCREEN_OFF) * 1000;
        mMinAlarm = (mReqMinNextAlarm >= 0 ? mReqMinNextAlarm
                : MEMCHECK_DEFAULT_MIN_ALARM) * 1000;
    }

    /**
     * Determine whether it is a good time to kill, crash, or otherwise
     * plunder the current situation for the overall long-term benefit of
     * the world.
     *
     * @param curTime The current system time.
     * @return Returns null if this is a good time, else a String with the
     * text of why it is not a good time.
     */
    String shouldWeBeBrutalLocked(long curTime) {
        if (mBattery == null || !mBattery.isPowered(BatteryManager.BATTERY_PLUGGED_ANY)) {
            return "battery";
        }

        if (mMinScreenOff >= 0 && (mPower == null ||
                mPower.timeSinceScreenWasLastOn() < mMinScreenOff)) {
            return "screen";
        }

        if (mMinAlarm >= 0 && (mAlarm == null ||
                mAlarm.timeToNextAlarm() < mMinAlarm)) {
            return "alarm";
        }

        return null;
    }

    /**
     * Returns the next wall-clock time (ms) at or after {@code curTime} whose time of day
     * equals {@code secondsSinceMidnight}. The supplied calendar is modified.
     */
    static long computeCalendarTime(Calendar c, long curTime,
            long secondsSinceMidnight) {

        // start with now
        c.setTimeInMillis(curTime);

        int val = (int)secondsSinceMidnight / (60*60);
        c.set(Calendar.HOUR_OF_DAY, val);
        secondsSinceMidnight -= val * (60*60);
        val = (int)secondsSinceMidnight / 60;
        c.set(Calendar.MINUTE, val);
        c.set(Calendar.SECOND, (int)secondsSinceMidnight - (val*60));
        c.set(Calendar.MILLISECOND, 0);

        long newTime = c.getTimeInMillis();
        if (newTime < curTime) {
            // The given time (in seconds since midnight) has already passed for today, so advance
            // by one day (due to daylight savings, etc., the delta may differ from 24 hours).
            c.add(Calendar.DAY_OF_MONTH, 1);
            newTime = c.getTimeInMillis();
        }

        return newTime;
    }

    /**
     * Watchdog loop: post a monitor pass, wait half the restart interval, and if the pass
     * has not completed after two consecutive waits, collect diagnostics and kill the process.
     */
    @Override
    public void run() {
        boolean waitedHalf = false;
        while (true) {
            // Reset under the same lock every other access to mCompleted uses.
            synchronized (this) {
                mCompleted = false;
            }
            mHandler.sendEmptyMessage(MONITOR);


            final String name;
            final boolean allowRestart;
            synchronized (this) {
                long timeout = TIME_TO_WAIT;

                // NOTE: We use uptimeMillis() here because we do not want to increment the time we
                // wait while asleep. If the device is asleep then the thing that we are waiting
                // to timeout on is asleep as well and won't have a chance to run, causing a false
                // positive on when to kill things.
                long start = SystemClock.uptimeMillis();
                while (timeout > 0) {
                    try {
                        wait(timeout);
                    } catch (InterruptedException e) {
                        Log.wtf(TAG, e);
                    }
                    timeout = TIME_TO_WAIT - (SystemClock.uptimeMillis() - start);
                }

                if (mCompleted) {
                    // The monitors have returned.
                    waitedHalf = false;
                    continue;
                }

                if (!waitedHalf) {
                    // We've waited half the deadlock-detection interval.  Pull a stack
                    // trace and wait another half.
                    ArrayList<Integer> pids = new ArrayList<Integer>();
                    pids.add(Process.myPid());
                    ActivityManagerService.dumpStackTraces(true, pids, null, null,
                            NATIVE_STACKS_OF_INTEREST);
                    waitedHalf = true;
                    continue;
                }

                name = (mCurrentMonitor != null) ?
                    mCurrentMonitor.getClass().getName() : "null";
                allowRestart = mAllowRestart;
            }

            // If we got here, that means that the system is most likely hung.
            // First collect stack traces from all threads of the system process.
            // Then kill this process so that the system will restart.
            EventLog.writeEvent(EventLogTags.WATCHDOG, name);

            ArrayList<Integer> pids = new ArrayList<Integer>();
            pids.add(Process.myPid());
            if (mPhonePid > 0) pids.add(mPhonePid);
            // Pass !waitedHalf so that just in case we somehow wind up here without having
            // dumped the halfway stacks, we properly re-initialize the trace file.
            final File stack = ActivityManagerService.dumpStackTraces(
                    !waitedHalf, pids, null, null, NATIVE_STACKS_OF_INTEREST);

            // Give some extra time to make sure the stack traces get written.
            // The system's been hanging for a minute, another second or two won't hurt much.
            SystemClock.sleep(2000);

            // Pull our own kernel thread stacks as well if we're configured for that
            if (RECORD_KERNEL_THREADS) {
                dumpKernelStackTraces();
            }

            // Trigger the kernel to dump all blocked threads to the kernel log
            try {
                FileWriter sysrqTrigger = new FileWriter("/proc/sysrq-trigger");
                try {
                    sysrqTrigger.write("w");
                } finally {
                    // Always release the descriptor, even if the write failed.
                    sysrqTrigger.close();
                }
            } catch (IOException e) {
                // Log the exception itself rather than e.getMessage(), which may be null.
                Slog.e(TAG, "Failed to write to /proc/sysrq-trigger", e);
            }

            // Try to add the error to the dropbox, but assuming that the ActivityManager
            // itself may be deadlocked.  (which has happened, causing this statement to
            // deadlock and the watchdog as a whole to be ineffective)
            Thread dropboxThread = new Thread("watchdogWriteToDropbox") {
                    @Override
                    public void run() {
                        mActivity.addErrorToDropBox(
                                "watchdog", null, "system_server", null, null,
                                name, null, stack, null);
                    }
                };
            dropboxThread.start();
            try {
                dropboxThread.join(2000);  // wait up to 2 seconds for it to return.
            } catch (InterruptedException ignored) {}

            IActivityController controller;
            synchronized (this) {
                controller = mController;
            }
            if (controller != null) {
                Slog.i(TAG, "Reporting stuck state to activity controller");
                try {
                    Binder.setDumpDisabled("Service dumps disabled due to hung system process.");
                    // 1 = keep waiting, -1 = kill system
                    int res = controller.systemNotResponding(name);
                    if (res >= 0) {
                        Slog.i(TAG, "Activity controller requested to coninue to wait");
                        waitedHalf = false;
                        continue;
                    }
                } catch (RemoteException ignored) {
                    // The controller could not be reached; fall through to the
                    // kill decision below as if no controller were set.
                }
            }

            // Only kill the process if the debugger is not attached.
            if (Debug.isDebuggerConnected()) {
                Slog.w(TAG, "Debugger connected: Watchdog is *not* killing the system process");
            } else if (!allowRestart) {
                Slog.w(TAG, "Restart not allowed: Watchdog is *not* killing the system process");
            } else {
                Slog.w(TAG, "*** WATCHDOG KILLING SYSTEM PROCESS: " + name);
                Process.killProcess(Process.myPid());
                System.exit(10);
            }

            waitedHalf = false;
        }
    }

    /**
     * Asks native code to dump kernel thread stacks into the file named by the
     * {@code dalvik.vm.stack-trace-file} system property.
     *
     * @return the traces file, or null if the property is unset or empty.
     */
    private File dumpKernelStackTraces() {
        String tracesPath = SystemProperties.get("dalvik.vm.stack-trace-file", null);
        if (tracesPath == null || tracesPath.length() == 0) {
            return null;
        }

        native_dumpKernelStacks(tracesPath);
        return new File(tracesPath);
    }

    private native void native_dumpKernelStacks(String tracesPath);
}