/*
 * Copyright (C) 2008 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.server;

import com.android.server.am.ActivityManagerService;

import android.app.AlarmManager;
import android.app.PendingIntent;
import android.content.BroadcastReceiver;
import android.content.ContentResolver;
import android.content.Context;
import android.content.Intent;
import android.content.IntentFilter;
import android.os.Debug;
import android.os.Handler;
import android.os.Message;
import android.os.Process;
import android.os.ServiceManager;
import android.os.SystemClock;
import android.os.SystemProperties;
import android.provider.Settings;
import android.util.EventLog;
import android.util.Log;
import android.util.Slog;

import java.io.File;
import java.util.ArrayList;
import java.util.Calendar;

/**
 * Watchdog thread for the system process.
 *
 * <p>The thread repeatedly posts a {@link #MONITOR} message to {@link #mHandler}, which calls
 * every registered {@link Monitor}. If the monitors have not all returned after two consecutive
 * waits of {@link #TIME_TO_WAIT} milliseconds (i.e. {@link #TIME_TO_RESTART} in total), stack
 * traces are dumped and this process is killed, unless a debugger is attached.
 *
 * <p>The class also schedules the optional periodic forced reboot (see {@link #checkReboot}).
 */
public class Watchdog extends Thread {
    static final String TAG = "Watchdog";
    static final boolean localLOGV = false || false;

    // Set this to true to use debug default values.
    static final boolean DB = false;

    // Set this to true to have the watchdog record kernel thread stacks when it fires
    static final boolean RECORD_KERNEL_THREADS = true;

    // Message code handled by HeartbeatHandler.
    static final int MONITOR = 2718;

    // Milliseconds.
    static final int TIME_TO_RESTART = DB ? 15*1000 : 60*1000;
    static final int TIME_TO_WAIT = TIME_TO_RESTART / 2;

    // Seconds; converted to milliseconds where they are read.
    static final int MEMCHECK_DEFAULT_MIN_SCREEN_OFF = DB ? 1*60 : 5*60;   // 5 minutes
    static final int MEMCHECK_DEFAULT_MIN_ALARM = DB ? 1*60 : 3*60;        // 3 minutes
    static final int MEMCHECK_DEFAULT_RECHECK_INTERVAL = DB ? 1*60 : 5*60; // 5 minutes

    static final int REBOOT_DEFAULT_INTERVAL = DB ? 1 : 0;         // never force reboot
    static final int REBOOT_DEFAULT_START_TIME = 3*60*60;          // 3:00am
    static final int REBOOT_DEFAULT_WINDOW = 60*60;                // within 1 hour

    static final String REBOOT_ACTION = "com.android.service.Watchdog.REBOOT";

    static Watchdog sWatchdog;

    /* This handler will be used to post message back onto the main thread */
    final Handler mHandler;
    final ArrayList<Monitor> mMonitors = new ArrayList<Monitor>();
    ContentResolver mResolver;
    BatteryService mBattery;
    PowerManagerService mPower;
    AlarmManagerService mAlarm;
    ActivityManagerService mActivity;
    boolean mCompleted;
    boolean mForceKillSystem;
    Monitor mCurrentMonitor;

    int mPhonePid;

    final Calendar mCalendar = Calendar.getInstance();
    int mMinScreenOff = MEMCHECK_DEFAULT_MIN_SCREEN_OFF;
    int mMinAlarm = MEMCHECK_DEFAULT_MIN_ALARM;
    boolean mNeedScheduledCheck;
    PendingIntent mCheckupIntent;
    PendingIntent mRebootIntent;

    long mBootTime;
    int mRebootInterval;

    boolean mReqRebootNoWait;     // should wait for one interval before reboot?
    int mReqRebootInterval = -1;  // >= 0 if a reboot has been requested
    int mReqRebootStartTime = -1; // >= 0 if a specific start time has been requested
    int mReqRebootWindow = -1;    // >= 0 if a specific window has been requested
    int mReqMinScreenOff = -1;    // >= 0 if a specific screen off time has been requested
    int mReqMinNextAlarm = -1;    // >= 0 if specific time to next alarm has been requested
    int mReqRecheckInterval= -1;  // >= 0 if a specific recheck interval has been requested

    /**
     * Handles the {@link #MONITOR} message: re-evaluates the reboot schedule when the
     * effective reboot interval has changed, then calls every registered monitor and
     * flags the round as completed.
     */
    final class HeartbeatHandler extends Handler {
        @Override
        public void handleMessage(Message msg) {
            switch (msg.what) {
                case MONITOR: {
                    // See if we should force a reboot.
                    int rebootInterval = mReqRebootInterval >= 0
                            ? mReqRebootInterval : Settings.Secure.getInt(
                            mResolver, Settings.Secure.REBOOT_INTERVAL,
                            REBOOT_DEFAULT_INTERVAL);
                    if (mRebootInterval != rebootInterval) {
                        mRebootInterval = rebootInterval;
                        // The effective interval differs from the one last seen, so
                        // the reboot alarm has to be rescheduled (or cancelled).
                        checkReboot(false);
                    }

                    // A monitor that never returns leaves mCompleted false, which is
                    // what the watchdog thread detects in run().
                    final int size = mMonitors.size();
                    for (int i = 0 ; i < size ; i++) {
                        mCurrentMonitor = mMonitors.get(i);
                        mCurrentMonitor.monitor();
                    }

                    synchronized (Watchdog.this) {
                        mCompleted = true;
                        mCurrentMonitor = null;
                    }
                } break;
            }
        }
    }

    /** Receives the {@link #REBOOT_ACTION} alarm broadcast and re-runs the reboot check. */
    final class RebootReceiver extends BroadcastReceiver {
        @Override
        public void onReceive(Context c, Intent intent) {
            if (localLOGV) Slog.v(TAG, "Alarm went off, checking reboot.");
            checkReboot(true);
        }
    }

    /**
     * Receives {@link Intent#ACTION_REBOOT} requests, stores the overrides carried in the
     * intent extras (a missing extra resets the override to "not requested") and re-runs
     * the reboot check.
     */
    final class RebootRequestReceiver extends BroadcastReceiver {
        @Override
        public void onReceive(Context c, Intent intent) {
            mReqRebootNoWait = intent.getIntExtra("nowait", 0) != 0;
            mReqRebootInterval = intent.getIntExtra("interval", -1);
            mReqRebootStartTime = intent.getIntExtra("startTime", -1);
            mReqRebootWindow = intent.getIntExtra("window", -1);
            mReqMinScreenOff = intent.getIntExtra("minScreenOff", -1);
            mReqMinNextAlarm = intent.getIntExtra("minNextAlarm", -1);
            mReqRecheckInterval = intent.getIntExtra("recheckInterval", -1);
            EventLog.writeEvent(EventLogTags.WATCHDOG_REQUESTED_REBOOT,
                    mReqRebootNoWait ? 1 : 0, mReqRebootInterval,
                            mReqRecheckInterval, mReqRebootStartTime,
                    mReqRebootWindow, mReqMinScreenOff, mReqMinNextAlarm);
            checkReboot(true);
        }
    }

    /** Callback invoked on the handler thread during each watchdog round. */
    public interface Monitor {
        void monitor();
    }

    /** Returns the singleton watchdog, creating it on first use. */
    public static Watchdog getInstance() {
        if (sWatchdog == null) {
            sWatchdog = new Watchdog();
        }

        return sWatchdog;
    }

    private Watchdog() {
        super("watchdog");
        mHandler = new HeartbeatHandler();
    }

    /**
     * Stores the services the watchdog consults, registers the reboot-related broadcast
     * receivers and records the current wall-clock time as the boot time.
     */
    public void init(Context context, BatteryService battery,
            PowerManagerService power, AlarmManagerService alarm,
            ActivityManagerService activity) {
        mResolver = context.getContentResolver();
        mBattery = battery;
        mPower = power;
        mAlarm = alarm;
        mActivity = activity;

        context.registerReceiver(new RebootReceiver(),
                new IntentFilter(REBOOT_ACTION));
        mRebootIntent = PendingIntent.getBroadcast(context,
                0, new Intent(REBOOT_ACTION), 0);

        // Only senders holding the REBOOT permission may request a reboot.
        context.registerReceiver(new RebootRequestReceiver(),
                new IntentFilter(Intent.ACTION_REBOOT),
                android.Manifest.permission.REBOOT, null);

        mBootTime = System.currentTimeMillis();
    }

    /**
     * Records the pid of the phone process when it starts so that its stacks can be
     * included in the dump taken when the watchdog fires.
     */
    public void processStarted(String name, int pid) {
        synchronized (this) {
            if ("com.android.phone".equals(name)) {
                mPhonePid = pid;
            }
        }
    }

    /**
     * Registers a monitor to be called on every watchdog round.
     *
     * @throws RuntimeException if the watchdog thread is already running
     */
    public void addMonitor(Monitor monitor) {
        synchronized (this) {
            if (isAlive()) {
                throw new RuntimeException("Monitors can't be added while the Watchdog is running");
            }
            mMonitors.add(monitor);
        }
    }

    /**
     * Decides whether the system should be rebooted now and otherwise (re)schedules the
     * reboot alarm.
     *
     * @param fromAlarm true when called because the reboot alarm fired or a reboot was
     *         explicitly requested; only then may an immediate reboot happen when no
     *         reboot window is configured
     */
    void checkReboot(boolean fromAlarm) {
        int rebootInterval = mReqRebootInterval >= 0 ? mReqRebootInterval
                : Settings.Secure.getInt(
                mResolver, Settings.Secure.REBOOT_INTERVAL,
                REBOOT_DEFAULT_INTERVAL);
        mRebootInterval = rebootInterval;
        if (rebootInterval <= 0) {
            // No reboot interval requested.
            if (localLOGV) Slog.v(TAG, "No need to schedule a reboot alarm!");
            mAlarm.remove(mRebootIntent);
            return;
        }

        // Start time is in seconds since midnight; window and recheck interval are
        // stored in seconds and converted to milliseconds here.
        long rebootStartTime = mReqRebootStartTime >= 0 ? mReqRebootStartTime
                : Settings.Secure.getLong(
                mResolver, Settings.Secure.REBOOT_START_TIME,
                REBOOT_DEFAULT_START_TIME);
        long rebootWindowMillis = (mReqRebootWindow >= 0 ? mReqRebootWindow
                : Settings.Secure.getLong(
                mResolver, Settings.Secure.REBOOT_WINDOW,
                REBOOT_DEFAULT_WINDOW)) * 1000;
        long recheckInterval = (mReqRecheckInterval >= 0 ? mReqRecheckInterval
                : Settings.Secure.getLong(
                mResolver, Settings.Secure.MEMCHECK_RECHECK_INTERVAL,
                MEMCHECK_DEFAULT_RECHECK_INTERVAL)) * 1000;

        retrieveBrutalityAmount();

        long realStartTime;
        long now;

        synchronized (this) {
            now = System.currentTimeMillis();
            realStartTime = computeCalendarTime(mCalendar, now,
                    rebootStartTime);

            // The interval is in days. Multiply in long arithmetic: in int arithmetic
            // the product overflows for intervals of 25 days or more.
            long rebootIntervalMillis = rebootInterval * 24L * 60 * 60 * 1000;
            if (DB || mReqRebootNoWait ||
                    (now-mBootTime) >= (rebootIntervalMillis-rebootWindowMillis)) {
                if (fromAlarm && rebootWindowMillis <= 0) {
                    // No reboot window -- just immediately reboot.
                    EventLog.writeEvent(EventLogTags.WATCHDOG_SCHEDULED_REBOOT, now,
                            (int)rebootIntervalMillis, (int)rebootStartTime*1000,
                            (int)rebootWindowMillis, "");
                    rebootSystem("Checkin scheduled forced");
                    return;
                }

                // Are we within the reboot window?
                if (now < realStartTime) {
                    // Schedule alarm for next check interval.
                    realStartTime = computeCalendarTime(mCalendar,
                            now, rebootStartTime);
                } else if (now < (realStartTime+rebootWindowMillis)) {
                    String doit = shouldWeBeBrutalLocked(now);
                    EventLog.writeEvent(EventLogTags.WATCHDOG_SCHEDULED_REBOOT, now,
                            (int)rebootInterval, (int)rebootStartTime*1000,
                            (int)rebootWindowMillis, doit != null ? doit : "");
                    if (doit == null) {
                        rebootSystem("Checked scheduled range");
                        return;
                    }

                    // Schedule next alarm either within the window or in the
                    // next interval.
                    if ((now+recheckInterval) >= (realStartTime+rebootWindowMillis)) {
                        realStartTime = computeCalendarTime(mCalendar,
                                now + rebootIntervalMillis, rebootStartTime);
                    } else {
                        realStartTime = now + recheckInterval;
                    }
                } else {
                    // Schedule alarm for next check interval.
                    realStartTime = computeCalendarTime(mCalendar,
                            now + rebootIntervalMillis, rebootStartTime);
                }
            }
        }

        if (localLOGV) Slog.v(TAG, "Scheduling next reboot alarm for "
                + ((realStartTime-now)/1000/60) + "m from now");
        mAlarm.remove(mRebootIntent);
        mAlarm.set(AlarmManager.RTC_WAKEUP, realStartTime, mRebootIntent);
    }

    /**
     * Perform a full reboot of the system.
     *
     * @param reason text that is logged and passed on to the power manager
     */
    void rebootSystem(String reason) {
        Slog.i(TAG, "Rebooting system because: " + reason);
        PowerManagerService pms = (PowerManagerService) ServiceManager.getService("power");
        pms.reboot(reason);
    }

    /**
     * Load the current Gservices settings for when
     * {@link #shouldWeBeBrutalLocked} will allow the brutality to happen.
     * Must not be called with the lock held.
     *
     * <p>Both values are read in seconds and stored in milliseconds.
     */
    void retrieveBrutalityAmount() {
        mMinScreenOff = (mReqMinScreenOff >= 0 ? mReqMinScreenOff
                : Settings.Secure.getInt(
                mResolver, Settings.Secure.MEMCHECK_MIN_SCREEN_OFF,
                MEMCHECK_DEFAULT_MIN_SCREEN_OFF)) * 1000;
        mMinAlarm = (mReqMinNextAlarm >= 0 ? mReqMinNextAlarm
                : Settings.Secure.getInt(
                mResolver, Settings.Secure.MEMCHECK_MIN_ALARM,
                MEMCHECK_DEFAULT_MIN_ALARM)) * 1000;
    }

    /**
     * Determine whether it is a good time to kill, crash, or otherwise
     * plunder the current situation for the overall long-term benefit of
     * the world.
     *
     * @param curTime The current system time.
     * @return Returns null if this is a good time, else a String with the
     * text of why it is not a good time.
     */
    String shouldWeBeBrutalLocked(long curTime) {
        if (mBattery == null || !mBattery.isPowered()) {
            return "battery";
        }

        if (mMinScreenOff >= 0 && (mPower == null ||
                mPower.timeSinceScreenOn() < mMinScreenOff)) {
            return "screen";
        }

        if (mMinAlarm >= 0 && (mAlarm == null ||
                mAlarm.timeToNextAlarm() < mMinAlarm)) {
            return "alarm";
        }

        return null;
    }

    /**
     * Computes the first wall-clock time at or after {@code curTime} whose time of day
     * is {@code secondsSinceMidnight}.
     *
     * @param c calendar used as scratch space; its time is overwritten
     * @param curTime reference time in milliseconds since the epoch
     * @param secondsSinceMidnight desired time of day, in seconds
     * @return the resulting time in milliseconds since the epoch
     */
    static long computeCalendarTime(Calendar c, long curTime,
            long secondsSinceMidnight) {

        // start with now
        c.setTimeInMillis(curTime);

        int val = (int)secondsSinceMidnight / (60*60);
        c.set(Calendar.HOUR_OF_DAY, val);
        secondsSinceMidnight -= val * (60*60);
        val = (int)secondsSinceMidnight / 60;
        c.set(Calendar.MINUTE, val);
        c.set(Calendar.SECOND, (int)secondsSinceMidnight - (val*60));
        c.set(Calendar.MILLISECOND, 0);

        long newTime = c.getTimeInMillis();
        if (newTime < curTime) {
            // The given time (in seconds since midnight) has already passed for today, so advance
            // by one day (due to daylight savings, etc., the delta may differ from 24 hours).
            c.add(Calendar.DAY_OF_MONTH, 1);
            newTime = c.getTimeInMillis();
        }

        return newTime;
    }

    /**
     * Watchdog loop: posts a {@link #MONITOR} message, waits {@link #TIME_TO_WAIT}
     * milliseconds of uptime and checks whether the monitors completed. After the first
     * missed round a stack trace is taken; after the second consecutive one the process
     * is killed (unless a debugger is connected).
     */
    @Override
    public void run() {
        boolean waitedHalf = false;
        while (true) {
            mCompleted = false;
            mHandler.sendEmptyMessage(MONITOR);

            synchronized (this) {
                long timeout = TIME_TO_WAIT;

                // NOTE: We use uptimeMillis() here because we do not want to increment the time we
                // wait while asleep. If the device is asleep then the thing that we are waiting
                // to timeout on is asleep as well and won't have a chance to run, causing a false
                // positive on when to kill things.
                long start = SystemClock.uptimeMillis();
                while (timeout > 0 && !mForceKillSystem) {
                    try {
                        wait(timeout);  // notifyAll() is called when mForceKillSystem is set
                    } catch (InterruptedException e) {
                        Log.wtf(TAG, e);
                    }
                    timeout = TIME_TO_WAIT - (SystemClock.uptimeMillis() - start);
                }

                if (mCompleted && !mForceKillSystem) {
                    // The monitors have returned.
                    waitedHalf = false;
                    continue;
                }

                if (!waitedHalf) {
                    // We've waited half the deadlock-detection interval.  Pull a stack
                    // trace and wait another half.
                    ArrayList<Integer> pids = new ArrayList<Integer>();
                    pids.add(Process.myPid());
                    ActivityManagerService.dumpStackTraces(true, pids, null, null);
                    waitedHalf = true;
                    continue;
                }
            }

            // If we got here, that means that the system is most likely hung.
            // First collect stack traces from all threads of the system process.
            // Then kill this process so that the system will restart.

            // Read the field once: the handler thread resets it to null when a round
            // finishes, so checking and dereferencing it separately could hit a null.
            final Monitor stuckMonitor = mCurrentMonitor;
            final String name = (stuckMonitor != null) ?
                    stuckMonitor.getClass().getName() : "null";
            EventLog.writeEvent(EventLogTags.WATCHDOG, name);

            ArrayList<Integer> pids = new ArrayList<Integer>();
            pids.add(Process.myPid());
            if (mPhonePid > 0) pids.add(mPhonePid);
            // Pass !waitedHalf so that just in case we somehow wind up here without having
            // dumped the halfway stacks, we properly re-initialize the trace file.
            final File stack = ActivityManagerService.dumpStackTraces(
                    !waitedHalf, pids, null, null);

            // Give some extra time to make sure the stack traces get written.
            // The system's been hanging for a minute, another second or two won't hurt much.
            SystemClock.sleep(2000);

            // Pull our own kernel thread stacks as well if we're configured for that
            if (RECORD_KERNEL_THREADS) {
                dumpKernelStackTraces();
            }

            // Try to add the error to the dropbox, but assuming that the ActivityManager
            // itself may be deadlocked.  (which has happened, causing this statement to
            // deadlock and the watchdog as a whole to be ineffective)
            Thread dropboxThread = new Thread("watchdogWriteToDropbox") {
                    @Override
                    public void run() {
                        mActivity.addErrorToDropBox(
                                "watchdog", null, "system_server", null, null,
                                name, null, stack, null);
                    }
                };
            dropboxThread.start();
            try {
                dropboxThread.join(2000);  // wait up to 2 seconds for it to return.
            } catch (InterruptedException ignored) {}

            // Only kill the process if the debugger is not attached.
            if (!Debug.isDebuggerConnected()) {
                Slog.w(TAG, "*** WATCHDOG KILLING SYSTEM PROCESS: " + name);
                Process.killProcess(Process.myPid());
                System.exit(10);
            } else {
                Slog.w(TAG, "Debugger connected: Watchdog is *not* killing the system process");
            }

            waitedHalf = false;
        }
    }

    /**
     * Dumps kernel thread stacks to the file named by the
     * {@code dalvik.vm.stack-trace-file} system property.
     *
     * @return the trace file, or null when the property is unset or empty
     */
    private File dumpKernelStackTraces() {
        String tracesPath = SystemProperties.get("dalvik.vm.stack-trace-file", null);
        if (tracesPath == null || tracesPath.length() == 0) {
            return null;
        }

        native_dumpKernelStacks(tracesPath);
        return new File(tracesPath);
    }

    private native void native_dumpKernelStacks(String tracesPath);
}