/*
 * Copyright (C) 2008 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.server;

import com.android.server.am.ActivityManagerService;

import android.app.AlarmManager;
import android.app.PendingIntent;
import android.content.BroadcastReceiver;
import android.content.ContentResolver;
import android.content.Context;
import android.content.Intent;
import android.content.IntentFilter;
import android.os.Debug;
import android.os.Handler;
import android.os.Message;
import android.os.Process;
import android.os.ServiceManager;
import android.os.SystemClock;
import android.os.SystemProperties;
import android.provider.Settings;
import android.util.Config;
import android.util.EventLog;
import android.util.Log;
import android.util.Slog;

import java.io.File;
import java.util.ArrayList;
import java.util.Calendar;

/**
 * Watchdog thread for the process it runs in.
 *
 * <p>Each pass of {@link #run()} posts a {@link #MONITOR} message to {@link #mHandler} and
 * waits {@link #TIME_TO_WAIT} milliseconds of uptime. The handler calls every registered
 * {@link Monitor}. If the handler has not finished after one wait, stack traces are dumped;
 * if it has still not finished after a second consecutive wait, the process is killed
 * (unless a debugger is attached).
 *
 * <p>The class also schedules optional forced reboots, driven by secure settings or by an
 * {@link Intent#ACTION_REBOOT} broadcast.
 */
public class Watchdog extends Thread {
    static final String TAG = "Watchdog";
    static final boolean localLOGV = false || Config.LOGV;

    // Set this to true to use debug default values.
    static final boolean DB = false;

    // Set this to true to have the watchdog record kernel thread stacks when it fires
    static final boolean RECORD_KERNEL_THREADS = true;

    /** Message code handled by {@link HeartbeatHandler}. */
    static final int MONITOR = 2718;

    /** Total time (ms) the monitors may stay blocked before the process is killed. */
    static final int TIME_TO_RESTART = DB ? 15*1000 : 60*1000;
    /** Length (ms) of a single wait in {@link #run()}; half of {@link #TIME_TO_RESTART}. */
    static final int TIME_TO_WAIT = TIME_TO_RESTART / 2;

    // The MEMCHECK_* defaults below are in seconds; they are multiplied by 1000 where used.
    static final int MEMCHECK_DEFAULT_MIN_SCREEN_OFF = DB ? 1*60 : 5*60;   // 5 minutes
    static final int MEMCHECK_DEFAULT_MIN_ALARM = DB ? 1*60 : 3*60;        // 3 minutes
    static final int MEMCHECK_DEFAULT_RECHECK_INTERVAL = DB ? 1*60 : 5*60; // 5 minutes

    static final int REBOOT_DEFAULT_INTERVAL = DB ? 1 : 0;  // never force reboot
    static final int REBOOT_DEFAULT_START_TIME = 3*60*60;   // 3:00am
    static final int REBOOT_DEFAULT_WINDOW = 60*60;         // within 1 hour

    static final String REBOOT_ACTION = "com.android.service.Watchdog.REBOOT";

    static Watchdog sWatchdog;

    /*
     * This handler is used to post the MONITOR message. It is created in the constructor,
     * so it is bound to the thread that first calls getInstance().
     */
    final Handler mHandler;
    final ArrayList<Monitor> mMonitors = new ArrayList<Monitor>();
    ContentResolver mResolver;
    BatteryService mBattery;
    PowerManagerService mPower;
    AlarmManagerService mAlarm;
    ActivityManagerService mActivity;
    boolean mCompleted;
    boolean mForceKillSystem;
    Monitor mCurrentMonitor;

    int mPhonePid;

    final Calendar mCalendar = Calendar.getInstance();
    int mMinScreenOff = MEMCHECK_DEFAULT_MIN_SCREEN_OFF;
    int mMinAlarm = MEMCHECK_DEFAULT_MIN_ALARM;
    boolean mNeedScheduledCheck;
    PendingIntent mCheckupIntent;
    PendingIntent mRebootIntent;

    long mBootTime;
    int mRebootInterval;

    boolean mReqRebootNoWait;     // should wait for one interval before reboot?
    int mReqRebootInterval = -1;  // >= 0 if a reboot has been requested
    int mReqRebootStartTime = -1; // >= 0 if a specific start time has been requested
    int mReqRebootWindow = -1;    // >= 0 if a specific window has been requested
    int mReqMinScreenOff = -1;    // >= 0 if a specific screen off time has been requested
    int mReqMinNextAlarm = -1;    // >= 0 if specific time to next alarm has been requested
    int mReqRecheckInterval= -1;  // >= 0 if a specific recheck interval has been requested

    /**
     * Used for scheduling monitor callbacks and re-evaluating the reboot interval.
     */
    final class HeartbeatHandler extends Handler {
        @Override
        public void handleMessage(Message msg) {
            switch (msg.what) {
                case MONITOR: {
                    // See if we should force a reboot.
                    int rebootInterval = currentRebootIntervalSetting();
                    if (mRebootInterval != rebootInterval) {
                        mRebootInterval = rebootInterval;
                        // We have been running long enough that a reboot can
                        // be considered...
                        checkReboot(false);
                    }

                    // Call every monitor in turn. mCurrentMonitor records which one is
                    // running so run() can name it if it never returns.
                    final int size = mMonitors.size();
                    for (int i = 0 ; i < size ; i++) {
                        mCurrentMonitor = mMonitors.get(i);
                        mCurrentMonitor.monitor();
                    }

                    synchronized (Watchdog.this) {
                        mCompleted = true;
                        mCurrentMonitor = null;
                    }
                } break;
            }
        }
    }

    /** Receives the {@link #REBOOT_ACTION} alarm broadcast and re-evaluates the reboot. */
    final class RebootReceiver extends BroadcastReceiver {
        @Override
        public void onReceive(Context c, Intent intent) {
            if (localLOGV) Slog.v(TAG, "Alarm went off, checking reboot.");
            checkReboot(true);
        }
    }

    /**
     * Receives {@link Intent#ACTION_REBOOT}. Copies the optional integer extras into the
     * {@code mReq*} overrides (-1 when an extra is absent), logs the request and
     * re-evaluates the reboot.
     */
    final class RebootRequestReceiver extends BroadcastReceiver {
        @Override
        public void onReceive(Context c, Intent intent) {
            mReqRebootNoWait = intent.getIntExtra("nowait", 0) != 0;
            mReqRebootInterval = intent.getIntExtra("interval", -1);
            mReqRebootStartTime = intent.getIntExtra("startTime", -1);
            mReqRebootWindow = intent.getIntExtra("window", -1);
            mReqMinScreenOff = intent.getIntExtra("minScreenOff", -1);
            mReqMinNextAlarm = intent.getIntExtra("minNextAlarm", -1);
            mReqRecheckInterval = intent.getIntExtra("recheckInterval", -1);
            EventLog.writeEvent(EventLogTags.WATCHDOG_REQUESTED_REBOOT,
                    mReqRebootNoWait ? 1 : 0, mReqRebootInterval,
                    mReqRecheckInterval, mReqRebootStartTime,
                    mReqRebootWindow, mReqMinScreenOff, mReqMinNextAlarm);
            checkReboot(true);
        }
    }

    /** Callback invoked on the handler thread once per watchdog pass. */
    public interface Monitor {
        void monitor();
    }

    /**
     * Returns the singleton, creating it on first use. The instance's handler is created
     * in the constructor, i.e. on the thread that makes the first call.
     */
    public static Watchdog getInstance() {
        if (sWatchdog == null) {
            sWatchdog = new Watchdog();
        }

        return sWatchdog;
    }

    private Watchdog() {
        super("watchdog");
        mHandler = new HeartbeatHandler();
    }

    /**
     * Stores the services the watchdog consults, registers the reboot receivers and
     * records the current wall-clock time as the boot time.
     */
    public void init(Context context, BatteryService battery,
            PowerManagerService power, AlarmManagerService alarm,
            ActivityManagerService activity) {
        mResolver = context.getContentResolver();
        mBattery = battery;
        mPower = power;
        mAlarm = alarm;
        mActivity = activity;

        context.registerReceiver(new RebootReceiver(),
                new IntentFilter(REBOOT_ACTION));
        mRebootIntent = PendingIntent.getBroadcast(context,
                0, new Intent(REBOOT_ACTION), 0);

        // Only senders holding the REBOOT permission may request a reboot.
        context.registerReceiver(new RebootRequestReceiver(),
                new IntentFilter(Intent.ACTION_REBOOT),
                android.Manifest.permission.REBOOT, null);

        mBootTime = System.currentTimeMillis();
    }

    /**
     * Records the pid of the "com.android.phone" process so its stacks can be dumped
     * together with this process's when the watchdog fires. Other names are ignored.
     */
    public void processStarted(String name, int pid) {
        synchronized (this) {
            if ("com.android.phone".equals(name)) {
                mPhonePid = pid;
            }
        }
    }

    /**
     * Registers a monitor to be called on every watchdog pass.
     *
     * @throws RuntimeException if the watchdog thread is already running
     */
    public void addMonitor(Monitor monitor) {
        synchronized (this) {
            if (isAlive()) {
                throw new RuntimeException("Monitors can't be added while the Watchdog is running");
            }
            mMonitors.add(monitor);
        }
    }

    /**
     * Returns the reboot interval currently in effect: the broadcast-requested value when
     * one is set (>= 0), otherwise the secure setting, otherwise the built-in default.
     */
    private int currentRebootIntervalSetting() {
        if (mReqRebootInterval >= 0) {
            return mReqRebootInterval;
        }
        return Settings.Secure.getInt(
                mResolver, Settings.Secure.REBOOT_INTERVAL,
                REBOOT_DEFAULT_INTERVAL);
    }

    /**
     * Decides whether to reboot now and otherwise (re)schedules the reboot alarm.
     *
     * <p>The interval is in days; the start time is in seconds since midnight; the window
     * and recheck interval are in seconds. An interval of zero or less cancels the alarm.
     *
     * @param fromAlarm true when called from one of the broadcast receivers, false when
     *        called from the heartbeat handler; an immediate reboot for a zero-length
     *        window is only performed when this is true
     */
    void checkReboot(boolean fromAlarm) {
        int rebootInterval = currentRebootIntervalSetting();
        mRebootInterval = rebootInterval;
        if (rebootInterval <= 0) {
            // No reboot interval requested.
            if (localLOGV) Slog.v(TAG, "No need to schedule a reboot alarm!");
            mAlarm.remove(mRebootIntent);
            return;
        }

        long rebootStartTime = mReqRebootStartTime >= 0 ? mReqRebootStartTime
                : Settings.Secure.getLong(
                mResolver, Settings.Secure.REBOOT_START_TIME,
                REBOOT_DEFAULT_START_TIME);
        long rebootWindowMillis = (mReqRebootWindow >= 0 ? mReqRebootWindow
                : Settings.Secure.getLong(
                mResolver, Settings.Secure.REBOOT_WINDOW,
                REBOOT_DEFAULT_WINDOW)) * 1000;
        long recheckInterval = (mReqRecheckInterval >= 0 ? mReqRecheckInterval
                : Settings.Secure.getLong(
                mResolver, Settings.Secure.MEMCHECK_RECHECK_INTERVAL,
                MEMCHECK_DEFAULT_RECHECK_INTERVAL)) * 1000;

        retrieveBrutalityAmount();

        long realStartTime;
        long now;

        synchronized (this) {
            now = System.currentTimeMillis();
            realStartTime = computeCalendarTime(mCalendar, now,
                    rebootStartTime);

            // Multiply in long arithmetic: as an int expression, days * 86,400,000
            // overflows once the interval reaches 25 days.
            long rebootIntervalMillis = rebootInterval * 24L * 60 * 60 * 1000;
            if (DB || mReqRebootNoWait ||
                    (now-mBootTime) >= (rebootIntervalMillis-rebootWindowMillis)) {
                if (fromAlarm && rebootWindowMillis <= 0) {
                    // No reboot window -- just immediately reboot.
                    EventLog.writeEvent(EventLogTags.WATCHDOG_SCHEDULED_REBOOT, now,
                            (int)rebootIntervalMillis, (int)rebootStartTime*1000,
                            (int)rebootWindowMillis, "");
                    rebootSystem("Checkin scheduled forced");
                    return;
                }

                // Are we within the reboot window?
                // NOTE(review): computeCalendarTime() moves a start time that has already
                // passed today forward by one day, so realStartTime is never earlier than
                // now here and the in-window branch below only runs when
                // now == realStartTime. Confirm whether that is intended.
                if (now < realStartTime) {
                    // Schedule alarm for next check interval.
                    realStartTime = computeCalendarTime(mCalendar,
                            now, rebootStartTime);
                } else if (now < (realStartTime+rebootWindowMillis)) {
                    String doit = shouldWeBeBrutalLocked(now);
                    EventLog.writeEvent(EventLogTags.WATCHDOG_SCHEDULED_REBOOT, now,
                            (int)rebootInterval, (int)rebootStartTime*1000,
                            (int)rebootWindowMillis, doit != null ? doit : "");
                    if (doit == null) {
                        rebootSystem("Checked scheduled range");
                        return;
                    }

                    // Schedule next alarm either within the window or in the
                    // next interval.
                    if ((now+recheckInterval) >= (realStartTime+rebootWindowMillis)) {
                        realStartTime = computeCalendarTime(mCalendar,
                                now + rebootIntervalMillis, rebootStartTime);
                    } else {
                        realStartTime = now + recheckInterval;
                    }
                } else {
                    // Schedule alarm for next check interval.
                    realStartTime = computeCalendarTime(mCalendar,
                            now + rebootIntervalMillis, rebootStartTime);
                }
            }
        }

        if (localLOGV) Slog.v(TAG, "Scheduling next reboot alarm for "
                + ((realStartTime-now)/1000/60) + "m from now");
        mAlarm.remove(mRebootIntent);
        mAlarm.set(AlarmManager.RTC_WAKEUP, realStartTime, mRebootIntent);
    }

    /**
     * Perform a full reboot of the system.
     *
     * @param reason text that is logged and passed to the power manager's reboot call
     */
    void rebootSystem(String reason) {
        Slog.i(TAG, "Rebooting system because: " + reason);
        PowerManagerService pms = (PowerManagerService) ServiceManager.getService("power");
        pms.reboot(reason);
    }

    /**
     * Load the current Gservices settings for when
     * {@link #shouldWeBeBrutalLocked} will allow the brutality to happen.
     * Must not be called with the lock held.
     *
     * <p>Both values are read in seconds and stored in milliseconds.
     */
    void retrieveBrutalityAmount() {
        mMinScreenOff = (mReqMinScreenOff >= 0 ? mReqMinScreenOff
                : Settings.Secure.getInt(
                mResolver, Settings.Secure.MEMCHECK_MIN_SCREEN_OFF,
                MEMCHECK_DEFAULT_MIN_SCREEN_OFF)) * 1000;
        mMinAlarm = (mReqMinNextAlarm >= 0 ? mReqMinNextAlarm
                : Settings.Secure.getInt(
                mResolver, Settings.Secure.MEMCHECK_MIN_ALARM,
                MEMCHECK_DEFAULT_MIN_ALARM)) * 1000;
    }

    /**
     * Determine whether it is a good time to kill, crash, or otherwise
     * plunder the current situation for the overall long-term benefit of
     * the world.
     *
     * <p>The checks run in order: the device must be powered, the screen must have been
     * off for at least {@link #mMinScreenOff}, and the next alarm must be at least
     * {@link #mMinAlarm} away. A missing service counts as a failed check.
     *
     * @param curTime The current system time (not used by the current checks).
     * @return Returns null if this is a good time, else a String with the
     * text of why it is not a good time ("battery", "screen" or "alarm").
     */
    String shouldWeBeBrutalLocked(long curTime) {
        if (mBattery == null || !mBattery.isPowered()) {
            return "battery";
        }

        if (mMinScreenOff >= 0 && (mPower == null ||
                mPower.timeSinceScreenOn() < mMinScreenOff)) {
            return "screen";
        }

        if (mMinAlarm >= 0 && (mAlarm == null ||
                mAlarm.timeToNextAlarm() < mMinAlarm)) {
            return "alarm";
        }

        return null;
    }

    /**
     * Returns the first time at or after {@code curTime} whose local time of day equals
     * {@code secondsSinceMidnight}.
     *
     * @param c calendar used as scratch space; its time is overwritten
     * @param curTime reference time in milliseconds
     * @param secondsSinceMidnight desired time of day, in seconds since midnight
     * @return the computed time in milliseconds
     */
    static long computeCalendarTime(Calendar c, long curTime,
            long secondsSinceMidnight) {

        // start with now
        c.setTimeInMillis(curTime);

        int val = (int)secondsSinceMidnight / (60*60);
        c.set(Calendar.HOUR_OF_DAY, val);
        secondsSinceMidnight -= val * (60*60);
        val = (int)secondsSinceMidnight / 60;
        c.set(Calendar.MINUTE, val);
        c.set(Calendar.SECOND, (int)secondsSinceMidnight - (val*60));
        c.set(Calendar.MILLISECOND, 0);

        long newTime = c.getTimeInMillis();
        if (newTime < curTime) {
            // The given time (in seconds since midnight) has already passed for today, so advance
            // by one day (due to daylight savings, etc., the delta may differ from 24 hours).
            c.add(Calendar.DAY_OF_MONTH, 1);
            newTime = c.getTimeInMillis();
        }

        return newTime;
    }

    /**
     * Watchdog loop. Posts a MONITOR message, waits {@link #TIME_TO_WAIT} ms of uptime and
     * checks whether the handler completed. The first missed deadline dumps this
     * process's stacks; a second consecutive miss collects diagnostics and kills the
     * process unless a debugger is connected.
     */
    @Override
    public void run() {
        boolean waitedHalf = false;
        while (true) {
            mCompleted = false;
            mHandler.sendEmptyMessage(MONITOR);

            synchronized (this) {
                long timeout = TIME_TO_WAIT;

                // NOTE: We use uptimeMillis() here because we do not want to increment the time we
                // wait while asleep. If the device is asleep then the thing that we are waiting
                // to timeout on is asleep as well and won't have a chance to run, causing a false
                // positive on when to kill things.
                long start = SystemClock.uptimeMillis();
                while (timeout > 0 && !mForceKillSystem) {
                    try {
                        wait(timeout);  // notifyAll() is called when mForceKillSystem is set
                    } catch (InterruptedException e) {
                        Log.wtf(TAG, e);
                    }
                    timeout = TIME_TO_WAIT - (SystemClock.uptimeMillis() - start);
                }

                if (mCompleted && !mForceKillSystem) {
                    // The monitors have returned.
                    waitedHalf = false;
                    continue;
                }

                if (!waitedHalf) {
                    // We've waited half the deadlock-detection interval.  Pull a stack
                    // trace and wait another half.
                    ArrayList<Integer> pids = new ArrayList<Integer>();
                    pids.add(Process.myPid());
                    ActivityManagerService.dumpStackTraces(true, pids, null, null);
                    waitedHalf = true;
                    continue;
                }
            }

            // If we got here, that means that the system is most likely hung.
            // First collect stack traces from all threads of the system process.
            // Then kill this process so that the system will restart.

            // Read mCurrentMonitor once: the handler thread may clear it at any moment,
            // so a separate null check and dereference could throw.
            final Monitor blockedMonitor = mCurrentMonitor;
            String name = (blockedMonitor != null) ?
                    blockedMonitor.getClass().getName() : "null";
            EventLog.writeEvent(EventLogTags.WATCHDOG, name);

            ArrayList<Integer> pids = new ArrayList<Integer>();
            pids.add(Process.myPid());
            if (mPhonePid > 0) pids.add(mPhonePid);
            // Pass !waitedHalf so that just in case we somehow wind up here without having
            // dumped the halfway stacks, we properly re-initialize the trace file.
            File stack = ActivityManagerService.dumpStackTraces(!waitedHalf, pids, null, null);

            // Give some extra time to make sure the stack traces get written.
            // The system's been hanging for a minute, another second or two won't hurt much.
            SystemClock.sleep(2000);

            // Pull our own kernel thread stacks as well if we're configured for that
            if (RECORD_KERNEL_THREADS) {
                dumpKernelStackTraces();
            }

            mActivity.addErrorToDropBox("watchdog", null, null, null, name, null, stack, null);

            // Only kill the process if the debugger is not attached.
            if (!Debug.isDebuggerConnected()) {
                Slog.w(TAG, "*** WATCHDOG KILLING SYSTEM PROCESS: " + name);
                Process.killProcess(Process.myPid());
                System.exit(10);
            } else {
                Slog.w(TAG, "Debugger connected: Watchdog is *not* killing the system process");
            }

            waitedHalf = false;
        }
    }

    /**
     * Asks native code to dump kernel stacks to the file named by the
     * "dalvik.vm.stack-trace-file" system property.
     *
     * @return the trace file, or null when the property is unset or empty
     */
    private File dumpKernelStackTraces() {
        String tracesPath = SystemProperties.get("dalvik.vm.stack-trace-file", null);
        if (tracesPath == null || tracesPath.length() == 0) {
            return null;
        }

        native_dumpKernelStacks(tracesPath);
        return new File(tracesPath);
    }

    private native void native_dumpKernelStacks(String tracesPath);
}