1 /* 2 * Copyright (C) 2008 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.android.server; 18 19 import com.android.server.am.ActivityManagerService; 20 21 import android.app.AlarmManager; 22 import android.app.PendingIntent; 23 import android.content.BroadcastReceiver; 24 import android.content.ContentResolver; 25 import android.content.Context; 26 import android.content.Intent; 27 import android.content.IntentFilter; 28 import android.os.Debug; 29 import android.os.Handler; 30 import android.os.Message; 31 import android.os.Process; 32 import android.os.ServiceManager; 33 import android.os.SystemClock; 34 import android.os.SystemProperties; 35 import android.provider.Settings; 36 import android.util.Config; 37 import android.util.EventLog; 38 import android.util.Log; 39 import android.util.Slog; 40 41 import java.io.File; 42 import java.io.FileInputStream; 43 import java.io.FileOutputStream; 44 import java.io.IOException; 45 import java.util.ArrayList; 46 import java.util.Calendar; 47 48 /** This class calls its monitor every minute. Killing this process if they don't return **/ 49 public class Watchdog extends Thread { 50 static final String TAG = "Watchdog"; 51 static final boolean localLOGV = false || Config.LOGV; 52 53 // Set this to true to use debug default values. 54 static final boolean DB = false; 55 56 // Set this to true to have the watchdog record kernel thread stacks when it fires 57 static final boolean RECORD_KERNEL_THREADS = true; 58 59 static final int MONITOR = 2718; 60 static final int GLOBAL_PSS = 2719; 61 62 static final int TIME_TO_RESTART = DB ? 15*1000 : 60*1000; 63 static final int TIME_TO_WAIT = TIME_TO_RESTART / 2; 64 65 static final int MEMCHECK_DEFAULT_INTERVAL = DB ? 30 : 30*60; // 30 minutes 66 static final int MEMCHECK_DEFAULT_LOG_REALTIME_INTERVAL = DB ? 60 : 2*60*60; // 2 hours 67 static final int MEMCHECK_DEFAULT_SYSTEM_SOFT_THRESHOLD = (DB ? 10:16)*1024*1024; // 16MB 68 static final int MEMCHECK_DEFAULT_SYSTEM_HARD_THRESHOLD = (DB ? 14:20)*1024*1024; // 20MB 69 static final int MEMCHECK_DEFAULT_PHONE_SOFT_THRESHOLD = (DB ? 4:8)*1024*1024; // 8MB 70 static final int MEMCHECK_DEFAULT_PHONE_HARD_THRESHOLD = (DB ? 8:12)*1024*1024; // 12MB 71 72 static final int MEMCHECK_DEFAULT_EXEC_START_TIME = 1*60*60; // 1:00am 73 static final int MEMCHECK_DEFAULT_EXEC_END_TIME = 5*60*60; // 5:00am 74 static final int MEMCHECK_DEFAULT_MIN_SCREEN_OFF = DB ? 1*60 : 5*60; // 5 minutes 75 static final int MEMCHECK_DEFAULT_MIN_ALARM = DB ? 1*60 : 3*60; // 3 minutes 76 static final int MEMCHECK_DEFAULT_RECHECK_INTERVAL = DB ? 1*60 : 5*60; // 5 minutes 77 78 static final int REBOOT_DEFAULT_INTERVAL = DB ? 1 : 0; // never force reboot 79 static final int REBOOT_DEFAULT_START_TIME = 3*60*60; // 3:00am 80 static final int REBOOT_DEFAULT_WINDOW = 60*60; // within 1 hour 81 82 static final String CHECKUP_ACTION = "com.android.service.Watchdog.CHECKUP"; 83 static final String REBOOT_ACTION = "com.android.service.Watchdog.REBOOT"; 84 85 static Watchdog sWatchdog; 86 87 /* This handler will be used to post message back onto the main thread */ 88 final Handler mHandler; 89 final Runnable mGlobalPssCollected; 90 final ArrayList<Monitor> mMonitors = new ArrayList<Monitor>(); 91 ContentResolver mResolver; 92 BatteryService mBattery; 93 PowerManagerService mPower; 94 AlarmManagerService mAlarm; 95 ActivityManagerService mActivity; 96 boolean mCompleted; 97 boolean mForceKillSystem; 98 Monitor mCurrentMonitor; 99 100 PssRequestor mPhoneReq; 101 int mPhonePid; 102 int mPhonePss; 103 104 long mLastMemCheckTime = -(MEMCHECK_DEFAULT_INTERVAL*1000); 105 boolean mHavePss; 106 long mLastMemCheckRealtime = -(MEMCHECK_DEFAULT_LOG_REALTIME_INTERVAL*1000); 107 boolean mHaveGlobalPss; 108 final MemMonitor mSystemMemMonitor = new MemMonitor("system", 109 Settings.Secure.MEMCHECK_SYSTEM_ENABLED, 110 Settings.Secure.MEMCHECK_SYSTEM_SOFT_THRESHOLD, 111 MEMCHECK_DEFAULT_SYSTEM_SOFT_THRESHOLD, 112 Settings.Secure.MEMCHECK_SYSTEM_HARD_THRESHOLD, 113 MEMCHECK_DEFAULT_SYSTEM_HARD_THRESHOLD); 114 final MemMonitor mPhoneMemMonitor = new MemMonitor("com.android.phone", 115 Settings.Secure.MEMCHECK_PHONE_ENABLED, 116 Settings.Secure.MEMCHECK_PHONE_SOFT_THRESHOLD, 117 MEMCHECK_DEFAULT_PHONE_SOFT_THRESHOLD, 118 Settings.Secure.MEMCHECK_PHONE_HARD_THRESHOLD, 119 MEMCHECK_DEFAULT_PHONE_HARD_THRESHOLD); 120 121 final Calendar mCalendar = Calendar.getInstance(); 122 long mMemcheckLastTime; 123 long mMemcheckExecStartTime; 124 long mMemcheckExecEndTime; 125 int mMinScreenOff = MEMCHECK_DEFAULT_MIN_SCREEN_OFF; 126 int mMinAlarm = MEMCHECK_DEFAULT_MIN_ALARM; 127 boolean mNeedScheduledCheck; 128 PendingIntent mCheckupIntent; 129 PendingIntent mRebootIntent; 130 131 long mBootTime; 132 int mRebootInterval; 133 134 boolean mReqRebootNoWait; // should wait for one interval before reboot? 135 int mReqRebootInterval = -1; // >= 0 if a reboot has been requested 136 int mReqRebootStartTime = -1; // >= 0 if a specific start time has been requested 137 int mReqRebootWindow = -1; // >= 0 if a specific window has been requested 138 int mReqMinScreenOff = -1; // >= 0 if a specific screen off time has been requested 139 int mReqMinNextAlarm = -1; // >= 0 if specific time to next alarm has been requested 140 int mReqRecheckInterval= -1; // >= 0 if a specific recheck interval has been requested 141 142 /** 143 * This class monitors the memory in a particular process. 144 */ 145 final class MemMonitor { 146 final String mProcessName; 147 final String mEnabledSetting; 148 final String mSoftSetting; 149 final String mHardSetting; 150 151 int mSoftThreshold; 152 int mHardThreshold; 153 boolean mEnabled; 154 long mLastPss; 155 156 static final int STATE_OK = 0; 157 static final int STATE_SOFT = 1; 158 static final int STATE_HARD = 2; 159 int mState; 160 161 MemMonitor(String processName, String enabledSetting, 162 String softSetting, int defSoftThreshold, 163 String hardSetting, int defHardThreshold) { 164 mProcessName = processName; 165 mEnabledSetting = enabledSetting; 166 mSoftSetting = softSetting; 167 mHardSetting = hardSetting; 168 mSoftThreshold = defSoftThreshold; 169 mHardThreshold = defHardThreshold; 170 } 171 172 void retrieveSettings(ContentResolver resolver) { 173 mSoftThreshold = Settings.Secure.getInt( 174 resolver, mSoftSetting, mSoftThreshold); 175 mHardThreshold = Settings.Secure.getInt( 176 resolver, mHardSetting, mHardThreshold); 177 mEnabled = Settings.Secure.getInt( 178 resolver, mEnabledSetting, 0) != 0; 179 } 180 181 boolean checkLocked(long curTime, int pid, int pss) { 182 mLastPss = pss; 183 if (mLastPss < mSoftThreshold) { 184 mState = STATE_OK; 185 } else if (mLastPss < mHardThreshold) { 186 mState = STATE_SOFT; 187 } else { 188 mState = STATE_HARD; 189 } 190 EventLog.writeEvent(EventLogTags.WATCHDOG_PROC_PSS, mProcessName, pid, mLastPss); 191 192 if (mState == STATE_OK) { 193 // Memory is good, don't recover. 194 return false; 195 } 196 197 if (mState == STATE_HARD) { 198 // Memory is really bad, kill right now. 199 EventLog.writeEvent(EventLogTags.WATCHDOG_HARD_RESET, mProcessName, pid, 200 mHardThreshold, mLastPss); 201 return mEnabled; 202 } 203 204 // It is time to schedule a reset... 205 // Check if we are currently within the time to kill processes due 206 // to memory use. 207 computeMemcheckTimesLocked(curTime); 208 String skipReason = null; 209 if (curTime < mMemcheckExecStartTime || curTime > mMemcheckExecEndTime) { 210 skipReason = "time"; 211 } else { 212 skipReason = shouldWeBeBrutalLocked(curTime); 213 } 214 EventLog.writeEvent(EventLogTags.WATCHDOG_SOFT_RESET, mProcessName, pid, 215 mSoftThreshold, mLastPss, skipReason != null ? skipReason : ""); 216 if (skipReason != null) { 217 mNeedScheduledCheck = true; 218 return false; 219 } 220 return mEnabled; 221 } 222 223 void clear() { 224 mLastPss = 0; 225 mState = STATE_OK; 226 } 227 } 228 229 /** 230 * Used for scheduling monitor callbacks and checking memory usage. 231 */ 232 final class HeartbeatHandler extends Handler { 233 @Override 234 public void handleMessage(Message msg) { 235 switch (msg.what) { 236 case GLOBAL_PSS: { 237 if (mHaveGlobalPss) { 238 // During the last pass we collected pss information, so 239 // now it is time to report it. 240 mHaveGlobalPss = false; 241 if (localLOGV) Slog.v(TAG, "Received global pss, logging."); 242 logGlobalMemory(); 243 } 244 } break; 245 246 case MONITOR: { 247 if (mHavePss) { 248 // During the last pass we collected pss information, so 249 // now it is time to report it. 250 mHavePss = false; 251 if (localLOGV) Slog.v(TAG, "Have pss, checking memory."); 252 checkMemory(); 253 } 254 255 if (mHaveGlobalPss) { 256 // During the last pass we collected pss information, so 257 // now it is time to report it. 258 mHaveGlobalPss = false; 259 if (localLOGV) Slog.v(TAG, "Have global pss, logging."); 260 logGlobalMemory(); 261 } 262 263 long now = SystemClock.uptimeMillis(); 264 265 // See if we should force a reboot. 266 int rebootInterval = mReqRebootInterval >= 0 267 ? mReqRebootInterval : Settings.Secure.getInt( 268 mResolver, Settings.Secure.REBOOT_INTERVAL, 269 REBOOT_DEFAULT_INTERVAL); 270 if (mRebootInterval != rebootInterval) { 271 mRebootInterval = rebootInterval; 272 // We have been running long enough that a reboot can 273 // be considered... 274 checkReboot(false); 275 } 276 277 // See if we should check memory conditions. 278 long memCheckInterval = Settings.Secure.getLong( 279 mResolver, Settings.Secure.MEMCHECK_INTERVAL, 280 MEMCHECK_DEFAULT_INTERVAL) * 1000; 281 if ((mLastMemCheckTime+memCheckInterval) < now) { 282 // It is now time to collect pss information. This 283 // is async so we won't report it now. And to keep 284 // things simple, we will assume that everyone has 285 // reported back by the next MONITOR message. 286 mLastMemCheckTime = now; 287 if (localLOGV) Slog.v(TAG, "Collecting memory usage."); 288 collectMemory(); 289 mHavePss = true; 290 291 long memCheckRealtimeInterval = Settings.Secure.getLong( 292 mResolver, Settings.Secure.MEMCHECK_LOG_REALTIME_INTERVAL, 293 MEMCHECK_DEFAULT_LOG_REALTIME_INTERVAL) * 1000; 294 long realtimeNow = SystemClock.elapsedRealtime(); 295 if ((mLastMemCheckRealtime+memCheckRealtimeInterval) < realtimeNow) { 296 mLastMemCheckRealtime = realtimeNow; 297 if (localLOGV) Slog.v(TAG, "Collecting global memory usage."); 298 collectGlobalMemory(); 299 mHaveGlobalPss = true; 300 } 301 } 302 303 final int size = mMonitors.size(); 304 for (int i = 0 ; i < size ; i++) { 305 mCurrentMonitor = mMonitors.get(i); 306 mCurrentMonitor.monitor(); 307 } 308 309 synchronized (Watchdog.this) { 310 mCompleted = true; 311 mCurrentMonitor = null; 312 } 313 } break; 314 } 315 } 316 } 317 318 final class GlobalPssCollected implements Runnable { 319 public void run() { 320 mHandler.sendEmptyMessage(GLOBAL_PSS); 321 } 322 } 323 324 final class CheckupReceiver extends BroadcastReceiver { 325 @Override 326 public void onReceive(Context c, Intent intent) { 327 if (localLOGV) Slog.v(TAG, "Alarm went off, checking memory."); 328 checkMemory(); 329 } 330 } 331 332 final class RebootReceiver extends BroadcastReceiver { 333 @Override 334 public void onReceive(Context c, Intent intent) { 335 if (localLOGV) Slog.v(TAG, "Alarm went off, checking reboot."); 336 checkReboot(true); 337 } 338 } 339 340 final class RebootRequestReceiver extends BroadcastReceiver { 341 @Override 342 public void onReceive(Context c, Intent intent) { 343 mReqRebootNoWait = intent.getIntExtra("nowait", 0) != 0; 344 mReqRebootInterval = intent.getIntExtra("interval", -1); 345 mReqRebootStartTime = intent.getIntExtra("startTime", -1); 346 mReqRebootWindow = intent.getIntExtra("window", -1); 347 mReqMinScreenOff = intent.getIntExtra("minScreenOff", -1); 348 mReqMinNextAlarm = intent.getIntExtra("minNextAlarm", -1); 349 mReqRecheckInterval = intent.getIntExtra("recheckInterval", -1); 350 EventLog.writeEvent(EventLogTags.WATCHDOG_REQUESTED_REBOOT, 351 mReqRebootNoWait ? 1 : 0, mReqRebootInterval, 352 mReqRecheckInterval, mReqRebootStartTime, 353 mReqRebootWindow, mReqMinScreenOff, mReqMinNextAlarm); 354 checkReboot(true); 355 } 356 } 357 358 public interface Monitor { 359 void monitor(); 360 } 361 362 public interface PssRequestor { 363 void requestPss(); 364 } 365 366 public class PssStats { 367 public int mEmptyPss; 368 public int mEmptyCount; 369 public int mBackgroundPss; 370 public int mBackgroundCount; 371 public int mServicePss; 372 public int mServiceCount; 373 public int mVisiblePss; 374 public int mVisibleCount; 375 public int mForegroundPss; 376 public int mForegroundCount; 377 378 public int mNoPssCount; 379 380 public int mProcDeaths[] = new int[10]; 381 } 382 383 public static Watchdog getInstance() { 384 if (sWatchdog == null) { 385 sWatchdog = new Watchdog(); 386 } 387 388 return sWatchdog; 389 } 390 391 private Watchdog() { 392 super("watchdog"); 393 mHandler = new HeartbeatHandler(); 394 mGlobalPssCollected = new GlobalPssCollected(); 395 } 396 397 public void init(Context context, BatteryService battery, 398 PowerManagerService power, AlarmManagerService alarm, 399 ActivityManagerService activity) { 400 mResolver = context.getContentResolver(); 401 mBattery = battery; 402 mPower = power; 403 mAlarm = alarm; 404 mActivity = activity; 405 406 context.registerReceiver(new CheckupReceiver(), 407 new IntentFilter(CHECKUP_ACTION)); 408 mCheckupIntent = PendingIntent.getBroadcast(context, 409 0, new Intent(CHECKUP_ACTION), 0); 410 411 context.registerReceiver(new RebootReceiver(), 412 new IntentFilter(REBOOT_ACTION)); 413 mRebootIntent = PendingIntent.getBroadcast(context, 414 0, new Intent(REBOOT_ACTION), 0); 415 416 context.registerReceiver(new RebootRequestReceiver(), 417 new IntentFilter(Intent.ACTION_REBOOT), 418 android.Manifest.permission.REBOOT, null); 419 420 mBootTime = System.currentTimeMillis(); 421 } 422 423 public void processStarted(PssRequestor req, String name, int pid) { 424 synchronized (this) { 425 if ("com.android.phone".equals(name)) { 426 mPhoneReq = req; 427 mPhonePid = pid; 428 mPhonePss = 0; 429 } 430 } 431 } 432 433 public void reportPss(PssRequestor req, String name, int pss) { 434 synchronized (this) { 435 if (mPhoneReq == req) { 436 mPhonePss = pss; 437 } 438 } 439 } 440 441 public void addMonitor(Monitor monitor) { 442 synchronized (this) { 443 if (isAlive()) { 444 throw new RuntimeException("Monitors can't be added while the Watchdog is running"); 445 } 446 mMonitors.add(monitor); 447 } 448 } 449 450 /** 451 * Retrieve memory usage information from specific processes being 452 * monitored. This is an async operation, so must be done before doing 453 * memory checks. 454 */ 455 void collectMemory() { 456 synchronized (this) { 457 if (mPhoneReq != null) { 458 mPhoneReq.requestPss(); 459 } 460 } 461 } 462 463 /** 464 * Retrieve memory usage over all application processes. This is an 465 * async operation, so must be done before doing memory checks. 466 */ 467 void collectGlobalMemory() { 468 mActivity.requestPss(mGlobalPssCollected); 469 } 470 471 /** 472 * Check memory usage in the system, scheduling kills/reboots as needed. 473 * This always runs on the mHandler thread. 474 */ 475 void checkMemory() { 476 boolean needScheduledCheck; 477 long curTime; 478 long nextTime = 0; 479 480 long recheckInterval = Settings.Secure.getLong( 481 mResolver, Settings.Secure.MEMCHECK_RECHECK_INTERVAL, 482 MEMCHECK_DEFAULT_RECHECK_INTERVAL) * 1000; 483 484 mSystemMemMonitor.retrieveSettings(mResolver); 485 mPhoneMemMonitor.retrieveSettings(mResolver); 486 retrieveBrutalityAmount(); 487 488 synchronized (this) { 489 curTime = System.currentTimeMillis(); 490 mNeedScheduledCheck = false; 491 492 // How is the system doing? 493 if (mSystemMemMonitor.checkLocked(curTime, Process.myPid(), 494 (int)Process.getPss(Process.myPid()))) { 495 // Not good! Time to suicide. 496 mForceKillSystem = true; 497 notifyAll(); 498 return; 499 } 500 501 // How is the phone process doing? 502 if (mPhoneReq != null) { 503 if (mPhoneMemMonitor.checkLocked(curTime, mPhonePid, 504 mPhonePss)) { 505 // Just kill the phone process and let it restart. 506 Slog.i(TAG, "Watchdog is killing the phone process"); 507 Process.killProcess(mPhonePid); 508 } 509 } else { 510 mPhoneMemMonitor.clear(); 511 } 512 513 needScheduledCheck = mNeedScheduledCheck; 514 if (needScheduledCheck) { 515 // Something is going bad, but now is not a good time to 516 // tear things down... schedule an alarm to check again soon. 517 nextTime = curTime + recheckInterval; 518 if (nextTime < mMemcheckExecStartTime) { 519 nextTime = mMemcheckExecStartTime; 520 } else if (nextTime >= mMemcheckExecEndTime){ 521 // Need to check during next exec time... so that needs 522 // to be computed. 523 if (localLOGV) Slog.v(TAG, "Computing next time range"); 524 computeMemcheckTimesLocked(nextTime); 525 nextTime = mMemcheckExecStartTime; 526 } 527 528 if (localLOGV) { 529 mCalendar.setTimeInMillis(nextTime); 530 Slog.v(TAG, "Next Alarm Time: " + mCalendar); 531 } 532 } 533 } 534 535 if (needScheduledCheck) { 536 if (localLOGV) Slog.v(TAG, "Scheduling next memcheck alarm for " 537 + ((nextTime-curTime)/1000/60) + "m from now"); 538 mAlarm.remove(mCheckupIntent); 539 mAlarm.set(AlarmManager.RTC_WAKEUP, nextTime, mCheckupIntent); 540 } else { 541 if (localLOGV) Slog.v(TAG, "No need to schedule a memcheck alarm!"); 542 mAlarm.remove(mCheckupIntent); 543 } 544 } 545 546 final PssStats mPssStats = new PssStats(); 547 final String[] mMemInfoFields = new String[] { 548 "MemFree:", "Buffers:", "Cached:", 549 "Active:", "Inactive:", 550 "AnonPages:", "Mapped:", "Slab:", 551 "SReclaimable:", "SUnreclaim:", "PageTables:" }; 552 final long[] mMemInfoSizes = new long[mMemInfoFields.length]; 553 final String[] mVMStatFields = new String[] { 554 "pgfree ", "pgactivate ", "pgdeactivate ", 555 "pgfault ", "pgmajfault " }; 556 final long[] mVMStatSizes = new long[mVMStatFields.length]; 557 final long[] mPrevVMStatSizes = new long[mVMStatFields.length]; 558 long mLastLogGlobalMemoryTime; 559 560 void logGlobalMemory() { 561 PssStats stats = mPssStats; 562 mActivity.collectPss(stats); 563 EventLog.writeEvent(EventLogTags.WATCHDOG_PSS_STATS, 564 stats.mEmptyPss, stats.mEmptyCount, 565 stats.mBackgroundPss, stats.mBackgroundCount, 566 stats.mServicePss, stats.mServiceCount, 567 stats.mVisiblePss, stats.mVisibleCount, 568 stats.mForegroundPss, stats.mForegroundCount, 569 stats.mNoPssCount); 570 EventLog.writeEvent(EventLogTags.WATCHDOG_PROC_STATS, 571 stats.mProcDeaths[0], stats.mProcDeaths[1], stats.mProcDeaths[2], 572 stats.mProcDeaths[3], stats.mProcDeaths[4]); 573 Process.readProcLines("/proc/meminfo", mMemInfoFields, mMemInfoSizes); 574 for (int i=0; i<mMemInfoSizes.length; i++) { 575 mMemInfoSizes[i] *= 1024; 576 } 577 EventLog.writeEvent(EventLogTags.WATCHDOG_MEMINFO, 578 (int)mMemInfoSizes[0], (int)mMemInfoSizes[1], (int)mMemInfoSizes[2], 579 (int)mMemInfoSizes[3], (int)mMemInfoSizes[4], 580 (int)mMemInfoSizes[5], (int)mMemInfoSizes[6], (int)mMemInfoSizes[7], 581 (int)mMemInfoSizes[8], (int)mMemInfoSizes[9], (int)mMemInfoSizes[10]); 582 long now = SystemClock.uptimeMillis(); 583 long dur = now - mLastLogGlobalMemoryTime; 584 mLastLogGlobalMemoryTime = now; 585 Process.readProcLines("/proc/vmstat", mVMStatFields, mVMStatSizes); 586 for (int i=0; i<mVMStatSizes.length; i++) { 587 long v = mVMStatSizes[i]; 588 mVMStatSizes[i] -= mPrevVMStatSizes[i]; 589 mPrevVMStatSizes[i] = v; 590 } 591 EventLog.writeEvent(EventLogTags.WATCHDOG_VMSTAT, dur, 592 (int)mVMStatSizes[0], (int)mVMStatSizes[1], (int)mVMStatSizes[2], 593 (int)mVMStatSizes[3], (int)mVMStatSizes[4]); 594 } 595 596 void checkReboot(boolean fromAlarm) { 597 int rebootInterval = mReqRebootInterval >= 0 ? mReqRebootInterval 598 : Settings.Secure.getInt( 599 mResolver, Settings.Secure.REBOOT_INTERVAL, 600 REBOOT_DEFAULT_INTERVAL); 601 mRebootInterval = rebootInterval; 602 if (rebootInterval <= 0) { 603 // No reboot interval requested. 604 if (localLOGV) Slog.v(TAG, "No need to schedule a reboot alarm!"); 605 mAlarm.remove(mRebootIntent); 606 return; 607 } 608 609 long rebootStartTime = mReqRebootStartTime >= 0 ? mReqRebootStartTime 610 : Settings.Secure.getLong( 611 mResolver, Settings.Secure.REBOOT_START_TIME, 612 REBOOT_DEFAULT_START_TIME); 613 long rebootWindowMillis = (mReqRebootWindow >= 0 ? mReqRebootWindow 614 : Settings.Secure.getLong( 615 mResolver, Settings.Secure.REBOOT_WINDOW, 616 REBOOT_DEFAULT_WINDOW)) * 1000; 617 long recheckInterval = (mReqRecheckInterval >= 0 ? mReqRecheckInterval 618 : Settings.Secure.getLong( 619 mResolver, Settings.Secure.MEMCHECK_RECHECK_INTERVAL, 620 MEMCHECK_DEFAULT_RECHECK_INTERVAL)) * 1000; 621 622 retrieveBrutalityAmount(); 623 624 long realStartTime; 625 long now; 626 627 synchronized (this) { 628 now = System.currentTimeMillis(); 629 realStartTime = computeCalendarTime(mCalendar, now, 630 rebootStartTime); 631 632 long rebootIntervalMillis = rebootInterval*24*60*60*1000; 633 if (DB || mReqRebootNoWait || 634 (now-mBootTime) >= (rebootIntervalMillis-rebootWindowMillis)) { 635 if (fromAlarm && rebootWindowMillis <= 0) { 636 // No reboot window -- just immediately reboot. 637 EventLog.writeEvent(EventLogTags.WATCHDOG_SCHEDULED_REBOOT, now, 638 (int)rebootIntervalMillis, (int)rebootStartTime*1000, 639 (int)rebootWindowMillis, ""); 640 rebootSystem("Checkin scheduled forced"); 641 return; 642 } 643 644 // Are we within the reboot window? 645 if (now < realStartTime) { 646 // Schedule alarm for next check interval. 647 realStartTime = computeCalendarTime(mCalendar, 648 now, rebootStartTime); 649 } else if (now < (realStartTime+rebootWindowMillis)) { 650 String doit = shouldWeBeBrutalLocked(now); 651 EventLog.writeEvent(EventLogTags.WATCHDOG_SCHEDULED_REBOOT, now, 652 (int)rebootInterval, (int)rebootStartTime*1000, 653 (int)rebootWindowMillis, doit != null ? doit : ""); 654 if (doit == null) { 655 rebootSystem("Checked scheduled range"); 656 return; 657 } 658 659 // Schedule next alarm either within the window or in the 660 // next interval. 661 if ((now+recheckInterval) >= (realStartTime+rebootWindowMillis)) { 662 realStartTime = computeCalendarTime(mCalendar, 663 now + rebootIntervalMillis, rebootStartTime); 664 } else { 665 realStartTime = now + recheckInterval; 666 } 667 } else { 668 // Schedule alarm for next check interval. 669 realStartTime = computeCalendarTime(mCalendar, 670 now + rebootIntervalMillis, rebootStartTime); 671 } 672 } 673 } 674 675 if (localLOGV) Slog.v(TAG, "Scheduling next reboot alarm for " 676 + ((realStartTime-now)/1000/60) + "m from now"); 677 mAlarm.remove(mRebootIntent); 678 mAlarm.set(AlarmManager.RTC_WAKEUP, realStartTime, mRebootIntent); 679 } 680 681 /** 682 * Perform a full reboot of the system. 683 */ 684 void rebootSystem(String reason) { 685 Slog.i(TAG, "Rebooting system because: " + reason); 686 PowerManagerService pms = (PowerManagerService) ServiceManager.getService("power"); 687 pms.reboot(reason); 688 } 689 690 /** 691 * Load the current Gservices settings for when 692 * {@link #shouldWeBeBrutalLocked} will allow the brutality to happen. 693 * Must not be called with the lock held. 694 */ 695 void retrieveBrutalityAmount() { 696 mMinScreenOff = (mReqMinScreenOff >= 0 ? mReqMinScreenOff 697 : Settings.Secure.getInt( 698 mResolver, Settings.Secure.MEMCHECK_MIN_SCREEN_OFF, 699 MEMCHECK_DEFAULT_MIN_SCREEN_OFF)) * 1000; 700 mMinAlarm = (mReqMinNextAlarm >= 0 ? mReqMinNextAlarm 701 : Settings.Secure.getInt( 702 mResolver, Settings.Secure.MEMCHECK_MIN_ALARM, 703 MEMCHECK_DEFAULT_MIN_ALARM)) * 1000; 704 } 705 706 /** 707 * Determine whether it is a good time to kill, crash, or otherwise 708 * plunder the current situation for the overall long-term benefit of 709 * the world. 710 * 711 * @param curTime The current system time. 712 * @return Returns null if this is a good time, else a String with the 713 * text of why it is not a good time. 714 */ 715 String shouldWeBeBrutalLocked(long curTime) { 716 if (mBattery == null || !mBattery.isPowered()) { 717 return "battery"; 718 } 719 720 if (mMinScreenOff >= 0 && (mPower == null || 721 mPower.timeSinceScreenOn() < mMinScreenOff)) { 722 return "screen"; 723 } 724 725 if (mMinAlarm >= 0 && (mAlarm == null || 726 mAlarm.timeToNextAlarm() < mMinAlarm)) { 727 return "alarm"; 728 } 729 730 return null; 731 } 732 733 /** 734 * Compute the times during which we next would like to perform process 735 * restarts. 736 * 737 * @param curTime The current system time. 738 */ 739 void computeMemcheckTimesLocked(long curTime) { 740 if (mMemcheckLastTime == curTime) { 741 return; 742 } 743 744 mMemcheckLastTime = curTime; 745 746 long memcheckExecStartTime = Settings.Secure.getLong( 747 mResolver, Settings.Secure.MEMCHECK_EXEC_START_TIME, 748 MEMCHECK_DEFAULT_EXEC_START_TIME); 749 long memcheckExecEndTime = Settings.Secure.getLong( 750 mResolver, Settings.Secure.MEMCHECK_EXEC_END_TIME, 751 MEMCHECK_DEFAULT_EXEC_END_TIME); 752 753 mMemcheckExecEndTime = computeCalendarTime(mCalendar, curTime, 754 memcheckExecEndTime); 755 if (mMemcheckExecEndTime < curTime) { 756 memcheckExecStartTime += 24*60*60; 757 memcheckExecEndTime += 24*60*60; 758 mMemcheckExecEndTime = computeCalendarTime(mCalendar, curTime, 759 memcheckExecEndTime); 760 } 761 mMemcheckExecStartTime = computeCalendarTime(mCalendar, curTime, 762 memcheckExecStartTime); 763 764 if (localLOGV) { 765 mCalendar.setTimeInMillis(curTime); 766 Slog.v(TAG, "Current Time: " + mCalendar); 767 mCalendar.setTimeInMillis(mMemcheckExecStartTime); 768 Slog.v(TAG, "Start Check Time: " + mCalendar); 769 mCalendar.setTimeInMillis(mMemcheckExecEndTime); 770 Slog.v(TAG, "End Check Time: " + mCalendar); 771 } 772 } 773 774 static long computeCalendarTime(Calendar c, long curTime, 775 long secondsSinceMidnight) { 776 777 // start with now 778 c.setTimeInMillis(curTime); 779 780 int val = (int)secondsSinceMidnight / (60*60); 781 c.set(Calendar.HOUR_OF_DAY, val); 782 secondsSinceMidnight -= val * (60*60); 783 val = (int)secondsSinceMidnight / 60; 784 c.set(Calendar.MINUTE, val); 785 c.set(Calendar.SECOND, (int)secondsSinceMidnight - (val*60)); 786 c.set(Calendar.MILLISECOND, 0); 787 788 long newTime = c.getTimeInMillis(); 789 if (newTime < curTime) { 790 // The given time (in seconds since midnight) has already passed for today, so advance 791 // by one day (due to daylight savings, etc., the delta may differ from 24 hours). 792 c.add(Calendar.DAY_OF_MONTH, 1); 793 newTime = c.getTimeInMillis(); 794 } 795 796 return newTime; 797 } 798 799 @Override 800 public void run() { 801 boolean waitedHalf = false; 802 while (true) { 803 mCompleted = false; 804 mHandler.sendEmptyMessage(MONITOR); 805 806 synchronized (this) { 807 long timeout = TIME_TO_WAIT; 808 809 // NOTE: We use uptimeMillis() here because we do not want to increment the time we 810 // wait while asleep. If the device is asleep then the thing that we are waiting 811 // to timeout on is asleep as well and won't have a chance to run, causing a false 812 // positive on when to kill things. 813 long start = SystemClock.uptimeMillis(); 814 while (timeout > 0 && !mForceKillSystem) { 815 try { 816 wait(timeout); // notifyAll() is called when mForceKillSystem is set 817 } catch (InterruptedException e) { 818 Log.wtf(TAG, e); 819 } 820 timeout = TIME_TO_WAIT - (SystemClock.uptimeMillis() - start); 821 } 822 823 if (mCompleted && !mForceKillSystem) { 824 // The monitors have returned. 825 waitedHalf = false; 826 continue; 827 } 828 829 if (!waitedHalf) { 830 // We've waited half the deadlock-detection interval. Pull a stack 831 // trace and wait another half. 832 ArrayList pids = new ArrayList(); 833 pids.add(Process.myPid()); 834 File stack = ActivityManagerService.dumpStackTraces(true, pids); 835 waitedHalf = true; 836 continue; 837 } 838 } 839 840 // If we got here, that means that the system is most likely hung. 841 // First collect stack traces from all threads of the system process. 842 // Then kill this process so that the system will restart. 843 844 String name = (mCurrentMonitor != null) ? mCurrentMonitor.getClass().getName() : "null"; 845 EventLog.writeEvent(EventLogTags.WATCHDOG, name); 846 847 ArrayList pids = new ArrayList(); 848 pids.add(Process.myPid()); 849 if (mPhonePid > 0) pids.add(mPhonePid); 850 // Pass !waitedHalf so that just in case we somehow wind up here without having 851 // dumped the halfway stacks, we properly re-initialize the trace file. 852 File stack = ActivityManagerService.dumpStackTraces(!waitedHalf, pids); 853 854 // Give some extra time to make sure the stack traces get written. 855 // The system's been hanging for a minute, another second or two won't hurt much. 856 SystemClock.sleep(2000); 857 858 // Pull our own kernel thread stacks as well if we're configured for that 859 if (RECORD_KERNEL_THREADS) { 860 dumpKernelStackTraces(); 861 } 862 863 mActivity.addErrorToDropBox("watchdog", null, null, null, name, null, stack, null); 864 865 // Only kill the process if the debugger is not attached. 866 if (!Debug.isDebuggerConnected()) { 867 Slog.w(TAG, "*** WATCHDOG KILLING SYSTEM PROCESS: " + name); 868 Process.killProcess(Process.myPid()); 869 System.exit(10); 870 } else { 871 Slog.w(TAG, "Debugger connected: Watchdog is *not* killing the system process"); 872 } 873 874 waitedHalf = false; 875 } 876 } 877 878 private File dumpKernelStackTraces() { 879 String tracesPath = SystemProperties.get("dalvik.vm.stack-trace-file", null); 880 if (tracesPath == null || tracesPath.length() == 0) { 881 return null; 882 } 883 884 native_dumpKernelStacks(tracesPath); 885 return new File(tracesPath); 886 } 887 888 private native void native_dumpKernelStacks(String tracesPath); 889 } 890