Home | History | Annotate | Download | only in server
      1 /*
      2  * Copyright (C) 2008 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.android.server;
     18 
     19 import com.android.server.am.ActivityManagerService;
     20 
     21 import android.app.AlarmManager;
     22 import android.app.PendingIntent;
     23 import android.content.BroadcastReceiver;
     24 import android.content.ContentResolver;
     25 import android.content.Context;
     26 import android.content.Intent;
     27 import android.content.IntentFilter;
     28 import android.os.Debug;
     29 import android.os.Handler;
     30 import android.os.Message;
     31 import android.os.Process;
     32 import android.os.ServiceManager;
     33 import android.os.SystemClock;
     34 import android.os.SystemProperties;
     35 import android.provider.Settings;
     36 import android.util.EventLog;
     37 import android.util.Log;
     38 import android.util.Slog;
     39 
     40 import java.io.File;
     41 import java.util.ArrayList;
     42 import java.util.Calendar;
     43 
/**
 * Periodically calls every registered monitor and kills this process if the
 * monitors do not return in time.
 */
     45 public class Watchdog extends Thread {
     46     static final String TAG = "Watchdog";
     47     static final boolean localLOGV = false || false;
     48 
     49     // Set this to true to use debug default values.
     50     static final boolean DB = false;
     51 
     52     // Set this to true to have the watchdog record kernel thread stacks when it fires
     53     static final boolean RECORD_KERNEL_THREADS = true;
     54 
     55     static final int MONITOR = 2718;
     56 
     57     static final int TIME_TO_RESTART = DB ? 15*1000 : 60*1000;
     58     static final int TIME_TO_WAIT = TIME_TO_RESTART / 2;
     59 
     60     static final int MEMCHECK_DEFAULT_MIN_SCREEN_OFF = DB ? 1*60 : 5*60;   // 5 minutes
     61     static final int MEMCHECK_DEFAULT_MIN_ALARM = DB ? 1*60 : 3*60;        // 3 minutes
     62     static final int MEMCHECK_DEFAULT_RECHECK_INTERVAL = DB ? 1*60 : 5*60; // 5 minutes
     63 
     64     static final int REBOOT_DEFAULT_INTERVAL = DB ? 1 : 0;                 // never force reboot
     65     static final int REBOOT_DEFAULT_START_TIME = 3*60*60;                  // 3:00am
     66     static final int REBOOT_DEFAULT_WINDOW = 60*60;                        // within 1 hour
     67 
     68     static final String REBOOT_ACTION = "com.android.service.Watchdog.REBOOT";
     69 
     70     static Watchdog sWatchdog;
     71 
     72     /* This handler will be used to post message back onto the main thread */
     73     final Handler mHandler;
     74     final ArrayList<Monitor> mMonitors = new ArrayList<Monitor>();
     75     ContentResolver mResolver;
     76     BatteryService mBattery;
     77     PowerManagerService mPower;
     78     AlarmManagerService mAlarm;
     79     ActivityManagerService mActivity;
     80     boolean mCompleted;
     81     boolean mForceKillSystem;
     82     Monitor mCurrentMonitor;
     83 
     84     int mPhonePid;
     85 
     86     final Calendar mCalendar = Calendar.getInstance();
     87     int mMinScreenOff = MEMCHECK_DEFAULT_MIN_SCREEN_OFF;
     88     int mMinAlarm = MEMCHECK_DEFAULT_MIN_ALARM;
     89     boolean mNeedScheduledCheck;
     90     PendingIntent mCheckupIntent;
     91     PendingIntent mRebootIntent;
     92 
     93     long mBootTime;
     94     int mRebootInterval;
     95 
     96     boolean mReqRebootNoWait;     // should wait for one interval before reboot?
     97     int mReqRebootInterval = -1;  // >= 0 if a reboot has been requested
     98     int mReqRebootStartTime = -1; // >= 0 if a specific start time has been requested
     99     int mReqRebootWindow = -1;    // >= 0 if a specific window has been requested
    100     int mReqMinScreenOff = -1;    // >= 0 if a specific screen off time has been requested
    101     int mReqMinNextAlarm = -1;    // >= 0 if specific time to next alarm has been requested
    102     int mReqRecheckInterval= -1;  // >= 0 if a specific recheck interval has been requested
    103 
    104     /**
    105      * Used for scheduling monitor callbacks and checking memory usage.
    106      */
    107     final class HeartbeatHandler extends Handler {
    108         @Override
    109         public void handleMessage(Message msg) {
    110             switch (msg.what) {
    111                 case MONITOR: {
    112                     // See if we should force a reboot.
    113                     int rebootInterval = mReqRebootInterval >= 0
    114                             ? mReqRebootInterval : Settings.Secure.getInt(
    115                             mResolver, Settings.Secure.REBOOT_INTERVAL,
    116                             REBOOT_DEFAULT_INTERVAL);
    117                     if (mRebootInterval != rebootInterval) {
    118                         mRebootInterval = rebootInterval;
    119                         // We have been running long enough that a reboot can
    120                         // be considered...
    121                         checkReboot(false);
    122                     }
    123 
    124                     final int size = mMonitors.size();
    125                     for (int i = 0 ; i < size ; i++) {
    126                         mCurrentMonitor = mMonitors.get(i);
    127                         mCurrentMonitor.monitor();
    128                     }
    129 
    130                     synchronized (Watchdog.this) {
    131                         mCompleted = true;
    132                         mCurrentMonitor = null;
    133                     }
    134                 } break;
    135             }
    136         }
    137     }
    138 
    139     final class RebootReceiver extends BroadcastReceiver {
    140         @Override
    141         public void onReceive(Context c, Intent intent) {
    142             if (localLOGV) Slog.v(TAG, "Alarm went off, checking reboot.");
    143             checkReboot(true);
    144         }
    145     }
    146 
    147     final class RebootRequestReceiver extends BroadcastReceiver {
    148         @Override
    149         public void onReceive(Context c, Intent intent) {
    150             mReqRebootNoWait = intent.getIntExtra("nowait", 0) != 0;
    151             mReqRebootInterval = intent.getIntExtra("interval", -1);
    152             mReqRebootStartTime = intent.getIntExtra("startTime", -1);
    153             mReqRebootWindow = intent.getIntExtra("window", -1);
    154             mReqMinScreenOff = intent.getIntExtra("minScreenOff", -1);
    155             mReqMinNextAlarm = intent.getIntExtra("minNextAlarm", -1);
    156             mReqRecheckInterval = intent.getIntExtra("recheckInterval", -1);
    157             EventLog.writeEvent(EventLogTags.WATCHDOG_REQUESTED_REBOOT,
    158                     mReqRebootNoWait ? 1 : 0, mReqRebootInterval,
    159                             mReqRecheckInterval, mReqRebootStartTime,
    160                     mReqRebootWindow, mReqMinScreenOff, mReqMinNextAlarm);
    161             checkReboot(true);
    162         }
    163     }
    164 
    /** Callback implemented by components that want the watchdog to check on them. */
    public interface Monitor {
        /**
         * Called by the heartbeat handler once per monitor pass. The watchdog
         * treats a call that does not return in time as a hang.
         */
        void monitor();
    }
    168 
    169     public static Watchdog getInstance() {
    170         if (sWatchdog == null) {
    171             sWatchdog = new Watchdog();
    172         }
    173 
    174         return sWatchdog;
    175     }
    176 
    /**
     * Creates the watchdog thread (named "watchdog", not yet started) together
     * with its heartbeat handler. Private: use {@link #getInstance()}.
     */
    private Watchdog() {
        super("watchdog");
        this.mHandler = new HeartbeatHandler();
    }
    181 
    182     public void init(Context context, BatteryService battery,
    183             PowerManagerService power, AlarmManagerService alarm,
    184             ActivityManagerService activity) {
    185         mResolver = context.getContentResolver();
    186         mBattery = battery;
    187         mPower = power;
    188         mAlarm = alarm;
    189         mActivity = activity;
    190 
    191         context.registerReceiver(new RebootReceiver(),
    192                 new IntentFilter(REBOOT_ACTION));
    193         mRebootIntent = PendingIntent.getBroadcast(context,
    194                 0, new Intent(REBOOT_ACTION), 0);
    195 
    196         context.registerReceiver(new RebootRequestReceiver(),
    197                 new IntentFilter(Intent.ACTION_REBOOT),
    198                 android.Manifest.permission.REBOOT, null);
    199 
    200         mBootTime = System.currentTimeMillis();
    201     }
    202 
    203     public void processStarted(String name, int pid) {
    204         synchronized (this) {
    205             if ("com.android.phone".equals(name)) {
    206                 mPhonePid = pid;
    207             }
    208         }
    209     }
    210 
    211     public void addMonitor(Monitor monitor) {
    212         synchronized (this) {
    213             if (isAlive()) {
    214                 throw new RuntimeException("Monitors can't be added while the Watchdog is running");
    215             }
    216             mMonitors.add(monitor);
    217         }
    218     }
    219 
    220     void checkReboot(boolean fromAlarm) {
    221         int rebootInterval = mReqRebootInterval >= 0 ? mReqRebootInterval
    222                 : Settings.Secure.getInt(
    223                 mResolver, Settings.Secure.REBOOT_INTERVAL,
    224                 REBOOT_DEFAULT_INTERVAL);
    225         mRebootInterval = rebootInterval;
    226         if (rebootInterval <= 0) {
    227             // No reboot interval requested.
    228             if (localLOGV) Slog.v(TAG, "No need to schedule a reboot alarm!");
    229             mAlarm.remove(mRebootIntent);
    230             return;
    231         }
    232 
    233         long rebootStartTime = mReqRebootStartTime >= 0 ? mReqRebootStartTime
    234                 : Settings.Secure.getLong(
    235                 mResolver, Settings.Secure.REBOOT_START_TIME,
    236                 REBOOT_DEFAULT_START_TIME);
    237         long rebootWindowMillis = (mReqRebootWindow >= 0 ? mReqRebootWindow
    238                 : Settings.Secure.getLong(
    239                 mResolver, Settings.Secure.REBOOT_WINDOW,
    240                 REBOOT_DEFAULT_WINDOW)) * 1000;
    241         long recheckInterval = (mReqRecheckInterval >= 0 ? mReqRecheckInterval
    242                 : Settings.Secure.getLong(
    243                 mResolver, Settings.Secure.MEMCHECK_RECHECK_INTERVAL,
    244                 MEMCHECK_DEFAULT_RECHECK_INTERVAL)) * 1000;
    245 
    246         retrieveBrutalityAmount();
    247 
    248         long realStartTime;
    249         long now;
    250 
    251         synchronized (this) {
    252             now = System.currentTimeMillis();
    253             realStartTime = computeCalendarTime(mCalendar, now,
    254                     rebootStartTime);
    255 
    256             long rebootIntervalMillis = rebootInterval*24*60*60*1000;
    257             if (DB || mReqRebootNoWait ||
    258                     (now-mBootTime) >= (rebootIntervalMillis-rebootWindowMillis)) {
    259                 if (fromAlarm && rebootWindowMillis <= 0) {
    260                     // No reboot window -- just immediately reboot.
    261                     EventLog.writeEvent(EventLogTags.WATCHDOG_SCHEDULED_REBOOT, now,
    262                             (int)rebootIntervalMillis, (int)rebootStartTime*1000,
    263                             (int)rebootWindowMillis, "");
    264                     rebootSystem("Checkin scheduled forced");
    265                     return;
    266                 }
    267 
    268                 // Are we within the reboot window?
    269                 if (now < realStartTime) {
    270                     // Schedule alarm for next check interval.
    271                     realStartTime = computeCalendarTime(mCalendar,
    272                             now, rebootStartTime);
    273                 } else if (now < (realStartTime+rebootWindowMillis)) {
    274                     String doit = shouldWeBeBrutalLocked(now);
    275                     EventLog.writeEvent(EventLogTags.WATCHDOG_SCHEDULED_REBOOT, now,
    276                             (int)rebootInterval, (int)rebootStartTime*1000,
    277                             (int)rebootWindowMillis, doit != null ? doit : "");
    278                     if (doit == null) {
    279                         rebootSystem("Checked scheduled range");
    280                         return;
    281                     }
    282 
    283                     // Schedule next alarm either within the window or in the
    284                     // next interval.
    285                     if ((now+recheckInterval) >= (realStartTime+rebootWindowMillis)) {
    286                         realStartTime = computeCalendarTime(mCalendar,
    287                                 now + rebootIntervalMillis, rebootStartTime);
    288                     } else {
    289                         realStartTime = now + recheckInterval;
    290                     }
    291                 } else {
    292                     // Schedule alarm for next check interval.
    293                     realStartTime = computeCalendarTime(mCalendar,
    294                             now + rebootIntervalMillis, rebootStartTime);
    295                 }
    296             }
    297         }
    298 
    299         if (localLOGV) Slog.v(TAG, "Scheduling next reboot alarm for "
    300                 + ((realStartTime-now)/1000/60) + "m from now");
    301         mAlarm.remove(mRebootIntent);
    302         mAlarm.set(AlarmManager.RTC_WAKEUP, realStartTime, mRebootIntent);
    303     }
    304 
    305     /**
    306      * Perform a full reboot of the system.
    307      */
    308     void rebootSystem(String reason) {
    309         Slog.i(TAG, "Rebooting system because: " + reason);
    310         PowerManagerService pms = (PowerManagerService) ServiceManager.getService("power");
    311         pms.reboot(reason);
    312     }
    313 
    314     /**
    315      * Load the current Gservices settings for when
    316      * {@link #shouldWeBeBrutalLocked} will allow the brutality to happen.
    317      * Must not be called with the lock held.
    318      */
    319     void retrieveBrutalityAmount() {
    320         mMinScreenOff = (mReqMinScreenOff >= 0 ? mReqMinScreenOff
    321                 : Settings.Secure.getInt(
    322                 mResolver, Settings.Secure.MEMCHECK_MIN_SCREEN_OFF,
    323                 MEMCHECK_DEFAULT_MIN_SCREEN_OFF)) * 1000;
    324         mMinAlarm = (mReqMinNextAlarm >= 0 ? mReqMinNextAlarm
    325                 : Settings.Secure.getInt(
    326                 mResolver, Settings.Secure.MEMCHECK_MIN_ALARM,
    327                 MEMCHECK_DEFAULT_MIN_ALARM)) * 1000;
    328     }
    329 
    330     /**
    331      * Determine whether it is a good time to kill, crash, or otherwise
    332      * plunder the current situation for the overall long-term benefit of
    333      * the world.
    334      *
    335      * @param curTime The current system time.
    336      * @return Returns null if this is a good time, else a String with the
    337      * text of why it is not a good time.
    338      */
    339     String shouldWeBeBrutalLocked(long curTime) {
    340         if (mBattery == null || !mBattery.isPowered()) {
    341             return "battery";
    342         }
    343 
    344         if (mMinScreenOff >= 0 && (mPower == null ||
    345                 mPower.timeSinceScreenOn() < mMinScreenOff)) {
    346             return "screen";
    347         }
    348 
    349         if (mMinAlarm >= 0 && (mAlarm == null ||
    350                 mAlarm.timeToNextAlarm() < mMinAlarm)) {
    351             return "alarm";
    352         }
    353 
    354         return null;
    355     }
    356 
    357     static long computeCalendarTime(Calendar c, long curTime,
    358             long secondsSinceMidnight) {
    359 
    360         // start with now
    361         c.setTimeInMillis(curTime);
    362 
    363         int val = (int)secondsSinceMidnight / (60*60);
    364         c.set(Calendar.HOUR_OF_DAY, val);
    365         secondsSinceMidnight -= val * (60*60);
    366         val = (int)secondsSinceMidnight / 60;
    367         c.set(Calendar.MINUTE, val);
    368         c.set(Calendar.SECOND, (int)secondsSinceMidnight - (val*60));
    369         c.set(Calendar.MILLISECOND, 0);
    370 
    371         long newTime = c.getTimeInMillis();
    372         if (newTime < curTime) {
    373             // The given time (in seconds since midnight) has already passed for today, so advance
    374             // by one day (due to daylight savings, etc., the delta may differ from 24 hours).
    375             c.add(Calendar.DAY_OF_MONTH, 1);
    376             newTime = c.getTimeInMillis();
    377         }
    378 
    379         return newTime;
    380     }
    381 
    382     @Override
    383     public void run() {
    384         boolean waitedHalf = false;
    385         while (true) {
    386             mCompleted = false;
    387             mHandler.sendEmptyMessage(MONITOR);
    388 
    389             synchronized (this) {
    390                 long timeout = TIME_TO_WAIT;
    391 
    392                 // NOTE: We use uptimeMillis() here because we do not want to increment the time we
    393                 // wait while asleep. If the device is asleep then the thing that we are waiting
    394                 // to timeout on is asleep as well and won't have a chance to run, causing a false
    395                 // positive on when to kill things.
    396                 long start = SystemClock.uptimeMillis();
    397                 while (timeout > 0 && !mForceKillSystem) {
    398                     try {
    399                         wait(timeout);  // notifyAll() is called when mForceKillSystem is set
    400                     } catch (InterruptedException e) {
    401                         Log.wtf(TAG, e);
    402                     }
    403                     timeout = TIME_TO_WAIT - (SystemClock.uptimeMillis() - start);
    404                 }
    405 
    406                 if (mCompleted && !mForceKillSystem) {
    407                     // The monitors have returned.
    408                     waitedHalf = false;
    409                     continue;
    410                 }
    411 
    412                 if (!waitedHalf) {
    413                     // We've waited half the deadlock-detection interval.  Pull a stack
    414                     // trace and wait another half.
    415                     ArrayList<Integer> pids = new ArrayList<Integer>();
    416                     pids.add(Process.myPid());
    417                     ActivityManagerService.dumpStackTraces(true, pids, null, null);
    418                     waitedHalf = true;
    419                     continue;
    420                 }
    421             }
    422 
    423             // If we got here, that means that the system is most likely hung.
    424             // First collect stack traces from all threads of the system process.
    425             // Then kill this process so that the system will restart.
    426 
    427             final String name = (mCurrentMonitor != null) ?
    428                     mCurrentMonitor.getClass().getName() : "null";
    429             EventLog.writeEvent(EventLogTags.WATCHDOG, name);
    430 
    431             ArrayList<Integer> pids = new ArrayList<Integer>();
    432             pids.add(Process.myPid());
    433             if (mPhonePid > 0) pids.add(mPhonePid);
    434             // Pass !waitedHalf so that just in case we somehow wind up here without having
    435             // dumped the halfway stacks, we properly re-initialize the trace file.
    436             final File stack = ActivityManagerService.dumpStackTraces(
    437                     !waitedHalf, pids, null, null);
    438 
    439             // Give some extra time to make sure the stack traces get written.
    440             // The system's been hanging for a minute, another second or two won't hurt much.
    441             SystemClock.sleep(2000);
    442 
    443             // Pull our own kernel thread stacks as well if we're configured for that
    444             if (RECORD_KERNEL_THREADS) {
    445                 dumpKernelStackTraces();
    446             }
    447 
    448             // Try to add the error to the dropbox, but assuming that the ActivityManager
    449             // itself may be deadlocked.  (which has happened, causing this statement to
    450             // deadlock and the watchdog as a whole to be ineffective)
    451             Thread dropboxThread = new Thread("watchdogWriteToDropbox") {
    452                     public void run() {
    453                         mActivity.addErrorToDropBox(
    454                                 "watchdog", null, "system_server", null, null,
    455                                 name, null, stack, null);
    456                     }
    457                 };
    458             dropboxThread.start();
    459             try {
    460                 dropboxThread.join(2000);  // wait up to 2 seconds for it to return.
    461             } catch (InterruptedException ignored) {}
    462 
    463             // Only kill the process if the debugger is not attached.
    464             if (!Debug.isDebuggerConnected()) {
    465                 Slog.w(TAG, "*** WATCHDOG KILLING SYSTEM PROCESS: " + name);
    466                 Process.killProcess(Process.myPid());
    467                 System.exit(10);
    468             } else {
    469                 Slog.w(TAG, "Debugger connected: Watchdog is *not* killing the system process");
    470             }
    471 
    472             waitedHalf = false;
    473         }
    474     }
    475 
    476     private File dumpKernelStackTraces() {
    477         String tracesPath = SystemProperties.get("dalvik.vm.stack-trace-file", null);
    478         if (tracesPath == null || tracesPath.length() == 0) {
    479             return null;
    480         }
    481 
    482         native_dumpKernelStacks(tracesPath);
    483         return new File(tracesPath);
    484     }
    485 
    486     private native void native_dumpKernelStacks(String tracesPath);
    487 }
    488