Home | History | Annotate | Download | only in server
      1 /*
      2  * Copyright (C) 2008 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.android.server;
     18 
     19 import com.android.server.am.ActivityManagerService;
     20 
     21 import android.app.AlarmManager;
     22 import android.app.PendingIntent;
     23 import android.content.BroadcastReceiver;
     24 import android.content.ContentResolver;
     25 import android.content.Context;
     26 import android.content.Intent;
     27 import android.content.IntentFilter;
     28 import android.os.Debug;
     29 import android.os.Handler;
     30 import android.os.Message;
     31 import android.os.Process;
     32 import android.os.ServiceManager;
     33 import android.os.SystemClock;
     34 import android.os.SystemProperties;
     35 import android.provider.Settings;
     36 import android.util.Config;
     37 import android.util.EventLog;
     38 import android.util.Log;
     39 import android.util.Slog;
     40 
     41 import java.io.File;
     42 import java.util.ArrayList;
     43 import java.util.Calendar;
     44 
/** Periodically calls its registered monitors and kills this process if they fail to return in time. */
     46 public class Watchdog extends Thread {
    // Log tag used for all Slog/Log output of this class.
    static final String TAG = "Watchdog";
    // Enables the verbose Slog.v() messages in this class.
    static final boolean localLOGV = false || Config.LOGV;

    // Set this to true to use debug default values.
    static final boolean DB = false;

    // Set this to true to have the watchdog record kernel thread stacks when it fires
    static final boolean RECORD_KERNEL_THREADS = true;

    // Message code sent to the heartbeat handler to trigger one pass over all monitors.
    static final int MONITOR = 2718;

    // Milliseconds. run() waits TIME_TO_WAIT per round and only kills the process after a
    // second consecutive round without completion, i.e. after about TIME_TO_RESTART.
    static final int TIME_TO_RESTART = DB ? 15*1000 : 60*1000;
    static final int TIME_TO_WAIT = TIME_TO_RESTART / 2;

    // Defaults below are in seconds; the readers multiply them by 1000 before use.
    static final int MEMCHECK_DEFAULT_MIN_SCREEN_OFF = DB ? 1*60 : 5*60;   // 5 minutes
    static final int MEMCHECK_DEFAULT_MIN_ALARM = DB ? 1*60 : 3*60;        // 3 minutes
    static final int MEMCHECK_DEFAULT_RECHECK_INTERVAL = DB ? 1*60 : 5*60; // 5 minutes

    // Reboot interval is in days (checkReboot() multiplies it by 24*60*60*1000);
    // a value <= 0 disables the scheduled reboot.
    static final int REBOOT_DEFAULT_INTERVAL = DB ? 1 : 0;                 // never force reboot
    // Seconds since midnight at which the reboot window starts.
    static final int REBOOT_DEFAULT_START_TIME = 3*60*60;                  // 3:00am
    // Length of the reboot window in seconds.
    static final int REBOOT_DEFAULT_WINDOW = 60*60;                        // within 1 hour

    // Action of the broadcast fired by the reboot alarm and received by RebootReceiver.
    static final String REBOOT_ACTION = "com.android.service.Watchdog.REBOOT";
     70 
    // Lazily created singleton, see getInstance().
    static Watchdog sWatchdog;

    /*
     * Handler that processes the MONITOR heartbeat. It is created in the constructor.
     * NOTE(review): presumably bound to the system server's main thread -- confirm
     * which thread first calls getInstance().
     */
    final Handler mHandler;
    // Monitors invoked, in registration order, on every MONITOR message.
    final ArrayList<Monitor> mMonitors = new ArrayList<Monitor>();
    // Collaborators handed over in init().
    ContentResolver mResolver;
    BatteryService mBattery;
    PowerManagerService mPower;
    AlarmManagerService mAlarm;
    ActivityManagerService mActivity;
    // Cleared by run() before each heartbeat, set by the handler once all monitors returned.
    boolean mCompleted;
    // Ends the wait loop in run() early; nothing in this file sets it.
    boolean mForceKillSystem;
    // Monitor currently being called by the handler, null when no pass is in progress.
    Monitor mCurrentMonitor;

    // Pid recorded by processStarted() for "com.android.phone"; its stacks are dumped too.
    int mPhonePid;

    // Scratch calendar reused by checkReboot() for computeCalendarTime().
    final Calendar mCalendar = Calendar.getInstance();
    // Initialised from defaults expressed in seconds; retrieveBrutalityAmount() overwrites
    // both with values multiplied by 1000.
    int mMinScreenOff = MEMCHECK_DEFAULT_MIN_SCREEN_OFF;
    int mMinAlarm = MEMCHECK_DEFAULT_MIN_ALARM;
    // NOTE(review): mNeedScheduledCheck and mCheckupIntent are not used in the visible code.
    boolean mNeedScheduledCheck;
    PendingIntent mCheckupIntent;
    // Broadcast PendingIntent for REBOOT_ACTION, armed/removed on the alarm service.
    PendingIntent mRebootIntent;

    // Wall-clock time (System.currentTimeMillis()) captured in init().
    long mBootTime;
    // Last reboot interval (days) seen by the handler / checkReboot().
    int mRebootInterval;

    // Values below are filled from the extras of an ACTION_REBOOT request broadcast.
    boolean mReqRebootNoWait;     // true to skip waiting for one full interval before reboot
    int mReqRebootInterval = -1;  // >= 0 if a reboot has been requested
    int mReqRebootStartTime = -1; // >= 0 if a specific start time has been requested
    int mReqRebootWindow = -1;    // >= 0 if a specific window has been requested
    int mReqMinScreenOff = -1;    // >= 0 if a specific screen off time has been requested
    int mReqMinNextAlarm = -1;    // >= 0 if specific time to next alarm has been requested
    int mReqRecheckInterval= -1;  // >= 0 if a specific recheck interval has been requested
    104 
    105     /**
    106      * Used for scheduling monitor callbacks and checking memory usage.
    107      */
    108     final class HeartbeatHandler extends Handler {
    109         @Override
    110         public void handleMessage(Message msg) {
    111             switch (msg.what) {
    112                 case MONITOR: {
    113                     // See if we should force a reboot.
    114                     int rebootInterval = mReqRebootInterval >= 0
    115                             ? mReqRebootInterval : Settings.Secure.getInt(
    116                             mResolver, Settings.Secure.REBOOT_INTERVAL,
    117                             REBOOT_DEFAULT_INTERVAL);
    118                     if (mRebootInterval != rebootInterval) {
    119                         mRebootInterval = rebootInterval;
    120                         // We have been running long enough that a reboot can
    121                         // be considered...
    122                         checkReboot(false);
    123                     }
    124 
    125                     final int size = mMonitors.size();
    126                     for (int i = 0 ; i < size ; i++) {
    127                         mCurrentMonitor = mMonitors.get(i);
    128                         mCurrentMonitor.monitor();
    129                     }
    130 
    131                     synchronized (Watchdog.this) {
    132                         mCompleted = true;
    133                         mCurrentMonitor = null;
    134                     }
    135                 } break;
    136             }
    137         }
    138     }
    139 
    140     final class RebootReceiver extends BroadcastReceiver {
    141         @Override
    142         public void onReceive(Context c, Intent intent) {
    143             if (localLOGV) Slog.v(TAG, "Alarm went off, checking reboot.");
    144             checkReboot(true);
    145         }
    146     }
    147 
    148     final class RebootRequestReceiver extends BroadcastReceiver {
    149         @Override
    150         public void onReceive(Context c, Intent intent) {
    151             mReqRebootNoWait = intent.getIntExtra("nowait", 0) != 0;
    152             mReqRebootInterval = intent.getIntExtra("interval", -1);
    153             mReqRebootStartTime = intent.getIntExtra("startTime", -1);
    154             mReqRebootWindow = intent.getIntExtra("window", -1);
    155             mReqMinScreenOff = intent.getIntExtra("minScreenOff", -1);
    156             mReqMinNextAlarm = intent.getIntExtra("minNextAlarm", -1);
    157             mReqRecheckInterval = intent.getIntExtra("recheckInterval", -1);
    158             EventLog.writeEvent(EventLogTags.WATCHDOG_REQUESTED_REBOOT,
    159                     mReqRebootNoWait ? 1 : 0, mReqRebootInterval,
    160                             mReqRecheckInterval, mReqRebootStartTime,
    161                     mReqRebootWindow, mReqMinScreenOff, mReqMinNextAlarm);
    162             checkReboot(true);
    163         }
    164     }
    165 
    /**
     * Callback registered through {@link #addMonitor}. The heartbeat handler
     * calls {@link #monitor()} on every registered instance, one after the
     * other, each time it processes a MONITOR message.
     */
    public interface Monitor {
        /**
         * Invoked from the heartbeat handler. While this call has not
         * returned, the current pass is not marked completed, which is what
         * the watchdog thread eventually treats as a hang.
         */
        void monitor();
    }
    169 
    170     public static Watchdog getInstance() {
    171         if (sWatchdog == null) {
    172             sWatchdog = new Watchdog();
    173         }
    174 
    175         return sWatchdog;
    176     }
    177 
    /**
     * Creates the (not yet started) thread named "watchdog" together with its
     * heartbeat handler. Private: instances are obtained via getInstance().
     */
    private Watchdog() {
        super("watchdog");
        mHandler = new HeartbeatHandler();
    }
    182 
    183     public void init(Context context, BatteryService battery,
    184             PowerManagerService power, AlarmManagerService alarm,
    185             ActivityManagerService activity) {
    186         mResolver = context.getContentResolver();
    187         mBattery = battery;
    188         mPower = power;
    189         mAlarm = alarm;
    190         mActivity = activity;
    191 
    192         context.registerReceiver(new RebootReceiver(),
    193                 new IntentFilter(REBOOT_ACTION));
    194         mRebootIntent = PendingIntent.getBroadcast(context,
    195                 0, new Intent(REBOOT_ACTION), 0);
    196 
    197         context.registerReceiver(new RebootRequestReceiver(),
    198                 new IntentFilter(Intent.ACTION_REBOOT),
    199                 android.Manifest.permission.REBOOT, null);
    200 
    201         mBootTime = System.currentTimeMillis();
    202     }
    203 
    204     public void processStarted(String name, int pid) {
    205         synchronized (this) {
    206             if ("com.android.phone".equals(name)) {
    207                 mPhonePid = pid;
    208             }
    209         }
    210     }
    211 
    212     public void addMonitor(Monitor monitor) {
    213         synchronized (this) {
    214             if (isAlive()) {
    215                 throw new RuntimeException("Monitors can't be added while the Watchdog is running");
    216             }
    217             mMonitors.add(monitor);
    218         }
    219     }
    220 
    221     void checkReboot(boolean fromAlarm) {
    222         int rebootInterval = mReqRebootInterval >= 0 ? mReqRebootInterval
    223                 : Settings.Secure.getInt(
    224                 mResolver, Settings.Secure.REBOOT_INTERVAL,
    225                 REBOOT_DEFAULT_INTERVAL);
    226         mRebootInterval = rebootInterval;
    227         if (rebootInterval <= 0) {
    228             // No reboot interval requested.
    229             if (localLOGV) Slog.v(TAG, "No need to schedule a reboot alarm!");
    230             mAlarm.remove(mRebootIntent);
    231             return;
    232         }
    233 
    234         long rebootStartTime = mReqRebootStartTime >= 0 ? mReqRebootStartTime
    235                 : Settings.Secure.getLong(
    236                 mResolver, Settings.Secure.REBOOT_START_TIME,
    237                 REBOOT_DEFAULT_START_TIME);
    238         long rebootWindowMillis = (mReqRebootWindow >= 0 ? mReqRebootWindow
    239                 : Settings.Secure.getLong(
    240                 mResolver, Settings.Secure.REBOOT_WINDOW,
    241                 REBOOT_DEFAULT_WINDOW)) * 1000;
    242         long recheckInterval = (mReqRecheckInterval >= 0 ? mReqRecheckInterval
    243                 : Settings.Secure.getLong(
    244                 mResolver, Settings.Secure.MEMCHECK_RECHECK_INTERVAL,
    245                 MEMCHECK_DEFAULT_RECHECK_INTERVAL)) * 1000;
    246 
    247         retrieveBrutalityAmount();
    248 
    249         long realStartTime;
    250         long now;
    251 
    252         synchronized (this) {
    253             now = System.currentTimeMillis();
    254             realStartTime = computeCalendarTime(mCalendar, now,
    255                     rebootStartTime);
    256 
    257             long rebootIntervalMillis = rebootInterval*24*60*60*1000;
    258             if (DB || mReqRebootNoWait ||
    259                     (now-mBootTime) >= (rebootIntervalMillis-rebootWindowMillis)) {
    260                 if (fromAlarm && rebootWindowMillis <= 0) {
    261                     // No reboot window -- just immediately reboot.
    262                     EventLog.writeEvent(EventLogTags.WATCHDOG_SCHEDULED_REBOOT, now,
    263                             (int)rebootIntervalMillis, (int)rebootStartTime*1000,
    264                             (int)rebootWindowMillis, "");
    265                     rebootSystem("Checkin scheduled forced");
    266                     return;
    267                 }
    268 
    269                 // Are we within the reboot window?
    270                 if (now < realStartTime) {
    271                     // Schedule alarm for next check interval.
    272                     realStartTime = computeCalendarTime(mCalendar,
    273                             now, rebootStartTime);
    274                 } else if (now < (realStartTime+rebootWindowMillis)) {
    275                     String doit = shouldWeBeBrutalLocked(now);
    276                     EventLog.writeEvent(EventLogTags.WATCHDOG_SCHEDULED_REBOOT, now,
    277                             (int)rebootInterval, (int)rebootStartTime*1000,
    278                             (int)rebootWindowMillis, doit != null ? doit : "");
    279                     if (doit == null) {
    280                         rebootSystem("Checked scheduled range");
    281                         return;
    282                     }
    283 
    284                     // Schedule next alarm either within the window or in the
    285                     // next interval.
    286                     if ((now+recheckInterval) >= (realStartTime+rebootWindowMillis)) {
    287                         realStartTime = computeCalendarTime(mCalendar,
    288                                 now + rebootIntervalMillis, rebootStartTime);
    289                     } else {
    290                         realStartTime = now + recheckInterval;
    291                     }
    292                 } else {
    293                     // Schedule alarm for next check interval.
    294                     realStartTime = computeCalendarTime(mCalendar,
    295                             now + rebootIntervalMillis, rebootStartTime);
    296                 }
    297             }
    298         }
    299 
    300         if (localLOGV) Slog.v(TAG, "Scheduling next reboot alarm for "
    301                 + ((realStartTime-now)/1000/60) + "m from now");
    302         mAlarm.remove(mRebootIntent);
    303         mAlarm.set(AlarmManager.RTC_WAKEUP, realStartTime, mRebootIntent);
    304     }
    305 
    306     /**
    307      * Perform a full reboot of the system.
    308      */
    309     void rebootSystem(String reason) {
    310         Slog.i(TAG, "Rebooting system because: " + reason);
    311         PowerManagerService pms = (PowerManagerService) ServiceManager.getService("power");
    312         pms.reboot(reason);
    313     }
    314 
    315     /**
    316      * Load the current Gservices settings for when
    317      * {@link #shouldWeBeBrutalLocked} will allow the brutality to happen.
    318      * Must not be called with the lock held.
    319      */
    320     void retrieveBrutalityAmount() {
    321         mMinScreenOff = (mReqMinScreenOff >= 0 ? mReqMinScreenOff
    322                 : Settings.Secure.getInt(
    323                 mResolver, Settings.Secure.MEMCHECK_MIN_SCREEN_OFF,
    324                 MEMCHECK_DEFAULT_MIN_SCREEN_OFF)) * 1000;
    325         mMinAlarm = (mReqMinNextAlarm >= 0 ? mReqMinNextAlarm
    326                 : Settings.Secure.getInt(
    327                 mResolver, Settings.Secure.MEMCHECK_MIN_ALARM,
    328                 MEMCHECK_DEFAULT_MIN_ALARM)) * 1000;
    329     }
    330 
    331     /**
    332      * Determine whether it is a good time to kill, crash, or otherwise
    333      * plunder the current situation for the overall long-term benefit of
    334      * the world.
    335      *
    336      * @param curTime The current system time.
    337      * @return Returns null if this is a good time, else a String with the
    338      * text of why it is not a good time.
    339      */
    340     String shouldWeBeBrutalLocked(long curTime) {
    341         if (mBattery == null || !mBattery.isPowered()) {
    342             return "battery";
    343         }
    344 
    345         if (mMinScreenOff >= 0 && (mPower == null ||
    346                 mPower.timeSinceScreenOn() < mMinScreenOff)) {
    347             return "screen";
    348         }
    349 
    350         if (mMinAlarm >= 0 && (mAlarm == null ||
    351                 mAlarm.timeToNextAlarm() < mMinAlarm)) {
    352             return "alarm";
    353         }
    354 
    355         return null;
    356     }
    357 
    358     static long computeCalendarTime(Calendar c, long curTime,
    359             long secondsSinceMidnight) {
    360 
    361         // start with now
    362         c.setTimeInMillis(curTime);
    363 
    364         int val = (int)secondsSinceMidnight / (60*60);
    365         c.set(Calendar.HOUR_OF_DAY, val);
    366         secondsSinceMidnight -= val * (60*60);
    367         val = (int)secondsSinceMidnight / 60;
    368         c.set(Calendar.MINUTE, val);
    369         c.set(Calendar.SECOND, (int)secondsSinceMidnight - (val*60));
    370         c.set(Calendar.MILLISECOND, 0);
    371 
    372         long newTime = c.getTimeInMillis();
    373         if (newTime < curTime) {
    374             // The given time (in seconds since midnight) has already passed for today, so advance
    375             // by one day (due to daylight savings, etc., the delta may differ from 24 hours).
    376             c.add(Calendar.DAY_OF_MONTH, 1);
    377             newTime = c.getTimeInMillis();
    378         }
    379 
    380         return newTime;
    381     }
    382 
    383     @Override
    384     public void run() {
    385         boolean waitedHalf = false;
    386         while (true) {
    387             mCompleted = false;
    388             mHandler.sendEmptyMessage(MONITOR);
    389 
    390             synchronized (this) {
    391                 long timeout = TIME_TO_WAIT;
    392 
    393                 // NOTE: We use uptimeMillis() here because we do not want to increment the time we
    394                 // wait while asleep. If the device is asleep then the thing that we are waiting
    395                 // to timeout on is asleep as well and won't have a chance to run, causing a false
    396                 // positive on when to kill things.
    397                 long start = SystemClock.uptimeMillis();
    398                 while (timeout > 0 && !mForceKillSystem) {
    399                     try {
    400                         wait(timeout);  // notifyAll() is called when mForceKillSystem is set
    401                     } catch (InterruptedException e) {
    402                         Log.wtf(TAG, e);
    403                     }
    404                     timeout = TIME_TO_WAIT - (SystemClock.uptimeMillis() - start);
    405                 }
    406 
    407                 if (mCompleted && !mForceKillSystem) {
    408                     // The monitors have returned.
    409                     waitedHalf = false;
    410                     continue;
    411                 }
    412 
    413                 if (!waitedHalf) {
    414                     // We've waited half the deadlock-detection interval.  Pull a stack
    415                     // trace and wait another half.
    416                     ArrayList<Integer> pids = new ArrayList<Integer>();
    417                     pids.add(Process.myPid());
    418                     ActivityManagerService.dumpStackTraces(true, pids, null, null);
    419                     waitedHalf = true;
    420                     continue;
    421                 }
    422             }
    423 
    424             // If we got here, that means that the system is most likely hung.
    425             // First collect stack traces from all threads of the system process.
    426             // Then kill this process so that the system will restart.
    427 
    428             String name = (mCurrentMonitor != null) ?
    429                     mCurrentMonitor.getClass().getName() : "null";
    430             EventLog.writeEvent(EventLogTags.WATCHDOG, name);
    431 
    432             ArrayList<Integer> pids = new ArrayList<Integer>();
    433             pids.add(Process.myPid());
    434             if (mPhonePid > 0) pids.add(mPhonePid);
    435             // Pass !waitedHalf so that just in case we somehow wind up here without having
    436             // dumped the halfway stacks, we properly re-initialize the trace file.
    437             File stack = ActivityManagerService.dumpStackTraces(!waitedHalf, pids, null, null);
    438 
    439             // Give some extra time to make sure the stack traces get written.
    440             // The system's been hanging for a minute, another second or two won't hurt much.
    441             SystemClock.sleep(2000);
    442 
    443             // Pull our own kernel thread stacks as well if we're configured for that
    444             if (RECORD_KERNEL_THREADS) {
    445                 dumpKernelStackTraces();
    446             }
    447 
    448             mActivity.addErrorToDropBox("watchdog", null, null, null, name, null, stack, null);
    449 
    450             // Only kill the process if the debugger is not attached.
    451             if (!Debug.isDebuggerConnected()) {
    452                 Slog.w(TAG, "*** WATCHDOG KILLING SYSTEM PROCESS: " + name);
    453                 Process.killProcess(Process.myPid());
    454                 System.exit(10);
    455             } else {
    456                 Slog.w(TAG, "Debugger connected: Watchdog is *not* killing the system process");
    457             }
    458 
    459             waitedHalf = false;
    460         }
    461     }
    462 
    463     private File dumpKernelStackTraces() {
    464         String tracesPath = SystemProperties.get("dalvik.vm.stack-trace-file", null);
    465         if (tracesPath == null || tracesPath.length() == 0) {
    466             return null;
    467         }
    468 
    469         native_dumpKernelStacks(tracesPath);
    470         return new File(tracesPath);
    471     }
    472 
    // Implemented in native code. dumpKernelStackTraces() calls it with the path taken from
    // the "dalvik.vm.stack-trace-file" system property.
    // NOTE(review): presumably writes the kernel thread stacks to that file -- confirm
    // against the native implementation.
    private native void native_dumpKernelStacks(String tracesPath);
    474 }
    475