Home | History | Annotate | Download | only in server
      1 /*
      2  * Copyright (C) 2008 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.android.server;
     18 
     19 import com.android.server.am.ActivityManagerService;
     20 
     21 import android.app.AlarmManager;
     22 import android.app.PendingIntent;
     23 import android.content.BroadcastReceiver;
     24 import android.content.ContentResolver;
     25 import android.content.Context;
     26 import android.content.Intent;
     27 import android.content.IntentFilter;
     28 import android.os.Debug;
     29 import android.os.Handler;
     30 import android.os.Message;
     31 import android.os.Process;
     32 import android.os.ServiceManager;
     33 import android.os.SystemClock;
     34 import android.os.SystemProperties;
     35 import android.provider.Settings;
     36 import android.util.EventLog;
     37 import android.util.Log;
     38 import android.util.Slog;
     39 
     40 import java.io.File;
     41 import java.util.ArrayList;
     42 import java.util.Calendar;
     43 
     44 /** This class calls its monitors periodically, killing this process if they don't return in time. */
     45 public class Watchdog extends Thread {
     46     static final String TAG = "Watchdog";
     47     static final boolean localLOGV = false || false;
     48 
     49     // Set this to true to use debug default values.
     50     static final boolean DB = false;
     51 
     52     // Set this to true to have the watchdog record kernel thread stacks when it fires
     53     static final boolean RECORD_KERNEL_THREADS = true;
     54 
     55     static final int MONITOR = 2718;
     56 
     57     static final int TIME_TO_RESTART = DB ? 15*1000 : 60*1000;
     58     static final int TIME_TO_WAIT = TIME_TO_RESTART / 2;
     59 
     60     static final int MEMCHECK_DEFAULT_MIN_SCREEN_OFF = DB ? 1*60 : 5*60;   // 5 minutes
     61     static final int MEMCHECK_DEFAULT_MIN_ALARM = DB ? 1*60 : 3*60;        // 3 minutes
     62     static final int MEMCHECK_DEFAULT_RECHECK_INTERVAL = DB ? 1*60 : 5*60; // 5 minutes
     63 
     64     static final int REBOOT_DEFAULT_INTERVAL = DB ? 1 : 0;                 // never force reboot
     65     static final int REBOOT_DEFAULT_START_TIME = 3*60*60;                  // 3:00am
     66     static final int REBOOT_DEFAULT_WINDOW = 60*60;                        // within 1 hour
     67 
     68     static final String REBOOT_ACTION = "com.android.service.Watchdog.REBOOT";
     69 
     70     static final String[] NATIVE_STACKS_OF_INTEREST = new String[] {
     71         "/system/bin/mediaserver",
     72         "/system/bin/sdcard",
     73         "/system/bin/surfaceflinger"
     74     };
     75 
     76     static Watchdog sWatchdog;
     77 
     78     /* This handler will be used to post message back onto the main thread */
     79     final Handler mHandler;
     80     final ArrayList<Monitor> mMonitors = new ArrayList<Monitor>();
     81     ContentResolver mResolver;
     82     BatteryService mBattery;
     83     PowerManagerService mPower;
     84     AlarmManagerService mAlarm;
     85     ActivityManagerService mActivity;
     86     boolean mCompleted;
     87     boolean mForceKillSystem;
     88     Monitor mCurrentMonitor;
     89 
     90     int mPhonePid;
     91 
     92     final Calendar mCalendar = Calendar.getInstance();
     93     int mMinScreenOff = MEMCHECK_DEFAULT_MIN_SCREEN_OFF;
     94     int mMinAlarm = MEMCHECK_DEFAULT_MIN_ALARM;
     95     boolean mNeedScheduledCheck;
     96     PendingIntent mCheckupIntent;
     97     PendingIntent mRebootIntent;
     98 
     99     long mBootTime;
    100     int mRebootInterval;
    101 
    102     boolean mReqRebootNoWait;     // should wait for one interval before reboot?
    103     int mReqRebootInterval = -1;  // >= 0 if a reboot has been requested
    104     int mReqRebootStartTime = -1; // >= 0 if a specific start time has been requested
    105     int mReqRebootWindow = -1;    // >= 0 if a specific window has been requested
    106     int mReqMinScreenOff = -1;    // >= 0 if a specific screen off time has been requested
    107     int mReqMinNextAlarm = -1;    // >= 0 if specific time to next alarm has been requested
    108     int mReqRecheckInterval= -1;  // >= 0 if a specific recheck interval has been requested
    109 
    110     /**
    111      * Used for scheduling monitor callbacks and checking memory usage.
    112      */
    113     final class HeartbeatHandler extends Handler {
    114         @Override
    115         public void handleMessage(Message msg) {
    116             switch (msg.what) {
    117                 case MONITOR: {
    118                     // See if we should force a reboot.
    119                     int rebootInterval = mReqRebootInterval >= 0
    120                             ? mReqRebootInterval : Settings.Secure.getInt(
    121                             mResolver, Settings.Secure.REBOOT_INTERVAL,
    122                             REBOOT_DEFAULT_INTERVAL);
    123                     if (mRebootInterval != rebootInterval) {
    124                         mRebootInterval = rebootInterval;
    125                         // We have been running long enough that a reboot can
    126                         // be considered...
    127                         checkReboot(false);
    128                     }
    129 
    130                     final int size = mMonitors.size();
    131                     for (int i = 0 ; i < size ; i++) {
    132                         mCurrentMonitor = mMonitors.get(i);
    133                         mCurrentMonitor.monitor();
    134                     }
    135 
    136                     synchronized (Watchdog.this) {
    137                         mCompleted = true;
    138                         mCurrentMonitor = null;
    139                     }
    140                 } break;
    141             }
    142         }
    143     }
    144 
    145     final class RebootReceiver extends BroadcastReceiver {
    146         @Override
    147         public void onReceive(Context c, Intent intent) {
    148             if (localLOGV) Slog.v(TAG, "Alarm went off, checking reboot.");
    149             checkReboot(true);
    150         }
    151     }
    152 
    153     final class RebootRequestReceiver extends BroadcastReceiver {
    154         @Override
    155         public void onReceive(Context c, Intent intent) {
    156             mReqRebootNoWait = intent.getIntExtra("nowait", 0) != 0;
    157             mReqRebootInterval = intent.getIntExtra("interval", -1);
    158             mReqRebootStartTime = intent.getIntExtra("startTime", -1);
    159             mReqRebootWindow = intent.getIntExtra("window", -1);
    160             mReqMinScreenOff = intent.getIntExtra("minScreenOff", -1);
    161             mReqMinNextAlarm = intent.getIntExtra("minNextAlarm", -1);
    162             mReqRecheckInterval = intent.getIntExtra("recheckInterval", -1);
    163             EventLog.writeEvent(EventLogTags.WATCHDOG_REQUESTED_REBOOT,
    164                     mReqRebootNoWait ? 1 : 0, mReqRebootInterval,
    165                             mReqRecheckInterval, mReqRebootStartTime,
    166                     mReqRebootWindow, mReqMinScreenOff, mReqMinNextAlarm);
    167             checkReboot(true);
    168         }
    169     }
    170 
    /**
     * Callback registered through {@link #addMonitor}. The watchdog treats a
     * monitor that does not return in time as a sign that the system is hung.
     */
    public interface Monitor {
        /** Called on every heartbeat; expected to return promptly. */
        void monitor();
    }
    174 
    175     public static Watchdog getInstance() {
    176         if (sWatchdog == null) {
    177             sWatchdog = new Watchdog();
    178         }
    179 
    180         return sWatchdog;
    181     }
    182 
    /** Names the thread "watchdog" and creates its heartbeat handler; obtain via getInstance(). */
    private Watchdog() {
        super("watchdog");
        this.mHandler = new HeartbeatHandler();
    }
    187 
    188     public void init(Context context, BatteryService battery,
    189             PowerManagerService power, AlarmManagerService alarm,
    190             ActivityManagerService activity) {
    191         mResolver = context.getContentResolver();
    192         mBattery = battery;
    193         mPower = power;
    194         mAlarm = alarm;
    195         mActivity = activity;
    196 
    197         context.registerReceiver(new RebootReceiver(),
    198                 new IntentFilter(REBOOT_ACTION));
    199         mRebootIntent = PendingIntent.getBroadcast(context,
    200                 0, new Intent(REBOOT_ACTION), 0);
    201 
    202         context.registerReceiver(new RebootRequestReceiver(),
    203                 new IntentFilter(Intent.ACTION_REBOOT),
    204                 android.Manifest.permission.REBOOT, null);
    205 
    206         mBootTime = System.currentTimeMillis();
    207     }
    208 
    209     public void processStarted(String name, int pid) {
    210         synchronized (this) {
    211             if ("com.android.phone".equals(name)) {
    212                 mPhonePid = pid;
    213             }
    214         }
    215     }
    216 
    217     public void addMonitor(Monitor monitor) {
    218         synchronized (this) {
    219             if (isAlive()) {
    220                 throw new RuntimeException("Monitors can't be added while the Watchdog is running");
    221             }
    222             mMonitors.add(monitor);
    223         }
    224     }
    225 
    226     void checkReboot(boolean fromAlarm) {
    227         int rebootInterval = mReqRebootInterval >= 0 ? mReqRebootInterval
    228                 : Settings.Secure.getInt(
    229                 mResolver, Settings.Secure.REBOOT_INTERVAL,
    230                 REBOOT_DEFAULT_INTERVAL);
    231         mRebootInterval = rebootInterval;
    232         if (rebootInterval <= 0) {
    233             // No reboot interval requested.
    234             if (localLOGV) Slog.v(TAG, "No need to schedule a reboot alarm!");
    235             mAlarm.remove(mRebootIntent);
    236             return;
    237         }
    238 
    239         long rebootStartTime = mReqRebootStartTime >= 0 ? mReqRebootStartTime
    240                 : Settings.Secure.getLong(
    241                 mResolver, Settings.Secure.REBOOT_START_TIME,
    242                 REBOOT_DEFAULT_START_TIME);
    243         long rebootWindowMillis = (mReqRebootWindow >= 0 ? mReqRebootWindow
    244                 : Settings.Secure.getLong(
    245                 mResolver, Settings.Secure.REBOOT_WINDOW,
    246                 REBOOT_DEFAULT_WINDOW)) * 1000;
    247         long recheckInterval = (mReqRecheckInterval >= 0 ? mReqRecheckInterval
    248                 : Settings.Secure.getLong(
    249                 mResolver, Settings.Secure.MEMCHECK_RECHECK_INTERVAL,
    250                 MEMCHECK_DEFAULT_RECHECK_INTERVAL)) * 1000;
    251 
    252         retrieveBrutalityAmount();
    253 
    254         long realStartTime;
    255         long now;
    256 
    257         synchronized (this) {
    258             now = System.currentTimeMillis();
    259             realStartTime = computeCalendarTime(mCalendar, now,
    260                     rebootStartTime);
    261 
    262             long rebootIntervalMillis = rebootInterval*24*60*60*1000;
    263             if (DB || mReqRebootNoWait ||
    264                     (now-mBootTime) >= (rebootIntervalMillis-rebootWindowMillis)) {
    265                 if (fromAlarm && rebootWindowMillis <= 0) {
    266                     // No reboot window -- just immediately reboot.
    267                     EventLog.writeEvent(EventLogTags.WATCHDOG_SCHEDULED_REBOOT, now,
    268                             (int)rebootIntervalMillis, (int)rebootStartTime*1000,
    269                             (int)rebootWindowMillis, "");
    270                     rebootSystem("Checkin scheduled forced");
    271                     return;
    272                 }
    273 
    274                 // Are we within the reboot window?
    275                 if (now < realStartTime) {
    276                     // Schedule alarm for next check interval.
    277                     realStartTime = computeCalendarTime(mCalendar,
    278                             now, rebootStartTime);
    279                 } else if (now < (realStartTime+rebootWindowMillis)) {
    280                     String doit = shouldWeBeBrutalLocked(now);
    281                     EventLog.writeEvent(EventLogTags.WATCHDOG_SCHEDULED_REBOOT, now,
    282                             (int)rebootInterval, (int)rebootStartTime*1000,
    283                             (int)rebootWindowMillis, doit != null ? doit : "");
    284                     if (doit == null) {
    285                         rebootSystem("Checked scheduled range");
    286                         return;
    287                     }
    288 
    289                     // Schedule next alarm either within the window or in the
    290                     // next interval.
    291                     if ((now+recheckInterval) >= (realStartTime+rebootWindowMillis)) {
    292                         realStartTime = computeCalendarTime(mCalendar,
    293                                 now + rebootIntervalMillis, rebootStartTime);
    294                     } else {
    295                         realStartTime = now + recheckInterval;
    296                     }
    297                 } else {
    298                     // Schedule alarm for next check interval.
    299                     realStartTime = computeCalendarTime(mCalendar,
    300                             now + rebootIntervalMillis, rebootStartTime);
    301                 }
    302             }
    303         }
    304 
    305         if (localLOGV) Slog.v(TAG, "Scheduling next reboot alarm for "
    306                 + ((realStartTime-now)/1000/60) + "m from now");
    307         mAlarm.remove(mRebootIntent);
    308         mAlarm.set(AlarmManager.RTC_WAKEUP, realStartTime, mRebootIntent);
    309     }
    310 
    311     /**
    312      * Perform a full reboot of the system.
    313      */
    314     void rebootSystem(String reason) {
    315         Slog.i(TAG, "Rebooting system because: " + reason);
    316         PowerManagerService pms = (PowerManagerService) ServiceManager.getService("power");
    317         pms.reboot(reason);
    318     }
    319 
    320     /**
    321      * Load the current Gservices settings for when
    322      * {@link #shouldWeBeBrutalLocked} will allow the brutality to happen.
    323      * Must not be called with the lock held.
    324      */
    325     void retrieveBrutalityAmount() {
    326         mMinScreenOff = (mReqMinScreenOff >= 0 ? mReqMinScreenOff
    327                 : Settings.Secure.getInt(
    328                 mResolver, Settings.Secure.MEMCHECK_MIN_SCREEN_OFF,
    329                 MEMCHECK_DEFAULT_MIN_SCREEN_OFF)) * 1000;
    330         mMinAlarm = (mReqMinNextAlarm >= 0 ? mReqMinNextAlarm
    331                 : Settings.Secure.getInt(
    332                 mResolver, Settings.Secure.MEMCHECK_MIN_ALARM,
    333                 MEMCHECK_DEFAULT_MIN_ALARM)) * 1000;
    334     }
    335 
    336     /**
    337      * Determine whether it is a good time to kill, crash, or otherwise
    338      * plunder the current situation for the overall long-term benefit of
    339      * the world.
    340      *
    341      * @param curTime The current system time.
    342      * @return Returns null if this is a good time, else a String with the
    343      * text of why it is not a good time.
    344      */
    345     String shouldWeBeBrutalLocked(long curTime) {
    346         if (mBattery == null || !mBattery.isPowered()) {
    347             return "battery";
    348         }
    349 
    350         if (mMinScreenOff >= 0 && (mPower == null ||
    351                 mPower.timeSinceScreenOn() < mMinScreenOff)) {
    352             return "screen";
    353         }
    354 
    355         if (mMinAlarm >= 0 && (mAlarm == null ||
    356                 mAlarm.timeToNextAlarm() < mMinAlarm)) {
    357             return "alarm";
    358         }
    359 
    360         return null;
    361     }
    362 
    363     static long computeCalendarTime(Calendar c, long curTime,
    364             long secondsSinceMidnight) {
    365 
    366         // start with now
    367         c.setTimeInMillis(curTime);
    368 
    369         int val = (int)secondsSinceMidnight / (60*60);
    370         c.set(Calendar.HOUR_OF_DAY, val);
    371         secondsSinceMidnight -= val * (60*60);
    372         val = (int)secondsSinceMidnight / 60;
    373         c.set(Calendar.MINUTE, val);
    374         c.set(Calendar.SECOND, (int)secondsSinceMidnight - (val*60));
    375         c.set(Calendar.MILLISECOND, 0);
    376 
    377         long newTime = c.getTimeInMillis();
    378         if (newTime < curTime) {
    379             // The given time (in seconds since midnight) has already passed for today, so advance
    380             // by one day (due to daylight savings, etc., the delta may differ from 24 hours).
    381             c.add(Calendar.DAY_OF_MONTH, 1);
    382             newTime = c.getTimeInMillis();
    383         }
    384 
    385         return newTime;
    386     }
    387 
    388     @Override
    389     public void run() {
    390         boolean waitedHalf = false;
    391         while (true) {
    392             mCompleted = false;
    393             mHandler.sendEmptyMessage(MONITOR);
    394 
    395             synchronized (this) {
    396                 long timeout = TIME_TO_WAIT;
    397 
    398                 // NOTE: We use uptimeMillis() here because we do not want to increment the time we
    399                 // wait while asleep. If the device is asleep then the thing that we are waiting
    400                 // to timeout on is asleep as well and won't have a chance to run, causing a false
    401                 // positive on when to kill things.
    402                 long start = SystemClock.uptimeMillis();
    403                 while (timeout > 0 && !mForceKillSystem) {
    404                     try {
    405                         wait(timeout);  // notifyAll() is called when mForceKillSystem is set
    406                     } catch (InterruptedException e) {
    407                         Log.wtf(TAG, e);
    408                     }
    409                     timeout = TIME_TO_WAIT - (SystemClock.uptimeMillis() - start);
    410                 }
    411 
    412                 if (mCompleted && !mForceKillSystem) {
    413                     // The monitors have returned.
    414                     waitedHalf = false;
    415                     continue;
    416                 }
    417 
    418                 if (!waitedHalf) {
    419                     // We've waited half the deadlock-detection interval.  Pull a stack
    420                     // trace and wait another half.
    421                     ArrayList<Integer> pids = new ArrayList<Integer>();
    422                     pids.add(Process.myPid());
    423                     ActivityManagerService.dumpStackTraces(true, pids, null, null,
    424                             NATIVE_STACKS_OF_INTEREST);
    425                     waitedHalf = true;
    426                     continue;
    427                 }
    428             }
    429 
    430             // If we got here, that means that the system is most likely hung.
    431             // First collect stack traces from all threads of the system process.
    432             // Then kill this process so that the system will restart.
    433 
    434             final String name = (mCurrentMonitor != null) ?
    435                     mCurrentMonitor.getClass().getName() : "null";
    436             EventLog.writeEvent(EventLogTags.WATCHDOG, name);
    437 
    438             ArrayList<Integer> pids = new ArrayList<Integer>();
    439             pids.add(Process.myPid());
    440             if (mPhonePid > 0) pids.add(mPhonePid);
    441             // Pass !waitedHalf so that just in case we somehow wind up here without having
    442             // dumped the halfway stacks, we properly re-initialize the trace file.
    443             final File stack = ActivityManagerService.dumpStackTraces(
    444                     !waitedHalf, pids, null, null, NATIVE_STACKS_OF_INTEREST);
    445 
    446             // Give some extra time to make sure the stack traces get written.
    447             // The system's been hanging for a minute, another second or two won't hurt much.
    448             SystemClock.sleep(2000);
    449 
    450             // Pull our own kernel thread stacks as well if we're configured for that
    451             if (RECORD_KERNEL_THREADS) {
    452                 dumpKernelStackTraces();
    453             }
    454 
    455             // Try to add the error to the dropbox, but assuming that the ActivityManager
    456             // itself may be deadlocked.  (which has happened, causing this statement to
    457             // deadlock and the watchdog as a whole to be ineffective)
    458             Thread dropboxThread = new Thread("watchdogWriteToDropbox") {
    459                     public void run() {
    460                         mActivity.addErrorToDropBox(
    461                                 "watchdog", null, "system_server", null, null,
    462                                 name, null, stack, null);
    463                     }
    464                 };
    465             dropboxThread.start();
    466             try {
    467                 dropboxThread.join(2000);  // wait up to 2 seconds for it to return.
    468             } catch (InterruptedException ignored) {}
    469 
    470             // Only kill the process if the debugger is not attached.
    471             if (!Debug.isDebuggerConnected()) {
    472                 Slog.w(TAG, "*** WATCHDOG KILLING SYSTEM PROCESS: " + name);
    473                 Process.killProcess(Process.myPid());
    474                 System.exit(10);
    475             } else {
    476                 Slog.w(TAG, "Debugger connected: Watchdog is *not* killing the system process");
    477             }
    478 
    479             waitedHalf = false;
    480         }
    481     }
    482 
    483     private File dumpKernelStackTraces() {
    484         String tracesPath = SystemProperties.get("dalvik.vm.stack-trace-file", null);
    485         if (tracesPath == null || tracesPath.length() == 0) {
    486             return null;
    487         }
    488 
    489         native_dumpKernelStacks(tracesPath);
    490         return new File(tracesPath);
    491     }
    492 
    493     private native void native_dumpKernelStacks(String tracesPath);
    494 }
    495