Home | History | Annotate | Download | only in server
      1 /*
      2  * Copyright (C) 2008 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.android.server;
     18 
     19 import android.app.IActivityController;
     20 import android.content.BroadcastReceiver;
     21 import android.content.Context;
     22 import android.content.Intent;
     23 import android.content.IntentFilter;
     24 import android.hidl.manager.V1_0.IServiceManager;
     25 import android.os.Binder;
     26 import android.os.Build;
     27 import android.os.Debug;
     28 import android.os.Handler;
     29 import android.os.IPowerManager;
     30 import android.os.Looper;
     31 import android.os.Process;
     32 import android.os.RemoteException;
     33 import android.os.ServiceManager;
     34 import android.os.SystemClock;
     35 import android.system.ErrnoException;
     36 import android.system.Os;
     37 import android.system.OsConstants;
     38 import android.system.StructRlimit;
     39 import android.util.EventLog;
     40 import android.util.Log;
     41 import android.util.Slog;
     42 import android.util.StatsLog;
     43 
     44 import com.android.internal.os.ZygoteConnectionConstants;
     45 import com.android.server.am.ActivityManagerService;
     46 import com.android.server.wm.SurfaceAnimationThread;
     47 
     48 import java.io.File;
     49 import java.io.FileWriter;
     50 import java.io.IOException;
     51 import java.nio.charset.StandardCharsets;
     52 import java.nio.file.Files;
     53 import java.nio.file.Path;
     54 import java.nio.file.Paths;
     55 import java.util.ArrayList;
     56 import java.util.Arrays;
     57 import java.util.Collections;
     58 import java.util.HashSet;
     59 import java.util.List;
     60 
/** This class calls its monitors every minute, killing this process if they don't return. */
     62 public class Watchdog extends Thread {
     63     static final String TAG = "Watchdog";
     64 
     65     /** Debug flag. */
     66     public static final boolean DEBUG = false;
     67 
     68     // Set this to true to use debug default values.
     69     static final boolean DB = false;
     70 
     71     // Note 1: Do not lower this value below thirty seconds without tightening the invoke-with
     72     //         timeout in com.android.internal.os.ZygoteConnection, or wrapped applications
     73     //         can trigger the watchdog.
     74     // Note 2: The debug value is already below the wait time in ZygoteConnection. Wrapped
     75     //         applications may not work with a debug build. CTS will fail.
     76     static final long DEFAULT_TIMEOUT = DB ? 10*1000 : 60*1000;
     77     static final long CHECK_INTERVAL = DEFAULT_TIMEOUT / 2;
     78 
     79     // These are temporally ordered: larger values as lateness increases
     80     static final int COMPLETED = 0;
     81     static final int WAITING = 1;
     82     static final int WAITED_HALF = 2;
     83     static final int OVERDUE = 3;
     84 
     85     // Which native processes to dump into dropbox's stack traces
     86     public static final String[] NATIVE_STACKS_OF_INTEREST = new String[] {
     87         "/system/bin/audioserver",
     88         "/system/bin/cameraserver",
     89         "/system/bin/drmserver",
     90         "/system/bin/mediadrmserver",
     91         "/system/bin/mediaserver",
     92         "/system/bin/sdcard",
     93         "/system/bin/surfaceflinger",
     94         "/system/bin/vold",
     95         "media.extractor", // system/bin/mediaextractor
     96         "media.metrics", // system/bin/mediametrics
     97         "media.codec", // vendor/bin/hw/android.hardware.media.omx@1.0-service
     98         "media.swcodec", // /apex/com.android.media.swcodec/bin/mediaswcodec
     99         "com.android.bluetooth",  // Bluetooth service
    100         "/system/bin/statsd",  // Stats daemon
    101     };
    102 
    103     public static final List<String> HAL_INTERFACES_OF_INTEREST = Arrays.asList(
    104             "android.hardware.audio (at) 2.0::IDevicesFactory",
    105             "android.hardware.audio (at) 4.0::IDevicesFactory",
    106             "android.hardware.bluetooth (at) 1.0::IBluetoothHci",
    107             "android.hardware.camera.provider (at) 2.4::ICameraProvider",
    108             "android.hardware.graphics.allocator (at) 2.0::IAllocator",
    109             "android.hardware.graphics.composer (at) 2.1::IComposer",
    110             "android.hardware.health (at) 2.0::IHealth",
    111             "android.hardware.media.c2 (at) 1.0::IComponentStore",
    112             "android.hardware.media.omx (at) 1.0::IOmx",
    113             "android.hardware.media.omx (at) 1.0::IOmxStore",
    114             "android.hardware.sensors (at) 1.0::ISensors",
    115             "android.hardware.vr (at) 1.0::IVr",
    116             "android.hardware.biometrics.face (at) 1.0::IBiometricsFace"
    117     );
    118 
    119     static Watchdog sWatchdog;
    120 
    121     /* This handler will be used to post message back onto the main thread */
    122     final ArrayList<HandlerChecker> mHandlerCheckers = new ArrayList<>();
    123     final HandlerChecker mMonitorChecker;
    124     ActivityManagerService mActivity;
    125 
    126     int mPhonePid;
    127     IActivityController mController;
    128     boolean mAllowRestart = true;
    129     final OpenFdMonitor mOpenFdMonitor;
    130 
    131     /**
    132      * Used for checking status of handle threads and scheduling monitor callbacks.
    133      */
    134     public final class HandlerChecker implements Runnable {
    135         private final Handler mHandler;
    136         private final String mName;
    137         private final long mWaitMax;
    138         private final ArrayList<Monitor> mMonitors = new ArrayList<Monitor>();
    139         private final ArrayList<Monitor> mMonitorQueue = new ArrayList<Monitor>();
    140         private boolean mCompleted;
    141         private Monitor mCurrentMonitor;
    142         private long mStartTime;
    143         private int mPauseCount;
    144 
    145         HandlerChecker(Handler handler, String name, long waitMaxMillis) {
    146             mHandler = handler;
    147             mName = name;
    148             mWaitMax = waitMaxMillis;
    149             mCompleted = true;
    150         }
    151 
    152         void addMonitorLocked(Monitor monitor) {
    153             // We don't want to update mMonitors when the Handler is in the middle of checking
    154             // all monitors. We will update mMonitors on the next schedule if it is safe
    155             mMonitorQueue.add(monitor);
    156         }
    157 
    158         public void scheduleCheckLocked() {
    159             if (mCompleted) {
    160                 // Safe to update monitors in queue, Handler is not in the middle of work
    161                 mMonitors.addAll(mMonitorQueue);
    162                 mMonitorQueue.clear();
    163             }
    164             if ((mMonitors.size() == 0 && mHandler.getLooper().getQueue().isPolling())
    165                     || (mPauseCount > 0)) {
    166                 // Don't schedule until after resume OR
    167                 // If the target looper has recently been polling, then
    168                 // there is no reason to enqueue our checker on it since that
    169                 // is as good as it not being deadlocked.  This avoid having
    170                 // to do a context switch to check the thread. Note that we
    171                 // only do this if we have no monitors since those would need to
    172                 // be executed at this point.
    173                 mCompleted = true;
    174                 return;
    175             }
    176             if (!mCompleted) {
    177                 // we already have a check in flight, so no need
    178                 return;
    179             }
    180 
    181             mCompleted = false;
    182             mCurrentMonitor = null;
    183             mStartTime = SystemClock.uptimeMillis();
    184             mHandler.postAtFrontOfQueue(this);
    185         }
    186 
    187         boolean isOverdueLocked() {
    188             return (!mCompleted) && (SystemClock.uptimeMillis() > mStartTime + mWaitMax);
    189         }
    190 
    191         public int getCompletionStateLocked() {
    192             if (mCompleted) {
    193                 return COMPLETED;
    194             } else {
    195                 long latency = SystemClock.uptimeMillis() - mStartTime;
    196                 if (latency < mWaitMax/2) {
    197                     return WAITING;
    198                 } else if (latency < mWaitMax) {
    199                     return WAITED_HALF;
    200                 }
    201             }
    202             return OVERDUE;
    203         }
    204 
    205         public Thread getThread() {
    206             return mHandler.getLooper().getThread();
    207         }
    208 
    209         public String getName() {
    210             return mName;
    211         }
    212 
    213         String describeBlockedStateLocked() {
    214             if (mCurrentMonitor == null) {
    215                 return "Blocked in handler on " + mName + " (" + getThread().getName() + ")";
    216             } else {
    217                 return "Blocked in monitor " + mCurrentMonitor.getClass().getName()
    218                         + " on " + mName + " (" + getThread().getName() + ")";
    219             }
    220         }
    221 
    222         @Override
    223         public void run() {
    224             // Once we get here, we ensure that mMonitors does not change even if we call
    225             // #addMonitorLocked because we first add the new monitors to mMonitorQueue and
    226             // move them to mMonitors on the next schedule when mCompleted is true, at which
    227             // point we have completed execution of this method.
    228             final int size = mMonitors.size();
    229             for (int i = 0 ; i < size ; i++) {
    230                 synchronized (Watchdog.this) {
    231                     mCurrentMonitor = mMonitors.get(i);
    232                 }
    233                 mCurrentMonitor.monitor();
    234             }
    235 
    236             synchronized (Watchdog.this) {
    237                 mCompleted = true;
    238                 mCurrentMonitor = null;
    239             }
    240         }
    241 
    242         /** Pause the HandlerChecker. */
    243         public void pauseLocked(String reason) {
    244             mPauseCount++;
    245             // Mark as completed, because there's a chance we called this after the watchog
    246             // thread loop called Object#wait after 'WAITED_HALF'. In that case we want to ensure
    247             // the next call to #getCompletionStateLocked for this checker returns 'COMPLETED'
    248             mCompleted = true;
    249             Slog.i(TAG, "Pausing HandlerChecker: " + mName + " for reason: "
    250                     + reason + ". Pause count: " + mPauseCount);
    251         }
    252 
    253         /** Resume the HandlerChecker from the last {@link #pauseLocked}. */
    254         public void resumeLocked(String reason) {
    255             if (mPauseCount > 0) {
    256                 mPauseCount--;
    257                 Slog.i(TAG, "Resuming HandlerChecker: " + mName + " for reason: "
    258                         + reason + ". Pause count: " + mPauseCount);
    259             } else {
    260                 Slog.wtf(TAG, "Already resumed HandlerChecker: " + mName);
    261             }
    262         }
    263     }
    264 
    265     final class RebootRequestReceiver extends BroadcastReceiver {
    266         @Override
    267         public void onReceive(Context c, Intent intent) {
    268             if (intent.getIntExtra("nowait", 0) != 0) {
    269                 rebootSystem("Received ACTION_REBOOT broadcast");
    270                 return;
    271             }
    272             Slog.w(TAG, "Unsupported ACTION_REBOOT broadcast: " + intent);
    273         }
    274     }
    275 
    276     /** Monitor for checking the availability of binder threads. The monitor will block until
    277      * there is a binder thread available to process in coming IPCs to make sure other processes
    278      * can still communicate with the service.
    279      */
    280     private static final class BinderThreadMonitor implements Watchdog.Monitor {
    281         @Override
    282         public void monitor() {
    283             Binder.blockUntilThreadAvailable();
    284         }
    285     }
    286 
    287     public interface Monitor {
    288         void monitor();
    289     }
    290 
    291     public static Watchdog getInstance() {
    292         if (sWatchdog == null) {
    293             sWatchdog = new Watchdog();
    294         }
    295 
    296         return sWatchdog;
    297     }
    298 
    299     private Watchdog() {
    300         super("watchdog");
    301         // Initialize handler checkers for each common thread we want to check.  Note
    302         // that we are not currently checking the background thread, since it can
    303         // potentially hold longer running operations with no guarantees about the timeliness
    304         // of operations there.
    305 
    306         // The shared foreground thread is the main checker.  It is where we
    307         // will also dispatch monitor checks and do other work.
    308         mMonitorChecker = new HandlerChecker(FgThread.getHandler(),
    309                 "foreground thread", DEFAULT_TIMEOUT);
    310         mHandlerCheckers.add(mMonitorChecker);
    311         // Add checker for main thread.  We only do a quick check since there
    312         // can be UI running on the thread.
    313         mHandlerCheckers.add(new HandlerChecker(new Handler(Looper.getMainLooper()),
    314                 "main thread", DEFAULT_TIMEOUT));
    315         // Add checker for shared UI thread.
    316         mHandlerCheckers.add(new HandlerChecker(UiThread.getHandler(),
    317                 "ui thread", DEFAULT_TIMEOUT));
    318         // And also check IO thread.
    319         mHandlerCheckers.add(new HandlerChecker(IoThread.getHandler(),
    320                 "i/o thread", DEFAULT_TIMEOUT));
    321         // And the display thread.
    322         mHandlerCheckers.add(new HandlerChecker(DisplayThread.getHandler(),
    323                 "display thread", DEFAULT_TIMEOUT));
    324         // And the animation thread.
    325         mHandlerCheckers.add(new HandlerChecker(AnimationThread.getHandler(),
    326                 "animation thread", DEFAULT_TIMEOUT));
    327         // And the surface animation thread.
    328         mHandlerCheckers.add(new HandlerChecker(SurfaceAnimationThread.getHandler(),
    329                 "surface animation thread", DEFAULT_TIMEOUT));
    330 
    331         // Initialize monitor for Binder threads.
    332         addMonitor(new BinderThreadMonitor());
    333 
    334         mOpenFdMonitor = OpenFdMonitor.create();
    335 
    336         // See the notes on DEFAULT_TIMEOUT.
    337         assert DB ||
    338                 DEFAULT_TIMEOUT > ZygoteConnectionConstants.WRAPPED_PID_TIMEOUT_MILLIS;
    339     }
    340 
    341     /**
    342      * Registers a {@link BroadcastReceiver} to listen to reboot broadcasts and trigger reboot.
    343      * Should be called during boot after the ActivityManagerService is up and registered
    344      * as a system service so it can handle registration of a {@link BroadcastReceiver}.
    345      */
    346     public void init(Context context, ActivityManagerService activity) {
    347         mActivity = activity;
    348         context.registerReceiver(new RebootRequestReceiver(),
    349                 new IntentFilter(Intent.ACTION_REBOOT),
    350                 android.Manifest.permission.REBOOT, null);
    351     }
    352 
    353     public void processStarted(String name, int pid) {
    354         synchronized (this) {
    355             if ("com.android.phone".equals(name)) {
    356                 mPhonePid = pid;
    357             }
    358         }
    359     }
    360 
    361     public void setActivityController(IActivityController controller) {
    362         synchronized (this) {
    363             mController = controller;
    364         }
    365     }
    366 
    367     public void setAllowRestart(boolean allowRestart) {
    368         synchronized (this) {
    369             mAllowRestart = allowRestart;
    370         }
    371     }
    372 
    373     public void addMonitor(Monitor monitor) {
    374         synchronized (this) {
    375             mMonitorChecker.addMonitorLocked(monitor);
    376         }
    377     }
    378 
    379     public void addThread(Handler thread) {
    380         addThread(thread, DEFAULT_TIMEOUT);
    381     }
    382 
    383     public void addThread(Handler thread, long timeoutMillis) {
    384         synchronized (this) {
    385             final String name = thread.getLooper().getThread().getName();
    386             mHandlerCheckers.add(new HandlerChecker(thread, name, timeoutMillis));
    387         }
    388     }
    389 
    390     /**
    391      * Pauses Watchdog action for the currently running thread. Useful before executing long running
    392      * operations that could falsely trigger the watchdog. Each call to this will require a matching
    393      * call to {@link #resumeWatchingCurrentThread}.
    394      *
    395      * <p>If the current thread has not been added to the Watchdog, this call is a no-op.
    396      *
    397      * <p>If the Watchdog is already paused for the current thread, this call adds
    398      * adds another pause and will require an additional {@link #resumeCurrentThread} to resume.
    399      *
    400      * <p>Note: Use with care, as any deadlocks on the current thread will be undetected until all
    401      * pauses have been resumed.
    402      */
    403     public void pauseWatchingCurrentThread(String reason) {
    404         synchronized (this) {
    405             for (HandlerChecker hc : mHandlerCheckers) {
    406                 if (Thread.currentThread().equals(hc.getThread())) {
    407                     hc.pauseLocked(reason);
    408                 }
    409             }
    410         }
    411     }
    412 
    413     /**
    414      * Resumes the last pause from {@link #pauseWatchingCurrentThread} for the currently running
    415      * thread.
    416      *
    417      * <p>If the current thread has not been added to the Watchdog, this call is a no-op.
    418      *
    419      * <p>If the Watchdog action for the current thread is already resumed, this call logs a wtf.
    420      *
    421      * <p>If all pauses have been resumed, the Watchdog action is finally resumed, otherwise,
    422      * the Watchdog action for the current thread remains paused until resume is called at least
    423      * as many times as the calls to pause.
    424      */
    425     public void resumeWatchingCurrentThread(String reason) {
    426         synchronized (this) {
    427             for (HandlerChecker hc : mHandlerCheckers) {
    428                 if (Thread.currentThread().equals(hc.getThread())) {
    429                     hc.resumeLocked(reason);
    430                 }
    431             }
    432         }
    433     }
    434 
    435     /**
    436      * Perform a full reboot of the system.
    437      */
    438     void rebootSystem(String reason) {
    439         Slog.i(TAG, "Rebooting system because: " + reason);
    440         IPowerManager pms = (IPowerManager)ServiceManager.getService(Context.POWER_SERVICE);
    441         try {
    442             pms.reboot(false, reason, false);
    443         } catch (RemoteException ex) {
    444         }
    445     }
    446 
    447     private int evaluateCheckerCompletionLocked() {
    448         int state = COMPLETED;
    449         for (int i=0; i<mHandlerCheckers.size(); i++) {
    450             HandlerChecker hc = mHandlerCheckers.get(i);
    451             state = Math.max(state, hc.getCompletionStateLocked());
    452         }
    453         return state;
    454     }
    455 
    456     private ArrayList<HandlerChecker> getBlockedCheckersLocked() {
    457         ArrayList<HandlerChecker> checkers = new ArrayList<HandlerChecker>();
    458         for (int i=0; i<mHandlerCheckers.size(); i++) {
    459             HandlerChecker hc = mHandlerCheckers.get(i);
    460             if (hc.isOverdueLocked()) {
    461                 checkers.add(hc);
    462             }
    463         }
    464         return checkers;
    465     }
    466 
    467     private String describeCheckersLocked(List<HandlerChecker> checkers) {
    468         StringBuilder builder = new StringBuilder(128);
    469         for (int i=0; i<checkers.size(); i++) {
    470             if (builder.length() > 0) {
    471                 builder.append(", ");
    472             }
    473             builder.append(checkers.get(i).describeBlockedStateLocked());
    474         }
    475         return builder.toString();
    476     }
    477 
    478     private static ArrayList<Integer> getInterestingHalPids() {
    479         try {
    480             IServiceManager serviceManager = IServiceManager.getService();
    481             ArrayList<IServiceManager.InstanceDebugInfo> dump =
    482                     serviceManager.debugDump();
    483             HashSet<Integer> pids = new HashSet<>();
    484             for (IServiceManager.InstanceDebugInfo info : dump) {
    485                 if (info.pid == IServiceManager.PidConstant.NO_PID) {
    486                     continue;
    487                 }
    488 
    489                 if (!HAL_INTERFACES_OF_INTEREST.contains(info.interfaceName)) {
    490                     continue;
    491                 }
    492 
    493                 pids.add(info.pid);
    494             }
    495             return new ArrayList<Integer>(pids);
    496         } catch (RemoteException e) {
    497             return new ArrayList<Integer>();
    498         }
    499     }
    500 
    501     static ArrayList<Integer> getInterestingNativePids() {
    502         ArrayList<Integer> pids = getInterestingHalPids();
    503 
    504         int[] nativePids = Process.getPidsForCommands(NATIVE_STACKS_OF_INTEREST);
    505         if (nativePids != null) {
    506             pids.ensureCapacity(pids.size() + nativePids.length);
    507             for (int i : nativePids) {
    508                 pids.add(i);
    509             }
    510         }
    511 
    512         return pids;
    513     }
    514 
    515     @Override
    516     public void run() {
    517         boolean waitedHalf = false;
    518         while (true) {
    519             final List<HandlerChecker> blockedCheckers;
    520             final String subject;
    521             final boolean allowRestart;
    522             int debuggerWasConnected = 0;
    523             synchronized (this) {
    524                 long timeout = CHECK_INTERVAL;
    525                 // Make sure we (re)spin the checkers that have become idle within
    526                 // this wait-and-check interval
    527                 for (int i=0; i<mHandlerCheckers.size(); i++) {
    528                     HandlerChecker hc = mHandlerCheckers.get(i);
    529                     hc.scheduleCheckLocked();
    530                 }
    531 
    532                 if (debuggerWasConnected > 0) {
    533                     debuggerWasConnected--;
    534                 }
    535 
    536                 // NOTE: We use uptimeMillis() here because we do not want to increment the time we
    537                 // wait while asleep. If the device is asleep then the thing that we are waiting
    538                 // to timeout on is asleep as well and won't have a chance to run, causing a false
    539                 // positive on when to kill things.
    540                 long start = SystemClock.uptimeMillis();
    541                 while (timeout > 0) {
    542                     if (Debug.isDebuggerConnected()) {
    543                         debuggerWasConnected = 2;
    544                     }
    545                     try {
    546                         wait(timeout);
    547                         // Note: mHandlerCheckers and mMonitorChecker may have changed after waiting
    548                     } catch (InterruptedException e) {
    549                         Log.wtf(TAG, e);
    550                     }
    551                     if (Debug.isDebuggerConnected()) {
    552                         debuggerWasConnected = 2;
    553                     }
    554                     timeout = CHECK_INTERVAL - (SystemClock.uptimeMillis() - start);
    555                 }
    556 
    557                 boolean fdLimitTriggered = false;
    558                 if (mOpenFdMonitor != null) {
    559                     fdLimitTriggered = mOpenFdMonitor.monitor();
    560                 }
    561 
    562                 if (!fdLimitTriggered) {
    563                     final int waitState = evaluateCheckerCompletionLocked();
    564                     if (waitState == COMPLETED) {
    565                         // The monitors have returned; reset
    566                         waitedHalf = false;
    567                         continue;
    568                     } else if (waitState == WAITING) {
    569                         // still waiting but within their configured intervals; back off and recheck
    570                         continue;
    571                     } else if (waitState == WAITED_HALF) {
    572                         if (!waitedHalf) {
    573                             Slog.i(TAG, "WAITED_HALF");
    574                             // We've waited half the deadlock-detection interval.  Pull a stack
    575                             // trace and wait another half.
    576                             ArrayList<Integer> pids = new ArrayList<Integer>();
    577                             pids.add(Process.myPid());
    578                             ActivityManagerService.dumpStackTraces(pids, null, null,
    579                                 getInterestingNativePids());
    580                             waitedHalf = true;
    581                         }
    582                         continue;
    583                     }
    584 
    585                     // something is overdue!
    586                     blockedCheckers = getBlockedCheckersLocked();
    587                     subject = describeCheckersLocked(blockedCheckers);
    588                 } else {
    589                     blockedCheckers = Collections.emptyList();
    590                     subject = "Open FD high water mark reached";
    591                 }
    592                 allowRestart = mAllowRestart;
    593             }
    594 
    595             // If we got here, that means that the system is most likely hung.
    596             // First collect stack traces from all threads of the system process.
    597             // Then kill this process so that the system will restart.
    598             EventLog.writeEvent(EventLogTags.WATCHDOG, subject);
    599 
    600             ArrayList<Integer> pids = new ArrayList<>();
    601             pids.add(Process.myPid());
    602             if (mPhonePid > 0) pids.add(mPhonePid);
    603 
    604             final File stack = ActivityManagerService.dumpStackTraces(
    605                     pids, null, null, getInterestingNativePids());
    606 
    607             // Give some extra time to make sure the stack traces get written.
    608             // The system's been hanging for a minute, another second or two won't hurt much.
    609             SystemClock.sleep(5000);
    610 
    611             // Trigger the kernel to dump all blocked threads, and backtraces on all CPUs to the kernel log
    612             doSysRq('w');
    613             doSysRq('l');
    614 
    615             // Try to add the error to the dropbox, but assuming that the ActivityManager
    616             // itself may be deadlocked.  (which has happened, causing this statement to
    617             // deadlock and the watchdog as a whole to be ineffective)
    618             Thread dropboxThread = new Thread("watchdogWriteToDropbox") {
    619                     public void run() {
    620                         // If a watched thread hangs before init() is called, we don't have a
    621                         // valid mActivity. So we can't log the error to dropbox.
    622                         if (mActivity != null) {
    623                             mActivity.addErrorToDropBox(
    624                                     "watchdog", null, "system_server", null, null, null,
    625                                     subject, null, stack, null);
    626                         }
    627                         StatsLog.write(StatsLog.SYSTEM_SERVER_WATCHDOG_OCCURRED, subject);
    628                     }
    629                 };
    630             dropboxThread.start();
    631             try {
    632                 dropboxThread.join(2000);  // wait up to 2 seconds for it to return.
    633             } catch (InterruptedException ignored) {}
    634 
    635             IActivityController controller;
    636             synchronized (this) {
    637                 controller = mController;
    638             }
    639             if (controller != null) {
    640                 Slog.i(TAG, "Reporting stuck state to activity controller");
    641                 try {
    642                     Binder.setDumpDisabled("Service dumps disabled due to hung system process.");
    643                     // 1 = keep waiting, -1 = kill system
    644                     int res = controller.systemNotResponding(subject);
    645                     if (res >= 0) {
    646                         Slog.i(TAG, "Activity controller requested to coninue to wait");
    647                         waitedHalf = false;
    648                         continue;
    649                     }
    650                 } catch (RemoteException e) {
    651                 }
    652             }
    653 
    654             // Only kill the process if the debugger is not attached.
    655             if (Debug.isDebuggerConnected()) {
    656                 debuggerWasConnected = 2;
    657             }
    658             if (debuggerWasConnected >= 2) {
    659                 Slog.w(TAG, "Debugger connected: Watchdog is *not* killing the system process");
    660             } else if (debuggerWasConnected > 0) {
    661                 Slog.w(TAG, "Debugger was connected: Watchdog is *not* killing the system process");
    662             } else if (!allowRestart) {
    663                 Slog.w(TAG, "Restart not allowed: Watchdog is *not* killing the system process");
    664             } else {
    665                 Slog.w(TAG, "*** WATCHDOG KILLING SYSTEM PROCESS: " + subject);
    666                 WatchdogDiagnostics.diagnoseCheckers(blockedCheckers);
    667                 Slog.w(TAG, "*** GOODBYE!");
    668                 Process.killProcess(Process.myPid());
    669                 System.exit(10);
    670             }
    671 
    672             waitedHalf = false;
    673         }
    674     }
    675 
    676     private void doSysRq(char c) {
    677         try {
    678             FileWriter sysrq_trigger = new FileWriter("/proc/sysrq-trigger");
    679             sysrq_trigger.write(c);
    680             sysrq_trigger.close();
    681         } catch (IOException e) {
    682             Slog.w(TAG, "Failed to write to /proc/sysrq-trigger", e);
    683         }
    684     }
    685 
    686     public static final class OpenFdMonitor {
    687         /**
    688          * Number of FDs below the soft limit that we trigger a runtime restart at. This was
    689          * chosen arbitrarily, but will need to be at least 6 in order to have a sufficient number
    690          * of FDs in reserve to complete a dump.
    691          */
    692         private static final int FD_HIGH_WATER_MARK = 12;
    693 
    694         private final File mDumpDir;
    695         private final File mFdHighWaterMark;
    696 
    697         public static OpenFdMonitor create() {
    698             // Only run the FD monitor on debuggable builds (such as userdebug and eng builds).
    699             if (!Build.IS_DEBUGGABLE) {
    700                 return null;
    701             }
    702 
    703             final StructRlimit rlimit;
    704             try {
    705                 rlimit = android.system.Os.getrlimit(OsConstants.RLIMIT_NOFILE);
    706             } catch (ErrnoException errno) {
    707                 Slog.w(TAG, "Error thrown from getrlimit(RLIMIT_NOFILE)", errno);
    708                 return null;
    709             }
    710 
    711             // The assumption we're making here is that FD numbers are allocated (more or less)
    712             // sequentially, which is currently (and historically) true since open is currently
    713             // specified to always return the lowest-numbered non-open file descriptor for the
    714             // current process.
    715             //
    716             // We do this to avoid having to enumerate the contents of /proc/self/fd in order to
    717             // count the number of descriptors open in the process.
    718             final File fdThreshold = new File("/proc/self/fd/" + (rlimit.rlim_cur - FD_HIGH_WATER_MARK));
    719             return new OpenFdMonitor(new File("/data/anr"), fdThreshold);
    720         }
    721 
    722         OpenFdMonitor(File dumpDir, File fdThreshold) {
    723             mDumpDir = dumpDir;
    724             mFdHighWaterMark = fdThreshold;
    725         }
    726 
    727         /**
    728          * Dumps open file descriptors and their full paths to a temporary file in {@code mDumpDir}.
    729          */
    730         private void dumpOpenDescriptors() {
    731             // We cannot exec lsof to get more info about open file descriptors because a newly
    732             // forked process will not have the permissions to readlink. Instead list all open
    733             // descriptors from /proc/pid/fd and resolve them.
    734             List<String> dumpInfo = new ArrayList<>();
    735             String fdDirPath = String.format("/proc/%d/fd/", Process.myPid());
    736             File[] fds = new File(fdDirPath).listFiles();
    737             if (fds == null) {
    738                 dumpInfo.add("Unable to list " + fdDirPath);
    739             } else {
    740                 for (File f : fds) {
    741                     String fdSymLink = f.getAbsolutePath();
    742                     String resolvedPath = "";
    743                     try {
    744                         resolvedPath = Os.readlink(fdSymLink);
    745                     } catch (ErrnoException ex) {
    746                         resolvedPath = ex.getMessage();
    747                     }
    748                     dumpInfo.add(fdSymLink + "\t" + resolvedPath);
    749                 }
    750             }
    751 
    752             // Dump the fds & paths to a temp file.
    753             try {
    754                 File dumpFile = File.createTempFile("anr_fd_", "", mDumpDir);
    755                 Path out = Paths.get(dumpFile.getAbsolutePath());
    756                 Files.write(out, dumpInfo, StandardCharsets.UTF_8);
    757             } catch (IOException ex) {
    758                 Slog.w(TAG, "Unable to write open descriptors to file: " + ex);
    759             }
    760         }
    761 
    762         /**
    763          * @return {@code true} if the high water mark was breached and a dump was written,
    764          *     {@code false} otherwise.
    765          */
    766         public boolean monitor() {
    767             if (mFdHighWaterMark.exists()) {
    768                 dumpOpenDescriptors();
    769                 return true;
    770             }
    771 
    772             return false;
    773         }
    774     }
    775 }
    776