Home | History | Annotate | Download | only in server
      1 /*
      2  * Copyright (C) 2017 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.android.server;
     18 
     19 import static com.android.server.pm.PackageManagerServiceUtils.logCriticalInfo;
     20 
     21 import android.content.ContentResolver;
     22 import android.content.Context;
     23 import android.os.Build;
     24 import android.os.Environment;
     25 import android.os.FileUtils;
     26 import android.os.RecoverySystem;
     27 import android.os.SystemClock;
     28 import android.os.SystemProperties;
     29 import android.os.UserHandle;
     30 import android.provider.Settings;
     31 import android.text.format.DateUtils;
     32 import android.util.ExceptionUtils;
     33 import android.util.Log;
     34 import android.util.MathUtils;
     35 import android.util.Slog;
     36 import android.util.SparseArray;
     37 
     38 import com.android.internal.util.ArrayUtils;
     39 import com.android.server.pm.PackageManagerService;
     40 
     41 import java.io.File;
     42 
     43 /**
     44  * Utilities to help rescue the system from crash loops. Callers are expected to
     45  * report boot events and persistent app crashes, and if they happen frequently
     46  * enough this class will slowly escalate through several rescue operations
     47  * before finally rebooting and prompting the user if they want to wipe data as
     48  * a last resort.
     49  *
     50  * @hide
     51  */
     52 public class RescueParty {
     53     private static final String TAG = "RescueParty";
     54 
     55     private static final String PROP_ENABLE_RESCUE = "persist.sys.enable_rescue";
     56     private static final String PROP_DISABLE_RESCUE = "persist.sys.disable_rescue";
     57     private static final String PROP_RESCUE_LEVEL = "sys.rescue_level";
     58     private static final String PROP_RESCUE_BOOT_COUNT = "sys.rescue_boot_count";
     59     private static final String PROP_RESCUE_BOOT_START = "sys.rescue_boot_start";
     60     private static final String PROP_VIRTUAL_DEVICE = "ro.hardware.virtual_device";
     61 
     62     private static final int LEVEL_NONE = 0;
     63     private static final int LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS = 1;
     64     private static final int LEVEL_RESET_SETTINGS_UNTRUSTED_CHANGES = 2;
     65     private static final int LEVEL_RESET_SETTINGS_TRUSTED_DEFAULTS = 3;
     66     private static final int LEVEL_FACTORY_RESET = 4;
     67 
     68     /** Threshold for boot loops */
     69     private static final Threshold sBoot = new BootThreshold();
     70     /** Threshold for app crash loops */
     71     private static SparseArray<Threshold> sApps = new SparseArray<>();
     72 
     73     private static boolean isDisabled() {
     74         // Check if we're explicitly enabled for testing
     75         if (SystemProperties.getBoolean(PROP_ENABLE_RESCUE, false)) {
     76             return false;
     77         }
     78 
     79         // We're disabled on all engineering devices
     80         if (Build.IS_ENG) {
     81             Slog.v(TAG, "Disabled because of eng build");
     82             return true;
     83         }
     84 
     85         // We're disabled on userdebug devices connected over USB, since that's
     86         // a decent signal that someone is actively trying to debug the device,
     87         // or that it's in a lab environment.
     88         if (Build.IS_USERDEBUG && isUsbActive()) {
     89             Slog.v(TAG, "Disabled because of active USB connection");
     90             return true;
     91         }
     92 
     93         // One last-ditch check
     94         if (SystemProperties.getBoolean(PROP_DISABLE_RESCUE, false)) {
     95             Slog.v(TAG, "Disabled because of manual property");
     96             return true;
     97         }
     98 
     99         return false;
    100     }
    101 
    102     /**
    103      * Take note of a boot event. If we notice too many of these events
    104      * happening in rapid succession, we'll send out a rescue party.
    105      */
    106     public static void noteBoot(Context context) {
    107         if (isDisabled()) return;
    108         if (sBoot.incrementAndTest()) {
    109             sBoot.reset();
    110             incrementRescueLevel(sBoot.uid);
    111             executeRescueLevel(context);
    112         }
    113     }
    114 
    115     /**
    116      * Take note of a persistent app crash. If we notice too many of these
    117      * events happening in rapid succession, we'll send out a rescue party.
    118      */
    119     public static void notePersistentAppCrash(Context context, int uid) {
    120         if (isDisabled()) return;
    121         Threshold t = sApps.get(uid);
    122         if (t == null) {
    123             t = new AppThreshold(uid);
    124             sApps.put(uid, t);
    125         }
    126         if (t.incrementAndTest()) {
    127             t.reset();
    128             incrementRescueLevel(t.uid);
    129             executeRescueLevel(context);
    130         }
    131     }
    132 
    133     /**
    134      * Check if we're currently attempting to reboot for a factory reset.
    135      */
    136     public static boolean isAttemptingFactoryReset() {
    137         return SystemProperties.getInt(PROP_RESCUE_LEVEL, LEVEL_NONE) == LEVEL_FACTORY_RESET;
    138     }
    139 
    140     /**
    141      * Escalate to the next rescue level. After incrementing the level you'll
    142      * probably want to call {@link #executeRescueLevel(Context)}.
    143      */
    144     private static void incrementRescueLevel(int triggerUid) {
    145         final int level = MathUtils.constrain(
    146                 SystemProperties.getInt(PROP_RESCUE_LEVEL, LEVEL_NONE) + 1,
    147                 LEVEL_NONE, LEVEL_FACTORY_RESET);
    148         SystemProperties.set(PROP_RESCUE_LEVEL, Integer.toString(level));
    149 
    150         EventLogTags.writeRescueLevel(level, triggerUid);
    151         logCriticalInfo(Log.WARN, "Incremented rescue level to "
    152                 + levelToString(level) + " triggered by UID " + triggerUid);
    153     }
    154 
    155     /**
    156      * Called when {@code SettingsProvider} has been published, which is a good
    157      * opportunity to reset any settings depending on our rescue level.
    158      */
    159     public static void onSettingsProviderPublished(Context context) {
    160         executeRescueLevel(context);
    161     }
    162 
    163     private static void executeRescueLevel(Context context) {
    164         final int level = SystemProperties.getInt(PROP_RESCUE_LEVEL, LEVEL_NONE);
    165         if (level == LEVEL_NONE) return;
    166 
    167         Slog.w(TAG, "Attempting rescue level " + levelToString(level));
    168         try {
    169             executeRescueLevelInternal(context, level);
    170             EventLogTags.writeRescueSuccess(level);
    171             logCriticalInfo(Log.DEBUG,
    172                     "Finished rescue level " + levelToString(level));
    173         } catch (Throwable t) {
    174             final String msg = ExceptionUtils.getCompleteMessage(t);
    175             EventLogTags.writeRescueFailure(level, msg);
    176             logCriticalInfo(Log.ERROR,
    177                     "Failed rescue level " + levelToString(level) + ": " + msg);
    178         }
    179     }
    180 
    181     private static void executeRescueLevelInternal(Context context, int level) throws Exception {
    182         switch (level) {
    183             case LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS:
    184                 resetAllSettings(context, Settings.RESET_MODE_UNTRUSTED_DEFAULTS);
    185                 break;
    186             case LEVEL_RESET_SETTINGS_UNTRUSTED_CHANGES:
    187                 resetAllSettings(context, Settings.RESET_MODE_UNTRUSTED_CHANGES);
    188                 break;
    189             case LEVEL_RESET_SETTINGS_TRUSTED_DEFAULTS:
    190                 resetAllSettings(context, Settings.RESET_MODE_TRUSTED_DEFAULTS);
    191                 break;
    192             case LEVEL_FACTORY_RESET:
    193                 RecoverySystem.rebootPromptAndWipeUserData(context, TAG);
    194                 break;
    195         }
    196     }
    197 
    198     private static void resetAllSettings(Context context, int mode) throws Exception {
    199         // Try our best to reset all settings possible, and once finished
    200         // rethrow any exception that we encountered
    201         Exception res = null;
    202         final ContentResolver resolver = context.getContentResolver();
    203         try {
    204             Settings.Global.resetToDefaultsAsUser(resolver, null, mode, UserHandle.USER_SYSTEM);
    205         } catch (Throwable t) {
    206             res = new RuntimeException("Failed to reset global settings", t);
    207         }
    208         for (int userId : getAllUserIds()) {
    209             try {
    210                 Settings.Secure.resetToDefaultsAsUser(resolver, null, mode, userId);
    211             } catch (Throwable t) {
    212                 res = new RuntimeException("Failed to reset secure settings for " + userId, t);
    213             }
    214         }
    215         if (res != null) {
    216             throw res;
    217         }
    218     }
    219 
    220     /**
    221      * Threshold that can be triggered if a number of events occur within a
    222      * window of time.
    223      */
    224     private abstract static class Threshold {
    225         public abstract int getCount();
    226         public abstract void setCount(int count);
    227         public abstract long getStart();
    228         public abstract void setStart(long start);
    229 
    230         private final int uid;
    231         private final int triggerCount;
    232         private final long triggerWindow;
    233 
    234         public Threshold(int uid, int triggerCount, long triggerWindow) {
    235             this.uid = uid;
    236             this.triggerCount = triggerCount;
    237             this.triggerWindow = triggerWindow;
    238         }
    239 
    240         public void reset() {
    241             setCount(0);
    242             setStart(0);
    243         }
    244 
    245         /**
    246          * @return if this threshold has been triggered
    247          */
    248         public boolean incrementAndTest() {
    249             final long now = SystemClock.elapsedRealtime();
    250             final long window = now - getStart();
    251             if (window > triggerWindow) {
    252                 setCount(1);
    253                 setStart(now);
    254                 return false;
    255             } else {
    256                 int count = getCount() + 1;
    257                 setCount(count);
    258                 EventLogTags.writeRescueNote(uid, count, window);
    259                 Slog.w(TAG, "Noticed " + count + " events for UID " + uid + " in last "
    260                         + (window / 1000) + " sec");
    261                 return (count >= triggerCount);
    262             }
    263         }
    264     }
    265 
    266     /**
    267      * Specialization of {@link Threshold} for monitoring boot events. It stores
    268      * counters in system properties for robustness.
    269      */
    270     private static class BootThreshold extends Threshold {
    271         public BootThreshold() {
    272             // We're interested in 5 events in any 300 second period; this
    273             // window is super relaxed because booting can take a long time if
    274             // forced to dexopt things.
    275             super(android.os.Process.ROOT_UID, 5, 300 * DateUtils.SECOND_IN_MILLIS);
    276         }
    277 
    278         @Override
    279         public int getCount() {
    280             return SystemProperties.getInt(PROP_RESCUE_BOOT_COUNT, 0);
    281         }
    282 
    283         @Override
    284         public void setCount(int count) {
    285             SystemProperties.set(PROP_RESCUE_BOOT_COUNT, Integer.toString(count));
    286         }
    287 
    288         @Override
    289         public long getStart() {
    290             return SystemProperties.getLong(PROP_RESCUE_BOOT_START, 0);
    291         }
    292 
    293         @Override
    294         public void setStart(long start) {
    295             SystemProperties.set(PROP_RESCUE_BOOT_START, Long.toString(start));
    296         }
    297     }
    298 
    299     /**
    300      * Specialization of {@link Threshold} for monitoring app crashes. It stores
    301      * counters in memory.
    302      */
    303     private static class AppThreshold extends Threshold {
    304         private int count;
    305         private long start;
    306 
    307         public AppThreshold(int uid) {
    308             // We're interested in 5 events in any 30 second period; apps crash
    309             // pretty quickly so we can keep a tight leash on them.
    310             super(uid, 5, 30 * DateUtils.SECOND_IN_MILLIS);
    311         }
    312 
    313         @Override public int getCount() { return count; }
    314         @Override public void setCount(int count) { this.count = count; }
    315         @Override public long getStart() { return start; }
    316         @Override public void setStart(long start) { this.start = start; }
    317     }
    318 
    319     private static int[] getAllUserIds() {
    320         int[] userIds = { UserHandle.USER_SYSTEM };
    321         try {
    322             for (File file : FileUtils.listFilesOrEmpty(Environment.getDataSystemDeDirectory())) {
    323                 try {
    324                     final int userId = Integer.parseInt(file.getName());
    325                     if (userId != UserHandle.USER_SYSTEM) {
    326                         userIds = ArrayUtils.appendInt(userIds, userId);
    327                     }
    328                 } catch (NumberFormatException ignored) {
    329                 }
    330             }
    331         } catch (Throwable t) {
    332             Slog.w(TAG, "Trouble discovering users", t);
    333         }
    334         return userIds;
    335     }
    336 
    337     /**
    338      * Hacky test to check if the device has an active USB connection, which is
    339      * a good proxy for someone doing local development work.
    340      */
    341     private static boolean isUsbActive() {
    342         if (SystemProperties.getBoolean(PROP_VIRTUAL_DEVICE, false)) {
    343             Slog.v(TAG, "Assuming virtual device is connected over USB");
    344             return true;
    345         }
    346         try {
    347             final String state = FileUtils
    348                     .readTextFile(new File("/sys/class/android_usb/android0/state"), 128, "");
    349             return "CONFIGURED".equals(state.trim());
    350         } catch (Throwable t) {
    351             Slog.w(TAG, "Failed to determine if device was on USB", t);
    352             return false;
    353         }
    354     }
    355 
    356     private static String levelToString(int level) {
    357         switch (level) {
    358             case LEVEL_NONE: return "NONE";
    359             case LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS: return "RESET_SETTINGS_UNTRUSTED_DEFAULTS";
    360             case LEVEL_RESET_SETTINGS_UNTRUSTED_CHANGES: return "RESET_SETTINGS_UNTRUSTED_CHANGES";
    361             case LEVEL_RESET_SETTINGS_TRUSTED_DEFAULTS: return "RESET_SETTINGS_TRUSTED_DEFAULTS";
    362             case LEVEL_FACTORY_RESET: return "FACTORY_RESET";
    363             default: return Integer.toString(level);
    364         }
    365     }
    366 }
    367