Home | History | Annotate | Download | only in server
      1 /*
      2  * Copyright (C) 2017 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.android.server;
     18 
     19 import android.content.ContentResolver;
     20 import android.content.Context;
     21 import android.os.Build;
     22 import android.os.Environment;
     23 import android.os.FileUtils;
     24 import android.os.RecoverySystem;
     25 import android.os.SystemClock;
     26 import android.os.SystemProperties;
     27 import android.os.UserHandle;
     28 import android.provider.Settings;
     29 import android.text.format.DateUtils;
     30 import android.util.ExceptionUtils;
     31 import android.util.Log;
     32 import android.util.MathUtils;
     33 import android.util.Slog;
     34 import android.util.SparseArray;
     35 
     36 import com.android.internal.util.ArrayUtils;
     37 import com.android.server.pm.PackageManagerService;
     38 
     39 import java.io.File;
     40 
     41 /**
     42  * Utilities to help rescue the system from crash loops. Callers are expected to
     43  * report boot events and persistent app crashes, and if they happen frequently
     44  * enough this class will slowly escalate through several rescue operations
     45  * before finally rebooting and prompting the user if they want to wipe data as
     46  * a last resort.
     47  *
     48  * @hide
     49  */
     50 public class RescueParty {
     51     private static final String TAG = "RescueParty";
     52 
     53     private static final String PROP_ENABLE_RESCUE = "persist.sys.enable_rescue";
     54     private static final String PROP_DISABLE_RESCUE = "persist.sys.disable_rescue";
     55     private static final String PROP_RESCUE_LEVEL = "sys.rescue_level";
     56     private static final String PROP_RESCUE_BOOT_COUNT = "sys.rescue_boot_count";
     57     private static final String PROP_RESCUE_BOOT_START = "sys.rescue_boot_start";
     58     private static final String PROP_VIRTUAL_DEVICE = "ro.hardware.virtual_device";
     59 
     60     private static final int LEVEL_NONE = 0;
     61     private static final int LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS = 1;
     62     private static final int LEVEL_RESET_SETTINGS_UNTRUSTED_CHANGES = 2;
     63     private static final int LEVEL_RESET_SETTINGS_TRUSTED_DEFAULTS = 3;
     64     private static final int LEVEL_FACTORY_RESET = 4;
     65 
     66     /** Threshold for boot loops */
     67     private static final Threshold sBoot = new BootThreshold();
     68     /** Threshold for app crash loops */
     69     private static SparseArray<Threshold> sApps = new SparseArray<>();
     70 
     71     private static boolean isDisabled() {
     72         // Check if we're explicitly enabled for testing
     73         if (SystemProperties.getBoolean(PROP_ENABLE_RESCUE, false)) {
     74             return false;
     75         }
     76 
     77         // We're disabled on all engineering devices
     78         if (Build.IS_ENG) {
     79             Slog.v(TAG, "Disabled because of eng build");
     80             return true;
     81         }
     82 
     83         // We're disabled on userdebug devices connected over USB, since that's
     84         // a decent signal that someone is actively trying to debug the device,
     85         // or that it's in a lab environment.
     86         if (Build.IS_USERDEBUG && isUsbActive()) {
     87             Slog.v(TAG, "Disabled because of active USB connection");
     88             return true;
     89         }
     90 
     91         // One last-ditch check
     92         if (SystemProperties.getBoolean(PROP_DISABLE_RESCUE, false)) {
     93             Slog.v(TAG, "Disabled because of manual property");
     94             return true;
     95         }
     96 
     97         return false;
     98     }
     99 
    100     /**
    101      * Take note of a boot event. If we notice too many of these events
    102      * happening in rapid succession, we'll send out a rescue party.
    103      */
    104     public static void noteBoot(Context context) {
    105         if (isDisabled()) return;
    106         if (sBoot.incrementAndTest()) {
    107             sBoot.reset();
    108             incrementRescueLevel(sBoot.uid);
    109             executeRescueLevel(context);
    110         }
    111     }
    112 
    113     /**
    114      * Take note of a persistent app crash. If we notice too many of these
    115      * events happening in rapid succession, we'll send out a rescue party.
    116      */
    117     public static void notePersistentAppCrash(Context context, int uid) {
    118         if (isDisabled()) return;
    119         Threshold t = sApps.get(uid);
    120         if (t == null) {
    121             t = new AppThreshold(uid);
    122             sApps.put(uid, t);
    123         }
    124         if (t.incrementAndTest()) {
    125             t.reset();
    126             incrementRescueLevel(t.uid);
    127             executeRescueLevel(context);
    128         }
    129     }
    130 
    131     /**
    132      * Check if we're currently attempting to reboot for a factory reset.
    133      */
    134     public static boolean isAttemptingFactoryReset() {
    135         return SystemProperties.getInt(PROP_RESCUE_LEVEL, LEVEL_NONE) == LEVEL_FACTORY_RESET;
    136     }
    137 
    138     /**
    139      * Escalate to the next rescue level. After incrementing the level you'll
    140      * probably want to call {@link #executeRescueLevel(Context)}.
    141      */
    142     private static void incrementRescueLevel(int triggerUid) {
    143         final int level = MathUtils.constrain(
    144                 SystemProperties.getInt(PROP_RESCUE_LEVEL, LEVEL_NONE) + 1,
    145                 LEVEL_NONE, LEVEL_FACTORY_RESET);
    146         SystemProperties.set(PROP_RESCUE_LEVEL, Integer.toString(level));
    147 
    148         EventLogTags.writeRescueLevel(level, triggerUid);
    149         PackageManagerService.logCriticalInfo(Log.WARN, "Incremented rescue level to "
    150                 + levelToString(level) + " triggered by UID " + triggerUid);
    151     }
    152 
    153     /**
    154      * Called when {@code SettingsProvider} has been published, which is a good
    155      * opportunity to reset any settings depending on our rescue level.
    156      */
    157     public static void onSettingsProviderPublished(Context context) {
    158         executeRescueLevel(context);
    159     }
    160 
    161     private static void executeRescueLevel(Context context) {
    162         final int level = SystemProperties.getInt(PROP_RESCUE_LEVEL, LEVEL_NONE);
    163         if (level == LEVEL_NONE) return;
    164 
    165         Slog.w(TAG, "Attempting rescue level " + levelToString(level));
    166         try {
    167             executeRescueLevelInternal(context, level);
    168             EventLogTags.writeRescueSuccess(level);
    169             PackageManagerService.logCriticalInfo(Log.DEBUG,
    170                     "Finished rescue level " + levelToString(level));
    171         } catch (Throwable t) {
    172             final String msg = ExceptionUtils.getCompleteMessage(t);
    173             EventLogTags.writeRescueFailure(level, msg);
    174             PackageManagerService.logCriticalInfo(Log.ERROR,
    175                     "Failed rescue level " + levelToString(level) + ": " + msg);
    176         }
    177     }
    178 
    179     private static void executeRescueLevelInternal(Context context, int level) throws Exception {
    180         switch (level) {
    181             case LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS:
    182                 resetAllSettings(context, Settings.RESET_MODE_UNTRUSTED_DEFAULTS);
    183                 break;
    184             case LEVEL_RESET_SETTINGS_UNTRUSTED_CHANGES:
    185                 resetAllSettings(context, Settings.RESET_MODE_UNTRUSTED_CHANGES);
    186                 break;
    187             case LEVEL_RESET_SETTINGS_TRUSTED_DEFAULTS:
    188                 resetAllSettings(context, Settings.RESET_MODE_TRUSTED_DEFAULTS);
    189                 break;
    190             case LEVEL_FACTORY_RESET:
    191                 RecoverySystem.rebootPromptAndWipeUserData(context, TAG);
    192                 break;
    193         }
    194     }
    195 
    196     private static void resetAllSettings(Context context, int mode) throws Exception {
    197         // Try our best to reset all settings possible, and once finished
    198         // rethrow any exception that we encountered
    199         Exception res = null;
    200         final ContentResolver resolver = context.getContentResolver();
    201         try {
    202             Settings.Global.resetToDefaultsAsUser(resolver, null, mode, UserHandle.USER_SYSTEM);
    203         } catch (Throwable t) {
    204             res = new RuntimeException("Failed to reset global settings", t);
    205         }
    206         for (int userId : getAllUserIds()) {
    207             try {
    208                 Settings.Secure.resetToDefaultsAsUser(resolver, null, mode, userId);
    209             } catch (Throwable t) {
    210                 res = new RuntimeException("Failed to reset secure settings for " + userId, t);
    211             }
    212         }
    213         if (res != null) {
    214             throw res;
    215         }
    216     }
    217 
    218     /**
    219      * Threshold that can be triggered if a number of events occur within a
    220      * window of time.
    221      */
    222     private abstract static class Threshold {
    223         public abstract int getCount();
    224         public abstract void setCount(int count);
    225         public abstract long getStart();
    226         public abstract void setStart(long start);
    227 
    228         private final int uid;
    229         private final int triggerCount;
    230         private final long triggerWindow;
    231 
    232         public Threshold(int uid, int triggerCount, long triggerWindow) {
    233             this.uid = uid;
    234             this.triggerCount = triggerCount;
    235             this.triggerWindow = triggerWindow;
    236         }
    237 
    238         public void reset() {
    239             setCount(0);
    240             setStart(0);
    241         }
    242 
    243         /**
    244          * @return if this threshold has been triggered
    245          */
    246         public boolean incrementAndTest() {
    247             final long now = SystemClock.elapsedRealtime();
    248             final long window = now - getStart();
    249             if (window > triggerWindow) {
    250                 setCount(1);
    251                 setStart(now);
    252                 return false;
    253             } else {
    254                 int count = getCount() + 1;
    255                 setCount(count);
    256                 EventLogTags.writeRescueNote(uid, count, window);
    257                 Slog.w(TAG, "Noticed " + count + " events for UID " + uid + " in last "
    258                         + (window / 1000) + " sec");
    259                 return (count >= triggerCount);
    260             }
    261         }
    262     }
    263 
    264     /**
    265      * Specialization of {@link Threshold} for monitoring boot events. It stores
    266      * counters in system properties for robustness.
    267      */
    268     private static class BootThreshold extends Threshold {
    269         public BootThreshold() {
    270             // We're interested in 5 events in any 300 second period; this
    271             // window is super relaxed because booting can take a long time if
    272             // forced to dexopt things.
    273             super(android.os.Process.ROOT_UID, 5, 300 * DateUtils.SECOND_IN_MILLIS);
    274         }
    275 
    276         @Override
    277         public int getCount() {
    278             return SystemProperties.getInt(PROP_RESCUE_BOOT_COUNT, 0);
    279         }
    280 
    281         @Override
    282         public void setCount(int count) {
    283             SystemProperties.set(PROP_RESCUE_BOOT_COUNT, Integer.toString(count));
    284         }
    285 
    286         @Override
    287         public long getStart() {
    288             return SystemProperties.getLong(PROP_RESCUE_BOOT_START, 0);
    289         }
    290 
    291         @Override
    292         public void setStart(long start) {
    293             SystemProperties.set(PROP_RESCUE_BOOT_START, Long.toString(start));
    294         }
    295     }
    296 
    297     /**
    298      * Specialization of {@link Threshold} for monitoring app crashes. It stores
    299      * counters in memory.
    300      */
    301     private static class AppThreshold extends Threshold {
    302         private int count;
    303         private long start;
    304 
    305         public AppThreshold(int uid) {
    306             // We're interested in 5 events in any 30 second period; apps crash
    307             // pretty quickly so we can keep a tight leash on them.
    308             super(uid, 5, 30 * DateUtils.SECOND_IN_MILLIS);
    309         }
    310 
    311         @Override public int getCount() { return count; }
    312         @Override public void setCount(int count) { this.count = count; }
    313         @Override public long getStart() { return start; }
    314         @Override public void setStart(long start) { this.start = start; }
    315     }
    316 
    317     private static int[] getAllUserIds() {
    318         int[] userIds = { UserHandle.USER_SYSTEM };
    319         try {
    320             for (File file : FileUtils.listFilesOrEmpty(Environment.getDataSystemDeDirectory())) {
    321                 try {
    322                     final int userId = Integer.parseInt(file.getName());
    323                     if (userId != UserHandle.USER_SYSTEM) {
    324                         userIds = ArrayUtils.appendInt(userIds, userId);
    325                     }
    326                 } catch (NumberFormatException ignored) {
    327                 }
    328             }
    329         } catch (Throwable t) {
    330             Slog.w(TAG, "Trouble discovering users", t);
    331         }
    332         return userIds;
    333     }
    334 
    335     /**
    336      * Hacky test to check if the device has an active USB connection, which is
    337      * a good proxy for someone doing local development work.
    338      */
    339     private static boolean isUsbActive() {
    340         if (SystemProperties.getBoolean(PROP_VIRTUAL_DEVICE, false)) {
    341             Slog.v(TAG, "Assuming virtual device is connected over USB");
    342             return true;
    343         }
    344         try {
    345             final String state = FileUtils
    346                     .readTextFile(new File("/sys/class/android_usb/android0/state"), 128, "");
    347             return "CONFIGURED".equals(state.trim());
    348         } catch (Throwable t) {
    349             Slog.w(TAG, "Failed to determine if device was on USB", t);
    350             return false;
    351         }
    352     }
    353 
    354     private static String levelToString(int level) {
    355         switch (level) {
    356             case LEVEL_NONE: return "NONE";
    357             case LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS: return "RESET_SETTINGS_UNTRUSTED_DEFAULTS";
    358             case LEVEL_RESET_SETTINGS_UNTRUSTED_CHANGES: return "RESET_SETTINGS_UNTRUSTED_CHANGES";
    359             case LEVEL_RESET_SETTINGS_TRUSTED_DEFAULTS: return "RESET_SETTINGS_TRUSTED_DEFAULTS";
    360             case LEVEL_FACTORY_RESET: return "FACTORY_RESET";
    361             default: return Integer.toString(level);
    362         }
    363     }
    364 }
    365