Home | History | Annotate | Download | only in server
      1 /*
      2  * Copyright (C) 2017 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.android.server;
     18 
     19 import static com.android.server.pm.PackageManagerServiceUtils.logCriticalInfo;
     20 
     21 import android.content.ContentResolver;
     22 import android.content.Context;
     23 import android.os.Build;
     24 import android.os.Environment;
     25 import android.os.FileUtils;
     26 import android.os.RecoverySystem;
     27 import android.os.SystemClock;
     28 import android.os.SystemProperties;
     29 import android.os.UserHandle;
     30 import android.provider.Settings;
     31 import android.text.format.DateUtils;
     32 import android.util.ExceptionUtils;
     33 import android.util.Log;
     34 import android.util.MathUtils;
     35 import android.util.Slog;
     36 import android.util.SparseArray;
     37 import android.util.StatsLog;
     38 
     39 import com.android.internal.annotations.VisibleForTesting;
     40 import com.android.internal.util.ArrayUtils;
     41 import com.android.server.am.SettingsToPropertiesMapper;
     42 import com.android.server.utils.FlagNamespaceUtils;
     43 
     44 import java.io.File;
     45 import java.util.Arrays;
     46 
     47 /**
     48  * Utilities to help rescue the system from crash loops. Callers are expected to
     49  * report boot events and persistent app crashes, and if they happen frequently
     50  * enough this class will slowly escalate through several rescue operations
     51  * before finally rebooting and prompting the user if they want to wipe data as
     52  * a last resort.
     53  *
     54  * @hide
     55  */
     56 public class RescueParty {
     57     @VisibleForTesting
     58     static final String PROP_ENABLE_RESCUE = "persist.sys.enable_rescue";
     59     @VisibleForTesting
     60     static final int TRIGGER_COUNT = 5;
     61     @VisibleForTesting
     62     static final String PROP_RESCUE_LEVEL = "sys.rescue_level";
     63     @VisibleForTesting
     64     static final int LEVEL_NONE = 0;
     65     @VisibleForTesting
     66     static final int LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS = 1;
     67     @VisibleForTesting
     68     static final int LEVEL_RESET_SETTINGS_UNTRUSTED_CHANGES = 2;
     69     @VisibleForTesting
     70     static final int LEVEL_RESET_SETTINGS_TRUSTED_DEFAULTS = 3;
     71     @VisibleForTesting
     72     static final int LEVEL_FACTORY_RESET = 4;
     73     @VisibleForTesting
     74     static final String PROP_RESCUE_BOOT_COUNT = "sys.rescue_boot_count";
     75     /**
     76      * The boot trigger window size must always be greater than Watchdog's deadlock timeout
     77      * {@link Watchdog#DEFAULT_TIMEOUT}.
     78      */
     79     @VisibleForTesting
     80     static final long BOOT_TRIGGER_WINDOW_MILLIS = 600 * DateUtils.SECOND_IN_MILLIS;
     81     @VisibleForTesting
     82     static final long PERSISTENT_APP_CRASH_TRIGGER_WINDOW_MILLIS = 30 * DateUtils.SECOND_IN_MILLIS;
     83     @VisibleForTesting
     84     static final String TAG = "RescueParty";
     85 
     86     private static final String PROP_DISABLE_RESCUE = "persist.sys.disable_rescue";
     87     private static final String PROP_RESCUE_BOOT_START = "sys.rescue_boot_start";
     88     private static final String PROP_VIRTUAL_DEVICE = "ro.hardware.virtual_device";
     89 
     90     /** Threshold for boot loops */
     91     private static final Threshold sBoot = new BootThreshold();
     92     /** Threshold for app crash loops */
     93     private static SparseArray<Threshold> sApps = new SparseArray<>();
     94 
     95     private static boolean isDisabled() {
     96         // Check if we're explicitly enabled for testing
     97         if (SystemProperties.getBoolean(PROP_ENABLE_RESCUE, false)) {
     98             return false;
     99         }
    100 
    101         // We're disabled on all engineering devices
    102         if (Build.IS_ENG) {
    103             Slog.v(TAG, "Disabled because of eng build");
    104             return true;
    105         }
    106 
    107         // We're disabled on userdebug devices connected over USB, since that's
    108         // a decent signal that someone is actively trying to debug the device,
    109         // or that it's in a lab environment.
    110         if (Build.IS_USERDEBUG && isUsbActive()) {
    111             Slog.v(TAG, "Disabled because of active USB connection");
    112             return true;
    113         }
    114 
    115         // One last-ditch check
    116         if (SystemProperties.getBoolean(PROP_DISABLE_RESCUE, false)) {
    117             Slog.v(TAG, "Disabled because of manual property");
    118             return true;
    119         }
    120 
    121         return false;
    122     }
    123 
    124     /**
    125      * Take note of a boot event. If we notice too many of these events
    126      * happening in rapid succession, we'll send out a rescue party.
    127      */
    128     public static void noteBoot(Context context) {
    129         if (isDisabled()) return;
    130         if (sBoot.incrementAndTest()) {
    131             sBoot.reset();
    132             incrementRescueLevel(sBoot.uid);
    133             executeRescueLevel(context);
    134         }
    135     }
    136 
    137     /**
    138      * Take note of a persistent app or apex module crash. If we notice too many of these
    139      * events happening in rapid succession, we'll send out a rescue party.
    140      */
    141     public static void noteAppCrash(Context context, int uid) {
    142         if (isDisabled()) return;
    143         Threshold t = sApps.get(uid);
    144         if (t == null) {
    145             t = new AppThreshold(uid);
    146             sApps.put(uid, t);
    147         }
    148         if (t.incrementAndTest()) {
    149             t.reset();
    150             incrementRescueLevel(t.uid);
    151             executeRescueLevel(context);
    152         }
    153     }
    154 
    155     /**
    156      * Check if we're currently attempting to reboot for a factory reset.
    157      */
    158     public static boolean isAttemptingFactoryReset() {
    159         return SystemProperties.getInt(PROP_RESCUE_LEVEL, LEVEL_NONE) == LEVEL_FACTORY_RESET;
    160     }
    161 
    162     /**
    163      * Called when {@code SettingsProvider} has been published, which is a good
    164      * opportunity to reset any settings depending on our rescue level.
    165      */
    166     public static void onSettingsProviderPublished(Context context) {
    167         handleNativeRescuePartyResets();
    168         executeRescueLevel(context);
    169     }
    170 
    171     @VisibleForTesting
    172     static void resetAllThresholds() {
    173         sBoot.reset();
    174 
    175         for (int i = 0; i < sApps.size(); i++) {
    176             Threshold appThreshold = sApps.get(sApps.keyAt(i));
    177             appThreshold.reset();
    178         }
    179     }
    180 
    181     @VisibleForTesting
    182     static long getElapsedRealtime() {
    183         return SystemClock.elapsedRealtime();
    184     }
    185 
    186     private static void handleNativeRescuePartyResets() {
    187         if (SettingsToPropertiesMapper.isNativeFlagsResetPerformed()) {
    188             FlagNamespaceUtils.resetDeviceConfig(Settings.RESET_MODE_TRUSTED_DEFAULTS,
    189                     Arrays.asList(SettingsToPropertiesMapper.getResetNativeCategories()));
    190         }
    191     }
    192 
    193     /**
    194      * Escalate to the next rescue level. After incrementing the level you'll
    195      * probably want to call {@link #executeRescueLevel(Context)}.
    196      */
    197     private static void incrementRescueLevel(int triggerUid) {
    198         final int level = MathUtils.constrain(
    199                 SystemProperties.getInt(PROP_RESCUE_LEVEL, LEVEL_NONE) + 1,
    200                 LEVEL_NONE, LEVEL_FACTORY_RESET);
    201         SystemProperties.set(PROP_RESCUE_LEVEL, Integer.toString(level));
    202 
    203         EventLogTags.writeRescueLevel(level, triggerUid);
    204         logCriticalInfo(Log.WARN, "Incremented rescue level to "
    205                 + levelToString(level) + " triggered by UID " + triggerUid);
    206     }
    207 
    208     private static void executeRescueLevel(Context context) {
    209         final int level = SystemProperties.getInt(PROP_RESCUE_LEVEL, LEVEL_NONE);
    210         if (level == LEVEL_NONE) return;
    211 
    212         Slog.w(TAG, "Attempting rescue level " + levelToString(level));
    213         try {
    214             executeRescueLevelInternal(context, level);
    215             EventLogTags.writeRescueSuccess(level);
    216             logCriticalInfo(Log.DEBUG,
    217                     "Finished rescue level " + levelToString(level));
    218         } catch (Throwable t) {
    219             final String msg = ExceptionUtils.getCompleteMessage(t);
    220             EventLogTags.writeRescueFailure(level, msg);
    221             logCriticalInfo(Log.ERROR,
    222                     "Failed rescue level " + levelToString(level) + ": " + msg);
    223         }
    224     }
    225 
    226     private static void executeRescueLevelInternal(Context context, int level) throws Exception {
    227         StatsLog.write(StatsLog.RESCUE_PARTY_RESET_REPORTED, level);
    228         switch (level) {
    229             case LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS:
    230                 resetAllSettings(context, Settings.RESET_MODE_UNTRUSTED_DEFAULTS);
    231                 break;
    232             case LEVEL_RESET_SETTINGS_UNTRUSTED_CHANGES:
    233                 resetAllSettings(context, Settings.RESET_MODE_UNTRUSTED_CHANGES);
    234                 break;
    235             case LEVEL_RESET_SETTINGS_TRUSTED_DEFAULTS:
    236                 resetAllSettings(context, Settings.RESET_MODE_TRUSTED_DEFAULTS);
    237                 break;
    238             case LEVEL_FACTORY_RESET:
    239                 RecoverySystem.rebootPromptAndWipeUserData(context, TAG);
    240                 break;
    241         }
    242         FlagNamespaceUtils.addToKnownResetNamespaces(
    243                 FlagNamespaceUtils.NAMESPACE_NO_PACKAGE);
    244     }
    245 
    246     private static void resetAllSettings(Context context, int mode) throws Exception {
    247         // Try our best to reset all settings possible, and once finished
    248         // rethrow any exception that we encountered
    249         Exception res = null;
    250         final ContentResolver resolver = context.getContentResolver();
    251         try {
    252             FlagNamespaceUtils.resetDeviceConfig(mode);
    253         } catch (Exception e) {
    254             res = new RuntimeException("Failed to reset config settings", e);
    255         }
    256         try {
    257             Settings.Global.resetToDefaultsAsUser(resolver, null, mode, UserHandle.USER_SYSTEM);
    258         } catch (Exception e) {
    259             res = new RuntimeException("Failed to reset global settings", e);
    260         }
    261         for (int userId : getAllUserIds()) {
    262             try {
    263                 Settings.Secure.resetToDefaultsAsUser(resolver, null, mode, userId);
    264             } catch (Exception e) {
    265                 res = new RuntimeException("Failed to reset secure settings for " + userId, e);
    266             }
    267         }
    268         if (res != null) {
    269             throw res;
    270         }
    271     }
    272 
    273     /**
    274      * Threshold that can be triggered if a number of events occur within a
    275      * window of time.
    276      */
    277     private abstract static class Threshold {
    278         public abstract int getCount();
    279         public abstract void setCount(int count);
    280         public abstract long getStart();
    281         public abstract void setStart(long start);
    282 
    283         private final int uid;
    284         private final int triggerCount;
    285         private final long triggerWindow;
    286 
    287         public Threshold(int uid, int triggerCount, long triggerWindow) {
    288             this.uid = uid;
    289             this.triggerCount = triggerCount;
    290             this.triggerWindow = triggerWindow;
    291         }
    292 
    293         public void reset() {
    294             setCount(0);
    295             setStart(0);
    296         }
    297 
    298         /**
    299          * @return if this threshold has been triggered
    300          */
    301         public boolean incrementAndTest() {
    302             final long now = getElapsedRealtime();
    303             final long window = now - getStart();
    304             if (window > triggerWindow) {
    305                 setCount(1);
    306                 setStart(now);
    307                 return false;
    308             } else {
    309                 int count = getCount() + 1;
    310                 setCount(count);
    311                 EventLogTags.writeRescueNote(uid, count, window);
    312                 Slog.w(TAG, "Noticed " + count + " events for UID " + uid + " in last "
    313                         + (window / 1000) + " sec");
    314                 return (count >= triggerCount);
    315             }
    316         }
    317     }
    318 
    319     /**
    320      * Specialization of {@link Threshold} for monitoring boot events. It stores
    321      * counters in system properties for robustness.
    322      */
    323     private static class BootThreshold extends Threshold {
    324         public BootThreshold() {
    325             // We're interested in TRIGGER_COUNT events in any
    326             // BOOT_TRIGGER_WINDOW_MILLIS second period; this window is super relaxed because
    327             // booting can take a long time if forced to dexopt things.
    328             super(android.os.Process.ROOT_UID, TRIGGER_COUNT, BOOT_TRIGGER_WINDOW_MILLIS);
    329         }
    330 
    331         @Override
    332         public int getCount() {
    333             return SystemProperties.getInt(PROP_RESCUE_BOOT_COUNT, 0);
    334         }
    335 
    336         @Override
    337         public void setCount(int count) {
    338             SystemProperties.set(PROP_RESCUE_BOOT_COUNT, Integer.toString(count));
    339         }
    340 
    341         @Override
    342         public long getStart() {
    343             return SystemProperties.getLong(PROP_RESCUE_BOOT_START, 0);
    344         }
    345 
    346         @Override
    347         public void setStart(long start) {
    348             SystemProperties.set(PROP_RESCUE_BOOT_START, Long.toString(start));
    349         }
    350     }
    351 
    352     /**
    353      * Specialization of {@link Threshold} for monitoring app crashes. It stores
    354      * counters in memory.
    355      */
    356     private static class AppThreshold extends Threshold {
    357         private int count;
    358         private long start;
    359 
    360         public AppThreshold(int uid) {
    361             // We're interested in TRIGGER_COUNT events in any
    362             // PERSISTENT_APP_CRASH_TRIGGER_WINDOW_MILLIS second period; apps crash pretty quickly
    363             // so we can keep a tight leash on them.
    364             super(uid, TRIGGER_COUNT, PERSISTENT_APP_CRASH_TRIGGER_WINDOW_MILLIS);
    365         }
    366 
    367         @Override public int getCount() { return count; }
    368         @Override public void setCount(int count) { this.count = count; }
    369         @Override public long getStart() { return start; }
    370         @Override public void setStart(long start) { this.start = start; }
    371     }
    372 
    373     private static int[] getAllUserIds() {
    374         int[] userIds = { UserHandle.USER_SYSTEM };
    375         try {
    376             for (File file : FileUtils.listFilesOrEmpty(Environment.getDataSystemDeDirectory())) {
    377                 try {
    378                     final int userId = Integer.parseInt(file.getName());
    379                     if (userId != UserHandle.USER_SYSTEM) {
    380                         userIds = ArrayUtils.appendInt(userIds, userId);
    381                     }
    382                 } catch (NumberFormatException ignored) {
    383                 }
    384             }
    385         } catch (Throwable t) {
    386             Slog.w(TAG, "Trouble discovering users", t);
    387         }
    388         return userIds;
    389     }
    390 
    391     /**
    392      * Hacky test to check if the device has an active USB connection, which is
    393      * a good proxy for someone doing local development work.
    394      */
    395     private static boolean isUsbActive() {
    396         if (SystemProperties.getBoolean(PROP_VIRTUAL_DEVICE, false)) {
    397             Slog.v(TAG, "Assuming virtual device is connected over USB");
    398             return true;
    399         }
    400         try {
    401             final String state = FileUtils
    402                     .readTextFile(new File("/sys/class/android_usb/android0/state"), 128, "");
    403             return "CONFIGURED".equals(state.trim());
    404         } catch (Throwable t) {
    405             Slog.w(TAG, "Failed to determine if device was on USB", t);
    406             return false;
    407         }
    408     }
    409 
    410     private static String levelToString(int level) {
    411         switch (level) {
    412             case LEVEL_NONE: return "NONE";
    413             case LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS: return "RESET_SETTINGS_UNTRUSTED_DEFAULTS";
    414             case LEVEL_RESET_SETTINGS_UNTRUSTED_CHANGES: return "RESET_SETTINGS_UNTRUSTED_CHANGES";
    415             case LEVEL_RESET_SETTINGS_TRUSTED_DEFAULTS: return "RESET_SETTINGS_TRUSTED_DEFAULTS";
    416             case LEVEL_FACTORY_RESET: return "FACTORY_RESET";
    417             default: return Integer.toString(level);
    418         }
    419     }
    420 }
    421