1 /* 2 * Copyright (C) 2017 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.android.server; 18 19 import android.content.ContentResolver; 20 import android.content.Context; 21 import android.os.Build; 22 import android.os.Environment; 23 import android.os.FileUtils; 24 import android.os.RecoverySystem; 25 import android.os.SystemClock; 26 import android.os.SystemProperties; 27 import android.os.UserHandle; 28 import android.provider.Settings; 29 import android.text.format.DateUtils; 30 import android.util.ExceptionUtils; 31 import android.util.Log; 32 import android.util.MathUtils; 33 import android.util.Slog; 34 import android.util.SparseArray; 35 36 import com.android.internal.util.ArrayUtils; 37 import com.android.server.pm.PackageManagerService; 38 39 import java.io.File; 40 41 /** 42 * Utilities to help rescue the system from crash loops. Callers are expected to 43 * report boot events and persistent app crashes, and if they happen frequently 44 * enough this class will slowly escalate through several rescue operations 45 * before finally rebooting and prompting the user if they want to wipe data as 46 * a last resort. 47 * 48 * @hide 49 */ 50 public class RescueParty { 51 private static final String TAG = "RescueParty"; 52 53 private static final String PROP_ENABLE_RESCUE = "persist.sys.enable_rescue"; 54 private static final String PROP_DISABLE_RESCUE = "persist.sys.disable_rescue"; 55 private static final String PROP_RESCUE_LEVEL = "sys.rescue_level"; 56 private static final String PROP_RESCUE_BOOT_COUNT = "sys.rescue_boot_count"; 57 private static final String PROP_RESCUE_BOOT_START = "sys.rescue_boot_start"; 58 private static final String PROP_VIRTUAL_DEVICE = "ro.hardware.virtual_device"; 59 60 private static final int LEVEL_NONE = 0; 61 private static final int LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS = 1; 62 private static final int LEVEL_RESET_SETTINGS_UNTRUSTED_CHANGES = 2; 63 private static final int LEVEL_RESET_SETTINGS_TRUSTED_DEFAULTS = 3; 64 private static final int LEVEL_FACTORY_RESET = 4; 65 66 /** Threshold for boot loops */ 67 private static final Threshold sBoot = new BootThreshold(); 68 /** Threshold for app crash loops */ 69 private static SparseArray<Threshold> sApps = new SparseArray<>(); 70 71 private static boolean isDisabled() { 72 // Check if we're explicitly enabled for testing 73 if (SystemProperties.getBoolean(PROP_ENABLE_RESCUE, false)) { 74 return false; 75 } 76 77 // We're disabled on all engineering devices 78 if (Build.IS_ENG) { 79 Slog.v(TAG, "Disabled because of eng build"); 80 return true; 81 } 82 83 // We're disabled on userdebug devices connected over USB, since that's 84 // a decent signal that someone is actively trying to debug the device, 85 // or that it's in a lab environment. 86 if (Build.IS_USERDEBUG && isUsbActive()) { 87 Slog.v(TAG, "Disabled because of active USB connection"); 88 return true; 89 } 90 91 // One last-ditch check 92 if (SystemProperties.getBoolean(PROP_DISABLE_RESCUE, false)) { 93 Slog.v(TAG, "Disabled because of manual property"); 94 return true; 95 } 96 97 return false; 98 } 99 100 /** 101 * Take note of a boot event. If we notice too many of these events 102 * happening in rapid succession, we'll send out a rescue party. 103 */ 104 public static void noteBoot(Context context) { 105 if (isDisabled()) return; 106 if (sBoot.incrementAndTest()) { 107 sBoot.reset(); 108 incrementRescueLevel(sBoot.uid); 109 executeRescueLevel(context); 110 } 111 } 112 113 /** 114 * Take note of a persistent app crash. If we notice too many of these 115 * events happening in rapid succession, we'll send out a rescue party. 116 */ 117 public static void notePersistentAppCrash(Context context, int uid) { 118 if (isDisabled()) return; 119 Threshold t = sApps.get(uid); 120 if (t == null) { 121 t = new AppThreshold(uid); 122 sApps.put(uid, t); 123 } 124 if (t.incrementAndTest()) { 125 t.reset(); 126 incrementRescueLevel(t.uid); 127 executeRescueLevel(context); 128 } 129 } 130 131 /** 132 * Check if we're currently attempting to reboot for a factory reset. 133 */ 134 public static boolean isAttemptingFactoryReset() { 135 return SystemProperties.getInt(PROP_RESCUE_LEVEL, LEVEL_NONE) == LEVEL_FACTORY_RESET; 136 } 137 138 /** 139 * Escalate to the next rescue level. After incrementing the level you'll 140 * probably want to call {@link #executeRescueLevel(Context)}. 141 */ 142 private static void incrementRescueLevel(int triggerUid) { 143 final int level = MathUtils.constrain( 144 SystemProperties.getInt(PROP_RESCUE_LEVEL, LEVEL_NONE) + 1, 145 LEVEL_NONE, LEVEL_FACTORY_RESET); 146 SystemProperties.set(PROP_RESCUE_LEVEL, Integer.toString(level)); 147 148 EventLogTags.writeRescueLevel(level, triggerUid); 149 PackageManagerService.logCriticalInfo(Log.WARN, "Incremented rescue level to " 150 + levelToString(level) + " triggered by UID " + triggerUid); 151 } 152 153 /** 154 * Called when {@code SettingsProvider} has been published, which is a good 155 * opportunity to reset any settings depending on our rescue level. 156 */ 157 public static void onSettingsProviderPublished(Context context) { 158 executeRescueLevel(context); 159 } 160 161 private static void executeRescueLevel(Context context) { 162 final int level = SystemProperties.getInt(PROP_RESCUE_LEVEL, LEVEL_NONE); 163 if (level == LEVEL_NONE) return; 164 165 Slog.w(TAG, "Attempting rescue level " + levelToString(level)); 166 try { 167 executeRescueLevelInternal(context, level); 168 EventLogTags.writeRescueSuccess(level); 169 PackageManagerService.logCriticalInfo(Log.DEBUG, 170 "Finished rescue level " + levelToString(level)); 171 } catch (Throwable t) { 172 final String msg = ExceptionUtils.getCompleteMessage(t); 173 EventLogTags.writeRescueFailure(level, msg); 174 PackageManagerService.logCriticalInfo(Log.ERROR, 175 "Failed rescue level " + levelToString(level) + ": " + msg); 176 } 177 } 178 179 private static void executeRescueLevelInternal(Context context, int level) throws Exception { 180 switch (level) { 181 case LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS: 182 resetAllSettings(context, Settings.RESET_MODE_UNTRUSTED_DEFAULTS); 183 break; 184 case LEVEL_RESET_SETTINGS_UNTRUSTED_CHANGES: 185 resetAllSettings(context, Settings.RESET_MODE_UNTRUSTED_CHANGES); 186 break; 187 case LEVEL_RESET_SETTINGS_TRUSTED_DEFAULTS: 188 resetAllSettings(context, Settings.RESET_MODE_TRUSTED_DEFAULTS); 189 break; 190 case LEVEL_FACTORY_RESET: 191 RecoverySystem.rebootPromptAndWipeUserData(context, TAG); 192 break; 193 } 194 } 195 196 private static void resetAllSettings(Context context, int mode) throws Exception { 197 // Try our best to reset all settings possible, and once finished 198 // rethrow any exception that we encountered 199 Exception res = null; 200 final ContentResolver resolver = context.getContentResolver(); 201 try { 202 Settings.Global.resetToDefaultsAsUser(resolver, null, mode, UserHandle.USER_SYSTEM); 203 } catch (Throwable t) { 204 res = new RuntimeException("Failed to reset global settings", t); 205 } 206 for (int userId : getAllUserIds()) { 207 try { 208 Settings.Secure.resetToDefaultsAsUser(resolver, null, mode, userId); 209 } catch (Throwable t) { 210 res = new RuntimeException("Failed to reset secure settings for " + userId, t); 211 } 212 } 213 if (res != null) { 214 throw res; 215 } 216 } 217 218 /** 219 * Threshold that can be triggered if a number of events occur within a 220 * window of time. 221 */ 222 private abstract static class Threshold { 223 public abstract int getCount(); 224 public abstract void setCount(int count); 225 public abstract long getStart(); 226 public abstract void setStart(long start); 227 228 private final int uid; 229 private final int triggerCount; 230 private final long triggerWindow; 231 232 public Threshold(int uid, int triggerCount, long triggerWindow) { 233 this.uid = uid; 234 this.triggerCount = triggerCount; 235 this.triggerWindow = triggerWindow; 236 } 237 238 public void reset() { 239 setCount(0); 240 setStart(0); 241 } 242 243 /** 244 * @return if this threshold has been triggered 245 */ 246 public boolean incrementAndTest() { 247 final long now = SystemClock.elapsedRealtime(); 248 final long window = now - getStart(); 249 if (window > triggerWindow) { 250 setCount(1); 251 setStart(now); 252 return false; 253 } else { 254 int count = getCount() + 1; 255 setCount(count); 256 EventLogTags.writeRescueNote(uid, count, window); 257 Slog.w(TAG, "Noticed " + count + " events for UID " + uid + " in last " 258 + (window / 1000) + " sec"); 259 return (count >= triggerCount); 260 } 261 } 262 } 263 264 /** 265 * Specialization of {@link Threshold} for monitoring boot events. It stores 266 * counters in system properties for robustness. 267 */ 268 private static class BootThreshold extends Threshold { 269 public BootThreshold() { 270 // We're interested in 5 events in any 300 second period; this 271 // window is super relaxed because booting can take a long time if 272 // forced to dexopt things. 273 super(android.os.Process.ROOT_UID, 5, 300 * DateUtils.SECOND_IN_MILLIS); 274 } 275 276 @Override 277 public int getCount() { 278 return SystemProperties.getInt(PROP_RESCUE_BOOT_COUNT, 0); 279 } 280 281 @Override 282 public void setCount(int count) { 283 SystemProperties.set(PROP_RESCUE_BOOT_COUNT, Integer.toString(count)); 284 } 285 286 @Override 287 public long getStart() { 288 return SystemProperties.getLong(PROP_RESCUE_BOOT_START, 0); 289 } 290 291 @Override 292 public void setStart(long start) { 293 SystemProperties.set(PROP_RESCUE_BOOT_START, Long.toString(start)); 294 } 295 } 296 297 /** 298 * Specialization of {@link Threshold} for monitoring app crashes. It stores 299 * counters in memory. 300 */ 301 private static class AppThreshold extends Threshold { 302 private int count; 303 private long start; 304 305 public AppThreshold(int uid) { 306 // We're interested in 5 events in any 30 second period; apps crash 307 // pretty quickly so we can keep a tight leash on them. 308 super(uid, 5, 30 * DateUtils.SECOND_IN_MILLIS); 309 } 310 311 @Override public int getCount() { return count; } 312 @Override public void setCount(int count) { this.count = count; } 313 @Override public long getStart() { return start; } 314 @Override public void setStart(long start) { this.start = start; } 315 } 316 317 private static int[] getAllUserIds() { 318 int[] userIds = { UserHandle.USER_SYSTEM }; 319 try { 320 for (File file : FileUtils.listFilesOrEmpty(Environment.getDataSystemDeDirectory())) { 321 try { 322 final int userId = Integer.parseInt(file.getName()); 323 if (userId != UserHandle.USER_SYSTEM) { 324 userIds = ArrayUtils.appendInt(userIds, userId); 325 } 326 } catch (NumberFormatException ignored) { 327 } 328 } 329 } catch (Throwable t) { 330 Slog.w(TAG, "Trouble discovering users", t); 331 } 332 return userIds; 333 } 334 335 /** 336 * Hacky test to check if the device has an active USB connection, which is 337 * a good proxy for someone doing local development work. 338 */ 339 private static boolean isUsbActive() { 340 if (SystemProperties.getBoolean(PROP_VIRTUAL_DEVICE, false)) { 341 Slog.v(TAG, "Assuming virtual device is connected over USB"); 342 return true; 343 } 344 try { 345 final String state = FileUtils 346 .readTextFile(new File("/sys/class/android_usb/android0/state"), 128, ""); 347 return "CONFIGURED".equals(state.trim()); 348 } catch (Throwable t) { 349 Slog.w(TAG, "Failed to determine if device was on USB", t); 350 return false; 351 } 352 } 353 354 private static String levelToString(int level) { 355 switch (level) { 356 case LEVEL_NONE: return "NONE"; 357 case LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS: return "RESET_SETTINGS_UNTRUSTED_DEFAULTS"; 358 case LEVEL_RESET_SETTINGS_UNTRUSTED_CHANGES: return "RESET_SETTINGS_UNTRUSTED_CHANGES"; 359 case LEVEL_RESET_SETTINGS_TRUSTED_DEFAULTS: return "RESET_SETTINGS_TRUSTED_DEFAULTS"; 360 case LEVEL_FACTORY_RESET: return "FACTORY_RESET"; 361 default: return Integer.toString(level); 362 } 363 } 364 } 365