diff options
author | Gavin Corkery <gavincorkery@google.com> | 2020-11-17 21:57:50 +0000 |
---|---|---|
committer | Gavin Corkery <gavincorkery@google.com> | 2020-11-23 22:49:30 +0000 |
commit | 5cb20e674a926caa8da7dca909c41ba95b88e915 (patch) | |
tree | 7974d6b7905a0a1bd8305d8e1ec1fc6877e34573 | |
parent | 976aa3d404547b6e535c1a8626ebb9f215076d5e (diff) |
Add boot loop de-escalation logic
Similar to work that has already been done for package failures,
pass a mitigation count to an observer interested in boot loops.
Since this logic is handled using sysprops, the mitigation count
will be reset after the default escalation window, rather than
decreasing with a sliding window.
Migrated elapsedRealTime to uptimeMillis in all instances in
PackageWatchdog. This should not have any impact, since the only
difference is that elapsedRealTime counted time in deep sleep,
which will not be an important factor for boot loops detection.
Test: atest PackageWatchdogTest
Test: atest RescuePartyTest
Bug: 172206136
Change-Id: I7e59f3fa32544bd410d8508e6529c77049a70df0
4 files changed, 97 insertions, 103 deletions
diff --git a/services/core/java/com/android/server/PackageWatchdog.java b/services/core/java/com/android/server/PackageWatchdog.java index 735d248b12e4..f3c5fd8d1064 100644 --- a/services/core/java/com/android/server/PackageWatchdog.java +++ b/services/core/java/com/android/server/PackageWatchdog.java @@ -47,7 +47,6 @@ import android.util.Xml; import com.android.internal.annotations.GuardedBy; import com.android.internal.annotations.VisibleForTesting; import com.android.internal.os.BackgroundThread; -import com.android.internal.util.FastXmlSerializer; import com.android.internal.util.IndentingPrintWriter; import com.android.internal.util.XmlUtils; @@ -64,7 +63,6 @@ import java.io.IOException; import java.io.InputStream; import java.lang.annotation.Retention; import java.lang.annotation.RetentionPolicy; -import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Collections; import java.util.Iterator; @@ -129,9 +127,17 @@ public class PackageWatchdog { @VisibleForTesting static final int DEFAULT_BOOT_LOOP_TRIGGER_COUNT = 5; static final long DEFAULT_BOOT_LOOP_TRIGGER_WINDOW_MS = TimeUnit.MINUTES.toMillis(10); + + // These properties track individual system server boot events, and are reset once the boot + // threshold is met, or the boot loop trigger window is exceeded between boot events. private static final String PROP_RESCUE_BOOT_COUNT = "sys.rescue_boot_count"; private static final String PROP_RESCUE_BOOT_START = "sys.rescue_boot_start"; + // These properties track multiple calls made to observers tracking boot loops. They are reset + // when the de-escalation window is exceeded between boot events. + private static final String PROP_BOOT_MITIGATION_WINDOW_START = "sys.boot_mitigation_start"; + private static final String PROP_BOOT_MITIGATION_COUNT = "sys.boot_mitigation_count"; + private long mNumberOfNativeCrashPollsRemaining; private static final int DB_VERSION = 1; @@ -191,7 +197,6 @@ public class PackageWatchdog { @FunctionalInterface @VisibleForTesting interface SystemClock { - // TODO: Add elapsedRealtime to this interface long uptimeMillis(); } @@ -471,13 +476,14 @@ public class PackageWatchdog { synchronized (mLock) { if (mBootThreshold.incrementAndTest()) { mBootThreshold.reset(); + int mitigationCount = mBootThreshold.getMitigationCount() + 1; PackageHealthObserver currentObserverToNotify = null; int currentObserverImpact = Integer.MAX_VALUE; for (int i = 0; i < mAllObservers.size(); i++) { final ObserverInternal observer = mAllObservers.valueAt(i); PackageHealthObserver registeredObserver = observer.registeredObserver; if (registeredObserver != null) { - int impact = registeredObserver.onBootLoop(); + int impact = registeredObserver.onBootLoop(mitigationCount); if (impact != PackageHealthObserverImpact.USER_IMPACT_NONE && impact < currentObserverImpact) { currentObserverToNotify = registeredObserver; @@ -486,7 +492,8 @@ public class PackageWatchdog { } } if (currentObserverToNotify != null) { - currentObserverToNotify.executeBootLoopMitigation(); + mBootThreshold.setMitigationCount(mitigationCount); + currentObserverToNotify.executeBootLoopMitigation(mitigationCount); } } } @@ -609,15 +616,20 @@ public class PackageWatchdog { /** * Called when the system server has booted several times within a window of time, defined * by {@link #mBootThreshold} + * + * @param mitigationCount the number of times mitigation has been attempted for this + * boot loop (including this time). */ - default @PackageHealthObserverImpact int onBootLoop() { + default @PackageHealthObserverImpact int onBootLoop(int mitigationCount) { return PackageHealthObserverImpact.USER_IMPACT_NONE; } /** * Executes mitigation for {@link #onBootLoop} + * @param mitigationCount the number of times mitigation has been attempted for this + * boot loop (including this time). */ - default boolean executeBootLoopMitigation() { + default boolean executeBootLoopMitigation(int mitigationCount) { return false; } @@ -1577,7 +1589,7 @@ public class PackageWatchdog { /** * Handles the thresholding logic for system server boots. */ - static class BootThreshold { + class BootThreshold { private final int mBootTriggerCount; private final long mTriggerWindow; @@ -1604,18 +1616,44 @@ public class PackageWatchdog { return SystemProperties.getLong(PROP_RESCUE_BOOT_START, 0); } + public int getMitigationCount() { + return SystemProperties.getInt(PROP_BOOT_MITIGATION_COUNT, 0); + } + public void setStart(long start) { - final long now = android.os.SystemClock.elapsedRealtime(); + setPropertyStart(PROP_RESCUE_BOOT_START, start); + } + + public void setMitigationStart(long start) { + setPropertyStart(PROP_BOOT_MITIGATION_WINDOW_START, start); + } + + public long getMitigationStart() { + return SystemProperties.getLong(PROP_BOOT_MITIGATION_WINDOW_START, 0); + } + + public void setMitigationCount(int count) { + SystemProperties.set(PROP_BOOT_MITIGATION_COUNT, Integer.toString(count)); + } + + public void setPropertyStart(String property, long start) { + final long now = mSystemClock.uptimeMillis(); final long newStart = MathUtils.constrain(start, 0, now); - SystemProperties.set(PROP_RESCUE_BOOT_START, Long.toString(newStart)); + SystemProperties.set(property, Long.toString(newStart)); } + /** Increments the boot counter, and returns whether the device is bootlooping. */ public boolean incrementAndTest() { - final long now = android.os.SystemClock.elapsedRealtime(); + final long now = mSystemClock.uptimeMillis(); if (now - getStart() < 0) { Slog.e(TAG, "Window was less than zero. Resetting start to current time."); setStart(now); + setMitigationStart(now); + } + if (now - getMitigationStart() > DEFAULT_DEESCALATION_WINDOW_MS) { + setMitigationCount(0); + setMitigationStart(now); } final long window = now - getStart(); if (window >= mTriggerWindow) { diff --git a/services/core/java/com/android/server/RescueParty.java b/services/core/java/com/android/server/RescueParty.java index d04949ac54db..e8e1a16d116b 100644 --- a/services/core/java/com/android/server/RescueParty.java +++ b/services/core/java/com/android/server/RescueParty.java @@ -29,7 +29,6 @@ import android.os.Build; import android.os.Bundle; import android.os.Environment; import android.os.FileUtils; -import android.os.Process; import android.os.RecoverySystem; import android.os.RemoteCallback; import android.os.SystemClock; @@ -40,7 +39,6 @@ import android.provider.Settings; import android.util.ArraySet; import android.util.ExceptionUtils; import android.util.Log; -import android.util.MathUtils; import android.util.Slog; import com.android.internal.annotations.GuardedBy; @@ -75,8 +73,6 @@ import java.util.concurrent.TimeUnit; public class RescueParty { @VisibleForTesting static final String PROP_ENABLE_RESCUE = "persist.sys.enable_rescue"; - @VisibleForTesting - static final String PROP_RESCUE_LEVEL = "sys.rescue_level"; static final String PROP_ATTEMPTING_FACTORY_RESET = "sys.attempting_factory_reset"; static final String PROP_MAX_RESCUE_LEVEL_ATTEMPTED = "sys.max_rescue_level_attempted"; @VisibleForTesting @@ -168,7 +164,6 @@ public class RescueParty { */ public static void onSettingsProviderPublished(Context context) { handleNativeRescuePartyResets(); - executeRescueLevel(context, /*failedPackage=*/ null); ContentResolver contentResolver = context.getContentResolver(); Settings.Config.registerMonitorCallback(contentResolver, new RemoteCallback(result -> { handleMonitorCallback(context, result); @@ -260,33 +255,6 @@ public class RescueParty { } } - /** - * Get the next rescue level. This indicates the next level of mitigation that may be taken. - */ - private static int getNextRescueLevel() { - return MathUtils.constrain(SystemProperties.getInt(PROP_RESCUE_LEVEL, LEVEL_NONE) + 1, - LEVEL_NONE, getMaxRescueLevel()); - } - - /** - * Escalate to the next rescue level. After incrementing the level you'll - * probably want to call {@link #executeRescueLevel(Context, String)}. - */ - private static void incrementRescueLevel(int triggerUid) { - final int level = getNextRescueLevel(); - SystemProperties.set(PROP_RESCUE_LEVEL, Integer.toString(level)); - - EventLogTags.writeRescueLevel(level, triggerUid); - logCriticalInfo(Log.WARN, "Incremented rescue level to " - + levelToString(level) + " triggered by UID " + triggerUid); - } - - private static void executeRescueLevel(Context context, @Nullable String failedPackage) { - final int level = SystemProperties.getInt(PROP_RESCUE_LEVEL, LEVEL_NONE); - if (level == LEVEL_NONE) return; - executeRescueLevel(context, failedPackage, level); - } - private static void executeRescueLevel(Context context, @Nullable String failedPackage, int level) { Slog.w(TAG, "Attempting rescue level " + levelToString(level)); @@ -561,20 +529,19 @@ public class RescueParty { } @Override - public int onBootLoop() { + public int onBootLoop(int mitigationCount) { if (isDisabled()) { return PackageHealthObserverImpact.USER_IMPACT_NONE; } - return mapRescueLevelToUserImpact(getNextRescueLevel()); + return mapRescueLevelToUserImpact(getRescueLevel(mitigationCount)); } @Override - public boolean executeBootLoopMitigation() { + public boolean executeBootLoopMitigation(int mitigationCount) { if (isDisabled()) { return false; } - incrementRescueLevel(Process.ROOT_UID); - executeRescueLevel(mContext, /*failedPackage=*/ null); + executeRescueLevel(mContext, /*failedPackage=*/ null, getRescueLevel(mitigationCount)); return true; } diff --git a/services/tests/mockingservicestests/src/com/android/server/RescuePartyTest.java b/services/tests/mockingservicestests/src/com/android/server/RescuePartyTest.java index ebd4a4c5378f..9c8f733730a7 100644 --- a/services/tests/mockingservicestests/src/com/android/server/RescuePartyTest.java +++ b/services/tests/mockingservicestests/src/com/android/server/RescuePartyTest.java @@ -174,8 +174,6 @@ public class RescuePartyTest { doReturn(CURRENT_NETWORK_TIME_MILLIS).when(() -> RescueParty.getElapsedRealtime()); - SystemProperties.set(RescueParty.PROP_RESCUE_LEVEL, - Integer.toString(RescueParty.LEVEL_NONE)); SystemProperties.set(RescueParty.PROP_RESCUE_BOOT_COUNT, Integer.toString(0)); SystemProperties.set(RescueParty.PROP_ENABLE_RESCUE, Boolean.toString(true)); SystemProperties.set(PROP_DEVICE_CONFIG_DISABLE_FLAG, Boolean.toString(false)); @@ -193,12 +191,10 @@ public class RescuePartyTest { mMonitorCallbackCaptor.capture())); HashMap<String, Integer> verifiedTimesMap = new HashMap<String, Integer>(); - noteBoot(); + noteBoot(1); verifySettingsResets(Settings.RESET_MODE_UNTRUSTED_DEFAULTS, /*resetNamespaces=*/ null, verifiedTimesMap); - assertEquals(RescueParty.LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS, - SystemProperties.getInt(RescueParty.PROP_RESCUE_LEVEL, RescueParty.LEVEL_NONE)); // Record DeviceConfig accesses RescuePartyObserver observer = RescuePartyObserver.getInstance(mMockContext); @@ -208,24 +204,19 @@ public class RescuePartyTest { final String[] expectedAllResetNamespaces = new String[]{NAMESPACE1, NAMESPACE2}; - noteBoot(); + noteBoot(2); verifySettingsResets(Settings.RESET_MODE_UNTRUSTED_CHANGES, expectedAllResetNamespaces, verifiedTimesMap); - assertEquals(RescueParty.LEVEL_RESET_SETTINGS_UNTRUSTED_CHANGES, - SystemProperties.getInt(RescueParty.PROP_RESCUE_LEVEL, RescueParty.LEVEL_NONE)); - noteBoot(); + noteBoot(3); verifySettingsResets(Settings.RESET_MODE_TRUSTED_DEFAULTS, expectedAllResetNamespaces, verifiedTimesMap); - assertEquals(RescueParty.LEVEL_RESET_SETTINGS_TRUSTED_DEFAULTS, - SystemProperties.getInt(RescueParty.PROP_RESCUE_LEVEL, RescueParty.LEVEL_NONE)); - noteBoot(); + noteBoot(4); - assertEquals(LEVEL_FACTORY_RESET, - SystemProperties.getInt(RescueParty.PROP_RESCUE_LEVEL, RescueParty.LEVEL_NONE)); + assertTrue(RescueParty.isAttemptingFactoryReset()); } @Test @@ -364,24 +355,12 @@ public class RescuePartyTest { @Test public void testIsAttemptingFactoryReset() { for (int i = 0; i < LEVEL_FACTORY_RESET; i++) { - noteBoot(); + noteBoot(i + 1); } assertTrue(RescueParty.isAttemptingFactoryReset()); } @Test - public void testOnSettingsProviderPublishedExecutesRescueLevels() { - SystemProperties.set(RescueParty.PROP_RESCUE_LEVEL, Integer.toString(1)); - - RescueParty.onSettingsProviderPublished(mMockContext); - - verifySettingsResets(Settings.RESET_MODE_UNTRUSTED_DEFAULTS, /*resetNamespaces=*/ null, - /*configResetVerifiedTimesMap=*/ null); - assertEquals(RescueParty.LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS, - SystemProperties.getInt(RescueParty.PROP_RESCUE_LEVEL, RescueParty.LEVEL_NONE)); - } - - @Test public void testNativeRescuePartyResets() { doReturn(true).when(() -> SettingsToPropertiesMapper.isNativeFlagsResetPerformed()); doReturn(FAKE_RESET_NATIVE_NAMESPACES).when( @@ -425,7 +404,7 @@ public class RescuePartyTest { SystemProperties.set(PROP_DISABLE_FACTORY_RESET_FLAG, Boolean.toString(true)); for (int i = 0; i < LEVEL_FACTORY_RESET; i++) { - noteBoot(); + noteBoot(i + 1); } assertFalse(RescueParty.isAttemptingFactoryReset()); @@ -463,29 +442,12 @@ public class RescuePartyTest { public void testBootLoopLevels() { RescuePartyObserver observer = RescuePartyObserver.getInstance(mMockContext); - /* - Ensure that the returned user impact corresponds with the user impact of the next available - rescue level, not the current one. - */ - SystemProperties.set(RescueParty.PROP_RESCUE_LEVEL, Integer.toString( - RescueParty.LEVEL_NONE)); - assertEquals(observer.onBootLoop(), PackageHealthObserverImpact.USER_IMPACT_LOW); - - SystemProperties.set(RescueParty.PROP_RESCUE_LEVEL, Integer.toString( - RescueParty.LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS)); - assertEquals(observer.onBootLoop(), PackageHealthObserverImpact.USER_IMPACT_LOW); - - SystemProperties.set(RescueParty.PROP_RESCUE_LEVEL, Integer.toString( - RescueParty.LEVEL_RESET_SETTINGS_UNTRUSTED_CHANGES)); - assertEquals(observer.onBootLoop(), PackageHealthObserverImpact.USER_IMPACT_HIGH); - - SystemProperties.set(RescueParty.PROP_RESCUE_LEVEL, Integer.toString( - RescueParty.LEVEL_RESET_SETTINGS_TRUSTED_DEFAULTS)); - assertEquals(observer.onBootLoop(), PackageHealthObserverImpact.USER_IMPACT_HIGH); - - SystemProperties.set(RescueParty.PROP_RESCUE_LEVEL, Integer.toString( - LEVEL_FACTORY_RESET)); - assertEquals(observer.onBootLoop(), PackageHealthObserverImpact.USER_IMPACT_HIGH); + assertEquals(observer.onBootLoop(0), PackageHealthObserverImpact.USER_IMPACT_NONE); + assertEquals(observer.onBootLoop(1), PackageHealthObserverImpact.USER_IMPACT_LOW); + assertEquals(observer.onBootLoop(2), PackageHealthObserverImpact.USER_IMPACT_LOW); + assertEquals(observer.onBootLoop(3), PackageHealthObserverImpact.USER_IMPACT_HIGH); + assertEquals(observer.onBootLoop(4), PackageHealthObserverImpact.USER_IMPACT_HIGH); + assertEquals(observer.onBootLoop(5), PackageHealthObserverImpact.USER_IMPACT_HIGH); } private void verifySettingsResets(int resetMode, String[] resetNamespaces, @@ -513,8 +475,8 @@ public class RescuePartyTest { } } - private void noteBoot() { - RescuePartyObserver.getInstance(mMockContext).executeBootLoopMitigation(); + private void noteBoot(int mitigationCount) { + RescuePartyObserver.getInstance(mMockContext).executeBootLoopMitigation(mitigationCount); } private void notePersistentAppCrash(int mitigationCount) { diff --git a/tests/PackageWatchdog/src/com/android/server/PackageWatchdogTest.java b/tests/PackageWatchdog/src/com/android/server/PackageWatchdogTest.java index fa0574a503f1..9738e58543e1 100644 --- a/tests/PackageWatchdog/src/com/android/server/PackageWatchdogTest.java +++ b/tests/PackageWatchdog/src/com/android/server/PackageWatchdogTest.java @@ -1064,6 +1064,31 @@ public class PackageWatchdogTest { } /** + * Ensure that the correct mitigation counts are sent to the boot loop observer. + */ + @Test + public void testMultipleBootLoopMitigation() { + PackageWatchdog watchdog = createWatchdog(); + TestObserver bootObserver = new TestObserver(OBSERVER_NAME_1); + watchdog.registerHealthObserver(bootObserver); + for (int i = 0; i < 4; i++) { + for (int j = 0; j < PackageWatchdog.DEFAULT_BOOT_LOOP_TRIGGER_COUNT; j++) { + watchdog.noteBoot(); + } + } + + moveTimeForwardAndDispatch(PackageWatchdog.DEFAULT_DEESCALATION_WINDOW_MS + 1); + + for (int i = 0; i < 4; i++) { + for (int j = 0; j < PackageWatchdog.DEFAULT_BOOT_LOOP_TRIGGER_COUNT; j++) { + watchdog.noteBoot(); + } + } + + assertThat(bootObserver.mBootMitigationCounts).isEqualTo(List.of(1, 2, 3, 4, 1, 2, 3, 4)); + } + + /** * Ensure that passing a null list of failed packages does not cause any mitigation logic to * execute. */ @@ -1267,6 +1292,7 @@ public class PackageWatchdogTest { final List<String> mHealthCheckFailedPackages = new ArrayList<>(); final List<String> mMitigatedPackages = new ArrayList<>(); final List<Integer> mMitigationCounts = new ArrayList<>(); + final List<Integer> mBootMitigationCounts = new ArrayList<>(); TestObserver(String name) { mName = name; @@ -1304,12 +1330,13 @@ public class PackageWatchdogTest { return mMayObservePackages; } - public int onBootLoop() { + public int onBootLoop(int level) { return mImpact; } - public boolean executeBootLoopMitigation() { + public boolean executeBootLoopMitigation(int level) { mMitigatedBootLoop = true; + mBootMitigationCounts.add(level); return true; } |