diff options
author | JW Wang <wangchun@google.com> | 2019-09-11 14:34:43 +0800 |
---|---|---|
committer | JW Wang <wangchun@google.com> | 2019-09-12 11:06:18 +0800 |
commit | 88525f7db84c94dcc41e99066a993e5fb9f41a0a (patch) | |
tree | 3cc6f7db282c6b9e0a28e706b84c93077ea57eb6 | |
parent | dfa5268d38143cfd6455838bebe1f03caa212c18 (diff) |
Fix the algorithm counting the package failures
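Use a sliding-window algorithm to detect whether any window of the trigger
duration contains a number of failures equal to or above the trigger threshold.
The previous counter reset to 1 once the window had elapsed, so it could miss
failures that still fell inside the most recent window.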
Bug: 140841942
Test: atest PackageWatchdogTest
Change-Id: I34a20e4d3b98a093dffa05fc7c7c026905834b53
-rw-r--r-- | services/core/java/com/android/server/PackageWatchdog.java | 29 | ||||
-rw-r--r-- | tests/PackageWatchdog/src/com/android/server/PackageWatchdogTest.java | 33 |
2 files changed, 45 insertions, 17 deletions
diff --git a/services/core/java/com/android/server/PackageWatchdog.java b/services/core/java/com/android/server/PackageWatchdog.java index 548665ba3a32..4403efcd58c3 100644 --- a/services/core/java/com/android/server/PackageWatchdog.java +++ b/services/core/java/com/android/server/PackageWatchdog.java @@ -34,6 +34,7 @@ import android.text.TextUtils; import android.util.ArrayMap; import android.util.ArraySet; import android.util.AtomicFile; +import android.util.LongArrayQueue; import android.util.Slog; import android.util.Xml; @@ -969,6 +970,9 @@ public class PackageWatchdog { class MonitoredPackage { //TODO(b/120598832): VersionedPackage? private final String mName; + // Times when package failures happen sorted in ascending order + @GuardedBy("mLock") + private final LongArrayQueue mFailureHistory = new LongArrayQueue(); // One of STATE_[ACTIVE|INACTIVE|PASSED|FAILED]. Updated on construction and after // methods that could change the health check state: handleElapsedTimeLocked and // tryPassHealthCheckLocked @@ -988,12 +992,6 @@ public class PackageWatchdog { // of the package, see #getHealthCheckStateLocked @GuardedBy("mLock") private long mHealthCheckDurationMs = Long.MAX_VALUE; - // System uptime of first package failure - @GuardedBy("mLock") - private long mUptimeStartMs; - // Number of failures since mUptimeStartMs - @GuardedBy("mLock") - private int mFailures; MonitoredPackage(String name, long durationMs, boolean hasPassedHealthCheck) { this(name, durationMs, Long.MAX_VALUE, hasPassedHealthCheck); @@ -1028,20 +1026,17 @@ public class PackageWatchdog { */ @GuardedBy("mLock") public boolean onFailureLocked() { + // Sliding window algorithm: find out if there exists a window containing failures >= + // mTriggerFailureCount. 
final long now = mSystemClock.uptimeMillis(); - final long duration = now - mUptimeStartMs; - if (duration > mTriggerFailureDurationMs) { - // TODO(b/120598832): Reseting to 1 is not correct - // because there may be more than 1 failure in the last trigger window from now - // This is the RescueParty impl, will leave for now - mFailures = 1; - mUptimeStartMs = now; - } else { - mFailures++; + mFailureHistory.addLast(now); + while (now - mFailureHistory.peekFirst() > mTriggerFailureDurationMs) { + // Prune values falling out of the window + mFailureHistory.removeFirst(); } - boolean failed = mFailures >= mTriggerFailureCount; + boolean failed = mFailureHistory.size() >= mTriggerFailureCount; if (failed) { - mFailures = 0; + mFailureHistory.clear(); } return failed; } diff --git a/tests/PackageWatchdog/src/com/android/server/PackageWatchdogTest.java b/tests/PackageWatchdog/src/com/android/server/PackageWatchdogTest.java index ab31ed7389a3..ff4c9702bd80 100644 --- a/tests/PackageWatchdog/src/com/android/server/PackageWatchdogTest.java +++ b/tests/PackageWatchdog/src/com/android/server/PackageWatchdogTest.java @@ -765,6 +765,39 @@ public class PackageWatchdogTest { assertThat(observer.mHealthCheckFailedPackages).containsExactly(APP_B); } + /** Test we are notified when enough failures are triggered within any window. 
*/ + @Test + public void testFailureTriggerWindow() { + adoptShellPermissions( + Manifest.permission.WRITE_DEVICE_CONFIG, + Manifest.permission.READ_DEVICE_CONFIG); + DeviceConfig.setProperty(DeviceConfig.NAMESPACE_ROLLBACK, + PackageWatchdog.PROPERTY_WATCHDOG_TRIGGER_FAILURE_COUNT, + Integer.toString(3), /*makeDefault*/false); + DeviceConfig.setProperty(DeviceConfig.NAMESPACE_ROLLBACK, + PackageWatchdog.PROPERTY_WATCHDOG_TRIGGER_DURATION_MILLIS, + Integer.toString(1000), /*makeDefault*/false); + PackageWatchdog watchdog = createWatchdog(); + TestObserver observer = new TestObserver(OBSERVER_NAME_1); + + watchdog.startObservingHealth(observer, Arrays.asList(APP_A), Long.MAX_VALUE); + // Raise 2 failures at t=0 and t=900 respectively + watchdog.onPackageFailure(Arrays.asList(new VersionedPackage(APP_A, VERSION_CODE))); + mTestLooper.dispatchAll(); + moveTimeForwardAndDispatch(900); + watchdog.onPackageFailure(Arrays.asList(new VersionedPackage(APP_A, VERSION_CODE))); + mTestLooper.dispatchAll(); + + // Raise 2 failures at t=1100 + moveTimeForwardAndDispatch(200); + watchdog.onPackageFailure(Arrays.asList(new VersionedPackage(APP_A, VERSION_CODE))); + watchdog.onPackageFailure(Arrays.asList(new VersionedPackage(APP_A, VERSION_CODE))); + mTestLooper.dispatchAll(); + + // We should receive APP_A since there are 3 failures within 1000ms window + assertThat(observer.mHealthCheckFailedPackages).containsExactly(APP_A); + } + private void adoptShellPermissions(String... permissions) { InstrumentationRegistry .getInstrumentation() |