Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit e1800461 authored by Gavin Corkery's avatar Gavin Corkery
Browse files

Add per-package escalation logic

Track the number of times a call has been made to an
observer to mitigate each MonitoredPackage object. This
"mitigation count" will be used as a proxy for determining
what rescue level to perform in RescueParty. A sliding
window is used so that this mitigation count may
de-escalate. The default value of this sliding window
is one hour.

A follow-up CL will integrate RescueParty's rescue level
mechanism with this logic.

Test: atest PackageWatchdogTest
Bug: 172206136
Change-Id: Idb97901ad1c8acbee15417ea35d29e67e9d4562e
parent 7ad665f7
Loading
Loading
Loading
Loading
+62 −7
Original line number Diff line number Diff line
@@ -68,6 +68,7 @@ import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Set;
import java.util.concurrent.TimeUnit;

@@ -117,6 +118,9 @@ public class PackageWatchdog {
    static final int DEFAULT_TRIGGER_FAILURE_COUNT = 5;
    @VisibleForTesting
    static final long DEFAULT_OBSERVING_DURATION_MS = TimeUnit.DAYS.toMillis(2);
    // Sliding window for tracking how many mitigation calls were made for a package.
    @VisibleForTesting
    static final long DEFAULT_DEESCALATION_WINDOW_MS = TimeUnit.HOURS.toMillis(1);
    // Whether explicit health checks are enabled or not
    private static final boolean DEFAULT_EXPLICIT_HEALTH_CHECK_ENABLED = true;

@@ -388,6 +392,7 @@ public class PackageWatchdog {
                        // Observer that will receive failure for versionedPackage
                        PackageHealthObserver currentObserverToNotify = null;
                        int currentObserverImpact = Integer.MAX_VALUE;
                        MonitoredPackage currentMonitoredPackage = null;

                        // Find observer with least user impact
                        for (int oIndex = 0; oIndex < mAllObservers.size(); oIndex++) {
@@ -396,19 +401,33 @@ public class PackageWatchdog {
                            if (registeredObserver != null
                                    && observer.onPackageFailureLocked(
                                    versionedPackage.getPackageName())) {
                                MonitoredPackage p = observer.getMonitoredPackage(
                                        versionedPackage.getPackageName());
                                int mitigationCount = 1;
                                if (p != null) {
                                    mitigationCount = p.getMitigationCountLocked() + 1;
                                }
                                int impact = registeredObserver.onHealthCheckFailed(
                                        versionedPackage, failureReason);
                                        versionedPackage, failureReason, mitigationCount);
                                if (impact != PackageHealthObserverImpact.USER_IMPACT_NONE
                                        && impact < currentObserverImpact) {
                                    currentObserverToNotify = registeredObserver;
                                    currentObserverImpact = impact;
                                    currentMonitoredPackage = p;
                                }
                            }
                        }

                        // Execute action with least user impact
                        if (currentObserverToNotify != null) {
                            currentObserverToNotify.execute(versionedPackage, failureReason);
                            int mitigationCount = 1;
                            if (currentMonitoredPackage != null) {
                                currentMonitoredPackage.noteMitigationCallLocked();
                                mitigationCount =
                                        currentMonitoredPackage.getMitigationCountLocked();
                            }
                            currentObserverToNotify.execute(versionedPackage,
                                    failureReason, mitigationCount);
                        }
                    }
                }
@@ -429,7 +448,7 @@ public class PackageWatchdog {
            PackageHealthObserver registeredObserver = observer.registeredObserver;
            if (registeredObserver != null) {
                int impact = registeredObserver.onHealthCheckFailed(
                        failingPackage, failureReason);
                        failingPackage, failureReason, 1);
                if (impact != PackageHealthObserverImpact.USER_IMPACT_NONE
                        && impact < currentObserverImpact) {
                    currentObserverToNotify = registeredObserver;
@@ -438,7 +457,7 @@ public class PackageWatchdog {
            }
        }
        if (currentObserverToNotify != null) {
            currentObserverToNotify.execute(failingPackage,  failureReason);
            currentObserverToNotify.execute(failingPackage,  failureReason, 1);
        }
    }

@@ -559,6 +578,8 @@ public class PackageWatchdog {
         * @param versionedPackage the package that is failing. This may be null if a native
         *                          service is crashing.
         * @param failureReason   the type of failure that is occurring.
         * @param mitigationCount the number of times mitigation has been called for this package
         *                        (including this time).
         *
         *
         * @return any one of {@link PackageHealthObserverImpact} to express the impact
@@ -566,7 +587,8 @@ public class PackageWatchdog {
         */
        @PackageHealthObserverImpact int onHealthCheckFailed(
                @Nullable VersionedPackage versionedPackage,
                @FailureReasons int failureReason);
                @FailureReasons int failureReason,
                int mitigationCount);

        /**
         * Executes mitigation for {@link #onHealthCheckFailed}.
@@ -574,10 +596,12 @@ public class PackageWatchdog {
         * @param versionedPackage the package that is failing. This may be null if a native
         *                          service is crashing.
         * @param failureReason   the type of failure that is occurring.
         * @param mitigationCount the number of times mitigation has been called for this package
         *                        (including this time).
         * @return {@code true} if action was executed successfully, {@code false} otherwise
         */
        boolean execute(@Nullable VersionedPackage versionedPackage,
                @FailureReasons int failureReason);
                @FailureReasons int failureReason, int mitigationCount);


        /**
@@ -859,7 +883,7 @@ public class PackageWatchdog {
                        VersionedPackage versionedPkg = it.next().mPackage;
                        Slog.i(TAG, "Explicit health check failed for package " + versionedPkg);
                        registeredObserver.execute(versionedPkg,
                                PackageWatchdog.FAILURE_REASON_EXPLICIT_HEALTH_CHECK);
                                PackageWatchdog.FAILURE_REASON_EXPLICIT_HEALTH_CHECK, 1);
                    }
                }
            }
@@ -1293,6 +1317,10 @@ public class PackageWatchdog {
        // Times when package failures happen sorted in ascending order
        @GuardedBy("mLock")
        private final LongArrayQueue mFailureHistory = new LongArrayQueue();
        // Times when an observer was called to mitigate this package's failure. Sorted in
        // ascending order.
        @GuardedBy("mLock")
        private final LongArrayQueue mMitigationCalls = new LongArrayQueue();
        // One of STATE_[ACTIVE|INACTIVE|PASSED|FAILED]. Updated on construction and after
        // methods that could change the health check state: handleElapsedTimeLocked and
        // tryPassHealthCheckLocked
@@ -1357,6 +1385,33 @@ public class PackageWatchdog {
            return failed;
        }

        /**
         * Records that an observer was just called to mitigate a failure of this package, by
         * appending the current uptime to the end of the mitigation call history.
         */
        @GuardedBy("mLock")
        public void noteMitigationCallLocked() {
            final long mitigationTimeMs = mSystemClock.uptimeMillis();
            mMitigationCalls.addLast(mitigationTimeMs);
        }

        /**
         * Prunes any mitigation calls outside of the de-escalation window, and returns the
         * number of calls that are in the window afterwards.
         *
         * <p>A call is pruned only when it is strictly older than
         * {@code DEFAULT_DEESCALATION_WINDOW_MS}; a call that is exactly one window old is
         * still counted.
         *
         * @return the number of mitigation calls made in the de-escalation window.
         */
        @GuardedBy("mLock")
        public int getMitigationCountLocked() {
            final long now = mSystemClock.uptimeMillis();
            // Timestamps are appended in ascending order, so stale entries can only be at the
            // head. Check for emptiness explicitly instead of relying on peekFirst() throwing
            // NoSuchElementException to terminate the loop.
            while (mMitigationCalls.size() > 0
                    && now - mMitigationCalls.peekFirst() > DEFAULT_DEESCALATION_WINDOW_MS) {
                mMitigationCalls.removeFirst();
            }

            return mMitigationCalls.size();
        }

        /**
         * Sets the initial health check duration.
         *
+2 −2
Original line number Diff line number Diff line
@@ -452,7 +452,7 @@ public class RescueParty {

        @Override
        public int onHealthCheckFailed(@Nullable VersionedPackage failedPackage,
                @FailureReasons int failureReason) {
                @FailureReasons int failureReason, int mitigationCount) {
            if (!isDisabled() && (failureReason == PackageWatchdog.FAILURE_REASON_APP_CRASH
                    || failureReason == PackageWatchdog.FAILURE_REASON_APP_NOT_RESPONDING)) {
                return mapRescueLevelToUserImpact(getNextRescueLevel());
@@ -463,7 +463,7 @@ public class RescueParty {

        @Override
        public boolean execute(@Nullable VersionedPackage failedPackage,
                @FailureReasons int failureReason) {
                @FailureReasons int failureReason, int mitigationCount) {
            if (isDisabled()) {
                return false;
            }
+2 −2
Original line number Diff line number Diff line
@@ -92,7 +92,7 @@ final class RollbackPackageHealthObserver implements PackageHealthObserver {

    @Override
    public int onHealthCheckFailed(@Nullable VersionedPackage failedPackage,
            @FailureReasons int failureReason) {
            @FailureReasons int failureReason, int mitigationCount) {
        // For native crashes, we will roll back any available rollbacks
        if (failureReason == PackageWatchdog.FAILURE_REASON_NATIVE_CRASH
                && !mContext.getSystemService(RollbackManager.class)
@@ -110,7 +110,7 @@ final class RollbackPackageHealthObserver implements PackageHealthObserver {

    @Override
    public boolean execute(@Nullable VersionedPackage failedPackage,
            @FailureReasons int rollbackReason) {
            @FailureReasons int rollbackReason, int mitigationCount) {
        if (rollbackReason == PackageWatchdog.FAILURE_REASON_NATIVE_CRASH) {
            mHandler.post(() -> rollbackAll());
            return true;
+46 −3
Original line number Diff line number Diff line
@@ -376,7 +376,7 @@ public class PackageWatchdogTest {
        TestObserver observer = new TestObserver(OBSERVER_NAME_1) {
                @Override
                public int onHealthCheckFailed(VersionedPackage versionedPackage,
                        int failureReason) {
                        int failureReason, int mitigationCount) {
                    if (versionedPackage.getVersionCode() == VERSION_CODE) {
                        // Only rollback for specific versionCode
                        return PackageHealthObserverImpact.USER_IMPACT_MEDIUM;
@@ -1146,6 +1146,45 @@ public class PackageWatchdogTest {
        assertThat(observer.mMitigatedPackages).isEqualTo(List.of(APP_A));
    }

    /**
     * Ensure that the sliding window logic results in the correct mitigation count being sent to
     * an observer.
     *
     * <p>The expected counts below assume that each {@code raiseFatalFailureAndDispatch} call
     * leads to exactly one {@code execute()} call on the observer, and that
     * {@code moveTimeForwardAndDispatch} advances the clock the watchdog reads. Neither helper
     * is visible here, so confirm against their definitions.
     */
    @Test
    public void testMitigationSlidingWindow() {
        PackageWatchdog watchdog = createWatchdog();
        TestObserver observer = new TestObserver(OBSERVER_NAME_1);
        // Observe for longer than the whole test runs so APP_A stays monitored throughout.
        watchdog.startObservingHealth(observer, List.of(APP_A),
                PackageWatchdog.DEFAULT_OBSERVING_DURATION_MS * 2);


        // t = 0: first mitigation, expected count 1.
        raiseFatalFailureAndDispatch(watchdog, Arrays.asList(new VersionedPackage(APP_A,
                VERSION_CODE)), PackageWatchdog.FAILURE_REASON_UNKNOWN);

        moveTimeForwardAndDispatch(TimeUnit.MINUTES.toMillis(10));

        // t = 10 min: two more mitigations inside the same window, expected counts 2 and 3.
        raiseFatalFailureAndDispatch(watchdog, Arrays.asList(new VersionedPackage(APP_A,
                VERSION_CODE)), PackageWatchdog.FAILURE_REASON_UNKNOWN);
        raiseFatalFailureAndDispatch(watchdog, Arrays.asList(new VersionedPackage(APP_A,
                VERSION_CODE)), PackageWatchdog.FAILURE_REASON_UNKNOWN);

        moveTimeForwardAndDispatch(PackageWatchdog.DEFAULT_DEESCALATION_WINDOW_MS);

        // The first failure will be outside the threshold. The two mitigations from the
        // 10-minute mark are exactly one window old; pruning uses a strict "older than"
        // comparison, so they still count. Expected count: 2 retained + this one = 3.
        raiseFatalFailureAndDispatch(watchdog, Arrays.asList(new VersionedPackage(APP_A,
                VERSION_CODE)), PackageWatchdog.FAILURE_REASON_UNKNOWN);

        moveTimeForwardAndDispatch(TimeUnit.MINUTES.toMillis(20));

        // The next 2 failures will also be outside the threshold. Only the mitigation made
        // 20 minutes ago is still in the window, so the expected counts are 2 and 3.
        raiseFatalFailureAndDispatch(watchdog, Arrays.asList(new VersionedPackage(APP_A,
                VERSION_CODE)), PackageWatchdog.FAILURE_REASON_UNKNOWN);
        raiseFatalFailureAndDispatch(watchdog, Arrays.asList(new VersionedPackage(APP_A,
                VERSION_CODE)), PackageWatchdog.FAILURE_REASON_UNKNOWN);

        // mMitigationCounts holds the mitigationCount passed to each execute() call, in order.
        assertThat(observer.mMitigationCounts).isEqualTo(List.of(1, 2, 3, 3, 2, 3));
    }

    private void adoptShellPermissions(String... permissions) {
        InstrumentationRegistry
                .getInstrumentation()
@@ -1227,6 +1266,7 @@ public class PackageWatchdogTest {
        private boolean mMitigatedBootLoop = false;
        final List<String> mHealthCheckFailedPackages = new ArrayList<>();
        final List<String> mMitigatedPackages = new ArrayList<>();
        final List<Integer> mMitigationCounts = new ArrayList<>();

        TestObserver(String name) {
            mName = name;
@@ -1238,13 +1278,16 @@ public class PackageWatchdogTest {
            mImpact = impact;
        }

        public int onHealthCheckFailed(VersionedPackage versionedPackage, int failureReason) {
        public int onHealthCheckFailed(VersionedPackage versionedPackage, int failureReason,
                int mitigationCount) {
            mHealthCheckFailedPackages.add(versionedPackage.getPackageName());
            return mImpact;
        }

        public boolean execute(VersionedPackage versionedPackage, int failureReason) {
        public boolean execute(VersionedPackage versionedPackage, int failureReason,
                int mitigationCount) {
            mMitigatedPackages.add(versionedPackage.getPackageName());
            mMitigationCounts.add(mitigationCount);
            mLastFailureReason = failureReason;
            return true;
        }