Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 5cb20e67 authored by Gavin Corkery's avatar Gavin Corkery
Browse files

Add boot loop de-escalation logic

Similar to work that has already been done for package failures,
pass a mitigation count to an observer interested in boot loops.
Since this logic is handled using sysprops, the mitigation count
will be reset after the default escalation window, rather than
decreasing with a sliding window.

Migrated elapsedRealTime to uptimeMillis in all instances in
PackageWatchdog. This should not have any impact, since the only
difference is that elapsedRealTime counted time in deep sleep,
which will not be an important factor for boot loops detection.

Test: atest PackageWatchdogTest
Test: atest RescuePartyTest
Bug: 172206136
Change-Id: I7e59f3fa32544bd410d8508e6529c77049a70df0
parent 976aa3d4
Loading
Loading
Loading
Loading
+49 −11
Original line number Diff line number Diff line
@@ -47,7 +47,6 @@ import android.util.Xml;
import com.android.internal.annotations.GuardedBy;
import com.android.internal.annotations.VisibleForTesting;
import com.android.internal.os.BackgroundThread;
import com.android.internal.util.FastXmlSerializer;
import com.android.internal.util.IndentingPrintWriter;
import com.android.internal.util.XmlUtils;

@@ -64,7 +63,6 @@ import java.io.IOException;
import java.io.InputStream;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
@@ -129,9 +127,17 @@ public class PackageWatchdog {
    @VisibleForTesting
    static final int DEFAULT_BOOT_LOOP_TRIGGER_COUNT = 5;
    static final long DEFAULT_BOOT_LOOP_TRIGGER_WINDOW_MS = TimeUnit.MINUTES.toMillis(10);

    // These properties track individual system server boot events, and are reset once the boot
    // threshold is met, or the boot loop trigger window is exceeded between boot events.
    private static final String PROP_RESCUE_BOOT_COUNT = "sys.rescue_boot_count";
    private static final String PROP_RESCUE_BOOT_START = "sys.rescue_boot_start";

    // These properties track multiple calls made to observers tracking boot loops. They are reset
    // when the de-escalation window is exceeded between boot events.
    private static final String PROP_BOOT_MITIGATION_WINDOW_START = "sys.boot_mitigation_start";
    private static final String PROP_BOOT_MITIGATION_COUNT = "sys.boot_mitigation_count";

    private long mNumberOfNativeCrashPollsRemaining;

    private static final int DB_VERSION = 1;
@@ -191,7 +197,6 @@ public class PackageWatchdog {
    @FunctionalInterface
    @VisibleForTesting
    interface SystemClock {
        // TODO: Add elapsedRealtime to this interface
        long uptimeMillis();
    }

@@ -471,13 +476,14 @@ public class PackageWatchdog {
        synchronized (mLock) {
            if (mBootThreshold.incrementAndTest()) {
                mBootThreshold.reset();
                int mitigationCount = mBootThreshold.getMitigationCount() + 1;
                PackageHealthObserver currentObserverToNotify = null;
                int currentObserverImpact = Integer.MAX_VALUE;
                for (int i = 0; i < mAllObservers.size(); i++) {
                    final ObserverInternal observer = mAllObservers.valueAt(i);
                    PackageHealthObserver registeredObserver = observer.registeredObserver;
                    if (registeredObserver != null) {
                        int impact = registeredObserver.onBootLoop();
                        int impact = registeredObserver.onBootLoop(mitigationCount);
                        if (impact != PackageHealthObserverImpact.USER_IMPACT_NONE
                                && impact < currentObserverImpact) {
                            currentObserverToNotify = registeredObserver;
@@ -486,7 +492,8 @@ public class PackageWatchdog {
                    }
                }
                if (currentObserverToNotify != null) {
                    currentObserverToNotify.executeBootLoopMitigation();
                    mBootThreshold.setMitigationCount(mitigationCount);
                    currentObserverToNotify.executeBootLoopMitigation(mitigationCount);
                }
            }
        }
@@ -609,15 +616,20 @@ public class PackageWatchdog {
        /**
         * Called when the system server has booted several times within a window of time, defined
         * by {@link #mBootThreshold}
         *
         * @param mitigationCount the number of times mitigation has been attempted for this
         *                        boot loop (including this time).
         */
        default @PackageHealthObserverImpact int onBootLoop() {
        default @PackageHealthObserverImpact int onBootLoop(int mitigationCount) {
            return PackageHealthObserverImpact.USER_IMPACT_NONE;
        }

        /**
         * Executes mitigation for {@link #onBootLoop}
         * @param mitigationCount the number of times mitigation has been attempted for this
         *                        boot loop (including this time).
         */
        default boolean executeBootLoopMitigation() {
        default boolean executeBootLoopMitigation(int mitigationCount) {
            return false;
        }

@@ -1577,7 +1589,7 @@ public class PackageWatchdog {
    /**
     * Handles the thresholding logic for system server boots.
     */
    static class BootThreshold {
    class BootThreshold {

        private final int mBootTriggerCount;
        private final long mTriggerWindow;
@@ -1604,18 +1616,44 @@ public class PackageWatchdog {
            return SystemProperties.getLong(PROP_RESCUE_BOOT_START, 0);
        }

        public int getMitigationCount() {
            return SystemProperties.getInt(PROP_BOOT_MITIGATION_COUNT, 0);
        }

        public void setStart(long start) {
            final long now = android.os.SystemClock.elapsedRealtime();
            setPropertyStart(PROP_RESCUE_BOOT_START, start);
        }

        public void setMitigationStart(long start) {
            setPropertyStart(PROP_BOOT_MITIGATION_WINDOW_START, start);
        }

        public long getMitigationStart() {
            return SystemProperties.getLong(PROP_BOOT_MITIGATION_WINDOW_START, 0);
        }

        public void setMitigationCount(int count) {
            SystemProperties.set(PROP_BOOT_MITIGATION_COUNT, Integer.toString(count));
        }

        public void setPropertyStart(String property, long start) {
            final long now = mSystemClock.uptimeMillis();
            final long newStart = MathUtils.constrain(start, 0, now);
            SystemProperties.set(PROP_RESCUE_BOOT_START, Long.toString(newStart));
            SystemProperties.set(property, Long.toString(newStart));
        }


        /** Increments the boot counter, and returns whether the device is bootlooping. */
        public boolean incrementAndTest() {
            final long now = android.os.SystemClock.elapsedRealtime();
            final long now = mSystemClock.uptimeMillis();
            if (now - getStart() < 0) {
                Slog.e(TAG, "Window was less than zero. Resetting start to current time.");
                setStart(now);
                setMitigationStart(now);
            }
            if (now - getMitigationStart() > DEFAULT_DEESCALATION_WINDOW_MS) {
                setMitigationCount(0);
                setMitigationStart(now);
            }
            final long window = now - getStart();
            if (window >= mTriggerWindow) {
+4 −37
Original line number Diff line number Diff line
@@ -29,7 +29,6 @@ import android.os.Build;
import android.os.Bundle;
import android.os.Environment;
import android.os.FileUtils;
import android.os.Process;
import android.os.RecoverySystem;
import android.os.RemoteCallback;
import android.os.SystemClock;
@@ -40,7 +39,6 @@ import android.provider.Settings;
import android.util.ArraySet;
import android.util.ExceptionUtils;
import android.util.Log;
import android.util.MathUtils;
import android.util.Slog;

import com.android.internal.annotations.GuardedBy;
@@ -75,8 +73,6 @@ import java.util.concurrent.TimeUnit;
public class RescueParty {
    @VisibleForTesting
    static final String PROP_ENABLE_RESCUE = "persist.sys.enable_rescue";
    @VisibleForTesting
    static final String PROP_RESCUE_LEVEL = "sys.rescue_level";
    static final String PROP_ATTEMPTING_FACTORY_RESET = "sys.attempting_factory_reset";
    static final String PROP_MAX_RESCUE_LEVEL_ATTEMPTED = "sys.max_rescue_level_attempted";
    @VisibleForTesting
@@ -168,7 +164,6 @@ public class RescueParty {
     */
    public static void onSettingsProviderPublished(Context context) {
        handleNativeRescuePartyResets();
        executeRescueLevel(context, /*failedPackage=*/ null);
        ContentResolver contentResolver = context.getContentResolver();
        Settings.Config.registerMonitorCallback(contentResolver, new RemoteCallback(result -> {
            handleMonitorCallback(context, result);
@@ -260,33 +255,6 @@ public class RescueParty {
        }
    }

    /**
     * Get the next rescue level. This indicates the next level of mitigation that may be taken.
     */
    private static int getNextRescueLevel() {
        return MathUtils.constrain(SystemProperties.getInt(PROP_RESCUE_LEVEL, LEVEL_NONE) + 1,
                LEVEL_NONE, getMaxRescueLevel());
    }

    /**
     * Escalate to the next rescue level. After incrementing the level you'll
     * probably want to call {@link #executeRescueLevel(Context, String)}.
     */
    private static void incrementRescueLevel(int triggerUid) {
        final int level = getNextRescueLevel();
        SystemProperties.set(PROP_RESCUE_LEVEL, Integer.toString(level));

        EventLogTags.writeRescueLevel(level, triggerUid);
        logCriticalInfo(Log.WARN, "Incremented rescue level to "
                + levelToString(level) + " triggered by UID " + triggerUid);
    }

    private static void executeRescueLevel(Context context, @Nullable String failedPackage) {
        final int level = SystemProperties.getInt(PROP_RESCUE_LEVEL, LEVEL_NONE);
        if (level == LEVEL_NONE) return;
        executeRescueLevel(context, failedPackage, level);
    }

    private static void executeRescueLevel(Context context, @Nullable String failedPackage,
            int level) {
        Slog.w(TAG, "Attempting rescue level " + levelToString(level));
@@ -561,20 +529,19 @@ public class RescueParty {
        }

        @Override
        public int onBootLoop() {
        public int onBootLoop(int mitigationCount) {
            if (isDisabled()) {
                return PackageHealthObserverImpact.USER_IMPACT_NONE;
            }
            return mapRescueLevelToUserImpact(getNextRescueLevel());
            return mapRescueLevelToUserImpact(getRescueLevel(mitigationCount));
        }

        @Override
        public boolean executeBootLoopMitigation() {
        public boolean executeBootLoopMitigation(int mitigationCount) {
            if (isDisabled()) {
                return false;
            }
            incrementRescueLevel(Process.ROOT_UID);
            executeRescueLevel(mContext, /*failedPackage=*/ null);
            executeRescueLevel(mContext, /*failedPackage=*/ null, getRescueLevel(mitigationCount));
            return true;
        }

+15 −53
Original line number Diff line number Diff line
@@ -174,8 +174,6 @@ public class RescuePartyTest {

        doReturn(CURRENT_NETWORK_TIME_MILLIS).when(() -> RescueParty.getElapsedRealtime());

        SystemProperties.set(RescueParty.PROP_RESCUE_LEVEL,
                Integer.toString(RescueParty.LEVEL_NONE));
        SystemProperties.set(RescueParty.PROP_RESCUE_BOOT_COUNT, Integer.toString(0));
        SystemProperties.set(RescueParty.PROP_ENABLE_RESCUE, Boolean.toString(true));
        SystemProperties.set(PROP_DEVICE_CONFIG_DISABLE_FLAG, Boolean.toString(false));
@@ -193,12 +191,10 @@ public class RescuePartyTest {
                mMonitorCallbackCaptor.capture()));
        HashMap<String, Integer> verifiedTimesMap = new HashMap<String, Integer>();

        noteBoot();
        noteBoot(1);

        verifySettingsResets(Settings.RESET_MODE_UNTRUSTED_DEFAULTS, /*resetNamespaces=*/ null,
                verifiedTimesMap);
        assertEquals(RescueParty.LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS,
                SystemProperties.getInt(RescueParty.PROP_RESCUE_LEVEL, RescueParty.LEVEL_NONE));

        // Record DeviceConfig accesses
        RescuePartyObserver observer = RescuePartyObserver.getInstance(mMockContext);
@@ -208,24 +204,19 @@ public class RescuePartyTest {

        final String[] expectedAllResetNamespaces = new String[]{NAMESPACE1, NAMESPACE2};

        noteBoot();
        noteBoot(2);

        verifySettingsResets(Settings.RESET_MODE_UNTRUSTED_CHANGES, expectedAllResetNamespaces,
                verifiedTimesMap);
        assertEquals(RescueParty.LEVEL_RESET_SETTINGS_UNTRUSTED_CHANGES,
                SystemProperties.getInt(RescueParty.PROP_RESCUE_LEVEL, RescueParty.LEVEL_NONE));

        noteBoot();
        noteBoot(3);

        verifySettingsResets(Settings.RESET_MODE_TRUSTED_DEFAULTS, expectedAllResetNamespaces,
                verifiedTimesMap);
        assertEquals(RescueParty.LEVEL_RESET_SETTINGS_TRUSTED_DEFAULTS,
                SystemProperties.getInt(RescueParty.PROP_RESCUE_LEVEL, RescueParty.LEVEL_NONE));

        noteBoot();
        noteBoot(4);

        assertEquals(LEVEL_FACTORY_RESET,
                SystemProperties.getInt(RescueParty.PROP_RESCUE_LEVEL, RescueParty.LEVEL_NONE));
        assertTrue(RescueParty.isAttemptingFactoryReset());
    }

    @Test
@@ -364,23 +355,11 @@ public class RescuePartyTest {
    @Test
    public void testIsAttemptingFactoryReset() {
        for (int i = 0; i < LEVEL_FACTORY_RESET; i++) {
            noteBoot();
            noteBoot(i + 1);
        }
        assertTrue(RescueParty.isAttemptingFactoryReset());
    }

    @Test
    public void testOnSettingsProviderPublishedExecutesRescueLevels() {
        SystemProperties.set(RescueParty.PROP_RESCUE_LEVEL, Integer.toString(1));

        RescueParty.onSettingsProviderPublished(mMockContext);

        verifySettingsResets(Settings.RESET_MODE_UNTRUSTED_DEFAULTS, /*resetNamespaces=*/ null,
                /*configResetVerifiedTimesMap=*/ null);
        assertEquals(RescueParty.LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS,
                SystemProperties.getInt(RescueParty.PROP_RESCUE_LEVEL, RescueParty.LEVEL_NONE));
    }

    @Test
    public void testNativeRescuePartyResets() {
        doReturn(true).when(() -> SettingsToPropertiesMapper.isNativeFlagsResetPerformed());
@@ -425,7 +404,7 @@ public class RescuePartyTest {
        SystemProperties.set(PROP_DISABLE_FACTORY_RESET_FLAG, Boolean.toString(true));

        for (int i = 0; i < LEVEL_FACTORY_RESET; i++) {
            noteBoot();
            noteBoot(i + 1);
        }
        assertFalse(RescueParty.isAttemptingFactoryReset());

@@ -463,29 +442,12 @@ public class RescuePartyTest {
    public void testBootLoopLevels() {
        RescuePartyObserver observer = RescuePartyObserver.getInstance(mMockContext);

        /*
         Ensure that the returned user impact corresponds with the user impact of the next available
         rescue level, not the current one.
         */
        SystemProperties.set(RescueParty.PROP_RESCUE_LEVEL, Integer.toString(
                RescueParty.LEVEL_NONE));
        assertEquals(observer.onBootLoop(), PackageHealthObserverImpact.USER_IMPACT_LOW);

        SystemProperties.set(RescueParty.PROP_RESCUE_LEVEL, Integer.toString(
                RescueParty.LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS));
        assertEquals(observer.onBootLoop(), PackageHealthObserverImpact.USER_IMPACT_LOW);

        SystemProperties.set(RescueParty.PROP_RESCUE_LEVEL, Integer.toString(
                RescueParty.LEVEL_RESET_SETTINGS_UNTRUSTED_CHANGES));
        assertEquals(observer.onBootLoop(), PackageHealthObserverImpact.USER_IMPACT_HIGH);

        SystemProperties.set(RescueParty.PROP_RESCUE_LEVEL, Integer.toString(
                RescueParty.LEVEL_RESET_SETTINGS_TRUSTED_DEFAULTS));
        assertEquals(observer.onBootLoop(), PackageHealthObserverImpact.USER_IMPACT_HIGH);

        SystemProperties.set(RescueParty.PROP_RESCUE_LEVEL, Integer.toString(
                LEVEL_FACTORY_RESET));
        assertEquals(observer.onBootLoop(), PackageHealthObserverImpact.USER_IMPACT_HIGH);
        assertEquals(observer.onBootLoop(0), PackageHealthObserverImpact.USER_IMPACT_NONE);
        assertEquals(observer.onBootLoop(1), PackageHealthObserverImpact.USER_IMPACT_LOW);
        assertEquals(observer.onBootLoop(2), PackageHealthObserverImpact.USER_IMPACT_LOW);
        assertEquals(observer.onBootLoop(3), PackageHealthObserverImpact.USER_IMPACT_HIGH);
        assertEquals(observer.onBootLoop(4), PackageHealthObserverImpact.USER_IMPACT_HIGH);
        assertEquals(observer.onBootLoop(5), PackageHealthObserverImpact.USER_IMPACT_HIGH);
    }

    private void verifySettingsResets(int resetMode, String[] resetNamespaces,
@@ -513,8 +475,8 @@ public class RescuePartyTest {
        }
    }

    private void noteBoot() {
        RescuePartyObserver.getInstance(mMockContext).executeBootLoopMitigation();
    private void noteBoot(int mitigationCount) {
        RescuePartyObserver.getInstance(mMockContext).executeBootLoopMitigation(mitigationCount);
    }

    private void notePersistentAppCrash(int mitigationCount) {
+29 −2
Original line number Diff line number Diff line
@@ -1063,6 +1063,31 @@ public class PackageWatchdogTest {
        assertThat(bootObserver2.mitigatedBootLoop()).isFalse();
    }

    /**
     * Ensure that the correct mitigation counts are sent to the boot loop observer.
     */
    @Test
    public void testMultipleBootLoopMitigation() {
        PackageWatchdog watchdog = createWatchdog();
        TestObserver bootObserver = new TestObserver(OBSERVER_NAME_1);
        watchdog.registerHealthObserver(bootObserver);
        for (int i = 0; i < 4; i++) {
            for (int j = 0; j < PackageWatchdog.DEFAULT_BOOT_LOOP_TRIGGER_COUNT; j++) {
                watchdog.noteBoot();
            }
        }

        moveTimeForwardAndDispatch(PackageWatchdog.DEFAULT_DEESCALATION_WINDOW_MS + 1);

        for (int i = 0; i < 4; i++) {
            for (int j = 0; j < PackageWatchdog.DEFAULT_BOOT_LOOP_TRIGGER_COUNT; j++) {
                watchdog.noteBoot();
            }
        }

        assertThat(bootObserver.mBootMitigationCounts).isEqualTo(List.of(1, 2, 3, 4, 1, 2, 3, 4));
    }

    /**
     * Ensure that passing a null list of failed packages does not cause any mitigation logic to
     * execute.
@@ -1267,6 +1292,7 @@ public class PackageWatchdogTest {
        final List<String> mHealthCheckFailedPackages = new ArrayList<>();
        final List<String> mMitigatedPackages = new ArrayList<>();
        final List<Integer> mMitigationCounts = new ArrayList<>();
        final List<Integer> mBootMitigationCounts = new ArrayList<>();

        TestObserver(String name) {
            mName = name;
@@ -1304,12 +1330,13 @@ public class PackageWatchdogTest {
            return mMayObservePackages;
        }

        public int onBootLoop() {
        public int onBootLoop(int level) {
            return mImpact;
        }

        public boolean executeBootLoopMitigation() {
        public boolean executeBootLoopMitigation(int level) {
            mMitigatedBootLoop = true;
            mBootMitigationCounts.add(level);
            return true;
        }