Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit fd2f6873 authored by Gavin Corkery's avatar Gavin Corkery Committed by Android (Google) Code Review
Browse files

Merge "Integrate Rescue Party boot loop logic to Package Watchdog"

parents 1175985e aa57ef3e
Loading
Loading
Loading
Loading
+115 −0
Original line number Diff line number Diff line
@@ -29,6 +29,7 @@ import android.net.ConnectivityModuleConnector;
import android.os.Environment;
import android.os.Handler;
import android.os.Looper;
import android.os.Process;
import android.os.SystemProperties;
import android.provider.DeviceConfig;
import android.text.TextUtils;
@@ -36,6 +37,7 @@ import android.util.ArrayMap;
import android.util.ArraySet;
import android.util.AtomicFile;
import android.util.LongArrayQueue;
import android.util.MathUtils;
import android.util.Slog;
import android.util.Xml;

@@ -117,6 +119,12 @@ public class PackageWatchdog {
    // Whether explicit health checks are enabled or not
    private static final boolean DEFAULT_EXPLICIT_HEALTH_CHECK_ENABLED = true;

    @VisibleForTesting
    static final int DEFAULT_BOOT_LOOP_TRIGGER_COUNT = 5;
    static final long DEFAULT_BOOT_LOOP_TRIGGER_WINDOW_MS = TimeUnit.MINUTES.toMillis(10);
    private static final String PROP_RESCUE_BOOT_COUNT = "sys.rescue_boot_count";
    private static final String PROP_RESCUE_BOOT_START = "sys.rescue_boot_start";

    private long mNumberOfNativeCrashPollsRemaining;

    private static final int DB_VERSION = 1;
@@ -152,6 +160,7 @@ public class PackageWatchdog {
    private final Runnable mSyncStateWithScheduledReason = this::syncStateWithScheduledReason;
    private final Runnable mSaveToFile = this::saveToFile;
    private final SystemClock mSystemClock;
    private final BootThreshold mBootThreshold;
    @GuardedBy("mLock")
    private boolean mIsPackagesReady;
    // Flag to control whether explicit health checks are supported or not
@@ -169,6 +178,7 @@ public class PackageWatchdog {
    @FunctionalInterface
    @VisibleForTesting
    interface SystemClock {
        // TODO: Add elapsedRealtime to this interface
        long uptimeMillis();
    }

@@ -198,6 +208,8 @@ public class PackageWatchdog {
        mConnectivityModuleConnector = connectivityModuleConnector;
        mSystemClock = clock;
        mNumberOfNativeCrashPollsRemaining = NUMBER_OF_NATIVE_CRASH_POLLS;
        mBootThreshold = new BootThreshold(DEFAULT_BOOT_LOOP_TRIGGER_COUNT,
                DEFAULT_BOOT_LOOP_TRIGGER_WINDOW_MS);
        loadFromFile();
        sPackageWatchdog = this;
    }
@@ -411,6 +423,35 @@ public class PackageWatchdog {
        }
    }

    /**
     * Called when the system server boots. If the system server is detected to be in a boot loop,
     * query each observer and perform the mitigation action with the lowest user impact.
     */
    public void noteBoot() {
        synchronized (mLock) {
            if (mBootThreshold.incrementAndTest()) {
                mBootThreshold.reset();
                PackageHealthObserver currentObserverToNotify = null;
                int currentObserverImpact = Integer.MAX_VALUE;
                for (int i = 0; i < mAllObservers.size(); i++) {
                    final ObserverInternal observer = mAllObservers.valueAt(i);
                    PackageHealthObserver registeredObserver = observer.registeredObserver;
                    if (registeredObserver != null) {
                        int impact = registeredObserver.onBootLoop();
                        if (impact != PackageHealthObserverImpact.USER_IMPACT_NONE
                                && impact < currentObserverImpact) {
                            currentObserverToNotify = registeredObserver;
                            currentObserverImpact = impact;
                        }
                    }
                }
                if (currentObserverToNotify != null) {
                    currentObserverToNotify.executeBootLoopMitigation();
                }
            }
        }
    }

    // TODO(b/120598832): Optimize write? Maybe only write a separate smaller file? Also
    // avoid holding lock?
    // This currently adds about 7ms extra to shutdown thread
@@ -519,6 +560,22 @@ public class PackageWatchdog {
        boolean execute(@Nullable VersionedPackage versionedPackage,
                @FailureReasons int failureReason);


        /**
         * Called when the system server has booted several times within a window of time, defined
         * by {@link #mBootThreshold}
         */
        default @PackageHealthObserverImpact int onBootLoop() {
            return PackageHealthObserverImpact.USER_IMPACT_NONE;
        }

        /**
         * Executes mitigation for {@link #onBootLoop}
         */
        default boolean executeBootLoopMitigation() {
            return false;
        }

        // TODO(b/120598832): Ensure uniqueness?
        /**
         * Identifier for the observer, should not change across device updates otherwise the
@@ -1367,4 +1424,62 @@ public class PackageWatchdog {
            return value > 0 ? value : Long.MAX_VALUE;
        }
    }

    /**
     * Handles the thresholding logic for system server boots.
     */
    static class BootThreshold {

        private final int mBootTriggerCount;
        private final long mTriggerWindow;

        BootThreshold(int bootTriggerCount, long triggerWindow) {
            this.mBootTriggerCount = bootTriggerCount;
            this.mTriggerWindow = triggerWindow;
        }

        public void reset() {
            setStart(0);
            setCount(0);
        }

        private int getCount() {
            return SystemProperties.getInt(PROP_RESCUE_BOOT_COUNT, 0);
        }

        private void setCount(int count) {
            SystemProperties.set(PROP_RESCUE_BOOT_COUNT, Integer.toString(count));
        }

        public long getStart() {
            return SystemProperties.getLong(PROP_RESCUE_BOOT_START, 0);
        }

        public void setStart(long start) {
            final long now = android.os.SystemClock.elapsedRealtime();
            final long newStart = MathUtils.constrain(start, 0, now);
            SystemProperties.set(PROP_RESCUE_BOOT_START, Long.toString(newStart));
        }

        /** Increments the boot counter, and returns whether the device is bootlooping. */
        public boolean incrementAndTest() {
            final long now = android.os.SystemClock.elapsedRealtime();
            if (now - getStart() < 0) {
                Slog.e(TAG, "Window was less than zero. Resetting start to current time.");
                setStart(now);
            }
            final long window = now - getStart();
            if (window >= mTriggerWindow) {
                setCount(1);
                setStart(now);
                return false;
            } else {
                int count = getCount() + 1;
                setCount(count);
                EventLogTags.writeRescueNote(Process.ROOT_UID, count, window);
                return count >= mBootTriggerCount;
            }
        }

    }
}
+22 −107
Original line number Diff line number Diff line
@@ -27,17 +27,16 @@ import android.content.pm.VersionedPackage;
import android.os.Build;
import android.os.Environment;
import android.os.FileUtils;
import android.os.Process;
import android.os.RecoverySystem;
import android.os.SystemClock;
import android.os.SystemProperties;
import android.os.UserHandle;
import android.provider.Settings;
import android.text.format.DateUtils;
import android.util.ExceptionUtils;
import android.util.Log;
import android.util.MathUtils;
import android.util.Slog;
import android.util.SparseArray;
import android.util.StatsLog;

import com.android.internal.annotations.GuardedBy;
@@ -80,12 +79,6 @@ public class RescueParty {
    static final int LEVEL_FACTORY_RESET = 4;
    @VisibleForTesting
    static final String PROP_RESCUE_BOOT_COUNT = "sys.rescue_boot_count";
    /**
     * The boot trigger window size must always be greater than Watchdog's deadlock timeout
     * {@link Watchdog#DEFAULT_TIMEOUT}.
     */
    @VisibleForTesting
    static final long BOOT_TRIGGER_WINDOW_MILLIS = 600 * DateUtils.SECOND_IN_MILLIS;
    @VisibleForTesting
    static final String TAG = "RescueParty";

@@ -93,18 +86,11 @@ public class RescueParty {


    private static final String PROP_DISABLE_RESCUE = "persist.sys.disable_rescue";
    private static final String PROP_RESCUE_BOOT_START = "sys.rescue_boot_start";
    private static final String PROP_VIRTUAL_DEVICE = "ro.hardware.virtual_device";

    private static final int PERSISTENT_MASK = ApplicationInfo.FLAG_PERSISTENT
            | ApplicationInfo.FLAG_SYSTEM;


    /** Threshold for boot loops */
    private static final Threshold sBoot = new BootThreshold();
    /** Threshold for app crash loops */
    private static SparseArray<Threshold> sApps = new SparseArray<>();

    /** Register the Rescue Party observer as a Package Watchdog health observer */
    public static void registerHealthObserver(Context context) {
        PackageWatchdog.getInstance(context).registerHealthObserver(
@@ -140,19 +126,6 @@ public class RescueParty {
        return false;
    }

    /**
     * Take note of a boot event. If we notice too many of these events
     * happening in rapid succession, we'll send out a rescue party.
     */
    public static void noteBoot(Context context) {
        if (isDisabled()) return;
        if (sBoot.incrementAndTest()) {
            sBoot.reset();
            incrementRescueLevel(sBoot.uid);
            executeRescueLevel(context);
        }
    }

    /**
     * Check if we're currently attempting to reboot for a factory reset.
     */
@@ -169,11 +142,6 @@ public class RescueParty {
        executeRescueLevel(context);
    }

    @VisibleForTesting
    static void resetAllThresholds() {
        sBoot.reset();
    }

    @VisibleForTesting
    static long getElapsedRealtime() {
        return SystemClock.elapsedRealtime();
@@ -186,6 +154,14 @@ public class RescueParty {
        }
    }

    /**
     * Get the current rescue level.
     */
    private static int getRescueLevel() {
        return MathUtils.constrain(SystemProperties.getInt(PROP_RESCUE_LEVEL, LEVEL_NONE),
                LEVEL_NONE, LEVEL_FACTORY_RESET);
    }

    /**
     * Escalate to the next rescue level. After incrementing the level you'll
     * probably want to call {@link #executeRescueLevel(Context)}.
@@ -366,87 +342,26 @@ public class RescueParty {
        }

        @Override
        public String getName() {
            return NAME;
        }
    }

    /**
     * Threshold that can be triggered if a number of events occur within a
     * window of time.
     */
    private abstract static class Threshold {
        public abstract int getCount();
        public abstract void setCount(int count);
        public abstract long getStart();
        public abstract void setStart(long start);

        private final int uid;
        private final int triggerCount;
        private final long triggerWindow;

        public Threshold(int uid, int triggerCount, long triggerWindow) {
            this.uid = uid;
            this.triggerCount = triggerCount;
            this.triggerWindow = triggerWindow;
        }

        public void reset() {
            setCount(0);
            setStart(0);
        }

        /**
         * @return if this threshold has been triggered
         */
        public boolean incrementAndTest() {
            final long now = getElapsedRealtime();
            final long window = now - getStart();
            if (window > triggerWindow) {
                setCount(1);
                setStart(now);
                return false;
            } else {
                int count = getCount() + 1;
                setCount(count);
                EventLogTags.writeRescueNote(uid, count, window);
                Slog.w(TAG, "Noticed " + count + " events for UID " + uid + " in last "
                        + (window / 1000) + " sec");
                return (count >= triggerCount);
            }
        }
    }

    /**
     * Specialization of {@link Threshold} for monitoring boot events. It stores
     * counters in system properties for robustness.
     */
    private static class BootThreshold extends Threshold {
        public BootThreshold() {
            // We're interested in TRIGGER_COUNT events in any
            // BOOT_TRIGGER_WINDOW_MILLIS second period; this window is super relaxed because
            // booting can take a long time if forced to dexopt things.
            super(android.os.Process.ROOT_UID, TRIGGER_COUNT, BOOT_TRIGGER_WINDOW_MILLIS);
        public int onBootLoop() {
            if (isDisabled()) {
                return PackageHealthObserverImpact.USER_IMPACT_NONE;
            }

        @Override
        public int getCount() {
            return SystemProperties.getInt(PROP_RESCUE_BOOT_COUNT, 0);
            return mapRescueLevelToUserImpact(getRescueLevel());
        }

        @Override
        public void setCount(int count) {
            SystemProperties.set(PROP_RESCUE_BOOT_COUNT, Integer.toString(count));
        public boolean executeBootLoopMitigation() {
            if (isDisabled()) {
                return false;
            }

        @Override
        public long getStart() {
            return SystemProperties.getLong(PROP_RESCUE_BOOT_START, 0);
            incrementRescueLevel(Process.ROOT_UID);
            executeRescueLevel(mContext);
            return true;
        }

        @Override
        public void setStart(long start) {
            SystemProperties.set(PROP_RESCUE_BOOT_START, Long.toString(start));
        public String getName() {
            return NAME;
        }
    }

+1 −1
Original line number Diff line number Diff line
@@ -758,7 +758,7 @@ public final class SystemServer {
        // note that we just booted, which might send out a rescue party if
        // we're stuck in a runtime restart loop.
        RescueParty.registerHealthObserver(mSystemContext);
        RescueParty.noteBoot(mSystemContext);
        PackageWatchdog.getInstance(mSystemContext).noteBoot();

        // Manages LEDs and display backlight so we need it to bring up the display.
        t.traceBegin("StartLightsService");
+14 −49
Original line number Diff line number Diff line
@@ -24,6 +24,7 @@ import static com.android.dx.mockito.inline.extended.ExtendedMockito.doAnswer;
import static com.android.dx.mockito.inline.extended.ExtendedMockito.doReturn;
import static com.android.dx.mockito.inline.extended.ExtendedMockito.verify;
import static com.android.dx.mockito.inline.extended.ExtendedMockito.when;
import static com.android.server.RescueParty.LEVEL_FACTORY_RESET;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
@@ -144,7 +145,6 @@ public class RescuePartyTest {


        doReturn(CURRENT_NETWORK_TIME_MILLIS).when(() -> RescueParty.getElapsedRealtime());
        RescueParty.resetAllThresholds();
        FlagNamespaceUtils.resetKnownResetNamespacesFlagCounterForTest();

        SystemProperties.set(RescueParty.PROP_RESCUE_LEVEL,
@@ -160,28 +160,28 @@ public class RescuePartyTest {

    @Test
    public void testBootLoopDetectionWithExecutionForAllRescueLevels() {
        noteBoot(RescueParty.TRIGGER_COUNT);
        noteBoot();

        verifySettingsResets(Settings.RESET_MODE_UNTRUSTED_DEFAULTS);
        assertEquals(RescueParty.LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS,
                SystemProperties.getInt(RescueParty.PROP_RESCUE_LEVEL, RescueParty.LEVEL_NONE));

        noteBoot(RescueParty.TRIGGER_COUNT);
        noteBoot();

        verifySettingsResets(Settings.RESET_MODE_UNTRUSTED_CHANGES);
        assertEquals(RescueParty.LEVEL_RESET_SETTINGS_UNTRUSTED_CHANGES,
                SystemProperties.getInt(RescueParty.PROP_RESCUE_LEVEL, RescueParty.LEVEL_NONE));

        noteBoot(RescueParty.TRIGGER_COUNT);
        noteBoot();

        verifySettingsResets(Settings.RESET_MODE_TRUSTED_DEFAULTS);
        assertEquals(RescueParty.LEVEL_RESET_SETTINGS_TRUSTED_DEFAULTS,
                SystemProperties.getInt(RescueParty.PROP_RESCUE_LEVEL, RescueParty.LEVEL_NONE));

        noteBoot(RescueParty.TRIGGER_COUNT);
        noteBoot();

        verify(() -> RecoverySystem.rebootPromptAndWipeUserData(mMockContext, RescueParty.TAG));
        assertEquals(RescueParty.LEVEL_FACTORY_RESET,
        assertEquals(LEVEL_FACTORY_RESET,
                SystemProperties.getInt(RescueParty.PROP_RESCUE_LEVEL, RescueParty.LEVEL_NONE));
    }

@@ -208,48 +208,15 @@ public class RescuePartyTest {
        notePersistentAppCrash();

        verify(() -> RecoverySystem.rebootPromptAndWipeUserData(mMockContext, RescueParty.TAG));
        assertEquals(RescueParty.LEVEL_FACTORY_RESET,
                SystemProperties.getInt(RescueParty.PROP_RESCUE_LEVEL, RescueParty.LEVEL_NONE));
    }

    @Test
    public void testBootLoopDetectionWithWrongInterval() {
        noteBoot(RescueParty.TRIGGER_COUNT - 1);

        // last boot is just outside of the boot loop detection window
        doReturn(CURRENT_NETWORK_TIME_MILLIS + RescueParty.BOOT_TRIGGER_WINDOW_MILLIS + 1).when(
                () -> RescueParty.getElapsedRealtime());
        noteBoot(/*numTimes=*/1);

        assertEquals(RescueParty.LEVEL_NONE,
                SystemProperties.getInt(RescueParty.PROP_RESCUE_LEVEL, RescueParty.LEVEL_NONE));
    }

    @Test
    public void testBootLoopDetectionWithProperInterval() {
        noteBoot(RescueParty.TRIGGER_COUNT - 1);

        // last boot is just inside of the boot loop detection window
        doReturn(CURRENT_NETWORK_TIME_MILLIS + RescueParty.BOOT_TRIGGER_WINDOW_MILLIS).when(
                () -> RescueParty.getElapsedRealtime());
        noteBoot(/*numTimes=*/1);

        verifySettingsResets(Settings.RESET_MODE_UNTRUSTED_DEFAULTS);
        assertEquals(RescueParty.LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS,
                SystemProperties.getInt(RescueParty.PROP_RESCUE_LEVEL, RescueParty.LEVEL_NONE));
    }

    @Test
    public void testBootLoopDetectionWithWrongTriggerCount() {
        noteBoot(RescueParty.TRIGGER_COUNT - 1);
        assertEquals(RescueParty.LEVEL_NONE,
        assertEquals(LEVEL_FACTORY_RESET,
                SystemProperties.getInt(RescueParty.PROP_RESCUE_LEVEL, RescueParty.LEVEL_NONE));
    }

    @Test
    public void testIsAttemptingFactoryReset() {
        noteBoot(RescueParty.TRIGGER_COUNT * 4);

        for (int i = 0; i < LEVEL_FACTORY_RESET; i++) {
            noteBoot();
        }
        verify(() -> RecoverySystem.rebootPromptAndWipeUserData(mMockContext, RescueParty.TAG));
        assertTrue(RescueParty.isAttemptingFactoryReset());
    }
@@ -306,7 +273,7 @@ public class RescuePartyTest {

        // Ensure that no action is taken for cases where the failure reason is unknown
        SystemProperties.set(RescueParty.PROP_RESCUE_LEVEL, Integer.toString(
                RescueParty.LEVEL_FACTORY_RESET));
                LEVEL_FACTORY_RESET));
        assertEquals(observer.onHealthCheckFailed(null, PackageWatchdog.FAILURE_REASON_UNKNOWN),
                PackageHealthObserverImpact.USER_IMPACT_NONE);

@@ -342,7 +309,7 @@ public class RescuePartyTest {


        SystemProperties.set(RescueParty.PROP_RESCUE_LEVEL, Integer.toString(
                RescueParty.LEVEL_FACTORY_RESET));
                LEVEL_FACTORY_RESET));
        assertEquals(observer.onHealthCheckFailed(null,
                PackageWatchdog.FAILURE_REASON_APP_NOT_RESPONDING),
                PackageHealthObserverImpact.USER_IMPACT_HIGH);
@@ -366,10 +333,8 @@ public class RescuePartyTest {
                eq(resetMode), anyInt()));
    }

    private void noteBoot(int numTimes) {
        for (int i = 0; i < numTimes; i++) {
            RescueParty.noteBoot(mMockContext);
        }
    private void noteBoot() {
        RescuePartyObserver.getInstance(mMockContext).executeBootLoopMitigation();
    }

    private void notePersistentAppCrash() {
+113 −0

File changed.

Preview size limit exceeded, changes collapsed.