Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit f707567c authored by Gavin Corkery's avatar Gavin Corkery Committed by Android (Google) Code Review
Browse files

Merge "Move native crash detection to Package Watchdog"

parents b36c04ad f9b3fd49
Loading
Loading
Loading
Loading
+107 −24
Original line number Diff line number Diff line
@@ -29,6 +29,7 @@ import android.net.ConnectivityModuleConnector;
import android.os.Environment;
import android.os.Handler;
import android.os.Looper;
import android.os.SystemProperties;
import android.provider.DeviceConfig;
import android.text.TextUtils;
import android.util.ArrayMap;
@@ -82,6 +83,12 @@ public class PackageWatchdog {
    static final String PROPERTY_WATCHDOG_EXPLICIT_HEALTH_CHECK_ENABLED =
            "watchdog_explicit_health_check_enabled";

    // TODO: make the following values configurable via DeviceConfig
    // Delay between two consecutive native crash polls (30 seconds, expressed in milliseconds).
    private static final long NATIVE_CRASH_POLLING_INTERVAL_MILLIS =
            TimeUnit.SECONDS.toMillis(30);
    // Initial poll budget; the constructor copies it into mNumberOfNativeCrashPollsRemaining.
    private static final long NUMBER_OF_NATIVE_CRASH_POLLS = 10;


    public static final int FAILURE_REASON_UNKNOWN = 0;
    public static final int FAILURE_REASON_NATIVE_CRASH = 1;
    public static final int FAILURE_REASON_EXPLICIT_HEALTH_CHECK = 2;
@@ -110,6 +117,8 @@ public class PackageWatchdog {
    // Whether explicit health checks are enabled or not
    private static final boolean DEFAULT_EXPLICIT_HEALTH_CHECK_ENABLED = true;

    // Native crash polls still allowed. Set to NUMBER_OF_NATIVE_CRASH_POLLS in the constructor
    // and decremented by checkAndMitigateNativeCrashes(), which is documented to run only on
    // mShortTaskHandler.
    private long mNumberOfNativeCrashPollsRemaining;

    private static final int DB_VERSION = 1;
    private static final String TAG_PACKAGE_WATCHDOG = "package-watchdog";
    private static final String TAG_PACKAGE = "package";
@@ -188,6 +197,7 @@ public class PackageWatchdog {
        mHealthCheckController = controller;
        mConnectivityModuleConnector = connectivityModuleConnector;
        mSystemClock = clock;
        mNumberOfNativeCrashPollsRemaining = NUMBER_OF_NATIVE_CRASH_POLLS;
        loadFromFile();
    }

@@ -337,6 +347,9 @@ public class PackageWatchdog {
                    return;
                }

                if (failureReason == FAILURE_REASON_NATIVE_CRASH) {
                    handleFailureImmediately(packages, failureReason);
                } else {
                    for (int pIndex = 0; pIndex < packages.size(); pIndex++) {
                        VersionedPackage versionedPackage = packages.get(pIndex);
                        // Observer that will receive failure for versionedPackage
@@ -350,7 +363,8 @@ public class PackageWatchdog {
                            if (registeredObserver != null
                                    && observer.onPackageFailureLocked(
                                    versionedPackage.getPackageName())) {
                            int impact = registeredObserver.onHealthCheckFailed(versionedPackage);
                                int impact = registeredObserver.onHealthCheckFailed(
                                        versionedPackage, failureReason);
                                if (impact != PackageHealthObserverImpact.USER_IMPACT_NONE
                                        && impact < currentObserverImpact) {
                                    currentObserverToNotify = registeredObserver;
@@ -365,9 +379,36 @@ public class PackageWatchdog {
                        }
                    }
                }
            }
        });
    }

    /**
     * Mitigates a native crash right away, bypassing the failure threshold logic.
     *
     * <p>Every registered observer is asked for the user impact of handling the failure; the
     * observer reporting the lowest impact other than
     * {@link PackageHealthObserverImpact#USER_IMPACT_NONE} is then told to execute its
     * mitigation. Nothing is executed when no observer reports such an impact.
     *
     * @param packages      the failing packages; only the first entry is forwarded to the
     *                      observers, and {@code null} is forwarded when the list is empty
     * @param failureReason the type of failure that is occurring
     */
    private void handleFailureImmediately(List<VersionedPackage> packages,
            @FailureReasons int failureReason) {
        final VersionedPackage target;
        if (packages.isEmpty()) {
            target = null;
        } else {
            target = packages.get(0);
        }

        PackageHealthObserver leastImpactObserver = null;
        int lowestImpact = Integer.MAX_VALUE;
        for (ObserverInternal internal : mAllObservers.values()) {
            final PackageHealthObserver candidate = internal.registeredObserver;
            if (candidate == null) {
                // Nothing to ask: this entry has no live observer attached.
                continue;
            }
            final int candidateImpact = candidate.onHealthCheckFailed(target, failureReason);
            if (candidateImpact == PackageHealthObserverImpact.USER_IMPACT_NONE
                    || candidateImpact >= lowestImpact) {
                // Either the observer declines to act or a cheaper one was already found.
                continue;
            }
            leastImpactObserver = candidate;
            lowestImpact = candidateImpact;
        }

        if (leastImpactObserver == null) {
            return;
        }
        leastImpactObserver.execute(target, failureReason);
    }

    // TODO(b/120598832): Optimize write? Maybe only write a separate smaller file? Also
    // avoid holding lock?
    // This currently adds about 7ms extra to shutdown thread
@@ -400,6 +441,37 @@ public class PackageWatchdog {
        }
    }

    /**
     * Performs one poll of the native watchdog's crash flag and either triggers mitigation or
     * schedules the next poll.
     *
     * <p>Each call consumes one poll from {@link #mNumberOfNativeCrashPollsRemaining}. If the
     * system property {@code sys.init.updatable_crashing} equals {@code "1"}, a failure with
     * reason {@link #FAILURE_REASON_NATIVE_CRASH} and an empty package list is reported through
     * {@code onPackageFailure} and polling stops. Otherwise another poll is posted after
     * {@link #NATIVE_CRASH_POLLING_INTERVAL_MILLIS} for as long as polls remain.
     *
     * <p>This method should be only called on mShortTaskHandler, since it modifies
     * {@link #mNumberOfNativeCrashPollsRemaining}.
     */
    private void checkAndMitigateNativeCrashes() {
        mNumberOfNativeCrashPollsRemaining--;
        // Check if native watchdog reported a crash
        if ("1".equals(SystemProperties.get("sys.init.updatable_crashing"))) {
            // We rollback everything available when crash is unattributable, hence the empty
            // package list. Collections.emptyList() is used instead of the raw-typed
            // Collections.EMPTY_LIST so the argument is a properly typed list.
            onPackageFailure(Collections.emptyList(), FAILURE_REASON_NATIVE_CRASH);
            // we stop polling after an attempt to execute rollback, regardless of whether the
            // attempt succeeds or not
        } else if (mNumberOfNativeCrashPollsRemaining > 0) {
            mShortTaskHandler.postDelayed(this::checkAndMitigateNativeCrashes,
                    NATIVE_CRASH_POLLING_INTERVAL_MILLIS);
        }
    }

    /**
     * Posts the first native crash poll to mShortTaskHandler and logs the remaining poll budget.
     *
     * <p>Because a poll can eventually trigger a rollback, call this only once boot has
     * completed {@code onBootCompleted} and not earlier: the install session must be entirely
     * completed before a rollback is attempted.
     */
    public void scheduleCheckAndMitigateNativeCrashes() {
        final String message = "Scheduling " + mNumberOfNativeCrashPollsRemaining
                + " polls to check and mitigate native crashes";
        Slog.i(TAG, message);
        mShortTaskHandler.post(this::checkAndMitigateNativeCrashes);
    }

    /** Possible severity values of the user impact of a {@link PackageHealthObserver#execute}. */
    @Retention(SOURCE)
    @IntDef(value = {PackageHealthObserverImpact.USER_IMPACT_NONE,
@@ -422,17 +494,28 @@ public class PackageWatchdog {
        /**
         * Called when health check fails for the {@code versionedPackage}.
         *
         * <p>The returned impact is compared across observers: the observer reporting the
         * lowest impact other than {@link PackageHealthObserverImpact#USER_IMPACT_NONE} is the
         * one whose {@link #execute} is invoked.
         *
         * @param versionedPackage the package that is failing. This may be null if a native
         *                         service is crashing.
         * @param failureReason    the type of failure that is occurring.
         * @return any one of {@link PackageHealthObserverImpact} to express the impact
         * to the user on {@link #execute}
         */
        @PackageHealthObserverImpact int onHealthCheckFailed(VersionedPackage versionedPackage);
        @PackageHealthObserverImpact int onHealthCheckFailed(
                @Nullable VersionedPackage versionedPackage,
                @FailureReasons int failureReason);

        /**
         * Executes mitigation for {@link #onHealthCheckFailed}.
         *
         * @param versionedPackage the package that is failing. This may be null if a native
         *                         service is crashing.
         * @param failureReason    the type of failure that is occurring.
         * @return {@code true} if action was executed successfully, {@code false} otherwise
         */
        boolean execute(VersionedPackage versionedPackage, @FailureReasons int failureReason);
        boolean execute(@Nullable VersionedPackage versionedPackage,
                @FailureReasons int failureReason);

        // TODO(b/120598832): Ensure uniqueness?
        /**
+17 −40
Original line number Diff line number Diff line
@@ -61,7 +61,6 @@ import java.io.PrintWriter;
import java.util.Collections;
import java.util.List;
import java.util.Set;
import java.util.concurrent.TimeUnit;

/**
 * {@link PackageHealthObserver} for {@link RollbackManagerService}.
@@ -74,10 +73,6 @@ public final class RollbackPackageHealthObserver implements PackageHealthObserve
    private static final String TAG = "RollbackPackageHealthObserver";
    private static final String NAME = "rollback-observer";
    private static final int INVALID_ROLLBACK_ID = -1;
    // TODO: make the following values configurable via DeviceConfig
    private static final long NATIVE_CRASH_POLLING_INTERVAL_MILLIS =
            TimeUnit.SECONDS.toMillis(30);
    private static final long NUMBER_OF_NATIVE_CRASH_POLLS = 10;

    private final Context mContext;
    private final Handler mHandler;
@@ -85,13 +80,9 @@ public final class RollbackPackageHealthObserver implements PackageHealthObserve
    // Staged rollback ids that have been committed but their session is not yet ready
    @GuardedBy("mPendingStagedRollbackIds")
    private final Set<Integer> mPendingStagedRollbackIds = new ArraySet<>();
    // this field is initialized in the c'tor and then only accessed from mHandler thread, so
    // no need to guard with a lock
    private long mNumberOfNativeCrashPollsRemaining;

    RollbackPackageHealthObserver(Context context) {
        mContext = context;
        mNumberOfNativeCrashPollsRemaining = NUMBER_OF_NATIVE_CRASH_POLLS;
        HandlerThread handlerThread = new HandlerThread("RollbackPackageHealthObserver");
        handlerThread.start();
        mHandler = handlerThread.getThreadHandler();
@@ -102,7 +93,14 @@ public final class RollbackPackageHealthObserver implements PackageHealthObserve
    }

    @Override
    public int onHealthCheckFailed(VersionedPackage failedPackage) {
    public int onHealthCheckFailed(@Nullable VersionedPackage failedPackage,
            @FailureReasons int failureReason) {
        // For native crashes, we will roll back any available rollbacks
        if (failureReason == PackageWatchdog.FAILURE_REASON_NATIVE_CRASH
                && !mContext.getSystemService(RollbackManager.class)
                .getAvailableRollbacks().isEmpty()) {
            return PackageHealthObserverImpact.USER_IMPACT_MEDIUM;
        }
        if (getAvailableRollback(failedPackage) == null) {
            // Don't handle the notification, no rollbacks available for the package
            return PackageHealthObserverImpact.USER_IMPACT_NONE;
@@ -113,7 +111,13 @@ public final class RollbackPackageHealthObserver implements PackageHealthObserve
    }

    @Override
    public boolean execute(VersionedPackage failedPackage, @FailureReasons int rollbackReason) {
    public boolean execute(@Nullable VersionedPackage failedPackage,
            @FailureReasons int rollbackReason) {
        if (rollbackReason == PackageWatchdog.FAILURE_REASON_NATIVE_CRASH) {
            rollbackAll();
            return true;
        }

        RollbackInfo rollback = getAvailableRollback(failedPackage);
        if (rollback == null) {
            Slog.w(TAG, "Expected rollback but no valid rollback found for package: [ "
@@ -152,7 +156,8 @@ public final class RollbackPackageHealthObserver implements PackageHealthObserve
        String moduleMetadataPackageName = getModuleMetadataPackageName();

        if (!rollbackManager.getAvailableRollbacks().isEmpty()) {
            scheduleCheckAndMitigateNativeCrashes();
            // TODO(gavincorkery): Call into Package Watchdog from outside the observer
            PackageWatchdog.getInstance(mContext).scheduleCheckAndMitigateNativeCrashes();
        }

        int rollbackId = popLastStagedRollbackId();
@@ -343,24 +348,6 @@ public final class RollbackPackageHealthObserver implements PackageHealthObserve
        }
    }

    /**
     * Runs a single native crash poll: consumes one poll from
     * {@link #mNumberOfNativeCrashPollsRemaining}, rolls everything back if the native watchdog
     * flagged a crash, and otherwise re-posts itself while polls remain.
     *
     * <p>Must only be called on the mHandler thread, since it modifies
     * {@link #mNumberOfNativeCrashPollsRemaining} and this class is meant to stay lock free.
     */
    private void checkAndMitigateNativeCrashes() {
        mNumberOfNativeCrashPollsRemaining--;
        // Ask the native watchdog whether it reported a crash
        final String crashFlag = SystemProperties.get("sys.init.updatable_crashing");
        if ("1".equals(crashFlag)) {
            rollbackAll();
            // Polling ends after a rollback attempt, whether or not the attempt succeeds
            return;
        }
        if (mNumberOfNativeCrashPollsRemaining <= 0) {
            // Poll budget exhausted; do not schedule another check
            return;
        }
        mHandler.postDelayed(this::checkAndMitigateNativeCrashes,
                NATIVE_CRASH_POLLING_INTERVAL_MILLIS);
    }

    /**
     * Returns true if the package name is the name of a module.
@@ -456,16 +443,6 @@ public final class RollbackPackageHealthObserver implements PackageHealthObserve
        }
    }

    /**
     * Posts the first native crash poll to mHandler and logs the remaining poll budget.
     *
     * <p>Because a poll can eventually trigger a RollbackManager rollback, call this only once
     * boot has completed {@code onBootCompleted} and not earlier: the install session must be
     * entirely completed before a rollback is attempted.
     */
    private void scheduleCheckAndMitigateNativeCrashes() {
        final String message = "Scheduling " + mNumberOfNativeCrashPollsRemaining
                + " polls to check and mitigate native crashes";
        Slog.i(TAG, message);
        mHandler.post(this::checkAndMitigateNativeCrashes);
    }

    private int mapFailureReasonToMetric(@FailureReasons int failureReason) {
        switch (failureReason) {
+3 −2
Original line number Diff line number Diff line
@@ -328,7 +328,8 @@ public class PackageWatchdogTest {
        long differentVersionCode = 2L;
        TestObserver observer = new TestObserver(OBSERVER_NAME_1) {
                @Override
                public int onHealthCheckFailed(VersionedPackage versionedPackage) {
                public int onHealthCheckFailed(VersionedPackage versionedPackage,
                        int failureReason) {
                    if (versionedPackage.getVersionCode() == VERSION_CODE) {
                        // Only rollback for specific versionCode
                        return PackageHealthObserverImpact.USER_IMPACT_MEDIUM;
@@ -1012,7 +1013,7 @@ public class PackageWatchdogTest {
            mImpact = impact;
        }

        // Records the failing package's name and reports this observer's stored impact (mImpact).
        // NOTE(review): versionedPackage is dereferenced without a null check, while the
        // PackageHealthObserver documentation says it may be null when a native service is
        // crashing -- confirm no test drives the native crash path through this observer.
        public int onHealthCheckFailed(VersionedPackage versionedPackage) {
        public int onHealthCheckFailed(VersionedPackage versionedPackage, int failureReason) {
            mHealthCheckFailedPackages.add(versionedPackage.getPackageName());
            return mImpact;
        }