Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 3b224617 authored by Varun Shah's avatar Varun Shah Committed by Android (Google) Code Review
Browse files

Merge "Introduce a process crash count tolerence limit."

parents 012eb240 75df622b
Loading
Loading
Loading
Loading
+32 −0
Original line number Diff line number Diff line
@@ -91,6 +91,9 @@ final class ActivityManagerConstants extends ContentObserver {
    static final String KEY_TOP_TO_FGS_GRACE_DURATION = "top_to_fgs_grace_duration";
    static final String KEY_PENDINGINTENT_WARNING_THRESHOLD = "pendingintent_warning_threshold";
    static final String KEY_MIN_CRASH_INTERVAL = "min_crash_interval";
    static final String KEY_PROCESS_CRASH_COUNT_RESET_INTERVAL =
            "process_crash_count_reset_interval";
    static final String KEY_PROCESS_CRASH_COUNT_LIMIT = "process_crash_count_limit";

    private static final int DEFAULT_MAX_CACHED_PROCESSES = 32;
    private static final long DEFAULT_BACKGROUND_SETTLE_TIME = 60*1000;
@@ -126,6 +129,8 @@ final class ActivityManagerConstants extends ContentObserver {
    private static final int DEFAULT_PENDINGINTENT_WARNING_THRESHOLD = 2000;
    private static final int DEFAULT_MIN_CRASH_INTERVAL = 2 * 60 * 1000;
    private static final int DEFAULT_MAX_PHANTOM_PROCESSES = 32;
    private static final int DEFAULT_PROCESS_CRASH_COUNT_RESET_INTERVAL = 12 * 60 * 60 * 1000;
    private static final int DEFAULT_PROCESS_CRASH_COUNT_LIMIT = 12;


    // Flag stored in the DeviceConfig API.
@@ -301,9 +306,27 @@ final class ActivityManagerConstants extends ContentObserver {
    /**
     * The minimum time we allow between crashes, for us to consider this
     * application to be bad and stop its services and reject broadcasts.
     * A reasonable interval here would be anything between 1-3 minutes.
     */
    public static int MIN_CRASH_INTERVAL = DEFAULT_MIN_CRASH_INTERVAL;

    /**
     * We will allow for a maximum number of {@link #PROCESS_CRASH_COUNT_LIMIT} crashes within this
     * time period before we consider the application to be bad and stop services and reject
     * broadcasts.
     * A reasonable reset interval here would be anything between 10-20 hours along with a crash
     * count limit of 10-20 crashes.
     */
    static long PROCESS_CRASH_COUNT_RESET_INTERVAL = DEFAULT_PROCESS_CRASH_COUNT_RESET_INTERVAL;

    /**
     * The maximum number of crashes allowed within {@link #PROCESS_CRASH_COUNT_RESET_INTERVAL}
     * before we consider the application to be bad and stop services and reject broadcasts.
     * A reasonable crash count limit here would be anything between 10-20 crashes along with a
     * reset interval of 10-20 hours.
     */
    static int PROCESS_CRASH_COUNT_LIMIT = DEFAULT_PROCESS_CRASH_COUNT_LIMIT;

    // Indicates whether the activity starts logging is enabled.
    // Controlled by Settings.Global.ACTIVITY_STARTS_LOGGING_ENABLED
    volatile boolean mFlagActivityStartsLoggingEnabled;
@@ -694,6 +717,11 @@ final class ActivityManagerConstants extends ContentObserver {
                    DEFAULT_MIN_CRASH_INTERVAL);
            PENDINGINTENT_WARNING_THRESHOLD = mParser.getInt(KEY_PENDINGINTENT_WARNING_THRESHOLD,
                    DEFAULT_PENDINGINTENT_WARNING_THRESHOLD);
            PROCESS_CRASH_COUNT_RESET_INTERVAL = mParser.getInt(
                    KEY_PROCESS_CRASH_COUNT_RESET_INTERVAL,
                    DEFAULT_PROCESS_CRASH_COUNT_RESET_INTERVAL);
            PROCESS_CRASH_COUNT_LIMIT = mParser.getInt(KEY_PROCESS_CRASH_COUNT_LIMIT,
                    DEFAULT_PROCESS_CRASH_COUNT_LIMIT);

            if (POWER_CHECK_INTERVAL != currentPowerCheckInterval) {
                mService.mHandler.removeMessages(
@@ -934,6 +962,10 @@ final class ActivityManagerConstants extends ContentObserver {
        pw.println(TOP_TO_FGS_GRACE_DURATION);
        pw.print("  "); pw.print(KEY_MIN_CRASH_INTERVAL); pw.print("=");
        pw.println(MIN_CRASH_INTERVAL);
        pw.print("  "); pw.print(KEY_PROCESS_CRASH_COUNT_RESET_INTERVAL); pw.print("=");
        pw.println(PROCESS_CRASH_COUNT_RESET_INTERVAL);
        pw.print("  "); pw.print(KEY_PROCESS_CRASH_COUNT_LIMIT); pw.print("=");
        pw.println(PROCESS_CRASH_COUNT_LIMIT);
        pw.print("  "); pw.print(KEY_IMPERCEPTIBLE_KILL_EXEMPT_PROC_STATES); pw.print("=");
        pw.println(Arrays.toString(IMPERCEPTIBLE_KILL_EXEMPT_PROC_STATES.toArray()));
        pw.print("  "); pw.print(KEY_IMPERCEPTIBLE_KILL_EXEMPT_PACKAGES); pw.print("=");
+106 −34
Original line number Diff line number Diff line
@@ -19,6 +19,8 @@ package com.android.server.am;
import static android.app.ActivityTaskManager.INVALID_TASK_ID;
import static android.content.pm.ApplicationInfo.FLAG_SYSTEM;

import static com.android.server.am.ActivityManagerConstants.PROCESS_CRASH_COUNT_LIMIT;
import static com.android.server.am.ActivityManagerConstants.PROCESS_CRASH_COUNT_RESET_INTERVAL;
import static com.android.server.am.ActivityManagerDebugConfig.TAG_AM;
import static com.android.server.am.ActivityManagerDebugConfig.TAG_WITH_CLASS_NAME;
import static com.android.server.am.ActivityManagerService.MY_PID;
@@ -44,6 +46,7 @@ import android.provider.Settings;
import android.util.ArrayMap;
import android.util.ArraySet;
import android.util.EventLog;
import android.util.Pair;
import android.util.Slog;
import android.util.SparseArray;
import android.util.TimeUtils;
@@ -89,6 +92,12 @@ class AppErrors {
     */
    private final ProcessMap<Long> mProcessCrashShowDialogTimes = new ProcessMap<>();

    /**
     * A pairing between how many times various processes have crashed since a given time.
     * Entry and exit conditions for this map are similar to mProcessCrashTimes.
     */
    private final ProcessMap<Pair<Long, Integer>> mProcessCrashCounts = new ProcessMap<>();

    /**
     * Set of applications that we consider to be bad, and will reject
     * incoming broadcasts from (which the user has no control over).
@@ -118,6 +127,7 @@ class AppErrors {
        mProcessCrashTimes.clear();
        mProcessCrashTimesPersistent.clear();
        mProcessCrashShowDialogTimes.clear();
        mProcessCrashCounts.clear();
        synchronized (mBadProcesses) {
            mBadProcesses.clear();
        }
@@ -196,9 +206,9 @@ class AppErrors {
    }

    boolean dumpLocked(FileDescriptor fd, PrintWriter pw, boolean needSep, String dumpPackage) {
        final long now = SystemClock.uptimeMillis();
        if (!mProcessCrashTimes.getMap().isEmpty()) {
            boolean printed = false;
            final long now = SystemClock.uptimeMillis();
            final ArrayMap<String, SparseArray<Long>> pmap = mProcessCrashTimes.getMap();
            final int processCount = pmap.size();
            for (int ip = 0; ip < processCount; ip++) {
@@ -227,6 +237,36 @@ class AppErrors {
            }
        }

        if (!mProcessCrashCounts.getMap().isEmpty()) {
            boolean printed = false;
            final ArrayMap<String, SparseArray<Pair<Long, Integer>>> pmap =
                    mProcessCrashCounts.getMap();
            final int processCount = pmap.size();
            for (int ip = 0; ip < processCount; ip++) {
                final String pname = pmap.keyAt(ip);
                final SparseArray<Pair<Long, Integer>> uids = pmap.valueAt(ip);
                final int uidCount = uids.size();
                for (int i = 0; i < uidCount; i++) {
                    final int puid = uids.keyAt(i);
                    final ProcessRecord r = mService.getProcessNames().get(pname, puid);
                    if (dumpPackage != null && (r == null || !r.pkgList.containsKey(dumpPackage))) {
                        continue;
                    }
                    if (!printed) {
                        if (needSep) pw.println();
                        needSep = true;
                        pw.println("  First time processes crashed and counts:");
                        printed = true;
                    }
                    pw.print("    Process "); pw.print(pname);
                    pw.print(" uid "); pw.print(puid);
                    pw.print(": first crashed ");
                    TimeUtils.formatDuration(now - uids.valueAt(i).first, pw);
                    pw.print(" ago; crashes since then: "); pw.println(uids.valueAt(i).second);
                }
            }
        }

        if (!mBadProcesses.getMap().isEmpty()) {
            boolean printed = false;
            final ArrayMap<String, SparseArray<BadProcessInfo>> pmap = mBadProcesses.getMap();
@@ -295,12 +335,32 @@ class AppErrors {

    /**
     * Forgets the crash bookkeeping kept for a single process.
     *
     * <p>Removes the entry keyed by {@code processName} and {@code uid} from both the
     * last-crash-time map and the crash-count map, so the next crash of that process starts
     * from a clean slate in both.
     *
     * @param processName name of the process whose crash records should be dropped
     * @param uid uid the process runs under
     */
    void resetProcessCrashTimeLocked(final String processName, final int uid) {
        // Last recorded crash time, consulted by the "crashed again too quickly" check.
        mProcessCrashTimes.remove(processName, uid);
        // (first crash time, crash count) pair, consulted by the crash-count limit check.
        mProcessCrashCounts.remove(processName, uid);
    }

    void resetProcessCrashTimeLocked(boolean resetEntireUser, int appId, int userId) {
        final ArrayMap<String, SparseArray<Long>> pmap = mProcessCrashTimes.getMap();
        for (int ip = pmap.size() - 1; ip >= 0; ip--) {
            SparseArray<Long> ba = pmap.valueAt(ip);
        final ArrayMap<String, SparseArray<Long>> pTimeMap = mProcessCrashTimes.getMap();
        for (int ip = pTimeMap.size() - 1; ip >= 0; ip--) {
            SparseArray<Long> ba = pTimeMap.valueAt(ip);
            resetProcessCrashMapLocked(ba, resetEntireUser, appId, userId);
            if (ba.size() == 0) {
                pTimeMap.removeAt(ip);
            }
        }

        final ArrayMap<String, SparseArray<Pair<Long, Integer>>> pCountMap =
                                                                    mProcessCrashCounts.getMap();
        for (int ip = pCountMap.size() - 1; ip >= 0; ip--) {
            SparseArray<Pair<Long, Integer>> ba = pCountMap.valueAt(ip);
            resetProcessCrashMapLocked(ba, resetEntireUser, appId, userId);
            if (ba.size() == 0) {
                pCountMap.removeAt(ip);
            }
        }
    }

    private void resetProcessCrashMapLocked(SparseArray<?> ba, boolean resetEntireUser,
            int appId, int userId) {
        for (int i = ba.size() - 1; i >= 0; i--) {
            boolean remove = false;
            final int entUid = ba.keyAt(i);
@@ -321,10 +381,6 @@ class AppErrors {
                ba.removeAt(i);
            }
        }
            if (ba.size() == 0) {
                pmap.removeAt(ip);
            }
        }
    }

    void loadAppsNotReportingCrashesFromConfigLocked(String appsNotReportingCrashesConfig) {
@@ -567,12 +623,6 @@ class AppErrors {
            if (res == AppErrorDialog.FORCE_QUIT_AND_REPORT) {
                appErrorIntent = createAppErrorIntentLocked(r, timeMillis, crashInfo);
            }
            if (r != null && !r.isolated && res != AppErrorDialog.RESTART) {
                // XXX Can't keep track of crash time for isolated processes,
                // since they don't have a persistent identity.
                mProcessCrashTimes.put(r.processName, r.uid,
                        SystemClock.uptimeMillis());
            }
        }

        if (appErrorIntent != null) {
@@ -742,11 +792,14 @@ class AppErrors {
            }
        }

        if (crashTime != null && now < crashTime + ActivityManagerConstants.MIN_CRASH_INTERVAL) {
            // The process crashed again very quickly. If it was a bound foreground service, let's
            // try to restart again in a while, otherwise the process loses!
            Slog.w(TAG, "Process " + app.processName
                    + " has crashed too many times: killing!");
        final boolean quickCrash = crashTime != null
                && now < crashTime + ActivityManagerConstants.MIN_CRASH_INTERVAL;
        if (quickCrash || isProcOverCrashLimit(app, now)) {
            // The process either crashed again very quickly or has been crashing periodically in
            // the last few hours. If it was a bound foreground service, let's try to restart again
            // in a while, otherwise the process loses!
            Slog.w(TAG, "Process " + app.processName + " has crashed too many times, killing!"
                    + " Reason: " + (quickCrash ? "crashed quickly" : "over process crash limit"));
            EventLog.writeEvent(EventLogTags.AM_PROCESS_CRASHED_TOO_MUCH,
                    app.userId, app.processName, app.uid);
            mService.mAtmInternal.onHandleAppCrash(app.getWindowProcessController());
@@ -765,6 +818,7 @@ class AppErrors {
                                new BadProcessInfo(now, shortMsg, longMsg, stackTrace));
                    }
                    mProcessCrashTimes.remove(app.processName, app.uid);
                    mProcessCrashCounts.remove(app.processName, app.uid);
                }
                app.bad = true;
                app.removed = true;
@@ -809,12 +863,30 @@ class AppErrors {
            // because they don't have a persistent identity.
            mProcessCrashTimes.put(app.processName, app.uid, now);
            mProcessCrashTimesPersistent.put(app.processName, app.uid, now);
            updateProcessCrashCount(app.processName, app.uid, now);
        }

        if (app.crashHandler != null) mService.mHandler.post(app.crashHandler);
        return true;
    }

    /**
     * Records one more crash for the given process in {@code mProcessCrashCounts}.
     *
     * <p>Each entry is a pair of (time the current counting window started, number of crashes
     * counted in that window). A fresh window starting at {@code now} with a count of one is
     * opened when no entry exists yet, or when the existing window started more than
     * {@code PROCESS_CRASH_COUNT_RESET_INTERVAL} before {@code now}. Otherwise the window's
     * start time is kept and its count is incremented by one.
     *
     * @param processName name of the process that crashed
     * @param uid uid the crashed process runs under
     * @param now time of the crash, on the same clock as the stored window start times
     */
    private void updateProcessCrashCount(String processName, int uid, long now) {
        final Pair<Long, Integer> previous = mProcessCrashCounts.get(processName, uid);
        // The window is still open unless it started more than the reset interval ago.
        final boolean windowStillOpen = previous != null
                && (previous.first + PROCESS_CRASH_COUNT_RESET_INTERVAL) >= now;
        final Pair<Long, Integer> updated = windowStillOpen
                ? new Pair<>(previous.first, previous.second + 1)
                : new Pair<>(now, 1);
        mProcessCrashCounts.put(processName, uid, updated);
    }

    /**
     * Tells whether {@code app} has reached its crash allowance for the current counting
     * window.
     *
     * <p>The answer is {@code true} only when all of the following hold: the process is not
     * isolated, a crash-count entry exists for its name and uid, {@code now} is still before
     * the end of that entry's window (window start plus
     * {@code PROCESS_CRASH_COUNT_RESET_INTERVAL}), and the recorded count is at least
     * {@code PROCESS_CRASH_COUNT_LIMIT}.
     *
     * @param app the process being evaluated
     * @param now current time, on the same clock as the stored window start times
     * @return {@code true} if the process should be treated as crashing too often
     */
    private boolean isProcOverCrashLimit(ProcessRecord app, long now) {
        final Pair<Long, Integer> entry = mProcessCrashCounts.get(app.processName, app.uid);
        if (app.isolated || entry == null) {
            // Isolated processes are never reported as over the limit; neither are
            // processes with no crash-count entry.
            return false;
        }
        final long windowEnd = entry.first + PROCESS_CRASH_COUNT_RESET_INTERVAL;
        return now < windowEnd && entry.second >= PROCESS_CRASH_COUNT_LIMIT;
    }

    void handleShowAppErrorUi(Message msg) {
        AppErrorDialog.Data data = (AppErrorDialog.Data) msg.obj;
        boolean showBackground = Settings.Secure.getInt(mContext.getContentResolver(),