Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 5156a247 authored by Ioannis Ilkos's avatar Ioannis Ilkos Committed by Android (Google) Code Review
Browse files

Merge "Throttle watchdog thread dumps" into main

parents 4dcec4af 316ee802
Loading
Loading
Loading
Loading
+11 −1
Original line number Diff line number Diff line
@@ -57,3 +57,13 @@ flag {
    bug: "314735374"
    is_fixed_read_only: true
}

flag {
    name: "pre_watchdog_throttle_thread_dump"
    namespace: "stability"
    description: "Throttles how often a pre-watchdog results in a thread dump"
    bug: "417424880"
    metadata {
        purpose: PURPOSE_BUGFIX
    }
}
+48 −18
Original line number Diff line number Diff line
@@ -106,6 +106,10 @@ public class Watchdog implements Dumpable {
    // The pre-watchdog event is similar to a full watchdog except it does not crash system server.
    private static final int PRE_WATCHDOG_TIMEOUT_RATIO = 4;

    // Throttle non-fatal thread dumps to avoid adversely affecting performance.
    private static final long PRE_WATCHDOG_COOL_OFF_MILLIS =
            com.android.internal.os.Flags.preWatchdogThrottleThreadDump() ? 60 * 60 * 1000 : 0;

    // These are temporally ordered: larger values as lateness increases
    static final int COMPLETED = 0;
    static final int WAITING = 1;
@@ -453,6 +457,26 @@ public class Watchdog implements Dumpable {
        void monitor();
    }

    static final class Throttler {
        private final Clock mUptimeClock;
        private final long mCoolOffPeriodMillis;
        private long mLastTriggerMillis = 0;

        Throttler(Clock uptimeClock, long coolOffPeriodMillis) {
            this.mUptimeClock = uptimeClock;
            this.mCoolOffPeriodMillis = coolOffPeriodMillis;
        }

        boolean isThrottled() {
            return mLastTriggerMillis > 0
                    && (mUptimeClock.millis() - mLastTriggerMillis) < mCoolOffPeriodMillis;
        }

        void markTrigger() {
            mLastTriggerMillis = mUptimeClock.millis();
        }
    }

    public static Watchdog getInstance() {
        if (sWatchdog == null) {
            sWatchdog = new Watchdog();
@@ -828,8 +852,9 @@ public class Watchdog implements Dumpable {
    }

    private void run() {
        boolean waitedHalf = false;

        boolean preWatchdogTriggered = false;
        Throttler preWatchdogThrottler = new Throttler(
                SystemClock.uptimeClock(), PRE_WATCHDOG_COOL_OFF_MILLIS);
        while (true) {
            List<HandlerChecker> blockedCheckers = Collections.emptyList();
            String subject = "";
@@ -881,15 +906,15 @@ public class Watchdog implements Dumpable {
                final int waitState = evaluateCheckerCompletionLocked();
                if (waitState == COMPLETED) {
                    // The monitors have returned; reset
                    waitedHalf = false;
                    preWatchdogTriggered = false;
                    continue;
                } else if (waitState == WAITING) {
                    // still waiting but within their configured intervals; back off and recheck
                    continue;
                } else if (waitState == WAITED_UNTIL_PRE_WATCHDOG) {
                    if (!waitedHalf) {
                    if (!preWatchdogTriggered) {
                        Slog.i(TAG, "WAITED_UNTIL_PRE_WATCHDOG");
                        waitedHalf = true;
                        preWatchdogTriggered = true;
                        // We've waited until the pre-watchdog, but we'd need to do the stack trace
                        // dump w/o the lock.
                        blockedCheckers = getCheckersWithStateLocked(WAITED_UNTIL_PRE_WATCHDOG);
@@ -914,7 +939,15 @@ public class Watchdog implements Dumpable {
            //
            // Then, if we reached the full timeout, kill this process so that the system will
            // restart. If we reached pre-watchdog timeout, just log some information and continue.
            logWatchog(doWaitedPreDump, subject, pids);

            // Get critical event log before logging the watchdog so that it doesn't
            // occur in the log.
            String criticalLog = CriticalEventLog.getInstance().logLinesForSystemServerTraceFile();
            UUID errorId = logWatchdog(doWaitedPreDump, subject);
            if (!doWaitedPreDump || !preWatchdogThrottler.isThrottled()) {
                collectThreadDumps(errorId, doWaitedPreDump, criticalLog, subject, pids);
                preWatchdogThrottler.markTrigger();
            }

            if (doWaitedPreDump) {
                // We have waited for only pre-watchdog timeout, we continue to wait for the
@@ -934,7 +967,7 @@ public class Watchdog implements Dumpable {
                    int res = controller.systemNotResponding(subject);
                    if (res >= 0) {
                        Slog.i(TAG, "Activity controller requested to coninue to wait");
                        waitedHalf = false;
                        preWatchdogTriggered = false;
                        continue;
                    }
                } catch (RemoteException e) {
@@ -963,15 +996,11 @@ public class Watchdog implements Dumpable {
                System.exit(10);
            }

            waitedHalf = false;
            preWatchdogTriggered = false;
        }
    }

    private void logWatchog(boolean preWatchdog, String subject, ArrayList<Integer> pids) {
        // Get critical event log before logging the pre-watchdog so that it doesn't
        // occur in the log.
        String criticalEvents =
                CriticalEventLog.getInstance().logLinesForSystemServerTraceFile();
    private UUID logWatchdog(boolean preWatchdog, String subject) {
        final UUID errorId = mTraceErrorLogger.generateErrorId();
        if (mTraceErrorLogger.isAddErrorIdEnabled()) {
            mTraceErrorLogger.addProcessInfoAndErrorIdToTrace("system_server", Process.myPid(),
@@ -979,13 +1008,10 @@ public class Watchdog implements Dumpable {
            mTraceErrorLogger.addSubjectToTrace(subject, errorId);
        }

        final String dropboxTag;
        if (preWatchdog) {
            dropboxTag = "pre_watchdog";
            CriticalEventLog.getInstance().logHalfWatchdog(subject);
            FrameworkStatsLog.write(FrameworkStatsLog.SYSTEM_SERVER_PRE_WATCHDOG_OCCURRED);
        } else {
            dropboxTag = "watchdog";
            CriticalEventLog.getInstance().logWatchdog(subject, errorId);
            EventLog.writeEvent(EventLogTags.WATCHDOG, subject);
            // Log the atom as early as possible since it is used as a mechanism to trigger
@@ -993,10 +1019,14 @@ public class Watchdog implements Dumpable {
            // point in time when the Watchdog happens as possible.
            FrameworkStatsLog.write(FrameworkStatsLog.SYSTEM_SERVER_WATCHDOG_OCCURRED, subject);
        }
        return errorId;
    }

    private void collectThreadDumps(UUID errorId, boolean preWatchdog, String criticalEvents,
            String subject, ArrayList<Integer> pids) {
        String dropboxTag = preWatchdog ? "pre_watchdog" : "watchdog";
        final LinkedHashMap headersMap =
                com.android.server.am.Flags.enableDropboxWatchdogHeaders()
                ? new LinkedHashMap<>(Collections.singletonMap("Watchdog-Type", dropboxTag)) : null;
                new LinkedHashMap<>(Collections.singletonMap("Watchdog-Type", dropboxTag));
        long anrTime = SystemClock.uptimeMillis();
        StringBuilder report = new StringBuilder();
        report.append(ResourcePressureUtil.currentPsiState());
+26 −0
Original line number Diff line number Diff line
@@ -27,6 +27,7 @@ import android.os.SimpleClock;
import androidx.test.runner.AndroidJUnit4;

import com.android.server.Watchdog.HandlerChecker;
import com.android.server.Watchdog.Throttler;

import org.junit.Before;
import org.junit.Test;
@@ -133,6 +134,31 @@ public class WatchdogTest {
        verify(monitor).monitor();
    }

    @Test
    public void testThrottler() {
        final int throttlerCoolOffMs = 100;
        Throttler throttler = new Throttler(mClock, throttlerCoolOffMs);
        // Initially not throttled
        assertEquals(false, throttler.isThrottled());

        throttler.markTrigger();
        assertEquals(true, throttler.isThrottled());

        mClock.advanceBy(throttlerCoolOffMs - 1);
        assertEquals(true, throttler.isThrottled());

        mClock.advanceBy(1);
        assertEquals(false, throttler.isThrottled());
    }

    @Test
    public void testThrottlerDisabled() {
        Throttler throttler = new Throttler(mClock, 0);
        assertEquals(false, throttler.isThrottled());
        throttler.markTrigger();
        assertEquals(false, throttler.isThrottled());
    }

    private static class TestClock extends SimpleClock {
        long mNowMillis = 1;