Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 316ee802 authored by Ioannis Ilkos's avatar Ioannis Ilkos
Browse files

Throttle watchdog thread dumps

Flag: com.android.internal.os.pre_watchdog_throttle_thread_dump
Test: manual, atest WatchdogTest
Bug: 417424880
Change-Id: I891cbe3185319d4d030601ab5943f47de05449ea
parent ce154082
Loading
Loading
Loading
Loading
+11 −1
Original line number Diff line number Diff line
@@ -57,3 +57,13 @@ flag {
    bug: "314735374"
    is_fixed_read_only: true
}

flag {
    name: "pre_watchdog_throttle_thread_dump"
    namespace: "stability"
    description: "Throttles how often a pre-watchdog results in a thread dump"
    bug: "417424880"
    metadata {
        purpose: PURPOSE_BUGFIX
    }
}
+48 −18
Original line number Diff line number Diff line
@@ -106,6 +106,10 @@ public class Watchdog implements Dumpable {
    // The pre-watchdog event is similar to a full watchdog except it does not crash system server.
    private static final int PRE_WATCHDOG_TIMEOUT_RATIO = 4;

    // Throttle non-fatal thread dumps to avoid adversely affecting performance.
    private static final long PRE_WATCHDOG_COOL_OFF_MILLIS =
            com.android.internal.os.Flags.preWatchdogThrottleThreadDump() ? 60 * 60 * 1000 : 0;

    // These are temporally ordered: larger values as lateness increases
    static final int COMPLETED = 0;
    static final int WAITING = 1;
@@ -453,6 +457,26 @@ public class Watchdog implements Dumpable {
        void monitor();
    }

    static final class Throttler {
        private final Clock mUptimeClock;
        private final long mCoolOffPeriodMillis;
        private long mLastTriggerMillis = 0;

        Throttler(Clock uptimeClock, long coolOffPeriodMillis) {
            this.mUptimeClock = uptimeClock;
            this.mCoolOffPeriodMillis = coolOffPeriodMillis;
        }

        boolean isThrottled() {
            return mLastTriggerMillis > 0
                    && (mUptimeClock.millis() - mLastTriggerMillis) < mCoolOffPeriodMillis;
        }

        void markTrigger() {
            mLastTriggerMillis = mUptimeClock.millis();
        }
    }

    public static Watchdog getInstance() {
        if (sWatchdog == null) {
            sWatchdog = new Watchdog();
@@ -828,8 +852,9 @@ public class Watchdog implements Dumpable {
    }

    private void run() {
        boolean waitedHalf = false;

        boolean preWatchdogTriggered = false;
        Throttler preWatchdogThrottler = new Throttler(
                SystemClock.uptimeClock(), PRE_WATCHDOG_COOL_OFF_MILLIS);
        while (true) {
            List<HandlerChecker> blockedCheckers = Collections.emptyList();
            String subject = "";
@@ -881,15 +906,15 @@ public class Watchdog implements Dumpable {
                final int waitState = evaluateCheckerCompletionLocked();
                if (waitState == COMPLETED) {
                    // The monitors have returned; reset
                    waitedHalf = false;
                    preWatchdogTriggered = false;
                    continue;
                } else if (waitState == WAITING) {
                    // still waiting but within their configured intervals; back off and recheck
                    continue;
                } else if (waitState == WAITED_UNTIL_PRE_WATCHDOG) {
                    if (!waitedHalf) {
                    if (!preWatchdogTriggered) {
                        Slog.i(TAG, "WAITED_UNTIL_PRE_WATCHDOG");
                        waitedHalf = true;
                        preWatchdogTriggered = true;
                        // We've waited until the pre-watchdog, but we'd need to do the stack trace
                        // dump w/o the lock.
                        blockedCheckers = getCheckersWithStateLocked(WAITED_UNTIL_PRE_WATCHDOG);
@@ -914,7 +939,15 @@ public class Watchdog implements Dumpable {
            //
            // Then, if we reached the full timeout, kill this process so that the system will
            // restart. If we reached pre-watchdog timeout, just log some information and continue.
            logWatchog(doWaitedPreDump, subject, pids);

            // Get critical event log before logging the watchdog so that it doesn't
            // occur in the log.
            String criticalLog = CriticalEventLog.getInstance().logLinesForSystemServerTraceFile();
            UUID errorId = logWatchdog(doWaitedPreDump, subject);
            if (!doWaitedPreDump || !preWatchdogThrottler.isThrottled()) {
                collectThreadDumps(errorId, doWaitedPreDump, criticalLog, subject, pids);
                preWatchdogThrottler.markTrigger();
            }

            if (doWaitedPreDump) {
                // We have waited for only pre-watchdog timeout, we continue to wait for the
@@ -934,7 +967,7 @@ public class Watchdog implements Dumpable {
                    int res = controller.systemNotResponding(subject);
                    if (res >= 0) {
                        Slog.i(TAG, "Activity controller requested to coninue to wait");
                        waitedHalf = false;
                        preWatchdogTriggered = false;
                        continue;
                    }
                } catch (RemoteException e) {
@@ -963,15 +996,11 @@ public class Watchdog implements Dumpable {
                System.exit(10);
            }

            waitedHalf = false;
            preWatchdogTriggered = false;
        }
    }

    private void logWatchog(boolean preWatchdog, String subject, ArrayList<Integer> pids) {
        // Get critical event log before logging the pre-watchdog so that it doesn't
        // occur in the log.
        String criticalEvents =
                CriticalEventLog.getInstance().logLinesForSystemServerTraceFile();
    private UUID logWatchdog(boolean preWatchdog, String subject) {
        final UUID errorId = mTraceErrorLogger.generateErrorId();
        if (mTraceErrorLogger.isAddErrorIdEnabled()) {
            mTraceErrorLogger.addProcessInfoAndErrorIdToTrace("system_server", Process.myPid(),
@@ -979,13 +1008,10 @@ public class Watchdog implements Dumpable {
            mTraceErrorLogger.addSubjectToTrace(subject, errorId);
        }

        final String dropboxTag;
        if (preWatchdog) {
            dropboxTag = "pre_watchdog";
            CriticalEventLog.getInstance().logHalfWatchdog(subject);
            FrameworkStatsLog.write(FrameworkStatsLog.SYSTEM_SERVER_PRE_WATCHDOG_OCCURRED);
        } else {
            dropboxTag = "watchdog";
            CriticalEventLog.getInstance().logWatchdog(subject, errorId);
            EventLog.writeEvent(EventLogTags.WATCHDOG, subject);
            // Log the atom as early as possible since it is used as a mechanism to trigger
@@ -993,10 +1019,14 @@ public class Watchdog implements Dumpable {
            // point in time when the Watchdog happens as possible.
            FrameworkStatsLog.write(FrameworkStatsLog.SYSTEM_SERVER_WATCHDOG_OCCURRED, subject);
        }
        return errorId;
    }

    private void collectThreadDumps(UUID errorId, boolean preWatchdog, String criticalEvents,
            String subject, ArrayList<Integer> pids) {
        String dropboxTag = preWatchdog ? "pre_watchdog" : "watchdog";
        final LinkedHashMap headersMap =
                com.android.server.am.Flags.enableDropboxWatchdogHeaders()
                ? new LinkedHashMap<>(Collections.singletonMap("Watchdog-Type", dropboxTag)) : null;
                new LinkedHashMap<>(Collections.singletonMap("Watchdog-Type", dropboxTag));
        long anrTime = SystemClock.uptimeMillis();
        StringBuilder report = new StringBuilder();
        report.append(ResourcePressureUtil.currentPsiState());
+26 −0
Original line number Diff line number Diff line
@@ -27,6 +27,7 @@ import android.os.SimpleClock;
import androidx.test.runner.AndroidJUnit4;

import com.android.server.Watchdog.HandlerChecker;
import com.android.server.Watchdog.Throttler;

import org.junit.Before;
import org.junit.Test;
@@ -133,6 +134,31 @@ public class WatchdogTest {
        verify(monitor).monitor();
    }

    @Test
    public void testThrottler() {
        final int throttlerCoolOffMs = 100;
        Throttler throttler = new Throttler(mClock, throttlerCoolOffMs);
        // Initially not throttled
        assertEquals(false, throttler.isThrottled());

        throttler.markTrigger();
        assertEquals(true, throttler.isThrottled());

        mClock.advanceBy(throttlerCoolOffMs - 1);
        assertEquals(true, throttler.isThrottled());

        mClock.advanceBy(1);
        assertEquals(false, throttler.isThrottled());
    }

    @Test
    public void testThrottlerDisabled() {
        Throttler throttler = new Throttler(mClock, 0);
        assertEquals(false, throttler.isThrottled());
        throttler.markTrigger();
        assertEquals(false, throttler.isThrottled());
    }

    private static class TestClock extends SimpleClock {
        long mNowMillis = 1;