Loading core/java/com/android/internal/os/flags.aconfig +11 −1 Original line number Diff line number Diff line Loading @@ -57,3 +57,13 @@ flag { bug: "314735374" is_fixed_read_only: true } flag { name: "pre_watchdog_throttle_thread_dump" namespace: "stability" description: "Throttles how often a pre-watchdog results in a thread dump" bug: "417424880" metadata { purpose: PURPOSE_BUGFIX } } services/core/java/com/android/server/Watchdog.java +48 −18 Original line number Diff line number Diff line Loading @@ -106,6 +106,10 @@ public class Watchdog implements Dumpable { // The pre-watchdog event is similar to a full watchdog except it does not crash system server. private static final int PRE_WATCHDOG_TIMEOUT_RATIO = 4; // Throttle non-fatal thread dumps to avoid adversely affecting performance. private static final long PRE_WATCHDOG_COOL_OFF_MILLIS = com.android.internal.os.Flags.preWatchdogThrottleThreadDump() ? 60 * 60 * 1000 : 0; // These are temporally ordered: larger values as lateness increases static final int COMPLETED = 0; static final int WAITING = 1; Loading Loading @@ -453,6 +457,26 @@ public class Watchdog implements Dumpable { void monitor(); } static final class Throttler { private final Clock mUptimeClock; private final long mCoolOffPeriodMillis; private long mLastTriggerMillis = 0; Throttler(Clock uptimeClock, long coolOffPeriodMillis) { this.mUptimeClock = uptimeClock; this.mCoolOffPeriodMillis = coolOffPeriodMillis; } boolean isThrottled() { return mLastTriggerMillis > 0 && (mUptimeClock.millis() - mLastTriggerMillis) < mCoolOffPeriodMillis; } void markTrigger() { mLastTriggerMillis = mUptimeClock.millis(); } } public static Watchdog getInstance() { if (sWatchdog == null) { sWatchdog = new Watchdog(); Loading Loading @@ -828,8 +852,9 @@ public class Watchdog implements Dumpable { } private void run() { boolean waitedHalf = false; boolean preWatchdogTriggered = false; Throttler preWatchdogThrottler = new Throttler( SystemClock.uptimeClock(), PRE_WATCHDOG_COOL_OFF_MILLIS); while (true) { List<HandlerChecker> blockedCheckers = Collections.emptyList(); String subject = ""; Loading Loading @@ -881,15 +906,15 @@ public class Watchdog implements Dumpable { final int waitState = evaluateCheckerCompletionLocked(); if (waitState == COMPLETED) { // The monitors have returned; reset waitedHalf = false; preWatchdogTriggered = false; continue; } else if (waitState == WAITING) { // still waiting but within their configured intervals; back off and recheck continue; } else if (waitState == WAITED_UNTIL_PRE_WATCHDOG) { if (!waitedHalf) { if (!preWatchdogTriggered) { Slog.i(TAG, "WAITED_UNTIL_PRE_WATCHDOG"); waitedHalf = true; preWatchdogTriggered = true; // We've waited until the pre-watchdog, but we'd need to do the stack trace // dump w/o the lock. blockedCheckers = getCheckersWithStateLocked(WAITED_UNTIL_PRE_WATCHDOG); Loading @@ -914,7 +939,15 @@ public class Watchdog implements Dumpable { // // Then, if we reached the full timeout, kill this process so that the system will // restart. If we reached pre-watchdog timeout, just log some information and continue. logWatchog(doWaitedPreDump, subject, pids); // Get critical event log before logging the watchdog so that it doesn't // occur in the log. String criticalLog = CriticalEventLog.getInstance().logLinesForSystemServerTraceFile(); UUID errorId = logWatchdog(doWaitedPreDump, subject); if (!doWaitedPreDump || !preWatchdogThrottler.isThrottled()) { collectThreadDumps(errorId, doWaitedPreDump, criticalLog, subject, pids); preWatchdogThrottler.markTrigger(); } if (doWaitedPreDump) { // We have waited for only pre-watchdog timeout, we continue to wait for the Loading @@ -934,7 +967,7 @@ public class Watchdog implements Dumpable { int res = controller.systemNotResponding(subject); if (res >= 0) { Slog.i(TAG, "Activity controller requested to coninue to wait"); waitedHalf = false; preWatchdogTriggered = false; continue; } } catch (RemoteException e) { Loading Loading @@ -963,15 +996,11 @@ public class Watchdog implements Dumpable { System.exit(10); } waitedHalf = false; preWatchdogTriggered = false; } } private void logWatchog(boolean preWatchdog, String subject, ArrayList<Integer> pids) { // Get critical event log before logging the pre-watchdog so that it doesn't // occur in the log. String criticalEvents = CriticalEventLog.getInstance().logLinesForSystemServerTraceFile(); private UUID logWatchdog(boolean preWatchdog, String subject) { final UUID errorId = mTraceErrorLogger.generateErrorId(); if (mTraceErrorLogger.isAddErrorIdEnabled()) { mTraceErrorLogger.addProcessInfoAndErrorIdToTrace("system_server", Process.myPid(), Loading @@ -979,13 +1008,10 @@ public class Watchdog implements Dumpable { mTraceErrorLogger.addSubjectToTrace(subject, errorId); } final String dropboxTag; if (preWatchdog) { dropboxTag = "pre_watchdog"; CriticalEventLog.getInstance().logHalfWatchdog(subject); FrameworkStatsLog.write(FrameworkStatsLog.SYSTEM_SERVER_PRE_WATCHDOG_OCCURRED); } else { dropboxTag = "watchdog"; CriticalEventLog.getInstance().logWatchdog(subject, errorId); EventLog.writeEvent(EventLogTags.WATCHDOG, subject); // Log the atom as early as possible since it is used as a mechanism to trigger Loading @@ -993,10 +1019,14 @@ public class Watchdog implements Dumpable { // point in time when the Watchdog happens as possible. FrameworkStatsLog.write(FrameworkStatsLog.SYSTEM_SERVER_WATCHDOG_OCCURRED, subject); } return errorId; } private void collectThreadDumps(UUID errorId, boolean preWatchdog, String criticalEvents, String subject, ArrayList<Integer> pids) { String dropboxTag = preWatchdog ? "pre_watchdog" : "watchdog"; final LinkedHashMap headersMap = com.android.server.am.Flags.enableDropboxWatchdogHeaders() ? new LinkedHashMap<>(Collections.singletonMap("Watchdog-Type", dropboxTag)) : null; new LinkedHashMap<>(Collections.singletonMap("Watchdog-Type", dropboxTag)); long anrTime = SystemClock.uptimeMillis(); StringBuilder report = new StringBuilder(); report.append(ResourcePressureUtil.currentPsiState()); Loading services/tests/servicestests/src/com/android/server/WatchdogTest.java +26 −0 Original line number Diff line number Diff line Loading @@ -27,6 +27,7 @@ import android.os.SimpleClock; import androidx.test.runner.AndroidJUnit4; import com.android.server.Watchdog.HandlerChecker; import com.android.server.Watchdog.Throttler; import org.junit.Before; import org.junit.Test; Loading Loading @@ -133,6 +134,31 @@ public class WatchdogTest { verify(monitor).monitor(); } @Test public void testThrottler() { final int throttlerCoolOffMs = 100; Throttler throttler = new Throttler(mClock, throttlerCoolOffMs); // Initially not throttled assertEquals(false, throttler.isThrottled()); throttler.markTrigger(); assertEquals(true, throttler.isThrottled()); mClock.advanceBy(throttlerCoolOffMs - 1); assertEquals(true, throttler.isThrottled()); mClock.advanceBy(1); assertEquals(false, throttler.isThrottled()); } @Test public void testThrottlerDisabled() { Throttler throttler = new Throttler(mClock, 0); assertEquals(false, throttler.isThrottled()); throttler.markTrigger(); assertEquals(false, throttler.isThrottled()); } private static class TestClock extends SimpleClock { long mNowMillis = 1; Loading Loading
core/java/com/android/internal/os/flags.aconfig +11 −1 Original line number Diff line number Diff line Loading @@ -57,3 +57,13 @@ flag { bug: "314735374" is_fixed_read_only: true } flag { name: "pre_watchdog_throttle_thread_dump" namespace: "stability" description: "Throttles how often a pre-watchdog results in a thread dump" bug: "417424880" metadata { purpose: PURPOSE_BUGFIX } }
services/core/java/com/android/server/Watchdog.java +48 −18 Original line number Diff line number Diff line Loading @@ -106,6 +106,10 @@ public class Watchdog implements Dumpable { // The pre-watchdog event is similar to a full watchdog except it does not crash system server. private static final int PRE_WATCHDOG_TIMEOUT_RATIO = 4; // Throttle non-fatal thread dumps to avoid adversely affecting performance. private static final long PRE_WATCHDOG_COOL_OFF_MILLIS = com.android.internal.os.Flags.preWatchdogThrottleThreadDump() ? 60 * 60 * 1000 : 0; // These are temporally ordered: larger values as lateness increases static final int COMPLETED = 0; static final int WAITING = 1; Loading Loading @@ -453,6 +457,26 @@ public class Watchdog implements Dumpable { void monitor(); } static final class Throttler { private final Clock mUptimeClock; private final long mCoolOffPeriodMillis; private long mLastTriggerMillis = 0; Throttler(Clock uptimeClock, long coolOffPeriodMillis) { this.mUptimeClock = uptimeClock; this.mCoolOffPeriodMillis = coolOffPeriodMillis; } boolean isThrottled() { return mLastTriggerMillis > 0 && (mUptimeClock.millis() - mLastTriggerMillis) < mCoolOffPeriodMillis; } void markTrigger() { mLastTriggerMillis = mUptimeClock.millis(); } } public static Watchdog getInstance() { if (sWatchdog == null) { sWatchdog = new Watchdog(); Loading Loading @@ -828,8 +852,9 @@ public class Watchdog implements Dumpable { } private void run() { boolean waitedHalf = false; boolean preWatchdogTriggered = false; Throttler preWatchdogThrottler = new Throttler( SystemClock.uptimeClock(), PRE_WATCHDOG_COOL_OFF_MILLIS); while (true) { List<HandlerChecker> blockedCheckers = Collections.emptyList(); String subject = ""; Loading Loading @@ -881,15 +906,15 @@ public class Watchdog implements Dumpable { final int waitState = evaluateCheckerCompletionLocked(); if (waitState == COMPLETED) { // The monitors have returned; reset waitedHalf = false; preWatchdogTriggered = false; continue; } else if (waitState == WAITING) { // still waiting but within their configured intervals; back off and recheck continue; } else if (waitState == WAITED_UNTIL_PRE_WATCHDOG) { if (!waitedHalf) { if (!preWatchdogTriggered) { Slog.i(TAG, "WAITED_UNTIL_PRE_WATCHDOG"); waitedHalf = true; preWatchdogTriggered = true; // We've waited until the pre-watchdog, but we'd need to do the stack trace // dump w/o the lock. blockedCheckers = getCheckersWithStateLocked(WAITED_UNTIL_PRE_WATCHDOG); Loading @@ -914,7 +939,15 @@ public class Watchdog implements Dumpable { // // Then, if we reached the full timeout, kill this process so that the system will // restart. If we reached pre-watchdog timeout, just log some information and continue. logWatchog(doWaitedPreDump, subject, pids); // Get critical event log before logging the watchdog so that it doesn't // occur in the log. String criticalLog = CriticalEventLog.getInstance().logLinesForSystemServerTraceFile(); UUID errorId = logWatchdog(doWaitedPreDump, subject); if (!doWaitedPreDump || !preWatchdogThrottler.isThrottled()) { collectThreadDumps(errorId, doWaitedPreDump, criticalLog, subject, pids); preWatchdogThrottler.markTrigger(); } if (doWaitedPreDump) { // We have waited for only pre-watchdog timeout, we continue to wait for the Loading @@ -934,7 +967,7 @@ public class Watchdog implements Dumpable { int res = controller.systemNotResponding(subject); if (res >= 0) { Slog.i(TAG, "Activity controller requested to coninue to wait"); waitedHalf = false; preWatchdogTriggered = false; continue; } } catch (RemoteException e) { Loading Loading @@ -963,15 +996,11 @@ public class Watchdog implements Dumpable { System.exit(10); } waitedHalf = false; preWatchdogTriggered = false; } } private void logWatchog(boolean preWatchdog, String subject, ArrayList<Integer> pids) { // Get critical event log before logging the pre-watchdog so that it doesn't // occur in the log. String criticalEvents = CriticalEventLog.getInstance().logLinesForSystemServerTraceFile(); private UUID logWatchdog(boolean preWatchdog, String subject) { final UUID errorId = mTraceErrorLogger.generateErrorId(); if (mTraceErrorLogger.isAddErrorIdEnabled()) { mTraceErrorLogger.addProcessInfoAndErrorIdToTrace("system_server", Process.myPid(), Loading @@ -979,13 +1008,10 @@ public class Watchdog implements Dumpable { mTraceErrorLogger.addSubjectToTrace(subject, errorId); } final String dropboxTag; if (preWatchdog) { dropboxTag = "pre_watchdog"; CriticalEventLog.getInstance().logHalfWatchdog(subject); FrameworkStatsLog.write(FrameworkStatsLog.SYSTEM_SERVER_PRE_WATCHDOG_OCCURRED); } else { dropboxTag = "watchdog"; CriticalEventLog.getInstance().logWatchdog(subject, errorId); EventLog.writeEvent(EventLogTags.WATCHDOG, subject); // Log the atom as early as possible since it is used as a mechanism to trigger Loading @@ -993,10 +1019,14 @@ public class Watchdog implements Dumpable { // point in time when the Watchdog happens as possible. FrameworkStatsLog.write(FrameworkStatsLog.SYSTEM_SERVER_WATCHDOG_OCCURRED, subject); } return errorId; } private void collectThreadDumps(UUID errorId, boolean preWatchdog, String criticalEvents, String subject, ArrayList<Integer> pids) { String dropboxTag = preWatchdog ? "pre_watchdog" : "watchdog"; final LinkedHashMap headersMap = com.android.server.am.Flags.enableDropboxWatchdogHeaders() ? new LinkedHashMap<>(Collections.singletonMap("Watchdog-Type", dropboxTag)) : null; new LinkedHashMap<>(Collections.singletonMap("Watchdog-Type", dropboxTag)); long anrTime = SystemClock.uptimeMillis(); StringBuilder report = new StringBuilder(); report.append(ResourcePressureUtil.currentPsiState()); Loading
services/tests/servicestests/src/com/android/server/WatchdogTest.java +26 −0 Original line number Diff line number Diff line Loading @@ -27,6 +27,7 @@ import android.os.SimpleClock; import androidx.test.runner.AndroidJUnit4; import com.android.server.Watchdog.HandlerChecker; import com.android.server.Watchdog.Throttler; import org.junit.Before; import org.junit.Test; Loading Loading @@ -133,6 +134,31 @@ public class WatchdogTest { verify(monitor).monitor(); } @Test public void testThrottler() { final int throttlerCoolOffMs = 100; Throttler throttler = new Throttler(mClock, throttlerCoolOffMs); // Initially not throttled assertEquals(false, throttler.isThrottled()); throttler.markTrigger(); assertEquals(true, throttler.isThrottled()); mClock.advanceBy(throttlerCoolOffMs - 1); assertEquals(true, throttler.isThrottled()); mClock.advanceBy(1); assertEquals(false, throttler.isThrottled()); } @Test public void testThrottlerDisabled() { Throttler throttler = new Throttler(mClock, 0); assertEquals(false, throttler.isThrottled()); throttler.markTrigger(); assertEquals(false, throttler.isThrottled()); } private static class TestClock extends SimpleClock { long mNowMillis = 1; Loading