Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit e6f81cf1 authored by Christopher Tate's avatar Christopher Tate
Browse files

Support different watchdog timeouts for different entities

We need to be able to perform very lengthy operations on some threads
(e.g. the I/O thread responsible for installing multi-gigabyte APKs) but
still have long-run deadlock/hang detection applied to those threads.
Previously the watchdog mechanism applied the same policy to all
monitored threads: unresponsive after 60 seconds => restart the system.

Now, each monitored entity can have its own independent timeout after
which the watchdog declares deadlock and restarts the runtime.  The
halfway-finished intermediate thread stacks are dumped based on the
specific entity's declared timeout, not the global 30 second checking
interval.

With that new mechanism in place, the Package Manager's lengthy-I/O
thread watchdog timeout is raised to 10 minutes.

Bug 11278188

Change-Id: I512599260009c31416b2385f778681e5b9597f05
parent d9e98a4b
Loading
Loading
Loading
Loading
+77 −36
Original line number Diff line number Diff line
@@ -58,7 +58,14 @@ public class Watchdog extends Thread {
    // Set this to true to have the watchdog record kernel thread stacks when it fires
    static final boolean RECORD_KERNEL_THREADS = true;

    static final int TIME_TO_WAIT = DB ? 5*1000 : 30*1000;
    static final long DEFAULT_TIMEOUT = DB ? 10*1000 : 60*1000;
    static final long CHECK_INTERVAL = DEFAULT_TIMEOUT / 2;

    // These are temporally ordered: larger values as lateness increases
    static final int COMPLETED = 0;
    static final int WAITING = 1;
    static final int WAITED_HALF = 2;
    static final int OVERDUE = 3;

    static final String[] NATIVE_STACKS_OF_INTEREST = new String[] {
        "/system/bin/mediaserver",
@@ -87,13 +94,17 @@ public class Watchdog extends Thread {
    public final class HandlerChecker implements Runnable {
        private final Handler mHandler;
        private final String mName;
        private final long mWaitMax;
        private final ArrayList<Monitor> mMonitors = new ArrayList<Monitor>();
        private boolean mCompleted;
        private Monitor mCurrentMonitor;
        private long mStartTime;

        HandlerChecker(Handler handler, String name) {
        HandlerChecker(Handler handler, String name, long waitMaxMillis) {
            mHandler = handler;
            mName = name;
            mWaitMax = waitMaxMillis;
            mCompleted = true;
        }

        public void addMonitor(Monitor monitor) {
@@ -111,13 +122,34 @@ public class Watchdog extends Thread {
                mCompleted = true;
                return;
            }

            if (!mCompleted) {
                // we already have a check in flight, so no need
                return;
            }

            mCompleted = false;
            mCurrentMonitor = null;
            mStartTime = SystemClock.uptimeMillis();
            mHandler.postAtFrontOfQueue(this);
        }

        public boolean isCompletedLocked() {
            return mCompleted;
        public boolean isOverdueLocked() {
            return (!mCompleted) && (SystemClock.uptimeMillis() > mStartTime + mWaitMax);
        }

        public int getCompletionStateLocked() {
            if (mCompleted) {
                return COMPLETED;
            } else {
                long latency = SystemClock.uptimeMillis() - mStartTime;
                if (latency < mWaitMax/2) {
                    return WAITING;
                } else if (latency < mWaitMax) {
                    return WAITED_HALF;
                }
            }
            return OVERDUE;
        }

        public Thread getThread() {
@@ -186,16 +218,19 @@ public class Watchdog extends Thread {

        // The shared foreground thread is the main checker.  It is where we
        // will also dispatch monitor checks and do other work.
        mMonitorChecker = new HandlerChecker(FgThread.getHandler(), "foreground thread");
        mMonitorChecker = new HandlerChecker(FgThread.getHandler(),
                "foreground thread", DEFAULT_TIMEOUT);
        mHandlerCheckers.add(mMonitorChecker);
        // Add checker for main thread.  We only do a quick check since there
        // can be UI running on the thread.
        mHandlerCheckers.add(new HandlerChecker(new Handler(Looper.getMainLooper()),
                "main thread"));
                "main thread", DEFAULT_TIMEOUT));
        // Add checker for shared UI thread.
        mHandlerCheckers.add(new HandlerChecker(UiThread.getHandler(), "ui thread"));
        mHandlerCheckers.add(new HandlerChecker(UiThread.getHandler(),
                "ui thread", DEFAULT_TIMEOUT));
        // And also check IO thread.
        mHandlerCheckers.add(new HandlerChecker(IoThread.getHandler(), "i/o thread"));
        mHandlerCheckers.add(new HandlerChecker(IoThread.getHandler(),
                "i/o thread", DEFAULT_TIMEOUT));
    }

    public void init(Context context, BatteryService battery,
@@ -242,11 +277,15 @@ public class Watchdog extends Thread {
    }

    public void addThread(Handler thread, String name) {
        addThread(thread, name, DEFAULT_TIMEOUT);
    }

    public void addThread(Handler thread, String name, long timeoutMillis) {
        synchronized (this) {
            if (isAlive()) {
                throw new RuntimeException("Threads can't be added once the Watchdog is running");
            }
            mHandlerCheckers.add(new HandlerChecker(thread, name));
            mHandlerCheckers.add(new HandlerChecker(thread, name, timeoutMillis));
        }
    }

@@ -259,21 +298,20 @@ public class Watchdog extends Thread {
        pms.reboot(false, reason, false);
    }

    private boolean haveAllCheckersCompletedLocked() {
    private int evaluateCheckerCompletionLocked() {
        int state = COMPLETED;
        for (int i=0; i<mHandlerCheckers.size(); i++) {
            HandlerChecker hc = mHandlerCheckers.get(i);
            if (!hc.isCompletedLocked()) {
                return false;
            state = Math.max(state, hc.getCompletionStateLocked());
        }
        }
        return true;
        return state;
    }

    private ArrayList<HandlerChecker> getBlockedCheckersLocked() {
        ArrayList<HandlerChecker> checkers = new ArrayList<HandlerChecker>();
        for (int i=0; i<mHandlerCheckers.size(); i++) {
            HandlerChecker hc = mHandlerCheckers.get(i);
            if (!hc.isCompletedLocked()) {
            if (hc.isOverdueLocked()) {
                checkers.add(hc);
            }
        }
@@ -299,15 +337,13 @@ public class Watchdog extends Thread {
            final String subject;
            final boolean allowRestart;
            synchronized (this) {
                long timeout = TIME_TO_WAIT;
                if (!waitedHalf) {
                    // If we are not at the half-point of waiting, perform a
                    // new set of checks.  Otherwise we are still waiting for a previous set.
                long timeout = CHECK_INTERVAL;
                // Make sure we (re)spin the checkers that have become idle within
                // this wait-and-check interval
                for (int i=0; i<mHandlerCheckers.size(); i++) {
                    HandlerChecker hc = mHandlerCheckers.get(i);
                    hc.scheduleCheckLocked();
                }
                }

                // NOTE: We use uptimeMillis() here because we do not want to increment the time we
                // wait while asleep. If the device is asleep then the thing that we are waiting
@@ -320,15 +356,18 @@ public class Watchdog extends Thread {
                    } catch (InterruptedException e) {
                        Log.wtf(TAG, e);
                    }
                    timeout = TIME_TO_WAIT - (SystemClock.uptimeMillis() - start);
                    timeout = CHECK_INTERVAL - (SystemClock.uptimeMillis() - start);
                }

                if (haveAllCheckersCompletedLocked()) {
                    // The monitors have returned.
                final int waitState = evaluateCheckerCompletionLocked();
                if (waitState == COMPLETED) {
                    // The monitors have returned; reset
                    waitedHalf = false;
                    continue;
                }

                } else if (waitState == WAITING) {
                    // still waiting but within their configured intervals; back off and recheck
                    continue;
                } else if (waitState == WAITED_HALF) {
                    if (!waitedHalf) {
                        // We've waited half the deadlock-detection interval.  Pull a stack
                        // trace and wait another half.
@@ -337,9 +376,11 @@ public class Watchdog extends Thread {
                        ActivityManagerService.dumpStackTraces(true, pids, null, null,
                                NATIVE_STACKS_OF_INTEREST);
                        waitedHalf = true;
                    }
                    continue;
                }

                // something is overdue!
                blockedCheckers = getBlockedCheckersLocked();
                subject = describeCheckersLocked(blockedCheckers);
                allowRestart = mAllowRestart;
+10 −1
Original line number Diff line number Diff line
@@ -220,6 +220,14 @@ public class PackageManagerService extends IPackageManager.Stub {

    static final int REMOVE_CHATTY = 1<<16;

    /**
     * Timeout (in milliseconds) after which the watchdog should declare that
     * our handler thread is wedged.  The usual default for such things is one
     * minute but we sometimes do very lengthy I/O operations on this thread,
     * such as installing multi-gigabyte applications, so ours needs to be longer.
     */
    private static final long WATCHDOG_TIMEOUT = 1000*60*10;     // ten minutes

    /**
     * Whether verification is enabled by default.
     */
@@ -1115,7 +1123,8 @@ public class PackageManagerService extends IPackageManager.Stub {
        synchronized (mPackages) {
            mHandlerThread.start();
            mHandler = new PackageHandler(mHandlerThread.getLooper());
            Watchdog.getInstance().addThread(mHandler, mHandlerThread.getName());
            Watchdog.getInstance().addThread(mHandler, mHandlerThread.getName(),
                    WATCHDOG_TIMEOUT);

            File dataDir = Environment.getDataDirectory();
            mAppDataDir = new File(dataDir, "data");