Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 12278ded authored by Zimuzo Ezeozue's avatar Zimuzo Ezeozue Committed by Android (Google) Code Review
Browse files

Merge "Add Packagewatchdog to monitor health of apps on the device"

parents 83ddf8e8 6efba543
Loading
Loading
Loading
Loading
+572 −0
Original line number Diff line number Diff line
/*
 * Copyright (C) 2018 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.server;

import android.content.Context;
import android.os.Environment;
import android.os.Handler;
import android.os.HandlerThread;
import android.os.Looper;
import android.os.Message;
import android.os.Process;
import android.os.SystemClock;
import android.text.TextUtils;
import android.util.ArrayMap;
import android.util.AtomicFile;
import android.util.Log;
import android.util.Slog;
import android.util.Xml;

import com.android.internal.annotations.GuardedBy;
import com.android.internal.util.FastXmlSerializer;
import com.android.internal.util.XmlUtils;

import libcore.io.IoUtils;

import org.xmlpull.v1.XmlPullParser;
import org.xmlpull.v1.XmlPullParserException;
import org.xmlpull.v1.XmlSerializer;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;

/**
 * Monitors the health of packages on the system and notifies interested observers when packages
 * fail. All registered observers will be notified until an observer takes a mitigation action.
 */
public class PackageWatchdog {
    // Log tag used for every message emitted by this class
    private static final String TAG = "PackageWatchdog";
    // Window, in milliseconds, over which failures of a package are counted before the count
    // is restarted (see MonitoredPackage#onFailure)
    private static final int TRIGGER_DURATION_MS = 60000;
    // Number of package failures within the window above at which observers are notified
    private static final int TRIGGER_FAILURE_COUNT = 5;
    // Value written to the ATTR_VERSION attribute of the persisted XML document
    private static final int DB_VERSION = 1;
    // Element and attribute names of the persisted XML document
    private static final String TAG_PACKAGE_WATCHDOG = "package-watchdog";
    private static final String TAG_PACKAGE = "package";
    private static final String TAG_OBSERVER = "observer";
    private static final String ATTR_VERSION = "version";
    private static final String ATTR_NAME = "name";
    private static final String ATTR_DURATION = "duration";
    // Message handled by IoHandler to persist the monitored packages
    private static final int MESSAGE_SAVE_FILE = 1;

    // Singleton instance, lazily created by getInstance
    private static PackageWatchdog sPackageWatchdog;

    // Guards the observer maps below
    private final Object mLock = new Object();
    // System server context
    private final Context mContext;
    // Handler to run package cleanup runnables; bound to the looper of the constructing thread
    private final Handler mTimerHandler;
    // Background-priority thread hosting mIoHandler
    private final HandlerThread mIoThread = new HandlerThread("package_watchdog_io",
            Process.THREAD_PRIORITY_BACKGROUND);
    // Handler that processes MESSAGE_SAVE_FILE on mIoThread
    private final Handler mIoHandler;
    // Maps observer names to package observers that have been registered since the last boot
    @GuardedBy("mLock")
    final Map<String, PackageHealthObserver> mRegisteredObservers = new ArrayMap<>();
    // Maps observer names to internal observers (registered or not) loaded from file
    @GuardedBy("mLock")
    final Map<String, ObserverInternal> mAllObservers = new ArrayMap<>();
    // /data/system/ directory
    private final File mSystemDir = new File(Environment.getDataDirectory(), "system");
    // File containing the XML data of monitored packages
    private final AtomicFile mPolicyFile =
            new AtomicFile(new File(mSystemDir, "package-watchdog.xml"));
    // Runnable to prune monitored packages that have expired
    private final Runnable mPackageCleanup;
    // SystemClock#uptimeMillis at which mPackageCleanup was last (re)scheduled.
    // 0 if mPackageCleanup not running.
    private long mUptimeAtLastRescheduleMs;
    // Delay, in milliseconds, that mPackageCleanup was last scheduled with.
    // 0 if mPackageCleanup not running.
    private long mDurationAtLastReschedule;

    private PackageWatchdog(Context context) {
        mContext = context;
        mTimerHandler = new Handler(Looper.myLooper());
        mIoThread.start();
        mIoHandler = new IoHandler(mIoThread.getLooper());
        mPackageCleanup = this::rescheduleCleanup;
        loadFromFile();
    }

    /**
     * Returns the singleton PackageWatchdog, constructing it with {@code context} on first use.
     * The {@code context} argument is ignored once the instance exists.
     */
    public static synchronized PackageWatchdog getInstance(Context context) {
        if (sPackageWatchdog != null) {
            return sPackageWatchdog;
        }
        sPackageWatchdog = new PackageWatchdog(context);
        return sPackageWatchdog;
    }

    /**
     * Registers {@code observer} so that it is notified of package failures.
     *
     * <p>Observers are expected to call this on boot. No packages are specified here; any
     * packages the observer requested during a previous boot resume being observed.
     *
     * @param observer the observer to register, keyed by {@link PackageHealthObserver#getName()}
     */
    public void registerHealthObserver(PackageHealthObserver observer) {
        synchronized (mLock) {
            mRegisteredObservers.put(observer.getName(), observer);
            final boolean cleanupIdle = mDurationAtLastReschedule == 0;
            if (cleanupIdle) {
                // No cleanup is currently scheduled, so kick one off
                rescheduleCleanup();
            }
        }
    }

    /**
     * Starts observing the health of the {@code packageNames} for {@code observer} and notifies
     * {@code observer} of any package failures within the monitoring duration.
     *
     * <p>If {@code observer} is already monitoring a package in {@code packageNames},
     * the monitoring window of that package will be reset to {@code hours}.
     *
     * <p>The observer is also registered, the cleanup timer is rescheduled and an asynchronous
     * save of the monitored packages is requested.
     *
     * @param observer the observer interested in the packages
     * @param packageNames names of the packages to monitor, must not be empty
     * @param hours number of hours to monitor each package for, must be at least 1
     * @throws IllegalArgumentException if {@code packageNames} is empty
     * or {@code hours} is less than 1
     */
    public void startObservingHealth(PackageHealthObserver observer, List<String> packageNames,
            int hours) {
        if (packageNames.isEmpty() || hours < 1) {
            // Note the trailing space: the two literals are concatenated into one sentence
            throw new IllegalArgumentException("Observation not started, no packages specified "
                    + "or invalid hours");
        }
        long durationMs = TimeUnit.HOURS.toMillis(hours);
        List<MonitoredPackage> packages = new ArrayList<>();
        for (String packageName : packageNames) {
            packages.add(new MonitoredPackage(packageName, durationMs));
        }
        synchronized (mLock) {
            ObserverInternal oldObserver = mAllObservers.get(observer.getName());
            if (oldObserver == null) {
                Slog.d(TAG, observer.getName() + " started monitoring health of packages "
                        + packageNames);
                mAllObservers.put(observer.getName(),
                        new ObserverInternal(observer.getName(), packages));
            } else {
                Slog.d(TAG, observer.getName() + " added the following packages to monitor "
                        + packageNames);
                // Replaces entries for packages already monitored, resetting their window
                oldObserver.updatePackages(packages);
            }
        }
        registerHealthObserver(observer);
        // Always reschedule because we may need to expire packages
        // earlier than we are already scheduled for
        rescheduleCleanup();
        sendIoMessage(MESSAGE_SAVE_FILE);
    }

    /**
     * Stops {@code observer} from being notified of package failures and drops every package
     * recorded for it, including packages carried over from a previous boot. An asynchronous
     * save is requested afterwards so the removal is persisted.
     *
     * @param observer the observer to remove, looked up by its name
     */
    public void unregisterHealthObserver(PackageHealthObserver observer) {
        synchronized (mLock) {
            final String observerName = observer.getName();
            mRegisteredObservers.remove(observerName);
            mAllObservers.remove(observerName);
        }
        sendIoMessage(MESSAGE_SAVE_FILE);
    }

    // TODO(zezeozue:) Accept current versionCodes of failing packages?
    /**
     * Called when a process fails either due to a crash or ANR.
     *
     * <p>For each failed package, every known observer records the failure. Once an observer's
     * failure threshold for the package is reached and that observer is registered, it is
     * notified; notification for that package stops as soon as an observer reports that it has
     * taken action, and the remaining observers are not consulted for that package.
     *
     * <p>This method could be called frequently if there is a severe problem on the device.
     * Observer callbacks are invoked while the internal lock is held.
     *
     * @param packages names of the packages contained in the failed process
     */
    public void onPackageFailure(String[] packages) {
        synchronized (mLock) {
            if (mRegisteredObservers.isEmpty()) {
                return;
            }
            for (String failedPackage : packages) {
                for (ObserverInternal internal : mAllObservers.values()) {
                    if (!internal.onPackageFailure(failedPackage)) {
                        // Package not monitored by this observer, or threshold not reached
                        continue;
                    }
                    PackageHealthObserver registered = mRegisteredObservers.get(internal.mName);
                    if (registered == null) {
                        // Observer known from file but not registered during this boot
                        continue;
                    }
                    if (registered.onHealthCheckFailed(failedPackage)) {
                        // Observer has handled, do not notify other observers
                        break;
                    }
                }
            }
        }
    }

    // TODO(zezeozue): Optimize write? Maybe only write a separate smaller file?
    // This currently adds about 7ms extra to shutdown thread
    /**
     * Writes the package information to file during shutdown.
     *
     * <p>Monitoring durations are first reduced by the time elapsed since the cleanup was last
     * rescheduled, then the result is written synchronously on the calling thread. The whole
     * operation runs under {@code mLock} because it reads and mutates {@code mAllObservers}.
     */
    public void writeNow() {
        synchronized (mLock) {
            if (mAllObservers.isEmpty()) {
                return;
            }
            // Prune before cancelling pending saves: pruneObservers itself queues a
            // MESSAGE_SAVE_FILE, which would otherwise be left pending and could run on the
            // IO thread while the synchronous write below is in progress
            pruneObservers(SystemClock.uptimeMillis() - mUptimeAtLastRescheduleMs);
            mIoHandler.removeMessages(MESSAGE_SAVE_FILE);
            saveToFile();
            Slog.i(TAG, "Last write to update package durations");
        }
    }

    /** Register instances of this interface to receive notifications on package failure. */
    public interface PackageHealthObserver {
        /**
         * Called when health check fails for {@code packageName}.
         *
         * @param packageName name of the package whose health check failed
         * @return {@code true} if action was taken and other observers should not be notified of
         * this failure, {@code false} otherwise.
         */
        boolean onHealthCheckFailed(String packageName);

        // TODO(zezeozue): Ensure uniqueness?
        /**
         * Identifier for the observer, should not change across device updates otherwise the
         * watchdog may drop observing packages with the old name.
         *
         * @return the name under which this observer and its packages are tracked
         */
        String getName();
    }

    /**
     * Reschedules handler to prune expired packages from observers.
     *
     * <p>This is both called directly when observers or packages change and run as the
     * {@code mPackageCleanup} timer callback. A new cleanup is scheduled when none is running,
     * when the previously scheduled cleanup is already due, or when a package now expires
     * earlier than the currently scheduled cleanup. If there are no monitored packages the
     * scheduling state is reset and nothing is posted.
     */
    private void rescheduleCleanup() {
        synchronized (mLock) {
            long nextDurationToScheduleMs = getEarliestPackageExpiryLocked();
            if (nextDurationToScheduleMs == Long.MAX_VALUE) {
                Slog.i(TAG, "No monitored packages, ending package cleanup");
                mDurationAtLastReschedule = 0;
                mUptimeAtLastRescheduleMs = 0;
                return;
            }
            long uptimeMs = SystemClock.uptimeMillis();
            // 0 if mPackageCleanup not running
            long elapsedDurationMs = mUptimeAtLastRescheduleMs == 0
                    ? 0 : uptimeMs - mUptimeAtLastRescheduleMs;
            // Zero or negative once the previously scheduled cleanup is due, which is the case
            // whenever this method runs as the timer callback itself
            long remainingDurationMs = mDurationAtLastReschedule - elapsedDurationMs;

            if (mUptimeAtLastRescheduleMs == 0
                    || remainingDurationMs <= 0
                    || nextDurationToScheduleMs < remainingDurationMs) {
                // First schedule, a cleanup that is due, or an earlier reschedule.
                // Without the "due" case the timer callback would never prune or re-arm,
                // because the not-yet-pruned earliest duration is never below a remaining
                // time that has already run out.
                pruneObservers(elapsedDurationMs);
                mTimerHandler.removeCallbacks(mPackageCleanup);
                mTimerHandler.postDelayed(mPackageCleanup, nextDurationToScheduleMs);
                mDurationAtLastReschedule = nextDurationToScheduleMs;
                mUptimeAtLastRescheduleMs = uptimeMs;
            }
        }
    }

    /**
     * Finds the smallest remaining monitoring duration across every package of every observer.
     *
     * @return the shortest duration in milliseconds, or {@link Long#MAX_VALUE} if there are no
     * observed packages
     */
    private long getEarliestPackageExpiryLocked() {
        long earliestMs = Long.MAX_VALUE;
        for (ObserverInternal observer : mAllObservers.values()) {
            for (MonitoredPackage monitored : observer.mPackages.values()) {
                earliestMs = Math.min(earliestMs, monitored.mDurationMs);
            }
        }
        Slog.v(TAG, "Earliest package time is " + earliestMs);
        return earliestMs;
    }

    /**
     * Subtracts {@code elapsedMs} milliseconds from the duration of every monitored package,
     * dropping packages that have expired and observers left without packages. An asynchronous
     * save is requested afterwards. Does nothing when {@code elapsedMs} is 0.
     *
     * @param elapsedMs time in milliseconds to deduct from each monitoring duration
     */
    private void pruneObservers(long elapsedMs) {
        if (elapsedMs == 0) {
            return;
        }
        synchronized (mLock) {
            Slog.d(TAG, "Removing expired packages after " + elapsedMs + "ms");
            for (Iterator<ObserverInternal> it = mAllObservers.values().iterator();
                    it.hasNext(); ) {
                final ObserverInternal current = it.next();
                final boolean stillMonitoring = current.updateMonitoringDurations(elapsedMs);
                if (stillMonitoring) {
                    continue;
                }
                Slog.i(TAG, "Discarding observer " + current.mName + ". All packages expired");
                it.remove();
            }
        }
        sendIoMessage(MESSAGE_SAVE_FILE);
    }

    /**
     * Replaces the contents of mAllObservers with the observers stored in the policy file.
     * A missing file is treated as "nothing to monitor"; read and parse failures are reported
     * and leave whatever was loaded up to that point.
     *
     * <p>Note that this is <b>not</b> thread safe and should only be called
     * from the constructor.
     */
    private void loadFromFile() {
        mAllObservers.clear();
        InputStream in = null;
        try {
            in = mPolicyFile.openRead();
            final XmlPullParser xml = Xml.newPullParser();
            xml.setInput(in, StandardCharsets.UTF_8.name());
            XmlUtils.beginDocument(xml, TAG_PACKAGE_WATCHDOG);
            final int rootDepth = xml.getDepth();
            while (XmlUtils.nextElementWithin(xml, rootDepth)) {
                final ObserverInternal restored = ObserverInternal.read(xml);
                if (restored == null) {
                    // Element did not describe a usable observer
                    continue;
                }
                mAllObservers.put(restored.mName, restored);
            }
        } catch (FileNotFoundException e) {
            // Nothing to monitor
        } catch (IOException e) {
            Log.wtf(TAG, "Unable to read monitored packages", e);
        } catch (NumberFormatException e) {
            Log.wtf(TAG, "Unable to parse monitored package windows", e);
        } catch (XmlPullParserException e) {
            Log.wtf(TAG, "Unable to parse monitored packages", e);
        } finally {
            IoUtils.closeQuietly(in);
        }
    }

    /**
     * Persists mAllObservers to file and ignores threshold information.
     *
     * <p>The write is performed while holding {@code mLock} so that the observer map cannot be
     * modified by another thread while it is being serialized. If any observer fails to
     * serialize, the write is abandoned and the previous file contents are kept.
     *
     * @return {@code true} if the file was written, {@code false} otherwise
     */
    private boolean saveToFile() {
        synchronized (mLock) {
            FileOutputStream stream;
            try {
                stream = mPolicyFile.startWrite();
            } catch (IOException e) {
                Slog.w(TAG, "Cannot update monitored packages", e);
                return false;
            }

            try {
                XmlSerializer out = new FastXmlSerializer();
                out.setOutput(stream, StandardCharsets.UTF_8.name());
                out.startDocument(null, true);
                out.startTag(null, TAG_PACKAGE_WATCHDOG);
                out.attribute(null, ATTR_VERSION, Integer.toString(DB_VERSION));
                for (ObserverInternal observer : mAllObservers.values()) {
                    if (!observer.write(out)) {
                        // Do not commit a document with a partially written observer
                        throw new IOException("Failed to write observer " + observer.mName);
                    }
                }
                out.endTag(null, TAG_PACKAGE_WATCHDOG);
                out.endDocument();
                mPolicyFile.finishWrite(stream);
                return true;
            } catch (IOException e) {
                Slog.w(TAG, "Failed to save monitored packages, restoring backup", e);
                mPolicyFile.failWrite(stream);
                return false;
            } finally {
                IoUtils.closeQuietly(stream);
            }
        }
    }

    /**
     * Queues a message of type {@code what} on the IO handler unless one of that type is
     * already pending.
     */
    private void sendIoMessage(int what) {
        if (mIoHandler.hasMessages(what)) {
            return;
        }
        mIoHandler.sendMessage(Message.obtain(mIoHandler, what));
    }

    /**
     * Represents an observer monitoring a set of packages along with the failure thresholds for
     * each package.
     */
    static class ObserverInternal {
        public final String mName;
        public final ArrayMap<String, MonitoredPackage> mPackages;
        // Dedicated monitor for mPackages. A private object is used instead of mName because
        // String instances may be interned and shared with unrelated code that could
        // synchronize on the very same object.
        private final Object mPackagesLock = new Object();

        ObserverInternal(String name, List<MonitoredPackage> packages) {
            mName = name;
            mPackages = new ArrayMap<>();
            updatePackages(packages);
        }

        /**
         * Writes important details to file. Doesn't persist any package failure thresholds.
         *
         * <p>The package map is locked for the duration of the write so that packages cannot be
         * added or removed through this class while they are being serialized.
         *
         * @param out serializer positioned where the observer element should be written
         * @return {@code true} if the observer was written, {@code false} if an
         * {@link IOException} occurred
         */
        public boolean write(XmlSerializer out) {
            synchronized (mPackagesLock) {
                try {
                    out.startTag(null, TAG_OBSERVER);
                    out.attribute(null, ATTR_NAME, mName);
                    for (int i = 0; i < mPackages.size(); i++) {
                        MonitoredPackage p = mPackages.valueAt(i);
                        out.startTag(null, TAG_PACKAGE);
                        out.attribute(null, ATTR_NAME, p.mName);
                        out.attribute(null, ATTR_DURATION, String.valueOf(p.mDurationMs));
                        out.endTag(null, TAG_PACKAGE);
                    }
                    out.endTag(null, TAG_OBSERVER);
                    return true;
                } catch (IOException e) {
                    Slog.w(TAG, "Cannot save observer", e);
                    return false;
                }
            }
        }

        /**
         * Adds {@code packages} to this observer, replacing any package already monitored under
         * the same name.
         */
        public void updatePackages(List<MonitoredPackage> packages) {
            synchronized (mPackagesLock) {
                for (MonitoredPackage p : packages) {
                    mPackages.put(p.mName, p);
                }
            }
        }

        /**
         * Reduces the monitoring durations of all packages observed by this observer by
         * {@code elapsedMs}. If the resulting duration is not greater than 0, the package is
         * removed from observation.
         *
         * @return {@code true} if there are still packages to be observed, {@code false} otherwise
         */
        public boolean updateMonitoringDurations(long elapsedMs) {
            synchronized (mPackagesLock) {
                Iterator<MonitoredPackage> it = mPackages.values().iterator();
                while (it.hasNext()) {
                    MonitoredPackage p = it.next();
                    long newDuration = p.mDurationMs - elapsedMs;
                    if (newDuration > 0) {
                        p.mDurationMs = newDuration;
                    } else {
                        it.remove();
                    }
                }
                return !mPackages.isEmpty();
            }
        }

        /**
         * Increments failure counts of {@code packageName}.
         *
         * @return {@code true} if the failure threshold is reached, {@code false} otherwise or
         * if {@code packageName} is not monitored by this observer
         */
        public boolean onPackageFailure(String packageName) {
            synchronized (mPackagesLock) {
                MonitoredPackage p = mPackages.get(packageName);
                if (p != null) {
                    return p.onFailure();
                }
                return false;
            }
        }

        /**
         * Returns one ObserverInternal from the {@code parser} and advances its state.
         *
         * <p>Note that this method is <b>not</b> thread safe. It should only be called from
         * #loadFromFile which in turn is only called on construction of the
         * singleton PackageWatchdog.
         *
         * @return the parsed observer, or {@code null} if the current element is not an observer
         * element, has no name, contains no valid packages or could not be read
         * @throws NumberFormatException if a package element has a missing or malformed duration
         **/
        public static ObserverInternal read(XmlPullParser parser) {
            if (!TAG_OBSERVER.equals(parser.getName())) {
                // Not an observer element. Bail out instead of building an observer with a
                // null name from whatever children the unknown element happens to contain.
                return null;
            }
            String observerName = parser.getAttributeValue(null, ATTR_NAME);
            if (TextUtils.isEmpty(observerName)) {
                return null;
            }
            List<MonitoredPackage> packages = new ArrayList<>();
            int innerDepth = parser.getDepth();
            try {
                while (XmlUtils.nextElementWithin(parser, innerDepth)) {
                    if (TAG_PACKAGE.equals(parser.getName())) {
                        String packageName = parser.getAttributeValue(null, ATTR_NAME);
                        long duration = Long.parseLong(
                                parser.getAttributeValue(null, ATTR_DURATION));
                        if (!TextUtils.isEmpty(packageName)) {
                            packages.add(new MonitoredPackage(packageName, duration));
                        }
                    }
                }
            } catch (IOException | XmlPullParserException e) {
                Slog.w(TAG, "Unable to read packages of observer " + observerName, e);
                return null;
            }
            if (packages.isEmpty()) {
                return null;
            }
            return new ObserverInternal(observerName, packages);
        }
    }

    /** A package under observation together with its remaining monitoring time. */
    static class MonitoredPackage {
        public final String mName;
        // Remaining system uptime, in milliseconds, for which the package is monitored
        public long mDurationMs;
        // System uptime at which the current failure-counting window started
        private long mUptimeStartMs;
        // Failures recorded since mUptimeStartMs
        private int mFailures;

        MonitoredPackage(String name, long durationMs) {
            mName = name;
            mDurationMs = durationMs;
        }

        /**
         * Records one failure. If the current counting window is older than the trigger
         * duration, a new window is started with this failure as its first; otherwise the
         * failure is added to the current window.
         *
         * @return {@code true} if the failure count has reached the trigger threshold,
         * {@code false} otherwise
         */
        public synchronized boolean onFailure() {
            final long nowMs = SystemClock.uptimeMillis();
            final boolean withinWindow = nowMs - mUptimeStartMs <= TRIGGER_DURATION_MS;
            if (withinWindow) {
                mFailures++;
            } else {
                // TODO(zezeozue): Reseting to 1 is not correct
                // because there may be more than 1 failure in the last trigger window from now
                // This is the RescueParty impl, will leave for now
                mFailures = 1;
                mUptimeStartMs = nowMs;
            }
            return mFailures >= TRIGGER_FAILURE_COUNT;
        }
    }

    /** Handler that persists the monitored packages when it receives MESSAGE_SAVE_FILE. */
    private class IoHandler extends Handler {
        IoHandler(Looper looper) {
            super(looper);
        }

        @Override
        public void handleMessage(Message msg) {
            // Only one message type is understood; anything else is ignored
            if (msg.what == MESSAGE_SAVE_FILE) {
                saveToFile();
            }
        }
    }
}
+5 −1
Original line number Diff line number Diff line
@@ -336,6 +336,7 @@ import com.android.server.IoThread;
import com.android.server.LocalServices;
import com.android.server.LockGuard;
import com.android.server.NetworkManagementInternal;
import com.android.server.PackageWatchdog;
import com.android.server.RescueParty;
import com.android.server.ServiceThread;
import com.android.server.SystemConfig;
@@ -587,6 +588,7 @@ public class ActivityManagerService extends IActivityManager.Stub
    public final PendingIntentController mPendingIntentController;
    final AppErrors mAppErrors;
    final PackageWatchdog mPackageWatchdog;
    /**
     * Indicates the maximum time spent waiting for the network rules to get updated.
@@ -2209,6 +2211,7 @@ public class ActivityManagerService extends IActivityManager.Stub
        mContext = mInjector.getContext();
        mUiContext = null;
        mAppErrors = null;
        mPackageWatchdog = null;
        mActiveUids = new ActiveUids(this, false /* postChangesToAtm */);
        mAppOpsService = mInjector.getAppOpsService(null /* file */, null /* handler */);
        mBatteryStatsService = null;
@@ -2275,7 +2278,8 @@ public class ActivityManagerService extends IActivityManager.Stub
        mServices = new ActiveServices(this);
        mProviderMap = new ProviderMap(this);
        mAppErrors = new AppErrors(mUiContext, this);
        mPackageWatchdog = PackageWatchdog.getInstance(mUiContext);
        mAppErrors = new AppErrors(mUiContext, this, mPackageWatchdog);
        mActiveUids = new ActiveUids(this, true /* postChangesToAtm */);
        final File systemDir = SystemServiceManager.ensureSystemDir();
+22 −5

File changed.

Preview size limit exceeded, changes collapsed.

+2 −0
Original line number Diff line number Diff line
@@ -296,6 +296,7 @@ import com.android.server.EventLogTags;
import com.android.server.FgThread;
import com.android.server.LocalServices;
import com.android.server.LockGuard;
import com.android.server.PackageWatchdog;
import com.android.server.ServiceThread;
import com.android.server.SystemConfig;
import com.android.server.SystemServerInitThreadPool;
@@ -9492,6 +9493,7 @@ public class PackageManagerService extends IPackageManager.Stub
        mPackageUsage.writeNow(mPackages);
        mCompilerStats.writeNow();
        mDexManager.writePackageDexUsageNow();
        PackageWatchdog.getInstance(mContext).writeNow();
        // This is the last chance to write out pending restriction settings
        synchronized (mPackages) {