Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 3a88d55f authored by Alex Buynytskyy's avatar Alex Buynytskyy Committed by Android (Google) Code Review
Browse files

Merge "DL lifecycle: handle slow DL binding." into sc-dev

parents b286e715 7e06d712
Loading
Loading
Loading
Loading
+10 −8
Original line number Diff line number Diff line
@@ -23,32 +23,34 @@ package android.content.pm;
oneway interface IDataLoaderStatusListener {
    /** The DataLoader process died, binder disconnected or class destroyed. */
    const int DATA_LOADER_DESTROYED = 0;
    /** The system is in process of binding to the DataLoader. */
    const int DATA_LOADER_BINDING = 1;
    /** DataLoader process is running and bound to. */
    const int DATA_LOADER_BOUND = 1;
    const int DATA_LOADER_BOUND = 2;
    /** DataLoader has handled onCreate(). */
    const int DATA_LOADER_CREATED = 2;
    const int DATA_LOADER_CREATED = 3;

    /** DataLoader can receive missing pages and read pages notifications,
     *  and ready to provide data. */
    const int DATA_LOADER_STARTED = 3;
    const int DATA_LOADER_STARTED = 4;
    /** DataLoader no longer ready to provide data and is not receiving
    *   any notifications from IncFS. */
    const int DATA_LOADER_STOPPED = 4;
    const int DATA_LOADER_STOPPED = 5;

    /** DataLoader streamed everything necessary to continue installation. */
    const int DATA_LOADER_IMAGE_READY = 5;
    const int DATA_LOADER_IMAGE_READY = 6;
    /** Installation can't continue as DataLoader failed to stream necessary data. */
    const int DATA_LOADER_IMAGE_NOT_READY = 6;
    const int DATA_LOADER_IMAGE_NOT_READY = 7;

    /** DataLoader instance can't run at the moment, but might recover later.
     *  It's up to system to decide if the app is still usable. */
    const int DATA_LOADER_UNAVAILABLE = 7;
    const int DATA_LOADER_UNAVAILABLE = 8;

    /** DataLoader reports that this instance is invalid and can never be restored.
    *   Warning: this is a terminal status that data loader should use carefully and
    *            the system should almost never use - e.g. only if all recovery attempts
    *            fail and all retry limits are exceeded. */
    const int DATA_LOADER_UNRECOVERABLE = 8;
    const int DATA_LOADER_UNRECOVERABLE = 9;

    /** There are no known issues with the data stream. */
    const int STREAM_HEALTHY = 0;
+2 −0
Original line number Diff line number Diff line
@@ -180,6 +180,8 @@ public class DataLoaderManagerService extends SystemService {
            mId = id;
            mListener = listener;
            mDataLoader = null;

            callListener(IDataLoaderStatusListener.DATA_LOADER_BINDING);
        }

        @Override
+6 −4
Original line number Diff line number Diff line
@@ -1011,8 +1011,9 @@ public class PackageInstallerSession extends IPackageInstallerSession.Stub {
                        "DataLoader installation of APEX modules is not allowed.");
            }

            if (this.params.dataLoaderParams.getComponentName().getPackageName()
                    == SYSTEM_DATA_LOADER_PACKAGE && mContext.checkCallingOrSelfPermission(
            boolean systemDataLoader = SYSTEM_DATA_LOADER_PACKAGE.equals(
                    this.params.dataLoaderParams.getComponentName().getPackageName());
            if (systemDataLoader && mContext.checkCallingOrSelfPermission(
                    Manifest.permission.USE_SYSTEM_DATA_LOADERS)
                    != PackageManager.PERMISSION_GRANTED) {
                throw new SecurityException("You need the "
@@ -3653,6 +3654,7 @@ public class PackageInstallerSession extends IPackageInstallerSession.Stub {
            @Override
            public void onStatusChanged(int dataLoaderId, int status) {
                switch (status) {
                    case IDataLoaderStatusListener.DATA_LOADER_BINDING:
                    case IDataLoaderStatusListener.DATA_LOADER_STOPPED:
                    case IDataLoaderStatusListener.DATA_LOADER_DESTROYED:
                        return;
@@ -3763,8 +3765,8 @@ public class PackageInstallerSession extends IPackageInstallerSession.Stub {
            healthCheckParams.unhealthyTimeoutMs = INCREMENTAL_STORAGE_UNHEALTHY_TIMEOUT_MS;
            healthCheckParams.unhealthyMonitoringMs = INCREMENTAL_STORAGE_UNHEALTHY_MONITORING_MS;

            final boolean systemDataLoader =
                    params.getComponentName().getPackageName() == SYSTEM_DATA_LOADER_PACKAGE;
            final boolean systemDataLoader = SYSTEM_DATA_LOADER_PACKAGE.equals(
                    params.getComponentName().getPackageName());

            final IStorageHealthListener healthListener = new IStorageHealthListener.Stub() {
                @Override
+79 −11
Original line number Diff line number Diff line
@@ -74,6 +74,13 @@ struct Constants {

    // If DL was up and not crashing for 10mins, we consider it healthy and reset all delays.
    static constexpr auto healthyDataLoaderUptime = 10min;

    // For healthy DLs, we'll retry every ~5secs for ~10min
    static constexpr auto bindRetryInterval = 5s;
    static constexpr auto bindGracePeriod = 10min;

    static constexpr auto bindingTimeout = 1min;

    // 10s, 100s (~2min), 1000s (~15min), 10000s (~3hrs)
    static constexpr auto minBindDelay = 10s;
    static constexpr auto maxBindDelay = 10000s;
@@ -293,6 +300,7 @@ IncrementalService::IncrementalService(ServiceManagerWrapper&& sm, std::string_v
        mTimedQueue(sm.getTimedQueue()),
        mProgressUpdateJobQueue(sm.getProgressUpdateJobQueue()),
        mFs(sm.getFs()),
        mClock(sm.getClock()),
        mIncrementalDir(rootDir) {
    CHECK(mVold) << "Vold service is unavailable";
    CHECK(mDataLoaderManager) << "DataLoaderManagerService is unavailable";
@@ -302,6 +310,7 @@ IncrementalService::IncrementalService(ServiceManagerWrapper&& sm, std::string_v
    CHECK(mTimedQueue) << "TimedQueue is unavailable";
    CHECK(mProgressUpdateJobQueue) << "mProgressUpdateJobQueue is unavailable";
    CHECK(mFs) << "Fs is unavailable";
    CHECK(mClock) << "Clock is unavailable";

    mJobQueue.reserve(16);
    mJobProcessor = std::thread([this]() {
@@ -2241,17 +2250,44 @@ void IncrementalService::DataLoaderStub::setTargetStatusLocked(int status) {
               << status << " (current " << mCurrentStatus << ")";
}

Milliseconds IncrementalService::DataLoaderStub::updateBindDelay() {
std::optional<Milliseconds> IncrementalService::DataLoaderStub::needToBind() {
    std::unique_lock lock(mMutex);

    const auto now = mService.mClock->now();
    const bool healthy = (mPreviousBindDelay == 0ms);

    if (mCurrentStatus == IDataLoaderStatusListener::DATA_LOADER_BINDING &&
        now - mCurrentStatusTs <= Constants::bindingTimeout) {
        LOG(INFO) << "Binding still in progress. "
                  << (healthy ? "The DL is healthy/freshly bound, ok to retry for a few times."
                              : "Already unhealthy, don't do anything.");
        // Binding still in progress.
        if (!healthy) {
            // Already unhealthy, don't do anything.
            return {};
        }
        // The DL is healthy/freshly bound, ok to retry for a few times.
        if (now - mPreviousBindTs <= Constants::bindGracePeriod) {
            // Still within grace period.
            if (now - mCurrentStatusTs >= Constants::bindRetryInterval) {
                // Retry interval passed, retrying.
                mCurrentStatusTs = now;
                mPreviousBindDelay = 0ms;
                return 0ms;
            }
            return {};
        }
        // fallthrough, mark as unhealthy, and retry with delay
    }

    const auto previousBindTs = mPreviousBindTs;
    const auto now = Clock::now();
    mPreviousBindTs = now;

    const auto nonCrashingInterval = std::max(castToMs(now - previousBindTs), 100ms);
    if (previousBindTs.time_since_epoch() == Clock::duration::zero() ||
        nonCrashingInterval > Constants::healthyDataLoaderUptime) {
        mPreviousBindDelay = 0ms;
        return mPreviousBindDelay;
        return 0ms;
    }

    constexpr auto minBindDelayMs = castToMs(Constants::minBindDelay);
@@ -2264,12 +2300,16 @@ Milliseconds IncrementalService::DataLoaderStub::updateBindDelay() {
    const auto bindDelayJitterRangeMs = bindDelayMs / Constants::bindDelayJitterDivider;
    const auto bindDelayJitterMs = rand() % (bindDelayJitterRangeMs * 2) - bindDelayJitterRangeMs;
    mPreviousBindDelay = std::chrono::milliseconds(bindDelayMs + bindDelayJitterMs);

    return mPreviousBindDelay;
}

bool IncrementalService::DataLoaderStub::bind() {
    const auto bindDelay = updateBindDelay();
    const auto maybeBindDelay = needToBind();
    if (!maybeBindDelay) {
        LOG(DEBUG) << "Skipping bind to " << mParams.packageName << " because of pending bind.";
        return true;
    }
    const auto bindDelay = *maybeBindDelay;
    if (bindDelay > 1s) {
        LOG(INFO) << "Delaying bind to " << mParams.packageName << " by "
                  << bindDelay.count() / 1000 << "s";
@@ -2279,7 +2319,21 @@ bool IncrementalService::DataLoaderStub::bind() {
    auto status = mService.mDataLoaderManager->bindToDataLoader(id(), mParams, bindDelay.count(),
                                                                this, &result);
    if (!status.isOk() || !result) {
        LOG(ERROR) << "Failed to bind a data loader for mount " << id();
        const bool healthy = (bindDelay == 0ms);
        LOG(ERROR) << "Failed to bind a data loader for mount " << id()
                   << (healthy ? ", retrying." : "");

        // Internal error, retry for healthy/new DLs.
        // Let needToBind migrate it to unhealthy after too many retries.
        if (healthy) {
            if (mService.addTimedJob(*mService.mTimedQueue, id(), Constants::bindRetryInterval,
                                     [this]() { fsmStep(); })) {
                // Mark as binding so that we know it's not the DL's fault.
                setCurrentStatus(IDataLoaderStatusListener::DATA_LOADER_BINDING);
                return true;
            }
        }

        return false;
    }
    return true;
@@ -2339,8 +2393,15 @@ bool IncrementalService::DataLoaderStub::fsmStep() {
            // Do nothing, this is a reset state.
            break;
        case IDataLoaderStatusListener::DATA_LOADER_DESTROYED: {
            switch (currentStatus) {
                case IDataLoaderStatusListener::DATA_LOADER_BINDING:
                    setCurrentStatus(IDataLoaderStatusListener::DATA_LOADER_DESTROYED);
                    return true;
                default:
                    return destroy();
            }
            break;
        }
        case IDataLoaderStatusListener::DATA_LOADER_STARTED: {
            switch (currentStatus) {
                case IDataLoaderStatusListener::DATA_LOADER_CREATED:
@@ -2353,6 +2414,7 @@ bool IncrementalService::DataLoaderStub::fsmStep() {
            switch (currentStatus) {
                case IDataLoaderStatusListener::DATA_LOADER_DESTROYED:
                case IDataLoaderStatusListener::DATA_LOADER_UNAVAILABLE:
                case IDataLoaderStatusListener::DATA_LOADER_BINDING:
                    return bind();
                case IDataLoaderStatusListener::DATA_LOADER_BOUND:
                    return create();
@@ -2372,7 +2434,8 @@ binder::Status IncrementalService::DataLoaderStub::onStatusChanged(MountId mount
                fromServiceSpecificError(-EINVAL, "onStatusChange came to invalid DataLoaderStub");
    }
    if (id() != mountId) {
        LOG(ERROR) << "Mount ID mismatch: expected " << id() << ", but got: " << mountId;
        LOG(ERROR) << "onStatusChanged: mount ID mismatch: expected " << id()
                   << ", but got: " << mountId;
        return binder::Status::fromServiceSpecificError(-EPERM, "Mount ID mismatch.");
    }
    if (newStatus == IDataLoaderStatusListener::DATA_LOADER_UNRECOVERABLE) {
@@ -2396,11 +2459,13 @@ void IncrementalService::DataLoaderStub::setCurrentStatus(int newStatus) {
        }

        oldStatus = mCurrentStatus;
        mCurrentStatus = newStatus;
        targetStatus = mTargetStatus;

        listener = mStatusListener;

        // Change the status.
        mCurrentStatus = newStatus;
        mCurrentStatusTs = mService.mClock->now();

        if (mCurrentStatus == IDataLoaderStatusListener::DATA_LOADER_UNAVAILABLE ||
            mCurrentStatus == IDataLoaderStatusListener::DATA_LOADER_UNRECOVERABLE) {
            // For unavailable, unbind from DataLoader to ensure proper re-commit.
@@ -2428,7 +2493,8 @@ binder::Status IncrementalService::DataLoaderStub::reportStreamHealth(MountId mo
                                         "reportStreamHealth came to invalid DataLoaderStub");
    }
    if (id() != mountId) {
        LOG(ERROR) << "Mount ID mismatch: expected " << id() << ", but got: " << mountId;
        LOG(ERROR) << "reportStreamHealth: mount ID mismatch: expected " << id()
                   << ", but got: " << mountId;
        return binder::Status::fromServiceSpecificError(-EPERM, "Mount ID mismatch.");
    }
    {
@@ -2694,6 +2760,8 @@ static std::string toHexString(const RawMetadata& metadata) {
void IncrementalService::DataLoaderStub::onDump(int fd) {
    dprintf(fd, "    dataLoader: {\n");
    dprintf(fd, "      currentStatus: %d\n", mCurrentStatus);
    dprintf(fd, "      currentStatusTs: %lldmcs\n",
            (long long)(elapsedMcs(mCurrentStatusTs, Clock::now())));
    dprintf(fd, "      targetStatus: %d\n", mTargetStatus);
    dprintf(fd, "      targetStatusTs: %lldmcs\n",
            (long long)(elapsedMcs(mTargetStatusTs, Clock::now())));
+6 −1
Original line number Diff line number Diff line
@@ -267,7 +267,10 @@ private:
        BootClockTsUs getOldestTsFromLastPendingReads();
        Milliseconds elapsedMsSinceKernelTs(TimePoint now, BootClockTsUs kernelTsUs);

        Milliseconds updateBindDelay();
        // If the stub has to bind to the DL.
        // Returns {} if bind operation is already in progress.
        // Or bind delay in ms.
        std::optional<Milliseconds> needToBind();

        void registerForPendingReads();
        void unregisterFromPendingReads();
@@ -283,6 +286,7 @@ private:

        std::condition_variable mStatusCondition;
        int mCurrentStatus = content::pm::IDataLoaderStatusListener::DATA_LOADER_DESTROYED;
        TimePoint mCurrentStatusTs = {};
        int mTargetStatus = content::pm::IDataLoaderStatusListener::DATA_LOADER_DESTROYED;
        TimePoint mTargetStatusTs = {};

@@ -443,6 +447,7 @@ private:
    const std::unique_ptr<TimedQueueWrapper> mTimedQueue;
    const std::unique_ptr<TimedQueueWrapper> mProgressUpdateJobQueue;
    const std::unique_ptr<FsWrapper> mFs;
    const std::unique_ptr<ClockWrapper> mClock;
    const std::string mIncrementalDir;

    mutable std::mutex mLock;
Loading