Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit f8ab0938 authored by Andy Hung's avatar Andy Hung
Browse files

TimeCheck: Add second chance queue

Split the timeout into two waits.

This reduces the chance of false timeout when the monotonic clock
advances without an active CPU during an aborted suspend.

Removed default arguments for TimeCheck constructor
to avoid accidental argument confusion.

Test: overnight stress test
Test: atest libmediautils_test
Test: atest timecheck_tests
Bug: 227594853
Change-Id: If6507a053e5bf15ddd3a3f8f53bdc0d3643e5924
parent 2aa1510f
Loading
Loading
Loading
Loading
+13 −9
Original line number Diff line number Diff line
@@ -139,12 +139,13 @@ std::string TimeCheck::toString() {
    return getTimeCheckThread().toString();
}

TimeCheck::TimeCheck(std::string_view tag, OnTimerFunc&& onTimer, uint32_t requestedTimeoutMs,
        bool crashOnTimeout)
TimeCheck::TimeCheck(std::string_view tag, OnTimerFunc&& onTimer, Duration requestedTimeoutDuration,
        Duration secondChanceDuration, bool crashOnTimeout)
    : mTimeCheckHandler{ std::make_shared<TimeCheckHandler>(
            tag, std::move(onTimer), crashOnTimeout, std::chrono::milliseconds(requestedTimeoutMs),
            std::chrono::system_clock::now(), gettid()) }
    , mTimerHandle(requestedTimeoutMs == 0
            tag, std::move(onTimer), crashOnTimeout, requestedTimeoutDuration,
            secondChanceDuration, std::chrono::system_clock::now(), gettid()) }
    , mTimerHandle(requestedTimeoutDuration.count() == 0
              /* for TimeCheck we don't consider a non-zero secondChanceDuration here */
              ? getTimeCheckThread().trackTask(mTimeCheckHandler->tag)
              : getTimeCheckThread().scheduleTask(
                      mTimeCheckHandler->tag,
@@ -154,7 +155,8 @@ TimeCheck::TimeCheck(std::string_view tag, OnTimerFunc&& onTimer, uint32_t reque
                      [ timeCheckHandler = mTimeCheckHandler ](TimerThread::Handle timerHandle) {
                          timeCheckHandler->onTimeout(timerHandle);
                      },
                      std::chrono::milliseconds(requestedTimeoutMs))) {}
                      requestedTimeoutDuration,
                      secondChanceDuration)) {}

TimeCheck::~TimeCheck() {
    if (mTimeCheckHandler) {
@@ -228,6 +230,8 @@ void TimeCheck::TimeCheckHandler::onTimeout(TimerThread::Handle timerHandle) con
            endSystemTime - startSystemTime).count();
    const float requestedTimeoutMs = std::chrono::duration_cast<FloatMs>(
            timeoutDuration).count();
    const float secondChanceMs = std::chrono::duration_cast<FloatMs>(
            secondChanceDuration).count();

    if (onTimer) {
        onTimer(true /* timeout */, elapsedSteadyMs);
@@ -262,8 +266,8 @@ void TimeCheck::TimeCheckHandler::onTimeout(TimerThread::Handle timerHandle) con
            .append(tag)
            .append(" scheduled ").append(formatTime(startSystemTime))
            .append(" on thread ").append(std::to_string(tid)).append("\n")
            .append(analyzeTimeouts(
                    requestedTimeoutMs, elapsedSteadyMs, elapsedSystemMs)).append("\n")
            .append(analyzeTimeouts(requestedTimeoutMs + secondChanceMs,
                    elapsedSteadyMs, elapsedSystemMs)).append("\n")
            .append(halPids).append("\n")
            .append(summary);

@@ -295,7 +299,7 @@ mediautils::TimeCheck makeTimeCheckStatsForClassMethod(
                    } else {
                        stats->event(safeMethodName.asStringView(), elapsedMs);
                    }
            }, 0 /* requestedTimeoutMs */);
            }, {} /* timeoutDuration */, {} /* secondChanceDuration */, false /* crashOnTimeout */);
}

}  // namespace android::mediautils
+94 −16
Original line number Diff line number Diff line
@@ -23,6 +23,7 @@

#include <mediautils/MediaUtilsDelayed.h>
#include <mediautils/TimerThread.h>
#include <utils/Log.h>
#include <utils/ThreadDefs.h>

using namespace std::chrono_literals;
@@ -33,17 +34,19 @@ extern std::string formatTime(std::chrono::system_clock::time_point t);
extern std::string_view timeSuffix(std::string_view time1, std::string_view time2);

TimerThread::Handle TimerThread::scheduleTask(
        std::string_view tag, TimerCallback&& func, Duration timeoutDuration) {
        std::string_view tag, TimerCallback&& func,
        Duration timeoutDuration, Duration secondChanceDuration) {
    const auto now = std::chrono::system_clock::now();
    auto request = std::make_shared<const Request>(now, now +
            std::chrono::duration_cast<std::chrono::system_clock::duration>(timeoutDuration),
            gettid(), tag);
            secondChanceDuration, gettid(), tag);
    return mMonitorThread.add(std::move(request), std::move(func), timeoutDuration);
}

TimerThread::Handle TimerThread::trackTask(std::string_view tag) {
    const auto now = std::chrono::system_clock::now();
    auto request = std::make_shared<const Request>(now, now, gettid(), tag);
    auto request = std::make_shared<const Request>(now, now,
            Duration{} /* secondChanceDuration */, gettid(), tag);
    return mNoTimeoutMap.add(std::move(request));
}

@@ -86,6 +89,8 @@ std::string TimerThread::toString(size_t retiredCount) const {

    return std::string("now ")
            .append(formatTime(std::chrono::system_clock::now()))
            .append("\nsecondChanceCount ")
            .append(std::to_string(mMonitorThread.getSecondChanceCount()))
            .append(analysisSummary)
            .append("\ntimeout [ ")
            .append(requestsToString(timeoutRequests))
@@ -288,16 +293,60 @@ TimerThread::MonitorThread::~MonitorThread() {
void TimerThread::MonitorThread::threadFunc() {
    std::unique_lock _l(mMutex);
    while (!mShouldExit) {
        Handle nextDeadline = INVALID_HANDLE;
        Handle now = INVALID_HANDLE;
        if (!mMonitorRequests.empty()) {
            Handle nextDeadline = mMonitorRequests.begin()->first;
            if (nextDeadline < std::chrono::steady_clock::now()) {
            nextDeadline = mMonitorRequests.begin()->first;
            now = std::chrono::steady_clock::now();
            if (nextDeadline < now) {
                auto node = mMonitorRequests.extract(mMonitorRequests.begin());
                // Deadline has expired, handle the request.
                auto secondChanceDuration = node.mapped().first->secondChanceDuration;
                if (secondChanceDuration.count() != 0) {
                    // We now apply the second chance duration to find the clock
                    // monotonic second deadline.  The unique key is then the
                    // pair<second_deadline, first_deadline>.
                    //
                    // The second chance prevents a false timeout should there be
                    // any clock monotonic advancement during suspend.
                    auto newHandle = now + secondChanceDuration;
                    ALOGD("%s: TimeCheck second chance applied for %s",
                            __func__, node.mapped().first->tag.c_str()); // should be rare event.
                    mSecondChanceRequests.emplace_hint(mSecondChanceRequests.end(),
                            std::make_pair(newHandle, nextDeadline),
                            std::move(node.mapped()));
                    // increment second chance counter.
                    mSecondChanceCount.fetch_add(1 /* arg */, std::memory_order_relaxed);
                } else {
                    {
                    auto node = mMonitorRequests.extract(mMonitorRequests.begin());
                        _l.unlock();
                        // We add Request to retired queue early so that it can be dumped out.
                        mTimeoutQueue.add(std::move(node.mapped().first));
                        node.mapped().second(nextDeadline);
                        // Caution: we don't hold lock when we call TimerCallback,
                        // but this is the timeout case!  We will crash soon,
                        // maybe before returning.
                        // anything left over is released here outside lock.
                    }
                    // reacquire the lock - if something was added, we loop immediately to check.
                    _l.lock();
                }
                // always process expiring monitor requests first.
                continue;
            }
        }
        // now process any second chance requests.
        if (!mSecondChanceRequests.empty()) {
            Handle secondDeadline = mSecondChanceRequests.begin()->first.first;
            if (now == INVALID_HANDLE) now = std::chrono::steady_clock::now();
            if (secondDeadline < now) {
                auto node = mSecondChanceRequests.extract(mSecondChanceRequests.begin());
                {
                    _l.unlock();
                    // We add Request to retired queue early so that it can be dumped out.
                    mTimeoutQueue.add(std::move(node.mapped().first));
                    const Handle originalHandle = node.key().second;
                    node.mapped().second(originalHandle);
                    // Caution: we don't hold lock when we call TimerCallback.
                    // This is benign issue - we permit concurrent operations
                    // while in the callback to the MonitorQueue.
@@ -308,6 +357,14 @@ void TimerThread::MonitorThread::threadFunc() {
                _l.lock();
                continue;
            }
            // update the deadline.
            if (nextDeadline == INVALID_HANDLE) {
                nextDeadline = secondDeadline;
            } else {
                nextDeadline = std::min(nextDeadline, secondDeadline);
            }
        }
        if (nextDeadline != INVALID_HANDLE) {
            mCond.wait_until(_l, nextDeadline);
        } else {
            mCond.wait(_l);
@@ -319,22 +376,36 @@ TimerThread::Handle TimerThread::MonitorThread::add(
        std::shared_ptr<const Request> request, TimerCallback&& func, Duration timeout) {
    std::lock_guard _l(mMutex);
    const Handle handle = getUniqueHandle_l(timeout);
    mMonitorRequests.emplace(handle, std::make_pair(std::move(request), std::move(func)));
    mMonitorRequests.emplace_hint(mMonitorRequests.end(),
            handle, std::make_pair(std::move(request), std::move(func)));
    mCond.notify_all();
    return handle;
}

std::shared_ptr<const TimerThread::Request> TimerThread::MonitorThread::remove(Handle handle) {
    std::pair<std::shared_ptr<const Request>, TimerCallback> data;
    std::unique_lock ul(mMutex);
    const auto it = mMonitorRequests.find(handle);
    if (it == mMonitorRequests.end()) {
        return {};
    }
    std::shared_ptr<const TimerThread::Request> request = std::move(it->second.first);
    TimerCallback func = std::move(it->second.second);
    if (const auto it = mMonitorRequests.find(handle);
        it != mMonitorRequests.end()) {
        data = std::move(it->second);
        mMonitorRequests.erase(it);
    ul.unlock();  // manually release lock here so func is released outside of lock.
    return request;
        ul.unlock();  // manually release lock here so func (data.second)
                      // is released outside of lock.
        return data.first;  // request
    }

    // this check is O(N), but since the second chance requests are ordered
    // in terms of earliest expiration time, we would expect better than average results.
    for (auto it = mSecondChanceRequests.begin(); it != mSecondChanceRequests.end(); ++it) {
        if (it->first.second == handle) {
            data = std::move(it->second);
            mSecondChanceRequests.erase(it);
            ul.unlock();  // manually release lock here so func (data.second)
                          // is released outside of lock.
            return data.first; // request
        }
    }
    return {};
}

void TimerThread::MonitorThread::copyRequests(
@@ -343,6 +414,13 @@ void TimerThread::MonitorThread::copyRequests(
    for (const auto &[deadline, monitorpair] : mMonitorRequests) {
        requests.emplace_back(monitorpair.first);
    }
    // we combine the second map with the first map - this is
    // everything that is pending on the monitor thread.
    // The second map will be older than the first map so this
    // is in order.
    for (const auto &[deadline, monitorpair] : mSecondChanceRequests) {
        requests.emplace_back(monitorpair.first);
    }
}

}  // namespace android::mediautils
+3 −1
Original line number Diff line number Diff line
@@ -48,7 +48,9 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
    std::string name = data_provider.ConsumeRandomLengthString(kMaxStringLen);

    // 3. The constructor, which is fuzzed here:
    android::mediautils::TimeCheck timeCheck(name.c_str(), {} /* onTimer */, timeoutMs);
    android::mediautils::TimeCheck timeCheck(name.c_str(), {} /* onTimer */,
            std::chrono::milliseconds(timeoutMs),
            {} /* secondChanceDuration */, true /* crashOnTimeout */);
    // We will leave some buffer to avoid sleeping too long
    uint8_t sleep_amount_ms = data_provider.ConsumeIntegralInRange<uint8_t>(0, timeoutMs / 2);

+21 −5
Original line number Diff line number Diff line
@@ -16,6 +16,7 @@

#pragma once

#include <chrono>
#include <vector>

#include <mediautils/TimerThread.h>
@@ -44,7 +45,16 @@ class TimeCheck {
    // The default timeout is chosen to be less than system server watchdog timeout
    // Note: kDefaultTimeOutMs should be no less than 2 seconds, otherwise spurious timeouts
    // may occur with system suspend.
    static constexpr uint32_t kDefaultTimeOutMs = 5000;
    static constexpr TimeCheck::Duration kDefaultTimeoutDuration = std::chrono::milliseconds(3000);

    // Due to suspend abort not incrementing the monotonic clock,
    // we allow another second chance timeout after the first timeout expires.
    //
    // The total timeout is therefore kDefaultTimeoutDuration + kDefaultSecondChanceDuration,
    // and the result is more stable when the monotonic clock increments during suspend.
    //
    static constexpr TimeCheck::Duration kDefaultSecondChanceDuration =
            std::chrono::milliseconds(2000);

    /**
     * TimeCheck is a RAII object which will notify a callback
@@ -64,14 +74,18 @@ class TimeCheck {
     *                  to block for callback completion if it is already in progress
     *                  (for maximum concurrency and reduced deadlock potential), so use proper
     *                  lifetime analysis (e.g. shared or weak pointers).
     * \param requestedTimeoutMs timeout in milliseconds.
     * \param requestedTimeoutDuration timeout in milliseconds.
     *                  A zero timeout means no timeout is set -
     *                  the callback is called only when
     *                  the TimeCheck object is destroyed or leaves scope.
     * \param secondChanceDuration additional milliseconds to wait if the first timeout expires.
     *                  This is used to prevent false timeouts if the steady (monotonic)
     *                  clock advances on aborted suspend.
     * \param crashOnTimeout true if the object issues an abort on timeout.
     */
    explicit TimeCheck(std::string_view tag, OnTimerFunc&& onTimer = {},
            uint32_t requestedTimeoutMs = kDefaultTimeOutMs, bool crashOnTimeout = true);
    explicit TimeCheck(std::string_view tag, OnTimerFunc&& onTimer,
            Duration requestedTimeoutDuration, Duration secondChanceDuration,
            bool crashOnTimeout);

    TimeCheck() = default;
    // Remove copy constructors as there should only be one call to the destructor.
@@ -91,13 +105,14 @@ class TimeCheck {
    public:
        template <typename S, typename F>
        TimeCheckHandler(S&& _tag, F&& _onTimer, bool _crashOnTimeout,
            Duration _timeoutDuration,
            Duration _timeoutDuration, Duration _secondChanceDuration,
            std::chrono::system_clock::time_point _startSystemTime,
            pid_t _tid)
            : tag(std::forward<S>(_tag))
            , onTimer(std::forward<F>(_onTimer))
            , crashOnTimeout(_crashOnTimeout)
            , timeoutDuration(_timeoutDuration)
            , secondChanceDuration(_secondChanceDuration)
            , startSystemTime(_startSystemTime)
            , tid(_tid)
            {}
@@ -105,6 +120,7 @@ class TimeCheck {
        const OnTimerFunc onTimer;
        const bool crashOnTimeout;
        const Duration timeoutDuration;
        const Duration secondChanceDuration;
        const std::chrono::system_clock::time_point startSystemTime;
        const pid_t tid;

+25 −3
Original line number Diff line number Diff line
@@ -130,7 +130,8 @@ class TimerThread {
     * \returns       a handle that can be used for cancellation.
     */
    Handle scheduleTask(
            std::string_view tag, TimerCallback&& func, Duration timeoutDuration);
            std::string_view tag, TimerCallback&& func,
            Duration timeoutDuration, Duration secondChanceDuration);

    /**
     * Tracks a task that shows up on toString() until cancelled.
@@ -204,24 +205,30 @@ class TimerThread {
  private:
    // To minimize movement of data, we pass around shared_ptrs to Requests.
    // These are allocated and deallocated outside of the lock.
    // TODO(b/243839867) consider options to merge Request with the
    // TimeCheck::TimeCheckHandler struct.
    struct Request {
        Request(std::chrono::system_clock::time_point _scheduled,
                std::chrono::system_clock::time_point _deadline,
                Duration _secondChanceDuration,
                pid_t _tid,
                std::string_view _tag)
            : scheduled(_scheduled)
            , deadline(_deadline)
            , secondChanceDuration(_secondChanceDuration)
            , tid(_tid)
            , tag(_tag)
            {}

        const std::chrono::system_clock::time_point scheduled;
        const std::chrono::system_clock::time_point deadline; // deadline := scheduled + timeout
        const std::chrono::system_clock::time_point deadline; // deadline := scheduled
                                                              // + timeoutDuration
                                                              // + secondChanceDuration
                                                              // if deadline == scheduled, no
                                                              // timeout, task not executed.
        Duration secondChanceDuration;
        const pid_t tid;
        const FixedString62 tag;

        std::string toString() const;
    };

@@ -270,6 +277,7 @@ class TimerThread {
    // call on timeout.
    // This class is thread-safe.
    class MonitorThread {
        std::atomic<size_t> mSecondChanceCount{};
        mutable std::mutex mMutex;
        mutable std::condition_variable mCond GUARDED_BY(mMutex);

@@ -278,6 +286,17 @@ class TimerThread {
        std::map<Handle, std::pair<std::shared_ptr<const Request>, TimerCallback>>
                mMonitorRequests GUARDED_BY(mMutex);

        // Due to monotonic/steady clock inaccuracies during suspend,
        // we allow an additional second chance waiting time to prevent
        // false removal.

        // This mSecondChanceRequests queue is almost always empty.
        // Using a pair with the original handle allows lookup and keeps
        // the Key unique.
        std::map<std::pair<Handle /* new */, Handle /* original */>,
                std::pair<std::shared_ptr<const Request>, TimerCallback>>
                        mSecondChanceRequests GUARDED_BY(mMutex);

        RequestQueue& mTimeoutQueue; // locked internally, added to when request times out.

        // Worker thread variables
@@ -302,6 +321,9 @@ class TimerThread {
                Duration timeout);
        std::shared_ptr<const Request> remove(Handle handle);
        void copyRequests(std::vector<std::shared_ptr<const Request>>& requests) const;
        size_t getSecondChanceCount() const {
            return mSecondChanceCount.load(std::memory_order_relaxed);
        }
    };

    // Analysis contains info deduced by analysisTimeout().
Loading