Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit df1ed5c6 authored by Andy Hung's avatar Andy Hung
Browse files

TimeCheck: Use clock monotonic for elapsed time

The timeout deadline uses clock monotonic.

Update the elapsed time passed to the callbacks to
use clock monotonic as well.  Clock monotonic counts only the
time the CPU is active (vs suspend) and is a better estimate
for performance monitoring.

Test: overnight stress test
Test: atest libmediautils_test
Test: atest timecheck_tests
Bug: 227594853
Merged-In: Ie35de689245a04ba50f4d4f04a10e2e0ded2293b
Change-Id: Ie35de689245a04ba50f4d4f04a10e2e0ded2293b
parent c3238c22
Loading
Loading
Loading
Loading
+71 −17
Original line number Diff line number Diff line
@@ -139,22 +139,22 @@ std::string TimeCheck::toString() {
    return getTimeCheckThread().toString();
}

TimeCheck::TimeCheck(std::string_view tag, OnTimerFunc&& onTimer, uint32_t timeoutMs,
TimeCheck::TimeCheck(std::string_view tag, OnTimerFunc&& onTimer, uint32_t requestedTimeoutMs,
        bool crashOnTimeout)
    : mTimeCheckHandler{ std::make_shared<TimeCheckHandler>(
            tag, std::move(onTimer), crashOnTimeout,
            tag, std::move(onTimer), crashOnTimeout, std::chrono::milliseconds(requestedTimeoutMs),
            std::chrono::system_clock::now(), gettid()) }
    , mTimerHandle(timeoutMs == 0
    , mTimerHandle(requestedTimeoutMs == 0
              ? getTimeCheckThread().trackTask(mTimeCheckHandler->tag)
              : getTimeCheckThread().scheduleTask(
                      mTimeCheckHandler->tag,
                      // Pass in all the arguments by value to this task for safety.
                      // The thread could call the callback before the constructor is finished.
                      // The destructor is not blocked on callback.
                      [ timeCheckHandler = mTimeCheckHandler ] {
                          timeCheckHandler->onTimeout();
                      [ timeCheckHandler = mTimeCheckHandler ](TimerThread::Handle timerHandle) {
                          timeCheckHandler->onTimeout(timerHandle);
                      },
                      std::chrono::milliseconds(timeoutMs))) {}
                      std::chrono::milliseconds(requestedTimeoutMs))) {}

TimeCheck::~TimeCheck() {
    if (mTimeCheckHandler) {
@@ -162,23 +162,75 @@ TimeCheck::~TimeCheck() {
    }
}

/* static */
std::string TimeCheck::analyzeTimeouts(
        float requestedTimeoutMs, float elapsedSteadyMs, float elapsedSystemMs) {
    // Track any OS clock issues with suspend.
    // It is possible that the elapsedSystemMs is much greater than elapsedSteadyMs if
    // a suspend occurs; however, we always expect the timeout ms should always be slightly
    // less than the elapsed steady ms regardless of whether a suspend occurs or not.

    std::string s("Timeout ms ");
    s.append(std::to_string(requestedTimeoutMs))
        .append(" elapsed steady ms ").append(std::to_string(elapsedSteadyMs))
        .append(" elapsed system ms ").append(std::to_string(elapsedSystemMs));

    // Is there something unusual?
    static constexpr float TOLERANCE_CONTEXT_SWITCH_MS = 200.f;

    if (requestedTimeoutMs > elapsedSteadyMs || requestedTimeoutMs > elapsedSystemMs) {
        s.append("\nError: early expiration - "
                "requestedTimeoutMs should be less than elapsed time");
    }

    if (elapsedSteadyMs > elapsedSystemMs + TOLERANCE_CONTEXT_SWITCH_MS) {
        s.append("\nWarning: steady time should not advance faster than system time");
    }

    // This has been found in suspend stress testing.
    if (elapsedSteadyMs > requestedTimeoutMs + TOLERANCE_CONTEXT_SWITCH_MS) {
        s.append("\nWarning: steady time significantly exceeds timeout "
                "- possible thread stall or aborted suspend");
    }

    // This has been found in suspend stress testing.
    if (elapsedSystemMs > requestedTimeoutMs + TOLERANCE_CONTEXT_SWITCH_MS) {
        s.append("\nInformation: system time significantly exceeds timeout "
                "- possible suspend");
    }
    return s;
}

// To avoid any potential race conditions, the timer handle
// (expiration = clock steady start + timeout) is passed into the callback.
void TimeCheck::TimeCheckHandler::onCancel(TimerThread::Handle timerHandle) const
{
    if (TimeCheck::getTimeCheckThread().cancelTask(timerHandle) && onTimer) {
        const std::chrono::system_clock::time_point endTime = std::chrono::system_clock::now();
        onTimer(false /* timeout */,
                std::chrono::duration_cast<std::chrono::duration<float, std::milli>>(
                        endTime - startTime).count());
        const std::chrono::steady_clock::time_point endSteadyTime =
                std::chrono::steady_clock::now();
        const float elapsedSteadyMs = std::chrono::duration_cast<FloatMs>(
                endSteadyTime - timerHandle + timeoutDuration).count();
        // send the elapsed steady time for statistics.
        onTimer(false /* timeout */, elapsedSteadyMs);
    }
}

void TimeCheck::TimeCheckHandler::onTimeout() const
// To avoid any potential race conditions, the timer handle
// (expiration = clock steady start + timeout) is passed into the callback.
void TimeCheck::TimeCheckHandler::onTimeout(TimerThread::Handle timerHandle) const
{
    const std::chrono::system_clock::time_point endTime = std::chrono::system_clock::now();
    const std::chrono::steady_clock::time_point endSteadyTime = std::chrono::steady_clock::now();
    const std::chrono::system_clock::time_point endSystemTime = std::chrono::system_clock::now();
    // timerHandle incorporates the timeout
    const float elapsedSteadyMs = std::chrono::duration_cast<FloatMs>(
            endSteadyTime - (timerHandle - timeoutDuration)).count();
    const float elapsedSystemMs = std::chrono::duration_cast<FloatMs>(
            endSystemTime - startSystemTime).count();
    const float requestedTimeoutMs = std::chrono::duration_cast<FloatMs>(
            timeoutDuration).count();

    if (onTimer) {
        onTimer(true /* timeout */,
                std::chrono::duration_cast<std::chrono::duration<float, std::milli>>(
                        endTime - startTime).count());
        onTimer(true /* timeout */, elapsedSteadyMs);
    }

    if (!crashOnTimeout) return;
@@ -208,8 +260,10 @@ void TimeCheck::TimeCheckHandler::onTimeout() const
    // Create abort message string - caution: this can be very large.
    const std::string abortMessage = std::string("TimeCheck timeout for ")
            .append(tag)
            .append(" scheduled ").append(formatTime(startTime))
            .append(" scheduled ").append(formatTime(startSystemTime))
            .append(" on thread ").append(std::to_string(tid)).append("\n")
            .append(analyzeTimeouts(
                    requestedTimeoutMs, elapsedSteadyMs, elapsedSystemMs)).append("\n")
            .append(halPids).append("\n")
            .append(summary);

@@ -241,7 +295,7 @@ mediautils::TimeCheck makeTimeCheckStatsForClassMethod(
                    } else {
                        stats->event(safeMethodName.asStringView(), elapsedMs);
                    }
            }, 0 /* timeoutMs */);
            }, 0 /* requestedTimeoutMs */);
}

}  // namespace android::mediautils
+28 −9
Original line number Diff line number Diff line
@@ -36,7 +36,10 @@ inline size_t countChars(std::string_view s, char c) {
TEST(TimerThread, Basic) {
    std::atomic<bool> taskRan = false;
    TimerThread thread;
    thread.scheduleTask("Basic", [&taskRan] { taskRan = true; }, 100ms);
    TimerThread::Handle handle =
            thread.scheduleTask("Basic", [&taskRan](TimerThread::Handle handle __unused) {
                    taskRan = true; }, 100ms);
    ASSERT_TRUE(TimerThread::isTimeoutHandle(handle));
    std::this_thread::sleep_for(100ms - kJitter);
    ASSERT_FALSE(taskRan);
    std::this_thread::sleep_for(2 * kJitter);
@@ -48,7 +51,9 @@ TEST(TimerThread, Cancel) {
    std::atomic<bool> taskRan = false;
    TimerThread thread;
    TimerThread::Handle handle =
            thread.scheduleTask("Cancel", [&taskRan] { taskRan = true; }, 100ms);
            thread.scheduleTask("Cancel", [&taskRan](TimerThread::Handle handle __unused) {
                    taskRan = true; }, 100ms);
    ASSERT_TRUE(TimerThread::isTimeoutHandle(handle));
    std::this_thread::sleep_for(100ms - kJitter);
    ASSERT_FALSE(taskRan);
    ASSERT_TRUE(thread.cancelTask(handle));
@@ -61,7 +66,10 @@ TEST(TimerThread, CancelAfterRun) {
    std::atomic<bool> taskRan = false;
    TimerThread thread;
    TimerThread::Handle handle =
            thread.scheduleTask("CancelAfterRun", [&taskRan] { taskRan = true; }, 100ms);
            thread.scheduleTask("CancelAfterRun",
                    [&taskRan](TimerThread::Handle handle __unused) {
                            taskRan = true; }, 100ms);
    ASSERT_TRUE(TimerThread::isTimeoutHandle(handle));
    std::this_thread::sleep_for(100ms + kJitter);
    ASSERT_TRUE(taskRan);
    ASSERT_FALSE(thread.cancelTask(handle));
@@ -74,12 +82,18 @@ TEST(TimerThread, MultipleTasks) {

    auto startTime = std::chrono::steady_clock::now();

    thread.scheduleTask("0", [&taskRan] { taskRan[0] = true; }, 300ms);
    thread.scheduleTask("1", [&taskRan] { taskRan[1] = true; }, 100ms);
    thread.scheduleTask("2", [&taskRan] { taskRan[2] = true; }, 200ms);
    thread.scheduleTask("3", [&taskRan] { taskRan[3] = true; }, 400ms);
    auto handle4 = thread.scheduleTask("4", [&taskRan] { taskRan[4] = true; }, 200ms);
    thread.scheduleTask("5", [&taskRan] { taskRan[5] = true; }, 200ms);
    thread.scheduleTask("0", [&taskRan](TimerThread::Handle handle __unused) {
            taskRan[0] = true; }, 300ms);
    thread.scheduleTask("1", [&taskRan](TimerThread::Handle handle __unused) {
            taskRan[1] = true; }, 100ms);
    thread.scheduleTask("2", [&taskRan](TimerThread::Handle handle __unused) {
            taskRan[2] = true; }, 200ms);
    thread.scheduleTask("3", [&taskRan](TimerThread::Handle handle __unused) {
            taskRan[3] = true; }, 400ms);
    auto handle4 = thread.scheduleTask("4", [&taskRan](TimerThread::Handle handle __unused) {
            taskRan[4] = true; }, 200ms);
    thread.scheduleTask("5", [&taskRan](TimerThread::Handle handle __unused) {
            taskRan[5] = true; }, 200ms);

    // 6 tasks pending
    ASSERT_EQ(6, countChars(thread.pendingToString(), REQUEST_START));
@@ -178,6 +192,10 @@ TEST(TimerThread, TrackedTasks) {
    auto handle1 = thread.trackTask("1");
    auto handle2 = thread.trackTask("2");

    ASSERT_TRUE(TimerThread::isNoTimeoutHandle(handle0));
    ASSERT_TRUE(TimerThread::isNoTimeoutHandle(handle1));
    ASSERT_TRUE(TimerThread::isNoTimeoutHandle(handle2));

    // 3 tasks pending
    ASSERT_EQ(3, countChars(thread.pendingToString(), REQUEST_START));
    // 0 tasks retired
@@ -201,6 +219,7 @@ TEST(TimerThread, TrackedTasks) {

    // Add another tracked task.
    auto handle3 = thread.trackTask("3");
    ASSERT_TRUE(TimerThread::isNoTimeoutHandle(handle3));

    // 2 tasks pending
    ASSERT_EQ(2, countChars(thread.pendingToString(), REQUEST_START));
+20 −29
Original line number Diff line number Diff line
@@ -25,16 +25,20 @@
#include <mediautils/TimerThread.h>
#include <utils/ThreadDefs.h>

using namespace std::chrono_literals;

namespace android::mediautils {

extern std::string formatTime(std::chrono::system_clock::time_point t);
extern std::string_view timeSuffix(std::string_view time1, std::string_view time2);

TimerThread::Handle TimerThread::scheduleTask(
        std::string_view tag, std::function<void()>&& func, std::chrono::milliseconds timeout) {
        std::string_view tag, TimerCallback&& func, Duration timeoutDuration) {
    const auto now = std::chrono::system_clock::now();
    auto request = std::make_shared<const Request>(now, now + timeout, gettid(), tag);
    return mMonitorThread.add(std::move(request), std::move(func), timeout);
    auto request = std::make_shared<const Request>(now, now +
            std::chrono::duration_cast<std::chrono::system_clock::duration>(timeoutDuration),
            gettid(), tag);
    return mMonitorThread.add(std::move(request), std::move(func), timeoutDuration);
}

TimerThread::Handle TimerThread::trackTask(std::string_view tag) {
@@ -44,7 +48,7 @@ TimerThread::Handle TimerThread::trackTask(std::string_view tag) {
}

bool TimerThread::cancelTask(Handle handle) {
    std::shared_ptr<const Request> request = mNoTimeoutMap.isValidHandle(handle) ?
    std::shared_ptr<const Request> request = isNoTimeoutHandle(handle) ?
             mNoTimeoutMap.remove(handle) : mMonitorThread.remove(handle);
    if (!request) return false;
    mRetiredQueue.add(std::move(request));
@@ -125,12 +129,12 @@ struct TimerThread::Analysis TimerThread::analyzeTimeout(
    std::vector<std::shared_ptr<const Request>> pendingExact;
    std::vector<std::shared_ptr<const Request>> pendingPossible;

    // We look at pending requests that were scheduled no later than kDuration
    // We look at pending requests that were scheduled no later than kPendingDuration
    // after the timeout request. This prevents false matches with calls
    // that naturally block for a short period of time
    // such as HAL write() and read().
    //
    auto constexpr kDuration = std::chrono::milliseconds(1000);
    constexpr Duration kPendingDuration = 1000ms;
    for (const auto& pending : pendingRequests) {
        // If the pending tid is the same as timeout tid, problem identified.
        if (pending->tid == timeout->tid) {
@@ -139,7 +143,7 @@ struct TimerThread::Analysis TimerThread::analyzeTimeout(
        }

        // if the pending tid is scheduled within time limit
        if (pending->scheduled - timeout->scheduled < kDuration) {
        if (pending->scheduled - timeout->scheduled < kPendingDuration) {
            pendingPossible.emplace_back(pending);
        }
    }
@@ -239,15 +243,11 @@ void TimerThread::RequestQueue::copyRequests(
    }
}

bool TimerThread::NoTimeoutMap::isValidHandle(Handle handle) const {
    return handle > getIndexedHandle(mNoTimeoutRequests);
}

TimerThread::Handle TimerThread::NoTimeoutMap::add(std::shared_ptr<const Request> request) {
    std::lock_guard lg(mNTMutex);
    // A unique handle is obtained by mNoTimeoutRequests.fetch_add(1),
    // This need not be under a lock, but we do so anyhow.
    const Handle handle = getIndexedHandle(mNoTimeoutRequests++);
    const Handle handle = getUniqueHandle_l();
    mMap[handle] = request;
    return handle;
}
@@ -269,16 +269,6 @@ void TimerThread::NoTimeoutMap::copyRequests(
    }
}

TimerThread::Handle TimerThread::MonitorThread::getUniqueHandle_l(
        std::chrono::milliseconds timeout) {
    // To avoid key collisions, advance by 1 tick until the key is unique.
    auto deadline = std::chrono::steady_clock::now() + timeout;
    for (; mMonitorRequests.find(deadline) != mMonitorRequests.end();
         deadline += std::chrono::steady_clock::duration(1))
        ;
    return deadline;
}

TimerThread::MonitorThread::MonitorThread(RequestQueue& timeoutQueue)
        : mTimeoutQueue(timeoutQueue)
        , mThread([this] { threadFunc(); }) {
@@ -307,10 +297,12 @@ void TimerThread::MonitorThread::threadFunc() {
                    _l.unlock();
                    // We add Request to retired queue early so that it can be dumped out.
                    mTimeoutQueue.add(std::move(node.mapped().first));
                    node.mapped().second(); // Caution: we don't hold lock here - but do we care?
                                            // this is the timeout case!  We will crash soon,
                                            // maybe before returning.
                    // anything left over is released here outside lock.
                    node.mapped().second(nextDeadline);
                    // Caution: we don't hold lock when we call TimerCallback.
                    // This is benign issue - we permit concurrent operations
                    // while in the callback to the MonitorQueue.
                    //
                    // Anything left over is released here outside lock.
                }
                // reacquire the lock - if something was added, we loop immediately to check.
                _l.lock();
@@ -324,8 +316,7 @@ void TimerThread::MonitorThread::threadFunc() {
}

TimerThread::Handle TimerThread::MonitorThread::add(
        std::shared_ptr<const Request> request, std::function<void()>&& func,
        std::chrono::milliseconds timeout) {
        std::shared_ptr<const Request> request, TimerCallback&& func, Duration timeout) {
    std::lock_guard _l(mMutex);
    const Handle handle = getUniqueHandle_l(timeout);
    mMonitorRequests.emplace(handle, std::make_pair(std::move(request), std::move(func)));
@@ -340,7 +331,7 @@ std::shared_ptr<const TimerThread::Request> TimerThread::MonitorThread::remove(H
        return {};
    }
    std::shared_ptr<const TimerThread::Request> request = std::move(it->second.first);
    std::function<void()> func = std::move(it->second.second);
    TimerCallback func = std::move(it->second.second);
    mMonitorRequests.erase(it);
    ul.unlock();  // manually release lock here so func is released outside of lock.
    return request;
+29 −11
Original line number Diff line number Diff line
@@ -27,9 +27,23 @@ namespace android::mediautils {

class TimeCheck {
  public:

    // Duration for TimeCheck is based on steady_clock, typically nanoseconds.
    using Duration = std::chrono::steady_clock::duration;

    // Duration for printing is in milliseconds, using float for additional precision.
    using FloatMs = std::chrono::duration<float, std::milli>;

    // OnTimerFunc is the callback function with 2 parameters.
    //  bool timeout  (which is true when the TimeCheck object
    //                 times out, false when the TimeCheck object is
    //                 destroyed or leaves scope before the timer expires.)
    //  float elapsedMs (the elapsed time to this event).
    using OnTimerFunc = std::function<void(bool /* timeout */, float /* elapsedMs */ )>;

    // The default timeout is chosen to be less than system server watchdog timeout
    // Note: kDefaultTimeOutMs should be no less than 2 seconds, otherwise spurious timeouts
    // may occur with system suspend.
    static constexpr uint32_t kDefaultTimeOutMs = 5000;

    /**
@@ -44,24 +58,20 @@ class TimeCheck {
     * the deallocation.
     *
     * \param tag       string associated with the TimeCheck object.
     * \param onTimer   callback function with 2 parameters
     *                      bool timeout  (which is true when the TimeCheck object
     *                                    times out, false when the TimeCheck object is
     *                                    destroyed or leaves scope before the timer expires.)
     *                      float elapsedMs (the elapsed time to this event).
     * \param onTimer   callback function with 2 parameters (described above in OnTimerFunc).
     *                  The callback when timeout is true will be called on a different thread.
     *                  This will cancel the callback on the destructor but is not guaranteed
     *                  to block for callback completion if it is already in progress
     *                  (for maximum concurrency and reduced deadlock potential), so use proper
     *                  lifetime analysis (e.g. shared or weak pointers).
     * \param timeoutMs timeout in milliseconds.
     * \param requestedTimeoutMs timeout in milliseconds.
     *                  A zero timeout means no timeout is set -
     *                  the callback is called only when
     *                  the TimeCheck object is destroyed or leaves scope.
     * \param crashOnTimeout true if the object issues an abort on timeout.
     */
    explicit TimeCheck(std::string_view tag, OnTimerFunc&& onTimer = {},
            uint32_t timeoutMs = kDefaultTimeOutMs, bool crashOnTimeout = true);
            uint32_t requestedTimeoutMs = kDefaultTimeOutMs, bool crashOnTimeout = true);

    TimeCheck() = default;
    // Remove copy constructors as there should only be one call to the destructor.
@@ -81,24 +91,32 @@ class TimeCheck {
    public:
        template <typename S, typename F>
        TimeCheckHandler(S&& _tag, F&& _onTimer, bool _crashOnTimeout,
            const std::chrono::system_clock::time_point& _startTime,
            Duration _timeoutDuration,
            std::chrono::system_clock::time_point _startSystemTime,
            pid_t _tid)
            : tag(std::forward<S>(_tag))
            , onTimer(std::forward<F>(_onTimer))
            , crashOnTimeout(_crashOnTimeout)
            , startTime(_startTime)
            , timeoutDuration(_timeoutDuration)
            , startSystemTime(_startSystemTime)
            , tid(_tid)
            {}
        const FixedString62 tag;
        const OnTimerFunc onTimer;
        const bool crashOnTimeout;
        const std::chrono::system_clock::time_point startTime;
        const Duration timeoutDuration;
        const std::chrono::system_clock::time_point startSystemTime;
        const pid_t tid;

        void onCancel(TimerThread::Handle handle) const;
        void onTimeout() const;
        void onTimeout(TimerThread::Handle handle) const;
    };

    // Returns a string that represents the timeout vs elapsed time,
    // and diagnostics if there are any potential issues.
    static std::string analyzeTimeouts(
            float timeoutMs, float elapsedSteadyMs, float elapsedSystemMs);

    static TimerThread& getTimeCheckThread();
    static void accessAudioHalPids(std::vector<pid_t>* pids, bool update);

+94 −24

File changed.

Preview size limit exceeded, changes collapsed.