Loading media/utils/TimeCheck.cpp +26 −9 Original line number Diff line number Diff line Loading @@ -14,11 +14,14 @@ * limitations under the License. */ #include <csignal> #include "mediautils/TimerThread.h" #define LOG_TAG "TimeCheck" #include <optional> #include <android-base/logging.h> #include <android-base/strings.h> #include <audio_utils/clock.h> #include <mediautils/EventLog.h> #include <mediautils/FixedString.h> Loading @@ -27,20 +30,21 @@ #include <mediautils/TidWrapper.h> #include <utils/Log.h> #if defined(__BIONIC__) #if defined(__ANDROID__) #include "debuggerd/handler.h" #endif namespace android::mediautils { // This function appropriately signals a pid to dump a backtrace if we are // running on device. // running on device (and the HAL exists). If we are not running on an Android // device, there is no HAL to signal (so we do nothing). static inline void signalAudioHAL([[maybe_unused]] pid_t pid) { #if defined(__BIONIC__) #if defined(__ANDROID__) sigqueue(pid, DEBUGGER_SIGNAL, {.sival_int = 0}); #endif } namespace android::mediautils { /** * Returns the std::string "HH:MM:SS.MSc" from a system_clock time_point. */ Loading Loading @@ -148,7 +152,7 @@ TimerThread& TimeCheck::getTimeCheckThread() { std::string TimeCheck::toString() { // note pending and retired are individually locked for maximum concurrency, // snapshot is not instantaneous at a single time. return getTimeCheckThread().toString(); return getTimeCheckThread().getSnapshotAnalysis().toString(); } TimeCheck::TimeCheck(std::string_view tag, OnTimerFunc&& onTimer, Duration requestedTimeoutDuration, Loading Loading @@ -253,7 +257,7 @@ void TimeCheck::TimeCheckHandler::onTimeout(TimerThread::Handle timerHandle) con // Generate the TimerThread summary string early before sending signals to the // HAL processes which can affect thread behavior. const std::string summary = getTimeCheckThread().toString(4 /* retiredCount */); const auto snapshotAnalysis = getTimeCheckThread().getSnapshotAnalysis(4 /* retiredCount */); // Generate audio HAL processes tombstones and allow time to complete // before forcing restart Loading Loading @@ -281,7 +285,7 @@ void TimeCheck::TimeCheckHandler::onTimeout(TimerThread::Handle timerHandle) con .append(analyzeTimeouts(requestedTimeoutMs + secondChanceMs, elapsedSteadyMs, elapsedSystemMs)).append("\n") .append(halPids).append("\n") .append(summary); .append(snapshotAnalysis.toString()); // Note: LOG_ALWAYS_FATAL limits the size of the string - per log/log.h: // Log message text may be truncated to less than an Loading @@ -291,7 +295,20 @@ void TimeCheck::TimeCheckHandler::onTimeout(TimerThread::Handle timerHandle) con // to avoid the size limitation. LOG(FATAL) does an abort whereas // LOG(FATAL_WITHOUT_ABORT) does not abort. LOG(FATAL) << abortMessage; static constexpr pid_t invalidPid = TimerThread::SnapshotAnalysis::INVALID_PID; pid_t tidToAbort = invalidPid; if (snapshotAnalysis.suspectTid != invalidPid) { tidToAbort = snapshotAnalysis.suspectTid; } else if (snapshotAnalysis.timeoutTid != invalidPid) { tidToAbort = snapshotAnalysis.timeoutTid; } LOG(FATAL_WITHOUT_ABORT) << abortMessage; const auto ret = abortTid(tidToAbort); if (ret < 0) { LOG(FATAL) << "TimeCheck thread signal failed, aborting process. " "errno: " << errno << base::ErrnoNumberAsString(errno); } } // Automatically create a TimeCheck class for a class and method. Loading media/utils/TimerThread.cpp +36 −41 Original line number Diff line number Diff line Loading @@ -59,39 +59,29 @@ bool TimerThread::cancelTask(Handle handle) { return true; } std::string TimerThread::toString(size_t retiredCount) const { std::string TimerThread::SnapshotAnalysis::toString() const { // Note: These request queues are snapshot very close together but // not at "identical" times as we don't use a class-wide lock. std::vector<std::shared_ptr<const Request>> timeoutRequests; std::vector<std::shared_ptr<const Request>> retiredRequests; mTimeoutQueue.copyRequests(timeoutRequests); mRetiredQueue.copyRequests(retiredRequests, retiredCount); std::vector<std::shared_ptr<const Request>> pendingRequests = getPendingRequests(); struct Analysis analysis = analyzeTimeout(timeoutRequests, pendingRequests); std::string analysisSummary; if (!analysis.summary.empty()) { analysisSummary = std::string("\nanalysis [ ").append(analysis.summary).append(" ]"); } std::string analysisSummary = std::string("\nanalysis [ ").append(description).append(" ]"); std::string timeoutStack; if (analysis.timeoutTid != -1) { timeoutStack = std::string("\ntimeout(") .append(std::to_string(analysis.timeoutTid)).append(") callstack [\n") .append(getCallStackStringForTid(analysis.timeoutTid)).append("]"); } std::string blockedStack; if (analysis.HALBlockedTid != -1) { if (timeoutTid != -1) { timeoutStack = std::string(suspectTid == timeoutTid ? "\ntimeout/blocked(" : "\ntimeout(") .append(std::to_string(timeoutTid)).append(") callstack [\n") .append(getCallStackStringForTid(timeoutTid)).append("]"); } if (suspectTid != -1 && suspectTid != timeoutTid) { blockedStack = std::string("\nblocked(") .append(std::to_string(analysis.HALBlockedTid)).append(") callstack [\n") .append(getCallStackStringForTid(analysis.HALBlockedTid)).append("]"); .append(std::to_string(suspectTid)).append(") callstack [\n") .append(getCallStackStringForTid(suspectTid)).append("]"); } return std::string("now ") .append(formatTime(std::chrono::system_clock::now())) .append("\nsecondChanceCount ") .append(std::to_string(mMonitorThread.getSecondChanceCount())) .append(std::to_string(secondChanceCount)) .append(analysisSummary) .append("\ntimeout [ ") .append(requestsToString(timeoutRequests)) Loading Loading @@ -121,16 +111,23 @@ bool TimerThread::isRequestFromHal(const std::shared_ptr<const Request>& request return separatorPos != std::string::npos; } /* static */ struct TimerThread::Analysis TimerThread::analyzeTimeout( const std::vector<std::shared_ptr<const Request>>& timeoutRequests, const std::vector<std::shared_ptr<const Request>>& pendingRequests) { if (timeoutRequests.empty() || pendingRequests.empty()) return {}; // nothing to say. struct TimerThread::SnapshotAnalysis TimerThread::getSnapshotAnalysis(size_t retiredCount) const { struct SnapshotAnalysis analysis{}; // The following snapshot of the TimerThread state will be utilized for // analysis. Note, there is no lock around these calls, so there could be // a state update between them. mTimeoutQueue.copyRequests(analysis.timeoutRequests); mRetiredQueue.copyRequests(analysis.retiredRequests, retiredCount); analysis.pendingRequests = getPendingRequests(); analysis.secondChanceCount = mMonitorThread.getSecondChanceCount(); // No call has timed out, so there is no analysis to be done. if (analysis.timeoutRequests.empty()) return analysis; // for now look at last timeout (in our case, the only timeout) const std::shared_ptr<const Request> timeout = timeoutRequests.back(); const std::shared_ptr<const Request> timeout = analysis.timeoutRequests.back(); analysis.timeoutTid = timeout->tid; if (analysis.pendingRequests.empty()) return analysis; // pending Requests that are problematic. std::vector<std::shared_ptr<const Request>> pendingExact; std::vector<std::shared_ptr<const Request>> pendingPossible; Loading @@ -141,7 +138,7 @@ struct TimerThread::Analysis TimerThread::analyzeTimeout( // such as HAL write() and read(). // constexpr Duration kPendingDuration = 1000ms; for (const auto& pending : pendingRequests) { for (const auto& pending : analysis.pendingRequests) { // If the pending tid is the same as timeout tid, problem identified. if (pending->tid == timeout->tid) { pendingExact.emplace_back(pending); Loading @@ -154,29 +151,27 @@ struct TimerThread::Analysis TimerThread::analyzeTimeout( } } struct Analysis analysis{}; analysis.timeoutTid = timeout->tid; std::string& summary = analysis.summary; std::string& description = analysis.description; if (!pendingExact.empty()) { const auto& request = pendingExact.front(); const bool hal = isRequestFromHal(request); if (hal) { summary = std::string("Blocked directly due to HAL call: ") description = std::string("Blocked directly due to HAL call: ") .append(request->toString()); analysis.suspectTid= request->tid; } } if (summary.empty() && !pendingPossible.empty()) { if (description.empty() && !pendingPossible.empty()) { for (const auto& request : pendingPossible) { const bool hal = isRequestFromHal(request); if (hal) { // The first blocked call is the most likely one. // Recent calls might be temporarily blocked // calls such as write() or read() depending on kDuration. summary = std::string("Blocked possibly due to HAL call: ") description = std::string("Blocked possibly due to HAL call: ") .append(request->toString()); analysis.HALBlockedTid = request->tid; analysis.suspectTid= request->tid; } } } Loading media/utils/include/mediautils/TidWrapper.h +19 −0 Original line number Diff line number Diff line Loading @@ -16,8 +16,11 @@ #pragma once #if defined(__linux__) #include <signal.h> #include <sys/syscall.h> #include <unistd.h> #endif namespace android::mediautils { Loading @@ -31,4 +34,20 @@ inline pid_t getThreadIdWrapper() { #endif } // Send an abort signal to a (linux) thread id. inline int abortTid(int tid) { #if defined(__linux__) const pid_t pid = getpid(); siginfo_t siginfo = { .si_code = SI_QUEUE, .si_pid = pid, .si_uid = getuid(), }; return syscall(SYS_rt_tgsigqueueinfo, pid, tid, SIGABRT, &siginfo); #else errno = ENODEV; return -1; #endif } } media/utils/include/mediautils/TimeCheck.h +0 −1 Original line number Diff line number Diff line Loading @@ -123,7 +123,6 @@ class TimeCheck { const Duration secondChanceDuration; const std::chrono::system_clock::time_point startSystemTime; const pid_t tid; void onCancel(TimerThread::Handle handle) const; void onTimeout(TimerThread::Handle handle) const; }; Loading media/utils/include/mediautils/TimerThread.h +45 −27 Original line number Diff line number Diff line Loading @@ -21,9 +21,11 @@ #include <deque> #include <functional> #include <map> #include <memory> #include <mutex> #include <string> #include <thread> #include <vector> #include <android-base/thread_annotations.h> Loading Loading @@ -151,7 +153,15 @@ class TimerThread { */ bool cancelTask(Handle handle); std::string toString(size_t retiredCount = SIZE_MAX) const; struct SnapshotAnalysis; /** * Take a snapshot of the current state of the TimerThread and determine the * potential cause of a deadlock. * \param retiredCount The number of successfully retired calls to capture * (may be many). * \return See below for a description of a SnapShotAnalysis object */ SnapshotAnalysis getSnapshotAnalysis(size_t retiredCount = SIZE_MAX) const; /** * Returns a string representation of the TimerThread queue. Loading Loading @@ -202,7 +212,6 @@ class TimerThread { return s; } private: // To minimize movement of data, we pass around shared_ptrs to Requests. // These are allocated and deallocated outside of the lock. // TODO(b/243839867) consider options to merge Request with the Loading Loading @@ -232,6 +241,40 @@ class TimerThread { std::string toString() const; }; // SnapshotAnalysis contains info deduced by analysisTimeout(). struct SnapshotAnalysis { // If we were unable to determine any applicable thread ids, // we leave their value as INVALID_PID. // Note, we use the linux thread id (not pthread), so its type is pid_t. static constexpr pid_t INVALID_PID = -1; // Description of likely issue and/or blocked method. // Empty if no actionable info. std::string description; // Tid of the (latest) monitored thread which has timed out. // This is the thread which the suspect is deduced with respect to. // Most often, this is the thread which an abort is being triggered // from. pid_t timeoutTid = INVALID_PID; // Tid of the (HAL) thread which has likely halted progress, selected // from pendingRequests. May be the same as timeoutTid, if the timed-out // thread directly called into the HAL. pid_t suspectTid = INVALID_PID; // Number of second chances given by the timer thread size_t secondChanceCount; // List of pending requests std::vector<std::shared_ptr<const Request>> pendingRequests; // List of timed-out requests std::vector<std::shared_ptr<const Request>> timeoutRequests; // List of retired requests std::vector<std::shared_ptr<const Request>> retiredRequests; // Dumps the information contained above as well as additional call // stacks where applicable. std::string toString() const; }; private: // Deque of requests, in order of add(). // This class is thread-safe. class RequestQueue { Loading Loading @@ -326,36 +369,11 @@ class TimerThread { } }; // Analysis contains info deduced by analysisTimeout(). // // Summary is the result string from checking timeoutRequests to see if // any might be caused by blocked calls in pendingRequests. // // Summary string is empty if there is no automatic actionable info. // // timeoutTid is the tid selected from timeoutRequests (if any). // // HALBlockedTid is the tid that is blocked from pendingRequests believed // to cause the timeout. // HALBlockedTid may be INVALID_PID if no suspected tid is found, // and if HALBlockedTid is valid, it will not be the same as timeoutTid. // static constexpr pid_t INVALID_PID = -1; struct Analysis { std::string summary; pid_t timeoutTid = INVALID_PID; pid_t HALBlockedTid = INVALID_PID; }; // A HAL method is where the substring "Hidl" is in the class name. // The tag should look like: ... Hidl ... :: ... static bool isRequestFromHal(const std::shared_ptr<const Request>& request); // Returns analysis from the requests. static Analysis analyzeTimeout( const std::vector<std::shared_ptr<const Request>>& timeoutRequests, const std::vector<std::shared_ptr<const Request>>& pendingRequests); std::vector<std::shared_ptr<const Request>> getPendingRequests() const; static constexpr size_t kRetiredQueueMax = 16; Loading Loading
media/utils/TimeCheck.cpp +26 −9 Original line number Diff line number Diff line Loading @@ -14,11 +14,14 @@ * limitations under the License. */ #include <csignal> #include "mediautils/TimerThread.h" #define LOG_TAG "TimeCheck" #include <optional> #include <android-base/logging.h> #include <android-base/strings.h> #include <audio_utils/clock.h> #include <mediautils/EventLog.h> #include <mediautils/FixedString.h> Loading @@ -27,20 +30,21 @@ #include <mediautils/TidWrapper.h> #include <utils/Log.h> #if defined(__BIONIC__) #if defined(__ANDROID__) #include "debuggerd/handler.h" #endif namespace android::mediautils { // This function appropriately signals a pid to dump a backtrace if we are // running on device. // running on device (and the HAL exists). If we are not running on an Android // device, there is no HAL to signal (so we do nothing). static inline void signalAudioHAL([[maybe_unused]] pid_t pid) { #if defined(__BIONIC__) #if defined(__ANDROID__) sigqueue(pid, DEBUGGER_SIGNAL, {.sival_int = 0}); #endif } namespace android::mediautils { /** * Returns the std::string "HH:MM:SS.MSc" from a system_clock time_point. */ Loading Loading @@ -148,7 +152,7 @@ TimerThread& TimeCheck::getTimeCheckThread() { std::string TimeCheck::toString() { // note pending and retired are individually locked for maximum concurrency, // snapshot is not instantaneous at a single time. return getTimeCheckThread().toString(); return getTimeCheckThread().getSnapshotAnalysis().toString(); } TimeCheck::TimeCheck(std::string_view tag, OnTimerFunc&& onTimer, Duration requestedTimeoutDuration, Loading Loading @@ -253,7 +257,7 @@ void TimeCheck::TimeCheckHandler::onTimeout(TimerThread::Handle timerHandle) con // Generate the TimerThread summary string early before sending signals to the // HAL processes which can affect thread behavior. const std::string summary = getTimeCheckThread().toString(4 /* retiredCount */); const auto snapshotAnalysis = getTimeCheckThread().getSnapshotAnalysis(4 /* retiredCount */); // Generate audio HAL processes tombstones and allow time to complete // before forcing restart Loading Loading @@ -281,7 +285,7 @@ void TimeCheck::TimeCheckHandler::onTimeout(TimerThread::Handle timerHandle) con .append(analyzeTimeouts(requestedTimeoutMs + secondChanceMs, elapsedSteadyMs, elapsedSystemMs)).append("\n") .append(halPids).append("\n") .append(summary); .append(snapshotAnalysis.toString()); // Note: LOG_ALWAYS_FATAL limits the size of the string - per log/log.h: // Log message text may be truncated to less than an Loading @@ -291,7 +295,20 @@ void TimeCheck::TimeCheckHandler::onTimeout(TimerThread::Handle timerHandle) con // to avoid the size limitation. LOG(FATAL) does an abort whereas // LOG(FATAL_WITHOUT_ABORT) does not abort. LOG(FATAL) << abortMessage; static constexpr pid_t invalidPid = TimerThread::SnapshotAnalysis::INVALID_PID; pid_t tidToAbort = invalidPid; if (snapshotAnalysis.suspectTid != invalidPid) { tidToAbort = snapshotAnalysis.suspectTid; } else if (snapshotAnalysis.timeoutTid != invalidPid) { tidToAbort = snapshotAnalysis.timeoutTid; } LOG(FATAL_WITHOUT_ABORT) << abortMessage; const auto ret = abortTid(tidToAbort); if (ret < 0) { LOG(FATAL) << "TimeCheck thread signal failed, aborting process. " "errno: " << errno << base::ErrnoNumberAsString(errno); } } // Automatically create a TimeCheck class for a class and method. Loading
media/utils/TimerThread.cpp +36 −41 Original line number Diff line number Diff line Loading @@ -59,39 +59,29 @@ bool TimerThread::cancelTask(Handle handle) { return true; } std::string TimerThread::toString(size_t retiredCount) const { std::string TimerThread::SnapshotAnalysis::toString() const { // Note: These request queues are snapshot very close together but // not at "identical" times as we don't use a class-wide lock. std::vector<std::shared_ptr<const Request>> timeoutRequests; std::vector<std::shared_ptr<const Request>> retiredRequests; mTimeoutQueue.copyRequests(timeoutRequests); mRetiredQueue.copyRequests(retiredRequests, retiredCount); std::vector<std::shared_ptr<const Request>> pendingRequests = getPendingRequests(); struct Analysis analysis = analyzeTimeout(timeoutRequests, pendingRequests); std::string analysisSummary; if (!analysis.summary.empty()) { analysisSummary = std::string("\nanalysis [ ").append(analysis.summary).append(" ]"); } std::string analysisSummary = std::string("\nanalysis [ ").append(description).append(" ]"); std::string timeoutStack; if (analysis.timeoutTid != -1) { timeoutStack = std::string("\ntimeout(") .append(std::to_string(analysis.timeoutTid)).append(") callstack [\n") .append(getCallStackStringForTid(analysis.timeoutTid)).append("]"); } std::string blockedStack; if (analysis.HALBlockedTid != -1) { if (timeoutTid != -1) { timeoutStack = std::string(suspectTid == timeoutTid ? "\ntimeout/blocked(" : "\ntimeout(") .append(std::to_string(timeoutTid)).append(") callstack [\n") .append(getCallStackStringForTid(timeoutTid)).append("]"); } if (suspectTid != -1 && suspectTid != timeoutTid) { blockedStack = std::string("\nblocked(") .append(std::to_string(analysis.HALBlockedTid)).append(") callstack [\n") .append(getCallStackStringForTid(analysis.HALBlockedTid)).append("]"); .append(std::to_string(suspectTid)).append(") callstack [\n") .append(getCallStackStringForTid(suspectTid)).append("]"); } return std::string("now ") .append(formatTime(std::chrono::system_clock::now())) .append("\nsecondChanceCount ") .append(std::to_string(mMonitorThread.getSecondChanceCount())) .append(std::to_string(secondChanceCount)) .append(analysisSummary) .append("\ntimeout [ ") .append(requestsToString(timeoutRequests)) Loading Loading @@ -121,16 +111,23 @@ bool TimerThread::isRequestFromHal(const std::shared_ptr<const Request>& request return separatorPos != std::string::npos; } /* static */ struct TimerThread::Analysis TimerThread::analyzeTimeout( const std::vector<std::shared_ptr<const Request>>& timeoutRequests, const std::vector<std::shared_ptr<const Request>>& pendingRequests) { if (timeoutRequests.empty() || pendingRequests.empty()) return {}; // nothing to say. struct TimerThread::SnapshotAnalysis TimerThread::getSnapshotAnalysis(size_t retiredCount) const { struct SnapshotAnalysis analysis{}; // The following snapshot of the TimerThread state will be utilized for // analysis. Note, there is no lock around these calls, so there could be // a state update between them. mTimeoutQueue.copyRequests(analysis.timeoutRequests); mRetiredQueue.copyRequests(analysis.retiredRequests, retiredCount); analysis.pendingRequests = getPendingRequests(); analysis.secondChanceCount = mMonitorThread.getSecondChanceCount(); // No call has timed out, so there is no analysis to be done. if (analysis.timeoutRequests.empty()) return analysis; // for now look at last timeout (in our case, the only timeout) const std::shared_ptr<const Request> timeout = timeoutRequests.back(); const std::shared_ptr<const Request> timeout = analysis.timeoutRequests.back(); analysis.timeoutTid = timeout->tid; if (analysis.pendingRequests.empty()) return analysis; // pending Requests that are problematic. std::vector<std::shared_ptr<const Request>> pendingExact; std::vector<std::shared_ptr<const Request>> pendingPossible; Loading @@ -141,7 +138,7 @@ struct TimerThread::Analysis TimerThread::analyzeTimeout( // such as HAL write() and read(). // constexpr Duration kPendingDuration = 1000ms; for (const auto& pending : pendingRequests) { for (const auto& pending : analysis.pendingRequests) { // If the pending tid is the same as timeout tid, problem identified. if (pending->tid == timeout->tid) { pendingExact.emplace_back(pending); Loading @@ -154,29 +151,27 @@ struct TimerThread::Analysis TimerThread::analyzeTimeout( } } struct Analysis analysis{}; analysis.timeoutTid = timeout->tid; std::string& summary = analysis.summary; std::string& description = analysis.description; if (!pendingExact.empty()) { const auto& request = pendingExact.front(); const bool hal = isRequestFromHal(request); if (hal) { summary = std::string("Blocked directly due to HAL call: ") description = std::string("Blocked directly due to HAL call: ") .append(request->toString()); analysis.suspectTid= request->tid; } } if (summary.empty() && !pendingPossible.empty()) { if (description.empty() && !pendingPossible.empty()) { for (const auto& request : pendingPossible) { const bool hal = isRequestFromHal(request); if (hal) { // The first blocked call is the most likely one. // Recent calls might be temporarily blocked // calls such as write() or read() depending on kDuration. summary = std::string("Blocked possibly due to HAL call: ") description = std::string("Blocked possibly due to HAL call: ") .append(request->toString()); analysis.HALBlockedTid = request->tid; analysis.suspectTid= request->tid; } } } Loading
media/utils/include/mediautils/TidWrapper.h +19 −0 Original line number Diff line number Diff line Loading @@ -16,8 +16,11 @@ #pragma once #if defined(__linux__) #include <signal.h> #include <sys/syscall.h> #include <unistd.h> #endif namespace android::mediautils { Loading @@ -31,4 +34,20 @@ inline pid_t getThreadIdWrapper() { #endif } // Send an abort signal to a (linux) thread id. inline int abortTid(int tid) { #if defined(__linux__) const pid_t pid = getpid(); siginfo_t siginfo = { .si_code = SI_QUEUE, .si_pid = pid, .si_uid = getuid(), }; return syscall(SYS_rt_tgsigqueueinfo, pid, tid, SIGABRT, &siginfo); #else errno = ENODEV; return -1; #endif } }
media/utils/include/mediautils/TimeCheck.h +0 −1 Original line number Diff line number Diff line Loading @@ -123,7 +123,6 @@ class TimeCheck { const Duration secondChanceDuration; const std::chrono::system_clock::time_point startSystemTime; const pid_t tid; void onCancel(TimerThread::Handle handle) const; void onTimeout(TimerThread::Handle handle) const; }; Loading
media/utils/include/mediautils/TimerThread.h +45 −27 Original line number Diff line number Diff line Loading @@ -21,9 +21,11 @@ #include <deque> #include <functional> #include <map> #include <memory> #include <mutex> #include <string> #include <thread> #include <vector> #include <android-base/thread_annotations.h> Loading Loading @@ -151,7 +153,15 @@ class TimerThread { */ bool cancelTask(Handle handle); std::string toString(size_t retiredCount = SIZE_MAX) const; struct SnapshotAnalysis; /** * Take a snapshot of the current state of the TimerThread and determine the * potential cause of a deadlock. * \param retiredCount The number of successfully retired calls to capture * (may be many). * \return See below for a description of a SnapShotAnalysis object */ SnapshotAnalysis getSnapshotAnalysis(size_t retiredCount = SIZE_MAX) const; /** * Returns a string representation of the TimerThread queue. Loading Loading @@ -202,7 +212,6 @@ class TimerThread { return s; } private: // To minimize movement of data, we pass around shared_ptrs to Requests. // These are allocated and deallocated outside of the lock. // TODO(b/243839867) consider options to merge Request with the Loading Loading @@ -232,6 +241,40 @@ class TimerThread { std::string toString() const; }; // SnapshotAnalysis contains info deduced by analysisTimeout(). struct SnapshotAnalysis { // If we were unable to determine any applicable thread ids, // we leave their value as INVALID_PID. // Note, we use the linux thread id (not pthread), so its type is pid_t. static constexpr pid_t INVALID_PID = -1; // Description of likely issue and/or blocked method. // Empty if no actionable info. std::string description; // Tid of the (latest) monitored thread which has timed out. // This is the thread which the suspect is deduced with respect to. // Most often, this is the thread which an abort is being triggered // from. pid_t timeoutTid = INVALID_PID; // Tid of the (HAL) thread which has likely halted progress, selected // from pendingRequests. May be the same as timeoutTid, if the timed-out // thread directly called into the HAL. pid_t suspectTid = INVALID_PID; // Number of second chances given by the timer thread size_t secondChanceCount; // List of pending requests std::vector<std::shared_ptr<const Request>> pendingRequests; // List of timed-out requests std::vector<std::shared_ptr<const Request>> timeoutRequests; // List of retired requests std::vector<std::shared_ptr<const Request>> retiredRequests; // Dumps the information contained above as well as additional call // stacks where applicable. std::string toString() const; }; private: // Deque of requests, in order of add(). // This class is thread-safe. class RequestQueue { Loading Loading @@ -326,36 +369,11 @@ class TimerThread { } }; // Analysis contains info deduced by analysisTimeout(). // // Summary is the result string from checking timeoutRequests to see if // any might be caused by blocked calls in pendingRequests. // // Summary string is empty if there is no automatic actionable info. // // timeoutTid is the tid selected from timeoutRequests (if any). // // HALBlockedTid is the tid that is blocked from pendingRequests believed // to cause the timeout. // HALBlockedTid may be INVALID_PID if no suspected tid is found, // and if HALBlockedTid is valid, it will not be the same as timeoutTid. // static constexpr pid_t INVALID_PID = -1; struct Analysis { std::string summary; pid_t timeoutTid = INVALID_PID; pid_t HALBlockedTid = INVALID_PID; }; // A HAL method is where the substring "Hidl" is in the class name. // The tag should look like: ... Hidl ... :: ... static bool isRequestFromHal(const std::shared_ptr<const Request>& request); // Returns analysis from the requests. static Analysis analyzeTimeout( const std::vector<std::shared_ptr<const Request>>& timeoutRequests, const std::vector<std::shared_ptr<const Request>>& pendingRequests); std::vector<std::shared_ptr<const Request>> getPendingRequests() const; static constexpr size_t kRetiredQueueMax = 16; Loading