TimerThread: Add timeout analysis logic (f45f34cd) · Commits · e / os / android_frameworks_av

media/utils/TimeCheck.cpp

+1 −0

Original line number	Diff line number	Diff line
		@@ -197,6 +197,7 @@ void TimeCheck::TimeCheckHandler::onTimeout() const
		} else {
		ALOGI("No HAL process pid available, skipping tombstones");
		}

		LOG_EVENT_STRING(LOGTAG_AUDIO_BINDER_TIMEOUT, tag.c_str());
		LOG_ALWAYS_FATAL("TimeCheck timeout for %s scheduled %s on thread %d\n%s",
		tag.c_str(), formatTime(startTime).c_str(), tid, summary.c_str());

media/utils/TimerThread.cpp

+100 −5

Original line number	Diff line number	Diff line
		@@ -53,17 +53,112 @@ bool TimerThread::cancelTask(Handle handle) {
		}

		std::string TimerThread::toString(size_t retiredCount) const {
		// Note: These request queues are snapshot very close together but
		// not at "identical" times as we don't use a class-wide lock.

		std::vector<std::shared_ptr<const Request>> timeoutRequests;
		std::vector<std::shared_ptr<const Request>> retiredRequests;
		mTimeoutQueue.copyRequests(timeoutRequests);
		mRetiredQueue.copyRequests(retiredRequests, retiredCount);
		std::vector<std::shared_ptr<const Request>> pendingRequests =
		getPendingRequests();

		struct Analysis analysis = analyzeTimeout(timeoutRequests, pendingRequests);
		std::string analysisSummary;
		if (!analysis.summary.empty()) {
		analysisSummary = std::string("\nanalysis [ ").append(analysis.summary).append(" ]");
		}
		return std::string("now ")
		.append(formatTime(std::chrono::system_clock::now()))
		.append("\npending [ ")
		.append(pendingToString())
		.append(" ]\ntimeout [ ")
		.append(timeoutToString())
		.append(analysisSummary)
		.append("\ntimeout [ ")
		.append(requestsToString(timeoutRequests))
		.append(" ]\npending [ ")
		.append(requestsToString(pendingRequests))
		.append(" ]\nretired [ ")
		.append(retiredToString(retiredCount))
		.append(requestsToString(retiredRequests))
		.append(" ]");
		}

		// A HAL method is where the substring "Hidl" is in the class name.
		// The tag should look like: ... Hidl ... :: ...
		// When the audio HAL is updated to AIDL perhaps we will use instead
		// a global directory of HAL classes.
		//
		// See MethodStatistics.cpp:
		// mediautils::getStatisticsClassesForModule(METHOD_STATISTICS_MODULE_NAME_AUDIO_HIDL)
		//
		/* static */
		bool TimerThread::isRequestFromHal(const std::shared_ptr<const Request>& request) {
		const size_t hidlPos = request->tag.find("Hidl");
		if (hidlPos == std::string::npos) return false;
		// should be a separator afterwards Hidl which indicates the string was in the class.
		const size_t separatorPos = request->tag.find("::", hidlPos);
		return separatorPos != std::string::npos;
		}

		/* static */
		struct TimerThread::Analysis TimerThread::analyzeTimeout(
		const std::vector<std::shared_ptr<const Request>>& timeoutRequests,
		const std::vector<std::shared_ptr<const Request>>& pendingRequests) {

		if (timeoutRequests.empty() \|\| pendingRequests.empty()) return {}; // nothing to say.

		// for now look at last timeout (in our case, the only timeout)
		const std::shared_ptr<const Request> timeout = timeoutRequests.back();

		// pending Requests that are problematic.
		std::vector<std::shared_ptr<const Request>> pendingExact;
		std::vector<std::shared_ptr<const Request>> pendingPossible;

		// We look at pending requests that were scheduled no later than kDuration
		// after the timeout request. This prevents false matches with calls
		// that naturally block for a short period of time
		// such as HAL write() and read().
		//
		auto constexpr kDuration = std::chrono::milliseconds(1000);
		for (const auto& pending : pendingRequests) {
		// If the pending tid is the same as timeout tid, problem identified.
		if (pending->tid == timeout->tid) {
		pendingExact.emplace_back(pending);
		continue;
		}

		// if the pending tid is scheduled within time limit
		if (pending->scheduled - timeout->scheduled < kDuration) {
		pendingPossible.emplace_back(pending);
		}
		}

		struct Analysis analysis{};

		analysis.timeoutTid = timeout->tid;
		std::string& summary = analysis.summary;
		if (!pendingExact.empty()) {
		const auto& request = pendingExact.front();
		const bool hal = isRequestFromHal(request);

		if (hal) {
		summary = std::string("Blocked directly due to HAL call: ")
		.append(request->toString());
		}
		}
		if (summary.empty() && !pendingPossible.empty()) {
		for (const auto& request : pendingPossible) {
		const bool hal = isRequestFromHal(request);
		if (hal) {
		// The first blocked call is the most likely one.
		// Recent calls might be temporarily blocked
		// calls such as write() or read() depending on kDuration.
		summary = std::string("Blocked possibly due to HAL call: ")
		.append(request->toString());
		analysis.HALBlockedTid = request->tid;
		}
		}
		}
		return analysis;
		}

		std::vector<std::shared_ptr<const TimerThread::Request>> TimerThread::getPendingRequests() const {
		constexpr size_t kEstimatedPendingRequests = 8; // approx 128 byte alloc.
		std::vector<std::shared_ptr<const Request>> pendingRequests;

media/utils/include/mediautils/TimerThread.h

+30 −0

Original line number	Diff line number	Diff line
		@@ -212,6 +212,36 @@ class TimerThread {
		void copyRequests(std::vector<std::shared_ptr<const Request>>& requests) const;
		};

		// Analysis contains info deduced by analysisTimeout().
		//
		// Summary is the result string from checking timeoutRequests to see if
		// any might be caused by blocked calls in pendingRequests.
		//
		// Summary string is empty if there is no automatic actionable info.
		//
		// timeoutTid is the tid selected from timeoutRequests (if any).
		//
		// HALBlockedTid is the tid that is blocked from pendingRequests believed
		// to cause the timeout.
		// HALBlockedTid may be INVALID_PID if no suspected tid is found,
		// and if HALBlockedTid is valid, it will not be the same as timeoutTid.
		//
		static constexpr pid_t INVALID_PID = -1;
		struct Analysis {
		std::string summary;
		pid_t timeoutTid = INVALID_PID;
		pid_t HALBlockedTid = INVALID_PID;
		};

		// A HAL method is where the substring "Hidl" is in the class name.
		// The tag should look like: ... Hidl ... :: ...
		static bool isRequestFromHal(const std::shared_ptr<const Request>& request);

		// Returns analysis from the requests.
		static Analysis analyzeTimeout(
		const std::vector<std::shared_ptr<const Request>>& timeoutRequests,
		const std::vector<std::shared_ptr<const Request>>& pendingRequests);

		std::vector<std::shared_ptr<const Request>> getPendingRequests() const;

		// A no-timeout request is represented by a handles at the end of steady_clock time,