Merge "Use eBPF-based time-in-state monitoring for groups of threads" (371862bc) · Commits · e / os / platform_frameworks_native

libs/cputimeinstate/Android.bp

+1 −0

Original line number	Diff line number	Diff line
		@@ -33,5 +33,6 @@ cc_test {
		"-Wall",
		"-Wextra",
		],
		require_root: true,
		}

libs/cputimeinstate/cputimeinstate.cpp

+110 −1

Original line number	Diff line number	Diff line
		@@ -59,6 +59,7 @@ static std::set<uint32_t> gAllFreqs;
		static unique_fd gTisMapFd;
		static unique_fd gConcurrentMapFd;
		static unique_fd gUidLastUpdateMapFd;
		static unique_fd gPidTisMapFd;

		static std::optional<std::vector<uint32_t>> readNumbersFromFile(const std::string &path) {
		std::string data;
		@@ -139,6 +140,12 @@ static bool initGlobals() {
		unique_fd{bpf_obj_get(BPF_FS_PATH "map_time_in_state_uid_last_update_map")};
		if (gUidLastUpdateMapFd < 0) return false;

		gPidTisMapFd = unique_fd{mapRetrieveRO(BPF_FS_PATH "map_time_in_state_pid_time_in_state_map")};
		if (gPidTisMapFd < 0) return false;

		unique_fd trackedPidMapFd(mapRetrieveWO(BPF_FS_PATH "map_time_in_state_pid_tracked_map"));
		if (trackedPidMapFd < 0) return false;

		gInitialized = true;
		return true;
		}
		@@ -222,7 +229,8 @@ bool startTrackingUidTimes() {
		}

		gTracking = attachTracepointProgram("sched", "sched_switch") &&
		attachTracepointProgram("power", "cpu_frequency");
		attachTracepointProgram("power", "cpu_frequency") &&
		attachTracepointProgram("sched", "sched_process_free");
		return gTracking;
		}

		@@ -502,5 +510,106 @@ bool clearUidTimes(uint32_t uid) {
		return true;
		}

		bool startTrackingProcessCpuTimes(pid_t pid) {
		if (!gInitialized && !initGlobals()) return false;

		unique_fd trackedPidHashMapFd(
		mapRetrieveWO(BPF_FS_PATH "map_time_in_state_pid_tracked_hash_map"));
		if (trackedPidHashMapFd < 0) return false;

		unique_fd trackedPidMapFd(mapRetrieveWO(BPF_FS_PATH "map_time_in_state_pid_tracked_map"));
		if (trackedPidMapFd < 0) return false;

		for (uint32_t index = 0; index < MAX_TRACKED_PIDS; index++) {
		// Find first available [index, pid] entry in the pid_tracked_hash_map map
		if (writeToMapEntry(trackedPidHashMapFd, &index, &pid, BPF_NOEXIST) != 0) {
		if (errno != EEXIST) {
		return false;
		}
		continue; // This index is already taken
		}

		tracked_pid_t tracked_pid = {.pid = pid, .state = TRACKED_PID_STATE_ACTIVE};
		if (writeToMapEntry(trackedPidMapFd, &index, &tracked_pid, BPF_ANY) != 0) {
		return false;
		}
		return true;
		}
		return false;
		}

		// Marks the specified task identified by its PID (aka TID) for CPU time-in-state tracking
		// aggregated with other tasks sharing the same TGID and aggregation key.
		bool startAggregatingTaskCpuTimes(pid_t pid, uint16_t aggregationKey) {
		if (!gInitialized && !initGlobals()) return false;

		unique_fd taskAggregationMapFd(
		mapRetrieveWO(BPF_FS_PATH "map_time_in_state_pid_task_aggregation_map"));
		if (taskAggregationMapFd < 0) return false;

		return writeToMapEntry(taskAggregationMapFd, &pid, &aggregationKey, BPF_ANY) == 0;
		}

		// Retrieves the times in ns that the tasks of process |tgid| spent running at each CPU freq,
		// aggregated by aggregation key.
		// Return contains no value on error, otherwise it contains a map from aggregation keys
		// to vectors of vectors using the format:
		// { aggKey0 -> [[t0_0_0, t0_0_1, ...], [t0_1_0, t0_1_1, ...], ...],
		//   aggKey1 -> [[t1_0_0, t1_0_1, ...], [t1_1_0, t1_1_1, ...], ...], ... }
		// where ti_j_k is the ns that tasks with the ith aggregation key spent running on the jth
		// cluster at the cluster's kth lowest freq.
		// Every requested key is present in the result (all zeros if no data was found for it).
		// A key listed more than once in |aggregationKeys| is only collected once.
		std::optional<std::unordered_map<uint16_t, std::vector<std::vector<uint64_t>>>>
		getAggregatedTaskCpuFreqTimes(pid_t tgid, const std::vector<uint16_t> &aggregationKeys) {
		    if (!gInitialized && !initGlobals()) return {};

		    uint32_t maxFreqCount = 0;
		    std::vector<std::vector<uint64_t>> mapFormat;
		    for (const auto &freqList : gPolicyFreqs) {
		        if (freqList.size() > maxFreqCount) maxFreqCount = freqList.size();
		        mapFormat.emplace_back(freqList.size(), 0);
		    }

		    // Number of FREQS_PER_ENTRY-sized buckets needed to cover the longest frequency list.
		    // Computed as a rounded-up division so that an empty frequency table yields zero buckets
		    // instead of wrapping around in (maxFreqCount - 1).
		    const uint32_t bucketCount = (maxFreqCount + FREQS_PER_ENTRY - 1) / FREQS_PER_ENTRY;

		    bool dataCollected = false;
		    std::unordered_map<uint16_t, std::vector<std::vector<uint64_t>>> map;
		    std::vector<tis_val_t> vals(gNCpus);
		    for (uint16_t aggregationKey : aggregationKeys) {
		        auto [entry, inserted] = map.emplace(aggregationKey, mapFormat);
		        // A repeated key already has its data; accumulating again would double count it.
		        if (!inserted) continue;
		        // Safe to hold: nothing is inserted into |map| while this reference is in use.
		        auto &policyTimes = entry->second;

		        aggregated_task_tis_key_t key{.tgid = tgid, .aggregation_key = aggregationKey};
		        for (key.bucket = 0; key.bucket < bucketCount; ++key.bucket) {
		            if (findMapEntry(gPidTisMapFd, &key, vals.data()) != 0) {
		                // A missing bucket just means no time was recorded; anything else is an error.
		                if (errno != ENOENT) {
		                    return {};
		                }
		                continue;
		            }
		            dataCollected = true;

		            // Combine data by aggregating time-in-state data grouped by CPU cluster aka policy.
		            uint32_t offset = key.bucket * FREQS_PER_ENTRY;
		            uint32_t nextOffset = offset + FREQS_PER_ENTRY;
		            for (uint32_t j = 0; j < gNPolicies; ++j) {
		                // This policy has fewer frequencies than the bucket's starting offset.
		                if (offset >= gPolicyFreqs[j].size()) continue;
		                auto begin = policyTimes[j].begin() + offset;
		                // The last bucket of a policy may be only partially used.
		                auto end = nextOffset < gPolicyFreqs[j].size() ? begin + FREQS_PER_ENTRY
		                                                               : policyTimes[j].end();
		                for (const auto &cpu : gPolicyCpus[j]) {
		                    std::transform(begin, end, std::begin(vals[cpu].ar), begin,
		                                   std::plus<uint64_t>());
		                }
		            }
		        }
		    }

		    if (!dataCollected) {
		        // Check if eBPF is supported on this device. If it is, gTisMap should not be empty.
		        time_key_t key;
		        if (getFirstMapKey(gTisMapFd, &key) != 0) {
		            return {};
		        }
		    }
		    return map;
		}

		} // namespace bpf
		} // namespace android

libs/cputimeinstate/cputimeinstate.h

+5 −0

Original line number	Diff line number	Diff line
		@@ -41,5 +41,10 @@ std::optional<std::unordered_map<uint32_t, concurrent_time_t>>
		getUidsUpdatedConcurrentTimes(uint64_t *lastUpdate);
		bool clearUidTimes(unsigned int uid);

		// Starts CPU time-in-state tracking for the process |pid|. Returns false on failure,
		// including when no free tracking slot is left.
		bool startTrackingProcessCpuTimes(pid_t pid);
		// Marks the task identified by its PID (aka TID) for CPU time-in-state tracking aggregated
		// with other tasks sharing the same TGID and |aggregationKey|. Returns false on failure.
		bool startAggregatingTaskCpuTimes(pid_t pid, uint16_t aggregationKey);
		// Returns, for each requested aggregation key, the ns spent at each frequency of each CPU
		// cluster by the tasks of process |tgid|. Contains no value on error.
		std::optional<std::unordered_map<uint16_t, std::vector<std::vector<uint64_t>>>>
		getAggregatedTaskCpuFreqTimes(pid_t tgid, const std::vector<uint16_t> &aggregationKeys);

		} // namespace bpf
		} // namespace android

libs/cputimeinstate/testtimeinstate.cpp

+82 −0

Original line number	Diff line number	Diff line
		@@ -19,6 +19,8 @@

		#include <sys/sysinfo.h>

		#include <pthread.h>
		#include <semaphore.h>
		#include <numeric>
		#include <unordered_map>
		#include <vector>
		@@ -504,5 +506,85 @@ TEST(TimeInStateTest, GetCpuFreqs) {
		for (size_t i = 0; i < freqs->size(); ++i) EXPECT_EQ((freqs)[i].size(), (times)[i].size());
		}

		// Returns the current CLOCK_MONOTONIC reading in nanoseconds.
		// The seconds field is widened to 64 bits before scaling so the multiplication cannot
		// overflow where time_t is only 32 bits wide. |spec| is zero-initialized so a failed
		// clock_gettime() yields 0 rather than an indeterminate value.
		uint64_t timeNanos() {
		    struct timespec spec = {};
		    clock_gettime(CLOCK_MONOTONIC, &spec);
		    return static_cast<uint64_t>(spec.tv_sec) * 1000000000ULL +
		           static_cast<uint64_t>(spec.tv_nsec);
		}

		// Keeps CPU busy with some number crunching.
		// The accumulator is volatile so every iteration performs a real load and store; a plain
		// local that is never read lets the compiler delete the loop and burn no CPU at all.
		// It is unsigned so that the running sum wrapping on a 32-bit long is well defined.
		void useCpu() {
		    volatile unsigned long sum = 0;
		    for (int i = 0; i < 100000; i++) {
		        sum = sum + static_cast<unsigned long>(i);
		    }
		}

		sem_t pingsem, pongsem;

		// Thread entry point for the ping-pong test. Ten times in a row it waits on |pingsem|,
		// burns some CPU via useCpu(), and then posts |pongsem|.
		// Has the void *(void *) shape that pthread_create() requires; the argument is unused and
		// the thread's result is always nullptr.
		void *testThread(void *) {
		    for (int i = 0; i < 10; i++) {
		        sem_wait(&pingsem);
		        useCpu();
		        sem_post(&pongsem);
		    }
		    return nullptr;
		}

		// Checks that aggregated per-task time-in-state data is reported for this process:
		// a helper thread is tagged with aggregation key 42, both threads alternate doing CPU work,
		// and the totals for keys 0 and 42 must be non-zero and no larger than the test duration.
		TEST(TimeInStateTest, GetAggregatedTaskCpuFreqTimes) {
		    uint64_t startTimeNs = timeNanos();

		    ASSERT_EQ(sem_init(&pingsem, 0, 1), 0);
		    ASSERT_EQ(sem_init(&pongsem, 0, 0), 0);

		    pthread_t thread;
		    ASSERT_EQ(pthread_create(&thread, nullptr, &testThread, nullptr), 0);

		    // This process may have been running for some time, so when we start tracking
		    // CPU time, the very first switch may include the accumulated time.
		    // Yield the remainder of this timeslice to the newly created thread.
		    sem_wait(&pongsem);
		    sem_post(&pingsem);

		    // EXPECT (not ASSERT) so that a failure is reported but the helper thread is still
		    // driven to completion and joined below.
		    pid_t tgid = getpid();
		    EXPECT_TRUE(startTrackingProcessCpuTimes(tgid));

		    pid_t tid = pthread_gettid_np(thread);
		    EXPECT_TRUE(startAggregatingTaskCpuTimes(tid, 42));

		    // Play ping-pong with the other thread to ensure that both threads get
		    // some CPU time.
		    for (int i = 0; i < 9; i++) {
		        sem_wait(&pongsem);
		        useCpu();
		        sem_post(&pingsem);
		    }

		    EXPECT_EQ(pthread_join(thread, nullptr), 0);

		    // The helper thread has exited, so nobody can be blocked on the semaphores any more.
		    sem_destroy(&pingsem);
		    sem_destroy(&pongsem);

		    const auto optionalMap = getAggregatedTaskCpuFreqTimes(tgid, {0, 42});
		    ASSERT_TRUE(optionalMap.has_value());

		    const auto &map = *optionalMap;
		    ASSERT_EQ(map.size(), 2u);

		    uint64_t testDurationNs = timeNanos() - startTimeNs;
		    for (const auto &[aggregationKey, timesInState] : map) {
		        ASSERT_TRUE(aggregationKey == 0 || aggregationKey == 42);

		        // Sum the time over every cluster and every frequency for this key.
		        uint64_t totalCpuTime = 0;
		        for (const auto &policyTimes : timesInState) {
		            totalCpuTime = std::accumulate(policyTimes.begin(), policyTimes.end(), totalCpuTime);
		        }
		        ASSERT_GT(totalCpuTime, 0ul);
		        ASSERT_LE(totalCpuTime, testDurationNs);
		    }
		}

		} // namespace bpf
		} // namespace android