Merge changes from topic "cgroup.events" into main am: 1b44b8c6d6 am: 816837eb5d (88fd3fbf) · Commits · e / os / android_system_core

libprocessgroup/include/processgroup/processgroup.h

+5 −5

Original line number	Diff line number	Diff line
		@@ -65,9 +65,8 @@ bool UsePerAppMemcg();
		// should be active again. E.g. Zygote specialization for child process.
		void DropTaskProfilesResourceCaching();

		// Return 0 and removes the cgroup if there are no longer any processes in it.
		// Returns -1 in the case of an error occurring or if there are processes still running
		// even after retrying for up to 200ms.
		// Returns 0 if all processes were killed and the cgroup was successfully removed.
		// Returns -1 in the case of an error occurring or if there are processes still running.
		int killProcessGroup(uid_t uid, int initialPid, int signal);

		// Returns the same as killProcessGroup(), however it does not retry, which means
		@@ -76,8 +75,9 @@ int killProcessGroupOnce(uid_t uid, int initialPid, int signal);

		// Sends the provided signal to all members of a process group, but does not wait for processes to
		// exit, or for the cgroup to be removed. Callers should also ensure that killProcessGroup is called
		// later to ensure the cgroup is fully removed, otherwise system resources may leak.
		int sendSignalToProcessGroup(uid_t uid, int initialPid, int signal);
		// later to ensure the cgroup is fully removed, otherwise system resources will leak.
		// Returns true if no errors are encountered sending signals, otherwise false.
		bool sendSignalToProcessGroup(uid_t uid, int initialPid, int signal);

		int createProcessGroup(uid_t uid, int initialPid, bool memControl = false);

libprocessgroup/processgroup.cpp

+215 −122

Original line number	Diff line number	Diff line
		@@ -22,6 +22,7 @@
		#include <errno.h>
		#include <fcntl.h>
		#include <inttypes.h>
		#include <poll.h>
		#include <signal.h>
		#include <stdio.h>
		#include <stdlib.h>
		@@ -30,6 +31,7 @@
		#include <unistd.h>

		#include <chrono>
		#include <cstring>
		#include <map>
		#include <memory>
		#include <mutex>
		@@ -53,7 +55,9 @@ using android::base::WriteStringToFile;

		using namespace std::chrono_literals;

		#define PROCESSGROUP_CGROUP_PROCS_FILE "/cgroup.procs"
		#define PROCESSGROUP_CGROUP_PROCS_FILE "cgroup.procs"
		#define PROCESSGROUP_CGROUP_KILL_FILE "cgroup.kill"
		#define PROCESSGROUP_CGROUP_EVENTS_FILE "cgroup.events"

		bool CgroupsAvailable() {
		static bool cgroups_available = access("/proc/cgroups", F_OK) == 0;
		@@ -74,6 +78,29 @@ bool CgroupGetControllerPath(const std::string& cgroup_name, std::string* path)
		return true;
		}

		// Builds the path of the UID-level cgroup directory: "<cgroup>/uid_<uid>".
		static std::string ConvertUidToPath(const char* cgroup, uid_t uid) {
		    std::string uid_dir = StringPrintf("%s/uid_%u", cgroup, uid);
		    return uid_dir;
		}

		// Builds the path of the per-process cgroup directory: "<cgroup>/uid_<uid>/pid_<pid>".
		static std::string ConvertUidPidToPath(const char* cgroup, uid_t uid, int pid) {
		    std::string pid_dir = StringPrintf("%s/uid_%u/pid_%d", cgroup, uid, pid);
		    return pid_dir;
		}

		// Reports whether a cgroup.kill file is present in the cgroup v2 hierarchy. The filesystem is
		// probed on the first call only; every later call returns the cached answer.
		static bool CgroupKillAvailable() {
		    static const bool kill_file_present = []() {
		        std::string v2_root;
		        CgroupGetControllerPath(CGROUPV2_HIERARCHY_NAME, &v2_root);
		        // cgroup.kill is not on the root cgroup, so probe a non-root cgroup that should
		        // always exist.
		        const std::string probe_path =
		                ConvertUidToPath(v2_root.c_str(), AID_ROOT) + '/' + PROCESSGROUP_CGROUP_KILL_FILE;
		        return access(probe_path.c_str(), F_OK) == 0;
		    }();

		    return kill_file_present;
		}

		static bool CgroupGetMemcgAppsPath(std::string* path) {
		CgroupController controller = CgroupMap::GetInstance().FindController("memory");

		@@ -205,39 +232,22 @@ bool SetUserProfiles(uid_t uid, const std::vector<std::string>& profiles) {
		false);
		}

		static std::string ConvertUidToPath(const char* cgroup, uid_t uid) {
		return StringPrintf("%s/uid_%u", cgroup, uid);
		}

		static std::string ConvertUidPidToPath(const char* cgroup, uid_t uid, int pid) {
		return StringPrintf("%s/uid_%u/pid_%d", cgroup, uid, pid);
		}

		// Removes the per-process cgroup directory "<cgroup>/uid_<uid>/pid_<pid>" with rmdir(). When
		// that succeeds and uid lies in [AID_ISOLATED_START, AID_ISOLATED_END], the UID-level directory
		// is removed as well. A missing directory (ENOENT) is reported as success (0); any other
		// failure is returned as rmdir()'s result.
		// NOTE(review): this span shows the pre-change and post-change bodies of the diff interleaved,
		// so it is not compilable as shown.
		static int RemoveCgroup(const char* cgroup, uid_t uid, int pid, unsigned int retries) {
		int ret = 0;
		auto uid_pid_path = ConvertUidPidToPath(cgroup, uid, pid);

		while (retries--) {
		ret = rmdir(uid_pid_path.c_str());
		// If we get an error 2 'No such file or directory' , that means the
		// cgroup is already removed, treat it as success and return 0 for
		// idempotency.
		if (ret < 0 && errno == ENOENT) {
		ret = 0;
		}
		if (!ret \|\| errno != EBUSY \|\| !retries) break;
		std::this_thread::sleep_for(5ms);
		}
		static int RemoveCgroup(const char* cgroup, uid_t uid, int pid) {
		auto path = ConvertUidPidToPath(cgroup, uid, pid);
		int ret = TEMP_FAILURE_RETRY(rmdir(path.c_str()));

		if (!ret && uid >= AID_ISOLATED_START && uid <= AID_ISOLATED_END) {
		// Isolated UIDs are unlikely to be reused soon after removal,
		// so free up the kernel resources for the UID level cgroup.
		const auto uid_path = ConvertUidToPath(cgroup, uid);
		ret = rmdir(uid_path.c_str());
		path = ConvertUidToPath(cgroup, uid);
		ret = TEMP_FAILURE_RETRY(rmdir(path.c_str()));
		}

		if (ret < 0 && errno == ENOENT) {
		// This function is idempotent, but still warn here.
		LOG(WARNING) << "RemoveCgroup: " << path << " does not exist.";
		ret = 0;
		}
		}

		return ret;
		}
		@@ -360,38 +370,47 @@ err:
		return false;
		}

		// Returns number of processes killed on success
		// Returns 0 if there are no processes in the process cgroup left to kill
		// Returns -1 on error
		static int DoKillProcessGroupOnce(const char* cgroup, uid_t uid, int initialPid, int signal) {
		// We separate all of the pids in the cgroup into those pids that are also the leaders of
		// process groups (stored in the pgids set) and those that are not (stored in the pids set).
		std::set<pid_t> pgids;
		pgids.emplace(initialPid);
		std::set<pid_t> pids;
		int processes = 0;

		std::unique_ptr<FILE, decltype(&fclose)> fd(nullptr, fclose);
		bool sendSignalToProcessGroup(uid_t uid, int initialPid, int signal) {
		std::set<pid_t> pgids, pids;

		if (CgroupsAvailable()) {
		auto path = ConvertUidPidToPath(cgroup, uid, initialPid) + PROCESSGROUP_CGROUP_PROCS_FILE;
		fd.reset(fopen(path.c_str(), "re"));
		if (!fd) {
		if (errno == ENOENT) {
		// This happens when the process is already dead or if, as the result of a bug, it
		// has been migrated to another cgroup. An example of a bug that can cause migration
		// to another cgroup is using the JoinCgroup action with a cgroup controller that
		// has been activated in the v2 cgroup hierarchy.
		goto kill;
		}
		PLOG(WARNING) << __func__ << " failed to open process cgroup uid " << uid << " pid "
		<< initialPid;
		return -1;
		std::string hierarchy_root_path, cgroup_v2_path;
		CgroupGetControllerPath(CGROUPV2_HIERARCHY_NAME, &hierarchy_root_path);
		cgroup_v2_path = ConvertUidPidToPath(hierarchy_root_path.c_str(), uid, initialPid);

		if (signal == SIGKILL && CgroupKillAvailable()) {
		LOG(VERBOSE) << "Using " << PROCESSGROUP_CGROUP_KILL_FILE << " to SIGKILL "
		<< cgroup_v2_path;
		const std::string killfilepath = cgroup_v2_path + '/' + PROCESSGROUP_CGROUP_KILL_FILE;
		if (WriteStringToFile("1", killfilepath)) {
		return true;
		} else {
		PLOG(ERROR) << "Failed to write 1 to " << killfilepath;
		// Fallback to cgroup.procs below
		}
		}

		// Since cgroup.kill only sends SIGKILLs, we read cgroup.procs to find each process to
		// signal individually. This is more costly than using cgroup.kill for SIGKILLs.
		LOG(VERBOSE) << "Using " << PROCESSGROUP_CGROUP_PROCS_FILE << " to signal (" << signal
		<< ") " << cgroup_v2_path;

		// We separate all of the pids in the cgroup into those pids that are also the leaders of
		// process groups (stored in the pgids set) and those that are not (stored in the pids set).
		const auto procsfilepath = cgroup_v2_path + '/' + PROCESSGROUP_CGROUP_PROCS_FILE;
		std::unique_ptr<FILE, decltype(&fclose)> fp(fopen(procsfilepath.c_str(), "re"), fclose);
		if (!fp) {
		// This should only happen if the cgroup has already been removed with a successful call
		// to killProcessGroup. Callers should only retry sendSignalToProcessGroup or
		// killProcessGroup calls if they fail without ENOENT.
		PLOG(ERROR) << "Failed to open " << procsfilepath;
		kill(-initialPid, signal);
		return false;
		}

		pid_t pid;
		bool file_is_empty = true;
		while (fscanf(fd.get(), "%d\n", &pid) == 1 && pid >= 0) {
		processes++;
		while (fscanf(fp.get(), "%d\n", &pid) == 1 && pid >= 0) {
		file_is_empty = false;
		if (pid == 0) {
		// Should never happen... but if it does, trying to kill this
		@@ -421,7 +440,8 @@ static int DoKillProcessGroupOnce(const char* cgroup, uid_t uid, int initialPid,
		}
		}

		kill:
		pgids.emplace(initialPid);

		// Kill all process groups.
		for (const auto pgid : pgids) {
		LOG(VERBOSE) << "Killing process group " << -pgid << " in uid " << uid
		@@ -442,101 +462,174 @@ kill:
		}
		}

		return (!fd \|\| feof(fd.get())) ? processes : -1;
		return true;
		}

		// Converts any std::chrono duration to milliseconds via duration_cast, which truncates toward
		// zero when the source has a finer resolution.
		template <typename T>
		static std::chrono::milliseconds toMillisec(T&& duration) {
		    using std::chrono::milliseconds;
		    return std::chrono::duration_cast<milliseconds>(duration);
		}

		// Outcome of inspecting the "populated" key of a cgroup.events file.
		enum class populated_status {
		    populated,      // the key's value was '1'
		    not_populated,  // the key's value was anything other than '1'
		    error           // cgroup.events could not be read or parsed
		};

		// Reads the cgroup.events file open on `events_fd` and reports the value of its "populated"
		// key.
		// Returns populated when the character following "populated " is '1', not_populated for any
		// other character, and error when the read fails, returns no data, the key is missing, or the
		// value character lies beyond the bytes that were read.
		static populated_status cgroupIsPopulated(int events_fd) {
		const std::string POPULATED_KEY("populated ");
		// Only the first MAX_EVENTS_FILE_SIZE bytes of the file are examined.
		const std::string::size_type MAX_EVENTS_FILE_SIZE = 32;

		std::string buf;
		buf.resize(MAX_EVENTS_FILE_SIZE);
		// pread() at offset 0 always reads from the start of the file, independent of the
		// descriptor's current position, so the same descriptor can be queried repeatedly.
		ssize_t len = TEMP_FAILURE_RETRY(pread(events_fd, buf.data(), buf.size(), 0));
		if (len == -1) {
		PLOG(ERROR) << "Could not read cgroup.events: ";
		// Potentially ENODEV if the cgroup has been removed since we opened this file, but that
		// shouldn't have happened yet.
		return populated_status::error;
		}

		if (len == 0) {
		LOG(ERROR) << "cgroup.events EOF";
		return populated_status::error;
		}

		// NOTE(review): the next line looks like the pre-change KillProcessGroup signature that the
		// diff rendering left between the post-change lines - confirm before treating this span as
		// compilable code.
		static int KillProcessGroup(uid_t uid, int initialPid, int signal, int retries) {
		// Shrink the buffer to the bytes actually read so find() cannot match stale padding.
		buf.resize(len);

		const std::string::size_type pos = buf.find(POPULATED_KEY);
		if (pos == std::string::npos) {
		LOG(ERROR) << "Could not find populated key in cgroup.events";
		return populated_status::error;
		}

		// The value character sits immediately after the key; it must be within the bytes read.
		if (pos + POPULATED_KEY.size() + 1 > len) {
		LOG(ERROR) << "Partial read of cgroup.events";
		return populated_status::error;
		}

		return buf[pos + POPULATED_KEY.size()] == '1' ?
		populated_status::populated : populated_status::not_populated;
		}

		// Signals the process group identified by uid/initialPid, waits (by polling cgroup.events) for
		// its cgroup to become unpopulated, and then removes the cgroup.
		//   once  - when true, signal a single time inside the loop and neither poll nor repeat.
		//   until - deadline after which signalling and polling stop.
		// Returns 0 if cgroups are unavailable and the initial signal succeeded, -1 if the initial
		// signal or opening cgroup.events fails; otherwise the result of the last RemoveCgroup call.
		// NOTE(review): this span shows pre-change and post-change lines of the diff interleaved; the
		// summary above describes the lines that use `once`/`until`.
		//
		// The default timeout of 2200ms comes from the default number of retries in a previous
		// implementation of this function. The default retry value was 40 for killing and 400 for cgroup
		// removal with 5ms sleeps between each retry.
		static int KillProcessGroup(
		uid_t uid, int initialPid, int signal, bool once = false,
		std::chrono::steady_clock::time_point until = std::chrono::steady_clock::now() + 2200ms) {
		CHECK_GE(uid, 0);
		CHECK_GT(initialPid, 0);

		// Always attempt to send a kill signal to at least the initialPid, at least once, regardless of
		// whether its cgroup exists or not. This should only be necessary if a bug results in the
		// migration of the targeted process out of its cgroup, which we will also attempt to kill.
		const bool signal_ret = sendSignalToProcessGroup(uid, initialPid, signal);

		if (!CgroupsAvailable() \|\| !signal_ret) return signal_ret ? 0 : -1;

		std::string hierarchy_root_path;
		if (CgroupsAvailable()) {
		CgroupGetControllerPath(CGROUPV2_HIERARCHY_NAME, &hierarchy_root_path);
		}
		const char* cgroup = hierarchy_root_path.c_str();

		std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now();
		const std::string cgroup_v2_path =
		ConvertUidPidToPath(hierarchy_root_path.c_str(), uid, initialPid);

		int retry = retries;
		int processes;
		while ((processes = DoKillProcessGroupOnce(cgroup, uid, initialPid, signal)) > 0) {
		LOG(VERBOSE) << "Killed " << processes << " processes for processgroup " << initialPid;
		if (!CgroupsAvailable()) {
		// makes no sense to retry, because there are no cgroup_procs file
		processes = 0; // no remaining processes
		break;
		const std::string eventsfile = cgroup_v2_path + '/' + PROCESSGROUP_CGROUP_EVENTS_FILE;
		android::base::unique_fd events_fd(open(eventsfile.c_str(), O_RDONLY));
		if (events_fd.get() == -1) {
		PLOG(WARNING) << "Error opening " << eventsfile << " for KillProcessGroup";
		return -1;
		}
		if (retry > 0) {
		std::this_thread::sleep_for(5ms);
		--retry;
		} else {

		struct pollfd fds = {
		.fd = events_fd,
		.events = POLLPRI,
		};

		const std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now();

		// The primary reason to loop here is to capture any new forks or migrations that could occur
		// after we send signals to the original set of processes, but before all of those processes
		// exit and the cgroup becomes unpopulated, or before we remove the cgroup. We try hard to
		// ensure this completes successfully to avoid permanent memory leaks, but we still place a
		// large default upper bound on the amount of time we spend in this loop. The amount of CPU
		// contention, and the amount of work that needs to be done in do_exit for each process
		// determines how long this will take.
		// NOTE(review): `ret` is declared without an initializer. It is assigned only when
		// `poll_start < until` holds, yet `if (ret == -1)` below reads it unconditionally, so on the
		// first pass it can be read before any assignment. Consider initializing it to 0.
		int ret;
		do {
		populated_status populated;
		while ((populated = cgroupIsPopulated(events_fd.get())) == populated_status::populated &&
		std::chrono::steady_clock::now() < until) {

		sendSignalToProcessGroup(uid, initialPid, signal);
		if (once) {
		populated = cgroupIsPopulated(events_fd.get());
		break;
		}
		}

		if (processes < 0) {
		PLOG(ERROR) << "Error encountered killing process cgroup uid " << uid << " pid "
		<< initialPid;
		return -1;
		}
		const std::chrono::steady_clock::time_point poll_start =
		std::chrono::steady_clock::now();

		std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now();
		auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
		if (poll_start < until)
		ret = TEMP_FAILURE_RETRY(poll(&fds, 1, toMillisec(until - poll_start).count()));

		// We only calculate the number of 'processes' when killing the processes.
		// In the retries == 0 case, we only kill the processes once and therefore
		// will not have waited then recalculated how many processes are remaining
		// after the first signals have been sent.
		// Logging anything regarding the number of 'processes' here does not make sense.
		if (ret == -1) {
		// Fallback to 5ms sleeps if poll fails
		// NOTE(review): the message below lacks a space before "failed", so the file path and the
		// word run together in the log output.
		PLOG(ERROR) << "Poll on " << eventsfile << "failed";
		const std::chrono::steady_clock::time_point now = std::chrono::steady_clock::now();
		if (now < until)
		std::this_thread::sleep_for(std::min(5ms, toMillisec(until - now)));
		}

		if (processes == 0) {
		if (retries > 0) {
		LOG(INFO) << "Successfully killed process cgroup uid " << uid << " pid " << initialPid
		<< " in " << static_cast<int>(ms) << "ms";
		LOG(VERBOSE) << "Waited "
		<< toMillisec(std::chrono::steady_clock::now() - poll_start).count()
		<< " ms for " << eventsfile << " poll";
		}

		if (!CgroupsAvailable()) {
		// nothing to do here, if cgroups isn't available
		return 0;
		const std::chrono::milliseconds kill_duration =
		toMillisec(std::chrono::steady_clock::now() - start);

		if (populated == populated_status::populated) {
		LOG(WARNING) << "Still waiting on process(es) to exit for cgroup " << cgroup_v2_path
		<< " after " << kill_duration.count() << " ms";
		// We'll still try the cgroup removal below which we expect to log an error.
		} else if (populated == populated_status::not_populated) {
		LOG(VERBOSE) << "Killed all processes under cgroup " << cgroup_v2_path
		<< " after " << kill_duration.count() << " ms";
		}

		// 400 retries correspond to 2 secs max timeout
		int err = RemoveCgroup(cgroup, uid, initialPid, 400);
		ret = RemoveCgroup(hierarchy_root_path.c_str(), uid, initialPid);
		if (ret)
		PLOG(ERROR) << "Unable to remove cgroup " << cgroup_v2_path;
		else
		LOG(INFO) << "Removed cgroup " << cgroup_v2_path;

		if (isMemoryCgroupSupported() && UsePerAppMemcg()) {
		// This per-application memcg v1 case should eventually be removed after migration to
		// memcg v2.
		std::string memcg_apps_path;
		// NOTE(review): the `ret = RemoveCgroup(...)` assignment in the condition below replaces the
		// result of the cgroup v2 removal above, so a v2 failure is not reflected in the return
		// value when the memcg removal succeeds - confirm this is intended.
		if (CgroupGetMemcgAppsPath(&memcg_apps_path) &&
		RemoveCgroup(memcg_apps_path.c_str(), uid, initialPid, 400) < 0) {
		return -1;
		(ret = RemoveCgroup(memcg_apps_path.c_str(), uid, initialPid)) < 0) {
		const auto memcg_v1_cgroup_path =
		ConvertUidPidToPath(memcg_apps_path.c_str(), uid, initialPid);
		PLOG(ERROR) << "Unable to remove memcg v1 cgroup " << memcg_v1_cgroup_path;
		}
		}

		return err;
		} else {
		if (retries > 0) {
		LOG(ERROR) << "Failed to kill process cgroup uid " << uid << " pid " << initialPid
		<< " in " << static_cast<int>(ms) << "ms, " << processes
		<< " processes remain";
		}
		return -1;
		}
		if (once) break;
		if (std::chrono::steady_clock::now() >= until) break;
		} while (ret && errno == EBUSY);

		return ret;
		}

		// Public entry point that forwards to KillProcessGroup. Two return statements are shown
		// because the diff's pre-change and post-change lines appear together; the second one relies
		// on KillProcessGroup's default `once` and `until` arguments.
		int killProcessGroup(uid_t uid, int initialPid, int signal) {
		return KillProcessGroup(uid, initialPid, signal, 40 /retries/);
		return KillProcessGroup(uid, initialPid, signal);
		}

		int killProcessGroupOnce(uid_t uid, int initialPid, int signal) {
		return KillProcessGroup(uid, initialPid, signal, 0 /retries/);
		}

		int sendSignalToProcessGroup(uid_t uid, int initialPid, int signal) {
		std::string hierarchy_root_path;
		if (CgroupsAvailable()) {
		CgroupGetControllerPath(CGROUPV2_HIERARCHY_NAME, &hierarchy_root_path);
		}
		const char* cgroup = hierarchy_root_path.c_str();
		return DoKillProcessGroupOnce(cgroup, uid, initialPid, signal);
		return KillProcessGroup(uid, initialPid, signal, true);
		}

		static int createProcessGroupInternal(uid_t uid, int initialPid, std::string cgroup,
		@@ -576,7 +669,7 @@ static int createProcessGroupInternal(uid_t uid, int initialPid, std::string cgr
		return -errno;
		}

		auto uid_pid_procs_file = uid_pid_path + PROCESSGROUP_CGROUP_PROCS_FILE;
		auto uid_pid_procs_file = uid_pid_path + '/' + PROCESSGROUP_CGROUP_PROCS_FILE;

		if (!WriteStringToFile(std::to_string(initialPid), uid_pid_procs_file)) {
		ret = -errno;