Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 05951695 authored by Michal Koutný's avatar Michal Koutný Committed by Greg Kroah-Hartman
Browse files

cgroup: Optimize single thread migration



[ Upstream commit 9a3284fad42f66bb43629c6716709ff791aaa457 ]

There are reports of users who use thread migrations between cgroups and
they report performance drop after d59cfc09 ("sched, cgroup: replace
signal_struct->group_rwsem with a global percpu_rwsem"). The effect is
pronounced on machines with more CPUs.

The migration is affected by forking noise happening in the background,
after the mentioned commit a migrating thread must wait for all
(forking) processes on the system, not only of its threadgroup.

There are several places that need to synchronize with migration:
	a) do_exit,
	b) de_thread,
	c) copy_process,
	d) cgroup_update_dfl_csses,
	e) parallel migration (cgroup_{proc,thread}s_write).

In the case of self-migrating thread, we relax the synchronization on
cgroup_threadgroup_rwsem to avoid the cost of waiting. d) and e) are
excluded with cgroup_mutex, c) does not matter in case of single thread
migration and the executing thread cannot exec(2) or exit(2) while it is
writing into cgroup.threads. In case of do_exit because of signal
delivery, we either exit before the migration or finish the migration
(of not yet PF_EXITING thread) and die afterwards.

This patch handles only the case of self-migration by writing "0" into
cgroup.threads. For simplicity, we always take cgroup_threadgroup_rwsem
with numeric PIDs.

This change improves migration dependent workload performance similar
to per-signal_struct state.

Signed-off-by: default avatarMichal Koutný <mkoutny@suse.com>
Signed-off-by: default avatarTejun Heo <tj@kernel.org>
Signed-off-by: default avatarSasha Levin <sashal@kernel.org>
parent d0e7be0d
Loading
Loading
Loading
Loading
+3 −2
Original line number Diff line number Diff line
@@ -250,9 +250,10 @@ int cgroup_migrate(struct task_struct *leader, bool threadgroup,

int cgroup_attach_task(struct cgroup *dst_cgrp, struct task_struct *leader,
		       bool threadgroup);
struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup)
struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup,
					     bool *locked)
	__acquires(&cgroup_threadgroup_rwsem);
void cgroup_procs_write_finish(struct task_struct *task)
void cgroup_procs_write_finish(struct task_struct *task, bool locked)
	__releases(&cgroup_threadgroup_rwsem);

void cgroup_lock_and_drain_offline(struct cgroup *cgrp);
+3 −2
Original line number Diff line number Diff line
@@ -498,12 +498,13 @@ static ssize_t __cgroup1_procs_write(struct kernfs_open_file *of,
	struct task_struct *task;
	const struct cred *cred, *tcred;
	ssize_t ret;
	bool locked;

	cgrp = cgroup_kn_lock_live(of->kn, false);
	if (!cgrp)
		return -ENODEV;

	task = cgroup_procs_write_start(buf, threadgroup);
	task = cgroup_procs_write_start(buf, threadgroup, &locked);
	ret = PTR_ERR_OR_ZERO(task);
	if (ret)
		goto out_unlock;
@@ -526,7 +527,7 @@ static ssize_t __cgroup1_procs_write(struct kernfs_open_file *of,
	ret = cgroup_attach_task(cgrp, task, threadgroup);

out_finish:
	cgroup_procs_write_finish(task);
	cgroup_procs_write_finish(task, locked);
out_unlock:
	cgroup_kn_unlock(of->kn);

+30 −9
Original line number Diff line number Diff line
@@ -2856,7 +2856,8 @@ int cgroup_attach_task(struct cgroup *dst_cgrp, struct task_struct *leader,
	return ret;
}

struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup)
struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup,
					     bool *locked)
	__acquires(&cgroup_threadgroup_rwsem)
{
	struct task_struct *tsk;
@@ -2865,7 +2866,21 @@ struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup)
	if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0)
		return ERR_PTR(-EINVAL);

	/*
	 * If we migrate a single thread, we don't care about threadgroup
	 * stability. If the thread is `current`, it won't exit(2) under our
	 * hands or change PID through exec(2). We exclude
	 * cgroup_update_dfl_csses and other cgroup_{proc,thread}s_write
	 * callers by cgroup_mutex.
	 * Therefore, we can skip the global lock.
	 */
	lockdep_assert_held(&cgroup_mutex);
	if (pid || threadgroup) {
		percpu_down_write(&cgroup_threadgroup_rwsem);
		*locked = true;
	} else {
		*locked = false;
	}

	rcu_read_lock();
	if (pid) {
@@ -2896,13 +2911,16 @@ struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup)
	goto out_unlock_rcu;

out_unlock_threadgroup:
	if (*locked) {
		percpu_up_write(&cgroup_threadgroup_rwsem);
		*locked = false;
	}
out_unlock_rcu:
	rcu_read_unlock();
	return tsk;
}

void cgroup_procs_write_finish(struct task_struct *task)
void cgroup_procs_write_finish(struct task_struct *task, bool locked)
	__releases(&cgroup_threadgroup_rwsem)
{
	struct cgroup_subsys *ss;
@@ -2911,6 +2929,7 @@ void cgroup_procs_write_finish(struct task_struct *task)
	/* release reference from cgroup_procs_write_start() */
	put_task_struct(task);

	if (locked)
		percpu_up_write(&cgroup_threadgroup_rwsem);
	for_each_subsys(ss, ssid)
		if (ss->post_attach)
@@ -4830,12 +4849,13 @@ static ssize_t cgroup_procs_write(struct kernfs_open_file *of,
	struct task_struct *task;
	const struct cred *saved_cred;
	ssize_t ret;
	bool locked;

	dst_cgrp = cgroup_kn_lock_live(of->kn, false);
	if (!dst_cgrp)
		return -ENODEV;

	task = cgroup_procs_write_start(buf, true);
	task = cgroup_procs_write_start(buf, true, &locked);
	ret = PTR_ERR_OR_ZERO(task);
	if (ret)
		goto out_unlock;
@@ -4861,7 +4881,7 @@ static ssize_t cgroup_procs_write(struct kernfs_open_file *of,
	ret = cgroup_attach_task(dst_cgrp, task, true);

out_finish:
	cgroup_procs_write_finish(task);
	cgroup_procs_write_finish(task, locked);
out_unlock:
	cgroup_kn_unlock(of->kn);

@@ -4881,6 +4901,7 @@ static ssize_t cgroup_threads_write(struct kernfs_open_file *of,
	struct task_struct *task;
	const struct cred *saved_cred;
	ssize_t ret;
	bool locked;

	buf = strstrip(buf);

@@ -4888,7 +4909,7 @@ static ssize_t cgroup_threads_write(struct kernfs_open_file *of,
	if (!dst_cgrp)
		return -ENODEV;

	task = cgroup_procs_write_start(buf, false);
	task = cgroup_procs_write_start(buf, false, &locked);
	ret = PTR_ERR_OR_ZERO(task);
	if (ret)
		goto out_unlock;
@@ -4919,7 +4940,7 @@ static ssize_t cgroup_threads_write(struct kernfs_open_file *of,
	ret = cgroup_attach_task(dst_cgrp, task, false);

out_finish:
	cgroup_procs_write_finish(task);
	cgroup_procs_write_finish(task, locked);
out_unlock:
	cgroup_kn_unlock(of->kn);