Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit f9f9e7b7 authored by Tejun Heo
Browse files

Revert "cgroup: simplify threadgroup locking"



This reverts commit b5ba75b5.

d59cfc09 ("sched, cgroup: replace signal_struct->group_rwsem with
a global percpu_rwsem") and b5ba75b5 ("cgroup: simplify
threadgroup locking") changed how cgroup synchronizes against task
fork and exits so that it uses global percpu_rwsem instead of
per-process rwsem; unfortunately, the write [un]lock paths of
percpu_rwsem always involve synchronize_rcu_expedited() which turned
out to be too expensive.

Improvements to percpu_rwsem that alleviate this issue are scheduled to
be merged in the coming v4.4-rc1 merge window.  For now, revert
the two commits to restore per-process rwsem.  They will be re-applied
for the v4.4-rc1 merge window.

Signed-off-by: Tejun Heo <tj@kernel.org>
Link: http://lkml.kernel.org/g/55F8097A.7000206@de.ibm.com


Reported-by: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: stable@vger.kernel.org # v4.2+
parent 6ff33f39
Loading
Loading
Loading
Loading
+33 −12
Original line number Original line Diff line number Diff line
@@ -2460,13 +2460,14 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
	if (!cgrp)
	if (!cgrp)
		return -ENODEV;
		return -ENODEV;


	percpu_down_write(&cgroup_threadgroup_rwsem);
retry_find_task:
	rcu_read_lock();
	rcu_read_lock();
	if (pid) {
	if (pid) {
		tsk = find_task_by_vpid(pid);
		tsk = find_task_by_vpid(pid);
		if (!tsk) {
		if (!tsk) {
			rcu_read_unlock();
			ret = -ESRCH;
			ret = -ESRCH;
			goto out_unlock_rcu;
			goto out_unlock_cgroup;
		}
		}
	} else {
	} else {
		tsk = current;
		tsk = current;
@@ -2482,23 +2483,37 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
	 */
	 */
	if (tsk == kthreadd_task || (tsk->flags & PF_NO_SETAFFINITY)) {
	if (tsk == kthreadd_task || (tsk->flags & PF_NO_SETAFFINITY)) {
		ret = -EINVAL;
		ret = -EINVAL;
		goto out_unlock_rcu;
		rcu_read_unlock();
		goto out_unlock_cgroup;
	}
	}


	get_task_struct(tsk);
	get_task_struct(tsk);
	rcu_read_unlock();
	rcu_read_unlock();


	percpu_down_write(&cgroup_threadgroup_rwsem);
	if (threadgroup) {
		if (!thread_group_leader(tsk)) {
			/*
			 * a race with de_thread from another thread's exec()
			 * may strip us of our leadership, if this happens,
			 * there is no choice but to throw this task away and
			 * try again; this is
			 * "double-double-toil-and-trouble-check locking".
			 */
			percpu_up_write(&cgroup_threadgroup_rwsem);
			put_task_struct(tsk);
			goto retry_find_task;
		}
	}

	ret = cgroup_procs_write_permission(tsk, cgrp, of);
	ret = cgroup_procs_write_permission(tsk, cgrp, of);
	if (!ret)
	if (!ret)
		ret = cgroup_attach_task(cgrp, tsk, threadgroup);
		ret = cgroup_attach_task(cgrp, tsk, threadgroup);


	put_task_struct(tsk);
	goto out_unlock_threadgroup;

out_unlock_rcu:
	rcu_read_unlock();
out_unlock_threadgroup:
	percpu_up_write(&cgroup_threadgroup_rwsem);
	percpu_up_write(&cgroup_threadgroup_rwsem);

	put_task_struct(tsk);
out_unlock_cgroup:
	cgroup_kn_unlock(of->kn);
	cgroup_kn_unlock(of->kn);
	return ret ?: nbytes;
	return ret ?: nbytes;
}
}
@@ -2643,8 +2658,6 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)


	lockdep_assert_held(&cgroup_mutex);
	lockdep_assert_held(&cgroup_mutex);


	percpu_down_write(&cgroup_threadgroup_rwsem);

	/* look up all csses currently attached to @cgrp's subtree */
	/* look up all csses currently attached to @cgrp's subtree */
	down_read(&css_set_rwsem);
	down_read(&css_set_rwsem);
	css_for_each_descendant_pre(css, cgroup_css(cgrp, NULL)) {
	css_for_each_descendant_pre(css, cgroup_css(cgrp, NULL)) {
@@ -2700,8 +2713,17 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
				goto out_finish;
				goto out_finish;
			last_task = task;
			last_task = task;


			percpu_down_write(&cgroup_threadgroup_rwsem);
			/* raced against de_thread() from another thread? */
			if (!thread_group_leader(task)) {
				percpu_up_write(&cgroup_threadgroup_rwsem);
				put_task_struct(task);
				continue;
			}

			ret = cgroup_migrate(src_cset->dfl_cgrp, task, true);
			ret = cgroup_migrate(src_cset->dfl_cgrp, task, true);


			percpu_up_write(&cgroup_threadgroup_rwsem);
			put_task_struct(task);
			put_task_struct(task);


			if (WARN(ret, "cgroup: failed to update controllers for the default hierarchy (%d), further operations may crash or hang\n", ret))
			if (WARN(ret, "cgroup: failed to update controllers for the default hierarchy (%d), further operations may crash or hang\n", ret))
@@ -2711,7 +2733,6 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)


out_finish:
out_finish:
	cgroup_migrate_finish(&preloaded_csets);
	cgroup_migrate_finish(&preloaded_csets);
	percpu_up_write(&cgroup_threadgroup_rwsem);
	return ret;
	return ret;
}
}