Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 3f95aa81 authored by Paul E. McKenney's avatar Paul E. McKenney
Browse files

rcu: Make TASKS_RCU handle tasks that are almost done exiting



Once a task has passed exit_notify() in the do_exit() code path, it
is no longer on the task lists, and is therefore no longer visible
to rcu_tasks_kthread().  This means that an almost-exited task might
be preempted while within a trampoline, and this task won't be waited
on by rcu_tasks_kthread().  This commit fixes this bug by adding an
srcu_struct.  An exiting task does srcu_read_lock() just before calling
exit_notify(), and does the corresponding srcu_read_unlock() after
doing the final preempt_disable().  This means that rcu_tasks_kthread()
can do synchronize_srcu() to wait for all mostly-exited tasks to reach
their final preempt_disable() region, and then use synchronize_sched()
to wait for those tasks to finish exiting.

Reported-by: default avatarOleg Nesterov <oleg@redhat.com>
Suggested-by: default avatarLai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: default avatarPaul E. McKenney <paulmck@linux.vnet.ibm.com>
parent 53c6d4ed
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -321,6 +321,8 @@ static inline void rcu_user_hooks_switch(struct task_struct *prev,
 * macro rather than an inline function to avoid #include hell.
 */
#ifdef CONFIG_TASKS_RCU
#define TASKS_RCU(x) x
extern struct srcu_struct tasks_rcu_exit_srcu;
#define rcu_note_voluntary_context_switch(t) \
	do { \
		preempt_disable(); /* Exclude synchronize_sched(); */ \
@@ -329,6 +331,7 @@ static inline void rcu_user_hooks_switch(struct task_struct *prev,
		preempt_enable(); \
	} while (0)
#else /* #ifdef CONFIG_TASKS_RCU */
#define TASKS_RCU(x) do { } while (0)
#define rcu_note_voluntary_context_switch(t)	do { } while (0)
#endif /* #else #ifdef CONFIG_TASKS_RCU */

+3 −0
Original line number Diff line number Diff line
@@ -667,6 +667,7 @@ void do_exit(long code)
{
	struct task_struct *tsk = current;
	int group_dead;
	TASKS_RCU(int tasks_rcu_i);

	profile_task_exit(tsk);

@@ -775,6 +776,7 @@ void do_exit(long code)
	 */
	flush_ptrace_hw_breakpoint(tsk);

	TASKS_RCU(tasks_rcu_i = __srcu_read_lock(&tasks_rcu_exit_srcu));
	exit_notify(tsk, group_dead);
	proc_exit_connector(tsk);
#ifdef CONFIG_NUMA
@@ -814,6 +816,7 @@ void do_exit(long code)
	if (tsk->nr_dirtied)
		__this_cpu_add(dirty_throttle_leaks, tsk->nr_dirtied);
	exit_rcu();
	TASKS_RCU(__srcu_read_unlock(&tasks_rcu_exit_srcu, tasks_rcu_i));

	/*
	 * The setting of TASK_RUNNING by try_to_wake_up() may be delayed
+21 −0
Original line number Diff line number Diff line
@@ -367,6 +367,13 @@ static struct rcu_head *rcu_tasks_cbs_head;
static struct rcu_head **rcu_tasks_cbs_tail = &rcu_tasks_cbs_head;
static DEFINE_RAW_SPINLOCK(rcu_tasks_cbs_lock);

/* Track exiting tasks in order to allow them to be waited for. */
DEFINE_SRCU(tasks_rcu_exit_srcu);

/* Control stall timeouts.  Disable with <= 0, otherwise jiffies till stall. */
static int rcu_task_stall_timeout __read_mostly = HZ * 60 * 3;
module_param(rcu_task_stall_timeout, int, 0644);

/* Post an RCU-tasks callback. */
void call_rcu_tasks(struct rcu_head *rhp, void (*func)(struct rcu_head *rhp))
{
@@ -517,6 +524,15 @@ static int __noreturn rcu_tasks_kthread(void *arg)
		}
		rcu_read_unlock();

		/*
		 * Wait for tasks that are in the process of exiting.
		 * This does only part of the job, ensuring that all
		 * tasks that were previously exiting reach the point
		 * where they have disabled preemption, allowing the
		 * later synchronize_sched() to finish the job.
		 */
		synchronize_srcu(&tasks_rcu_exit_srcu);

		/*
		 * Each pass through the following loop scans the list
		 * of holdout tasks, removing any that are no longer
@@ -546,6 +562,11 @@ static int __noreturn rcu_tasks_kthread(void *arg)
		 * ->rcu_tasks_holdout accesses to be within the grace
		 * period, avoiding the need for memory barriers for
		 * ->rcu_tasks_holdout accesses.
		 *
		 * In addition, this synchronize_sched() waits for exiting
		 * tasks to complete their final preempt_disable() region
		 * of execution, cleaning up after the synchronize_srcu()
		 * above.
		 */
		synchronize_sched();