Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 25502a6c authored by Paul E. McKenney
Browse files

rcu: refactor RCU's context-switch handling



The addition of preemptible RCU to treercu resulted in a bit of
confusion and inefficiency surrounding the handling of context switches
for RCU-sched and for RCU-preempt.  For RCU-sched, a context switch
is a quiescent state, pure and simple, just like it always has been.
For RCU-preempt, a context switch is in no way a quiescent state, but
special handling is required when a task blocks in an RCU read-side
critical section.

However, the callout from the scheduler and the outer loop in ksoftirqd
still call something named rcu_sched_qs(), whose name is no longer
accurate.  Furthermore, when rcu_check_callbacks() notes an RCU-sched
quiescent state, it ends up unnecessarily (though harmlessly, aside
from the performance hit) enqueuing the current task if it happens to
be running in an RCU-preempt read-side critical section.  This not only
increases the maximum latency of scheduler_tick(), it also needlessly
increases the overhead of the next outermost rcu_read_unlock() invocation.

This patch addresses this situation by separating the notion of RCU's
context-switch handling from that of RCU-sched's quiescent states.
The context-switch handling is covered by rcu_note_context_switch() in
general and by rcu_preempt_note_context_switch() for preemptible RCU.
This permits rcu_sched_qs() to handle quiescent states and only quiescent
states.  It also reduces the maximum latency of scheduler_tick(), though
probably by much less than a microsecond.  Finally, it means that tasks
within preemptible-RCU read-side critical sections avoid incurring the
overhead of queuing unless there really is a context switch.

Suggested-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Acked-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <peterz@infradead.org>
parent 99652b54
Loading
Loading
Loading
Loading
+4 −0
Original line number Diff line number Diff line
@@ -29,6 +29,10 @@

void rcu_sched_qs(int cpu);
void rcu_bh_qs(int cpu);
/*
 * Note a context switch on the CPU identified by @cpu.  In this
 * configuration the only work done is to report an RCU-sched quiescent
 * state via rcu_sched_qs().
 */
static inline void rcu_note_context_switch(int cpu)
{
	rcu_sched_qs(cpu);
}

#define __rcu_read_lock()	preempt_disable()
#define __rcu_read_unlock()	preempt_enable()
+1 −0
Original line number Diff line number Diff line
@@ -34,6 +34,7 @@ struct notifier_block;

extern void rcu_sched_qs(int cpu);
extern void rcu_bh_qs(int cpu);
extern void rcu_note_context_switch(int cpu);
extern int rcu_needs_cpu(int cpu);
extern int rcu_expedited_torture_stats(char *page);

+12 −5
Original line number Diff line number Diff line
@@ -97,25 +97,32 @@ static int rcu_gp_in_progress(struct rcu_state *rsp)
 */
void rcu_sched_qs(int cpu)
{
	/* Per-CPU RCU-sched state for the CPU identified by @cpu. */
	struct rcu_data *rdp = &per_cpu(rcu_sched_data, cpu);

	rdp->passed_quiesc_completed = rdp->gpnum - 1;
	/* Keep the compiler from reordering the store above past the flag store below. */
	barrier();
	rdp->passed_quiesc = 1;
	/*
	 * Preemptible-RCU context-switch handling is deliberately not done
	 * here: rcu_note_context_switch() calls
	 * rcu_preempt_note_context_switch() itself, so this function records
	 * a quiescent state and nothing else.
	 */
}

/*
 * Record a quiescent state for RCU-bh on the CPU identified by @cpu.
 */
void rcu_bh_qs(int cpu)
{
	/* Per-CPU RCU-bh state for the CPU identified by @cpu. */
	struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu);

	rdp->passed_quiesc_completed = rdp->gpnum - 1;
	/* Keep the compiler from reordering the store above past the flag store below. */
	barrier();
	rdp->passed_quiesc = 1;
}

/*
 * Note a context switch.  This is a quiescent state for RCU-sched,
 * and requires special handling for preemptible RCU.
 *
 * @cpu is passed unchanged to both rcu_sched_qs() and
 * rcu_preempt_note_context_switch().
 */
void rcu_note_context_switch(int cpu)
{
	/* RCU-sched: a context switch is a quiescent state. */
	rcu_sched_qs(cpu);
	/* Preemptible RCU: perform its separate context-switch handling. */
	rcu_preempt_note_context_switch(cpu);
}

#ifdef CONFIG_NO_HZ
DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
	.dynticks_nesting = 1,
+7 −4
Original line number Diff line number Diff line
@@ -75,13 +75,19 @@ EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
 * that this just means that the task currently running on the CPU is
 * not in a quiescent state.  There might be any number of tasks blocked
 * while in an RCU read-side critical section.
 *
 * Unlike the other rcu_*_qs() functions, callers to this function
 * must disable irqs in order to protect the assignment to
 * ->rcu_read_unlock_special.
 */
static void rcu_preempt_qs(int cpu)
{
	/* Per-CPU preemptible-RCU state for the CPU identified by @cpu. */
	struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu);

	rdp->passed_quiesc_completed = rdp->gpnum - 1;
	/* Keep the compiler from reordering the store above past the flag store below. */
	barrier();
	rdp->passed_quiesc = 1;
	/*
	 * Clear the RCU_READ_UNLOCK_NEED_QS bit for the current task.  This
	 * is the ->rcu_read_unlock_special assignment that, per the header
	 * comment above, requires callers to have irqs disabled.
	 */
	current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
}

/*
@@ -144,9 +150,8 @@ static void rcu_preempt_note_context_switch(int cpu)
	 * grace period, then the fact that the task has been enqueued
	 * means that we continue to block the current grace period.
	 */
	rcu_preempt_qs(cpu);
	local_irq_save(flags);
	t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
	rcu_preempt_qs(cpu);
	local_irq_restore(flags);
}

@@ -236,7 +241,6 @@ static void rcu_read_unlock_special(struct task_struct *t)
	 */
	special = t->rcu_read_unlock_special;
	if (special & RCU_READ_UNLOCK_NEED_QS) {
		t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
		rcu_preempt_qs(smp_processor_id());
	}

@@ -473,7 +477,6 @@ static void rcu_preempt_check_callbacks(int cpu)
	struct task_struct *t = current;

	if (t->rcu_read_lock_nesting == 0) {
		t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
		rcu_preempt_qs(cpu);
		return;
	}
+1 −1
Original line number Diff line number Diff line
@@ -3706,7 +3706,7 @@ asmlinkage void __sched schedule(void)
	preempt_disable();
	cpu = smp_processor_id();
	rq = cpu_rq(cpu);
	rcu_sched_qs(cpu);
	rcu_note_context_switch(cpu);
	prev = rq->curr;
	switch_count = &prev->nivcsw;

Loading