Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 3fbfbf7a authored by Paul E. McKenney, committed by Paul E. McKenney
Browse files

rcu: Add callback-free CPUs



RCU callback execution can add significant OS jitter and also can
degrade both scheduling latency and, in asymmetric multiprocessors,
energy efficiency.  This commit therefore adds the ability for selected
CPUs ("rcu_nocbs=" boot parameter) to have their callbacks offloaded
to kthreads.  If the "rcu_nocb_poll" boot parameter is also specified,
these kthreads will do polling, removing the need for the offloaded
CPUs to do wakeups.  At least one CPU must be doing normal callback
processing: currently CPU 0 cannot be selected as a no-CBs CPU.
In addition, attempts to offline the last normal-CBs CPU will fail.

This feature was inspired by Jim Houston's and Joe Korty's JRCU, and
this commit includes fixes to problems located by Fengguang Wu's
kbuild test robot.

[ paulmck: Added gfp.h include file as suggested by Fengguang Wu. ]

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
parent aac1cda3
Loading
Loading
Loading
Loading
+21 −0
Original line number Diff line number Diff line
@@ -2394,6 +2394,27 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
	ramdisk_size=	[RAM] Sizes of RAM disks in kilobytes
			See Documentation/blockdev/ramdisk.txt.

	rcu_nocbs=	[KNL,BOOT]
			In kernels built with CONFIG_RCU_NOCB_CPU=y, set
			the specified list of CPUs to be no-callback CPUs.
			Invocation of these CPUs' RCU callbacks will
			be offloaded to "rcuoN" kthreads created for
			that purpose.  This reduces OS jitter on the
			offloaded CPUs, which can be useful for HPC and
			real-time workloads.  It can also improve energy
			efficiency for asymmetric multiprocessors.

	rcu_nocb_poll	[KNL,BOOT]
			Rather than requiring that offloaded CPUs
			(specified by rcu_nocbs= above) explicitly
			awaken the corresponding "rcuoN" kthreads,
			make these kthreads poll for callbacks.
			This improves the real-time response for the
			offloaded CPUs by relieving them of the need to
			wake up the corresponding kthread, but degrades
			energy efficiency by requiring that the kthreads
			periodically wake up to do the polling.

	rcutree.blimit=	[KNL,BOOT]
			Set maximum number of finished RCU callbacks to process
			in one batch.
+1 −0
Original line number Diff line number Diff line
@@ -549,6 +549,7 @@ TRACE_EVENT(rcu_torture_read,
 *	"EarlyExit": rcu_barrier_callback() piggybacked, thus early exit.
 *	"Inc1": rcu_barrier_callback() piggyback check counter incremented.
 *	"Offline": rcu_barrier_callback() found offline CPU
 *	"OnlineNoCB": rcu_barrier_callback() found online no-CBs CPU.
 *	"OnlineQ": rcu_barrier_callback() found online CPU with callbacks.
 *	"OnlineNQ": rcu_barrier_callback() found online CPU, no callbacks.
 *	"IRQ": An rcu_barrier_callback() callback posted on remote CPU.
+22 −0
Original line number Diff line number Diff line
@@ -654,6 +654,28 @@ config RCU_BOOST_DELAY

	  Accept the default if unsure.

config RCU_NOCB_CPU
	bool "Offload RCU callback processing from boot-selected CPUs"
	depends on TREE_RCU || TREE_PREEMPT_RCU
	default n
	help
	  Use this option to reduce OS jitter for aggressive HPC or
	  real-time workloads.  It can also be used to offload RCU
	  callback invocation to energy-efficient CPUs in battery-powered
	  asymmetric multiprocessors.

	  This option offloads callback invocation from the set of
	  CPUs specified at boot time by the rcu_nocbs parameter.
	  For each such CPU, a kthread ("rcuoN") will be created to
	  invoke callbacks, where the "N" is the CPU being offloaded.
	  Nothing prevents this kthread from running on the specified
	  CPUs, but (1) the kthreads may be preempted between each
	  callback, and (2) affinity or cgroups can be used to force
	  the kthreads to run on whatever set of CPUs is desired.

	  Say Y here if you want reduced OS jitter on selected CPUs.
	  Say N here if you are unsure.

endmenu # "RCU Subsystem"

config IKCONFIG
+51 −12
Original line number Diff line number Diff line
@@ -303,7 +303,8 @@ EXPORT_SYMBOL_GPL(rcu_sched_force_quiescent_state);
static int
cpu_has_callbacks_ready_to_invoke(struct rcu_data *rdp)
{
	return &rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL];
	return &rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL] &&
	       rdp->nxttail[RCU_DONE_TAIL] != NULL;
}

/*
@@ -312,8 +313,11 @@ cpu_has_callbacks_ready_to_invoke(struct rcu_data *rdp)
static int
cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
{
	return *rdp->nxttail[RCU_DONE_TAIL +
			     (ACCESS_ONCE(rsp->completed) != rdp->completed)] &&
	struct rcu_head **ntp;

	ntp = rdp->nxttail[RCU_DONE_TAIL +
			   (ACCESS_ONCE(rsp->completed) != rdp->completed)];
	return rdp->nxttail[RCU_DONE_TAIL] && ntp && *ntp &&
	       !rcu_gp_in_progress(rsp);
}

@@ -1123,6 +1127,7 @@ static void init_callback_list(struct rcu_data *rdp)
	rdp->nxtlist = NULL;
	for (i = 0; i < RCU_NEXT_SIZE; i++)
		rdp->nxttail[i] = &rdp->nxtlist;
	init_nocb_callback_list(rdp);
}

/*
@@ -1633,6 +1638,10 @@ static void
rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
			  struct rcu_node *rnp, struct rcu_data *rdp)
{
	/* No-CBs CPUs do not have orphanable callbacks. */
	if (is_nocb_cpu(rdp->cpu))
		return;

	/*
	 * Orphan the callbacks.  First adjust the counts.  This is safe
	 * because _rcu_barrier() excludes CPU-hotplug operations, so it
@@ -1684,6 +1693,10 @@ static void rcu_adopt_orphan_cbs(struct rcu_state *rsp)
	int i;
	struct rcu_data *rdp = __this_cpu_ptr(rsp->rda);

	/* No-CBs CPUs are handled specially. */
	if (rcu_nocb_adopt_orphan_cbs(rsp, rdp))
		return;

	/* Do the accounting first. */
	rdp->qlen_lazy += rsp->qlen_lazy;
	rdp->qlen += rsp->qlen;
@@ -2162,9 +2175,15 @@ static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
	}
}

/*
 * Helper function for call_rcu() and friends.  The cpu argument will
 * normally be -1, indicating "currently running CPU".  It may specify
 * a CPU only if that CPU is a no-CBs CPU.  Currently, only _rcu_barrier()
 * is expected to specify a CPU.
 */
static void
__call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
	   struct rcu_state *rsp, bool lazy)
	   struct rcu_state *rsp, int cpu, bool lazy)
{
	unsigned long flags;
	struct rcu_data *rdp;
@@ -2184,9 +2203,14 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
	rdp = this_cpu_ptr(rsp->rda);

	/* Add the callback to our list. */
	if (unlikely(rdp->nxttail[RCU_NEXT_TAIL] == NULL)) {
	if (unlikely(rdp->nxttail[RCU_NEXT_TAIL] == NULL) || cpu != -1) {
		int offline;

		if (cpu != -1)
			rdp = per_cpu_ptr(rsp->rda, cpu);
		offline = !__call_rcu_nocb(rdp, head, lazy);
		WARN_ON_ONCE(offline);
		/* _call_rcu() is illegal on offline CPU; leak the callback. */
		WARN_ON_ONCE(1);
		local_irq_restore(flags);
		return;
	}
@@ -2215,7 +2239,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
 */
void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
{
	__call_rcu(head, func, &rcu_sched_state, 0);
	__call_rcu(head, func, &rcu_sched_state, -1, 0);
}
EXPORT_SYMBOL_GPL(call_rcu_sched);

@@ -2224,7 +2248,7 @@ EXPORT_SYMBOL_GPL(call_rcu_sched);
 */
void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
{
	__call_rcu(head, func, &rcu_bh_state, 0);
	__call_rcu(head, func, &rcu_bh_state, -1, 0);
}
EXPORT_SYMBOL_GPL(call_rcu_bh);

@@ -2676,9 +2700,17 @@ static void _rcu_barrier(struct rcu_state *rsp)
	 * When that callback is invoked, we will know that all of the
	 * corresponding CPU's preceding callbacks have been invoked.
	 */
	for_each_online_cpu(cpu) {
	for_each_possible_cpu(cpu) {
		if (!cpu_online(cpu) && !is_nocb_cpu(cpu))
			continue;
		rdp = per_cpu_ptr(rsp->rda, cpu);
		if (ACCESS_ONCE(rdp->qlen)) {
		if (is_nocb_cpu(cpu)) {
			_rcu_barrier_trace(rsp, "OnlineNoCB", cpu,
					   rsp->n_barrier_done);
			atomic_inc(&rsp->barrier_cpu_count);
			__call_rcu(&rdp->barrier_head, rcu_barrier_callback,
				   rsp, cpu, 0);
		} else if (ACCESS_ONCE(rdp->qlen)) {
			_rcu_barrier_trace(rsp, "OnlineQ", cpu,
					   rsp->n_barrier_done);
			smp_call_function_single(cpu, rcu_barrier_func, rsp, 1);
@@ -2752,6 +2784,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
#endif
	rdp->cpu = cpu;
	rdp->rsp = rsp;
	rcu_boot_init_nocb_percpu_data(rdp);
	raw_spin_unlock_irqrestore(&rnp->lock, flags);
}

@@ -2833,6 +2866,7 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
	struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
	struct rcu_node *rnp = rdp->mynode;
	struct rcu_state *rsp;
	int ret = NOTIFY_OK;

	trace_rcu_utilization("Start CPU hotplug");
	switch (action) {
@@ -2846,7 +2880,10 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
		rcu_boost_kthread_setaffinity(rnp, -1);
		break;
	case CPU_DOWN_PREPARE:
		if (nocb_cpu_expendable(cpu))
			rcu_boost_kthread_setaffinity(rnp, cpu);
		else
			ret = NOTIFY_BAD;
		break;
	case CPU_DYING:
	case CPU_DYING_FROZEN:
@@ -2870,7 +2907,7 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
		break;
	}
	trace_rcu_utilization("End CPU hotplug");
	return NOTIFY_OK;
	return ret;
}

/*
@@ -2890,6 +2927,7 @@ static int __init rcu_spawn_gp_kthread(void)
		raw_spin_lock_irqsave(&rnp->lock, flags);
		rsp->gp_kthread = t;
		raw_spin_unlock_irqrestore(&rnp->lock, flags);
		rcu_spawn_nocb_kthreads(rsp);
	}
	return 0;
}
@@ -3085,6 +3123,7 @@ void __init rcu_init(void)
	rcu_init_one(&rcu_sched_state, &rcu_sched_data);
	rcu_init_one(&rcu_bh_state, &rcu_bh_data);
	__rcu_init_preempt();
	rcu_init_nocb();
	 open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);

	/*
+47 −0
Original line number Diff line number Diff line
@@ -317,6 +317,18 @@ struct rcu_data {
	struct rcu_head oom_head;
#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */

	/* 7) Callback offloading. */
#ifdef CONFIG_RCU_NOCB_CPU
	struct rcu_head *nocb_head;	/* CBs waiting for kthread. */
	struct rcu_head **nocb_tail;
	atomic_long_t nocb_q_count;	/* # CBs waiting for kthread */
	atomic_long_t nocb_q_count_lazy; /*  (approximate). */
	int nocb_p_count;		/* # CBs being invoked by kthread */
	int nocb_p_count_lazy;		/*  (approximate). */
	wait_queue_head_t nocb_wq;	/* For nocb kthreads to sleep on. */
	struct task_struct *nocb_kthread;
#endif /* #ifdef CONFIG_RCU_NOCB_CPU */

	int cpu;
	struct rcu_state *rsp;
};
@@ -369,6 +381,12 @@ struct rcu_state {
	struct rcu_data __percpu *rda;		/* pointer to percpu rcu_data. */
	void (*call)(struct rcu_head *head,	/* call_rcu() flavor. */
		     void (*func)(struct rcu_head *head));
#ifdef CONFIG_RCU_NOCB_CPU
	void (*call_remote)(struct rcu_head *head,
		     void (*func)(struct rcu_head *head));
						/* call_rcu() flavor, but for */
						/*  placing on remote CPU. */
#endif /* #ifdef CONFIG_RCU_NOCB_CPU */

	/* The following fields are guarded by the root rcu_node's lock. */

@@ -439,6 +457,8 @@ struct rcu_state {
#define RCU_GP_FLAG_FQS  0x2	/* Need grace-period quiescent-state forcing. */

extern struct list_head rcu_struct_flavors;

/* Sequence through rcu_state structures for each RCU flavor. */
#define for_each_rcu_flavor(rsp) \
	list_for_each_entry((rsp), &rcu_struct_flavors, flavors)

@@ -515,5 +535,32 @@ static void print_cpu_stall_info(struct rcu_state *rsp, int cpu);
static void print_cpu_stall_info_end(void);
static void zero_cpu_stall_ticks(struct rcu_data *rdp);
static void increment_cpu_stall_ticks(void);
static bool is_nocb_cpu(int cpu);
static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
			    bool lazy);
static bool rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
				      struct rcu_data *rdp);
static bool nocb_cpu_expendable(int cpu);
static void rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp);
static void rcu_spawn_nocb_kthreads(struct rcu_state *rsp);
static void init_nocb_callback_list(struct rcu_data *rdp);
static void __init rcu_init_nocb(void);

#endif /* #ifndef RCU_TREE_NONCORE */

#ifdef CONFIG_RCU_TRACE
#ifdef CONFIG_RCU_NOCB_CPU
/*
 * Sum up the no-CBs queue lengths for tracing.
 *
 * @rdp: per-CPU RCU data whose no-CBs counters are read.
 * @ql:  receives nocb_q_count plus nocb_p_count.
 * @qll: receives nocb_q_count_lazy plus nocb_p_count_lazy.
 *
 * The q counters are read atomically while the p counters are plain
 * reads, so the sums are a snapshot rather than an exact value.
 */
static inline void rcu_nocb_q_lengths(struct rcu_data *rdp, long *ql, long *qll)
{
	long total;
	long total_lazy;

	total = atomic_long_read(&rdp->nocb_q_count) + rdp->nocb_p_count;
	*ql = total;

	total_lazy = atomic_long_read(&rdp->nocb_q_count_lazy) +
		     rdp->nocb_p_count_lazy;
	*qll = total_lazy;
}
#else /* #ifdef CONFIG_RCU_NOCB_CPU */
/*
 * Tracing stub used when CONFIG_RCU_NOCB_CPU is not defined: @rdp is
 * ignored and both queue lengths are reported as zero.
 */
static inline void rcu_nocb_q_lengths(struct rcu_data *rdp, long *ql, long *qll)
{
	const long none = 0;

	*ql = none;
	*qll = none;
}
#endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */
#endif /* #ifdef CONFIG_RCU_TRACE */
Loading