
Commit 7a968bb2 authored by Paul E. McKenney

Merge branches 'consolidate.2019.01.26a' and 'fwd.2019.01.26a' into HEAD

consolidate.2019.01.26a: RCU flavor consolidation cleanups.
fwd.2019.01.26a: RCU grace-period forward-progress fixes.
parents 6ba7d681 13dc7d0c
+5 −0
@@ -3721,6 +3721,11 @@
			This wake_up() will be accompanied by a
			WARN_ONCE() splat and an ftrace_dump().

	rcutree.sysrq_rcu= [KNL]
			Commandeer a sysrq key to dump out Tree RCU's
			rcu_node tree with an eye towards determining
			why a new grace period has not yet started.

	rcuperf.gp_async= [KNL]
			Measure performance of asynchronous
			grace-period primitives such as call_rcu().
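For context, the new rcutree.sysrq_rcu parameter is read at boot (via the module_param() added in the tree.c hunk below); when it is set, the commandeered 'y' sysrq key, reachable through the usual sysrq mechanisms such as /proc/sysrq-trigger, runs the new sysrq_show_rcu() handler and dumps the rcu_node tree via show_rcu_gp_kthreads().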
+72 −30
@@ -62,6 +62,7 @@
#include <linux/suspend.h>
#include <linux/ftrace.h>
#include <linux/tick.h>
#include <linux/sysrq.h>

#include "tree.h"
#include "rcu.h"
@@ -115,6 +116,9 @@ int num_rcu_lvl[] = NUM_RCU_LVL_INIT;
int rcu_num_nodes __read_mostly = NUM_RCU_NODES; /* Total # rcu_nodes in use. */
/* panic() on RCU Stall sysctl. */
int sysctl_panic_on_rcu_stall __read_mostly;
/* Commandeer a sysrq key to dump RCU's tree. */
static bool sysrq_rcu;
module_param(sysrq_rcu, bool, 0444);

/*
 * The rcu_scheduler_active variable is initialized to the value
@@ -502,6 +506,14 @@ unsigned long rcu_exp_batches_completed(void)
}
EXPORT_SYMBOL_GPL(rcu_exp_batches_completed);

/*
 * Return the root node of the rcu_state structure.
 */
static struct rcu_node *rcu_get_root(void)
{
	return &rcu_state.node[0];
}

/*
 * Convert a ->gp_state value to a character string.
 */
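(The rcu_get_root() definition added here is the one removed from its old location further down in tree.c, as shown in a later hunk; it is hoisted earlier so that the expanded show_rcu_gp_kthreads() can read the root rcu_node's ->gp_seq_needed.)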
@@ -519,19 +531,30 @@ void show_rcu_gp_kthreads(void)
{
	int cpu;
	unsigned long j;
	unsigned long ja;
	unsigned long jr;
	unsigned long jw;
	struct rcu_data *rdp;
	struct rcu_node *rnp;

	j = jiffies - READ_ONCE(rcu_state.gp_activity);
	pr_info("%s: wait state: %s(%d) ->state: %#lx delta ->gp_activity %ld\n",
	j = jiffies;
	ja = j - READ_ONCE(rcu_state.gp_activity);
	jr = j - READ_ONCE(rcu_state.gp_req_activity);
	jw = j - READ_ONCE(rcu_state.gp_wake_time);
	pr_info("%s: wait state: %s(%d) ->state: %#lx delta ->gp_activity %lu ->gp_req_activity %lu ->gp_wake_time %lu ->gp_wake_seq %ld ->gp_seq %ld ->gp_seq_needed %ld ->gp_flags %#x\n",
		rcu_state.name, gp_state_getname(rcu_state.gp_state),
		rcu_state.gp_state, rcu_state.gp_kthread->state, j);
		rcu_state.gp_state,
		rcu_state.gp_kthread ? rcu_state.gp_kthread->state : 0x1ffffL,
		ja, jr, jw, (long)READ_ONCE(rcu_state.gp_wake_seq),
		(long)READ_ONCE(rcu_state.gp_seq),
		(long)READ_ONCE(rcu_get_root()->gp_seq_needed),
		READ_ONCE(rcu_state.gp_flags));
	rcu_for_each_node_breadth_first(rnp) {
		if (ULONG_CMP_GE(rcu_state.gp_seq, rnp->gp_seq_needed))
			continue;
		pr_info("\trcu_node %d:%d ->gp_seq %lu ->gp_seq_needed %lu\n",
			rnp->grplo, rnp->grphi, rnp->gp_seq,
			rnp->gp_seq_needed);
		pr_info("\trcu_node %d:%d ->gp_seq %ld ->gp_seq_needed %ld\n",
			rnp->grplo, rnp->grphi, (long)rnp->gp_seq,
			(long)rnp->gp_seq_needed);
		if (!rcu_is_leaf_node(rnp))
			continue;
		for_each_leaf_node_possible_cpu(rnp, cpu) {
@@ -540,14 +563,35 @@ void show_rcu_gp_kthreads(void)
			    ULONG_CMP_GE(rcu_state.gp_seq,
					 rdp->gp_seq_needed))
				continue;
			pr_info("\tcpu %d ->gp_seq_needed %lu\n",
				cpu, rdp->gp_seq_needed);
			pr_info("\tcpu %d ->gp_seq_needed %ld\n",
				cpu, (long)rdp->gp_seq_needed);
		}
	}
	/* sched_show_task(rcu_state.gp_kthread); */
}
EXPORT_SYMBOL_GPL(show_rcu_gp_kthreads);

/* Dump grace-period-request information due to commandeered sysrq. */
static void sysrq_show_rcu(int key)
{
	show_rcu_gp_kthreads();
}

static struct sysrq_key_op sysrq_rcudump_op = {
	.handler = sysrq_show_rcu,
	.help_msg = "show-rcu(y)",
	.action_msg = "Show RCU tree",
	.enable_mask = SYSRQ_ENABLE_DUMP,
};

static int __init rcu_sysrq_init(void)
{
	if (sysrq_rcu)
		return register_sysrq_key('y', &sysrq_rcudump_op);
	return 0;
}
early_initcall(rcu_sysrq_init);

/*
 * Send along grace-period-related data for rcutorture diagnostics.
 */
@@ -565,14 +609,6 @@ void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags,
}
EXPORT_SYMBOL_GPL(rcutorture_get_gp_data);

/*
 * Return the root node of the rcu_state structure.
 */
static struct rcu_node *rcu_get_root(void)
{
	return &rcu_state.node[0];
}

/*
 * Enter an RCU extended quiescent state, which can be either the
 * idle loop or adaptive-tickless usermode execution.
@@ -1169,7 +1205,7 @@ static void rcu_check_gp_kthread_starvation(void)
		pr_err("%s kthread starved for %ld jiffies! g%ld f%#x %s(%d) ->state=%#lx ->cpu=%d\n",
		       rcu_state.name, j,
		       (long)rcu_seq_current(&rcu_state.gp_seq),
		       rcu_state.gp_flags,
		       READ_ONCE(rcu_state.gp_flags),
		       gp_state_getname(rcu_state.gp_state), rcu_state.gp_state,
		       gpk ? gpk->state : ~0, gpk ? task_cpu(gpk) : -1);
		if (gpk) {
@@ -1545,17 +1581,28 @@ static bool rcu_future_gp_cleanup(struct rcu_node *rnp)
}

/*
 * Awaken the grace-period kthread.  Don't do a self-awaken, and don't
 * bother awakening when there is nothing for the grace-period kthread
 * to do (as in several CPUs raced to awaken, and we lost), and finally
 * don't try to awaken a kthread that has not yet been created.
 * Awaken the grace-period kthread.  Don't do a self-awaken (unless in
 * an interrupt or softirq handler), and don't bother awakening when there
 * is nothing for the grace-period kthread to do (as in several CPUs raced
 * to awaken, and we lost), and finally don't try to awaken a kthread that
 * has not yet been created.  If all those checks are passed, track some
 * debug information and awaken.
 *
 * So why do the self-wakeup when in an interrupt or softirq handler
 * in the grace-period kthread's context?  Because the kthread might have
 * been interrupted just as it was going to sleep, and just after the final
 * pre-sleep check of the awaken condition.  In this case, a wakeup really
 * is required, and is therefore supplied.
 */
static void rcu_gp_kthread_wake(void)
{
	if (current == rcu_state.gp_kthread ||
	if ((current == rcu_state.gp_kthread &&
	     !in_interrupt() && !in_serving_softirq()) ||
	    !READ_ONCE(rcu_state.gp_flags) ||
	    !rcu_state.gp_kthread)
		return;
	WRITE_ONCE(rcu_state.gp_wake_time, jiffies);
	WRITE_ONCE(rcu_state.gp_wake_seq, READ_ONCE(rcu_state.gp_seq));
	swake_up_one(&rcu_state.gp_wq);
}

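The reworked comment above describes the classic missed-wakeup window between the kthread's final condition check and its actual sleep. Purely as an illustrative sketch (not code from this commit), the interleaving that the in_interrupt()/in_serving_softirq() exception now handles looks roughly like this:

	/*
	 * GP kthread (process context)      interrupt/softirq on the same CPU
	 * ------------------------------    ----------------------------------
	 * final pre-sleep check: no work
	 *                                    new grace-period work is posted;
	 *                                    rcu_gp_kthread_wake() runs, but
	 *                                    current == rcu_state.gp_kthread,
	 *                                    so the old test skipped the wakeup
	 * goes to sleep on rcu_state.gp_wq
	 * ... work sits until some later, unrelated wakeup arrives
	 */

With the new test, a wakeup issued from interrupt or softirq context on the grace-period kthread's own CPU is delivered, closing the window.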
@@ -1699,7 +1746,7 @@ static bool __note_gp_changes(struct rcu_node *rnp, struct rcu_data *rdp)
		zero_cpu_stall_ticks(rdp);
	}
	rdp->gp_seq = rnp->gp_seq;  /* Remember new grace-period state. */
	if (ULONG_CMP_GE(rnp->gp_seq_needed, rdp->gp_seq_needed) || rdp->gpwrap)
	if (ULONG_CMP_LT(rdp->gp_seq_needed, rnp->gp_seq_needed) || rdp->gpwrap)
		rdp->gp_seq_needed = rnp->gp_seq_needed;
	WRITE_ONCE(rdp->gpwrap, false);
	rcu_gpnum_ovf(rnp, rdp);
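Both the old and the new test use wrap-safe helpers; the rewrite flips the comparison so that rdp->gp_seq_needed is updated only when it is strictly behind rnp->gp_seq_needed (the old form also fired on equality). For readers unfamiliar with the helpers, here is a minimal stand-alone sketch of the wraparound-safe comparison, assuming definitions along the lines of those in kernel/rcu/rcu.h (the exact kernel macros may differ):

	#include <limits.h>
	#include <stdio.h>

	/* Assumed shape of the kernel helpers; see kernel/rcu/rcu.h for the real ones. */
	#define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b))
	#define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b))

	int main(void)
	{
		unsigned long old_seq = ULONG_MAX - 2;	/* counter just before wrapping */
		unsigned long new_seq = 3;		/* same counter a few steps later */

		/* Plain "<" thinks the newer value is the smaller one once it wraps. */
		printf("plain <      says new < old: %d\n", new_seq < old_seq);
		/* The modular comparison still orders the two values correctly. */
		printf("ULONG_CMP_LT says old < new: %d\n", ULONG_CMP_LT(old_seq, new_seq));
		return 0;
	}

Both lines print 1: the plain comparison gets the ordering backwards after a wrap, while ULONG_CMP_LT() still treats old_seq as the earlier value.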
@@ -1927,7 +1974,7 @@ static void rcu_gp_fqs_loop(void)
		if (!ret) {
			rcu_state.jiffies_force_qs = jiffies + j;
			WRITE_ONCE(rcu_state.jiffies_kick_kthreads,
				   jiffies + 3 * j);
				   jiffies + (j ? 3 * j : 2));
		}
		trace_rcu_grace_period(rcu_state.name,
				       READ_ONCE(rcu_state.gp_seq),
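The point of (j ? 3 * j : 2) is to tolerate a zero fqs wait time: rcutree.jiffies_till_first_fqs may legitimately be set to zero, and with j == 0 the old expression set ->jiffies_kick_kthreads to the current jiffies, an already-expired kthread-kicking deadline; the replacement keeps that deadline at least two jiffies in the future.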
@@ -2646,16 +2693,11 @@ rcu_check_gp_start_stall(struct rcu_node *rnp, struct rcu_data *rdp,
		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
		return;
	}
	pr_alert("%s: g%ld->%ld gar:%lu ga:%lu f%#x gs:%d %s->state:%#lx\n",
		 __func__, (long)READ_ONCE(rcu_state.gp_seq),
		 (long)READ_ONCE(rnp_root->gp_seq_needed),
		 j - rcu_state.gp_req_activity, j - rcu_state.gp_activity,
		 rcu_state.gp_flags, rcu_state.gp_state, rcu_state.name,
		 rcu_state.gp_kthread ? rcu_state.gp_kthread->state : 0x1ffffL);
	WARN_ON(1);
	if (rnp_root != rnp)
		raw_spin_unlock_rcu_node(rnp_root);
	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
	show_rcu_gp_kthreads();
}

/*
+9 −8
@@ -230,7 +230,13 @@ struct rcu_data {
					/* Leader CPU takes GP-end wakeups. */
#endif /* #ifdef CONFIG_RCU_NOCB_CPU */

	/* 6) Diagnostic data, including RCU CPU stall warnings. */
	/* 6) RCU priority boosting. */
	struct task_struct *rcu_cpu_kthread_task;
					/* rcuc per-CPU kthread or NULL. */
	unsigned int rcu_cpu_kthread_status;
	char rcu_cpu_has_work;

	/* 7) Diagnostic data, including RCU CPU stall warnings. */
	unsigned int softirq_snap;	/* Snapshot of softirq activity. */
	/* ->rcu_iw* fields protected by leaf rcu_node ->lock. */
	struct irq_work rcu_iw;		/* Check for non-irq activity. */
@@ -299,6 +305,8 @@ struct rcu_state {
	struct swait_queue_head gp_wq;		/* Where GP task waits. */
	short gp_flags;				/* Commands for GP task. */
	short gp_state;				/* GP kthread sleep state. */
	unsigned long gp_wake_time;		/* Last GP kthread wake. */
	unsigned long gp_wake_seq;		/* ->gp_seq at ^^^. */

	/* End of fields guarded by root rcu_node's lock. */

@@ -398,13 +406,6 @@ static const char *tp_rcu_varname __used __tracepoint_string = rcu_name;

int rcu_dynticks_snap(struct rcu_data *rdp);

#ifdef CONFIG_RCU_BOOST
DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
DECLARE_PER_CPU(int, rcu_cpu_kthread_cpu);
DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
DECLARE_PER_CPU(char, rcu_cpu_has_work);
#endif /* #ifdef CONFIG_RCU_BOOST */

/* Forward declarations for rcutree_plugin.h */
static void rcu_bootup_announce(void);
static void rcu_qs(void);
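The DECLARE_PER_CPU() declarations removed here pair with the DEFINE_PER_CPU() variables deleted from the CONFIG_RCU_BOOST block in the next file: rcu_cpu_kthread_task, rcu_cpu_kthread_status, and rcu_cpu_has_work become fields of the per-CPU struct rcu_data (the new section 6 above) and are reached through accessors such as __this_cpu_read(rcu_data.rcu_cpu_has_work) and per_cpu(rcu_data.rcu_cpu_kthread_status, cpu), while the rcu_cpu_kthread_loops counter is dropped with no replacement.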
+28 −40
@@ -34,17 +34,7 @@
#include "../time/tick-internal.h"

#ifdef CONFIG_RCU_BOOST

#include "../locking/rtmutex_common.h"

/*
 * Control variables for per-CPU and per-rcu_node kthreads.
 */
static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task);
DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
DEFINE_PER_CPU(char, rcu_cpu_has_work);

#else /* #ifdef CONFIG_RCU_BOOST */

/*
@@ -1243,11 +1233,11 @@ static void invoke_rcu_callbacks_kthread(void)
	unsigned long flags;

	local_irq_save(flags);
	__this_cpu_write(rcu_cpu_has_work, 1);
	if (__this_cpu_read(rcu_cpu_kthread_task) != NULL &&
	    current != __this_cpu_read(rcu_cpu_kthread_task)) {
		rcu_wake_cond(__this_cpu_read(rcu_cpu_kthread_task),
			      __this_cpu_read(rcu_cpu_kthread_status));
	__this_cpu_write(rcu_data.rcu_cpu_has_work, 1);
	if (__this_cpu_read(rcu_data.rcu_cpu_kthread_task) != NULL &&
	    current != __this_cpu_read(rcu_data.rcu_cpu_kthread_task)) {
		rcu_wake_cond(__this_cpu_read(rcu_data.rcu_cpu_kthread_task),
			      __this_cpu_read(rcu_data.rcu_cpu_kthread_status));
	}
	local_irq_restore(flags);
}
@@ -1258,7 +1248,7 @@ static void invoke_rcu_callbacks_kthread(void)
 */
static bool rcu_is_callbacks_kthread(void)
{
	return __this_cpu_read(rcu_cpu_kthread_task) == current;
	return __this_cpu_read(rcu_data.rcu_cpu_kthread_task) == current;
}

#define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000)
@@ -1315,12 +1305,12 @@ static void rcu_cpu_kthread_setup(unsigned int cpu)

static void rcu_cpu_kthread_park(unsigned int cpu)
{
	per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
	per_cpu(rcu_data.rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
}

static int rcu_cpu_kthread_should_run(unsigned int cpu)
{
	return __this_cpu_read(rcu_cpu_has_work);
	return __this_cpu_read(rcu_data.rcu_cpu_has_work);
}

/*
@@ -1330,15 +1320,14 @@ static int rcu_cpu_kthread_should_run(unsigned int cpu)
 */
static void rcu_cpu_kthread(unsigned int cpu)
{
	unsigned int *statusp = this_cpu_ptr(&rcu_cpu_kthread_status);
	char work, *workp = this_cpu_ptr(&rcu_cpu_has_work);
	unsigned int *statusp = this_cpu_ptr(&rcu_data.rcu_cpu_kthread_status);
	char work, *workp = this_cpu_ptr(&rcu_data.rcu_cpu_has_work);
	int spincnt;

	for (spincnt = 0; spincnt < 10; spincnt++) {
		trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
		local_bh_disable();
		*statusp = RCU_KTHREAD_RUNNING;
		this_cpu_inc(rcu_cpu_kthread_loops);
		local_irq_disable();
		work = *workp;
		*workp = 0;
@@ -1390,7 +1379,7 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
}

static struct smp_hotplug_thread rcu_cpu_thread_spec = {
	.store			= &rcu_cpu_kthread_task,
	.store			= &rcu_data.rcu_cpu_kthread_task,
	.thread_should_run	= rcu_cpu_kthread_should_run,
	.thread_fn		= rcu_cpu_kthread,
	.thread_comm		= "rcuc/%u",
@@ -1407,7 +1396,7 @@ static void __init rcu_spawn_boost_kthreads(void)
	int cpu;

	for_each_possible_cpu(cpu)
		per_cpu(rcu_cpu_has_work, cpu) = 0;
		per_cpu(rcu_data.rcu_cpu_has_work, cpu) = 0;
	if (WARN_ONCE(smpboot_register_percpu_thread(&rcu_cpu_thread_spec), "%s: Could not start rcub kthread, OOM is now expected behavior\n", __func__))
		return;
	rcu_for_each_leaf_node(rnp)
@@ -1773,22 +1762,24 @@ static void zero_cpu_stall_ticks(struct rcu_data *rdp)

/*
 * Offload callback processing from the boot-time-specified set of CPUs
 * specified by rcu_nocb_mask.  For each CPU in the set, there is a
 * kthread created that pulls the callbacks from the corresponding CPU,
 * waits for a grace period to elapse, and invokes the callbacks.
 * The no-CBs CPUs do a wake_up() on their kthread when they insert
 * a callback into any empty list, unless the rcu_nocb_poll boot parameter
 * has been specified, in which case each kthread actively polls its
 * CPU.  (Which isn't so great for energy efficiency, but which does
 * reduce RCU's overhead on that CPU.)
 * specified by rcu_nocb_mask.  For the CPUs in the set, there are kthreads
 * created that pull the callbacks from the corresponding CPU, wait for
 * a grace period to elapse, and invoke the callbacks.  These kthreads
 * are organized into leaders, which manage incoming callbacks, wait for
 * grace periods, and awaken followers, and the followers, which only
 * invoke callbacks.  Each leader is its own follower.  The no-CBs CPUs
 * do a wake_up() on their kthread when they insert a callback into any
 * empty list, unless the rcu_nocb_poll boot parameter has been specified,
 * in which case each kthread actively polls its CPU.  (Which isn't so great
 * for energy efficiency, but which does reduce RCU's overhead on that CPU.)
 *
 * This is intended to be used in conjunction with Frederic Weisbecker's
 * adaptive-idle work, which would seriously reduce OS jitter on CPUs
 * running CPU-bound user-mode computations.
 *
 * Offloading of callback processing could also in theory be used as
 * an energy-efficiency measure because CPUs with no RCU callbacks
 * queued are more aggressive about entering dyntick-idle mode.
 * Offloading of callbacks can also be used as an energy-efficiency
 * measure because CPUs with no RCU callbacks queued are more aggressive
 * about entering dyntick-idle mode.
 */


@@ -1892,10 +1883,7 @@ static void wake_nocb_leader_defer(struct rcu_data *rdp, int waketype,
	raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
}

/*
 * Does the specified CPU need an RCU callback for this invocation
 * of rcu_barrier()?
 */
/* Does rcu_barrier need to queue an RCU callback on the specified CPU?  */
static bool rcu_nocb_cpu_needs_barrier(int cpu)
{
	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
@@ -1911,8 +1899,8 @@ static bool rcu_nocb_cpu_needs_barrier(int cpu)
	 * callbacks would be posted.  In the worst case, the first
	 * barrier in rcu_barrier() suffices (but the caller cannot
	 * necessarily rely on this, not a substitute for the caller
	 * getting the concurrency design right!).  There must also be
	 * a barrier between the following load an posting of a callback
	 * getting the concurrency design right!).  There must also be a
	 * barrier between the following load and posting of a callback
	 * (if a callback is in fact needed).  This is associated with an
	 * atomic_inc() in the caller.
	 */