Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 26d950a9 authored by Paul E. McKenney
Browse files

rcu: Diagnostics for grace-period startup hangs



This commit causes a splat if RCU is idle and a request for a new grace
period is ignored for more than one second.  This splat normally indicates
that some code path asked for a new grace period, but failed to wake up
the RCU grace-period kthread.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
[ paulmck: Fix bug located by Dan Carpenter and his static checker. ]
[ paulmck: Fix self-deadlock bug located by 0day test robot. ]
[ paulmck: Disable unless CONFIG_PROVE_RCU=y. ]
parent 8c42b1f3
Loading
Loading
Loading
Loading
+64 −2
Original line number Diff line number Diff line
@@ -1681,6 +1681,7 @@ static bool rcu_start_this_gp(struct rcu_node *rnp, struct rcu_data *rdp,
	}
	trace_rcu_this_gp(rnp_root, rdp, c, TPS("Startedroot"));
	WRITE_ONCE(rsp->gp_flags, rsp->gp_flags | RCU_GP_FLAG_INIT);
	rsp->gp_req_activity = jiffies;
	if (!rsp->gp_kthread) {
		trace_rcu_this_gp(rnp_root, rdp, c, TPS("NoGPkthread"));
		goto unlock_out;
@@ -2113,6 +2114,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
	/* Advance CBs to reduce false positives below. */
	if (!rcu_accelerate_cbs(rsp, rnp, rdp) && needgp) {
		WRITE_ONCE(rsp->gp_flags, RCU_GP_FLAG_INIT);
		rsp->gp_req_activity = jiffies;
		trace_rcu_grace_period(rsp->name, READ_ONCE(rsp->gpnum),
				       TPS("newreq"));
	}
@@ -2744,6 +2746,65 @@ static void force_quiescent_state(struct rcu_state *rsp)
	rcu_gp_kthread_wake(rsp);
}

/*
 * This function checks for grace-period requests that fail to motivate
 * RCU to come out of its idle mode.
 */
static void
rcu_check_gp_start_stall(struct rcu_state *rsp, struct rcu_node *rnp,
			 struct rcu_data *rdp)
{
	unsigned long flags;
	unsigned long j;
	struct rcu_node *rnp_root = rcu_get_root(rsp);
	static atomic_t warned = ATOMIC_INIT(0);

	if (!IS_ENABLED(CONFIG_PROVE_RCU) ||
	    rcu_gp_in_progress(rsp) || !need_any_future_gp(rcu_get_root(rsp)))
		return;
	j = jiffies; /* Expensive access, and in common case don't get here. */
	if (time_before(j, READ_ONCE(rsp->gp_req_activity) + HZ) ||
	    time_before(j, READ_ONCE(rsp->gp_activity) + HZ) ||
	    atomic_read(&warned))
		return;

	raw_spin_lock_irqsave_rcu_node(rnp, flags);
	j = jiffies;
	if (rcu_gp_in_progress(rsp) || !need_any_future_gp(rcu_get_root(rsp)) ||
	    time_before(j, READ_ONCE(rsp->gp_req_activity) + HZ) ||
	    time_before(j, READ_ONCE(rsp->gp_activity) + HZ) ||
	    atomic_read(&warned)) {
		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
		return;
	}
	/* Hold onto the leaf lock to make others see warned==1. */

	if (rnp_root != rnp)
		raw_spin_lock_rcu_node(rnp_root); /* irqs already disabled. */
	j = jiffies;
	if (rcu_gp_in_progress(rsp) || !need_any_future_gp(rcu_get_root(rsp)) ||
	    time_before(j, rsp->gp_req_activity + HZ) ||
	    time_before(j, rsp->gp_activity + HZ) ||
	    atomic_xchg(&warned, 1)) {
		raw_spin_unlock_rcu_node(rnp_root); /* irqs remain disabled. */
		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
		return;
	}
	pr_alert("%s: g%lu %d%d%d%d gar:%lu ga:%lu f%#x %s->state:%#lx\n",
		 __func__, READ_ONCE(rsp->gpnum),
		 need_future_gp_element(rcu_get_root(rsp), 0),
		 need_future_gp_element(rcu_get_root(rsp), 1),
		 need_future_gp_element(rcu_get_root(rsp), 2),
		 need_future_gp_element(rcu_get_root(rsp), 3),
		 j - rsp->gp_req_activity, j - rsp->gp_activity,
		 rsp->gp_flags, rsp->name,
		 rsp->gp_kthread ? rsp->gp_kthread->state : 0x1ffffL);
	WARN_ON(1);
	if (rnp_root != rnp)
		raw_spin_unlock_rcu_node(rnp_root);
	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
}

/*
 * This does the RCU core processing work for the specified rcu_state
 * and rcu_data structures.  This may be called only from the CPU to
@@ -2755,7 +2816,7 @@ __rcu_process_callbacks(struct rcu_state *rsp)
	unsigned long flags;
	bool needwake;
	struct rcu_data *rdp = raw_cpu_ptr(rsp->rda);
	struct rcu_node *rnp;
	struct rcu_node *rnp = rdp->mynode;

	WARN_ON_ONCE(!rdp->beenonline);

@@ -2769,7 +2830,6 @@ __rcu_process_callbacks(struct rcu_state *rsp)
		if (rcu_segcblist_restempty(&rdp->cblist, RCU_NEXT_READY_TAIL)) {
			local_irq_restore(flags);
		} else {
			rnp = rdp->mynode;
			raw_spin_lock_rcu_node(rnp); /* irqs disabled. */
			needwake = rcu_accelerate_cbs(rsp, rnp, rdp);
			raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
@@ -2778,6 +2838,8 @@ __rcu_process_callbacks(struct rcu_state *rsp)
		}
	}

	rcu_check_gp_start_stall(rsp, rnp, rdp);

	/* If there are callbacks ready, invoke them. */
	if (rcu_segcblist_ready_cbs(&rdp->cblist))
		invoke_rcu_callbacks(rsp, rdp);
+2 −0
Original line number Diff line number Diff line
@@ -374,6 +374,8 @@ struct rcu_state {
						/*  but in jiffies. */
	unsigned long gp_activity;		/* Time of last GP kthread */
						/*  activity in jiffies. */
	unsigned long gp_req_activity;		/* Time of last GP request */
						/*  in jiffies. */
	unsigned long jiffies_stall;		/* Time at which to check */
						/*  for CPU stalls. */
	unsigned long jiffies_resched;		/* Time at which to resched */