Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit b9585e94 authored by Paul E. McKenney
Browse files

rcu: Consolidate tree setup for synchronize_rcu_expedited()



This commit replaces sync_rcu_preempt_exp_init1() and
sync_rcu_preempt_exp_init2() with sync_exp_reset_tree_hotplug()
and sync_exp_reset_tree(), which will also be used by
synchronize_sched_expedited(), and sync_rcu_exp_select_nodes(), which
contains code specific to synchronize_rcu_expedited().

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
parent 7922cd0e
Loading
Loading
Loading
Loading
+85 −1
Original line number Diff line number Diff line
@@ -3379,6 +3379,87 @@ static bool rcu_exp_gp_seq_done(struct rcu_state *rsp, unsigned long s)
	return rcu_seq_done(&rsp->expedited_sequence, s);
}

/*
 * Bring the ->expmaskinit fields of the rcu_node tree up to date with
 * any CPUs that have come online since the previous call.
 *
 * The ->expmaskinit bits are never cleared when a CPU goes offline, so
 * each mask is the union of every CPU (or child group) that has ever
 * been online.  As a result, the common case is the early return taken
 * when rsp->ncpus matches the rsp->ncpus_snap value recorded last time.
 *
 * @rsp: the rcu_state whose rcu_node tree is to be updated.
 */
static void sync_exp_reset_tree_hotplug(struct rcu_state *rsp)
{
	int ncpus_now = READ_ONCE(rsp->ncpus);
	unsigned long flags;
	unsigned long prev_init;
	unsigned long child_bit;
	bool changed;
	bool had_bits;
	struct rcu_node *leaf;
	struct rcu_node *ancestor;

	/* Fastpath: the CPU count has not moved since the last snapshot. */
	if (likely(ncpus_now == rsp->ncpus_snap))
		return;
	rsp->ncpus_snap = ncpus_now;

	/*
	 * Visit every leaf.  A leaf whose mask goes from empty to non-empty
	 * must have its own bit pushed into its ancestors.
	 */
	rcu_for_each_leaf_node(rsp, leaf) {
		/* Adopt the pending mask, remembering what was there before. */
		raw_spin_lock_irqsave(&leaf->lock, flags);
		smp_mb__after_unlock_lock();
		prev_init = leaf->expmaskinit;
		changed = (prev_init != leaf->expmaskinitnext);
		if (changed)
			leaf->expmaskinit = leaf->expmaskinitnext;
		raw_spin_unlock_irqrestore(&leaf->lock, flags);

		/*
		 * Nothing to propagate if the leaf saw no new CPUs, or if
		 * its mask was already nonzero before this update.
		 */
		if (!changed || prev_init)
			continue;

		/*
		 * Walk towards the root, setting the child's bit in each
		 * ancestor.  Stop after the first ancestor whose mask was
		 * already nonzero before the bit was added.
		 */
		child_bit = leaf->grpmask;
		for (ancestor = leaf->parent; ancestor;
		     ancestor = ancestor->parent) {
			raw_spin_lock_irqsave(&ancestor->lock, flags);
			smp_mb__after_unlock_lock();
			had_bits = (ancestor->expmaskinit != 0);
			ancestor->expmaskinit |= child_bit;
			raw_spin_unlock_irqrestore(&ancestor->lock, flags);
			if (had_bits)
				break;
			child_bit = ancestor->grpmask;
		}
	}
}

/*
 * Get the rcu_node tree ready for a new expedited grace period by
 * reloading each node's ->expmask from its ->expmaskinit.
 *
 * @rsp: the rcu_state whose rcu_node tree is to be reset.
 */
static void __maybe_unused sync_exp_reset_tree(struct rcu_state *rsp)
{
	struct rcu_node *node;
	unsigned long irqflags;

	/* Refresh ->expmaskinit first so the copies below are current. */
	sync_exp_reset_tree_hotplug(rsp);

	rcu_for_each_node_breadth_first(rsp, node) {
		raw_spin_lock_irqsave(&node->lock, irqflags);
		smp_mb__after_unlock_lock();
		/* Warn (once) if ->expmask is still nonzero at this point. */
		WARN_ON_ONCE(node->expmask);
		node->expmask = node->expmaskinit;
		raw_spin_unlock_irqrestore(&node->lock, irqflags);
	}
}

/*
 * Return non-zero if there are any tasks in RCU read-side critical
 * sections blocking the current preemptible-RCU expedited grace period.
@@ -3971,7 +4052,6 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)

	/* Set up local state, ensuring consistent view of global state. */
	raw_spin_lock_irqsave(&rnp->lock, flags);
	rdp->beenonline = 1;	 /* We have now been online. */
	rdp->qlen_last_fqs_check = 0;
	rdp->n_force_qs_snap = rsp->n_force_qs;
	rdp->blimit = blimit;
@@ -3993,6 +4073,10 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
	raw_spin_lock(&rnp->lock);		/* irqs already disabled. */
	smp_mb__after_unlock_lock();
	rnp->qsmaskinitnext |= mask;
	rnp->expmaskinitnext |= mask;
	if (!rdp->beenonline)
		WRITE_ONCE(rsp->ncpus, READ_ONCE(rsp->ncpus) + 1);
	rdp->beenonline = true;	 /* We have now been online. */
	rdp->gpnum = rnp->completed; /* Make CPU later note any new GP. */
	rdp->completed = rnp->completed;
	rdp->passed_quiesce = false;
+12 −5
Original line number Diff line number Diff line
@@ -171,16 +171,21 @@ struct rcu_node {
				/*  an rcu_data structure, otherwise, each */
				/*  bit corresponds to a child rcu_node */
				/*  structure. */
	unsigned long expmask;	/* Groups that have ->blkd_tasks */
				/*  elements that need to drain to allow the */
				/*  current expedited grace period to */
				/*  complete (only for PREEMPT_RCU). */
	unsigned long qsmaskinit;
				/* Per-GP initial value for qsmask & expmask. */
				/* Per-GP initial value for qsmask. */
				/*  Initialized from ->qsmaskinitnext at the */
				/*  beginning of each grace period. */
	unsigned long qsmaskinitnext;
				/* Online CPUs for next grace period. */
	unsigned long expmask;	/* CPUs or groups that need to check in */
				/*  to allow the current expedited GP */
				/*  to complete. */
	unsigned long expmaskinit;
				/* Per-GP initial values for expmask. */
				/*  Initialized from ->expmaskinitnext at the */
				/*  beginning of each expedited GP. */
	unsigned long expmaskinitnext;
				/* Online CPUs for next expedited GP. */
	unsigned long grpmask;	/* Mask to apply to parent qsmask. */
				/*  Only one bit will be set in this mask. */
	int	grplo;		/* lowest-numbered CPU or group here. */
@@ -466,6 +471,7 @@ struct rcu_state {
	struct rcu_data __percpu *rda;		/* pointer of percu rcu_data. */
	void (*call)(struct rcu_head *head,	/* call_rcu() flavor. */
		     void (*func)(struct rcu_head *head));
	int ncpus;				/* # CPUs seen so far. */

	/* The following fields are guarded by the root rcu_node's lock. */

@@ -508,6 +514,7 @@ struct rcu_state {
	atomic_long_t expedited_normal;		/* # fallbacks to normal. */
	atomic_t expedited_need_qs;		/* # CPUs left to check in. */
	wait_queue_head_t expedited_wq;		/* Wait for check-ins. */
	int ncpus_snap;				/* # CPUs seen last time. */

	unsigned long jiffies_force_qs;		/* Time at which to invoke */
						/*  force_quiescent_state(). */
+18 −84
Original line number Diff line number Diff line
@@ -536,87 +536,29 @@ void synchronize_rcu(void)
EXPORT_SYMBOL_GPL(synchronize_rcu);

/*
 * Snapshot the tasks blocking the newly started preemptible-RCU expedited
 * grace period for the specified rcu_node structure, phase 1.  If there
 * are such tasks, set the ->expmask bits up the rcu_node tree and also
 * set the ->expmask bits on the leaf rcu_node structures to tell phase 2
 * that work is needed here.
 *
 * Caller must hold the root rcu_node's exp_funnel_mutex.
 * Select the nodes that the upcoming expedited grace period needs
 * to wait for.
 */
static void
sync_rcu_preempt_exp_init1(struct rcu_state *rsp, struct rcu_node *rnp)
static void sync_rcu_exp_select_nodes(struct rcu_state *rsp)
{
	unsigned long flags;
	unsigned long mask;
	struct rcu_node *rnp_up;
	struct rcu_node *rnp;

	sync_exp_reset_tree(rsp);
	rcu_for_each_leaf_node(rsp, rnp) {
		raw_spin_lock_irqsave(&rnp->lock, flags);
		smp_mb__after_unlock_lock();
	WARN_ON_ONCE(rnp->expmask);
	WARN_ON_ONCE(rnp->exp_tasks);
		rnp->expmask = 0; /* No per-CPU component yet. */
		if (!rcu_preempt_has_tasks(rnp)) {
		/* No blocked tasks, nothing to do. */
		raw_spin_unlock_irqrestore(&rnp->lock, flags);
		return;
	}
	/* Call for Phase 2 and propagate ->expmask bits up the tree. */
	rnp->expmask = 1;
	rnp_up = rnp;
	while (rnp_up->parent) {
		mask = rnp_up->grpmask;
		rnp_up = rnp_up->parent;
		if (rnp_up->expmask & mask)
			break;
		raw_spin_lock(&rnp_up->lock); /* irqs already off */
		smp_mb__after_unlock_lock();
		rnp_up->expmask |= mask;
		raw_spin_unlock(&rnp_up->lock); /* irqs still off */
	}
	raw_spin_unlock_irqrestore(&rnp->lock, flags);
}

/*
 * Snapshot the tasks blocking the newly started preemptible-RCU expedited
 * grace period for the specified rcu_node structure, phase 2.  If the
 * leaf rcu_node structure has its ->expmask field set, check for tasks.
 * If there are some, clear ->expmask and set ->exp_tasks accordingly,
 * then initiate RCU priority boosting.  Otherwise, clear ->expmask and
 * invoke rcu_report_exp_rnp() to clear out the upper-level ->expmask bits,
 * enabling rcu_read_unlock_special() to do the bit-clearing.
 *
 * Caller must hold the root rcu_node's exp_funnel_mutex.
 */
static void
sync_rcu_preempt_exp_init2(struct rcu_state *rsp, struct rcu_node *rnp)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&rnp->lock, flags);
	smp_mb__after_unlock_lock();
	if (!rnp->expmask) {
		/* Phase 1 didn't do anything, so Phase 2 doesn't either. */
			/* FIXME: Want __rcu_report_exp_rnp() here. */
			raw_spin_unlock_irqrestore(&rnp->lock, flags);
		return;
	}

	/* Phase 1 is over. */
	rnp->expmask = 0;

	/*
	 * If there are still blocked tasks, set up ->exp_tasks so that
	 * rcu_read_unlock_special() will wake us and then boost them.
	 */
	if (rcu_preempt_has_tasks(rnp)) {
		} else {
			rnp->exp_tasks = rnp->blkd_tasks.next;
		rcu_initiate_boost(rnp, flags);  /* releases rnp->lock */
		return;
			rcu_initiate_boost(rnp, flags);
		}

	/* No longer any blocked tasks, so undo bit setting. */
	raw_spin_unlock_irqrestore(&rnp->lock, flags);
		rcu_report_exp_rnp(rsp, rnp, false);
	}
}

/**
 * synchronize_rcu_expedited - Brute-force RCU grace period
@@ -648,16 +590,8 @@ void synchronize_rcu_expedited(void)
	/* force all RCU readers onto ->blkd_tasks lists. */
	synchronize_sched_expedited();

	/*
	 * Snapshot current state of ->blkd_tasks lists into ->expmask.
	 * Phase 1 sets bits and phase 2 permits rcu_read_unlock_special()
	 * to start clearing them.  Doing this in one phase leads to
	 * strange races between setting and clearing bits, so just say "no"!
	 */
	rcu_for_each_leaf_node(rsp, rnp)
		sync_rcu_preempt_exp_init1(rsp, rnp);
	rcu_for_each_leaf_node(rsp, rnp)
		sync_rcu_preempt_exp_init2(rsp, rnp);
	/* Initialize the rcu_node tree in preparation for the wait. */
	sync_rcu_exp_select_nodes(rsp);

	/* Wait for snapshotted ->blkd_tasks lists to drain. */
	rnp = rcu_get_root(rsp);