Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 9f13ef67 authored by Tejun Heo's avatar Tejun Heo Committed by Jens Axboe
Browse files

blkcg: use double locking instead of RCU for blkg synchronization



blkgs are chained from both blkcgs and request_queues and thus
subjected to two locks - blkcg->lock and q->queue_lock.  As both blkcg
and q can go away anytime, locking during removal is tricky.  It's
currently solved by wrapping removal inside RCU, which makes the
synchronization complex.  There are three locks to worry about - the
outer RCU, q lock and blkcg lock, and it leads to nasty subtle
complications like conditional synchronize_rcu() on queue exit paths.

For all other paths, blkcg lock is naturally nested inside q lock and
the only exception is blkcg removal path, which is a very cold path
and can be implemented as clumsy but conceptually-simple reverse
double lock dancing.

This patch updates blkg removal path such that blkgs are removed while
holding both q and blkcg locks, which is trivial for request queue
exit path - blkg_destroy_all().  The blkcg removal path,
blkiocg_pre_destroy(), implements reverse double lock dancing
essentially identical to ioc_release_fn().

This simplifies blkg locking - no half-dead blkgs to worry about.  Now
unnecessary RCU annotations will be removed by the next patch.

Signed-off-by: default avatarTejun Heo <tj@kernel.org>
Cc: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: default avatarJens Axboe <axboe@kernel.dk>
parent e8989fae
Loading
Loading
Loading
Loading
+51 −85
Original line number Diff line number Diff line
@@ -620,32 +620,6 @@ struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg,
}
EXPORT_SYMBOL_GPL(blkg_lookup_create);

static void __blkiocg_del_blkio_group(struct blkio_group *blkg)
{
	hlist_del_init_rcu(&blkg->blkcg_node);
}

/*
 * returns 0 if blkio_group was still on cgroup list. Otherwise returns 1
 * indicating that blk_group was unhashed by the time we got to it.
 */
int blkiocg_del_blkio_group(struct blkio_group *blkg)
{
	struct blkio_cgroup *blkcg = blkg->blkcg;
	unsigned long flags;
	int ret = 1;

	spin_lock_irqsave(&blkcg->lock, flags);
	if (!hlist_unhashed(&blkg->blkcg_node)) {
		__blkiocg_del_blkio_group(blkg);
		ret = 0;
	}
	spin_unlock_irqrestore(&blkcg->lock, flags);

	return ret;
}
EXPORT_SYMBOL_GPL(blkiocg_del_blkio_group);

/* called under rcu_read_lock(). */
struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg,
				struct request_queue *q)
@@ -663,12 +637,16 @@ EXPORT_SYMBOL_GPL(blkg_lookup);
static void blkg_destroy(struct blkio_group *blkg)
{
	struct request_queue *q = blkg->q;
	struct blkio_cgroup *blkcg = blkg->blkcg;

	lockdep_assert_held(q->queue_lock);
	lockdep_assert_held(&blkcg->lock);

	/* Something wrong if we are trying to remove same group twice */
	WARN_ON_ONCE(list_empty(&blkg->q_node));
	WARN_ON_ONCE(hlist_unhashed(&blkg->blkcg_node));
	list_del_init(&blkg->q_node);
	hlist_del_init_rcu(&blkg->blkcg_node);

	WARN_ON_ONCE(q->nr_blkgs <= 0);
	q->nr_blkgs--;
@@ -713,45 +691,33 @@ void update_root_blkg_pd(struct request_queue *q, enum blkio_policy_id plid)
}
EXPORT_SYMBOL_GPL(update_root_blkg_pd);

/**
 * blkg_destroy_all - destroy all blkgs associated with a request_queue
 * @q: request_queue of interest
 * @destroy_root: whether to destroy root blkg or not
 *
 * Destroy blkgs associated with @q.  If @destroy_root is %true, all are
 * destroyed; otherwise, root blkg is left alone.
 */
void blkg_destroy_all(struct request_queue *q, bool destroy_root)
{
	struct blkio_group *blkg, *n;

	while (true) {
		bool done = true;

	spin_lock_irq(q->queue_lock);

	list_for_each_entry_safe(blkg, n, &q->blkg_list, q_node) {
		struct blkio_cgroup *blkcg = blkg->blkcg;

		/* skip root? */
		if (!destroy_root && blkg->blkcg == &blkio_root_cgroup)
			continue;

			/*
			 * If cgroup removal path got to blk_group first
			 * and removed it from cgroup list, then it will
			 * take care of destroying cfqg also.
			 */
			if (!blkiocg_del_blkio_group(blkg))
		spin_lock(&blkcg->lock);
		blkg_destroy(blkg);
			else
				done = false;
		spin_unlock(&blkcg->lock);
	}

	spin_unlock_irq(q->queue_lock);

		/*
		 * Group list may not be empty if we raced cgroup removal
		 * and lost.  cgroup removal is guaranteed to make forward
		 * progress and retrying after a while is enough.  This
		 * ugliness is scheduled to be removed after locking
		 * update.
		 */
		if (done)
			break;

		msleep(10);	/* just some random duration I like */
	}
}
EXPORT_SYMBOL_GPL(blkg_destroy_all);

@@ -1600,45 +1566,45 @@ static int blkiocg_populate(struct cgroup_subsys *subsys, struct cgroup *cgroup)
				ARRAY_SIZE(blkio_files));
}

/**
 * blkiocg_pre_destroy - cgroup pre_destroy callback
 * @subsys: cgroup subsys
 * @cgroup: cgroup of interest
 *
 * This function is called when @cgroup is about to go away and responsible
 * for shooting down all blkgs associated with @cgroup.  blkgs should be
 * removed while holding both q and blkcg locks.  As blkcg lock is nested
 * inside q lock, this function performs reverse double lock dancing.
 *
 * This is the blkcg counterpart of ioc_release_fn().
 */
static int blkiocg_pre_destroy(struct cgroup_subsys *subsys,
			       struct cgroup *cgroup)
{
	struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);
	unsigned long flags;
	struct blkio_group *blkg;
	struct request_queue *q;

	rcu_read_lock();
	spin_lock_irq(&blkcg->lock);

	do {
		spin_lock_irqsave(&blkcg->lock, flags);

		if (hlist_empty(&blkcg->blkg_list)) {
			spin_unlock_irqrestore(&blkcg->lock, flags);
			break;
		}

		blkg = hlist_entry(blkcg->blkg_list.first, struct blkio_group,
					blkcg_node);
		q = rcu_dereference(blkg->q);
		__blkiocg_del_blkio_group(blkg);

		spin_unlock_irqrestore(&blkcg->lock, flags);
	while (!hlist_empty(&blkcg->blkg_list)) {
		struct blkio_group *blkg = hlist_entry(blkcg->blkg_list.first,
						struct blkio_group, blkcg_node);
		struct request_queue *q = rcu_dereference(blkg->q);

		/*
		 * This blkio_group is being unlinked as associated cgroup is
		 * going away. Let all the IO controlling policies know about
		 * this event.
		 */
		spin_lock(&blkio_list_lock);
		spin_lock_irqsave(q->queue_lock, flags);
		if (spin_trylock(q->queue_lock)) {
			blkg_destroy(blkg);
		spin_unlock_irqrestore(q->queue_lock, flags);
		spin_unlock(&blkio_list_lock);
	} while (1);

			spin_unlock(q->queue_lock);
		} else {
			spin_unlock_irq(&blkcg->lock);
			rcu_read_unlock();
			cpu_relax();
			rcu_read_lock();
			spin_lock(&blkcg->lock);
		}
	}

	spin_unlock_irq(&blkcg->lock);
	rcu_read_unlock();
	return 0;
}

+0 −4
Original line number Diff line number Diff line
@@ -380,7 +380,6 @@ static inline void blkiocg_set_start_empty_time(struct blkio_group *blkg,
extern struct blkio_cgroup blkio_root_cgroup;
extern struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup);
extern struct blkio_cgroup *task_blkio_cgroup(struct task_struct *tsk);
extern int blkiocg_del_blkio_group(struct blkio_group *blkg);
extern struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg,
				       struct request_queue *q);
struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg,
@@ -416,9 +415,6 @@ cgroup_to_blkio_cgroup(struct cgroup *cgroup) { return NULL; }
static inline struct blkio_cgroup *
task_blkio_cgroup(struct task_struct *tsk) { return NULL; }

static inline int
blkiocg_del_blkio_group(struct blkio_group *blkg) { return 0; }

static inline struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg,
					      void *key) { return NULL; }
static inline void blkiocg_update_timeslice_used(struct blkio_group *blkg,
+0 −10
Original line number Diff line number Diff line
@@ -79,11 +79,6 @@ static inline void cfq_blkiocg_update_completion_stats(struct blkio_group *blkg,
					direction, sync);
}

static inline int cfq_blkiocg_del_blkio_group(struct blkio_group *blkg)
{
	return blkiocg_del_blkio_group(blkg);
}

#else /* CFQ_GROUP_IOSCHED */
static inline void cfq_blkiocg_update_io_add_stats(struct blkio_group *blkg,
			struct blkio_policy_type *pol,
@@ -119,10 +114,5 @@ static inline void cfq_blkiocg_update_completion_stats(struct blkio_group *blkg,
			struct blkio_policy_type *pol, uint64_t start_time,
			uint64_t io_start_time, bool direction, bool sync) { }

static inline int cfq_blkiocg_del_blkio_group(struct blkio_group *blkg)
{
	return 0;
}

#endif /* CFQ_GROUP_IOSCHED */
#endif