Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 9d179b86 authored by Tejun Heo, committed by Jens Axboe

blkcg: Fix multiple bugs in blkcg_activate_policy()



blkcg_activate_policy() has the following bugs.

* cf09a8ee ("blkcg: pass @q and @blkcg into
  blkcg_pol_alloc_pd_fn()") added @blkcg to ->pd_alloc_fn(); however,
  blkcg_activate_policy() ends up using pd's allocated for the root
  blkcg for all preallocations, so ->pd_init_fn() for non-root blkcgs
  can be passed in pd's which are allocated for the root blkcg.

  For blk-iocost, this means that ->pd_init_fn() can write beyond the
  end of the allocated object as it determines the length of the flex
  array at the end based on the blkcg's nesting level.

* Each pd is initialized as they get allocated.  If alloc fails, the
  policy will get freed with pd's initialized on it.

* After the above partial failure, the partial pds are not freed.

This patch fixes all the above issues by

* Restructuring blkcg_activate_policy() so that alloc and init passes
  are separate.  Init takes place only after all allocs succeeded and
  on failure all allocated pds are freed.

* Unifying and fixing the cleanup of the remaining pd_prealloc.

Signed-off-by: Tejun Heo <tj@kernel.org>
Fixes: cf09a8ee ("blkcg: pass @q and @blkcg into blkcg_pol_alloc_pd_fn()")
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent 5da0fb1a

Showing 1 changed file with 51 additions and 18 deletions.
@@ -1362,7 +1362,7 @@ int blkcg_activate_policy(struct request_queue *q,
 			  const struct blkcg_policy *pol)
 {
 	struct blkg_policy_data *pd_prealloc = NULL;
-	struct blkcg_gq *blkg;
+	struct blkcg_gq *blkg, *pinned_blkg = NULL;
 	int ret;
 
 	if (blkcg_policy_enabled(q, pol))
@@ -1370,49 +1370,82 @@ int blkcg_activate_policy(struct request_queue *q,
 
 	if (queue_is_mq(q))
 		blk_mq_freeze_queue(q);
-pd_prealloc:
-	if (!pd_prealloc) {
-		pd_prealloc = pol->pd_alloc_fn(GFP_KERNEL, q, &blkcg_root);
-		if (!pd_prealloc) {
-			ret = -ENOMEM;
-			goto out_bypass_end;
-		}
-	}
-
+retry:
 	spin_lock_irq(&q->queue_lock);
 
-	/* blkg_list is pushed at the head, reverse walk to init parents first */
+	/* blkg_list is pushed at the head, reverse walk to allocate parents first */
 	list_for_each_entry_reverse(blkg, &q->blkg_list, q_node) {
 		struct blkg_policy_data *pd;
 
 		if (blkg->pd[pol->plid])
 			continue;
 
-		pd = pol->pd_alloc_fn(GFP_NOWAIT | __GFP_NOWARN, q, &blkcg_root);
-		if (!pd)
-			swap(pd, pd_prealloc);
+		/* If prealloc matches, use it; otherwise try GFP_NOWAIT */
+		if (blkg == pinned_blkg) {
+			pd = pd_prealloc;
+			pd_prealloc = NULL;
+		} else {
+			pd = pol->pd_alloc_fn(GFP_NOWAIT | __GFP_NOWARN, q,
+					      blkg->blkcg);
+		}
+
 		if (!pd) {
+			/*
+			 * GFP_NOWAIT failed.  Free the existing one and
+			 * prealloc for @blkg w/ GFP_KERNEL.
+			 */
+			if (pinned_blkg)
+				blkg_put(pinned_blkg);
+			blkg_get(blkg);
+			pinned_blkg = blkg;
+
 			spin_unlock_irq(&q->queue_lock);
-			goto pd_prealloc;
+
+			if (pd_prealloc)
+				pol->pd_free_fn(pd_prealloc);
+			pd_prealloc = pol->pd_alloc_fn(GFP_KERNEL, q,
+						       blkg->blkcg);
+			if (pd_prealloc)
+				goto retry;
+			else
+				goto enomem;
 		}
 
 		blkg->pd[pol->plid] = pd;
 		pd->blkg = blkg;
 		pd->plid = pol->plid;
-		if (pol->pd_init_fn)
-			pol->pd_init_fn(pd);
 	}
 
+	/* all allocated, init in the same order */
+	if (pol->pd_init_fn)
+		list_for_each_entry_reverse(blkg, &q->blkg_list, q_node)
+			pol->pd_init_fn(blkg->pd[pol->plid]);
+
 	__set_bit(pol->plid, q->blkcg_pols);
 	ret = 0;
 
 	spin_unlock_irq(&q->queue_lock);
-out_bypass_end:
+out:
 	if (queue_is_mq(q))
 		blk_mq_unfreeze_queue(q);
+	if (pinned_blkg)
+		blkg_put(pinned_blkg);
 	if (pd_prealloc)
 		pol->pd_free_fn(pd_prealloc);
 	return ret;
+
+enomem:
+	/* alloc failed, nothing's initialized yet, free everything */
+	spin_lock_irq(&q->queue_lock);
+	list_for_each_entry(blkg, &q->blkg_list, q_node) {
+		if (blkg->pd[pol->plid]) {
+			pol->pd_free_fn(blkg->pd[pol->plid]);
+			blkg->pd[pol->plid] = NULL;
+		}
+	}
+	spin_unlock_irq(&q->queue_lock);
+	ret = -ENOMEM;
+	goto out;
 }
 EXPORT_SYMBOL_GPL(blkcg_activate_policy);