Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 8cf1a3fc authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge branch 'for-3.6/core' of git://git.kernel.dk/linux-block

Pull core block IO bits from Jens Axboe:
 "The most complicated part of this is the request allocation rework by
  Tejun, which has been queued up for a long time and has been in
  for-next ditto as well.

  There are a few commits from yesterday and today, mostly trivial and
  obvious fixes.  So I'm pretty confident that it is sound.  It's also
  smaller than usual."

* 'for-3.6/core' of git://git.kernel.dk/linux-block:
  block: remove dead func declaration
  block: add partition resize function to blkpg ioctl
  block: uninitialized ioc->nr_tasks triggers WARN_ON
  block: do not artificially constrain max_sectors for stacking drivers
  blkcg: implement per-blkg request allocation
  block: prepare for multiple request_lists
  block: add q->nr_rqs[] and move q->rq.elvpriv to q->nr_rqs_elvpriv
  blkcg: inline bio_blkcg() and friends
  block: allocate io_context upfront
  block: refactor get_request[_wait]()
  block: drop custom queue draining used by scsi_transport_{iscsi|fc}
  mempool: add @gfp_mask to mempool_create_node()
  blkcg: make root blkcg allocation use %GFP_KERNEL
  blkcg: __blkg_lookup_create() doesn't need radix preload
parents fcff06c4 80799fbb
Loading
Loading
Loading
Loading
+7 −0
Original line number Original line Diff line number Diff line
@@ -38,6 +38,13 @@ read or write requests. Note that the total allocated number may be twice
this amount, since it applies only to reads or writes (not the accumulated
this amount, since it applies only to reads or writes (not the accumulated
sum).
sum).


To avoid priority inversion through request starvation, a request
queue maintains a separate request pool per each cgroup when
CONFIG_BLK_CGROUP is enabled, and this parameter applies to each such
per-block-cgroup request pool.  IOW, if there are N block cgroups,
each request queue may have up to N request pools, each independently
regulated by nr_requests.

read_ahead_kb (RW)
read_ahead_kb (RW)
------------------
------------------
Maximum number of kilobytes to read-ahead for filesystems on this block
Maximum number of kilobytes to read-ahead for filesystems on this block
+90 −49
Original line number Original line Diff line number Diff line
@@ -31,27 +31,6 @@ EXPORT_SYMBOL_GPL(blkcg_root);


static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS];
static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS];


struct blkcg *cgroup_to_blkcg(struct cgroup *cgroup)
{
	return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id),
			    struct blkcg, css);
}
EXPORT_SYMBOL_GPL(cgroup_to_blkcg);

static struct blkcg *task_blkcg(struct task_struct *tsk)
{
	return container_of(task_subsys_state(tsk, blkio_subsys_id),
			    struct blkcg, css);
}

struct blkcg *bio_blkcg(struct bio *bio)
{
	if (bio && bio->bi_css)
		return container_of(bio->bi_css, struct blkcg, css);
	return task_blkcg(current);
}
EXPORT_SYMBOL_GPL(bio_blkcg);

static bool blkcg_policy_enabled(struct request_queue *q,
static bool blkcg_policy_enabled(struct request_queue *q,
				 const struct blkcg_policy *pol)
				 const struct blkcg_policy *pol)
{
{
@@ -84,6 +63,7 @@ static void blkg_free(struct blkcg_gq *blkg)
		kfree(pd);
		kfree(pd);
	}
	}


	blk_exit_rl(&blkg->rl);
	kfree(blkg);
	kfree(blkg);
}
}


@@ -91,16 +71,18 @@ static void blkg_free(struct blkcg_gq *blkg)
 * blkg_alloc - allocate a blkg
 * blkg_alloc - allocate a blkg
 * @blkcg: block cgroup the new blkg is associated with
 * @blkcg: block cgroup the new blkg is associated with
 * @q: request_queue the new blkg is associated with
 * @q: request_queue the new blkg is associated with
 * @gfp_mask: allocation mask to use
 *
 *
 * Allocate a new blkg associating @blkcg and @q.
 * Allocate a new blkg associating @blkcg and @q.
 */
 */
static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q)
static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q,
				   gfp_t gfp_mask)
{
{
	struct blkcg_gq *blkg;
	struct blkcg_gq *blkg;
	int i;
	int i;


	/* alloc and init base part */
	/* alloc and init base part */
	blkg = kzalloc_node(sizeof(*blkg), GFP_ATOMIC, q->node);
	blkg = kzalloc_node(sizeof(*blkg), gfp_mask, q->node);
	if (!blkg)
	if (!blkg)
		return NULL;
		return NULL;


@@ -109,6 +91,13 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q)
	blkg->blkcg = blkcg;
	blkg->blkcg = blkcg;
	blkg->refcnt = 1;
	blkg->refcnt = 1;


	/* root blkg uses @q->root_rl, init rl only for !root blkgs */
	if (blkcg != &blkcg_root) {
		if (blk_init_rl(&blkg->rl, q, gfp_mask))
			goto err_free;
		blkg->rl.blkg = blkg;
	}

	for (i = 0; i < BLKCG_MAX_POLS; i++) {
	for (i = 0; i < BLKCG_MAX_POLS; i++) {
		struct blkcg_policy *pol = blkcg_policy[i];
		struct blkcg_policy *pol = blkcg_policy[i];
		struct blkg_policy_data *pd;
		struct blkg_policy_data *pd;
@@ -117,11 +106,9 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q)
			continue;
			continue;


		/* alloc per-policy data and attach it to blkg */
		/* alloc per-policy data and attach it to blkg */
		pd = kzalloc_node(pol->pd_size, GFP_ATOMIC, q->node);
		pd = kzalloc_node(pol->pd_size, gfp_mask, q->node);
		if (!pd) {
		if (!pd)
			blkg_free(blkg);
			goto err_free;
			return NULL;
		}


		blkg->pd[i] = pd;
		blkg->pd[i] = pd;
		pd->blkg = blkg;
		pd->blkg = blkg;
@@ -132,6 +119,10 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q)
	}
	}


	return blkg;
	return blkg;

err_free:
	blkg_free(blkg);
	return NULL;
}
}


static struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg,
static struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg,
@@ -175,9 +166,13 @@ struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, struct request_queue *q)
}
}
EXPORT_SYMBOL_GPL(blkg_lookup);
EXPORT_SYMBOL_GPL(blkg_lookup);


/*
 * If @new_blkg is %NULL, this function tries to allocate a new one as
 * necessary using %GFP_ATOMIC.  @new_blkg is always consumed on return.
 */
static struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
static struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
					     struct request_queue *q)
					     struct request_queue *q,
	__releases(q->queue_lock) __acquires(q->queue_lock)
					     struct blkcg_gq *new_blkg)
{
{
	struct blkcg_gq *blkg;
	struct blkcg_gq *blkg;
	int ret;
	int ret;
@@ -189,24 +184,26 @@ static struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
	blkg = __blkg_lookup(blkcg, q);
	blkg = __blkg_lookup(blkcg, q);
	if (blkg) {
	if (blkg) {
		rcu_assign_pointer(blkcg->blkg_hint, blkg);
		rcu_assign_pointer(blkcg->blkg_hint, blkg);
		return blkg;
		goto out_free;
	}
	}


	/* blkg holds a reference to blkcg */
	/* blkg holds a reference to blkcg */
	if (!css_tryget(&blkcg->css))
	if (!css_tryget(&blkcg->css)) {
		return ERR_PTR(-EINVAL);
		blkg = ERR_PTR(-EINVAL);
		goto out_free;
	}


	/* allocate */
	/* allocate */
	ret = -ENOMEM;
	if (!new_blkg) {
	blkg = blkg_alloc(blkcg, q);
		new_blkg = blkg_alloc(blkcg, q, GFP_ATOMIC);
	if (unlikely(!blkg))
		if (unlikely(!new_blkg)) {
		goto err_put;
			blkg = ERR_PTR(-ENOMEM);
			goto out_put;
		}
	}
	blkg = new_blkg;


	/* insert */
	/* insert */
	ret = radix_tree_preload(GFP_ATOMIC);
	if (ret)
		goto err_free;

	spin_lock(&blkcg->lock);
	spin_lock(&blkcg->lock);
	ret = radix_tree_insert(&blkcg->blkg_tree, q->id, blkg);
	ret = radix_tree_insert(&blkcg->blkg_tree, q->id, blkg);
	if (likely(!ret)) {
	if (likely(!ret)) {
@@ -215,15 +212,15 @@ static struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
	}
	}
	spin_unlock(&blkcg->lock);
	spin_unlock(&blkcg->lock);


	radix_tree_preload_end();

	if (!ret)
	if (!ret)
		return blkg;
		return blkg;
err_free:

	blkg_free(blkg);
	blkg = ERR_PTR(ret);
err_put:
out_put:
	css_put(&blkcg->css);
	css_put(&blkcg->css);
	return ERR_PTR(ret);
out_free:
	blkg_free(new_blkg);
	return blkg;
}
}


struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
@@ -235,7 +232,7 @@ struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
	 */
	 */
	if (unlikely(blk_queue_bypass(q)))
	if (unlikely(blk_queue_bypass(q)))
		return ERR_PTR(blk_queue_dead(q) ? -EINVAL : -EBUSY);
		return ERR_PTR(blk_queue_dead(q) ? -EINVAL : -EBUSY);
	return __blkg_lookup_create(blkcg, q);
	return __blkg_lookup_create(blkcg, q, NULL);
}
}
EXPORT_SYMBOL_GPL(blkg_lookup_create);
EXPORT_SYMBOL_GPL(blkg_lookup_create);


@@ -313,6 +310,38 @@ void __blkg_release(struct blkcg_gq *blkg)
}
}
EXPORT_SYMBOL_GPL(__blkg_release);
EXPORT_SYMBOL_GPL(__blkg_release);


/*
 * Iterator step function backing blk_queue_for_each_rl().  It's a bit
 * tricky because the root blkg uses @q->root_rl instead of its own rl:
 * @q->root_rl is mapped to the list head @q->blkg_list, and the root
 * blkg's own q_node is explicitly skipped during the walk.  Returns
 * %NULL once the walk wraps back around to @q->blkg_list, terminating
 * the iteration.
 */
struct request_list *__blk_queue_next_rl(struct request_list *rl,
					 struct request_queue *q)
{
	struct list_head *ent;
	struct blkcg_gq *blkg;

	/*
	 * Determine the current blkg list_head.  The first entry is
	 * root_rl which is off @q->blkg_list and mapped to the head.
	 */
	if (rl == &q->root_rl) {
		ent = &q->blkg_list;
	} else {
		/* @rl is embedded in a blkg; recover the containing blkg */
		blkg = container_of(rl, struct blkcg_gq, rl);
		ent = &blkg->q_node;
	}

	/* walk to the next list_head, skip root blkcg */
	ent = ent->next;
	if (ent == &q->root_blkg->q_node)
		ent = ent->next;
	if (ent == &q->blkg_list)
		return NULL;

	blkg = container_of(ent, struct blkcg_gq, q_node);
	return &blkg->rl;
}

static int blkcg_reset_stats(struct cgroup *cgroup, struct cftype *cftype,
static int blkcg_reset_stats(struct cgroup *cgroup, struct cftype *cftype,
			     u64 val)
			     u64 val)
{
{
@@ -734,24 +763,36 @@ int blkcg_activate_policy(struct request_queue *q,
	struct blkcg_gq *blkg;
	struct blkcg_gq *blkg;
	struct blkg_policy_data *pd, *n;
	struct blkg_policy_data *pd, *n;
	int cnt = 0, ret;
	int cnt = 0, ret;
	bool preloaded;


	if (blkcg_policy_enabled(q, pol))
	if (blkcg_policy_enabled(q, pol))
		return 0;
		return 0;


	/* preallocations for root blkg */
	blkg = blkg_alloc(&blkcg_root, q, GFP_KERNEL);
	if (!blkg)
		return -ENOMEM;

	preloaded = !radix_tree_preload(GFP_KERNEL);

	blk_queue_bypass_start(q);
	blk_queue_bypass_start(q);


	/* make sure the root blkg exists and count the existing blkgs */
	/* make sure the root blkg exists and count the existing blkgs */
	spin_lock_irq(q->queue_lock);
	spin_lock_irq(q->queue_lock);


	rcu_read_lock();
	rcu_read_lock();
	blkg = __blkg_lookup_create(&blkcg_root, q);
	blkg = __blkg_lookup_create(&blkcg_root, q, blkg);
	rcu_read_unlock();
	rcu_read_unlock();


	if (preloaded)
		radix_tree_preload_end();

	if (IS_ERR(blkg)) {
	if (IS_ERR(blkg)) {
		ret = PTR_ERR(blkg);
		ret = PTR_ERR(blkg);
		goto out_unlock;
		goto out_unlock;
	}
	}
	q->root_blkg = blkg;
	q->root_blkg = blkg;
	q->root_rl.blkg = blkg;


	list_for_each_entry(blkg, &q->blkg_list, q_node)
	list_for_each_entry(blkg, &q->blkg_list, q_node)
		cnt++;
		cnt++;
+124 −4
Original line number Original line Diff line number Diff line
@@ -17,6 +17,7 @@
#include <linux/u64_stats_sync.h>
#include <linux/u64_stats_sync.h>
#include <linux/seq_file.h>
#include <linux/seq_file.h>
#include <linux/radix-tree.h>
#include <linux/radix-tree.h>
#include <linux/blkdev.h>


/* Max limits for throttle policy */
/* Max limits for throttle policy */
#define THROTL_IOPS_MAX		UINT_MAX
#define THROTL_IOPS_MAX		UINT_MAX
@@ -93,6 +94,8 @@ struct blkcg_gq {
	struct list_head		q_node;
	struct list_head		q_node;
	struct hlist_node		blkcg_node;
	struct hlist_node		blkcg_node;
	struct blkcg			*blkcg;
	struct blkcg			*blkcg;
	/* request allocation list for this blkcg-q pair */
	struct request_list		rl;
	/* reference count */
	/* reference count */
	int				refcnt;
	int				refcnt;


@@ -120,8 +123,6 @@ struct blkcg_policy {


extern struct blkcg blkcg_root;
extern struct blkcg blkcg_root;


struct blkcg *cgroup_to_blkcg(struct cgroup *cgroup);
struct blkcg *bio_blkcg(struct bio *bio);
struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, struct request_queue *q);
struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, struct request_queue *q);
struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
				    struct request_queue *q);
				    struct request_queue *q);
@@ -160,6 +161,25 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
void blkg_conf_finish(struct blkg_conf_ctx *ctx);
void blkg_conf_finish(struct blkg_conf_ctx *ctx);




/* map a cgroup to its blkcg via the blkio subsystem state */
static inline struct blkcg *cgroup_to_blkcg(struct cgroup *cgroup)
{
	return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id),
			    struct blkcg, css);
}

/* blkcg that @tsk currently belongs to (blkio subsystem state) */
static inline struct blkcg *task_blkcg(struct task_struct *tsk)
{
	return container_of(task_subsys_state(tsk, blkio_subsys_id),
			    struct blkcg, css);
}

/*
 * Determine the owning blkcg of @bio.  If @bio carries an explicit css
 * (bio->bi_css), use that; otherwise fall back to %current's blkcg.
 * @bio may be %NULL.
 */
static inline struct blkcg *bio_blkcg(struct bio *bio)
{
	if (bio && bio->bi_css)
		return container_of(bio->bi_css, struct blkcg, css);
	return task_blkcg(current);
}

/**
/**
 * blkg_to_pdata - get policy private data
 * blkg_to_pdata - get policy private data
 * @blkg: blkg of interest
 * @blkg: blkg of interest
@@ -233,6 +253,95 @@ static inline void blkg_put(struct blkcg_gq *blkg)
		__blkg_release(blkg);
		__blkg_release(blkg);
}
}


/**
 * blk_get_rl - get request_list to use
 * @q: request_queue of interest
 * @bio: bio which will be attached to the allocated request (may be %NULL)
 *
 * The caller wants to allocate a request from @q to use for @bio.  Find
 * the request_list to use and obtain a reference on it.  Should be called
 * under queue_lock.  This function is guaranteed to return non-%NULL
 * request_list.
 */
static inline struct request_list *blk_get_rl(struct request_queue *q,
					      struct bio *bio)
{
	struct blkcg *blkcg;
	struct blkcg_gq *blkg;

	rcu_read_lock();

	blkcg = bio_blkcg(bio);

	/* bypass blkg lookup and use @q->root_rl directly for root */
	if (blkcg == &blkcg_root)
		goto root_rl;

	/*
	 * Try to use blkg->rl.  blkg lookup may fail under memory pressure
	 * or if either the blkcg or queue is going away.  Fall back to
	 * root_rl in such cases.
	 */
	blkg = blkg_lookup_create(blkcg, q);
	if (unlikely(IS_ERR(blkg)))
		goto root_rl;

	/* pin the blkg so @rl stays valid; reference dropped in blk_put_rl() */
	blkg_get(blkg);
	rcu_read_unlock();
	return &blkg->rl;
root_rl:
	rcu_read_unlock();
	return &q->root_rl;
}

/**
 * blk_put_rl - put request_list
 * @rl: request_list to put
 *
 * Put the reference acquired by blk_get_rl().  Should be called under
 * queue_lock.
 */
static inline void blk_put_rl(struct request_list *rl)
{
	/*
	 * root_rl may not have blkg set.  blk_get_rl() also never takes a
	 * blkg reference for the root blkcg (it returns @q->root_rl
	 * directly), so only non-root rls drop a reference here.
	 */
	if (rl->blkg && rl->blkg->blkcg != &blkcg_root)
		blkg_put(rl->blkg);
}

/**
 * blk_rq_set_rl - associate a request with a request_list
 * @rq: request of interest
 * @rl: target request_list
 *
 * Associate @rq with @rl so that accounting and freeing can know the
 * request_list @rq came from.
 */
static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl)
{
	rq->rl = rl;
}

/**
 * blk_rq_rl - return the request_list a request came from
 * @rq: request of interest
 *
 * Return the request_list @rq is allocated from.
 */
static inline struct request_list *blk_rq_rl(struct request *rq)
{
	return rq->rl;
}

/* iterator step used by blk_queue_for_each_rl() below */
struct request_list *__blk_queue_next_rl(struct request_list *rl,
					 struct request_queue *q);
/**
 * blk_queue_for_each_rl - iterate through all request_lists of a request_queue
 *
 * Starts from @q->root_rl and ends when __blk_queue_next_rl() returns
 * %NULL.  Should be used under queue_lock.
 */
#define blk_queue_for_each_rl(rl, q)	\
	for ((rl) = &(q)->root_rl; (rl); (rl) = __blk_queue_next_rl((rl), (q)))

/**
/**
 * blkg_stat_add - add a value to a blkg_stat
 * blkg_stat_add - add a value to a blkg_stat
 * @stat: target blkg_stat
 * @stat: target blkg_stat
@@ -351,6 +460,7 @@ static inline void blkg_rwstat_reset(struct blkg_rwstat *rwstat)
#else	/* CONFIG_BLK_CGROUP */
#else	/* CONFIG_BLK_CGROUP */


struct cgroup;
struct cgroup;
struct blkcg;


struct blkg_policy_data {
struct blkg_policy_data {
};
};
@@ -361,8 +471,6 @@ struct blkcg_gq {
struct blkcg_policy {
struct blkcg_policy {
};
};


static inline struct blkcg *cgroup_to_blkcg(struct cgroup *cgroup) { return NULL; }
static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; }
static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; }
static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; }
static inline int blkcg_init_queue(struct request_queue *q) { return 0; }
static inline int blkcg_init_queue(struct request_queue *q) { return 0; }
static inline void blkcg_drain_queue(struct request_queue *q) { }
static inline void blkcg_drain_queue(struct request_queue *q) { }
@@ -374,6 +482,9 @@ static inline int blkcg_activate_policy(struct request_queue *q,
static inline void blkcg_deactivate_policy(struct request_queue *q,
static inline void blkcg_deactivate_policy(struct request_queue *q,
					   const struct blkcg_policy *pol) { }
					   const struct blkcg_policy *pol) { }


static inline struct blkcg *cgroup_to_blkcg(struct cgroup *cgroup) { return NULL; }
static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; }

static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
						  struct blkcg_policy *pol) { return NULL; }
						  struct blkcg_policy *pol) { return NULL; }
static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; }
static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; }
@@ -381,5 +492,14 @@ static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; }
static inline void blkg_get(struct blkcg_gq *blkg) { }
static inline void blkg_get(struct blkcg_gq *blkg) { }
static inline void blkg_put(struct blkcg_gq *blkg) { }
static inline void blkg_put(struct blkcg_gq *blkg) { }


static inline struct request_list *blk_get_rl(struct request_queue *q,
					      struct bio *bio) { return &q->root_rl; }
static inline void blk_put_rl(struct request_list *rl) { }
static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) { }
static inline struct request_list *blk_rq_rl(struct request *rq) { return &rq->q->root_rl; }

#define blk_queue_for_each_rl(rl, q)	\
	for ((rl) = &(q)->root_rl; (rl); (rl) = NULL)

#endif	/* CONFIG_BLK_CGROUP */
#endif	/* CONFIG_BLK_CGROUP */
#endif	/* _BLK_CGROUP_H */
#endif	/* _BLK_CGROUP_H */
+111 −98
Original line number Original line Diff line number Diff line
@@ -387,7 +387,7 @@ void blk_drain_queue(struct request_queue *q, bool drain_all)
		if (!list_empty(&q->queue_head) && q->request_fn)
		if (!list_empty(&q->queue_head) && q->request_fn)
			__blk_run_queue(q);
			__blk_run_queue(q);


		drain |= q->rq.elvpriv;
		drain |= q->nr_rqs_elvpriv;


		/*
		/*
		 * Unfortunately, requests are queued at and tracked from
		 * Unfortunately, requests are queued at and tracked from
@@ -397,7 +397,7 @@ void blk_drain_queue(struct request_queue *q, bool drain_all)
		if (drain_all) {
		if (drain_all) {
			drain |= !list_empty(&q->queue_head);
			drain |= !list_empty(&q->queue_head);
			for (i = 0; i < 2; i++) {
			for (i = 0; i < 2; i++) {
				drain |= q->rq.count[i];
				drain |= q->nr_rqs[i];
				drain |= q->in_flight[i];
				drain |= q->in_flight[i];
				drain |= !list_empty(&q->flush_queue[i]);
				drain |= !list_empty(&q->flush_queue[i]);
			}
			}
@@ -416,9 +416,14 @@ void blk_drain_queue(struct request_queue *q, bool drain_all)
	 * left with hung waiters. We need to wake up those waiters.
	 * left with hung waiters. We need to wake up those waiters.
	 */
	 */
	if (q->request_fn) {
	if (q->request_fn) {
		struct request_list *rl;

		spin_lock_irq(q->queue_lock);
		spin_lock_irq(q->queue_lock);
		for (i = 0; i < ARRAY_SIZE(q->rq.wait); i++)

			wake_up_all(&q->rq.wait[i]);
		blk_queue_for_each_rl(rl, q)
			for (i = 0; i < ARRAY_SIZE(rl->wait); i++)
				wake_up_all(&rl->wait[i]);

		spin_unlock_irq(q->queue_lock);
		spin_unlock_irq(q->queue_lock);
	}
	}
}
}
@@ -517,28 +522,33 @@ void blk_cleanup_queue(struct request_queue *q)
}
}
EXPORT_SYMBOL(blk_cleanup_queue);
EXPORT_SYMBOL(blk_cleanup_queue);


static int blk_init_free_list(struct request_queue *q)
int blk_init_rl(struct request_list *rl, struct request_queue *q,
		gfp_t gfp_mask)
{
{
	struct request_list *rl = &q->rq;

	if (unlikely(rl->rq_pool))
	if (unlikely(rl->rq_pool))
		return 0;
		return 0;


	rl->q = q;
	rl->count[BLK_RW_SYNC] = rl->count[BLK_RW_ASYNC] = 0;
	rl->count[BLK_RW_SYNC] = rl->count[BLK_RW_ASYNC] = 0;
	rl->starved[BLK_RW_SYNC] = rl->starved[BLK_RW_ASYNC] = 0;
	rl->starved[BLK_RW_SYNC] = rl->starved[BLK_RW_ASYNC] = 0;
	rl->elvpriv = 0;
	init_waitqueue_head(&rl->wait[BLK_RW_SYNC]);
	init_waitqueue_head(&rl->wait[BLK_RW_SYNC]);
	init_waitqueue_head(&rl->wait[BLK_RW_ASYNC]);
	init_waitqueue_head(&rl->wait[BLK_RW_ASYNC]);


	rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
	rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
				mempool_free_slab, request_cachep, q->node);
					  mempool_free_slab, request_cachep,

					  gfp_mask, q->node);
	if (!rl->rq_pool)
	if (!rl->rq_pool)
		return -ENOMEM;
		return -ENOMEM;


	return 0;
	return 0;
}
}


/*
 * Release the resources backing @rl.  The NULL check is needed because
 * rq_pool may never have been set up: blkg_alloc() initializes the
 * embedded rl only for non-root blkgs, yet blkg_free() calls
 * blk_exit_rl() unconditionally.
 */
void blk_exit_rl(struct request_list *rl)
{
	if (rl->rq_pool)
		mempool_destroy(rl->rq_pool);
}

struct request_queue *blk_alloc_queue(gfp_t gfp_mask)
struct request_queue *blk_alloc_queue(gfp_t gfp_mask)
{
{
	return blk_alloc_queue_node(gfp_mask, -1);
	return blk_alloc_queue_node(gfp_mask, -1);
@@ -680,7 +690,7 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
	if (!q)
	if (!q)
		return NULL;
		return NULL;


	if (blk_init_free_list(q))
	if (blk_init_rl(&q->root_rl, q, GFP_KERNEL))
		return NULL;
		return NULL;


	q->request_fn		= rfn;
	q->request_fn		= rfn;
@@ -722,15 +732,15 @@ bool blk_get_queue(struct request_queue *q)
}
}
EXPORT_SYMBOL(blk_get_queue);
EXPORT_SYMBOL(blk_get_queue);


static inline void blk_free_request(struct request_queue *q, struct request *rq)
static inline void blk_free_request(struct request_list *rl, struct request *rq)
{
{
	if (rq->cmd_flags & REQ_ELVPRIV) {
	if (rq->cmd_flags & REQ_ELVPRIV) {
		elv_put_request(q, rq);
		elv_put_request(rl->q, rq);
		if (rq->elv.icq)
		if (rq->elv.icq)
			put_io_context(rq->elv.icq->ioc);
			put_io_context(rq->elv.icq->ioc);
	}
	}


	mempool_free(rq, q->rq.rq_pool);
	mempool_free(rq, rl->rq_pool);
}
}


/*
/*
@@ -767,18 +777,23 @@ static void ioc_set_batching(struct request_queue *q, struct io_context *ioc)
	ioc->last_waited = jiffies;
	ioc->last_waited = jiffies;
}
}


static void __freed_request(struct request_queue *q, int sync)
static void __freed_request(struct request_list *rl, int sync)
{
{
	struct request_list *rl = &q->rq;
	struct request_queue *q = rl->q;


	if (rl->count[sync] < queue_congestion_off_threshold(q))
	/*
	 * bdi isn't aware of blkcg yet.  As all async IOs end up root
	 * blkcg anyway, just use root blkcg state.
	 */
	if (rl == &q->root_rl &&
	    rl->count[sync] < queue_congestion_off_threshold(q))
		blk_clear_queue_congested(q, sync);
		blk_clear_queue_congested(q, sync);


	if (rl->count[sync] + 1 <= q->nr_requests) {
	if (rl->count[sync] + 1 <= q->nr_requests) {
		if (waitqueue_active(&rl->wait[sync]))
		if (waitqueue_active(&rl->wait[sync]))
			wake_up(&rl->wait[sync]);
			wake_up(&rl->wait[sync]);


		blk_clear_queue_full(q, sync);
		blk_clear_rl_full(rl, sync);
	}
	}
}
}


@@ -786,19 +801,20 @@ static void __freed_request(struct request_queue *q, int sync)
 * A request has just been released.  Account for it, update the full and
 * A request has just been released.  Account for it, update the full and
 * congestion status, wake up any waiters.   Called under q->queue_lock.
 * congestion status, wake up any waiters.   Called under q->queue_lock.
 */
 */
static void freed_request(struct request_queue *q, unsigned int flags)
static void freed_request(struct request_list *rl, unsigned int flags)
{
{
	struct request_list *rl = &q->rq;
	struct request_queue *q = rl->q;
	int sync = rw_is_sync(flags);
	int sync = rw_is_sync(flags);


	q->nr_rqs[sync]--;
	rl->count[sync]--;
	rl->count[sync]--;
	if (flags & REQ_ELVPRIV)
	if (flags & REQ_ELVPRIV)
		rl->elvpriv--;
		q->nr_rqs_elvpriv--;


	__freed_request(q, sync);
	__freed_request(rl, sync);


	if (unlikely(rl->starved[sync ^ 1]))
	if (unlikely(rl->starved[sync ^ 1]))
		__freed_request(q, sync ^ 1);
		__freed_request(rl, sync ^ 1);
}
}


/*
/*
@@ -837,8 +853,8 @@ static struct io_context *rq_ioc(struct bio *bio)
}
}


/**
/**
 * get_request - get a free request
 * __get_request - get a free request
 * @q: request_queue to allocate request from
 * @rl: request list to allocate from
 * @rw_flags: RW and SYNC flags
 * @rw_flags: RW and SYNC flags
 * @bio: bio to allocate request for (can be %NULL)
 * @bio: bio to allocate request for (can be %NULL)
 * @gfp_mask: allocation mask
 * @gfp_mask: allocation mask
@@ -850,20 +866,16 @@ static struct io_context *rq_ioc(struct bio *bio)
 * Returns %NULL on failure, with @q->queue_lock held.
 * Returns %NULL on failure, with @q->queue_lock held.
 * Returns !%NULL on success, with @q->queue_lock *not held*.
 * Returns !%NULL on success, with @q->queue_lock *not held*.
 */
 */
static struct request *get_request(struct request_queue *q, int rw_flags,
static struct request *__get_request(struct request_list *rl, int rw_flags,
				     struct bio *bio, gfp_t gfp_mask)
				     struct bio *bio, gfp_t gfp_mask)
{
{
	struct request_queue *q = rl->q;
	struct request *rq;
	struct request *rq;
	struct request_list *rl = &q->rq;
	struct elevator_type *et = q->elevator->type;
	struct elevator_type *et;
	struct io_context *ioc = rq_ioc(bio);
	struct io_context *ioc;
	struct io_cq *icq = NULL;
	struct io_cq *icq = NULL;
	const bool is_sync = rw_is_sync(rw_flags) != 0;
	const bool is_sync = rw_is_sync(rw_flags) != 0;
	bool retried = false;
	int may_queue;
	int may_queue;
retry:
	et = q->elevator->type;
	ioc = rq_ioc(bio);


	if (unlikely(blk_queue_dead(q)))
	if (unlikely(blk_queue_dead(q)))
		return NULL;
		return NULL;
@@ -874,29 +886,15 @@ static struct request *get_request(struct request_queue *q, int rw_flags,


	if (rl->count[is_sync]+1 >= queue_congestion_on_threshold(q)) {
	if (rl->count[is_sync]+1 >= queue_congestion_on_threshold(q)) {
		if (rl->count[is_sync]+1 >= q->nr_requests) {
		if (rl->count[is_sync]+1 >= q->nr_requests) {
			/*
			 * We want ioc to record batching state.  If it's
			 * not already there, creating a new one requires
			 * dropping queue_lock, which in turn requires
			 * retesting conditions to avoid queue hang.
			 */
			if (!ioc && !retried) {
				spin_unlock_irq(q->queue_lock);
				create_io_context(gfp_mask, q->node);
				spin_lock_irq(q->queue_lock);
				retried = true;
				goto retry;
			}

			/*
			/*
			 * The queue will fill after this allocation, so set
			 * The queue will fill after this allocation, so set
			 * it as full, and mark this process as "batching".
			 * it as full, and mark this process as "batching".
			 * This process will be allowed to complete a batch of
			 * This process will be allowed to complete a batch of
			 * requests, others will be blocked.
			 * requests, others will be blocked.
			 */
			 */
			if (!blk_queue_full(q, is_sync)) {
			if (!blk_rl_full(rl, is_sync)) {
				ioc_set_batching(q, ioc);
				ioc_set_batching(q, ioc);
				blk_set_queue_full(q, is_sync);
				blk_set_rl_full(rl, is_sync);
			} else {
			} else {
				if (may_queue != ELV_MQUEUE_MUST
				if (may_queue != ELV_MQUEUE_MUST
						&& !ioc_batching(q, ioc)) {
						&& !ioc_batching(q, ioc)) {
@@ -909,6 +907,11 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
				}
				}
			}
			}
		}
		}
		/*
		 * bdi isn't aware of blkcg yet.  As all async IOs end up
		 * root blkcg anyway, just use root blkcg state.
		 */
		if (rl == &q->root_rl)
			blk_set_queue_congested(q, is_sync);
			blk_set_queue_congested(q, is_sync);
	}
	}


@@ -920,6 +923,7 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
	if (rl->count[is_sync] >= (3 * q->nr_requests / 2))
	if (rl->count[is_sync] >= (3 * q->nr_requests / 2))
		return NULL;
		return NULL;


	q->nr_rqs[is_sync]++;
	rl->count[is_sync]++;
	rl->count[is_sync]++;
	rl->starved[is_sync] = 0;
	rl->starved[is_sync] = 0;


@@ -935,7 +939,7 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
	 */
	 */
	if (blk_rq_should_init_elevator(bio) && !blk_queue_bypass(q)) {
	if (blk_rq_should_init_elevator(bio) && !blk_queue_bypass(q)) {
		rw_flags |= REQ_ELVPRIV;
		rw_flags |= REQ_ELVPRIV;
		rl->elvpriv++;
		q->nr_rqs_elvpriv++;
		if (et->icq_cache && ioc)
		if (et->icq_cache && ioc)
			icq = ioc_lookup_icq(ioc, q);
			icq = ioc_lookup_icq(ioc, q);
	}
	}
@@ -945,21 +949,18 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
	spin_unlock_irq(q->queue_lock);
	spin_unlock_irq(q->queue_lock);


	/* allocate and init request */
	/* allocate and init request */
	rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
	rq = mempool_alloc(rl->rq_pool, gfp_mask);
	if (!rq)
	if (!rq)
		goto fail_alloc;
		goto fail_alloc;


	blk_rq_init(q, rq);
	blk_rq_init(q, rq);
	blk_rq_set_rl(rq, rl);
	rq->cmd_flags = rw_flags | REQ_ALLOCED;
	rq->cmd_flags = rw_flags | REQ_ALLOCED;


	/* init elvpriv */
	/* init elvpriv */
	if (rw_flags & REQ_ELVPRIV) {
	if (rw_flags & REQ_ELVPRIV) {
		if (unlikely(et->icq_cache && !icq)) {
		if (unlikely(et->icq_cache && !icq)) {
			create_io_context(gfp_mask, q->node);
			if (ioc)
			ioc = rq_ioc(bio);
			if (!ioc)
				goto fail_elvpriv;

				icq = ioc_create_icq(ioc, q, gfp_mask);
				icq = ioc_create_icq(ioc, q, gfp_mask);
			if (!icq)
			if (!icq)
				goto fail_elvpriv;
				goto fail_elvpriv;
@@ -1000,7 +1001,7 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
	rq->elv.icq = NULL;
	rq->elv.icq = NULL;


	spin_lock_irq(q->queue_lock);
	spin_lock_irq(q->queue_lock);
	rl->elvpriv--;
	q->nr_rqs_elvpriv--;
	spin_unlock_irq(q->queue_lock);
	spin_unlock_irq(q->queue_lock);
	goto out;
	goto out;


@@ -1013,7 +1014,7 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
	 * queue, but this is pretty rare.
	 * queue, but this is pretty rare.
	 */
	 */
	spin_lock_irq(q->queue_lock);
	spin_lock_irq(q->queue_lock);
	freed_request(q, rw_flags);
	freed_request(rl, rw_flags);


	/*
	/*
	 * in the very unlikely event that allocation failed and no
	 * in the very unlikely event that allocation failed and no
@@ -1029,32 +1030,39 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
}
}


/**
/**
 * get_request_wait - get a free request with retry
 * get_request - get a free request
 * @q: request_queue to allocate request from
 * @q: request_queue to allocate request from
 * @rw_flags: RW and SYNC flags
 * @rw_flags: RW and SYNC flags
 * @bio: bio to allocate request for (can be %NULL)
 * @bio: bio to allocate request for (can be %NULL)
 * @gfp_mask: allocation mask
 *
 *
 * Get a free request from @q.  This function keeps retrying under memory
 * Get a free request from @q.  If %__GFP_WAIT is set in @gfp_mask, this
 * pressure and fails iff @q is dead.
 * function keeps retrying under memory pressure and fails iff @q is dead.
 *
 *
 * Must be called with @q->queue_lock held and,
 * Must be called with @q->queue_lock held and,
 * Returns %NULL on failure, with @q->queue_lock held.
 * Returns %NULL on failure, with @q->queue_lock held.
 * Returns !%NULL on success, with @q->queue_lock *not held*.
 * Returns !%NULL on success, with @q->queue_lock *not held*.
 */
 */
static struct request *get_request_wait(struct request_queue *q, int rw_flags,
static struct request *get_request(struct request_queue *q, int rw_flags,
					struct bio *bio)
				   struct bio *bio, gfp_t gfp_mask)
{
{
	const bool is_sync = rw_is_sync(rw_flags) != 0;
	const bool is_sync = rw_is_sync(rw_flags) != 0;
	DEFINE_WAIT(wait);
	struct request_list *rl;
	struct request *rq;
	struct request *rq;


	rq = get_request(q, rw_flags, bio, GFP_NOIO);
	rl = blk_get_rl(q, bio);	/* transferred to @rq on success */
	while (!rq) {
retry:
		DEFINE_WAIT(wait);
	rq = __get_request(rl, rw_flags, bio, gfp_mask);
		struct request_list *rl = &q->rq;
	if (rq)
		return rq;


		if (unlikely(blk_queue_dead(q)))
	if (!(gfp_mask & __GFP_WAIT) || unlikely(blk_queue_dead(q))) {
		blk_put_rl(rl);
		return NULL;
		return NULL;
	}


	/* wait on @rl and retry */
	prepare_to_wait_exclusive(&rl->wait[is_sync], &wait,
	prepare_to_wait_exclusive(&rl->wait[is_sync], &wait,
				  TASK_UNINTERRUPTIBLE);
				  TASK_UNINTERRUPTIBLE);


@@ -1064,21 +1072,16 @@ static struct request *get_request_wait(struct request_queue *q, int rw_flags,
	io_schedule();
	io_schedule();


	/*
	/*
		 * After sleeping, we become a "batching" process and
	 * After sleeping, we become a "batching" process and will be able
		 * will be able to allocate at least one request, and
	 * to allocate at least one request, and up to a big batch of them
		 * up to a big batch of them for a small period time.
	 * for a small period time.  See ioc_batching, ioc_set_batching
		 * See ioc_batching, ioc_set_batching
	 */
	 */
		create_io_context(GFP_NOIO, q->node);
	ioc_set_batching(q, current->io_context);
	ioc_set_batching(q, current->io_context);


	spin_lock_irq(q->queue_lock);
	spin_lock_irq(q->queue_lock);
	finish_wait(&rl->wait[is_sync], &wait);
	finish_wait(&rl->wait[is_sync], &wait);


		rq = get_request(q, rw_flags, bio, GFP_NOIO);
	goto retry;
	};

	return rq;
}
}


struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
@@ -1087,10 +1090,10 @@ struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)


	BUG_ON(rw != READ && rw != WRITE);
	BUG_ON(rw != READ && rw != WRITE);


	/* create ioc upfront */
	create_io_context(gfp_mask, q->node);

	spin_lock_irq(q->queue_lock);
	spin_lock_irq(q->queue_lock);
	if (gfp_mask & __GFP_WAIT)
		rq = get_request_wait(q, rw, NULL);
	else
	rq = get_request(q, rw, NULL, gfp_mask);
	rq = get_request(q, rw, NULL, gfp_mask);
	if (!rq)
	if (!rq)
		spin_unlock_irq(q->queue_lock);
		spin_unlock_irq(q->queue_lock);
@@ -1248,12 +1251,14 @@ void __blk_put_request(struct request_queue *q, struct request *req)
	 */
	 */
	if (req->cmd_flags & REQ_ALLOCED) {
	if (req->cmd_flags & REQ_ALLOCED) {
		unsigned int flags = req->cmd_flags;
		unsigned int flags = req->cmd_flags;
		struct request_list *rl = blk_rq_rl(req);


		BUG_ON(!list_empty(&req->queuelist));
		BUG_ON(!list_empty(&req->queuelist));
		BUG_ON(!hlist_unhashed(&req->hash));
		BUG_ON(!hlist_unhashed(&req->hash));


		blk_free_request(q, req);
		blk_free_request(rl, req);
		freed_request(q, flags);
		freed_request(rl, flags);
		blk_put_rl(rl);
	}
	}
}
}
EXPORT_SYMBOL_GPL(__blk_put_request);
EXPORT_SYMBOL_GPL(__blk_put_request);
@@ -1481,7 +1486,7 @@ void blk_queue_bio(struct request_queue *q, struct bio *bio)
	 * Grab a free request. This is might sleep but can not fail.
	 * Grab a free request. This is might sleep but can not fail.
	 * Returns with the queue unlocked.
	 * Returns with the queue unlocked.
	 */
	 */
	req = get_request_wait(q, rw_flags, bio);
	req = get_request(q, rw_flags, bio, GFP_NOIO);
	if (unlikely(!req)) {
	if (unlikely(!req)) {
		bio_endio(bio, -ENODEV);	/* @q is dead */
		bio_endio(bio, -ENODEV);	/* @q is dead */
		goto out_unlock;
		goto out_unlock;
@@ -1702,6 +1707,14 @@ generic_make_request_checks(struct bio *bio)
		goto end_io;
		goto end_io;
	}
	}


	/*
	 * Various block parts want %current->io_context and lazy ioc
	 * allocation ends up trading a lot of pain for a small amount of
	 * memory.  Just allocate it upfront.  This may fail and block
	 * layer knows how to live with it.
	 */
	create_io_context(GFP_ATOMIC, q->node);

	if (blk_throtl_bio(q, bio))
	if (blk_throtl_bio(q, bio))
		return false;	/* throttled, will be resubmitted later */
		return false;	/* throttled, will be resubmitted later */


+1 −0
Original line number Original line Diff line number Diff line
@@ -244,6 +244,7 @@ int create_task_io_context(struct task_struct *task, gfp_t gfp_flags, int node)


	/* initialize */
	/* initialize */
	atomic_long_set(&ioc->refcount, 1);
	atomic_long_set(&ioc->refcount, 1);
	atomic_set(&ioc->nr_tasks, 1);
	atomic_set(&ioc->active_ref, 1);
	atomic_set(&ioc->active_ref, 1);
	spin_lock_init(&ioc->lock);
	spin_lock_init(&ioc->lock);
	INIT_RADIX_TREE(&ioc->icq_tree, GFP_ATOMIC | __GFP_HIGH);
	INIT_RADIX_TREE(&ioc->icq_tree, GFP_ATOMIC | __GFP_HIGH);
Loading