Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 8cf1a3fc authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge branch 'for-3.6/core' of git://git.kernel.dk/linux-block

Pull core block IO bits from Jens Axboe:
 "The most complicated part of this is the request allocation rework by
  Tejun, which has been queued up for a long time and has been in
  for-next ditto as well.

  There are a few commits from yesterday and today, mostly trivial and
  obvious fixes.  So I'm pretty confident that it is sound.  It's also
  smaller than usual."

* 'for-3.6/core' of git://git.kernel.dk/linux-block:
  block: remove dead func declaration
  block: add partition resize function to blkpg ioctl
  block: uninitialized ioc->nr_tasks triggers WARN_ON
  block: do not artificially constrain max_sectors for stacking drivers
  blkcg: implement per-blkg request allocation
  block: prepare for multiple request_lists
  block: add q->nr_rqs[] and move q->rq.elvpriv to q->nr_rqs_elvpriv
  blkcg: inline bio_blkcg() and friends
  block: allocate io_context upfront
  block: refactor get_request[_wait]()
  block: drop custom queue draining used by scsi_transport_{iscsi|fc}
  mempool: add @gfp_mask to mempool_create_node()
  blkcg: make root blkcg allocation use %GFP_KERNEL
  blkcg: __blkg_lookup_create() doesn't need radix preload
parents fcff06c4 80799fbb
Loading
Loading
Loading
Loading
+7 −0
Original line number Original line Diff line number Diff line
@@ -38,6 +38,13 @@ read or write requests. Note that the total allocated number may be twice
this amount, since it applies only to reads or writes (not the accumulated
this amount, since it applies only to reads or writes (not the accumulated
sum).
sum).


To avoid priority inversion through request starvation, a request
queue maintains a separate request pool per each cgroup when
CONFIG_BLK_CGROUP is enabled, and this parameter applies to each such
per-block-cgroup request pool.  IOW, if there are N block cgroups,
each request queue may have up to N request pools, each independently
regulated by nr_requests.

read_ahead_kb (RW)
read_ahead_kb (RW)
------------------
------------------
Maximum number of kilobytes to read-ahead for filesystems on this block
Maximum number of kilobytes to read-ahead for filesystems on this block
+90 −49
Original line number Original line Diff line number Diff line
@@ -31,27 +31,6 @@ EXPORT_SYMBOL_GPL(blkcg_root);


static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS];
static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS];


struct blkcg *cgroup_to_blkcg(struct cgroup *cgroup)
{
	return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id),
			    struct blkcg, css);
}
EXPORT_SYMBOL_GPL(cgroup_to_blkcg);

static struct blkcg *task_blkcg(struct task_struct *tsk)
{
	return container_of(task_subsys_state(tsk, blkio_subsys_id),
			    struct blkcg, css);
}

struct blkcg *bio_blkcg(struct bio *bio)
{
	if (bio && bio->bi_css)
		return container_of(bio->bi_css, struct blkcg, css);
	return task_blkcg(current);
}
EXPORT_SYMBOL_GPL(bio_blkcg);

static bool blkcg_policy_enabled(struct request_queue *q,
static bool blkcg_policy_enabled(struct request_queue *q,
				 const struct blkcg_policy *pol)
				 const struct blkcg_policy *pol)
{
{
@@ -84,6 +63,7 @@ static void blkg_free(struct blkcg_gq *blkg)
		kfree(pd);
		kfree(pd);
	}
	}


	blk_exit_rl(&blkg->rl);
	kfree(blkg);
	kfree(blkg);
}
}


@@ -91,16 +71,18 @@ static void blkg_free(struct blkcg_gq *blkg)
 * blkg_alloc - allocate a blkg
 * blkg_alloc - allocate a blkg
 * @blkcg: block cgroup the new blkg is associated with
 * @blkcg: block cgroup the new blkg is associated with
 * @q: request_queue the new blkg is associated with
 * @q: request_queue the new blkg is associated with
 * @gfp_mask: allocation mask to use
 *
 *
 * Allocate a new blkg associating @blkcg and @q.
 * Allocate a new blkg associating @blkcg and @q.
 */
 */
static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q)
static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q,
				   gfp_t gfp_mask)
{
{
	struct blkcg_gq *blkg;
	struct blkcg_gq *blkg;
	int i;
	int i;


	/* alloc and init base part */
	/* alloc and init base part */
	blkg = kzalloc_node(sizeof(*blkg), GFP_ATOMIC, q->node);
	blkg = kzalloc_node(sizeof(*blkg), gfp_mask, q->node);
	if (!blkg)
	if (!blkg)
		return NULL;
		return NULL;


@@ -109,6 +91,13 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q)
	blkg->blkcg = blkcg;
	blkg->blkcg = blkcg;
	blkg->refcnt = 1;
	blkg->refcnt = 1;


	/* root blkg uses @q->root_rl, init rl only for !root blkgs */
	if (blkcg != &blkcg_root) {
		if (blk_init_rl(&blkg->rl, q, gfp_mask))
			goto err_free;
		blkg->rl.blkg = blkg;
	}

	for (i = 0; i < BLKCG_MAX_POLS; i++) {
	for (i = 0; i < BLKCG_MAX_POLS; i++) {
		struct blkcg_policy *pol = blkcg_policy[i];
		struct blkcg_policy *pol = blkcg_policy[i];
		struct blkg_policy_data *pd;
		struct blkg_policy_data *pd;
@@ -117,11 +106,9 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q)
			continue;
			continue;


		/* alloc per-policy data and attach it to blkg */
		/* alloc per-policy data and attach it to blkg */
		pd = kzalloc_node(pol->pd_size, GFP_ATOMIC, q->node);
		pd = kzalloc_node(pol->pd_size, gfp_mask, q->node);
		if (!pd) {
		if (!pd)
			blkg_free(blkg);
			goto err_free;
			return NULL;
		}


		blkg->pd[i] = pd;
		blkg->pd[i] = pd;
		pd->blkg = blkg;
		pd->blkg = blkg;
@@ -132,6 +119,10 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q)
	}
	}


	return blkg;
	return blkg;

err_free:
	blkg_free(blkg);
	return NULL;
}
}


static struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg,
static struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg,
@@ -175,9 +166,13 @@ struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, struct request_queue *q)
}
}
EXPORT_SYMBOL_GPL(blkg_lookup);
EXPORT_SYMBOL_GPL(blkg_lookup);


/*
 * If @new_blkg is %NULL, this function tries to allocate a new one as
 * necessary using %GFP_ATOMIC.  @new_blkg is always consumed on return.
 */
static struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
static struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
					     struct request_queue *q)
					     struct request_queue *q,
	__releases(q->queue_lock) __acquires(q->queue_lock)
					     struct blkcg_gq *new_blkg)
{
{
	struct blkcg_gq *blkg;
	struct blkcg_gq *blkg;
	int ret;
	int ret;
@@ -189,24 +184,26 @@ static struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
	blkg = __blkg_lookup(blkcg, q);
	blkg = __blkg_lookup(blkcg, q);
	if (blkg) {
	if (blkg) {
		rcu_assign_pointer(blkcg->blkg_hint, blkg);
		rcu_assign_pointer(blkcg->blkg_hint, blkg);
		return blkg;
		goto out_free;
	}
	}


	/* blkg holds a reference to blkcg */
	/* blkg holds a reference to blkcg */
	if (!css_tryget(&blkcg->css))
	if (!css_tryget(&blkcg->css)) {
		return ERR_PTR(-EINVAL);
		blkg = ERR_PTR(-EINVAL);
		goto out_free;
	}


	/* allocate */
	/* allocate */
	ret = -ENOMEM;
	if (!new_blkg) {
	blkg = blkg_alloc(blkcg, q);
		new_blkg = blkg_alloc(blkcg, q, GFP_ATOMIC);
	if (unlikely(!blkg))
		if (unlikely(!new_blkg)) {
		goto err_put;
			blkg = ERR_PTR(-ENOMEM);
			goto out_put;
		}
	}
	blkg = new_blkg;


	/* insert */
	/* insert */
	ret = radix_tree_preload(GFP_ATOMIC);
	if (ret)
		goto err_free;

	spin_lock(&blkcg->lock);
	spin_lock(&blkcg->lock);
	ret = radix_tree_insert(&blkcg->blkg_tree, q->id, blkg);
	ret = radix_tree_insert(&blkcg->blkg_tree, q->id, blkg);
	if (likely(!ret)) {
	if (likely(!ret)) {
@@ -215,15 +212,15 @@ static struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
	}
	}
	spin_unlock(&blkcg->lock);
	spin_unlock(&blkcg->lock);


	radix_tree_preload_end();

	if (!ret)
	if (!ret)
		return blkg;
		return blkg;
err_free:

	blkg_free(blkg);
	blkg = ERR_PTR(ret);
err_put:
out_put:
	css_put(&blkcg->css);
	css_put(&blkcg->css);
	return ERR_PTR(ret);
out_free:
	blkg_free(new_blkg);
	return blkg;
}
}


struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
@@ -235,7 +232,7 @@ struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
	 */
	 */
	if (unlikely(blk_queue_bypass(q)))
	if (unlikely(blk_queue_bypass(q)))
		return ERR_PTR(blk_queue_dead(q) ? -EINVAL : -EBUSY);
		return ERR_PTR(blk_queue_dead(q) ? -EINVAL : -EBUSY);
	return __blkg_lookup_create(blkcg, q);
	return __blkg_lookup_create(blkcg, q, NULL);
}
}
EXPORT_SYMBOL_GPL(blkg_lookup_create);
EXPORT_SYMBOL_GPL(blkg_lookup_create);


@@ -313,6 +310,38 @@ void __blkg_release(struct blkcg_gq *blkg)
}
}
EXPORT_SYMBOL_GPL(__blkg_release);
EXPORT_SYMBOL_GPL(__blkg_release);


/*
 * Iterator step function backing blk_queue_for_each_rl().  It's a bit
 * tricky because the root blkg uses @q->root_rl instead of its own rl:
 * @q->root_rl is mapped to the list head @q->blkg_list, and the root
 * blkg's own q_node is explicitly skipped during the walk.  Returns
 * %NULL once the walk wraps back around to @q->blkg_list, terminating
 * the iteration.
 */
struct request_list *__blk_queue_next_rl(struct request_list *rl,
					 struct request_queue *q)
{
	struct list_head *ent;
	struct blkcg_gq *blkg;

	/*
	 * Determine the current blkg list_head.  The first entry is
	 * root_rl which is off @q->blkg_list and mapped to the head.
	 */
	if (rl == &q->root_rl) {
		ent = &q->blkg_list;
	} else {
		/* @rl is embedded in a blkg; recover the containing blkg */
		blkg = container_of(rl, struct blkcg_gq, rl);
		ent = &blkg->q_node;
	}

	/* walk to the next list_head, skip root blkcg */
	ent = ent->next;
	if (ent == &q->root_blkg->q_node)
		ent = ent->next;
	if (ent == &q->blkg_list)
		return NULL;

	blkg = container_of(ent, struct blkcg_gq, q_node);
	return &blkg->rl;
}

static int blkcg_reset_stats(struct cgroup *cgroup, struct cftype *cftype,
static int blkcg_reset_stats(struct cgroup *cgroup, struct cftype *cftype,
			     u64 val)
			     u64 val)
{
{
@@ -734,24 +763,36 @@ int blkcg_activate_policy(struct request_queue *q,
	struct blkcg_gq *blkg;
	struct blkcg_gq *blkg;
	struct blkg_policy_data *pd, *n;
	struct blkg_policy_data *pd, *n;
	int cnt = 0, ret;
	int cnt = 0, ret;
	bool preloaded;


	if (blkcg_policy_enabled(q, pol))
	if (blkcg_policy_enabled(q, pol))
		return 0;
		return 0;


	/* preallocations for root blkg */
	blkg = blkg_alloc(&blkcg_root, q, GFP_KERNEL);
	if (!blkg)
		return -ENOMEM;

	preloaded = !radix_tree_preload(GFP_KERNEL);

	blk_queue_bypass_start(q);
	blk_queue_bypass_start(q);


	/* make sure the root blkg exists and count the existing blkgs */
	/* make sure the root blkg exists and count the existing blkgs */
	spin_lock_irq(q->queue_lock);
	spin_lock_irq(q->queue_lock);


	rcu_read_lock();
	rcu_read_lock();
	blkg = __blkg_lookup_create(&blkcg_root, q);
	blkg = __blkg_lookup_create(&blkcg_root, q, blkg);
	rcu_read_unlock();
	rcu_read_unlock();


	if (preloaded)
		radix_tree_preload_end();

	if (IS_ERR(blkg)) {
	if (IS_ERR(blkg)) {
		ret = PTR_ERR(blkg);
		ret = PTR_ERR(blkg);
		goto out_unlock;
		goto out_unlock;
	}
	}
	q->root_blkg = blkg;
	q->root_blkg = blkg;
	q->root_rl.blkg = blkg;


	list_for_each_entry(blkg, &q->blkg_list, q_node)
	list_for_each_entry(blkg, &q->blkg_list, q_node)
		cnt++;
		cnt++;
+124 −4
Original line number Original line Diff line number Diff line
@@ -17,6 +17,7 @@
#include <linux/u64_stats_sync.h>
#include <linux/u64_stats_sync.h>
#include <linux/seq_file.h>
#include <linux/seq_file.h>
#include <linux/radix-tree.h>
#include <linux/radix-tree.h>
#include <linux/blkdev.h>


/* Max limits for throttle policy */
/* Max limits for throttle policy */
#define THROTL_IOPS_MAX		UINT_MAX
#define THROTL_IOPS_MAX		UINT_MAX
@@ -93,6 +94,8 @@ struct blkcg_gq {
	struct list_head		q_node;
	struct list_head		q_node;
	struct hlist_node		blkcg_node;
	struct hlist_node		blkcg_node;
	struct blkcg			*blkcg;
	struct blkcg			*blkcg;
	/* request allocation list for this blkcg-q pair */
	struct request_list		rl;
	/* reference count */
	/* reference count */
	int				refcnt;
	int				refcnt;


@@ -120,8 +123,6 @@ struct blkcg_policy {


extern struct blkcg blkcg_root;
extern struct blkcg blkcg_root;


struct blkcg *cgroup_to_blkcg(struct cgroup *cgroup);
struct blkcg *bio_blkcg(struct bio *bio);
struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, struct request_queue *q);
struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, struct request_queue *q);
struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
				    struct request_queue *q);
				    struct request_queue *q);
@@ -160,6 +161,25 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
void blkg_conf_finish(struct blkg_conf_ctx *ctx);
void blkg_conf_finish(struct blkg_conf_ctx *ctx);




/* map a cgroup to its blkcg via the blkio subsystem state */
static inline struct blkcg *cgroup_to_blkcg(struct cgroup *cgroup)
{
	return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id),
			    struct blkcg, css);
}

/* blkcg that @tsk currently belongs to (blkio subsystem state) */
static inline struct blkcg *task_blkcg(struct task_struct *tsk)
{
	return container_of(task_subsys_state(tsk, blkio_subsys_id),
			    struct blkcg, css);
}

/*
 * Determine the owning blkcg of @bio.  If @bio carries an explicit css
 * (bio->bi_css), use that; otherwise fall back to %current's blkcg.
 * @bio may be %NULL.
 */
static inline struct blkcg *bio_blkcg(struct bio *bio)
{
	if (bio && bio->bi_css)
		return container_of(bio->bi_css, struct blkcg, css);
	return task_blkcg(current);
}

/**
/**
 * blkg_to_pdata - get policy private data
 * blkg_to_pdata - get policy private data
 * @blkg: blkg of interest
 * @blkg: blkg of interest
@@ -233,6 +253,95 @@ static inline void blkg_put(struct blkcg_gq *blkg)
		__blkg_release(blkg);
		__blkg_release(blkg);
}
}


/**
 * blk_get_rl - get request_list to use
 * @q: request_queue of interest
 * @bio: bio which will be attached to the allocated request (may be %NULL)
 *
 * The caller wants to allocate a request from @q to use for @bio.  Find
 * the request_list to use and obtain a reference on it.  Should be called
 * under queue_lock.  This function is guaranteed to return non-%NULL
 * request_list.
 */
static inline struct request_list *blk_get_rl(struct request_queue *q,
					      struct bio *bio)
{
	struct blkcg *blkcg;
	struct blkcg_gq *blkg;

	rcu_read_lock();

	blkcg = bio_blkcg(bio);

	/* bypass blkg lookup and use @q->root_rl directly for root */
	if (blkcg == &blkcg_root)
		goto root_rl;

	/*
	 * Try to use blkg->rl.  blkg lookup may fail under memory pressure
	 * or if either the blkcg or queue is going away.  Fall back to
	 * root_rl in such cases.
	 */
	blkg = blkg_lookup_create(blkcg, q);
	if (unlikely(IS_ERR(blkg)))
		goto root_rl;

	/* pin the blkg so @rl stays valid; reference dropped in blk_put_rl() */
	blkg_get(blkg);
	rcu_read_unlock();
	return &blkg->rl;
root_rl:
	rcu_read_unlock();
	return &q->root_rl;
}

/**
 * blk_put_rl - put request_list
 * @rl: request_list to put
 *
 * Put the reference acquired by blk_get_rl().  Should be called under
 * queue_lock.
 */
static inline void blk_put_rl(struct request_list *rl)
{
	/*
	 * root_rl may not have blkg set.  blk_get_rl() also never takes a
	 * blkg reference for the root blkcg (it returns @q->root_rl
	 * directly), so only non-root rls drop a reference here.
	 */
	if (rl->blkg && rl->blkg->blkcg != &blkcg_root)
		blkg_put(rl->blkg);
}

/**
 * blk_rq_set_rl - associate a request with a request_list
 * @rq: request of interest
 * @rl: target request_list
 *
 * Associate @rq with @rl so that accounting and freeing can know the
 * request_list @rq came from.
 */
static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl)
{
	rq->rl = rl;
}

/**
 * blk_rq_rl - return the request_list a request came from
 * @rq: request of interest
 *
 * Return the request_list @rq is allocated from.
 */
static inline struct request_list *blk_rq_rl(struct request *rq)
{
	return rq->rl;
}

/* iterator step used by blk_queue_for_each_rl() below */
struct request_list *__blk_queue_next_rl(struct request_list *rl,
					 struct request_queue *q);
/**
 * blk_queue_for_each_rl - iterate through all request_lists of a request_queue
 *
 * Starts from @q->root_rl and ends when __blk_queue_next_rl() returns
 * %NULL.  Should be used under queue_lock.
 */
#define blk_queue_for_each_rl(rl, q)	\
	for ((rl) = &(q)->root_rl; (rl); (rl) = __blk_queue_next_rl((rl), (q)))

/**
/**
 * blkg_stat_add - add a value to a blkg_stat
 * blkg_stat_add - add a value to a blkg_stat
 * @stat: target blkg_stat
 * @stat: target blkg_stat
@@ -351,6 +460,7 @@ static inline void blkg_rwstat_reset(struct blkg_rwstat *rwstat)
#else	/* CONFIG_BLK_CGROUP */
#else	/* CONFIG_BLK_CGROUP */


struct cgroup;
struct cgroup;
struct blkcg;


struct blkg_policy_data {
struct blkg_policy_data {
};
};
@@ -361,8 +471,6 @@ struct blkcg_gq {
struct blkcg_policy {
struct blkcg_policy {
};
};


static inline struct blkcg *cgroup_to_blkcg(struct cgroup *cgroup) { return NULL; }
static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; }
static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; }
static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; }
static inline int blkcg_init_queue(struct request_queue *q) { return 0; }
static inline int blkcg_init_queue(struct request_queue *q) { return 0; }
static inline void blkcg_drain_queue(struct request_queue *q) { }
static inline void blkcg_drain_queue(struct request_queue *q) { }
@@ -374,6 +482,9 @@ static inline int blkcg_activate_policy(struct request_queue *q,
static inline void blkcg_deactivate_policy(struct request_queue *q,
static inline void blkcg_deactivate_policy(struct request_queue *q,
					   const struct blkcg_policy *pol) { }
					   const struct blkcg_policy *pol) { }


static inline struct blkcg *cgroup_to_blkcg(struct cgroup *cgroup) { return NULL; }
static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; }

static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
						  struct blkcg_policy *pol) { return NULL; }
						  struct blkcg_policy *pol) { return NULL; }
static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; }
static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; }
@@ -381,5 +492,14 @@ static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; }
static inline void blkg_get(struct blkcg_gq *blkg) { }
static inline void blkg_get(struct blkcg_gq *blkg) { }
static inline void blkg_put(struct blkcg_gq *blkg) { }
static inline void blkg_put(struct blkcg_gq *blkg) { }


static inline struct request_list *blk_get_rl(struct request_queue *q,
					      struct bio *bio) { return &q->root_rl; }
static inline void blk_put_rl(struct request_list *rl) { }
static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) { }
static inline struct request_list *blk_rq_rl(struct request *rq) { return &rq->q->root_rl; }

#define blk_queue_for_each_rl(rl, q)	\
	for ((rl) = &(q)->root_rl; (rl); (rl) = NULL)

#endif	/* CONFIG_BLK_CGROUP */
#endif	/* CONFIG_BLK_CGROUP */
#endif	/* _BLK_CGROUP_H */
#endif	/* _BLK_CGROUP_H */
+111 −98
Original line number Original line Diff line number Diff line
@@ -387,7 +387,7 @@ void blk_drain_queue(struct request_queue *q, bool drain_all)
		if (!list_empty(&q->queue_head) && q->request_fn)
		if (!list_empty(&q->queue_head) && q->request_fn)
			__blk_run_queue(q);
			__blk_run_queue(q);


		drain |= q->rq.elvpriv;
		drain |= q->nr_rqs_elvpriv;


		/*
		/*
		 * Unfortunately, requests are queued at and tracked from
		 * Unfortunately, requests are queued at and tracked from
@@ -397,7 +397,7 @@ void blk_drain_queue(struct request_queue *q, bool drain_all)
		if (drain_all) {
		if (drain_all) {
			drain |= !list_empty(&q->queue_head);
			drain |= !list_empty(&q->queue_head);
			for (i = 0; i < 2; i++) {
			for (i = 0; i < 2; i++) {
				drain |= q->rq.count[i];
				drain |= q->nr_rqs[i];
				drain |= q->in_flight[i];
				drain |= q->in_flight[i];
				drain |= !list_empty(&q->flush_queue[i]);
				drain |= !list_empty(&q->flush_queue[i]);
			}
			}
@@ -416,9 +416,14 @@ void blk_drain_queue(struct request_queue *q, bool drain_all)
	 * left with hung waiters. We need to wake up those waiters.
	 * left with hung waiters. We need to wake up those waiters.
	 */
	 */
	if (q->request_fn) {
	if (q->request_fn) {
		struct request_list *rl;

		spin_lock_irq(q->queue_lock);
		spin_lock_irq(q->queue_lock);
		for (i = 0; i < ARRAY_SIZE(q->rq.wait); i++)

			wake_up_all(&q->rq.wait[i]);
		blk_queue_for_each_rl(rl, q)
			for (i = 0; i < ARRAY_SIZE(rl->wait); i++)
				wake_up_all(&rl->wait[i]);

		spin_unlock_irq(q->queue_lock);
		spin_unlock_irq(q->queue_lock);
	}
	}
}
}
@@ -517,28 +522,33 @@ void blk_cleanup_queue(struct request_queue *q)
}
}
EXPORT_SYMBOL(blk_cleanup_queue);
EXPORT_SYMBOL(blk_cleanup_queue);


static int blk_init_free_list(struct request_queue *q)
int blk_init_rl(struct request_list *rl, struct request_queue *q,
		gfp_t gfp_mask)
{
{
	struct request_list *rl = &q->rq;

	if (unlikely(rl->rq_pool))
	if (unlikely(rl->rq_pool))
		return 0;
		return 0;


	rl->q = q;
	rl->count[BLK_RW_SYNC] = rl->count[BLK_RW_ASYNC] = 0;
	rl->count[BLK_RW_SYNC] = rl->count[BLK_RW_ASYNC] = 0;
	rl->starved[BLK_RW_SYNC] = rl->starved[BLK_RW_ASYNC] = 0;
	rl->starved[BLK_RW_SYNC] = rl->starved[BLK_RW_ASYNC] = 0;
	rl->elvpriv = 0;
	init_waitqueue_head(&rl->wait[BLK_RW_SYNC]);
	init_waitqueue_head(&rl->wait[BLK_RW_SYNC]);
	init_waitqueue_head(&rl->wait[BLK_RW_ASYNC]);
	init_waitqueue_head(&rl->wait[BLK_RW_ASYNC]);


	rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
	rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
				mempool_free_slab, request_cachep, q->node);
					  mempool_free_slab, request_cachep,

					  gfp_mask, q->node);
	if (!rl->rq_pool)
	if (!rl->rq_pool)
		return -ENOMEM;
		return -ENOMEM;


	return 0;
	return 0;
}
}


/*
 * Release the resources backing @rl.  The NULL check is needed because
 * rq_pool may never have been set up: blkg_alloc() initializes the
 * embedded rl only for non-root blkgs, yet blkg_free() calls
 * blk_exit_rl() unconditionally.
 */
void blk_exit_rl(struct request_list *rl)
{
	if (rl->rq_pool)
		mempool_destroy(rl->rq_pool);
}

struct request_queue *blk_alloc_queue(gfp_t gfp_mask)
struct request_queue *blk_alloc_queue(gfp_t gfp_mask)
{
{
	return blk_alloc_queue_node(gfp_mask, -1);
	return blk_alloc_queue_node(gfp_mask, -1);
@@ -680,7 +690,7 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
	if (!q)
	if (!q)
		return NULL;
		return NULL;


	if (blk_init_free_list(q))
	if (blk_init_rl(&q->root_rl, q, GFP_KERNEL))
		return NULL;
		return NULL;


	q->request_fn		= rfn;
	q->request_fn		= rfn;
@@ -722,15 +732,15 @@ bool blk_get_queue(struct request_queue *q)
}
}
EXPORT_SYMBOL(blk_get_queue);
EXPORT_SYMBOL(blk_get_queue);


static inline void blk_free_request(struct request_queue *q, struct request *rq)
static inline void blk_free_request(struct request_list *rl, struct request *rq)
{
{
	if (rq->cmd_flags & REQ_ELVPRIV) {
	if (rq->cmd_flags & REQ_ELVPRIV) {
		elv_put_request(q, rq);
		elv_put_request(rl->q, rq);
		if (rq->elv.icq)
		if (rq->elv.icq)
			put_io_context(rq->elv.icq->ioc);
			put_io_context(rq->elv.icq->ioc);
	}
	}


	mempool_free(rq, q->rq.rq_pool);
	mempool_free(rq, rl->rq_pool);
}
}


/*
/*
@@ -767,18 +777,23 @@ static void ioc_set_batching(struct request_queue *q, struct io_context *ioc)
	ioc->last_waited = jiffies;
	ioc->last_waited = jiffies;
}
}


static void __freed_request(struct request_queue *q, int sync)
static void __freed_request(struct request_list *rl, int sync)
{
{
	struct request_list *rl = &q->rq;
	struct request_queue *q = rl->q;


	if (rl->count[sync] < queue_congestion_off_threshold(q))
	/*
	 * bdi isn't aware of blkcg yet.  As all async IOs end up root
	 * blkcg anyway, just use root blkcg state.
	 */
	if (rl == &q->root_rl &&
	    rl->count[sync] < queue_congestion_off_threshold(q))
		blk_clear_queue_congested(q, sync);
		blk_clear_queue_congested(q, sync);


	if (rl->count[sync] + 1 <= q->nr_requests) {
	if (rl->count[sync] + 1 <= q->nr_requests) {
		if (waitqueue_active(&rl->wait[sync]))
		if (waitqueue_active(&rl->wait[sync]))
			wake_up(&rl->wait[sync]);
			wake_up(&rl->wait[sync]);


		blk_clear_queue_full(q, sync);
		blk_clear_rl_full(rl, sync);
	}
	}
}
}


@@ -786,19 +801,20 @@ static void __freed_request(struct request_queue *q, int sync)
 * A request has just been released.  Account for it, update the full and
 * A request has just been released.  Account for it, update the full and
 * congestion status, wake up any waiters.   Called under q->queue_lock.
 * congestion status, wake up any waiters.   Called under q->queue_lock.
 */
 */
static void freed_request(struct request_queue *q, unsigned int flags)
static void freed_request(struct request_list *rl, unsigned int flags)
{
{
	struct request_list *rl = &q->rq;
	struct request_queue *q = rl->q;
	int sync = rw_is_sync(flags);
	int sync = rw_is_sync(flags);


	q->nr_rqs[sync]--;
	rl->count[sync]--;
	rl->count[sync]--;
	if (flags & REQ_ELVPRIV)
	if (flags & REQ_ELVPRIV)
		rl->elvpriv--;
		q->nr_rqs_elvpriv--;


	__freed_request(q, sync);
	__freed_request(rl, sync);


	if (unlikely(rl->starved[sync ^ 1]))
	if (unlikely(rl->starved[sync ^ 1]))
		__freed_request(q, sync ^ 1);
		__freed_request(rl, sync ^ 1);
}
}


/*
/*
@@ -837,8 +853,8 @@ static struct io_context *rq_ioc(struct bio *bio)
}
}


/**
/**
 * get_request - get a free request
 * __get_request - get a free request
 * @q: request_queue to allocate request from
 * @rl: request list to allocate from
 * @rw_flags: RW and SYNC flags
 * @rw_flags: RW and SYNC flags
 * @bio: bio to allocate request for (can be %NULL)
 * @bio: bio to allocate request for (can be %NULL)
 * @gfp_mask: allocation mask
 * @gfp_mask: allocation mask
@@ -850,20 +866,16 @@ static struct io_context *rq_ioc(struct bio *bio)
 * Returns %NULL on failure, with @q->queue_lock held.
 * Returns %NULL on failure, with @q->queue_lock held.
 * Returns !%NULL on success, with @q->queue_lock *not held*.
 * Returns !%NULL on success, with @q->queue_lock *not held*.
 */
 */
static struct request *get_request(struct request_queue *q, int rw_flags,
static struct request *__get_request(struct request_list *rl, int rw_flags,
				     struct bio *bio, gfp_t gfp_mask)
				     struct bio *bio, gfp_t gfp_mask)
{
{
	struct request_queue *q = rl->q;
	struct request *rq;
	struct request *rq;
	struct request_list *rl = &q->rq;
	struct elevator_type *et = q->elevator->type;
	struct elevator_type *et;
	struct io_context *ioc = rq_ioc(bio);
	struct io_context *ioc;
	struct io_cq *icq = NULL;
	struct io_cq *icq = NULL;
	const bool is_sync = rw_is_sync(rw_flags) != 0;
	const bool is_sync = rw_is_sync(rw_flags) != 0;
	bool retried = false;
	int may_queue;
	int may_queue;
retry:
	et = q->elevator->type;
	ioc = rq_ioc(bio);


	if (unlikely(blk_queue_dead(q)))
	if (unlikely(blk_queue_dead(q)))
		return NULL;
		return NULL;
@@ -874,29 +886,15 @@ static struct request *get_request(struct request_queue *q, int rw_flags,


	if (rl->count[is_sync]+1 >= queue_congestion_on_threshold(q)) {
	if (rl->count[is_sync]+1 >= queue_congestion_on_threshold(q)) {
		if (rl->count[is_sync]+1 >= q->nr_requests) {
		if (rl->count[is_sync]+1 >= q->nr_requests) {
			/*
			 * We want ioc to record batching state.  If it's
			 * not already there, creating a new one requires
			 * dropping queue_lock, which in turn requires
			 * retesting conditions to avoid queue hang.
			 */
			if (!ioc && !retried) {
				spin_unlock_irq(q->queue_lock);
				create_io_context(gfp_mask, q->node);
				spin_lock_irq(q->queue_lock);
				retried = true;
				goto retry;
			}

			/*
			/*
			 * The queue will fill after this allocation, so set
			 * The queue will fill after this allocation, so set
			 * it as full, and mark this process as "batching".
			 * it as full, and mark this process as "batching".
			 * This process will be allowed to complete a batch of
			 * This process will be allowed to complete a batch of
			 * requests, others will be blocked.
			 * requests, others will be blocked.
			 */
			 */
			if (!blk_queue_full(q, is_sync)) {
			if (!blk_rl_full(rl, is_sync)) {
				ioc_set_batching(q, ioc);
				ioc_set_batching(q, ioc);
				blk_set_queue_full(q, is_sync);
				blk_set_rl_full(rl, is_sync);
			} else {
			} else {
				if (may_queue != ELV_MQUEUE_MUST
				if (may_queue != ELV_MQUEUE_MUST
						&& !ioc_batching(q, ioc)) {
						&& !ioc_batching(q, ioc)) {
@@ -909,6 +907,11 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
				}
				}
			}
			}
		}
		}
		/*
		 * bdi isn't aware of blkcg yet.  As all async IOs end up
		 * root blkcg anyway, just use root blkcg state.
		 */
		if (rl == &q->root_rl)
			blk_set_queue_congested(q, is_sync);
			blk_set_queue_congested(q, is_sync);
	}
	}


@@ -920,6 +923,7 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
	if (rl->count[is_sync] >= (3 * q->nr_requests / 2))
	if (rl->count[is_sync] >= (3 * q->nr_requests / 2))
		return NULL;
		return NULL;


	q->nr_rqs[is_sync]++;
	rl->count[is_sync]++;
	rl->count[is_sync]++;
	rl->starved[is_sync] = 0;
	rl->starved[is_sync] = 0;


@@ -935,7 +939,7 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
	 */
	 */
	if (blk_rq_should_init_elevator(bio) && !blk_queue_bypass(q)) {
	if (blk_rq_should_init_elevator(bio) && !blk_queue_bypass(q)) {
		rw_flags |= REQ_ELVPRIV;
		rw_flags |= REQ_ELVPRIV;
		rl->elvpriv++;
		q->nr_rqs_elvpriv++;
		if (et->icq_cache && ioc)
		if (et->icq_cache && ioc)
			icq = ioc_lookup_icq(ioc, q);
			icq = ioc_lookup_icq(ioc, q);
	}
	}
@@ -945,21 +949,18 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
	spin_unlock_irq(q->queue_lock);
	spin_unlock_irq(q->queue_lock);


	/* allocate and init request */
	/* allocate and init request */
	rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
	rq = mempool_alloc(rl->rq_pool, gfp_mask);
	if (!rq)
	if (!rq)
		goto fail_alloc;
		goto fail_alloc;


	blk_rq_init(q, rq);
	blk_rq_init(q, rq);
	blk_rq_set_rl(rq, rl);
	rq->cmd_flags = rw_flags | REQ_ALLOCED;
	rq->cmd_flags = rw_flags | REQ_ALLOCED;


	/* init elvpriv */
	/* init elvpriv */
	if (rw_flags & REQ_ELVPRIV) {
	if (rw_flags & REQ_ELVPRIV) {
		if (unlikely(et->icq_cache && !icq)) {
		if (unlikely(et->icq_cache && !icq)) {
			create_io_context(gfp_mask, q->node);
			if (ioc)
			ioc = rq_ioc(bio);
			if (!ioc)
				goto fail_elvpriv;

				icq = ioc_create_icq(ioc, q, gfp_mask);
				icq = ioc_create_icq(ioc, q, gfp_mask);
			if (!icq)
			if (!icq)
				goto fail_elvpriv;
				goto fail_elvpriv;
@@ -1000,7 +1001,7 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
	rq->elv.icq = NULL;
	rq->elv.icq = NULL;


	spin_lock_irq(q->queue_lock);
	spin_lock_irq(q->queue_lock);
	rl->elvpriv--;
	q->nr_rqs_elvpriv--;
	spin_unlock_irq(q->queue_lock);
	spin_unlock_irq(q->queue_lock);
	goto out;
	goto out;


@@ -1013,7 +1014,7 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
	 * queue, but this is pretty rare.
	 * queue, but this is pretty rare.
	 */
	 */
	spin_lock_irq(q->queue_lock);
	spin_lock_irq(q->queue_lock);
	freed_request(q, rw_flags);
	freed_request(rl, rw_flags);


	/*
	/*
	 * in the very unlikely event that allocation failed and no
	 * in the very unlikely event that allocation failed and no
@@ -1029,32 +1030,39 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
}
}


/**
/**
 * get_request_wait - get a free request with retry
 * get_request - get a free request
 * @q: request_queue to allocate request from
 * @q: request_queue to allocate request from
 * @rw_flags: RW and SYNC flags
 * @rw_flags: RW and SYNC flags
 * @bio: bio to allocate request for (can be %NULL)
 * @bio: bio to allocate request for (can be %NULL)
 * @gfp_mask: allocation mask
 *
 *
 * Get a free request from @q.  This function keeps retrying under memory
 * Get a free request from @q.  If %__GFP_WAIT is set in @gfp_mask, this
 * pressure and fails iff @q is dead.
 * function keeps retrying under memory pressure and fails iff @q is dead.
 *
 *
 * Must be called with @q->queue_lock held and,
 * Must be called with @q->queue_lock held and,
 * Returns %NULL on failure, with @q->queue_lock held.
 * Returns %NULL on failure, with @q->queue_lock held.
 * Returns !%NULL on success, with @q->queue_lock *not held*.
 * Returns !%NULL on success, with @q->queue_lock *not held*.
 */
 */
static struct request *get_request_wait(struct request_queue *q, int rw_flags,
static struct request *get_request(struct request_queue *q, int rw_flags,
					struct bio *bio)
				   struct bio *bio, gfp_t gfp_mask)
{
{
	const bool is_sync = rw_is_sync(rw_flags) != 0;
	const bool is_sync = rw_is_sync(rw_flags) != 0;
	DEFINE_WAIT(wait);
	struct request_list *rl;
	struct request *rq;
	struct request *rq;


	rq = get_request(q, rw_flags, bio, GFP_NOIO);
	rl = blk_get_rl(q, bio);	/* transferred to @rq on success */
	while (!rq) {
retry:
		DEFINE_WAIT(wait);
	rq = __get_request(rl, rw_flags, bio, gfp_mask);
		struct request_list *rl = &q->rq;
	if (rq)
		return rq;


		if (unlikely(blk_queue_dead(q)))
	if (!(gfp_mask & __GFP_WAIT) || unlikely(blk_queue_dead(q))) {
		blk_put_rl(rl);
		return NULL;
		return NULL;
	}


	/* wait on @rl and retry */
	prepare_to_wait_exclusive(&rl->wait[is_sync], &wait,
	prepare_to_wait_exclusive(&rl->wait[is_sync], &wait,
				  TASK_UNINTERRUPTIBLE);
				  TASK_UNINTERRUPTIBLE);


@@ -1064,21 +1072,16 @@ static struct request *get_request_wait(struct request_queue *q, int rw_flags,
	io_schedule();
	io_schedule();


	/*
	/*
		 * After sleeping, we become a "batching" process and
	 * After sleeping, we become a "batching" process and will be able
		 * will be able to allocate at least one request, and
	 * to allocate at least one request, and up to a big batch of them
		 * up to a big batch of them for a small period time.
	 * for a small period time.  See ioc_batching, ioc_set_batching
		 * See ioc_batching, ioc_set_batching
	 */
	 */
		create_io_context(GFP_NOIO, q->node);
	ioc_set_batching(q, current->io_context);
	ioc_set_batching(q, current->io_context);


	spin_lock_irq(q->queue_lock);
	spin_lock_irq(q->queue_lock);
	finish_wait(&rl->wait[is_sync], &wait);
	finish_wait(&rl->wait[is_sync], &wait);


		rq = get_request(q, rw_flags, bio, GFP_NOIO);
	goto retry;
	};

	return rq;
}
}


struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
@@ -1087,10 +1090,10 @@ struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)


	BUG_ON(rw != READ && rw != WRITE);
	BUG_ON(rw != READ && rw != WRITE);


	/* create ioc upfront */
	create_io_context(gfp_mask, q->node);

	spin_lock_irq(q->queue_lock);
	spin_lock_irq(q->queue_lock);
	if (gfp_mask & __GFP_WAIT)
		rq = get_request_wait(q, rw, NULL);
	else
	rq = get_request(q, rw, NULL, gfp_mask);
	rq = get_request(q, rw, NULL, gfp_mask);
	if (!rq)
	if (!rq)
		spin_unlock_irq(q->queue_lock);
		spin_unlock_irq(q->queue_lock);
@@ -1248,12 +1251,14 @@ void __blk_put_request(struct request_queue *q, struct request *req)
	 */
	 */
	if (req->cmd_flags & REQ_ALLOCED) {
	if (req->cmd_flags & REQ_ALLOCED) {
		unsigned int flags = req->cmd_flags;
		unsigned int flags = req->cmd_flags;
		struct request_list *rl = blk_rq_rl(req);


		BUG_ON(!list_empty(&req->queuelist));
		BUG_ON(!list_empty(&req->queuelist));
		BUG_ON(!hlist_unhashed(&req->hash));
		BUG_ON(!hlist_unhashed(&req->hash));


		blk_free_request(q, req);
		blk_free_request(rl, req);
		freed_request(q, flags);
		freed_request(rl, flags);
		blk_put_rl(rl);
	}
	}
}
}
EXPORT_SYMBOL_GPL(__blk_put_request);
EXPORT_SYMBOL_GPL(__blk_put_request);
@@ -1481,7 +1486,7 @@ void blk_queue_bio(struct request_queue *q, struct bio *bio)
	 * Grab a free request. This is might sleep but can not fail.
	 * Grab a free request. This is might sleep but can not fail.
	 * Returns with the queue unlocked.
	 * Returns with the queue unlocked.
	 */
	 */
	req = get_request_wait(q, rw_flags, bio);
	req = get_request(q, rw_flags, bio, GFP_NOIO);
	if (unlikely(!req)) {
	if (unlikely(!req)) {
		bio_endio(bio, -ENODEV);	/* @q is dead */
		bio_endio(bio, -ENODEV);	/* @q is dead */
		goto out_unlock;
		goto out_unlock;
@@ -1702,6 +1707,14 @@ generic_make_request_checks(struct bio *bio)
		goto end_io;
		goto end_io;
	}
	}


	/*
	 * Various block parts want %current->io_context and lazy ioc
	 * allocation ends up trading a lot of pain for a small amount of
	 * memory.  Just allocate it upfront.  This may fail and block
	 * layer knows how to live with it.
	 */
	create_io_context(GFP_ATOMIC, q->node);

	if (blk_throtl_bio(q, bio))
	if (blk_throtl_bio(q, bio))
		return false;	/* throttled, will be resubmitted later */
		return false;	/* throttled, will be resubmitted later */


+1 −0
Original line number Original line Diff line number Diff line
@@ -244,6 +244,7 @@ int create_task_io_context(struct task_struct *task, gfp_t gfp_flags, int node)


	/* initialize */
	/* initialize */
	atomic_long_set(&ioc->refcount, 1);
	atomic_long_set(&ioc->refcount, 1);
	atomic_set(&ioc->nr_tasks, 1);
	atomic_set(&ioc->active_ref, 1);
	atomic_set(&ioc->active_ref, 1);
	spin_lock_init(&ioc->lock);
	spin_lock_init(&ioc->lock);
	INIT_RADIX_TREE(&ioc->icq_tree, GFP_ATOMIC | __GFP_HIGH);
	INIT_RADIX_TREE(&ioc->icq_tree, GFP_ATOMIC | __GFP_HIGH);
Loading