
Commit 36805aae authored by Linus Torvalds

Merge branch 'for-3.11/core' of git://git.kernel.dk/linux-block

Pull core block IO updates from Jens Axboe:
 "Here are the core IO block bits for 3.11. It contains:

   - A tweak to the reserved tag logic from Jan, for weirdo devices with
     just 3 free tags.  But for those it improves things substantially
     for random writes.

   - Periodic writeback fix from Jan.  Marked for stable as well.

   - Fix for a race condition in IO scheduler switching from Jianpeng.

   - The hierarchical blk-cgroup support from Tejun.  This is the grunt
     of the series.

   - blk-throttle fix from Vivek.

  Just a note that I'm in the middle of a relocation, whole family is
  flying out tomorrow.  Hence I will be AWOL for the remainder of this week,
  but back at work again on Monday the 15th.  CC'ing Tejun, since any
  potential "surprises" will most likely be from the blk-cgroup work.
  But it's been brewing for a while and sitting in my tree and
  linux-next for a long time, so should be solid."

* 'for-3.11/core' of git://git.kernel.dk/linux-block: (36 commits)
  elevator: Fix a race in elevator switching
  block: Reserve only one queue tag for sync IO if only 3 tags are available
  writeback: Fix periodic writeback after fs mount
  blk-throttle: implement proper hierarchy support
  blk-throttle: implement throtl_grp->has_rules[]
  blk-throttle: Account for child group's start time in parent while bio climbs up
  blk-throttle: add throtl_qnode for dispatch fairness
  blk-throttle: make throtl_pending_timer_fn() ready for hierarchy
  blk-throttle: make tg_dispatch_one_bio() ready for hierarchy
  blk-throttle: make blk_throtl_bio() ready for hierarchy
  blk-throttle: make blk_throtl_drain() ready for hierarchy
  blk-throttle: dispatch from throtl_pending_timer_fn()
  blk-throttle: implement dispatch looping
  blk-throttle: separate out throtl_service_queue->pending_timer from throtl_data->dispatch_work
  blk-throttle: set REQ_THROTTLED from throtl_charge_bio() and gate stats update with it
  blk-throttle: implement sq_to_tg(), sq_to_td() and throtl_log()
  blk-throttle: add throtl_service_queue->parent_sq
  blk-throttle: generalize update_disptime optimization in blk_throtl_bio()
  blk-throttle: dispatch to throtl_data->service_queue.bio_lists[]
  blk-throttle: move bio_lists[] and friends to throtl_service_queue
  ...
parents 6d2fa9e1 d50235b7
+15 −14
@@ -94,9 +94,11 @@ Throttling/Upper Limit policy
 
 Hierarchical Cgroups
 ====================
-- Currently only CFQ supports hierarchical groups. For throttling,
-  cgroup interface does allow creation of hierarchical cgroups and
-  internally it treats them as flat hierarchy.
 
-  If somebody created a hierarchy like as follows.
+Both CFQ and throttling implement hierarchy support; however,
+throttling's hierarchy support is enabled iff "sane_behavior" is
+enabled from cgroup side, which currently is a development option and
+not publicly available.
+
+If somebody created a hierarchy like as follows.
 
@@ -106,21 +108,20 @@ Hierarchical Cgroups
 			|
 		     test3
 
-  CFQ will handle the hierarchy correctly but and throttling will
-  practically treat all groups at same level. For details on CFQ
-  hierarchy support, refer to Documentation/block/cfq-iosched.txt.
-  Throttling will treat the hierarchy as if it looks like the
-  following.
+CFQ by default and throttling with "sane_behavior" will handle the
+hierarchy correctly.  For details on CFQ hierarchy support, refer to
+Documentation/block/cfq-iosched.txt.  For throttling, all limits apply
+to the whole subtree while all statistics are local to the IOs
+directly generated by tasks in that cgroup.
+
+Throttling without "sane_behavior" enabled from cgroup side will
+practically treat all groups at same level as if it looks like the
+following.
 
 				pivot
 			     /  /   \  \
 			root  test1 test2  test3
 
-  Nesting cgroups, while allowed, isn't officially supported and blkio
-  genereates warning when cgroups nest. Once throttling implements
-  hierarchy support, hierarchy will be supported and the warning will
-  be removed.
-
 Various user visible config options
 ===================================
 CONFIG_BLK_CGROUP
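
To make the semantics described above concrete, here is a minimal userspace C sketch (illustrative only, not kernel code; struct tgroup, may_dispatch() and charge_bio() are invented names): with hierarchy support enabled, a bio issued in a child group has to fit within the limit of that group and of every ancestor, while the per-group statistic counts only IO issued directly in that group.

/*
 * Illustrative sketch only (plain userspace C, not kernel code): with
 * hierarchical throttling, a bio issued in a child group must fit within
 * the limit of that group and of every ancestor, while per-group
 * statistics count only the IO issued directly in that group.  All
 * names below (struct tgroup, may_dispatch, charge_bio) are made up
 * for the example.
 */
#include <stdbool.h>
#include <stdio.h>

struct tgroup {
	const char *name;
	struct tgroup *parent;
	unsigned long long bps_limit;	/* allowed bytes per window */
	unsigned long long charged;	/* bytes charged for limit checks */
	unsigned long long local_stat;	/* bytes issued directly here */
};

/* A bio fits only if every level of the ancestor chain has budget left. */
static bool may_dispatch(struct tgroup *tg, unsigned long long bytes)
{
	for (struct tgroup *g = tg; g; g = g->parent)
		if (g->charged + bytes > g->bps_limit)
			return false;
	return true;
}

/* Charge the whole ancestor chain for limits, but stats stay local. */
static void charge_bio(struct tgroup *tg, unsigned long long bytes)
{
	tg->local_stat += bytes;
	for (struct tgroup *g = tg; g; g = g->parent)
		g->charged += bytes;
}

int main(void)
{
	struct tgroup root  = { "root",  NULL,   1000, 0, 0 };
	struct tgroup test1 = { "test1", &root,   800, 0, 0 };
	struct tgroup test3 = { "test3", &test1,  600, 0, 0 };

	if (may_dispatch(&test3, 500))
		charge_bio(&test3, 500);
	/* A second 500-byte bio now exceeds test3's own 600-byte limit. */
	printf("second bio allowed: %d\n", may_dispatch(&test3, 500));
	printf("root local stat: %llu (IO was issued in test3)\n",
	       root.local_stat);
	return 0;
}

In the run above, the second 500-byte bio is rejected because test3's own budget is exhausted even though root still has headroom, and root's local statistic stays at zero because the IO was issued in test3.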
+40 −65
@@ -32,26 +32,6 @@ EXPORT_SYMBOL_GPL(blkcg_root);
 
 static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS];
 
-static struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg,
-				      struct request_queue *q, bool update_hint);
-
-/**
- * blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants
- * @d_blkg: loop cursor pointing to the current descendant
- * @pos_cgrp: used for iteration
- * @p_blkg: target blkg to walk descendants of
- *
- * Walk @c_blkg through the descendants of @p_blkg.  Must be used with RCU
- * read locked.  If called under either blkcg or queue lock, the iteration
- * is guaranteed to include all and only online blkgs.  The caller may
- * update @pos_cgrp by calling cgroup_rightmost_descendant() to skip
- * subtree.
- */
-#define blkg_for_each_descendant_pre(d_blkg, pos_cgrp, p_blkg)		\
-	cgroup_for_each_descendant_pre((pos_cgrp), (p_blkg)->blkcg->css.cgroup) \
-		if (((d_blkg) = __blkg_lookup(cgroup_to_blkcg(pos_cgrp), \
-					      (p_blkg)->q, false)))
-
 static bool blkcg_policy_enabled(struct request_queue *q,
 				 const struct blkcg_policy *pol)
 {
@@ -71,18 +51,8 @@ static void blkg_free(struct blkcg_gq *blkg)
 	if (!blkg)
 		return;
 
-	for (i = 0; i < BLKCG_MAX_POLS; i++) {
-		struct blkcg_policy *pol = blkcg_policy[i];
-		struct blkg_policy_data *pd = blkg->pd[i];
-
-		if (!pd)
-			continue;
-
-		if (pol && pol->pd_exit_fn)
-			pol->pd_exit_fn(blkg);
-
-		kfree(pd);
-	}
+	for (i = 0; i < BLKCG_MAX_POLS; i++)
+		kfree(blkg->pd[i]);
 
 	blk_exit_rl(&blkg->rl);
 	kfree(blkg);
@@ -134,10 +104,6 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q,
 		blkg->pd[i] = pd;
 		pd->blkg = blkg;
 		pd->plid = i;
-
-		/* invoke per-policy init */
-		if (pol->pd_init_fn)
-			pol->pd_init_fn(blkg);
 	}
 
 	return blkg;
@@ -158,8 +124,8 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q,
  * @q's bypass state.  If @update_hint is %true, the caller should be
  * holding @q->queue_lock and lookup hint is updated on success.
  */
-static struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg,
-				      struct request_queue *q, bool update_hint)
+struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, struct request_queue *q,
+			       bool update_hint)
 {
 	struct blkcg_gq *blkg;
 
@@ -234,16 +200,25 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
 	}
 	blkg = new_blkg;
 
-	/* link parent and insert */
+	/* link parent */
 	if (blkcg_parent(blkcg)) {
 		blkg->parent = __blkg_lookup(blkcg_parent(blkcg), q, false);
 		if (WARN_ON_ONCE(!blkg->parent)) {
-			blkg = ERR_PTR(-EINVAL);
+			ret = -EINVAL;
 			goto err_put_css;
 		}
 		blkg_get(blkg->parent);
 	}
 
+	/* invoke per-policy init */
+	for (i = 0; i < BLKCG_MAX_POLS; i++) {
+		struct blkcg_policy *pol = blkcg_policy[i];
+
+		if (blkg->pd[i] && pol->pd_init_fn)
+			pol->pd_init_fn(blkg);
+	}
+
+	/* insert */
 	spin_lock(&blkcg->lock);
 	ret = radix_tree_insert(&blkcg->blkg_tree, q->id, blkg);
 	if (likely(!ret)) {
@@ -394,30 +369,38 @@ static void blkg_destroy_all(struct request_queue *q)
 	q->root_rl.blkg = NULL;
 }
 
-static void blkg_rcu_free(struct rcu_head *rcu_head)
+/*
+ * A group is RCU protected, but having an rcu lock does not mean that one
+ * can access all the fields of blkg and assume these are valid.  For
+ * example, don't try to follow throtl_data and request queue links.
+ *
+ * Having a reference to blkg under an rcu allows accesses to only values
+ * local to groups like group stats and group rate limits.
+ */
+void __blkg_release_rcu(struct rcu_head *rcu_head)
 {
-	blkg_free(container_of(rcu_head, struct blkcg_gq, rcu_head));
-}
+	struct blkcg_gq *blkg = container_of(rcu_head, struct blkcg_gq, rcu_head);
+	int i;
+
+	/* tell policies that this one is being freed */
+	for (i = 0; i < BLKCG_MAX_POLS; i++) {
+		struct blkcg_policy *pol = blkcg_policy[i];
+
+		if (blkg->pd[i] && pol->pd_exit_fn)
+			pol->pd_exit_fn(blkg);
+	}
 
-void __blkg_release(struct blkcg_gq *blkg)
-{
 	/* release the blkcg and parent blkg refs this blkg has been holding */
 	css_put(&blkg->blkcg->css);
-	if (blkg->parent)
+	if (blkg->parent) {
+		spin_lock_irq(blkg->q->queue_lock);
 		blkg_put(blkg->parent);
+		spin_unlock_irq(blkg->q->queue_lock);
+	}
 
-	/*
-	 * A group is freed in rcu manner. But having an rcu lock does not
-	 * mean that one can access all the fields of blkg and assume these
-	 * are valid. For example, don't try to follow throtl_data and
-	 * request queue links.
-	 *
-	 * Having a reference to blkg under an rcu allows acess to only
-	 * values local to groups like group stats and group rate limits
-	 */
-	call_rcu(&blkg->rcu_head, blkg_rcu_free);
+	blkg_free(blkg);
 }
-EXPORT_SYMBOL_GPL(__blkg_release);
+EXPORT_SYMBOL_GPL(__blkg_release_rcu);
 
 /*
  * The next function used by blk_queue_for_each_rl().  It's a bit tricky
@@ -928,14 +911,6 @@ struct cgroup_subsys blkio_subsys = {
 	.subsys_id = blkio_subsys_id,
 	.base_cftypes = blkcg_files,
 	.module = THIS_MODULE,
-
-	/*
-	 * blkio subsystem is utterly broken in terms of hierarchy support.
-	 * It treats all cgroups equally regardless of where they're
-	 * located in the hierarchy - all cgroups are treated as if they're
-	 * right below the root.  Fix it and remove the following.
-	 */
-	.broken_hierarchy = true,
 };
 EXPORT_SYMBOL_GPL(blkio_subsys);
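
The release-path rework above moves the final teardown into an RCU callback, which also drops the reference each group holds on its parent so that teardown cascades toward the root. A rough userspace analogue of that cascading put follows (a sketch only: the grace-period machinery is replaced by an immediate callback, and struct group, group_new() and group_put() are invented names for the illustration).

/*
 * Minimal sketch (userspace C, not the kernel implementation) of the
 * release scheme used above: each group holds a reference on its
 * parent; dropping the last reference "defers" the actual free to a
 * callback, and that callback releases the parent reference, so
 * teardown cascades root-ward.  release_cb() here runs immediately
 * instead of after an RCU grace period.
 */
#include <stdio.h>
#include <stdlib.h>

struct group {
	const char *name;
	struct group *parent;
	int refcnt;
};

static void group_put(struct group *g);

/* Stand-in for the RCU callback: a real implementation waits for readers. */
static void release_cb(struct group *g)
{
	printf("freeing %s\n", g->name);
	if (g->parent)
		group_put(g->parent);	/* may cascade to the parent */
	free(g);
}

static void group_put(struct group *g)
{
	if (--g->refcnt == 0)
		release_cb(g);
}

static struct group *group_new(const char *name, struct group *parent)
{
	struct group *g = malloc(sizeof(*g));

	if (!g)
		abort();
	g->name = name;
	g->parent = parent;
	g->refcnt = 1;
	if (parent)
		parent->refcnt++;	/* a child pins its parent */
	return g;
}

int main(void)
{
	struct group *root = group_new("root", NULL);
	struct group *child = group_new("child", root);

	group_put(root);	/* root stays: child still holds a ref */
	group_put(child);	/* frees child, then cascades to root */
	return 0;
}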


+36 −2
@@ -266,7 +266,7 @@ static inline void blkg_get(struct blkcg_gq *blkg)
 	blkg->refcnt++;
 }
 
-void __blkg_release(struct blkcg_gq *blkg);
+void __blkg_release_rcu(struct rcu_head *rcu);
 
 /**
  * blkg_put - put a blkg reference
@@ -279,9 +279,43 @@ static inline void blkg_put(struct blkcg_gq *blkg)
 	lockdep_assert_held(blkg->q->queue_lock);
 	WARN_ON_ONCE(blkg->refcnt <= 0);
 	if (!--blkg->refcnt)
-		__blkg_release(blkg);
+		call_rcu(&blkg->rcu_head, __blkg_release_rcu);
 }
 
+struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, struct request_queue *q,
+			       bool update_hint);
+
+/**
+ * blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants
+ * @d_blkg: loop cursor pointing to the current descendant
+ * @pos_cgrp: used for iteration
+ * @p_blkg: target blkg to walk descendants of
+ *
+ * Walk @c_blkg through the descendants of @p_blkg.  Must be used with RCU
+ * read locked.  If called under either blkcg or queue lock, the iteration
+ * is guaranteed to include all and only online blkgs.  The caller may
+ * update @pos_cgrp by calling cgroup_rightmost_descendant() to skip
+ * subtree.
+ */
+#define blkg_for_each_descendant_pre(d_blkg, pos_cgrp, p_blkg)		\
+	cgroup_for_each_descendant_pre((pos_cgrp), (p_blkg)->blkcg->css.cgroup) \
+		if (((d_blkg) = __blkg_lookup(cgroup_to_blkcg(pos_cgrp), \
+					      (p_blkg)->q, false)))
+
+/**
+ * blkg_for_each_descendant_post - post-order walk of a blkg's descendants
+ * @d_blkg: loop cursor pointing to the current descendant
+ * @pos_cgrp: used for iteration
+ * @p_blkg: target blkg to walk descendants of
+ *
+ * Similar to blkg_for_each_descendant_pre() but performs post-order
+ * traversal instead.  Synchronization rules are the same.
+ */
+#define blkg_for_each_descendant_post(d_blkg, pos_cgrp, p_blkg)		\
+	cgroup_for_each_descendant_post((pos_cgrp), (p_blkg)->blkcg->css.cgroup) \
+		if (((d_blkg) = __blkg_lookup(cgroup_to_blkcg(pos_cgrp), \
+					      (p_blkg)->q, false)))
+
 /**
  * blk_get_rl - get request_list to use
  * @q: request_queue of interest
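
For orientation, the two macros above differ only in visit order, which a toy tree in plain userspace C can show (a sketch, not the kernel API: the real macros walk the cgroup hierarchy under rcu_read_lock() and yield only online blkgs, while struct node, walk_pre() and walk_post() below are invented): pre-order visits a group before its descendants, post-order visits the descendants first.

/*
 * Toy illustration of pre-order vs. post-order traversal over a small
 * tree of groups.  Pre-order is handy when enabling state top-down;
 * post-order when tearing state down bottom-up.
 */
#include <stdio.h>

struct node {
	const char *name;
	struct node *child[2];
};

static void walk_pre(struct node *n)
{
	if (!n)
		return;
	printf("%s ", n->name);		/* visit before descendants */
	walk_pre(n->child[0]);
	walk_pre(n->child[1]);
}

static void walk_post(struct node *n)
{
	if (!n)
		return;
	walk_post(n->child[0]);
	walk_post(n->child[1]);
	printf("%s ", n->name);		/* visit after descendants */
}

int main(void)
{
	struct node test3 = { "test3", { NULL, NULL } };
	struct node test2 = { "test2", { NULL, NULL } };
	struct node test1 = { "test1", { &test3, NULL } };
	struct node root  = { "root",  { &test1, &test2 } };

	printf("pre-order:  "); walk_pre(&root);  printf("\n");
	printf("post-order: "); walk_post(&root); printf("\n");
	return 0;
}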
+9 −2
@@ -348,9 +348,16 @@ int blk_queue_start_tag(struct request_queue *q, struct request *rq)
 	 */
 	max_depth = bqt->max_depth;
 	if (!rq_is_sync(rq) && max_depth > 1) {
-		max_depth -= 2;
-		if (!max_depth)
+		switch (max_depth) {
+		case 2:
 			max_depth = 1;
+			break;
+		case 3:
+			max_depth = 2;
+			break;
+		default:
+			max_depth -= 2;
+		}
 		if (q->in_flight[BLK_RW_ASYNC] > max_depth)
 			return 1;
 	}
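
As a standalone illustration of the reserved-tag tweak mentioned in the pull message, the snippet below (plain userspace C; old_async_depth() and new_async_depth() are invented names that mirror the two versions of the logic above) prints how many tags async requests may use for small queue depths. The interesting case is a 3-tag device, which now keeps 2 tags usable for async writes instead of 1.

/*
 * Small standalone illustration (not kernel code) of the reserved-tag
 * change: for a queue with N tags, how many may async requests use?
 * The old rule subtracted 2 and clamped to 1, so a 3-tag device left
 * only one tag for async writes; the new rule special-cases depths 2
 * and 3.
 */
#include <stdio.h>

static int old_async_depth(int max_depth)
{
	if (max_depth <= 1)
		return max_depth;	/* no reservation possible */
	max_depth -= 2;
	if (!max_depth)
		max_depth = 1;
	return max_depth;
}

static int new_async_depth(int max_depth)
{
	if (max_depth <= 1)
		return max_depth;
	switch (max_depth) {
	case 2:
		return 1;
	case 3:
		return 2;
	default:
		return max_depth - 2;
	}
}

int main(void)
{
	for (int depth = 1; depth <= 6; depth++)
		printf("tags=%d  async before=%d  after=%d\n",
		       depth, old_async_depth(depth), new_async_depth(depth));
	return 0;
}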
+744 −320
Diff collapsed: preview size limit exceeded.