
Commit 4de13d7a authored by Linus Torvalds

Merge branch 'for-3.10/core' of git://git.kernel.dk/linux-block

Pull block core updates from Jens Axboe:

 - The major bit is Kent's prep work for immutable bio vecs.

 - Stable candidate fix for a scheduling-while-atomic bug in the queue
   bypass operation.

 - Fix for a hang when merging discard bios, where rq->datalen exceeded
   its 32-bit unsigned range.

 - Tejun's changes to convert the writeback thread pool to the generic
   workqueue mechanism.

 - Runtime PM framework; the SCSI patches exist on top of these in
   James' tree.

 - A few random fixes.

* 'for-3.10/core' of git://git.kernel.dk/linux-block: (40 commits)
  relay: move remove_buf_file inside relay_close_buf
  partitions/efi.c: replace useless kzalloc's by kmalloc's
  fs/block_dev.c: fix iov_shorten() criteria in blkdev_aio_read()
  block: fix max discard sectors limit
  blkcg: fix "scheduling while atomic" in blk_queue_bypass_start
  Documentation: cfq-iosched: update documentation help for cfq tunables
  writeback: expose the bdi_wq workqueue
  writeback: replace custom worker pool implementation with unbound workqueue
  writeback: remove unused bdi_pending_list
  aoe: Fix unitialized var usage
  bio-integrity: Add explicit field for owner of bip_buf
  block: Add an explicit bio flag for bios that own their bvec
  block: Add bio_alloc_pages()
  block: Convert some code to bio_for_each_segment_all()
  block: Add bio_for_each_segment_all()
  bounce: Refactor __blk_queue_bounce to not use bi_io_vec
  raid1: use bio_copy_data()
  pktcdvd: Use bio_reset() in disabled code to kill bi_idx usage
  pktcdvd: use bio_copy_data()
  block: Add bio_copy_data()
  ...
parents 5af43c24 b8d4a5bf
+44 −3
@@ -5,7 +5,7 @@ The main aim of CFQ scheduler is to provide a fair allocation of the disk
 I/O bandwidth for all the processes which requests an I/O operation.
 
 CFQ maintains the per process queue for the processes which request I/O
-operation(syncronous requests). In case of asynchronous requests, all the
+operation(synchronous requests). In case of asynchronous requests, all the
 requests from all the processes are batched together according to their
 process's I/O priority.
 
@@ -66,6 +66,47 @@ This parameter is used to set the timeout of synchronous requests. Default
 value of this is 124ms. In case to favor synchronous requests over asynchronous
 one, this value should be decreased relative to fifo_expire_async.
 
+group_idle
+-----------
+This parameter forces idling at the CFQ group level instead of CFQ
+queue level. This was introduced after a bottleneck was observed
+in higher end storage due to idle on sequential queue and allow dispatch
+from a single queue. The idea with this parameter is that it can be run with
+slice_idle=0 and group_idle=8, so that idling does not happen on individual
+queues in the group but happens overall on the group and thus still keeps the
+IO controller working.
+Not idling on individual queues in the group will dispatch requests from
+multiple queues in the group at the same time and achieve higher throughput
+on higher end storage.
+
+Default value for this parameter is 8ms.
+
+latency
+-------
+This parameter is used to enable/disable the latency mode of the CFQ
+scheduler. If latency mode (called low_latency) is enabled, CFQ tries
+to recompute the slice time for each process based on the target_latency set
+for the system. This favors fairness over throughput. Disabling low
+latency (setting it to 0) ignores target latency, allowing each process in the
+system to get a full time slice.
+
+By default low latency mode is enabled.
+
+target_latency
+--------------
+This parameter is used to calculate the time slice for a process if cfq's
+latency mode is enabled. It will ensure that sync requests have an estimated
+latency. But if sequential workload is higher(e.g. sequential read),
+then to meet the latency constraints, throughput may decrease because of less
+time for each process to issue I/O request before the cfq queue is switched.
+
+Though this can be overcome by disabling the latency_mode, it may increase
+the read latency for some applications. This parameter allows for changing
+target_latency through the sysfs interface which can provide the balanced
+throughput and read latency.
+
+Default value for target_latency is 300ms.
+
 slice_async
 -----------
 This parameter is same as of slice_sync but for asynchronous queue. The
@@ -98,8 +139,8 @@ in the device exceeds this parameter. This parameter is used for synchronous
 request.
 
 In case of storage with several disk, this setting can limit the parallel
-processing of request. Therefore, increasing the value can imporve the
-performace although this can cause the latency of some I/O to increase due
+processing of request. Therefore, increasing the value can improve the
+performance although this can cause the latency of some I/O to increase due
 to more number of requests.
 
 CFQ Group scheduling
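
Note: the tunables documented above are exposed per device under the queue's iosched directory in sysfs (for CFQ, /sys/block/<dev>/queue/iosched/). As a rough sketch of the slice_idle=0 plus group_idle=8 combination described under group_idle above — the device name, values, and error handling here are illustrative assumptions, not part of this commit:

/* Illustrative sketch only: sets CFQ tunables through sysfs.
 * Assumes /dev/sdb uses the cfq scheduler and that the tunables
 * live at the usual /sys/block/<dev>/queue/iosched/ location. */
#include <stdio.h>

static int write_tunable(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);
		return -1;
	}
	fprintf(f, "%s\n", val);
	return fclose(f);
}

int main(void)
{
	/* Idle at the group level rather than per queue (see group_idle). */
	write_tunable("/sys/block/sdb/queue/iosched/slice_idle", "0");
	write_tunable("/sys/block/sdb/queue/iosched/group_idle", "8");
	/* Trade some fairness for throughput by relaxing the target latency. */
	write_tunable("/sys/block/sdb/queue/iosched/target_latency", "500");
	return 0;
}

Per the latency section above, writing target_latency only has an effect while low_latency is enabled.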
+2 −2
@@ -972,10 +972,10 @@ int blkcg_activate_policy(struct request_queue *q,
 	if (!new_blkg)
 		return -ENOMEM;
 
-	preloaded = !radix_tree_preload(GFP_KERNEL);
-
 	blk_queue_bypass_start(q);
 
+	preloaded = !radix_tree_preload(GFP_KERNEL);
+
 	/*
 	 * Make sure the root blkg exists and count the existing blkgs.  As
 	 * @q is bypassing at this point, blkg_lookup_create() can't be
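
For context on why the reorder above fixes the "scheduling while atomic" report: radix_tree_preload() returns with preemption disabled on success, while blk_queue_bypass_start() may sleep, so the sleeping call has to come first. A minimal sketch of the resulting pattern (illustrative only; the real function goes on to create blkgs under the queue lock before calling radix_tree_preload_end()):

	bool preloaded;

	blk_queue_bypass_start(q);			/* may sleep, so call it first */

	preloaded = !radix_tree_preload(GFP_KERNEL);	/* preemption disabled from here */

	/* ... radix tree insertions in atomic context ... */

	if (preloaded)
		radix_tree_preload_end();		/* re-enables preemption */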
+196 −69
@@ -30,6 +30,7 @@
 #include <linux/list_sort.h>
 #include <linux/delay.h>
 #include <linux/ratelimit.h>
+#include <linux/pm_runtime.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/block.h>
@@ -159,20 +160,10 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
 	else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
 		error = -EIO;
 
-	if (unlikely(nbytes > bio->bi_size)) {
-		printk(KERN_ERR "%s: want %u bytes done, %u left\n",
-		       __func__, nbytes, bio->bi_size);
-		nbytes = bio->bi_size;
-	}
-
 	if (unlikely(rq->cmd_flags & REQ_QUIET))
 		set_bit(BIO_QUIET, &bio->bi_flags);
 
-	bio->bi_size -= nbytes;
-	bio->bi_sector += (nbytes >> 9);
-
-	if (bio_integrity(bio))
-		bio_integrity_advance(bio, nbytes);
+	bio_advance(bio, nbytes);
 
 	/* don't actually finish bio if it's part of flush sequence */
 	if (bio->bi_size == 0 && !(rq->cmd_flags & REQ_FLUSH_SEQ))
@@ -1264,6 +1255,16 @@ void part_round_stats(int cpu, struct hd_struct *part)
 }
 EXPORT_SYMBOL_GPL(part_round_stats);
 
+#ifdef CONFIG_PM_RUNTIME
+static void blk_pm_put_request(struct request *rq)
+{
+	if (rq->q->dev && !(rq->cmd_flags & REQ_PM) && !--rq->q->nr_pending)
+		pm_runtime_mark_last_busy(rq->q->dev);
+}
+#else
+static inline void blk_pm_put_request(struct request *rq) {}
+#endif
+
 /*
  * queue lock must be held
  */
@@ -1274,6 +1275,8 @@ void __blk_put_request(struct request_queue *q, struct request *req)
 	if (unlikely(--req->ref_count))
 		return;
 
+	blk_pm_put_request(req);
+
 	elv_completed_request(q, req);
 
 	/* this is a bio leak */
@@ -1597,7 +1600,7 @@ static void handle_bad_sector(struct bio *bio)
 	printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n",
 			bdevname(bio->bi_bdev, b),
 			bio->bi_rw,
-			(unsigned long long)bio->bi_sector + bio_sectors(bio),
+			(unsigned long long)bio_end_sector(bio),
 			(long long)(i_size_read(bio->bi_bdev->bd_inode) >> 9));
 
 	set_bit(BIO_EOF, &bio->bi_flags);
@@ -2053,6 +2056,28 @@ static void blk_account_io_done(struct request *req)
 	}
 }
 
+#ifdef CONFIG_PM_RUNTIME
+/*
+ * Don't process normal requests when queue is suspended
+ * or in the process of suspending/resuming
+ */
+static struct request *blk_pm_peek_request(struct request_queue *q,
+					   struct request *rq)
+{
+	if (q->dev && (q->rpm_status == RPM_SUSPENDED ||
+	    (q->rpm_status != RPM_ACTIVE && !(rq->cmd_flags & REQ_PM))))
+		return NULL;
+	else
+		return rq;
+}
+#else
+static inline struct request *blk_pm_peek_request(struct request_queue *q,
+						  struct request *rq)
+{
+	return rq;
+}
+#endif
+
 /**
  * blk_peek_request - peek at the top of a request queue
  * @q: request queue to peek at
@@ -2075,6 +2100,11 @@ struct request *blk_peek_request(struct request_queue *q)
 	int ret;
 
 	while ((rq = __elv_next_request(q)) != NULL) {
+
+		rq = blk_pm_peek_request(q, rq);
+		if (!rq)
+			break;
+
 		if (!(rq->cmd_flags & REQ_STARTED)) {
 			/*
 			 * This is the first time the device driver
@@ -2253,8 +2283,7 @@ EXPORT_SYMBOL(blk_fetch_request);
  **/
 bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
 {
-	int total_bytes, bio_nbytes, next_idx = 0;
-	struct bio *bio;
+	int total_bytes;
 
 	if (!req->bio)
 		return false;
@@ -2300,57 +2329,22 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
 
 	blk_account_io_completion(req, nr_bytes);
 
-	total_bytes = bio_nbytes = 0;
-	while ((bio = req->bio) != NULL) {
-		int nbytes;
+	total_bytes = 0;
+	while (req->bio) {
+		struct bio *bio = req->bio;
+		unsigned bio_bytes = min(bio->bi_size, nr_bytes);
 
-		if (nr_bytes >= bio->bi_size) {
+		if (bio_bytes == bio->bi_size)
 			req->bio = bio->bi_next;
-			nbytes = bio->bi_size;
-			req_bio_endio(req, bio, nbytes, error);
-			next_idx = 0;
-			bio_nbytes = 0;
-		} else {
-			int idx = bio->bi_idx + next_idx;
-
-			if (unlikely(idx >= bio->bi_vcnt)) {
-				blk_dump_rq_flags(req, "__end_that");
-				printk(KERN_ERR "%s: bio idx %d >= vcnt %d\n",
-				       __func__, idx, bio->bi_vcnt);
-				break;
-			}
-
-			nbytes = bio_iovec_idx(bio, idx)->bv_len;
-			BIO_BUG_ON(nbytes > bio->bi_size);
 
-			/*
-			 * not a complete bvec done
-			 */
-			if (unlikely(nbytes > nr_bytes)) {
-				bio_nbytes += nr_bytes;
-				total_bytes += nr_bytes;
-				break;
-			}
+		req_bio_endio(req, bio, bio_bytes, error);
 
-			/*
-			 * advance to the next vector
-			 */
-			next_idx++;
-			bio_nbytes += nbytes;
-		}
+		total_bytes += bio_bytes;
+		nr_bytes -= bio_bytes;
 
-		total_bytes += nbytes;
-		nr_bytes -= nbytes;
-
-		bio = req->bio;
-		if (bio) {
-			/*
-			 * end more in this run, or just return 'not-done'
-			 */
-			if (unlikely(nr_bytes <= 0))
-				break;
-		}
+		if (!nr_bytes)
+			break;
 	}
 
 	/*
 	 * completely done
@@ -2365,16 +2359,6 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
 		return false;
 	}
 
-	/*
-	 * if the request wasn't completed, update state
-	 */
-	if (bio_nbytes) {
-		req_bio_endio(req, bio, bio_nbytes, error);
-		bio->bi_idx += next_idx;
-		bio_iovec(bio)->bv_offset += nr_bytes;
-		bio_iovec(bio)->bv_len -= nr_bytes;
-	}
-
 	req->__data_len -= total_bytes;
 	req->buffer = bio_data(req->bio);
 
@@ -3046,6 +3030,149 @@ void blk_finish_plug(struct blk_plug *plug)
 }
 EXPORT_SYMBOL(blk_finish_plug);
 
+#ifdef CONFIG_PM_RUNTIME
+/**
+ * blk_pm_runtime_init - Block layer runtime PM initialization routine
+ * @q: the queue of the device
+ * @dev: the device the queue belongs to
+ *
+ * Description:
+ *    Initialize runtime-PM-related fields for @q and start auto suspend for
+ *    @dev. Drivers that want to take advantage of request-based runtime PM
+ *    should call this function after @dev has been initialized, and its
+ *    request queue @q has been allocated, and runtime PM for it can not happen
+ *    yet(either due to disabled/forbidden or its usage_count > 0). In most
+ *    cases, driver should call this function before any I/O has taken place.
+ *
+ *    This function takes care of setting up using auto suspend for the device,
+ *    the autosuspend delay is set to -1 to make runtime suspend impossible
+ *    until an updated value is either set by user or by driver. Drivers do
+ *    not need to touch other autosuspend settings.
+ *
+ *    The block layer runtime PM is request based, so only works for drivers
+ *    that use request as their IO unit instead of those directly use bio's.
+ */
+void blk_pm_runtime_init(struct request_queue *q, struct device *dev)
+{
+	q->dev = dev;
+	q->rpm_status = RPM_ACTIVE;
+	pm_runtime_set_autosuspend_delay(q->dev, -1);
+	pm_runtime_use_autosuspend(q->dev);
+}
+EXPORT_SYMBOL(blk_pm_runtime_init);
+
+/**
+ * blk_pre_runtime_suspend - Pre runtime suspend check
+ * @q: the queue of the device
+ *
+ * Description:
+ *    This function will check if runtime suspend is allowed for the device
+ *    by examining if there are any requests pending in the queue. If there
+ *    are requests pending, the device can not be runtime suspended; otherwise,
+ *    the queue's status will be updated to SUSPENDING and the driver can
+ *    proceed to suspend the device.
+ *
+ *    For the not allowed case, we mark last busy for the device so that
+ *    runtime PM core will try to autosuspend it some time later.
+ *
+ *    This function should be called near the start of the device's
+ *    runtime_suspend callback.
+ *
+ * Return:
+ *    0		- OK to runtime suspend the device
+ *    -EBUSY	- Device should not be runtime suspended
+ */
+int blk_pre_runtime_suspend(struct request_queue *q)
+{
+	int ret = 0;
+
+	spin_lock_irq(q->queue_lock);
+	if (q->nr_pending) {
+		ret = -EBUSY;
+		pm_runtime_mark_last_busy(q->dev);
+	} else {
+		q->rpm_status = RPM_SUSPENDING;
+	}
+	spin_unlock_irq(q->queue_lock);
+	return ret;
+}
+EXPORT_SYMBOL(blk_pre_runtime_suspend);
+
+/**
+ * blk_post_runtime_suspend - Post runtime suspend processing
+ * @q: the queue of the device
+ * @err: return value of the device's runtime_suspend function
+ *
+ * Description:
+ *    Update the queue's runtime status according to the return value of the
+ *    device's runtime suspend function and mark last busy for the device so
+ *    that PM core will try to auto suspend the device at a later time.
+ *
+ *    This function should be called near the end of the device's
+ *    runtime_suspend callback.
+ */
+void blk_post_runtime_suspend(struct request_queue *q, int err)
+{
+	spin_lock_irq(q->queue_lock);
+	if (!err) {
+		q->rpm_status = RPM_SUSPENDED;
+	} else {
+		q->rpm_status = RPM_ACTIVE;
+		pm_runtime_mark_last_busy(q->dev);
+	}
+	spin_unlock_irq(q->queue_lock);
+}
+EXPORT_SYMBOL(blk_post_runtime_suspend);
+
+/**
+ * blk_pre_runtime_resume - Pre runtime resume processing
+ * @q: the queue of the device
+ *
+ * Description:
+ *    Update the queue's runtime status to RESUMING in preparation for the
+ *    runtime resume of the device.
+ *
+ *    This function should be called near the start of the device's
+ *    runtime_resume callback.
+ */
+void blk_pre_runtime_resume(struct request_queue *q)
+{
+	spin_lock_irq(q->queue_lock);
+	q->rpm_status = RPM_RESUMING;
+	spin_unlock_irq(q->queue_lock);
+}
+EXPORT_SYMBOL(blk_pre_runtime_resume);
+
+/**
+ * blk_post_runtime_resume - Post runtime resume processing
+ * @q: the queue of the device
+ * @err: return value of the device's runtime_resume function
+ *
+ * Description:
+ *    Update the queue's runtime status according to the return value of the
+ *    device's runtime_resume function. If it is successfully resumed, process
+ *    the requests that are queued into the device's queue when it is resuming
+ *    and then mark last busy and initiate autosuspend for it.
+ *
+ *    This function should be called near the end of the device's
+ *    runtime_resume callback.
+ */
+void blk_post_runtime_resume(struct request_queue *q, int err)
+{
+	spin_lock_irq(q->queue_lock);
+	if (!err) {
+		q->rpm_status = RPM_ACTIVE;
+		__blk_run_queue(q);
+		pm_runtime_mark_last_busy(q->dev);
+		pm_runtime_autosuspend(q->dev);
+	} else {
+		q->rpm_status = RPM_SUSPENDED;
+	}
+	spin_unlock_irq(q->queue_lock);
+}
+EXPORT_SYMBOL(blk_post_runtime_resume);
+#endif
+
 int __init blk_dev_init(void)
 {
 	BUILD_BUG_ON(__REQ_NR_BITS > 8 *
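
The kernel-doc above describes the handshake a request-based driver is expected to follow: blk_pm_runtime_init() once the queue exists, blk_pre_runtime_suspend()/blk_post_runtime_suspend() bracketing the driver's runtime_suspend callback, and blk_pre_runtime_resume()/blk_post_runtime_resume() bracketing runtime_resume. The skeleton below only illustrates that flow under assumed driver structure; the mydrv_* names, the drvdata lookup, and the delay value are not from this merge (the real users are the SCSI patches mentioned in the pull message, which live in James' tree).

/* Illustrative driver skeleton (not part of this commit): shows where the
 * new block layer runtime PM helpers are meant to be called from. */
#include <linux/blkdev.h>
#include <linux/pm_runtime.h>

/* Call once the request queue exists and before I/O starts. */
static void mydrv_setup_runtime_pm(struct request_queue *q, struct device *dev)
{
	blk_pm_runtime_init(q, dev);
	/* blk_pm_runtime_init() leaves the autosuspend delay at -1, which keeps
	 * runtime suspend disabled; pick a real delay to enable it. */
	pm_runtime_set_autosuspend_delay(dev, 5000);
	pm_runtime_allow(dev);
}

static int mydrv_runtime_suspend(struct device *dev)
{
	struct request_queue *q = dev_get_drvdata(dev);	/* assumed layout */
	int err;

	err = blk_pre_runtime_suspend(q);
	if (err)		/* -EBUSY: requests pending, stay active */
		return err;

	err = 0;		/* ... put the hardware to sleep here ... */
	blk_post_runtime_suspend(q, err);
	return err;
}

static int mydrv_runtime_resume(struct device *dev)
{
	struct request_queue *q = dev_get_drvdata(dev);	/* assumed layout */
	int err;

	blk_pre_runtime_resume(q);
	err = 0;		/* ... wake the hardware here ... */
	blk_post_runtime_resume(q, err);	/* reruns the queue on success */
	return err;
}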
+2 −5
@@ -2270,11 +2270,8 @@ cfq_find_rq_fmerge(struct cfq_data *cfqd, struct bio *bio)
 		return NULL;
 
 	cfqq = cic_to_cfqq(cic, cfq_bio_sync(bio));
-	if (cfqq) {
-		sector_t sector = bio->bi_sector + bio_sectors(bio);
-
-		return elv_rb_find(&cfqq->sort_list, sector);
-	}
+	if (cfqq)
+		return elv_rb_find(&cfqq->sort_list, bio_end_sector(bio));
 
 	return NULL;
 }
+1 −1
@@ -132,7 +132,7 @@ deadline_merge(struct request_queue *q, struct request **req, struct bio *bio)
 	 * check for front merge
 	 */
 	if (dd->front_merges) {
-		sector_t sector = bio->bi_sector + bio_sectors(bio);
+		sector_t sector = bio_end_sector(bio);
 
 		__rq = elv_rb_find(&dd->sort_list[bio_data_dir(bio)], sector);
 		if (__rq) {