Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 23da64b4 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge branch 'for-linus' of git://git.kernel.dk/linux-2.6-block

* 'for-linus' of git://git.kernel.dk/linux-2.6-block: (28 commits)
  cfq-iosched: add close cooperator code
  cfq-iosched: log responsible 'cfqq' in idle timer arm
  cfq-iosched: tweak kick logic a bit more
  cfq-iosched: no need to save interrupts in cfq_kick_queue()
  brd: fix cacheflushing
  brd: support barriers
  swap: Remove code handling bio_alloc failure with __GFP_WAIT
  gfs2: Remove code handling bio_alloc failure with __GFP_WAIT
  ext4: Remove code handling bio_alloc failure with __GFP_WAIT
  dio: Remove code handling bio_alloc failure with __GFP_WAIT
  block: Remove code handling bio_alloc failure with __GFP_WAIT
  bio: add documentation to bio_alloc()
  splice: add helpers for locking pipe inode
  splice: remove generic_file_splice_write_nolock()
  ocfs2: fix i_mutex locking in ocfs2_splice_to_file()
  splice: fix i_mutex locking in generic_splice_write()
  splice: remove i_mutex locking in splice_from_pipe()
  splice: split up __splice_from_pipe()
  block: fix SG_IO to return a proper error value
  cfq-iosched: don't delay queue kick for a merged request
  ...
parents a23c218b a36e71f9
Loading
Loading
Loading
Loading
+6 −13
Original line number Diff line number Diff line
@@ -1040,23 +1040,21 @@ Front merges are handled by the binary trees in AS and deadline schedulers.
iii. Plugging the queue to batch requests in anticipation of opportunities for
     merge/sort optimizations

This is just the same as in 2.4 so far, though per-device unplugging
support is anticipated for 2.5. Also with a priority-based i/o scheduler,
such decisions could be based on request priorities.

Plugging is an approach that the current i/o scheduling algorithm resorts to so
that it collects up enough requests in the queue to be able to take
advantage of the sorting/merging logic in the elevator. If the
queue is empty when a request comes in, then it plugs the request queue
(sort of like plugging the bottom of a vessel to get fluid to build up)
(sort of like plugging the bath tub of a vessel to get fluid to build up)
till it fills up with a few more requests, before starting to service
the requests. This provides an opportunity to merge/sort the requests before
passing them down to the device. There are various conditions when the queue is
unplugged (to open up the flow again), either through a scheduled task or
could be on demand. For example wait_on_buffer sets the unplugging going
(by running tq_disk) so the read gets satisfied soon. So in the read case,
the queue gets explicitly unplugged as part of waiting for completion,
in fact all queues get unplugged as a side-effect.
through sync_buffer() running blk_run_address_space(mapping). Or the caller
can do it explicity through blk_unplug(bdev). So in the read case,
the queue gets explicitly unplugged as part of waiting for completion on that
buffer. For page driven IO, the address space ->sync_page() takes care of
doing the blk_run_address_space().

Aside:
  This is kind of controversial territory, as it's not clear if plugging is
@@ -1067,11 +1065,6 @@ Aside:
  multi-page bios being queued in one shot, we may not need to wait to merge
  a big request from the broken up pieces coming by.

  Per-queue granularity unplugging (still a Todo) may help reduce some of the
  concerns with just a single tq_disk flush approach. Something like
  blk_kick_queue() to unplug a specific queue (right away ?)
  or optionally, all queues, is in the plan.

4.4 I/O contexts
I/O contexts provide a dynamically allocated per process data area. They may
be used in I/O schedulers, and in the block layer (could be used for IO statis,
+57 −59
Original line number Diff line number Diff line
@@ -17,9 +17,6 @@
#include <linux/rbtree.h>
#include <linux/interrupt.h>

#define REQ_SYNC	1
#define REQ_ASYNC	0

/*
 * See Documentation/block/as-iosched.txt
 */
@@ -93,7 +90,7 @@ struct as_data {
	struct list_head fifo_list[2];

	struct request *next_rq[2];	/* next in sort order */
	sector_t last_sector[2];	/* last REQ_SYNC & REQ_ASYNC sectors */
	sector_t last_sector[2];	/* last SYNC & ASYNC sectors */

	unsigned long exit_prob;	/* probability a task will exit while
					   being waited on */
@@ -109,7 +106,7 @@ struct as_data {
	unsigned long last_check_fifo[2];
	int changed_batch;		/* 1: waiting for old batch to end */
	int new_batch;			/* 1: waiting on first read complete */
	int batch_data_dir;		/* current batch REQ_SYNC / REQ_ASYNC */
	int batch_data_dir;		/* current batch SYNC / ASYNC */
	int write_batch_count;		/* max # of reqs in a write batch */
	int current_write_count;	/* how many requests left this batch */
	int write_batch_idled;		/* has the write batch gone idle? */
@@ -554,7 +551,7 @@ static void as_update_iohist(struct as_data *ad, struct as_io_context *aic,
	if (aic == NULL)
		return;

	if (data_dir == REQ_SYNC) {
	if (data_dir == BLK_RW_SYNC) {
		unsigned long in_flight = atomic_read(&aic->nr_queued)
					+ atomic_read(&aic->nr_dispatched);
		spin_lock(&aic->lock);
@@ -811,7 +808,7 @@ static void as_update_rq(struct as_data *ad, struct request *rq)
 */
static void update_write_batch(struct as_data *ad)
{
	unsigned long batch = ad->batch_expire[REQ_ASYNC];
	unsigned long batch = ad->batch_expire[BLK_RW_ASYNC];
	long write_time;

	write_time = (jiffies - ad->current_batch_expires) + batch;
@@ -855,7 +852,7 @@ static void as_completed_request(struct request_queue *q, struct request *rq)
		kblockd_schedule_work(q, &ad->antic_work);
		ad->changed_batch = 0;

		if (ad->batch_data_dir == REQ_SYNC)
		if (ad->batch_data_dir == BLK_RW_SYNC)
			ad->new_batch = 1;
	}
	WARN_ON(ad->nr_dispatched == 0);
@@ -869,7 +866,7 @@ static void as_completed_request(struct request_queue *q, struct request *rq)
	if (ad->new_batch && ad->batch_data_dir == rq_is_sync(rq)) {
		update_write_batch(ad);
		ad->current_batch_expires = jiffies +
				ad->batch_expire[REQ_SYNC];
				ad->batch_expire[BLK_RW_SYNC];
		ad->new_batch = 0;
	}

@@ -960,7 +957,7 @@ static inline int as_batch_expired(struct as_data *ad)
	if (ad->changed_batch || ad->new_batch)
		return 0;

	if (ad->batch_data_dir == REQ_SYNC)
	if (ad->batch_data_dir == BLK_RW_SYNC)
		/* TODO! add a check so a complete fifo gets written? */
		return time_after(jiffies, ad->current_batch_expires);

@@ -986,7 +983,7 @@ static void as_move_to_dispatch(struct as_data *ad, struct request *rq)
	 */
	ad->last_sector[data_dir] = rq->sector + rq->nr_sectors;

	if (data_dir == REQ_SYNC) {
	if (data_dir == BLK_RW_SYNC) {
		struct io_context *ioc = RQ_IOC(rq);
		/* In case we have to anticipate after this */
		copy_io_context(&ad->io_context, &ioc);
@@ -1025,41 +1022,41 @@ static void as_move_to_dispatch(struct as_data *ad, struct request *rq)
static int as_dispatch_request(struct request_queue *q, int force)
{
	struct as_data *ad = q->elevator->elevator_data;
	const int reads = !list_empty(&ad->fifo_list[REQ_SYNC]);
	const int writes = !list_empty(&ad->fifo_list[REQ_ASYNC]);
	const int reads = !list_empty(&ad->fifo_list[BLK_RW_SYNC]);
	const int writes = !list_empty(&ad->fifo_list[BLK_RW_ASYNC]);
	struct request *rq;

	if (unlikely(force)) {
		/*
		 * Forced dispatch, accounting is useless.  Reset
		 * accounting states and dump fifo_lists.  Note that
		 * batch_data_dir is reset to REQ_SYNC to avoid
		 * batch_data_dir is reset to BLK_RW_SYNC to avoid
		 * screwing write batch accounting as write batch
		 * accounting occurs on W->R transition.
		 */
		int dispatched = 0;

		ad->batch_data_dir = REQ_SYNC;
		ad->batch_data_dir = BLK_RW_SYNC;
		ad->changed_batch = 0;
		ad->new_batch = 0;

		while (ad->next_rq[REQ_SYNC]) {
			as_move_to_dispatch(ad, ad->next_rq[REQ_SYNC]);
		while (ad->next_rq[BLK_RW_SYNC]) {
			as_move_to_dispatch(ad, ad->next_rq[BLK_RW_SYNC]);
			dispatched++;
		}
		ad->last_check_fifo[REQ_SYNC] = jiffies;
		ad->last_check_fifo[BLK_RW_SYNC] = jiffies;

		while (ad->next_rq[REQ_ASYNC]) {
			as_move_to_dispatch(ad, ad->next_rq[REQ_ASYNC]);
		while (ad->next_rq[BLK_RW_ASYNC]) {
			as_move_to_dispatch(ad, ad->next_rq[BLK_RW_ASYNC]);
			dispatched++;
		}
		ad->last_check_fifo[REQ_ASYNC] = jiffies;
		ad->last_check_fifo[BLK_RW_ASYNC] = jiffies;

		return dispatched;
	}

	/* Signal that the write batch was uncontended, so we can't time it */
	if (ad->batch_data_dir == REQ_ASYNC && !reads) {
	if (ad->batch_data_dir == BLK_RW_ASYNC && !reads) {
		if (ad->current_write_count == 0 || !writes)
			ad->write_batch_idled = 1;
	}
@@ -1076,8 +1073,8 @@ static int as_dispatch_request(struct request_queue *q, int force)
		 */
		rq = ad->next_rq[ad->batch_data_dir];

		if (ad->batch_data_dir == REQ_SYNC && ad->antic_expire) {
			if (as_fifo_expired(ad, REQ_SYNC))
		if (ad->batch_data_dir == BLK_RW_SYNC && ad->antic_expire) {
			if (as_fifo_expired(ad, BLK_RW_SYNC))
				goto fifo_expired;

			if (as_can_anticipate(ad, rq)) {
@@ -1090,7 +1087,7 @@ static int as_dispatch_request(struct request_queue *q, int force)
			/* we have a "next request" */
			if (reads && !writes)
				ad->current_batch_expires =
					jiffies + ad->batch_expire[REQ_SYNC];
					jiffies + ad->batch_expire[BLK_RW_SYNC];
			goto dispatch_request;
		}
	}
@@ -1101,20 +1098,20 @@ static int as_dispatch_request(struct request_queue *q, int force)
	 */

	if (reads) {
		BUG_ON(RB_EMPTY_ROOT(&ad->sort_list[REQ_SYNC]));
		BUG_ON(RB_EMPTY_ROOT(&ad->sort_list[BLK_RW_SYNC]));

		if (writes && ad->batch_data_dir == REQ_SYNC)
		if (writes && ad->batch_data_dir == BLK_RW_SYNC)
			/*
			 * Last batch was a read, switch to writes
			 */
			goto dispatch_writes;

		if (ad->batch_data_dir == REQ_ASYNC) {
		if (ad->batch_data_dir == BLK_RW_ASYNC) {
			WARN_ON(ad->new_batch);
			ad->changed_batch = 1;
		}
		ad->batch_data_dir = REQ_SYNC;
		rq = rq_entry_fifo(ad->fifo_list[REQ_SYNC].next);
		ad->batch_data_dir = BLK_RW_SYNC;
		rq = rq_entry_fifo(ad->fifo_list[BLK_RW_SYNC].next);
		ad->last_check_fifo[ad->batch_data_dir] = jiffies;
		goto dispatch_request;
	}
@@ -1125,9 +1122,9 @@ static int as_dispatch_request(struct request_queue *q, int force)

	if (writes) {
dispatch_writes:
		BUG_ON(RB_EMPTY_ROOT(&ad->sort_list[REQ_ASYNC]));
		BUG_ON(RB_EMPTY_ROOT(&ad->sort_list[BLK_RW_ASYNC]));

		if (ad->batch_data_dir == REQ_SYNC) {
		if (ad->batch_data_dir == BLK_RW_SYNC) {
			ad->changed_batch = 1;

			/*
@@ -1137,11 +1134,11 @@ static int as_dispatch_request(struct request_queue *q, int force)
			 */
			ad->new_batch = 0;
		}
		ad->batch_data_dir = REQ_ASYNC;
		ad->batch_data_dir = BLK_RW_ASYNC;
		ad->current_write_count = ad->write_batch_count;
		ad->write_batch_idled = 0;
		rq = rq_entry_fifo(ad->fifo_list[REQ_ASYNC].next);
		ad->last_check_fifo[REQ_ASYNC] = jiffies;
		rq = rq_entry_fifo(ad->fifo_list[BLK_RW_ASYNC].next);
		ad->last_check_fifo[BLK_RW_ASYNC] = jiffies;
		goto dispatch_request;
	}

@@ -1164,9 +1161,9 @@ static int as_dispatch_request(struct request_queue *q, int force)
		if (ad->nr_dispatched)
			return 0;

		if (ad->batch_data_dir == REQ_ASYNC)
		if (ad->batch_data_dir == BLK_RW_ASYNC)
			ad->current_batch_expires = jiffies +
					ad->batch_expire[REQ_ASYNC];
					ad->batch_expire[BLK_RW_ASYNC];
		else
			ad->new_batch = 1;

@@ -1238,8 +1235,8 @@ static int as_queue_empty(struct request_queue *q)
{
	struct as_data *ad = q->elevator->elevator_data;

	return list_empty(&ad->fifo_list[REQ_ASYNC])
		&& list_empty(&ad->fifo_list[REQ_SYNC]);
	return list_empty(&ad->fifo_list[BLK_RW_ASYNC])
		&& list_empty(&ad->fifo_list[BLK_RW_SYNC]);
}

static int
@@ -1346,8 +1343,8 @@ static void as_exit_queue(struct elevator_queue *e)
	del_timer_sync(&ad->antic_timer);
	cancel_work_sync(&ad->antic_work);

	BUG_ON(!list_empty(&ad->fifo_list[REQ_SYNC]));
	BUG_ON(!list_empty(&ad->fifo_list[REQ_ASYNC]));
	BUG_ON(!list_empty(&ad->fifo_list[BLK_RW_SYNC]));
	BUG_ON(!list_empty(&ad->fifo_list[BLK_RW_ASYNC]));

	put_io_context(ad->io_context);
	kfree(ad);
@@ -1372,18 +1369,18 @@ static void *as_init_queue(struct request_queue *q)
	init_timer(&ad->antic_timer);
	INIT_WORK(&ad->antic_work, as_work_handler);

	INIT_LIST_HEAD(&ad->fifo_list[REQ_SYNC]);
	INIT_LIST_HEAD(&ad->fifo_list[REQ_ASYNC]);
	ad->sort_list[REQ_SYNC] = RB_ROOT;
	ad->sort_list[REQ_ASYNC] = RB_ROOT;
	ad->fifo_expire[REQ_SYNC] = default_read_expire;
	ad->fifo_expire[REQ_ASYNC] = default_write_expire;
	INIT_LIST_HEAD(&ad->fifo_list[BLK_RW_SYNC]);
	INIT_LIST_HEAD(&ad->fifo_list[BLK_RW_ASYNC]);
	ad->sort_list[BLK_RW_SYNC] = RB_ROOT;
	ad->sort_list[BLK_RW_ASYNC] = RB_ROOT;
	ad->fifo_expire[BLK_RW_SYNC] = default_read_expire;
	ad->fifo_expire[BLK_RW_ASYNC] = default_write_expire;
	ad->antic_expire = default_antic_expire;
	ad->batch_expire[REQ_SYNC] = default_read_batch_expire;
	ad->batch_expire[REQ_ASYNC] = default_write_batch_expire;
	ad->batch_expire[BLK_RW_SYNC] = default_read_batch_expire;
	ad->batch_expire[BLK_RW_ASYNC] = default_write_batch_expire;

	ad->current_batch_expires = jiffies + ad->batch_expire[REQ_SYNC];
	ad->write_batch_count = ad->batch_expire[REQ_ASYNC] / 10;
	ad->current_batch_expires = jiffies + ad->batch_expire[BLK_RW_SYNC];
	ad->write_batch_count = ad->batch_expire[BLK_RW_ASYNC] / 10;
	if (ad->write_batch_count < 2)
		ad->write_batch_count = 2;

@@ -1432,11 +1429,11 @@ static ssize_t __FUNC(struct elevator_queue *e, char *page) \
	struct as_data *ad = e->elevator_data;			\
	return as_var_show(jiffies_to_msecs((__VAR)), (page));	\
}
SHOW_FUNCTION(as_read_expire_show, ad->fifo_expire[REQ_SYNC]);
SHOW_FUNCTION(as_write_expire_show, ad->fifo_expire[REQ_ASYNC]);
SHOW_FUNCTION(as_read_expire_show, ad->fifo_expire[BLK_RW_SYNC]);
SHOW_FUNCTION(as_write_expire_show, ad->fifo_expire[BLK_RW_ASYNC]);
SHOW_FUNCTION(as_antic_expire_show, ad->antic_expire);
SHOW_FUNCTION(as_read_batch_expire_show, ad->batch_expire[REQ_SYNC]);
SHOW_FUNCTION(as_write_batch_expire_show, ad->batch_expire[REQ_ASYNC]);
SHOW_FUNCTION(as_read_batch_expire_show, ad->batch_expire[BLK_RW_SYNC]);
SHOW_FUNCTION(as_write_batch_expire_show, ad->batch_expire[BLK_RW_ASYNC]);
#undef SHOW_FUNCTION

#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX)				\
@@ -1451,13 +1448,14 @@ static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count)
	*(__PTR) = msecs_to_jiffies(*(__PTR));				\
	return ret;							\
}
STORE_FUNCTION(as_read_expire_store, &ad->fifo_expire[REQ_SYNC], 0, INT_MAX);
STORE_FUNCTION(as_write_expire_store, &ad->fifo_expire[REQ_ASYNC], 0, INT_MAX);
STORE_FUNCTION(as_read_expire_store, &ad->fifo_expire[BLK_RW_SYNC], 0, INT_MAX);
STORE_FUNCTION(as_write_expire_store,
			&ad->fifo_expire[BLK_RW_ASYNC], 0, INT_MAX);
STORE_FUNCTION(as_antic_expire_store, &ad->antic_expire, 0, INT_MAX);
STORE_FUNCTION(as_read_batch_expire_store,
			&ad->batch_expire[REQ_SYNC], 0, INT_MAX);
			&ad->batch_expire[BLK_RW_SYNC], 0, INT_MAX);
STORE_FUNCTION(as_write_batch_expire_store,
			&ad->batch_expire[REQ_ASYNC], 0, INT_MAX);
			&ad->batch_expire[BLK_RW_ASYNC], 0, INT_MAX);
#undef STORE_FUNCTION

#define AS_ATTR(name) \
+0 −3
Original line number Diff line number Diff line
@@ -319,9 +319,6 @@ int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
		return -ENXIO;

	bio = bio_alloc(GFP_KERNEL, 0);
	if (!bio)
		return -ENOMEM;

	bio->bi_end_io = bio_end_empty_barrier;
	bio->bi_private = &wait;
	bio->bi_bdev = bdev;
+2 −2
Original line number Diff line number Diff line
@@ -209,14 +209,14 @@ static ssize_t queue_iostats_store(struct request_queue *q, const char *page,
	ssize_t ret = queue_var_store(&stats, page, count);

	spin_lock_irq(q->queue_lock);
	elv_quisce_start(q);
	elv_quiesce_start(q);

	if (stats)
		queue_flag_set(QUEUE_FLAG_IO_STAT, q);
	else
		queue_flag_clear(QUEUE_FLAG_IO_STAT, q);

	elv_quisce_end(q);
	elv_quiesce_end(q);
	spin_unlock_irq(q->queue_lock);

	return ret;
+2 −2
Original line number Diff line number Diff line
@@ -70,8 +70,8 @@ void blk_queue_congestion_threshold(struct request_queue *q);

int blk_dev_init(void);

void elv_quisce_start(struct request_queue *q);
void elv_quisce_end(struct request_queue *q);
void elv_quiesce_start(struct request_queue *q);
void elv_quiesce_end(struct request_queue *q);


/*
Loading