
Commit d87f4c14 authored by Tejun Heo, committed by Jens Axboe

dm: implement REQ_FLUSH/FUA support for bio-based dm



This patch converts bio-based dm to support REQ_FLUSH/FUA instead of
the now-deprecated REQ_HARDBARRIER.
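
For orientation, here is a minimal, purely illustrative sketch of the
new flag model from a submitter's point of view (not part of this
patch; bdev, data_bio and flush_end_io are hypothetical):

	/* A standalone cache flush: a zero-length bio with REQ_FLUSH. */
	struct bio *flush = bio_alloc(GFP_NOIO, 0);

	flush->bi_bdev = bdev;			/* hypothetical target device */
	flush->bi_end_io = flush_end_io;	/* hypothetical completion fn */
	submit_bio(WRITE_FLUSH, flush);

	/* A write whose data must reach stable media before completing. */
	submit_bio(WRITE | REQ_FUA, data_bio);	/* data_bio prepared elsewhere */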

* -EOPNOTSUPP handling logic dropped.

* Preflush is handled as before, but postflush is dropped and replaced
  by passing REQ_FUA down to the member request_queues.  This replaces
  one array-wide cache flush with member-specific FUA writes (see the
  map sketch below the list).

* __split_and_process_bio() now calls __clone_and_map_flush() directly
  for flushes, which guarantees that every FLUSH bio passed on to dm
  targets is zero length.  bio_empty_barrier() tests are therefore
  replaced with plain REQ_FLUSH tests, as sketched after this list.

* Empty WRITE_BARRIERs are replaced with WRITE_FLUSHes.

* Dropped unlikely() around REQ_FLUSH tests.  Flushes are not unlikely
  enough to be marked with unlikely().

* The block layer now filters out REQ_FLUSH/FUA bios if the
  request_queue doesn't support cache flushing, so dm now advertises
  REQ_FLUSH | REQ_FUA capability (a one-line sketch follows the
  message body).

* Request-based dm isn't converted yet.  dm_init_request_based_queue()
  resets flush support to 0 for now.  To avoid disturbing request-based
  dm code, dm->flush_error is added for bio-based dm while
  request-based dm continues to use dm->barrier_error.
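
To make the per-target conversion in the bullets above concrete, here
is a rough sketch of a hypothetical single-device target's map
function after the change (example_ctx and example_map are
illustrative names, not code from this patch):

	struct example_ctx {			/* hypothetical per-target state */
		struct dm_dev *dev;
		sector_t start;
	};

	static int example_map(struct dm_target *ti, struct bio *bio,
			       union map_info *map_context)
	{
		struct example_ctx *ec = ti->private;

		/*
		 * Every FLUSH bio reaching a target is now guaranteed to
		 * be zero length, so a plain flag test replaces the old
		 * bio_empty_barrier() check, with no unlikely().
		 */
		if (bio->bi_rw & REQ_FLUSH) {
			bio->bi_bdev = ec->dev->bdev;
			return DM_MAPIO_REMAPPED;
		}

		/*
		 * Ordinary writes keep their REQ_FUA bit when remapped;
		 * these member-specific FUA writes are what replace the
		 * old array-wide postflush.
		 */
		bio->bi_bdev = ec->dev->bdev;
		bio->bi_sector = ec->start + (bio->bi_sector - ti->begin);
		return DM_MAPIO_REMAPPED;
	}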

Lightly tested linear, stripe, raid1, snap and crypt targets.  Please
proceed with caution as I'm not familiar with the code base.
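
The capability advertisement mentioned above comes down to one
block-layer call at queue setup, roughly as follows (a sketch;
blk_queue_flush() is the then-new replacement for blk_queue_ordered(),
and md->queue follows dm's usual naming):

	/*
	 * Declare that this queue accepts REQ_FLUSH and REQ_FUA bios;
	 * the block layer filters out or emulates whatever a queue
	 * does not advertise, so targets never see unsupported flags.
	 */
	blk_queue_flush(md->queue, REQ_FLUSH | REQ_FUA);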

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: dm-devel@redhat.com
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
parent 3a2edd0d
drivers/md/dm-crypt.c  +1 −1
@@ -1278,7 +1278,7 @@ static int crypt_map(struct dm_target *ti, struct bio *bio,
 	struct dm_crypt_io *io;
 	struct crypt_config *cc;
 
-	if (unlikely(bio_empty_barrier(bio))) {
+	if (bio->bi_rw & REQ_FLUSH) {
 		cc = ti->private;
 		bio->bi_bdev = cc->dev->bdev;
 		return DM_MAPIO_REMAPPED;
drivers/md/dm-io.c  +4 −16
@@ -31,7 +31,6 @@ struct dm_io_client {
  */
 struct io {
 	unsigned long error_bits;
-	unsigned long eopnotsupp_bits;
 	atomic_t count;
 	struct task_struct *sleeper;
 	struct dm_io_client *client;
@@ -130,11 +129,8 @@ static void retrieve_io_and_region_from_bio(struct bio *bio, struct io **io,
  *---------------------------------------------------------------*/
 static void dec_count(struct io *io, unsigned int region, int error)
 {
-	if (error) {
+	if (error)
 		set_bit(region, &io->error_bits);
-		if (error == -EOPNOTSUPP)
-			set_bit(region, &io->eopnotsupp_bits);
-	}
 
 	if (atomic_dec_and_test(&io->count)) {
 		if (io->sleeper)
@@ -310,8 +306,8 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where,
 	sector_t remaining = where->count;
 
 	/*
-	 * where->count may be zero if rw holds a write barrier and we
-	 * need to send a zero-sized barrier.
+	 * where->count may be zero if rw holds a flush and we need to
+	 * send a zero-sized flush.
 	 */
 	do {
 		/*
@@ -364,7 +360,7 @@ static void dispatch_io(int rw, unsigned int num_regions,
 	 */
 	for (i = 0; i < num_regions; i++) {
 		*dp = old_pages;
-		if (where[i].count || (rw & REQ_HARDBARRIER))
+		if (where[i].count || (rw & REQ_FLUSH))
 			do_region(rw, i, where + i, dp, io);
 	}
 
@@ -393,9 +389,7 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions,
 		return -EIO;
 	}
 
-retry:
 	io->error_bits = 0;
-	io->eopnotsupp_bits = 0;
 	atomic_set(&io->count, 1); /* see dispatch_io() */
 	io->sleeper = current;
 	io->client = client;
@@ -412,11 +406,6 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions,
 	}
 	set_current_state(TASK_RUNNING);
 
-	if (io->eopnotsupp_bits && (rw & REQ_HARDBARRIER)) {
-		rw &= ~REQ_HARDBARRIER;
-		goto retry;
-	}
-
 	if (error_bits)
 		*error_bits = io->error_bits;
 
@@ -437,7 +426,6 @@ static int async_io(struct dm_io_client *client, unsigned int num_regions,
 
 	io = mempool_alloc(client->pool, GFP_NOIO);
 	io->error_bits = 0;
-	io->eopnotsupp_bits = 0;
 	atomic_set(&io->count, 1); /* see dispatch_io() */
 	io->sleeper = NULL;
 	io->client = client;
drivers/md/dm-log.c  +1 −1
@@ -300,7 +300,7 @@ static int flush_header(struct log_c *lc)
 		.count = 0,
 	};
 
-	lc->io_req.bi_rw = WRITE_BARRIER;
+	lc->io_req.bi_rw = WRITE_FLUSH;
 
 	return dm_io(&lc->io_req, 1, &null_location, NULL);
 }
drivers/md/dm-raid1.c  +4 −4
@@ -259,7 +259,7 @@ static int mirror_flush(struct dm_target *ti)
 	struct dm_io_region io[ms->nr_mirrors];
 	struct mirror *m;
 	struct dm_io_request io_req = {
-		.bi_rw = WRITE_BARRIER,
+		.bi_rw = WRITE_FLUSH,
 		.mem.type = DM_IO_KMEM,
 		.mem.ptr.bvec = NULL,
 		.client = ms->io_client,
@@ -629,7 +629,7 @@ static void do_write(struct mirror_set *ms, struct bio *bio)
 	struct dm_io_region io[ms->nr_mirrors], *dest = io;
 	struct mirror *m;
 	struct dm_io_request io_req = {
-		.bi_rw = WRITE | (bio->bi_rw & WRITE_BARRIER),
+		.bi_rw = WRITE | (bio->bi_rw & WRITE_FLUSH_FUA),
 		.mem.type = DM_IO_BVEC,
 		.mem.ptr.bvec = bio->bi_io_vec + bio->bi_idx,
 		.notify.fn = write_callback,
@@ -670,7 +670,7 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes)
 	bio_list_init(&requeue);
 
 	while ((bio = bio_list_pop(writes))) {
-		if (unlikely(bio_empty_barrier(bio))) {
+		if (bio->bi_rw & REQ_FLUSH) {
 			bio_list_add(&sync, bio);
 			continue;
 		}
@@ -1203,7 +1203,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio,
 	 * We need to dec pending if this was a write.
 	 */
 	if (rw == WRITE) {
-		if (likely(!bio_empty_barrier(bio)))
+		if (!(bio->bi_rw & REQ_FLUSH))
 			dm_rh_dec(ms->rh, map_context->ll);
 		return error;
 	}
drivers/md/dm-region-hash.c  +8 −8
@@ -81,9 +81,9 @@ struct dm_region_hash {
 	struct list_head failed_recovered_regions;
 
 	/*
-	 * If there was a barrier failure no regions can be marked clean.
+	 * If there was a flush failure no regions can be marked clean.
 	 */
-	int barrier_failure;
+	int flush_failure;
 
 	void *context;
 	sector_t target_begin;
@@ -217,7 +217,7 @@ struct dm_region_hash *dm_region_hash_create(
 	INIT_LIST_HEAD(&rh->quiesced_regions);
 	INIT_LIST_HEAD(&rh->recovered_regions);
 	INIT_LIST_HEAD(&rh->failed_recovered_regions);
-	rh->barrier_failure = 0;
+	rh->flush_failure = 0;
 
 	rh->region_pool = mempool_create_kmalloc_pool(MIN_REGIONS,
 						      sizeof(struct dm_region));
@@ -399,8 +399,8 @@ void dm_rh_mark_nosync(struct dm_region_hash *rh, struct bio *bio)
 	region_t region = dm_rh_bio_to_region(rh, bio);
 	int recovering = 0;
 
-	if (bio_empty_barrier(bio)) {
-		rh->barrier_failure = 1;
+	if (bio->bi_rw & REQ_FLUSH) {
+		rh->flush_failure = 1;
 		return;
 	}
 
@@ -524,7 +524,7 @@ void dm_rh_inc_pending(struct dm_region_hash *rh, struct bio_list *bios)
 	struct bio *bio;
 
 	for (bio = bios->head; bio; bio = bio->bi_next) {
-		if (bio_empty_barrier(bio))
+		if (bio->bi_rw & REQ_FLUSH)
 			continue;
 		rh_inc(rh, dm_rh_bio_to_region(rh, bio));
 	}
@@ -555,9 +555,9 @@ void dm_rh_dec(struct dm_region_hash *rh, region_t region)
 		 */
 
 		/* do nothing for DM_RH_NOSYNC */
-		if (unlikely(rh->barrier_failure)) {
+		if (unlikely(rh->flush_failure)) {
 			/*
-			 * If a write barrier failed some time ago, we
+			 * If a write flush failed some time ago, we
 			 * don't know whether or not this write made it
 			 * to the disk, so we must resync the device.
 			 */