
Commit ba368991 authored by Linus Torvalds
Pull device mapper changes from Mike Snitzer:

 - Allow the thin target to be paired with any size external origin; also
   allow thin snapshots to be larger than the external origin.

 - Add support for quickly loading a repetitive pattern into the
   dm-switch target.

 - Use per-bio data in the dm-crypt target instead of always using a
   mempool for each allocation.  This required switching the bio slab to
   kmalloc alignment (a sketch of the per-bio-data pattern follows this
   list).

 - Fix DM core to properly stack the QUEUE_FLAG_NO_SG_MERGE flag.

 - Fix the dm-cache and dm-thin targets' export of the minimum_io_size
   to match the data block size -- this fixes an issue where mkfs.xfs
   would incorrectly infer that RAID striping was in place on the
   underlying storage.

 - Small cleanups in dm-io, dm-mpath and dm-cache.
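
   [Editorial note] The per-bio-data bullet above refers to the facility DM
   core offers bio-based targets: the target declares at construct time how
   much per-bio context it needs, and DM core reserves that space in every
   bio it clones, so the map path needs no mempool allocation.  The sketch
   below is only a rough illustration of that pattern against the 3.17-era
   interface, not code from this pull; the "demo-per-bio" target, its
   structures and field names are invented, while ti->per_bio_data_size and
   dm_per_bio_data() are the DM core hooks the dm-crypt patch builds on.

   /*
    * Illustrative only: a trivial pass-through target that keeps its
    * per-bio context in space reserved by DM core rather than in a
    * mempool of its own.
    */
   #include <linux/module.h>
   #include <linux/slab.h>
   #include <linux/jiffies.h>
   #include <linux/bio.h>
   #include <linux/device-mapper.h>

   struct demo_ctx {                       /* per-target state */
           struct dm_dev *dev;
   };

   struct demo_per_bio_data {              /* lives inside each cloned bio */
           unsigned long start_jiffies;
   };

   static int demo_ctr(struct dm_target *ti, unsigned argc, char **argv)
   {
           struct demo_ctx *dc;

           if (argc != 1) {
                   ti->error = "Requires exactly one device argument";
                   return -EINVAL;
           }

           dc = kzalloc(sizeof(*dc), GFP_KERNEL);
           if (!dc) {
                   ti->error = "Cannot allocate context";
                   return -ENOMEM;
           }

           if (dm_get_device(ti, argv[0], dm_table_get_mode(ti->table),
                             &dc->dev)) {
                   kfree(dc);
                   ti->error = "Device lookup failed";
                   return -EINVAL;
           }

           /* Ask DM core to reserve our context in every cloned bio. */
           ti->per_bio_data_size = sizeof(struct demo_per_bio_data);
           ti->private = dc;
           return 0;
   }

   static void demo_dtr(struct dm_target *ti)
   {
           struct demo_ctx *dc = ti->private;

           dm_put_device(ti, dc->dev);
           kfree(dc);
   }

   static int demo_map(struct dm_target *ti, struct bio *bio)
   {
           struct demo_ctx *dc = ti->private;
           struct demo_per_bio_data *pb =
                   dm_per_bio_data(bio, sizeof(struct demo_per_bio_data));

           /* Stashed without any allocation; an end_io hook could read it
            * back with another dm_per_bio_data() call. */
           pb->start_jiffies = jiffies;
           bio->bi_bdev = dc->dev->bdev;   /* 1:1 pass-through from sector 0 */
           return DM_MAPIO_REMAPPED;
   }

   static struct target_type demo_target = {
           .name    = "demo-per-bio",
           .version = {1, 0, 0},
           .module  = THIS_MODULE,
           .ctr     = demo_ctr,
           .dtr     = demo_dtr,
           .map     = demo_map,
   };

   static int __init demo_init(void)
   {
           return dm_register_target(&demo_target);
   }

   static void __exit demo_exit(void)
   {
           dm_unregister_target(&demo_target);
   }

   module_init(demo_init);
   module_exit(demo_exit);
   MODULE_DESCRIPTION("illustrative per-bio-data sketch");
   MODULE_LICENSE("GPL");

   Because the context lives inside the cloned bio itself, each I/O avoids a
   mempool allocation (and any blocking on mempool refill) in the map path,
   which is the motivation the dm-crypt bullet describes; it in turn required
   the bio slab to honour kmalloc alignment, the subject of the block/bio
   change in this pull.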

* tag 'dm-3.17-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
  dm table: propagate QUEUE_FLAG_NO_SG_MERGE
  dm switch: efficiently support repetitive patterns
  dm switch: factor out switch_region_table_read
  dm cache: set minimum_io_size to cache's data block size
  dm thin: set minimum_io_size to pool's data block size
  dm crypt: use per-bio data
  block: use kmalloc alignment for bio slab
  dm table: make dm_table_supports_discards static
  dm cache metadata: use dm-space-map-metadata.h defined size limits
  dm cache: fail migrations in the do_worker error path
  dm cache: simplify deferred set reference count increments
  dm thin: relax external origin size constraints
  dm thin: switch to an atomic_t for tracking pending new block preparations
  dm mpath: eliminate pg_ready() wrapper
  dm io: simplify dec_count and sync_io
parents a8e4def6 200612ec
+12 −0
@@ -106,6 +106,11 @@ which paths.
    The path number in the range 0 ... (<num_paths> - 1).
    Expressed in hexadecimal (WITHOUT any prefix like 0x).

R<n>,<m>
    This parameter allows repetitive patterns to be loaded quickly. <n> and <m>
    are hexadecimal numbers. The last <n> mappings are repeated in the next <m>
    slots.

Status
======

@@ -124,3 +129,10 @@ Create a switch device with 64kB region size:
Set mappings for the first 7 entries to point to devices switch0, switch1,
switch2, switch0, switch1, switch2, switch1:
    dmsetup message switch 0 set_region_mappings 0:0 :1 :2 :0 :1 :2 :1

Set repetitive mapping. This command:
    dmsetup message switch 0 set_region_mappings 1000:1 :2 R2,10
is equivalent to:
    dmsetup message switch 0 set_region_mappings 1000:1 :2 :1 :2 :1 :2 :1 :2 \
	:1 :2 :1 :2 :1 :2 :1 :2 :1 :2
+2 −1
@@ -112,7 +112,8 @@ static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size)
	bslab = &bio_slabs[entry];

	snprintf(bslab->name, sizeof(bslab->name), "bio-%d", entry);
	slab = kmem_cache_create(bslab->name, sz, 0, SLAB_HWCACHE_ALIGN, NULL);
	slab = kmem_cache_create(bslab->name, sz, ARCH_KMALLOC_MINALIGN,
				 SLAB_HWCACHE_ALIGN, NULL);
	if (!slab)
		goto out_unlock;

+2 −2
@@ -330,7 +330,7 @@ static int __write_initial_superblock(struct dm_cache_metadata *cmd)
	disk_super->discard_root = cpu_to_le64(cmd->discard_root);
	disk_super->discard_block_size = cpu_to_le64(cmd->discard_block_size);
	disk_super->discard_nr_blocks = cpu_to_le64(from_oblock(cmd->discard_nr_blocks));
	disk_super->metadata_block_size = cpu_to_le32(DM_CACHE_METADATA_BLOCK_SIZE >> SECTOR_SHIFT);
	disk_super->metadata_block_size = cpu_to_le32(DM_CACHE_METADATA_BLOCK_SIZE);
	disk_super->data_block_size = cpu_to_le32(cmd->data_block_size);
	disk_super->cache_blocks = cpu_to_le32(0);

@@ -478,7 +478,7 @@ static int __create_persistent_data_objects(struct dm_cache_metadata *cmd,
					    bool may_format_device)
{
	int r;
	cmd->bm = dm_block_manager_create(cmd->bdev, DM_CACHE_METADATA_BLOCK_SIZE,
	cmd->bm = dm_block_manager_create(cmd->bdev, DM_CACHE_METADATA_BLOCK_SIZE << SECTOR_SHIFT,
					  CACHE_METADATA_CACHE_SIZE,
					  CACHE_MAX_CONCURRENT_LOCKS);
	if (IS_ERR(cmd->bm)) {
+3 −5
@@ -9,19 +9,17 @@

#include "dm-cache-block-types.h"
#include "dm-cache-policy-internal.h"
#include "persistent-data/dm-space-map-metadata.h"

/*----------------------------------------------------------------*/

#define DM_CACHE_METADATA_BLOCK_SIZE 4096
#define DM_CACHE_METADATA_BLOCK_SIZE DM_SM_METADATA_BLOCK_SIZE

/* FIXME: remove this restriction */
/*
 * The metadata device is currently limited in size.
 *
 * We have one block of index, which can hold 255 index entries.  Each
 * index entry contains allocation info about 16k metadata blocks.
 */
#define DM_CACHE_METADATA_MAX_SECTORS (255 * (1 << 14) * (DM_CACHE_METADATA_BLOCK_SIZE / (1 << SECTOR_SHIFT)))
#define DM_CACHE_METADATA_MAX_SECTORS DM_SM_METADATA_MAX_SECTORS

/*
 * A metadata device larger than 16GB triggers a warning.
+80 −48
@@ -718,6 +718,22 @@ static int bio_triggers_commit(struct cache *cache, struct bio *bio)
	return bio->bi_rw & (REQ_FLUSH | REQ_FUA);
}

/*
 * You must increment the deferred set whilst the prison cell is held.  To
 * encourage this, we ask for 'cell' to be passed in.
 */
static void inc_ds(struct cache *cache, struct bio *bio,
		   struct dm_bio_prison_cell *cell)
{
	size_t pb_data_size = get_per_bio_data_size(cache);
	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);

	BUG_ON(!cell);
	BUG_ON(pb->all_io_entry);

	pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
}

static void issue(struct cache *cache, struct bio *bio)
{
	unsigned long flags;
@@ -737,6 +753,12 @@ static void issue(struct cache *cache, struct bio *bio)
	spin_unlock_irqrestore(&cache->lock, flags);
}

static void inc_and_issue(struct cache *cache, struct bio *bio, struct dm_bio_prison_cell *cell)
{
	inc_ds(cache, bio, cell);
	issue(cache, bio);
}

static void defer_writethrough_bio(struct cache *cache, struct bio *bio)
{
	unsigned long flags;
@@ -1015,6 +1037,11 @@ static void issue_overwrite(struct dm_cache_migration *mg, struct bio *bio)

	dm_hook_bio(&pb->hook_info, bio, overwrite_endio, mg);
	remap_to_cache_dirty(mg->cache, bio, mg->new_oblock, mg->cblock);

	/*
	 * No need to inc_ds() here, since the cell will be held for the
	 * duration of the io.
	 */
	generic_make_request(bio);
}

@@ -1115,7 +1142,6 @@ static void check_for_quiesced_migrations(struct cache *cache,
		return;

	INIT_LIST_HEAD(&work);
	if (pb->all_io_entry)
	dm_deferred_entry_dec(pb->all_io_entry, &work);

	if (!list_empty(&work))
@@ -1252,6 +1278,11 @@ static void process_flush_bio(struct cache *cache, struct bio *bio)
	else
		remap_to_cache(cache, bio, 0);

	/*
	 * REQ_FLUSH is not directed at any particular block so we don't
	 * need to inc_ds().  REQ_FUA's are split into a write + REQ_FLUSH
	 * by dm-core.
	 */
	issue(cache, bio);
}

@@ -1301,15 +1332,6 @@ static void inc_miss_counter(struct cache *cache, struct bio *bio)
		   &cache->stats.read_miss : &cache->stats.write_miss);
}

static void issue_cache_bio(struct cache *cache, struct bio *bio,
			    struct per_bio_data *pb,
			    dm_oblock_t oblock, dm_cblock_t cblock)
{
	pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
	remap_to_cache_dirty(cache, bio, oblock, cblock);
	issue(cache, bio);
}

static void process_bio(struct cache *cache, struct prealloc *structs,
			struct bio *bio)
{
@@ -1318,8 +1340,6 @@ static void process_bio(struct cache *cache, struct prealloc *structs,
	dm_oblock_t block = get_bio_block(cache, bio);
	struct dm_bio_prison_cell *cell_prealloc, *old_ocell, *new_ocell;
	struct policy_result lookup_result;
	size_t pb_data_size = get_per_bio_data_size(cache);
	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
	bool discarded_block = is_discarded_oblock(cache, block);
	bool passthrough = passthrough_mode(&cache->features);
	bool can_migrate = !passthrough && (discarded_block || spare_migration_bandwidth(cache));
@@ -1359,9 +1379,8 @@ static void process_bio(struct cache *cache, struct prealloc *structs,

			} else {
				/* FIXME: factor out issue_origin() */
				pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
				remap_to_origin_clear_discard(cache, bio, block);
				issue(cache, bio);
				inc_and_issue(cache, bio, new_ocell);
			}
		} else {
			inc_hit_counter(cache, bio);
@@ -1369,20 +1388,21 @@ static void process_bio(struct cache *cache, struct prealloc *structs,
			if (bio_data_dir(bio) == WRITE &&
			    writethrough_mode(&cache->features) &&
			    !is_dirty(cache, lookup_result.cblock)) {
				pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
				remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock);
				issue(cache, bio);
			} else
				issue_cache_bio(cache, bio, pb, block, lookup_result.cblock);
				inc_and_issue(cache, bio, new_ocell);

			} else  {
				remap_to_cache_dirty(cache, bio, block, lookup_result.cblock);
				inc_and_issue(cache, bio, new_ocell);
			}
		}

		break;

	case POLICY_MISS:
		inc_miss_counter(cache, bio);
		pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
		remap_to_origin_clear_discard(cache, bio, block);
		issue(cache, bio);
		inc_and_issue(cache, bio, new_ocell);
		break;

	case POLICY_NEW:
@@ -1501,6 +1521,9 @@ static void process_deferred_flush_bios(struct cache *cache, bool submit_bios)
	bio_list_init(&cache->deferred_flush_bios);
	spin_unlock_irqrestore(&cache->lock, flags);

	/*
	 * These bios have already been through inc_ds()
	 */
	while ((bio = bio_list_pop(&bios)))
		submit_bios ? generic_make_request(bio) : bio_io_error(bio);
}
@@ -1518,6 +1541,9 @@ static void process_deferred_writethrough_bios(struct cache *cache)
	bio_list_init(&cache->deferred_writethrough_bios);
	spin_unlock_irqrestore(&cache->lock, flags);

	/*
	 * These bios have already been through inc_ds()
	 */
	while ((bio = bio_list_pop(&bios)))
		generic_make_request(bio);
}
@@ -1694,6 +1720,7 @@ static void do_worker(struct work_struct *ws)

		if (commit_if_needed(cache)) {
			process_deferred_flush_bios(cache, false);
			process_migrations(cache, &cache->need_commit_migrations, migration_failure);

			/*
			 * FIXME: rollback metadata or just go into a
@@ -2406,16 +2433,13 @@ static int cache_ctr(struct dm_target *ti, unsigned argc, char **argv)
	return r;
}

static int cache_map(struct dm_target *ti, struct bio *bio)
static int __cache_map(struct cache *cache, struct bio *bio, struct dm_bio_prison_cell **cell)
{
	struct cache *cache = ti->private;

	int r;
	dm_oblock_t block = get_bio_block(cache, bio);
	size_t pb_data_size = get_per_bio_data_size(cache);
	bool can_migrate = false;
	bool discarded_block;
	struct dm_bio_prison_cell *cell;
	struct policy_result lookup_result;
	struct per_bio_data *pb = init_per_bio_data(bio, pb_data_size);

@@ -2437,15 +2461,15 @@ static int cache_map(struct dm_target *ti, struct bio *bio)
	/*
	 * Check to see if that block is currently migrating.
	 */
	cell = alloc_prison_cell(cache);
	if (!cell) {
	*cell = alloc_prison_cell(cache);
	if (!*cell) {
		defer_bio(cache, bio);
		return DM_MAPIO_SUBMITTED;
	}

	r = bio_detain(cache, block, bio, cell,
	r = bio_detain(cache, block, bio, *cell,
		       (cell_free_fn) free_prison_cell,
		       cache, &cell);
		       cache, cell);
	if (r) {
		if (r < 0)
			defer_bio(cache, bio);
@@ -2458,11 +2482,12 @@ static int cache_map(struct dm_target *ti, struct bio *bio)
	r = policy_map(cache->policy, block, false, can_migrate, discarded_block,
		       bio, &lookup_result);
	if (r == -EWOULDBLOCK) {
		cell_defer(cache, cell, true);
		cell_defer(cache, *cell, true);
		return DM_MAPIO_SUBMITTED;

	} else if (r) {
		DMERR_LIMIT("Unexpected return from cache replacement policy: %d", r);
		cell_defer(cache, *cell, false);
		bio_io_error(bio);
		return DM_MAPIO_SUBMITTED;
	}
@@ -2476,52 +2501,44 @@ static int cache_map(struct dm_target *ti, struct bio *bio)
				 * We need to invalidate this block, so
				 * defer for the worker thread.
				 */
				cell_defer(cache, cell, true);
				cell_defer(cache, *cell, true);
				r = DM_MAPIO_SUBMITTED;

			} else {
				pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
				inc_miss_counter(cache, bio);
				remap_to_origin_clear_discard(cache, bio, block);

				cell_defer(cache, cell, false);
			}

		} else {
			inc_hit_counter(cache, bio);
			pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);

			if (bio_data_dir(bio) == WRITE && writethrough_mode(&cache->features) &&
			    !is_dirty(cache, lookup_result.cblock))
				remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock);
			else
				remap_to_cache_dirty(cache, bio, block, lookup_result.cblock);

			cell_defer(cache, cell, false);
		}
		break;

	case POLICY_MISS:
		inc_miss_counter(cache, bio);
		pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);

		if (pb->req_nr != 0) {
			/*
			 * This is a duplicate writethrough io that is no
			 * longer needed because the block has been demoted.
			 */
			bio_endio(bio, 0);
			cell_defer(cache, cell, false);
			return DM_MAPIO_SUBMITTED;
		} else {
			cell_defer(cache, *cell, false);
			r = DM_MAPIO_SUBMITTED;

		} else
			remap_to_origin_clear_discard(cache, bio, block);
			cell_defer(cache, cell, false);
		}

		break;

	default:
		DMERR_LIMIT("%s: erroring bio: unknown policy op: %u", __func__,
			    (unsigned) lookup_result.op);
		cell_defer(cache, *cell, false);
		bio_io_error(bio);
		r = DM_MAPIO_SUBMITTED;
	}
@@ -2529,6 +2546,21 @@ static int cache_map(struct dm_target *ti, struct bio *bio)
	return r;
}

static int cache_map(struct dm_target *ti, struct bio *bio)
{
	int r;
	struct dm_bio_prison_cell *cell;
	struct cache *cache = ti->private;

	r = __cache_map(cache, bio, &cell);
	if (r == DM_MAPIO_REMAPPED) {
		inc_ds(cache, bio, cell);
		cell_defer(cache, cell, false);
	}

	return r;
}

static int cache_end_io(struct dm_target *ti, struct bio *bio, int error)
{
	struct cache *cache = ti->private;
@@ -2808,7 +2840,7 @@ static void cache_status(struct dm_target *ti, status_type_t type,
		residency = policy_residency(cache->policy);

		DMEMIT("%u %llu/%llu %u %llu/%llu %u %u %u %u %u %u %lu ",
		       (unsigned)(DM_CACHE_METADATA_BLOCK_SIZE >> SECTOR_SHIFT),
		       (unsigned)DM_CACHE_METADATA_BLOCK_SIZE,
		       (unsigned long long)(nr_blocks_metadata - nr_free_blocks_metadata),
		       (unsigned long long)nr_blocks_metadata,
		       cache->sectors_per_block,
@@ -3062,7 +3094,7 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
	 */
	if (io_opt_sectors < cache->sectors_per_block ||
	    do_div(io_opt_sectors, cache->sectors_per_block)) {
		blk_limits_io_min(limits, 0);
		blk_limits_io_min(limits, cache->sectors_per_block << SECTOR_SHIFT);
		blk_limits_io_opt(limits, cache->sectors_per_block << SECTOR_SHIFT);
	}
	set_discard_limits(cache, limits);
@@ -3072,7 +3104,7 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)

static struct target_type cache_target = {
	.name = "cache",
	.version = {1, 4, 0},
	.version = {1, 5, 0},
	.module = THIS_MODULE,
	.ctr = cache_ctr,
	.dtr = cache_dtr,