
Commit bfc835b5 authored by Linus Torvalds
Pull device mapper fixes from Mike Snitzer:
 "Two stable fixes for dm-cache and one 3.19 DM core fix:

   - fix potential for dm-cache metadata corruption via stale metadata
     buffers being used when switching an inactive cache table to
     active; this could occur due to each table having its own bufio
     client rather than sharing the client between tables.

   - fix dm-cache target to properly account for discard IO while
     suspending; otherwise IO quiescing could complete prematurely.

   - fix DM core's handling of multiple internal suspends by maintaining
     an 'internal_suspend_count' and only resuming the device when this
     count drops to zero"

* tag 'dm-3.19-fixes-2' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
  dm: fix handling of multiple internal suspends
  dm cache: fix problematic dual use of a single migration count variable
  dm cache: share cache-metadata object across inactive and active DM tables
parents 8e908e99 96b26c8c
drivers/md/dm-cache-metadata.c  +95 −6
@@ -94,6 +94,9 @@ struct cache_disk_superblock {
} __packed;

struct dm_cache_metadata {
+	atomic_t ref_count;
+	struct list_head list;
+
	struct block_device *bdev;
	struct dm_block_manager *bm;
	struct dm_space_map *metadata_sm;
@@ -669,7 +672,7 @@ static void unpack_value(__le64 value_le, dm_oblock_t *block, unsigned *flags)

/*----------------------------------------------------------------*/

-struct dm_cache_metadata *dm_cache_metadata_open(struct block_device *bdev,
+static struct dm_cache_metadata *metadata_open(struct block_device *bdev,
					       sector_t data_block_size,
					       bool may_format_device,
					       size_t policy_hint_size)
@@ -683,6 +686,7 @@ struct dm_cache_metadata *dm_cache_metadata_open(struct block_device *bdev,
		return NULL;
	}

+	atomic_set(&cmd->ref_count, 1);
	init_rwsem(&cmd->root_lock);
	cmd->bdev = bdev;
	cmd->data_block_size = data_block_size;
@@ -705,11 +709,96 @@ struct dm_cache_metadata *dm_cache_metadata_open(struct block_device *bdev,
	return cmd;
}

+/*
+ * We keep a little list of ref counted metadata objects to prevent two
+ * different target instances creating separate bufio instances.  This is
+ * an issue if a table is reloaded before the suspend.
+ */
+static DEFINE_MUTEX(table_lock);
+static LIST_HEAD(table);
+
+static struct dm_cache_metadata *lookup(struct block_device *bdev)
+{
+	struct dm_cache_metadata *cmd;
+
+	list_for_each_entry(cmd, &table, list)
+		if (cmd->bdev == bdev) {
+			atomic_inc(&cmd->ref_count);
+			return cmd;
+		}
+
+	return NULL;
+}
+
+static struct dm_cache_metadata *lookup_or_open(struct block_device *bdev,
+						sector_t data_block_size,
+						bool may_format_device,
+						size_t policy_hint_size)
+{
+	struct dm_cache_metadata *cmd, *cmd2;
+
+	mutex_lock(&table_lock);
+	cmd = lookup(bdev);
+	mutex_unlock(&table_lock);
+
+	if (cmd)
+		return cmd;
+
+	cmd = metadata_open(bdev, data_block_size, may_format_device, policy_hint_size);
+	if (cmd) {
+		mutex_lock(&table_lock);
+		cmd2 = lookup(bdev);
+		if (cmd2) {
+			mutex_unlock(&table_lock);
+			__destroy_persistent_data_objects(cmd);
+			kfree(cmd);
+			return cmd2;
+		}
+		list_add(&cmd->list, &table);
+		mutex_unlock(&table_lock);
+	}
+
+	return cmd;
+}
+
+static bool same_params(struct dm_cache_metadata *cmd, sector_t data_block_size)
+{
+	if (cmd->data_block_size != data_block_size) {
+		DMERR("data_block_size (%llu) different from that in metadata (%llu)\n",
+		      (unsigned long long) data_block_size,
+		      (unsigned long long) cmd->data_block_size);
+		return false;
+	}
+
+	return true;
+}
+
+struct dm_cache_metadata *dm_cache_metadata_open(struct block_device *bdev,
+						 sector_t data_block_size,
+						 bool may_format_device,
+						 size_t policy_hint_size)
+{
+	struct dm_cache_metadata *cmd = lookup_or_open(bdev, data_block_size,
+						       may_format_device, policy_hint_size);
+	if (cmd && !same_params(cmd, data_block_size)) {
+		dm_cache_metadata_close(cmd);
+		return NULL;
+	}
+
+	return cmd;
+}
+
+void dm_cache_metadata_close(struct dm_cache_metadata *cmd)
{
+	if (atomic_dec_and_test(&cmd->ref_count)) {
+		mutex_lock(&table_lock);
+		list_del(&cmd->list);
+		mutex_unlock(&table_lock);
+
+		__destroy_persistent_data_objects(cmd);
+		kfree(cmd);
+	}
}

/*
 * Checks that the given cache block is either unmapped or clean.
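
The new lookup_or_open() above is a classic refcounted lookup-or-create: the expensive open happens outside table_lock, so the function must re-check the list afterwards and discard its own copy if a concurrent caller got there first, leaving both tables sharing one metadata object (and hence one bufio client). Here is a minimal userspace sketch of the same pattern, with pthreads and a string key standing in for the kernel mutex and block_device; all names are illustrative, not the kernel's:

#include <pthread.h>
#include <stdatomic.h>
#include <stdlib.h>
#include <string.h>

struct metadata {
	atomic_int ref_count;
	struct metadata *next;
	char dev[64];			/* stand-in for cmd->bdev */
};

static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;
static struct metadata *table;

/* Must be called with table_lock held; takes a reference on a hit. */
static struct metadata *lookup(const char *dev)
{
	for (struct metadata *m = table; m; m = m->next)
		if (!strcmp(m->dev, dev)) {
			atomic_fetch_add(&m->ref_count, 1);
			return m;
		}
	return NULL;
}

struct metadata *metadata_get(const char *dev)
{
	struct metadata *m, *m2;

	pthread_mutex_lock(&table_lock);
	m = lookup(dev);
	pthread_mutex_unlock(&table_lock);
	if (m)
		return m;

	/* Expensive construction happens outside the lock... */
	m = calloc(1, sizeof(*m));
	if (!m)
		return NULL;
	atomic_init(&m->ref_count, 1);
	strncpy(m->dev, dev, sizeof(m->dev) - 1);

	/* ...so re-check for a concurrent winner before inserting. */
	pthread_mutex_lock(&table_lock);
	m2 = lookup(dev);
	if (m2) {
		pthread_mutex_unlock(&table_lock);
		free(m);		/* lost the race: drop our copy */
		return m2;
	}
	m->next = table;
	table = m;
	pthread_mutex_unlock(&table_lock);
	return m;
}

void metadata_put(struct metadata *m)
{
	/* Drop the reference under the lock so a concurrent
	 * metadata_get() cannot revive an object being freed. */
	pthread_mutex_lock(&table_lock);
	if (atomic_fetch_sub(&m->ref_count, 1) != 1) {
		pthread_mutex_unlock(&table_lock);
		return;
	}
	for (struct metadata **p = &table; *p; p = &(*p)->next)
		if (*p == m) {
			*p = m->next;
			break;
		}
	pthread_mutex_unlock(&table_lock);
	free(m);
}

The kernel version additionally rejects a second opener whose parameters disagree with the existing object, which is what same_params() checks inside the new dm_cache_metadata_open() wrapper.
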
drivers/md/dm-cache-target.c  +50 −39
@@ -221,7 +221,13 @@ struct cache {
	struct list_head need_commit_migrations;
	sector_t migration_threshold;
	wait_queue_head_t migration_wait;
-	atomic_t nr_migrations;
+	atomic_t nr_allocated_migrations;
+
+	/*
+	 * The number of in flight migrations that are performing
+	 * background io. eg, promotion, writeback.
+	 */
+	atomic_t nr_io_migrations;

	wait_queue_head_t quiescing_wait;
	atomic_t quiescing;
@@ -258,7 +264,6 @@ struct cache {
	struct dm_deferred_set *all_io_ds;

	mempool_t *migration_pool;
-	struct dm_cache_migration *next_migration;

	struct dm_cache_policy *policy;
	unsigned policy_nr_args;
@@ -350,10 +355,31 @@ static void free_prison_cell(struct cache *cache, struct dm_bio_prison_cell *cel
	dm_bio_prison_free_cell(cache->prison, cell);
}

+static struct dm_cache_migration *alloc_migration(struct cache *cache)
+{
+	struct dm_cache_migration *mg;
+
+	mg = mempool_alloc(cache->migration_pool, GFP_NOWAIT);
+	if (mg) {
+		mg->cache = cache;
+		atomic_inc(&mg->cache->nr_allocated_migrations);
+	}
+
+	return mg;
+}
+
+static void free_migration(struct dm_cache_migration *mg)
+{
+	if (atomic_dec_and_test(&mg->cache->nr_allocated_migrations))
+		wake_up(&mg->cache->migration_wait);
+
+	mempool_free(mg, mg->cache->migration_pool);
+}
+
static int prealloc_data_structs(struct cache *cache, struct prealloc *p)
{
	if (!p->mg) {
-		p->mg = mempool_alloc(cache->migration_pool, GFP_NOWAIT);
+		p->mg = alloc_migration(cache);
		if (!p->mg)
			return -ENOMEM;
	}
@@ -382,7 +408,7 @@ static void prealloc_free_structs(struct cache *cache, struct prealloc *p)
		free_prison_cell(cache, p->cell1);

	if (p->mg)
-		mempool_free(p->mg, cache->migration_pool);
+		free_migration(p->mg);
}

static struct dm_cache_migration *prealloc_get_migration(struct prealloc *p)
@@ -854,24 +880,14 @@ static void remap_to_origin_then_cache(struct cache *cache, struct bio *bio,
 * Migration covers moving data from the origin device to the cache, or
 * vice versa.
 *--------------------------------------------------------------*/
-static void free_migration(struct dm_cache_migration *mg)
-{
-	mempool_free(mg, mg->cache->migration_pool);
-}
-
-static void inc_nr_migrations(struct cache *cache)
+static void inc_io_migrations(struct cache *cache)
{
-	atomic_inc(&cache->nr_migrations);
+	atomic_inc(&cache->nr_io_migrations);
}

-static void dec_nr_migrations(struct cache *cache)
+static void dec_io_migrations(struct cache *cache)
{
-	atomic_dec(&cache->nr_migrations);
-
-	/*
-	 * Wake the worker in case we're suspending the target.
-	 */
-	wake_up(&cache->migration_wait);
+	atomic_dec(&cache->nr_io_migrations);
}

static void __cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell,
@@ -894,11 +910,10 @@ static void cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell,
	wake_worker(cache);
}

-static void cleanup_migration(struct dm_cache_migration *mg)
+static void free_io_migration(struct dm_cache_migration *mg)
{
-	struct cache *cache = mg->cache;
+	dec_io_migrations(mg->cache);
	free_migration(mg);
-	dec_nr_migrations(cache);
}

static void migration_failure(struct dm_cache_migration *mg)
@@ -923,7 +938,7 @@ static void migration_failure(struct dm_cache_migration *mg)
		cell_defer(cache, mg->new_ocell, true);
	}

-	cleanup_migration(mg);
+	free_io_migration(mg);
}

static void migration_success_pre_commit(struct dm_cache_migration *mg)
@@ -934,7 +949,7 @@ static void migration_success_pre_commit(struct dm_cache_migration *mg)
	if (mg->writeback) {
		clear_dirty(cache, mg->old_oblock, mg->cblock);
		cell_defer(cache, mg->old_ocell, false);
-		cleanup_migration(mg);
+		free_io_migration(mg);
		return;

	} else if (mg->demote) {
@@ -944,14 +959,14 @@ static void migration_success_pre_commit(struct dm_cache_migration *mg)
					     mg->old_oblock);
			if (mg->promote)
				cell_defer(cache, mg->new_ocell, true);
-			cleanup_migration(mg);
+			free_io_migration(mg);
			return;
		}
	} else {
		if (dm_cache_insert_mapping(cache->cmd, mg->cblock, mg->new_oblock)) {
			DMWARN_LIMIT("promotion failed; couldn't update on disk metadata");
			policy_remove_mapping(cache->policy, mg->new_oblock);
-			cleanup_migration(mg);
+			free_io_migration(mg);
			return;
		}
	}
@@ -984,7 +999,7 @@ static void migration_success_post_commit(struct dm_cache_migration *mg)
		} else {
			if (mg->invalidate)
				policy_remove_mapping(cache->policy, mg->old_oblock);
-			cleanup_migration(mg);
+			free_io_migration(mg);
		}

	} else {
@@ -999,7 +1014,7 @@ static void migration_success_post_commit(struct dm_cache_migration *mg)
			bio_endio(mg->new_ocell->holder, 0);
			cell_defer(cache, mg->new_ocell, false);
		}
-		cleanup_migration(mg);
+		free_io_migration(mg);
	}
}

@@ -1251,7 +1266,7 @@ static void promote(struct cache *cache, struct prealloc *structs,
	mg->new_ocell = cell;
	mg->start_jiffies = jiffies;

-	inc_nr_migrations(cache);
+	inc_io_migrations(cache);
	quiesce_migration(mg);
}

@@ -1275,7 +1290,7 @@ static void writeback(struct cache *cache, struct prealloc *structs,
	mg->new_ocell = NULL;
	mg->start_jiffies = jiffies;

-	inc_nr_migrations(cache);
+	inc_io_migrations(cache);
	quiesce_migration(mg);
}

@@ -1302,7 +1317,7 @@ static void demote_then_promote(struct cache *cache, struct prealloc *structs,
	mg->new_ocell = new_ocell;
	mg->start_jiffies = jiffies;

-	inc_nr_migrations(cache);
+	inc_io_migrations(cache);
	quiesce_migration(mg);
}

@@ -1330,7 +1345,7 @@ static void invalidate(struct cache *cache, struct prealloc *structs,
	mg->new_ocell = NULL;
	mg->start_jiffies = jiffies;

-	inc_nr_migrations(cache);
+	inc_io_migrations(cache);
	quiesce_migration(mg);
}

@@ -1412,7 +1427,7 @@ static void process_discard_bio(struct cache *cache, struct prealloc *structs,

static bool spare_migration_bandwidth(struct cache *cache)
{
-	sector_t current_volume = (atomic_read(&cache->nr_migrations) + 1) *
+	sector_t current_volume = (atomic_read(&cache->nr_io_migrations) + 1) *
		cache->sectors_per_block;
	return current_volume < cache->migration_threshold;
}
@@ -1764,7 +1779,7 @@ static void stop_quiescing(struct cache *cache)

static void wait_for_migrations(struct cache *cache)
{
-	wait_event(cache->migration_wait, !atomic_read(&cache->nr_migrations));
+	wait_event(cache->migration_wait, !atomic_read(&cache->nr_allocated_migrations));
}

static void stop_worker(struct cache *cache)
@@ -1876,9 +1891,6 @@ static void destroy(struct cache *cache)
{
	unsigned i;

-	if (cache->next_migration)
-		mempool_free(cache->next_migration, cache->migration_pool);
-
	if (cache->migration_pool)
		mempool_destroy(cache->migration_pool);

@@ -2424,7 +2436,8 @@ static int cache_create(struct cache_args *ca, struct cache **result)
	INIT_LIST_HEAD(&cache->quiesced_migrations);
	INIT_LIST_HEAD(&cache->completed_migrations);
	INIT_LIST_HEAD(&cache->need_commit_migrations);
-	atomic_set(&cache->nr_migrations, 0);
+	atomic_set(&cache->nr_allocated_migrations, 0);
+	atomic_set(&cache->nr_io_migrations, 0);
	init_waitqueue_head(&cache->migration_wait);

	init_waitqueue_head(&cache->quiescing_wait);
@@ -2487,8 +2500,6 @@ static int cache_create(struct cache_args *ca, struct cache **result)
		goto bad;
	}

-	cache->next_migration = NULL;
-
	cache->need_tick_bio = true;
	cache->sized = false;
	cache->invalidate = false;
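
The counter split above is what fixes the premature quiescing: a migration preallocated for a discard holds a migration object without doing background io, so suspend must wait on the allocation count while the bandwidth throttle in spare_migration_bandwidth() should only see the io count. A toy pthreads sketch of the two-counter discipline (all names illustrative, not the kernel code):

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>

static atomic_int nr_allocated;	/* lives from alloc to free            */
static atomic_int nr_io;	/* covers only the background-io phase */
static pthread_mutex_t wait_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  wait_cond = PTHREAD_COND_INITIALIZER;

void migration_alloc(void)
{
	atomic_fetch_add(&nr_allocated, 1);
}

void migration_free(void)
{
	/* Waking only on the drop to zero mirrors free_migration(). */
	if (atomic_fetch_sub(&nr_allocated, 1) == 1) {
		pthread_mutex_lock(&wait_lock);
		pthread_cond_broadcast(&wait_cond);
		pthread_mutex_unlock(&wait_lock);
	}
}

void io_begin(void) { atomic_fetch_add(&nr_io, 1); }
void io_end(void)   { atomic_fetch_sub(&nr_io, 1); }

/* Throttling sees in-flight io only: an allocated-but-idle migration
 * (e.g. one parked behind a discard) consumes no "bandwidth". */
bool spare_bandwidth(long sectors_per_block, long threshold)
{
	return (atomic_load(&nr_io) + 1) * sectors_per_block < threshold;
}

/* Suspend waits for every allocation, idle or not, so quiescing
 * cannot complete while a discard still holds a migration. */
void wait_for_migrations(void)
{
	pthread_mutex_lock(&wait_lock);
	while (atomic_load(&nr_allocated))
		pthread_cond_wait(&wait_cond, &wait_lock);
	pthread_mutex_unlock(&wait_lock);
}

With a single counter, the old code had to choose one lifetime for both purposes, which is exactly the "problematic dual use" the shortlog refers to.
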
drivers/md/dm.c  +7 −2
@@ -206,6 +206,9 @@ struct mapped_device {
	/* zero-length flush that will be cloned and submitted to targets */
	struct bio flush_bio;

+	/* the number of internal suspends */
+	unsigned internal_suspend_count;
+
	struct dm_stats stats;
};

@@ -2928,7 +2931,7 @@ static void __dm_internal_suspend(struct mapped_device *md, unsigned suspend_fla
{
	struct dm_table *map = NULL;

-	if (dm_suspended_internally_md(md))
+	if (md->internal_suspend_count++)
		return; /* nested internal suspend */

	if (dm_suspended_md(md)) {
@@ -2953,7 +2956,9 @@ static void __dm_internal_suspend(struct mapped_device *md, unsigned suspend_fla

static void __dm_internal_resume(struct mapped_device *md)
{
-	if (!dm_suspended_internally_md(md))
+	BUG_ON(!md->internal_suspend_count);
+
+	if (--md->internal_suspend_count)
		return; /* resume from nested internal suspend */

	if (dm_suspended_md(md))