Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 1e1a4e8f authored by Linus Torvalds
Browse files
Pull device mapper update from Mike Snitzer:

 - a couple small cleanups in dm-cache, dm-verity, persistent-data's
   dm-btree, and DM core.

 - a 4.1-stable fix for dm-cache that fixes the leaking of deferred bio
   prison cells

 - a 4.2-stable fix that adds feature reporting for the dm-stats
   features added in 4.2

 - improve DM-snapshot to not invalidate the on-disk snapshot if
   snapshot device write overflow occurs; but a write overflow triggered
   through the origin device will still invalidate the snapshot.

 - optimize DM-thinp's async discard submission a bit now that late bio
   splitting has been included in block core.

 - switch DM-cache's SMQ policy lock from using a mutex to a spinlock;
   improves performance on very low latency devices (e.g. NVMe SSD).

 - document DM RAID 4/5/6's discard support

[ I did not pull the slab changes, which weren't appropriate for this
  tree, and weren't obviously the right thing to do anyway.  At the very
  least they need some discussion and explanation before getting merged.

  Because I did not pull the actual tagged commit but did a partial pull
  instead, this merge commit is also obviously missing the git signature
  from the original tag ]

* tag 'dm-4.3-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
  dm cache: fix use after freeing migrations
  dm cache: small cleanups related to deferred prison cell cleanup
  dm cache: fix leaking of deferred bio prison cells
  dm raid: document RAID 4/5/6 discard support
  dm stats: report precise_timestamps and histogram in @stats_list output
  dm thin: optimize async discard submission
  dm snapshot: don't invalidate on-disk image on snapshot write overflow
  dm: remove unlikely() before IS_ERR()
  dm: do not override error code returned from dm_get_device()
  dm: test return value for DM_MAPIO_SUBMITTED
  dm verity: remove unused mempool
  dm cache: move wake_waker() from free_migrations() to where it is needed
  dm btree remove: remove unused function get_nr_entries()
  dm btree: remove unused "dm_block_t root" parameter in btree_split_sibling()
  dm cache policy smq: change the mutex to a spinlock
parents d975f309 cc7da0ba
Loading
Loading
Loading
Loading
+31 −0
Original line number Diff line number Diff line
@@ -209,6 +209,37 @@ include:
	"repair" - Initiate a repair of the array.
	"reshape"- Currently unsupported (-EINVAL).


Discard Support
---------------
The implementation of discard support among hardware vendors varies.
When a block is discarded, some storage devices will return zeroes when
the block is read.  These devices set the 'discard_zeroes_data'
attribute.  Other devices will return random data.  Confusingly, some
devices that advertise 'discard_zeroes_data' will not reliably return
zeroes when discarded blocks are read!  Since RAID 4/5/6 uses blocks
from a number of devices to calculate parity blocks and (for performance
reasons) relies on 'discard_zeroes_data' being reliable, it is important
that the devices be consistent.  Blocks may be discarded in the middle
of a RAID 4/5/6 stripe and if subsequent read results are not
consistent, the parity blocks may be calculated differently at any time,
making the parity blocks useless for redundancy.  It is important to
understand how your hardware behaves with discards if you are going to
enable discards with RAID 4/5/6.

Since the behavior of storage devices is unreliable in this respect,
even when reporting 'discard_zeroes_data', by default RAID 4/5/6
discard support is disabled -- this ensures data integrity at the
expense of losing some performance.

Storage devices that properly support 'discard_zeroes_data' are
increasingly whitelisted in the kernel and can thus be trusted.

For trusted devices, the following dm-raid module parameter can be set
to safely enable discard support for RAID 4/5/6:
    'devices_handle_discards_safely'


Version History
---------------
1.0.0	Initial version.  Support for RAID 4/5/6
+4 −0
Original line number Diff line number Diff line
@@ -121,6 +121,10 @@ Messages

	Output format:
	  <region_id>: <start_sector>+<length> <step> <program_id> <aux_data>
	        precise_timestamps histogram:n1,n2,n3,...

	The strings "precise_timestamps" and "histogram" are printed only
	if they were specified when creating the region.

    @stats_print <region_id> [<starting_line> <number_of_lines>]

+39 −71
Original line number Diff line number Diff line
@@ -772,7 +772,7 @@ struct smq_policy {
	struct dm_cache_policy policy;

	/* protects everything */
	struct mutex lock;
	spinlock_t lock;
	dm_cblock_t cache_size;
	sector_t cache_block_size;

@@ -807,13 +807,7 @@ struct smq_policy {
	/*
	 * Keeps track of time, incremented by the core.  We use this to
	 * avoid attributing multiple hits within the same tick.
	 *
	 * Access to tick_protected should be done with the spin lock held.
	 * It's copied to tick at the start of the map function (within the
	 * mutex).
	 */
	spinlock_t tick_lock;
	unsigned tick_protected;
	unsigned tick;

	/*
@@ -1296,46 +1290,20 @@ static void smq_destroy(struct dm_cache_policy *p)
	kfree(mq);
}

static void copy_tick(struct smq_policy *mq)
{
	unsigned long flags, tick;

	spin_lock_irqsave(&mq->tick_lock, flags);
	tick = mq->tick_protected;
	if (tick != mq->tick) {
		update_sentinels(mq);
		end_hotspot_period(mq);
		end_cache_period(mq);
		mq->tick = tick;
	}
	spin_unlock_irqrestore(&mq->tick_lock, flags);
}

static bool maybe_lock(struct smq_policy *mq, bool can_block)
{
	if (can_block) {
		mutex_lock(&mq->lock);
		return true;
	} else
		return mutex_trylock(&mq->lock);
}

static int smq_map(struct dm_cache_policy *p, dm_oblock_t oblock,
		   bool can_block, bool can_migrate, bool fast_promote,
		   struct bio *bio, struct policy_locker *locker,
		   struct policy_result *result)
{
	int r;
	unsigned long flags;
	struct smq_policy *mq = to_smq_policy(p);

	result->op = POLICY_MISS;

	if (!maybe_lock(mq, can_block))
		return -EWOULDBLOCK;

	copy_tick(mq);
	spin_lock_irqsave(&mq->lock, flags);
	r = map(mq, bio, oblock, can_migrate, fast_promote, locker, result);
	mutex_unlock(&mq->lock);
	spin_unlock_irqrestore(&mq->lock, flags);

	return r;
}
@@ -1343,20 +1311,18 @@ static int smq_map(struct dm_cache_policy *p, dm_oblock_t oblock,
static int smq_lookup(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t *cblock)
{
	int r;
	unsigned long flags;
	struct smq_policy *mq = to_smq_policy(p);
	struct entry *e;

	if (!mutex_trylock(&mq->lock))
		return -EWOULDBLOCK;

	spin_lock_irqsave(&mq->lock, flags);
	e = h_lookup(&mq->table, oblock);
	if (e) {
		*cblock = infer_cblock(mq, e);
		r = 0;
	} else
		r = -ENOENT;

	mutex_unlock(&mq->lock);
	spin_unlock_irqrestore(&mq->lock, flags);

	return r;
}
@@ -1375,20 +1341,22 @@ static void __smq_set_clear_dirty(struct smq_policy *mq, dm_oblock_t oblock, boo

static void smq_set_dirty(struct dm_cache_policy *p, dm_oblock_t oblock)
{
	unsigned long flags;
	struct smq_policy *mq = to_smq_policy(p);

	mutex_lock(&mq->lock);
	spin_lock_irqsave(&mq->lock, flags);
	__smq_set_clear_dirty(mq, oblock, true);
	mutex_unlock(&mq->lock);
	spin_unlock_irqrestore(&mq->lock, flags);
}

static void smq_clear_dirty(struct dm_cache_policy *p, dm_oblock_t oblock)
{
	struct smq_policy *mq = to_smq_policy(p);
	unsigned long flags;

	mutex_lock(&mq->lock);
	spin_lock_irqsave(&mq->lock, flags);
	__smq_set_clear_dirty(mq, oblock, false);
	mutex_unlock(&mq->lock);
	spin_unlock_irqrestore(&mq->lock, flags);
}

static int smq_load_mapping(struct dm_cache_policy *p,
@@ -1433,14 +1401,14 @@ static int smq_walk_mappings(struct dm_cache_policy *p, policy_walk_fn fn,
	struct smq_policy *mq = to_smq_policy(p);
	int r = 0;

	mutex_lock(&mq->lock);

	/*
	 * We don't need to lock here since this method is only called once
	 * the IO has stopped.
	 */
	r = smq_save_hints(mq, &mq->clean, fn, context);
	if (!r)
		r = smq_save_hints(mq, &mq->dirty, fn, context);

	mutex_unlock(&mq->lock);

	return r;
}

@@ -1458,10 +1426,11 @@ static void __remove_mapping(struct smq_policy *mq, dm_oblock_t oblock)
static void smq_remove_mapping(struct dm_cache_policy *p, dm_oblock_t oblock)
{
	struct smq_policy *mq = to_smq_policy(p);
	unsigned long flags;

	mutex_lock(&mq->lock);
	spin_lock_irqsave(&mq->lock, flags);
	__remove_mapping(mq, oblock);
	mutex_unlock(&mq->lock);
	spin_unlock_irqrestore(&mq->lock, flags);
}

static int __remove_cblock(struct smq_policy *mq, dm_cblock_t cblock)
@@ -1480,11 +1449,12 @@ static int __remove_cblock(struct smq_policy *mq, dm_cblock_t cblock)
static int smq_remove_cblock(struct dm_cache_policy *p, dm_cblock_t cblock)
{
	int r;
	unsigned long flags;
	struct smq_policy *mq = to_smq_policy(p);

	mutex_lock(&mq->lock);
	spin_lock_irqsave(&mq->lock, flags);
	r = __remove_cblock(mq, cblock);
	mutex_unlock(&mq->lock);
	spin_unlock_irqrestore(&mq->lock, flags);

	return r;
}
@@ -1537,11 +1507,12 @@ static int smq_writeback_work(struct dm_cache_policy *p, dm_oblock_t *oblock,
			      dm_cblock_t *cblock, bool critical_only)
{
	int r;
	unsigned long flags;
	struct smq_policy *mq = to_smq_policy(p);

	mutex_lock(&mq->lock);
	spin_lock_irqsave(&mq->lock, flags);
	r = __smq_writeback_work(mq, oblock, cblock, critical_only);
	mutex_unlock(&mq->lock);
	spin_unlock_irqrestore(&mq->lock, flags);

	return r;
}
@@ -1562,21 +1533,23 @@ static void __force_mapping(struct smq_policy *mq,
static void smq_force_mapping(struct dm_cache_policy *p,
			      dm_oblock_t current_oblock, dm_oblock_t new_oblock)
{
	unsigned long flags;
	struct smq_policy *mq = to_smq_policy(p);

	mutex_lock(&mq->lock);
	spin_lock_irqsave(&mq->lock, flags);
	__force_mapping(mq, current_oblock, new_oblock);
	mutex_unlock(&mq->lock);
	spin_unlock_irqrestore(&mq->lock, flags);
}

static dm_cblock_t smq_residency(struct dm_cache_policy *p)
{
	dm_cblock_t r;
	unsigned long flags;
	struct smq_policy *mq = to_smq_policy(p);

	mutex_lock(&mq->lock);
	spin_lock_irqsave(&mq->lock, flags);
	r = to_cblock(mq->cache_alloc.nr_allocated);
	mutex_unlock(&mq->lock);
	spin_unlock_irqrestore(&mq->lock, flags);

	return r;
}
@@ -1586,15 +1559,12 @@ static void smq_tick(struct dm_cache_policy *p, bool can_block)
	struct smq_policy *mq = to_smq_policy(p);
	unsigned long flags;

	spin_lock_irqsave(&mq->tick_lock, flags);
	mq->tick_protected++;
	spin_unlock_irqrestore(&mq->tick_lock, flags);

	if (can_block) {
		mutex_lock(&mq->lock);
		copy_tick(mq);
		mutex_unlock(&mq->lock);
	}
	spin_lock_irqsave(&mq->lock, flags);
	mq->tick++;
	update_sentinels(mq);
	end_hotspot_period(mq);
	end_cache_period(mq);
	spin_unlock_irqrestore(&mq->lock, flags);
}

/* Init the policy plugin interface function pointers. */
@@ -1694,10 +1664,8 @@ static struct dm_cache_policy *smq_create(dm_cblock_t cache_size,
	} else
		mq->cache_hit_bits = NULL;

	mq->tick_protected = 0;
	mq->tick = 0;
	mutex_init(&mq->lock);
	spin_lock_init(&mq->tick_lock);
	spin_lock_init(&mq->lock);

	q_init(&mq->hotspot, &mq->es, NR_HOTSPOT_LEVELS);
	mq->hotspot.nr_top_levels = 8;
+19 −20
Original line number Diff line number Diff line
@@ -424,7 +424,6 @@ static void free_migration(struct dm_cache_migration *mg)
		wake_up(&cache->migration_wait);

	mempool_free(mg, cache->migration_pool);
	wake_worker(cache);
}

static int prealloc_data_structs(struct cache *cache, struct prealloc *p)
@@ -1064,14 +1063,6 @@ static void dec_io_migrations(struct cache *cache)
	atomic_dec(&cache->nr_io_migrations);
}

static void __cell_release(struct cache *cache, struct dm_bio_prison_cell *cell,
			   bool holder, struct bio_list *bios)
{
	(holder ? dm_cell_release : dm_cell_release_no_holder)
		(cache->prison, cell, bios);
	free_prison_cell(cache, cell);
}

static bool discard_or_flush(struct bio *bio)
{
	return bio->bi_rw & (REQ_FLUSH | REQ_FUA | REQ_DISCARD);
@@ -1079,14 +1070,13 @@ static bool discard_or_flush(struct bio *bio)

static void __cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell)
{
	if (discard_or_flush(cell->holder))
	if (discard_or_flush(cell->holder)) {
		/*
		 * We have to handle these bios
		 * individually.
		 * We have to handle these bios individually.
		 */
		__cell_release(cache, cell, true, &cache->deferred_bios);

	else
		dm_cell_release(cache->prison, cell, &cache->deferred_bios);
		free_prison_cell(cache, cell);
	} else
		list_add_tail(&cell->user_list, &cache->deferred_cells);
}

@@ -1113,7 +1103,7 @@ static void cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell, boo
static void cell_error_with_code(struct cache *cache, struct dm_bio_prison_cell *cell, int err)
{
	dm_cell_error(cache->prison, cell, err);
	dm_bio_prison_free_cell(cache->prison, cell);
	free_prison_cell(cache, cell);
}

static void cell_requeue(struct cache *cache, struct dm_bio_prison_cell *cell)
@@ -1123,8 +1113,11 @@ static void cell_requeue(struct cache *cache, struct dm_bio_prison_cell *cell)

static void free_io_migration(struct dm_cache_migration *mg)
{
	dec_io_migrations(mg->cache);
	struct cache *cache = mg->cache;

	dec_io_migrations(cache);
	free_migration(mg);
	wake_worker(cache);
}

static void migration_failure(struct dm_cache_migration *mg)
@@ -1351,16 +1344,18 @@ static void issue_discard(struct dm_cache_migration *mg)
{
	dm_dblock_t b, e;
	struct bio *bio = mg->new_ocell->holder;
	struct cache *cache = mg->cache;

	calc_discard_block_range(mg->cache, bio, &b, &e);
	calc_discard_block_range(cache, bio, &b, &e);
	while (b != e) {
		set_discard(mg->cache, b);
		set_discard(cache, b);
		b = to_dblock(from_dblock(b) + 1);
	}

	bio_endio(bio);
	cell_defer(mg->cache, mg->new_ocell, false);
	cell_defer(cache, mg->new_ocell, false);
	free_migration(mg);
	wake_worker(cache);
}

static void issue_copy_or_discard(struct dm_cache_migration *mg)
@@ -1729,6 +1724,8 @@ static void remap_cell_to_origin_clear_discard(struct cache *cache,
		remap_to_origin(cache, bio);
		issue(cache, bio);
	}

	free_prison_cell(cache, cell);
}

static void remap_cell_to_cache_dirty(struct cache *cache, struct dm_bio_prison_cell *cell,
@@ -1763,6 +1760,8 @@ static void remap_cell_to_cache_dirty(struct cache *cache, struct dm_bio_prison_
		remap_to_cache(cache, bio, cblock);
		issue(cache, bio);
	}

	free_prison_cell(cache, cell);
}

/*----------------------------------------------------------------*/
+3 −1
Original line number Diff line number Diff line
@@ -1811,11 +1811,13 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
	}
	cc->iv_offset = tmpll;

	if (dm_get_device(ti, argv[3], dm_table_get_mode(ti->table), &cc->dev)) {
	ret = dm_get_device(ti, argv[3], dm_table_get_mode(ti->table), &cc->dev);
	if (ret) {
		ti->error = "Device lookup failed";
		goto bad;
	}

	ret = -EINVAL;
	if (sscanf(argv[4], "%llu%c", &tmpll, &dummy) != 1) {
		ti->error = "Invalid device sector";
		goto bad;
Loading