Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 48915c2c authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull device mapper updates from Mike Snitzer:

 - various fixes and cleanups for request-based DM core

 - add support for delaying the requeue of requests; used by DM
   multipath when all paths have failed and 'queue_if_no_path' is
   enabled

 - DM cache improvements to speed up the loading of metadata and the
   writing of the hint array

 - fix potential for a dm-crypt crash on device teardown

 - remove dm_bufio_cond_resched() and just use cond_resched()

 - change DM multipath to return a reservation conflict error
   immediately, rather than failing the path and retrying (potentially
   indefinitely)

* tag 'dm-4.9-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm: (24 commits)
  dm mpath: always return reservation conflict without failing over
  dm bufio: remove dm_bufio_cond_resched()
  dm crypt: fix crash on exit
  dm cache metadata: switch to using the new cursor api for loading metadata
  dm array: introduce cursor api
  dm btree: introduce cursor api
  dm cache policy smq: distribute entries to random levels when switching to smq
  dm cache: speed up writing of the hint array
  dm array: add dm_array_new()
  dm mpath: delay the requeue of blk-mq requests while all paths down
  dm mpath: use dm_mq_kick_requeue_list()
  dm rq: introduce dm_mq_kick_requeue_list()
  dm rq: reduce arguments passed to map_request() and dm_requeue_original_request()
  dm rq: add DM_MAPIO_DELAY_REQUEUE to delay requeue of blk-mq requests
  dm: convert wait loops to use autoremove_wake_function()
  dm: use signal_pending_state() in dm_wait_for_completion()
  dm: rename task state function arguments
  dm: add two lockdep_assert_held() statements
  dm rq: simplify dm_old_stop_queue()
  dm mpath: check if path's request_queue is dying in activate_path()
  ...
parents b9044ac8 8ff232c1
Loading
Loading
Loading
Loading
+9 −22
Original line number Diff line number Diff line
@@ -191,19 +191,6 @@ static void dm_bufio_unlock(struct dm_bufio_client *c)
	mutex_unlock(&c->lock);
}

/*
 * Voluntary-preemption helper for long dm-bufio loops.
 *
 * Under CONFIG_PREEMPT_VOLUNTARY this yields the CPU at this explicit
 * rescheduling point when the scheduler has flagged the task
 * (need_resched()); otherwise it compiles away to nothing.
 *
 * FIXME Move to sched.h?
 */
#ifdef CONFIG_PREEMPT_VOLUNTARY
#  define dm_bufio_cond_resched()		\
do {						\
	if (unlikely(need_resched()))		\
		_cond_resched();		\
} while (0)
#else
/* No voluntary preemption configured: the helper is a no-op. */
#  define dm_bufio_cond_resched()                do { } while (0)
#endif

/*----------------------------------------------------------------*/

/*
@@ -741,7 +728,7 @@ static void __flush_write_list(struct list_head *write_list)
			list_entry(write_list->next, struct dm_buffer, write_list);
		list_del(&b->write_list);
		submit_io(b, WRITE, b->block, write_endio);
		dm_bufio_cond_resched();
		cond_resched();
	}
	blk_finish_plug(&plug);
}
@@ -780,7 +767,7 @@ static struct dm_buffer *__get_unclaimed_buffer(struct dm_bufio_client *c)
			__unlink_buffer(b);
			return b;
		}
		dm_bufio_cond_resched();
		cond_resched();
	}

	list_for_each_entry_reverse(b, &c->lru[LIST_DIRTY], lru_list) {
@@ -791,7 +778,7 @@ static struct dm_buffer *__get_unclaimed_buffer(struct dm_bufio_client *c)
			__unlink_buffer(b);
			return b;
		}
		dm_bufio_cond_resched();
		cond_resched();
	}

	return NULL;
@@ -923,7 +910,7 @@ static void __write_dirty_buffers_async(struct dm_bufio_client *c, int no_wait,
			return;

		__write_dirty_buffer(b, write_list);
		dm_bufio_cond_resched();
		cond_resched();
	}
}

@@ -973,7 +960,7 @@ static void __check_watermark(struct dm_bufio_client *c,
			return;

		__free_buffer_wake(b);
		dm_bufio_cond_resched();
		cond_resched();
	}

	if (c->n_buffers[LIST_DIRTY] > threshold_buffers)
@@ -1170,7 +1157,7 @@ void dm_bufio_prefetch(struct dm_bufio_client *c,
				submit_io(b, READ, b->block, read_endio);
			dm_bufio_release(b);

			dm_bufio_cond_resched();
			cond_resched();

			if (!n_blocks)
				goto flush_plug;
@@ -1291,7 +1278,7 @@ int dm_bufio_write_dirty_buffers(struct dm_bufio_client *c)
		    !test_bit(B_WRITING, &b->state))
			__relink_lru(b, LIST_CLEAN);

		dm_bufio_cond_resched();
		cond_resched();

		/*
		 * If we dropped the lock, the list is no longer consistent,
@@ -1574,7 +1561,7 @@ static unsigned long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan,
				freed++;
			if (!--nr_to_scan || ((count - freed) <= retain_target))
				return freed;
			dm_bufio_cond_resched();
			cond_resched();
		}
	}
	return freed;
@@ -1808,7 +1795,7 @@ static void __evict_old_buffers(struct dm_bufio_client *c, unsigned long age_hz)
		if (__try_evict_buffer(b, 0))
			count--;

		dm_bufio_cond_resched();
		cond_resched();
	}

	dm_bufio_unlock(c);
+107 −76
Original line number Diff line number Diff line
@@ -140,6 +140,13 @@ struct dm_cache_metadata {
	 * the device.
	 */
	bool fail_io:1;

	/*
	 * These structures are used when loading metadata.  They're too
	 * big to put on the stack.
	 */
	struct dm_array_cursor mapping_cursor;
	struct dm_array_cursor hint_cursor;
};

/*-------------------------------------------------------------------
@@ -1171,31 +1178,37 @@ static bool hints_array_available(struct dm_cache_metadata *cmd,
		hints_array_initialized(cmd);
}

static int __load_mapping(void *context, uint64_t cblock, void *leaf)
static int __load_mapping(struct dm_cache_metadata *cmd,
			  uint64_t cb, bool hints_valid,
			  struct dm_array_cursor *mapping_cursor,
			  struct dm_array_cursor *hint_cursor,
			  load_mapping_fn fn, void *context)
{
	int r = 0;
	bool dirty;
	__le64 value;
	__le32 hint_value = 0;

	__le64 mapping;
	__le32 hint = 0;

	__le64 *mapping_value_le;
	__le32 *hint_value_le;

	dm_oblock_t oblock;
	unsigned flags;
	struct thunk *thunk = context;
	struct dm_cache_metadata *cmd = thunk->cmd;

	memcpy(&value, leaf, sizeof(value));
	unpack_value(value, &oblock, &flags);
	dm_array_cursor_get_value(mapping_cursor, (void **) &mapping_value_le);
	memcpy(&mapping, mapping_value_le, sizeof(mapping));
	unpack_value(mapping, &oblock, &flags);

	if (flags & M_VALID) {
		if (thunk->hints_valid) {
			r = dm_array_get_value(&cmd->hint_info, cmd->hint_root,
					       cblock, &hint_value);
			if (r && r != -ENODATA)
				return r;
		if (hints_valid) {
			dm_array_cursor_get_value(hint_cursor, (void **) &hint_value_le);
			memcpy(&hint, hint_value_le, sizeof(hint));
		}

		dirty = thunk->respect_dirty_flags ? (flags & M_DIRTY) : true;
		r = thunk->fn(thunk->context, oblock, to_cblock(cblock),
			      dirty, le32_to_cpu(hint_value), thunk->hints_valid);
		r = fn(context, oblock, to_cblock(cb), flags & M_DIRTY,
		       le32_to_cpu(hint), hints_valid);
		if (r)
			DMERR("policy couldn't load cblock");
	}

	return r;
@@ -1205,16 +1218,60 @@ static int __load_mappings(struct dm_cache_metadata *cmd,
			   struct dm_cache_policy *policy,
			   load_mapping_fn fn, void *context)
{
	struct thunk thunk;
	int r;
	uint64_t cb;

	bool hints_valid = hints_array_available(cmd, policy);

	if (from_cblock(cmd->cache_blocks) == 0)
		/* Nothing to do */
		return 0;

	r = dm_array_cursor_begin(&cmd->info, cmd->root, &cmd->mapping_cursor);
	if (r)
		return r;

	if (hints_valid) {
		r = dm_array_cursor_begin(&cmd->hint_info, cmd->hint_root, &cmd->hint_cursor);
		if (r) {
			dm_array_cursor_end(&cmd->mapping_cursor);
			return r;
		}
	}

	for (cb = 0; ; cb++) {
		r = __load_mapping(cmd, cb, hints_valid,
				   &cmd->mapping_cursor, &cmd->hint_cursor,
				   fn, context);
		if (r)
			goto out;

	thunk.fn = fn;
	thunk.context = context;
		/*
		 * We need to break out before we move the cursors.
		 */
		if (cb >= (from_cblock(cmd->cache_blocks) - 1))
			break;

	thunk.cmd = cmd;
	thunk.respect_dirty_flags = cmd->clean_when_opened;
	thunk.hints_valid = hints_array_available(cmd, policy);
		r = dm_array_cursor_next(&cmd->mapping_cursor);
		if (r) {
			DMERR("dm_array_cursor_next for mapping failed");
			goto out;
		}

	return dm_array_walk(&cmd->info, cmd->root, __load_mapping, &thunk);
		if (hints_valid) {
			r = dm_array_cursor_next(&cmd->hint_cursor);
			if (r) {
				DMERR("dm_array_cursor_next for hint failed");
				goto out;
			}
		}
	}
out:
	dm_array_cursor_end(&cmd->mapping_cursor);
	if (hints_valid)
		dm_array_cursor_end(&cmd->hint_cursor);

	return r;
}

int dm_cache_load_mappings(struct dm_cache_metadata *cmd,
@@ -1368,10 +1425,24 @@ int dm_cache_get_metadata_dev_size(struct dm_cache_metadata *cmd,

/*----------------------------------------------------------------*/

static int begin_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *policy)
/*
 * dm_array_new() callback: fetch the policy hint for cache block
 * 'index' and store it at 'value_le' in little-endian on-disk form.
 * 'context' is the struct dm_cache_policy being queried.
 */
static int get_hint(uint32_t index, void *value_le, void *context)
{
	struct dm_cache_policy *policy = context;
	uint32_t hint = policy_get_hint(policy, to_cblock(index));

	*((__le32 *) value_le) = cpu_to_le32(hint);

	return 0;
}

/*
 * It's quicker to always delete the hint array, and recreate with
 * dm_array_new().
 */
static int write_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *policy)
{
	int r;
	__le32 value;
	size_t hint_size;
	const char *policy_name = dm_cache_policy_get_name(policy);
	const unsigned *policy_version = dm_cache_policy_get_version(policy);
@@ -1380,7 +1451,6 @@ static int begin_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *po
	    (strlen(policy_name) > sizeof(cmd->policy_name) - 1))
		return -EINVAL;

	if (!policy_unchanged(cmd, policy)) {
	strncpy(cmd->policy_name, policy_name, sizeof(cmd->policy_name));
	memcpy(cmd->policy_version, policy_version, sizeof(cmd->policy_version));

@@ -1395,48 +1465,9 @@ static int begin_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *po
			return r;
	}

		r = dm_array_empty(&cmd->hint_info, &cmd->hint_root);
		if (r)
			return r;

		value = cpu_to_le32(0);
		__dm_bless_for_disk(&value);
		r = dm_array_resize(&cmd->hint_info, cmd->hint_root, 0,
	return dm_array_new(&cmd->hint_info, &cmd->hint_root,
			    from_cblock(cmd->cache_blocks),
				    &value, &cmd->hint_root);
		if (r)
			return r;
	}

	return 0;
}

static int save_hint(void *context, dm_cblock_t cblock, dm_oblock_t oblock, uint32_t hint)
{
	struct dm_cache_metadata *cmd = context;
	__le32 value = cpu_to_le32(hint);
	int r;

	__dm_bless_for_disk(&value);

	r = dm_array_set_value(&cmd->hint_info, cmd->hint_root,
			       from_cblock(cblock), &value, &cmd->hint_root);
	cmd->changed = true;

	return r;
}

static int write_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *policy)
{
	int r;

	r = begin_hints(cmd, policy);
	if (r) {
		DMERR("begin_hints failed");
		return r;
	}

	return policy_walk_mappings(policy, save_hint, cmd);
			    get_hint, policy);
}

int dm_cache_write_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *policy)
+1 −1
Original line number Diff line number Diff line
@@ -395,7 +395,7 @@ static void init_policy_functions(struct policy *p)
	p->policy.set_dirty = wb_set_dirty;
	p->policy.clear_dirty = wb_clear_dirty;
	p->policy.load_mapping = wb_load_mapping;
	p->policy.walk_mappings = NULL;
	p->policy.get_hint = NULL;
	p->policy.remove_mapping = wb_remove_mapping;
	p->policy.writeback_work = wb_writeback_work;
	p->policy.force_mapping = wb_force_mapping;
+3 −3
Original line number Diff line number Diff line
@@ -48,10 +48,10 @@ static inline int policy_load_mapping(struct dm_cache_policy *p,
	return p->load_mapping(p, oblock, cblock, hint, hint_valid);
}

static inline int policy_walk_mappings(struct dm_cache_policy *p,
				      policy_walk_fn fn, void *context)
static inline uint32_t policy_get_hint(struct dm_cache_policy *p,
				       dm_cblock_t cblock)
{
	return p->walk_mappings ? p->walk_mappings(p, fn, context) : 0;
	return p->get_hint ? p->get_hint(p, cblock) : 0;
}

static inline int policy_writeback_work(struct dm_cache_policy *p,
+12 −33
Original line number Diff line number Diff line
@@ -1359,6 +1359,11 @@ static void smq_clear_dirty(struct dm_cache_policy *p, dm_oblock_t oblock)
	spin_unlock_irqrestore(&mq->lock, flags);
}

/*
 * Derive a pseudo-random (but deterministic per cblock) queue level by
 * hashing the cache-block number and masking it into the valid level
 * range.  NR_CACHE_LEVELS must be a power of two for the mask to work.
 */
static unsigned random_level(dm_cblock_t cblock)
{
	unsigned hashed = hash_32_generic(from_cblock(cblock), 9);

	return hashed & (NR_CACHE_LEVELS - 1);
}

static int smq_load_mapping(struct dm_cache_policy *p,
			    dm_oblock_t oblock, dm_cblock_t cblock,
			    uint32_t hint, bool hint_valid)
@@ -1369,47 +1374,21 @@ static int smq_load_mapping(struct dm_cache_policy *p,
	e = alloc_particular_entry(&mq->cache_alloc, from_cblock(cblock));
	e->oblock = oblock;
	e->dirty = false;	/* this gets corrected in a minute */
	e->level = hint_valid ? min(hint, NR_CACHE_LEVELS - 1) : 1;
	e->level = hint_valid ? min(hint, NR_CACHE_LEVELS - 1) : random_level(cblock);
	push(mq, e);

	return 0;
}

/*
 * Walk every non-sentinel entry in queue 'q', level by level, invoking
 * 'fn' with the entry's cblock, oblock and level.  Stops and returns
 * the callback's error code on the first failure; returns 0 when the
 * whole queue has been visited.
 */
static int smq_save_hints(struct smq_policy *mq, struct queue *q,
			  policy_walk_fn fn, void *context)
{
	unsigned level;

	for (level = 0; level < q->nr_levels; level++) {
		struct entry *e;

		for (e = l_head(q->es, q->qs + level); e; e = l_next(q->es, e)) {
			int r;

			/* Sentinel entries are bookkeeping, not mappings. */
			if (e->sentinel)
				continue;

			r = fn(context, infer_cblock(mq, e),
			       e->oblock, e->level);
			if (r)
				return r;
		}
	}

	return 0;
}

static int smq_walk_mappings(struct dm_cache_policy *p, policy_walk_fn fn,
			     void *context)
static uint32_t smq_get_hint(struct dm_cache_policy *p, dm_cblock_t cblock)
{
	struct smq_policy *mq = to_smq_policy(p);
	int r = 0;
	struct entry *e = get_entry(&mq->cache_alloc, from_cblock(cblock));

	/*
	 * We don't need to lock here since this method is only called once
	 * the IO has stopped.
	 */
	r = smq_save_hints(mq, &mq->clean, fn, context);
	if (!r)
		r = smq_save_hints(mq, &mq->dirty, fn, context);
	if (!e->allocated)
		return 0;

	return r;
	return e->level;
}

static void __remove_mapping(struct smq_policy *mq, dm_oblock_t oblock)
@@ -1616,7 +1595,7 @@ static void init_policy_functions(struct smq_policy *mq, bool mimic_mq)
	mq->policy.set_dirty = smq_set_dirty;
	mq->policy.clear_dirty = smq_clear_dirty;
	mq->policy.load_mapping = smq_load_mapping;
	mq->policy.walk_mappings = smq_walk_mappings;
	mq->policy.get_hint = smq_get_hint;
	mq->policy.remove_mapping = smq_remove_mapping;
	mq->policy.remove_cblock = smq_remove_cblock;
	mq->policy.writeback_work = smq_writeback_work;
Loading