
Commit c3a086e6 authored by Linus Torvalds
Pull dm fixes from Alasdair G Kergon:
 "A few fixes for problems discovered during the 3.6 cycle.

  Of particular note are fixes to the thin target's discard support,
  which I hope is finally working correctly; and fixes for multipath
  ioctls and device limits when there are no paths."

* tag 'dm-3.6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-dm:
  dm verity: fix overflow check
  dm thin: fix discard support for data devices
  dm thin: tidy discard support
  dm: retain table limits when swapping to new table with no devices
  dm table: clear add_random unless all devices have it set
  dm: handle requests beyond end of device instead of using BUG_ON
  dm mpath: only retry ioctl when no paths if queue_if_no_path set
  dm thin: do not set discard_zeroes_data
parents 99a1300e 1d55f6bc
drivers/md/dm-mpath.c  +7 −4
@@ -1555,6 +1555,7 @@ static int multipath_ioctl(struct dm_target *ti, unsigned int cmd,
			   unsigned long arg)
{
	struct multipath *m = ti->private;
	struct pgpath *pgpath;
	struct block_device *bdev;
	fmode_t mode;
	unsigned long flags;
@@ -1570,12 +1571,14 @@ again:
	if (!m->current_pgpath)
		__choose_pgpath(m, 0);

	if (m->current_pgpath) {
		bdev = m->current_pgpath->path.dev->bdev;
		mode = m->current_pgpath->path.dev->mode;
	pgpath = m->current_pgpath;

	if (pgpath) {
		bdev = pgpath->path.dev->bdev;
		mode = pgpath->path.dev->mode;
	}

	if (m->queue_io)
	if ((pgpath && m->queue_io) || (!pgpath && m->queue_if_no_path))
		r = -EAGAIN;
	else if (!bdev)
		r = -EIO;
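
The behavioural change in multipath_ioctl() above is easy to misread with the old and new lines interleaved: an ioctl now retries (-EAGAIN) only while a path exists with queue_io set, or while no path exists but queue_if_no_path is set; a pathless map without queue_if_no_path now fails immediately with -EIO instead of retrying forever. A minimal standalone restatement of that decision, not part of the patch (the function name and bool parameters are illustrative only):

#include <errno.h>
#include <stdbool.h>

/* Editor's illustration of the new multipath ioctl disposition. */
static int ioctl_disposition(bool have_path, bool queue_io, bool queue_if_no_path)
{
	if ((have_path && queue_io) || (!have_path && queue_if_no_path))
		return -EAGAIN;	/* path state may still change: ask the caller to retry */
	if (!have_path)
		return -EIO;	/* no path and not queueing: fail the ioctl at once */
	return 0;		/* a usable path exists: pass the ioctl to its bdev */
}
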
drivers/md/dm-table.c  +57 −4
@@ -1212,6 +1212,41 @@ struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector)
	return &t->targets[(KEYS_PER_NODE * n) + k];
}

static int count_device(struct dm_target *ti, struct dm_dev *dev,
			sector_t start, sector_t len, void *data)
{
	unsigned *num_devices = data;

	(*num_devices)++;

	return 0;
}

/*
 * Check whether a table has no data devices attached using each
 * target's iterate_devices method.
 * Returns false if the result is unknown because a target doesn't
 * support iterate_devices.
 */
bool dm_table_has_no_data_devices(struct dm_table *table)
{
	struct dm_target *uninitialized_var(ti);
	unsigned i = 0, num_devices = 0;

	while (i < dm_table_get_num_targets(table)) {
		ti = dm_table_get_target(table, i++);

		if (!ti->type->iterate_devices)
			return false;

		ti->type->iterate_devices(ti, count_device, &num_devices);
		if (num_devices)
			return false;
	}

	return true;
}

/*
 * Establish the new table's queue_limits and validate them.
 */
@@ -1354,17 +1389,25 @@ static int device_is_nonrot(struct dm_target *ti, struct dm_dev *dev,
	return q && blk_queue_nonrot(q);
}

static bool dm_table_is_nonrot(struct dm_table *t)
static int device_is_not_random(struct dm_target *ti, struct dm_dev *dev,
			     sector_t start, sector_t len, void *data)
{
	struct request_queue *q = bdev_get_queue(dev->bdev);

	return q && !blk_queue_add_random(q);
}

static bool dm_table_all_devices_attribute(struct dm_table *t,
					   iterate_devices_callout_fn func)
{
	struct dm_target *ti;
	unsigned i = 0;

	/* Ensure that all underlying device are non-rotational. */
	while (i < dm_table_get_num_targets(t)) {
		ti = dm_table_get_target(t, i++);

		if (!ti->type->iterate_devices ||
		    !ti->type->iterate_devices(ti, device_is_nonrot, NULL))
		    !ti->type->iterate_devices(ti, func, NULL))
			return 0;
	}

@@ -1396,13 +1439,23 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
	if (!dm_table_discard_zeroes_data(t))
		q->limits.discard_zeroes_data = 0;

	if (dm_table_is_nonrot(t))
	/* Ensure that all underlying devices are non-rotational. */
	if (dm_table_all_devices_attribute(t, device_is_nonrot))
		queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
	else
		queue_flag_clear_unlocked(QUEUE_FLAG_NONROT, q);

	dm_table_set_integrity(t);

	/*
	 * Determine whether or not this queue's I/O timings contribute
	 * to the entropy pool. Only request-based targets use this.
	 * Clear QUEUE_FLAG_ADD_RANDOM if any underlying device does not
	 * have it set.
	 */
	if (blk_queue_add_random(q) && dm_table_all_devices_attribute(t, device_is_not_random))
		queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, q);

	/*
	 * QUEUE_FLAG_STACKABLE must be set after all queue settings are
	 * visible to other CPUs because, once the flag is set, incoming bios
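
The dm-table change above generalises the old nonrot-only loop into dm_table_all_devices_attribute(), which answers "do all underlying devices satisfy this predicate?" for any callout with the iterate_devices_callout_fn signature. As a hedged sketch of how another predicate would plug into the same pattern (the helper name below is hypothetical and not in the patch; bdev_get_queue() and blk_queue_discard() are the same kernel helpers the patch already uses, and kernel context is assumed):

/* Hypothetical callout; assumes linux/device-mapper.h and linux/blkdev.h. */
static int device_discard_capable(struct dm_target *ti, struct dm_dev *dev,
				  sector_t start, sector_t len, void *data)
{
	struct request_queue *q = bdev_get_queue(dev->bdev);

	return q && blk_queue_discard(q);
}

A caller would then use dm_table_all_devices_attribute(t, device_discard_capable) to learn whether every device in the table supports discards, exactly as the patch does for device_is_nonrot and device_is_not_random.
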
drivers/md/dm-thin.c  +88 −47
@@ -509,9 +509,9 @@ enum pool_mode {
struct pool_features {
	enum pool_mode mode;

	unsigned zero_new_blocks:1;
	unsigned discard_enabled:1;
	unsigned discard_passdown:1;
	bool zero_new_blocks:1;
	bool discard_enabled:1;
	bool discard_passdown:1;
};

struct thin_c;
@@ -580,7 +580,8 @@ struct pool_c {
	struct dm_target_callbacks callbacks;

	dm_block_t low_water_blocks;
	struct pool_features pf;
	struct pool_features requested_pf; /* Features requested during table load */
	struct pool_features adjusted_pf;  /* Features used after adjusting for constituent devices */
};

/*
@@ -1839,6 +1840,47 @@ static void __requeue_bios(struct pool *pool)
/*----------------------------------------------------------------
 * Binding of control targets to a pool object
 *--------------------------------------------------------------*/
static bool data_dev_supports_discard(struct pool_c *pt)
{
	struct request_queue *q = bdev_get_queue(pt->data_dev->bdev);

	return q && blk_queue_discard(q);
}

/*
 * If discard_passdown was enabled verify that the data device
 * supports discards.  Disable discard_passdown if not.
 */
static void disable_passdown_if_not_supported(struct pool_c *pt)
{
	struct pool *pool = pt->pool;
	struct block_device *data_bdev = pt->data_dev->bdev;
	struct queue_limits *data_limits = &bdev_get_queue(data_bdev)->limits;
	sector_t block_size = pool->sectors_per_block << SECTOR_SHIFT;
	const char *reason = NULL;
	char buf[BDEVNAME_SIZE];

	if (!pt->adjusted_pf.discard_passdown)
		return;

	if (!data_dev_supports_discard(pt))
		reason = "discard unsupported";

	else if (data_limits->max_discard_sectors < pool->sectors_per_block)
		reason = "max discard sectors smaller than a block";

	else if (data_limits->discard_granularity > block_size)
		reason = "discard granularity larger than a block";

	else if (block_size & (data_limits->discard_granularity - 1))
		reason = "discard granularity not a factor of block size";

	if (reason) {
		DMWARN("Data device (%s) %s: Disabling discard passdown.", bdevname(data_bdev, buf), reason);
		pt->adjusted_pf.discard_passdown = false;
	}
}

static int bind_control_target(struct pool *pool, struct dm_target *ti)
{
	struct pool_c *pt = ti->private;
@@ -1847,31 +1889,16 @@ static int bind_control_target(struct pool *pool, struct dm_target *ti)
	 * We want to make sure that degraded pools are never upgraded.
	 */
	enum pool_mode old_mode = pool->pf.mode;
	enum pool_mode new_mode = pt->pf.mode;
	enum pool_mode new_mode = pt->adjusted_pf.mode;

	if (old_mode > new_mode)
		new_mode = old_mode;

	pool->ti = ti;
	pool->low_water_blocks = pt->low_water_blocks;
	pool->pf = pt->pf;
	set_pool_mode(pool, new_mode);
	pool->pf = pt->adjusted_pf;

	/*
	 * If discard_passdown was enabled verify that the data device
	 * supports discards.  Disable discard_passdown if not; otherwise
	 * -EOPNOTSUPP will be returned.
	 */
	/* FIXME: pull this out into a sep fn. */
	if (pt->pf.discard_passdown) {
		struct request_queue *q = bdev_get_queue(pt->data_dev->bdev);
		if (!q || !blk_queue_discard(q)) {
			char buf[BDEVNAME_SIZE];
			DMWARN("Discard unsupported by data device (%s): Disabling discard passdown.",
			       bdevname(pt->data_dev->bdev, buf));
			pool->pf.discard_passdown = 0;
		}
	}
	set_pool_mode(pool, new_mode);

	return 0;
}
@@ -1889,9 +1916,9 @@ static void unbind_control_target(struct pool *pool, struct dm_target *ti)
static void pool_features_init(struct pool_features *pf)
{
	pf->mode = PM_WRITE;
	pf->zero_new_blocks = 1;
	pf->discard_enabled = 1;
	pf->discard_passdown = 1;
	pf->zero_new_blocks = true;
	pf->discard_enabled = true;
	pf->discard_passdown = true;
}

static void __pool_destroy(struct pool *pool)
@@ -2119,13 +2146,13 @@ static int parse_pool_features(struct dm_arg_set *as, struct pool_features *pf,
		argc--;

		if (!strcasecmp(arg_name, "skip_block_zeroing"))
			pf->zero_new_blocks = 0;
			pf->zero_new_blocks = false;

		else if (!strcasecmp(arg_name, "ignore_discard"))
			pf->discard_enabled = 0;
			pf->discard_enabled = false;

		else if (!strcasecmp(arg_name, "no_discard_passdown"))
			pf->discard_passdown = 0;
			pf->discard_passdown = false;

		else if (!strcasecmp(arg_name, "read_only"))
			pf->mode = PM_READ_ONLY;
@@ -2259,8 +2286,9 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
	pt->metadata_dev = metadata_dev;
	pt->data_dev = data_dev;
	pt->low_water_blocks = low_water_blocks;
	pt->pf = pf;
	pt->adjusted_pf = pt->requested_pf = pf;
	ti->num_flush_requests = 1;

	/*
	 * Only need to enable discards if the pool should pass
	 * them down to the data device.  The thin device's discard
@@ -2268,12 +2296,14 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
	 */
	if (pf.discard_enabled && pf.discard_passdown) {
		ti->num_discard_requests = 1;

		/*
		 * Setting 'discards_supported' circumvents the normal
		 * stacking of discard limits (this keeps the pool and
		 * thin devices' discard limits consistent).
		 */
		ti->discards_supported = true;
		ti->discard_zeroes_data_unsupported = true;
	}
	ti->private = pt;

@@ -2703,7 +2733,7 @@ static int pool_status(struct dm_target *ti, status_type_t type,
		       format_dev_t(buf2, pt->data_dev->bdev->bd_dev),
		       (unsigned long)pool->sectors_per_block,
		       (unsigned long long)pt->low_water_blocks);
		emit_flags(&pt->pf, result, sz, maxlen);
		emit_flags(&pt->requested_pf, result, sz, maxlen);
		break;
	}

@@ -2732,20 +2762,21 @@ static int pool_merge(struct dm_target *ti, struct bvec_merge_data *bvm,
	return min(max_size, q->merge_bvec_fn(q, bvm, biovec));
}

static void set_discard_limits(struct pool *pool, struct queue_limits *limits)
static void set_discard_limits(struct pool_c *pt, struct queue_limits *limits)
{
	/*
	 * FIXME: these limits may be incompatible with the pool's data device
	 */
	struct pool *pool = pt->pool;
	struct queue_limits *data_limits;

	limits->max_discard_sectors = pool->sectors_per_block;

	/*
	 * This is just a hint, and not enforced.  We have to cope with
	 * bios that cover a block partially.  A discard that spans a block
	 * boundary is not sent to this target.
	 * discard_granularity is just a hint, and not enforced.
	 */
	if (pt->adjusted_pf.discard_passdown) {
		data_limits = &bdev_get_queue(pt->data_dev->bdev)->limits;
		limits->discard_granularity = data_limits->discard_granularity;
	} else
		limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT;
	limits->discard_zeroes_data = pool->pf.zero_new_blocks;
}

static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits)
@@ -2755,15 +2786,25 @@ static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits)

	blk_limits_io_min(limits, 0);
	blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT);
	if (pool->pf.discard_enabled)
		set_discard_limits(pool, limits);

	/*
	 * pt->adjusted_pf is a staging area for the actual features to use.
	 * They get transferred to the live pool in bind_control_target()
	 * called from pool_preresume().
	 */
	if (!pt->adjusted_pf.discard_enabled)
		return;

	disable_passdown_if_not_supported(pt);

	set_discard_limits(pt, limits);
}

static struct target_type pool_target = {
	.name = "thin-pool",
	.features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE |
		    DM_TARGET_IMMUTABLE,
	.version = {1, 3, 0},
	.version = {1, 4, 0},
	.module = THIS_MODULE,
	.ctr = pool_ctr,
	.dtr = pool_dtr,
@@ -3042,19 +3083,19 @@ static int thin_iterate_devices(struct dm_target *ti,
	return 0;
}

/*
 * A thin device always inherits its queue limits from its pool.
 */
static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits)
{
	struct thin_c *tc = ti->private;
	struct pool *pool = tc->pool;

	blk_limits_io_min(limits, 0);
	blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT);
	set_discard_limits(pool, limits);
	*limits = bdev_get_queue(tc->pool_dev->bdev)->limits;
}

static struct target_type thin_target = {
	.name = "thin",
	.version = {1, 3, 0},
	.version = {1, 4, 0},
	.module	= THIS_MODULE,
	.ctr = thin_ctr,
	.dtr = thin_dtr,
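
Within the dm-thin changes, disable_passdown_if_not_supported() reduces to a few comparisons between the pool's block size and the data device's discard limits. A standalone sketch of that arithmetic with hypothetical numbers (it assumes discard_granularity is a power of two, which is what the bitmask test in the patch relies on; the first flag stands in for data_dev_supports_discard()):

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

#define SECTOR_SHIFT 9	/* 512-byte sectors, as in the kernel */

int main(void)
{
	/* Hypothetical values for illustration only. */
	bool     device_supports_discard = true;
	uint64_t sectors_per_block       = 128;	/* 64 KiB pool block */
	uint64_t block_size              = sectors_per_block << SECTOR_SHIFT;
	uint64_t max_discard_sectors     = 2048;	/* 1 MiB per discard */
	uint64_t discard_granularity     = 4096;	/* 4 KiB, power of two */

	bool disable_passdown =
		!device_supports_discard                       ||
		max_discard_sectors < sectors_per_block        ||	/* cannot cover a whole block */
		discard_granularity > block_size               ||	/* granularity larger than a block */
		(block_size & (discard_granularity - 1)) != 0;		/* granularity not a factor of block size */

	printf("discard passdown %s\n", disable_passdown ? "disabled" : "kept");
	return 0;
}

With these values every check passes and passdown stays enabled; raise discard_granularity to, say, 192 KiB and the "larger than a block" test disables it, matching the DMWARN paths in the patch.
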
drivers/md/dm-verity.c  +4 −4
@@ -718,8 +718,8 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
	v->hash_dev_block_bits = ffs(num) - 1;

	if (sscanf(argv[5], "%llu%c", &num_ll, &dummy) != 1 ||
	    num_ll << (v->data_dev_block_bits - SECTOR_SHIFT) !=
	    (sector_t)num_ll << (v->data_dev_block_bits - SECTOR_SHIFT)) {
	    (sector_t)(num_ll << (v->data_dev_block_bits - SECTOR_SHIFT))
	    >> (v->data_dev_block_bits - SECTOR_SHIFT) != num_ll) {
		ti->error = "Invalid data blocks";
		r = -EINVAL;
		goto bad;
@@ -733,8 +733,8 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
	}

	if (sscanf(argv[6], "%llu%c", &num_ll, &dummy) != 1 ||
	    num_ll << (v->hash_dev_block_bits - SECTOR_SHIFT) !=
	    (sector_t)num_ll << (v->hash_dev_block_bits - SECTOR_SHIFT)) {
	    (sector_t)(num_ll << (v->hash_dev_block_bits - SECTOR_SHIFT))
	    >> (v->hash_dev_block_bits - SECTOR_SHIFT) != num_ll) {
		ti->error = "Invalid hash start";
		r = -EINVAL;
		goto bad;
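
The dm-verity hunks replace an overflow check that was a no-op whenever sector_t is 64 bits wide: casting the 64-bit unsigned long long to a 64-bit sector_t changes nothing, so the two sides of the old comparison were always equal and an overflowing shift went undetected. The new test shifts the value back down and compares it with the original, so any bits lost in the left shift (or in a narrowing cast to a 32-bit sector_t) make the comparison fail. A standalone sketch of the round-trip pattern, outside the kernel (the function name and values are illustrative):

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

typedef uint64_t sector_t;	/* the 64-bit case the old check could not cover */

static bool shift_overflows(unsigned long long v, unsigned int shift)
{
	return ((sector_t)(v << shift) >> shift) != v;
}

int main(void)
{
	unsigned int shift = 3;	/* e.g. 4096-byte blocks over 512-byte sectors */

	printf("%d\n", shift_overflows(1ULL << 62, shift));	/* 1: high bits lost */
	printf("%d\n", shift_overflows(1000ULL, shift));	/* 0: value fits */
	return 0;
}
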
drivers/md/dm.c  +52 −19
@@ -865,10 +865,14 @@ static void dm_done(struct request *clone, int error, bool mapped)
{
	int r = error;
	struct dm_rq_target_io *tio = clone->end_io_data;
	dm_request_endio_fn rq_end_io = tio->ti->type->rq_end_io;
	dm_request_endio_fn rq_end_io = NULL;

	if (tio->ti) {
		rq_end_io = tio->ti->type->rq_end_io;

		if (mapped && rq_end_io)
			r = rq_end_io(tio->ti, clone, error, &tio->info);
	}

	if (r <= 0)
		/* The target wants to complete the I/O */
@@ -1588,15 +1592,6 @@ static int map_request(struct dm_target *ti, struct request *clone,
	int r, requeued = 0;
	struct dm_rq_target_io *tio = clone->end_io_data;

	/*
	 * Hold the md reference here for the in-flight I/O.
	 * We can't rely on the reference count by device opener,
	 * because the device may be closed during the request completion
	 * when all bios are completed.
	 * See the comment in rq_completed() too.
	 */
	dm_get(md);

	tio->ti = ti;
	r = ti->type->map_rq(ti, clone, &tio->info);
	switch (r) {
@@ -1628,6 +1623,26 @@ static int map_request(struct dm_target *ti, struct request *clone,
	return requeued;
}

static struct request *dm_start_request(struct mapped_device *md, struct request *orig)
{
	struct request *clone;

	blk_start_request(orig);
	clone = orig->special;
	atomic_inc(&md->pending[rq_data_dir(clone)]);

	/*
	 * Hold the md reference here for the in-flight I/O.
	 * We can't rely on the reference count by device opener,
	 * because the device may be closed during the request completion
	 * when all bios are completed.
	 * See the comment in rq_completed() too.
	 */
	dm_get(md);

	return clone;
}

/*
 * q->request_fn for request-based dm.
 * Called with the queue lock held.
@@ -1657,14 +1672,21 @@ static void dm_request_fn(struct request_queue *q)
			pos = blk_rq_pos(rq);

		ti = dm_table_find_target(map, pos);
		BUG_ON(!dm_target_is_valid(ti));
		if (!dm_target_is_valid(ti)) {
			/*
			 * Must perform setup, that dm_done() requires,
			 * before calling dm_kill_unmapped_request
			 */
			DMERR_LIMIT("request attempted access beyond the end of device");
			clone = dm_start_request(md, rq);
			dm_kill_unmapped_request(clone, -EIO);
			continue;
		}

		if (ti->type->busy && ti->type->busy(ti))
			goto delay_and_out;

		blk_start_request(rq);
		clone = rq->special;
		atomic_inc(&md->pending[rq_data_dir(clone)]);
		clone = dm_start_request(md, rq);

		spin_unlock(q->queue_lock);
		if (map_request(ti, clone, md))
@@ -1684,8 +1706,6 @@ delay_and_out:
	blk_delay_queue(q, HZ / 10);
out:
	dm_table_put(map);

	return;
}

int dm_underlying_device_busy(struct request_queue *q)
@@ -2409,7 +2429,7 @@ static void dm_queue_flush(struct mapped_device *md)
 */
struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table)
{
	struct dm_table *map = ERR_PTR(-EINVAL);
	struct dm_table *live_map, *map = ERR_PTR(-EINVAL);
	struct queue_limits limits;
	int r;

@@ -2419,6 +2439,19 @@ struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table)
	if (!dm_suspended_md(md))
		goto out;

	/*
	 * If the new table has no data devices, retain the existing limits.
	 * This helps multipath with queue_if_no_path if all paths disappear,
	 * then new I/O is queued based on these limits, and then some paths
	 * reappear.
	 */
	if (dm_table_has_no_data_devices(table)) {
		live_map = dm_get_live_table(md);
		if (live_map)
			limits = md->queue->limits;
		dm_table_put(live_map);
	}

	r = dm_calculate_queue_limits(table, &limits);
	if (r) {
		map = ERR_PTR(r);