
Commit e5863d9a authored by Mike Snitzer

dm: allocate requests in target when stacking on blk-mq devices



For blk-mq request-based DM the responsibility of allocating a cloned
request is transferred from DM core to the target type.  Doing so
enables the cloned request to be allocated from the appropriate
blk-mq request_queue's pool (only the DM target, e.g. multipath, can
know which block device to send a given cloned request to).
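
Concretely, the target allocates the clone straight from the underlying
device's request_queue, so the clone comes from that queue's blk-mq pool.
A minimal sketch, distilled from the multipath change below (the helper
name example_alloc_clone is illustrative, not from the patch):

	static struct request *example_alloc_clone(struct block_device *bdev,
						   struct request *rq)
	{
		/* queue of the underlying (blk-mq) device */
		struct request_queue *q = bdev_get_queue(bdev);

		/* may return ERR_PTR under memory pressure; the caller
		 * requeues the original request in that case */
		return blk_get_request(q, rq_data_dir(rq), GFP_KERNEL);
	}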

Care was taken to preserve compatibility with old-style block request
completion, which requires that request-based DM _not_ acquire the clone
request's queue lock in the completion path.  As such, there are now two
different request-based DM target_type interfaces:
1) the original .map_rq() interface will continue to be used for
   non-blk-mq devices -- the preallocated clone request is passed in
   from DM core.
2) new .clone_and_map_rq() and .release_clone_rq() hooks are used for
   blk-mq devices -- these hooks call blk_get_request() and
   blk_put_request() respectively; their shape is sketched below.
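
For reference, the new hooks are declared roughly as follows (a sketch of
the include/linux/device-mapper.h side of this change, which is not
reproduced in the diff below; the typedef names follow the existing
dm_map_request_fn convention):

	typedef int (*dm_clone_and_map_request_fn) (struct dm_target *ti,
						    struct request *rq,
						    union map_info *map_context,
						    struct request **clone);
	typedef void (*dm_release_clone_request_fn) (struct request *clone);

	struct target_type {
		/* ... */
		dm_map_request_fn map_rq;
		dm_clone_and_map_request_fn clone_and_map_rq;
		dm_release_clone_request_fn release_clone_rq;
		/* ... */
	};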

dm_table_set_type() was updated to detect whether the request-based target
is being stacked on blk-mq devices; if so, DM_TYPE_MQ_REQUEST_BASED is set.
DM core disallows switching the DM table's type after it is set.  This
means that there is no mixing of non-blk-mq and blk-mq devices within
the same request-based DM table.
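
The resulting selection can be distilled as follows (illustration only;
choose_request_based_type() is a hypothetical helper, not code from this
patch -- the real logic lives in dm_table_set_type(), shown below):

	static unsigned choose_request_based_type(bool any_mq, bool all_mq)
	{
		if (!any_mq)
			return DM_TYPE_REQUEST_BASED;	 /* legacy request_fn stacking */
		if (all_mq)
			return DM_TYPE_MQ_REQUEST_BASED; /* clones from blk-mq pools */
		return DM_TYPE_NONE;			 /* mixed: table load rejected */
	}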

[This patch was started by Keith and later heavily modified by Mike]

Tested-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
parent 466d89a6
drivers/md/dm-mpath.c +43 −8
@@ -11,6 +11,7 @@
 #include "dm-path-selector.h"
 #include "dm-uevent.h"
 
+#include <linux/blkdev.h>
 #include <linux/ctype.h>
 #include <linux/init.h>
 #include <linux/mempool.h>
@@ -378,12 +379,13 @@ static int __must_push_back(struct multipath *m)
 /*
  * Map cloned requests
  */
-static int multipath_map(struct dm_target *ti, struct request *clone,
-			 union map_info *map_context)
+static int __multipath_map(struct dm_target *ti, struct request *clone,
+			   union map_info *map_context,
+			   struct request *rq, struct request **__clone)
 {
 	struct multipath *m = (struct multipath *) ti->private;
 	int r = DM_MAPIO_REQUEUE;
-	size_t nr_bytes = blk_rq_bytes(clone);
+	size_t nr_bytes = clone ? blk_rq_bytes(clone) : blk_rq_bytes(rq);
 	struct pgpath *pgpath;
 	struct block_device *bdev;
 	struct dm_mpath_io *mpio;
@@ -416,11 +418,24 @@ static int multipath_map(struct dm_target *ti, struct request *clone,
 
 	bdev = pgpath->path.dev->bdev;
 
-	clone->q = bdev_get_queue(bdev);
-	clone->rq_disk = bdev->bd_disk;
-	clone->cmd_flags |= REQ_FAILFAST_TRANSPORT;
-
 	spin_unlock_irq(&m->lock);
 
+	if (clone) {
+		/* Old request-based interface: allocated clone is passed in */
+		clone->q = bdev_get_queue(bdev);
+		clone->rq_disk = bdev->bd_disk;
+		clone->cmd_flags |= REQ_FAILFAST_TRANSPORT;
+	} else {
+		/* blk-mq request-based interface */
+		*__clone = blk_get_request(bdev_get_queue(bdev),
+					   rq_data_dir(rq), GFP_KERNEL);
+		if (IS_ERR(*__clone))
+			/* ENOMEM, requeue */
+			return r;
+		(*__clone)->bio = (*__clone)->biotail = NULL;
+		(*__clone)->rq_disk = bdev->bd_disk;
+		(*__clone)->cmd_flags |= REQ_FAILFAST_TRANSPORT;
+	}
+
 	if (pgpath->pg->ps.type->start_io)
 		pgpath->pg->ps.type->start_io(&pgpath->pg->ps,
@@ -434,6 +449,24 @@ static int multipath_map(struct dm_target *ti, struct request *clone,
 	return r;
 }
 
+static int multipath_map(struct dm_target *ti, struct request *clone,
+			 union map_info *map_context)
+{
+	return __multipath_map(ti, clone, map_context, NULL, NULL);
+}
+
+static int multipath_clone_and_map(struct dm_target *ti, struct request *rq,
+				   union map_info *map_context,
+				   struct request **clone)
+{
+	return __multipath_map(ti, NULL, map_context, rq, clone);
+}
+
+static void multipath_release_clone(struct request *clone)
+{
+	blk_put_request(clone);
+}
+
 /*
  * If we run out of usable paths, should we queue I/O or error it?
  */
@@ -1670,11 +1703,13 @@ static int multipath_busy(struct dm_target *ti)
  *---------------------------------------------------------------*/
 static struct target_type multipath_target = {
 	.name = "multipath",
-	.version = {1, 7, 0},
+	.version = {1, 8, 0},
 	.module = THIS_MODULE,
 	.ctr = multipath_ctr,
 	.dtr = multipath_dtr,
 	.map_rq = multipath_map,
+	.clone_and_map_rq = multipath_clone_and_map,
+	.release_clone_rq = multipath_release_clone,
 	.rq_end_io = multipath_end_io,
 	.presuspend = multipath_presuspend,
 	.postsuspend = multipath_postsuspend,
drivers/md/dm-table.c +29 −5
@@ -827,6 +827,7 @@ static int dm_table_set_type(struct dm_table *t)
 {
 	unsigned i;
 	unsigned bio_based = 0, request_based = 0, hybrid = 0;
+	bool use_blk_mq = false;
 	struct dm_target *tgt;
 	struct dm_dev_internal *dd;
 	struct list_head *devices;
@@ -872,11 +873,26 @@ static int dm_table_set_type(struct dm_table *t)
 	/* Non-request-stackable devices can't be used for request-based dm */
 	devices = dm_table_get_devices(t);
 	list_for_each_entry(dd, devices, list) {
-		if (!blk_queue_stackable(bdev_get_queue(dd->dm_dev->bdev))) {
-			DMWARN("table load rejected: including"
-			       " non-request-stackable devices");
+		struct request_queue *q = bdev_get_queue(dd->dm_dev->bdev);
+
+		if (!blk_queue_stackable(q)) {
+			DMERR("table load rejected: including"
+			      " non-request-stackable devices");
 			return -EINVAL;
 		}
+
+		if (q->mq_ops)
+			use_blk_mq = true;
 	}
 
+	if (use_blk_mq) {
+		/* verify _all_ devices in the table are blk-mq devices */
+		list_for_each_entry(dd, devices, list)
+			if (!bdev_get_queue(dd->dm_dev->bdev)->mq_ops) {
+				DMERR("table load rejected: not all devices"
+				      " are blk-mq request-stackable");
+				return -EINVAL;
+			}
+	}
+
 	/*
@@ -890,7 +906,7 @@ static int dm_table_set_type(struct dm_table *t)
 		return -EINVAL;
 	}
 
-	t->type = DM_TYPE_REQUEST_BASED;
+	t->type = !use_blk_mq ? DM_TYPE_REQUEST_BASED : DM_TYPE_MQ_REQUEST_BASED;
 
 	return 0;
 }
@@ -907,7 +923,15 @@ struct target_type *dm_table_get_immutable_target_type(struct dm_table *t)
 
 bool dm_table_request_based(struct dm_table *t)
 {
-	return dm_table_get_type(t) == DM_TYPE_REQUEST_BASED;
+	unsigned table_type = dm_table_get_type(t);
+
+	return (table_type == DM_TYPE_REQUEST_BASED ||
+		table_type == DM_TYPE_MQ_REQUEST_BASED);
+}
+
+bool dm_table_mq_request_based(struct dm_table *t)
+{
+	return dm_table_get_type(t) == DM_TYPE_MQ_REQUEST_BASED;
 }
 
 static int dm_table_alloc_md_mempools(struct dm_table *t)
drivers/md/dm-target.c +14 −1
@@ -137,13 +137,26 @@ static int io_err_map_rq(struct dm_target *ti, struct request *clone,
 	return -EIO;
 }
 
+static int io_err_clone_and_map_rq(struct dm_target *ti, struct request *rq,
+				   union map_info *map_context,
+				   struct request **clone)
+{
+	return -EIO;
+}
+
+static void io_err_release_clone_rq(struct request *clone)
+{
+}
+
 static struct target_type error_target = {
 	.name = "error",
-	.version = {1, 2, 0},
+	.version = {1, 3, 0},
 	.ctr  = io_err_ctr,
 	.dtr  = io_err_dtr,
 	.map  = io_err_map,
 	.map_rq = io_err_map_rq,
+	.clone_and_map_rq = io_err_clone_and_map_rq,
+	.release_clone_rq = io_err_release_clone_rq,
 };
 
 int __init dm_target_init(void)
drivers/md/dm.c +85 −29
@@ -1044,6 +1044,9 @@ static void free_rq_clone(struct request *clone)
 	struct dm_rq_target_io *tio = clone->end_io_data;
 
 	blk_rq_unprep_clone(clone);
-	free_clone_request(tio->md, clone);
+	if (clone->q && clone->q->mq_ops)
+		tio->ti->type->release_clone_rq(clone);
+	else
+		free_clone_request(tio->md, clone);
 	free_rq_tio(tio);
 }
@@ -1086,6 +1089,7 @@ static void dm_unprep_request(struct request *rq)
 	rq->special = NULL;
 	rq->cmd_flags &= ~REQ_DONTPREP;
 
-	free_rq_clone(clone);
+	if (clone)
+		free_rq_clone(clone);
 }

@@ -1185,6 +1189,13 @@ static void dm_softirq_done(struct request *rq)
 	struct dm_rq_target_io *tio = rq->special;
 	struct request *clone = tio->clone;
 
+	if (!clone) {
+		blk_end_request_all(rq, tio->error);
+		rq_completed(tio->md, rq_data_dir(rq), false);
+		free_rq_tio(tio);
+		return;
+	}
+
 	if (rq->cmd_flags & REQ_FAILED)
 		mapped = false;

@@ -1207,7 +1218,7 @@ static void dm_complete_request(struct request *rq, int error)
  * Complete the not-mapped clone and the original request with the error status
  * through softirq context.
  * Target's rq_end_io() function isn't called.
- * This may be used when the target's map_rq() function fails.
+ * This may be used when the target's map_rq() or clone_and_map_rq() functions fail.
  */
 static void dm_kill_unmapped_request(struct request *rq, int error)
 {
@@ -1222,13 +1233,15 @@ static void end_clone_request(struct request *clone, int error)
 {
 	struct dm_rq_target_io *tio = clone->end_io_data;
 
-	/*
-	 * For just cleaning up the information of the queue in which
-	 * the clone was dispatched.
-	 * The clone is *NOT* freed actually here because it is alloced from
-	 * dm own mempool and REQ_ALLOCED isn't set in clone->cmd_flags.
-	 */
-	__blk_put_request(clone->q, clone);
+	if (!clone->q->mq_ops) {
+		/*
+		 * For just cleaning up the information of the queue in which
+		 * the clone was dispatched.
+		 * The clone is *NOT* freed actually here because it is alloced
+		 * from dm own mempool (REQ_ALLOCED isn't set).
+		 */
+		__blk_put_request(clone->q, clone);
+	}
 
 	/*
 	 * Actual request completion is done in a softirq context which doesn't
@@ -1789,6 +1802,8 @@ static struct dm_rq_target_io *prep_tio(struct request *rq,
 					struct mapped_device *md, gfp_t gfp_mask)
 {
 	struct dm_rq_target_io *tio;
+	int srcu_idx;
+	struct dm_table *table;
 
 	tio = alloc_rq_tio(md, gfp_mask);
 	if (!tio)
@@ -1802,10 +1817,15 @@ static struct dm_rq_target_io *prep_tio(struct request *rq,
 	memset(&tio->info, 0, sizeof(tio->info));
 	init_kthread_work(&tio->work, map_tio_request);
 
-	if (!clone_rq(rq, md, tio, gfp_mask)) {
-		free_rq_tio(tio);
-		return NULL;
+	table = dm_get_live_table(md, &srcu_idx);
+	if (!dm_table_mq_request_based(table)) {
+		if (!clone_rq(rq, md, tio, gfp_mask)) {
+			dm_put_live_table(md, srcu_idx);
+			free_rq_tio(tio);
+			return NULL;
+		}
 	}
+	dm_put_live_table(md, srcu_idx);
 
 	return tio;
 }
@@ -1835,17 +1855,36 @@ static int dm_prep_fn(struct request_queue *q, struct request *rq)
 
 /*
  * Returns:
- * 0  : the request has been processed (not requeued)
- * !0 : the request has been requeued
+ * 0                : the request has been processed
+ * DM_MAPIO_REQUEUE : the original request needs to be requeued
+ * < 0              : the request was completed due to failure
  */
 static int map_request(struct dm_target *ti, struct request *rq,
 		       struct mapped_device *md)
 {
-	int r, requeued = 0;
+	int r;
 	struct dm_rq_target_io *tio = rq->special;
-	struct request *clone = tio->clone;
+	struct request *clone = NULL;
+
+	if (tio->clone) {
+		clone = tio->clone;
+		r = ti->type->map_rq(ti, clone, &tio->info);
+	} else {
+		r = ti->type->clone_and_map_rq(ti, rq, &tio->info, &clone);
+		if (r < 0) {
+			/* The target wants to complete the I/O */
+			dm_kill_unmapped_request(rq, r);
+			return r;
+		}
+		if (IS_ERR(clone))
+			return DM_MAPIO_REQUEUE;
+		if (setup_clone(clone, rq, tio, GFP_KERNEL)) {
+			/* -ENOMEM */
+			ti->type->release_clone_rq(clone);
+			return DM_MAPIO_REQUEUE;
+		}
+	}
 
-	r = ti->type->map_rq(ti, clone, &tio->info);
 	switch (r) {
 	case DM_MAPIO_SUBMITTED:
 		/* The target has taken the I/O to submit by itself later */
@@ -1859,7 +1898,6 @@ static int map_request(struct dm_target *ti, struct request *rq,
 	case DM_MAPIO_REQUEUE:
 		/* The target wants to requeue the I/O */
 		dm_requeue_unmapped_request(clone);
-		requeued = 1;
 		break;
 	default:
 		if (r > 0) {
@@ -1869,17 +1907,20 @@ static int map_request(struct dm_target *ti, struct request *rq,
 
 		/* The target wants to complete the I/O */
 		dm_kill_unmapped_request(rq, r);
-		break;
+		return r;
 	}
 
-	return requeued;
+	return 0;
 }
 
 static void map_tio_request(struct kthread_work *work)
 {
 	struct dm_rq_target_io *tio = container_of(work, struct dm_rq_target_io, work);
+	struct request *rq = tio->orig;
+	struct mapped_device *md = tio->md;
 
-	map_request(tio->ti, tio->orig, tio->md);
+	if (map_request(tio->ti, rq, md) == DM_MAPIO_REQUEUE)
+		dm_requeue_unmapped_original_request(md, rq);
 }
 
 static void dm_start_request(struct mapped_device *md, struct request *orig)
@@ -2459,6 +2500,14 @@ unsigned dm_get_md_type(struct mapped_device *md)
 	return md->type;
 }
 
+static bool dm_md_type_request_based(struct mapped_device *md)
+{
+	unsigned table_type = dm_get_md_type(md);
+
+	return (table_type == DM_TYPE_REQUEST_BASED ||
+		table_type == DM_TYPE_MQ_REQUEST_BASED);
+}
+
 struct target_type *dm_get_immutable_target_type(struct mapped_device *md)
 {
 	return md->immutable_target_type;
@@ -2511,8 +2560,7 @@ static int dm_init_request_based_queue(struct mapped_device *md)
  */
 int dm_setup_md_queue(struct mapped_device *md)
 {
-	if ((dm_get_md_type(md) == DM_TYPE_REQUEST_BASED) &&
-	    !dm_init_request_based_queue(md)) {
+	if (dm_md_type_request_based(md) && !dm_init_request_based_queue(md)) {
 		DMWARN("Cannot initialize queue for request-based mapped device");
 		return -EINVAL;
 	}
@@ -3184,27 +3232,35 @@ struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity, u
 {
 	struct dm_md_mempools *pools = kzalloc(sizeof(*pools), GFP_KERNEL);
 	struct kmem_cache *cachep;
-	unsigned int pool_size;
+	unsigned int pool_size = 0;
 	unsigned int front_pad;
 
 	if (!pools)
 		return NULL;
 
-	if (type == DM_TYPE_BIO_BASED) {
+	switch (type) {
+	case DM_TYPE_BIO_BASED:
 		cachep = _io_cache;
 		pool_size = dm_get_reserved_bio_based_ios();
 		front_pad = roundup(per_bio_data_size, __alignof__(struct dm_target_io)) + offsetof(struct dm_target_io, clone);
-	} else if (type == DM_TYPE_REQUEST_BASED) {
-		cachep = _rq_tio_cache;
+		break;
+	case DM_TYPE_REQUEST_BASED:
 		pool_size = dm_get_reserved_rq_based_ios();
 		pools->rq_pool = mempool_create_slab_pool(pool_size, _rq_cache);
 		if (!pools->rq_pool)
 			goto out;
+		/* fall through to setup remaining rq-based pools */
+	case DM_TYPE_MQ_REQUEST_BASED:
+		cachep = _rq_tio_cache;
+		if (!pool_size)
+			pool_size = dm_get_reserved_rq_based_ios();
 		front_pad = offsetof(struct dm_rq_clone_bio_info, clone);
 		/* per_bio_data_size is not used. See __bind_mempools(). */
 		WARN_ON(per_bio_data_size != 0);
-	} else
+		break;
+	default:
 		goto out;
+	}
 
 	pools->io_pool = mempool_create_slab_pool(pool_size, cachep);
 	if (!pools->io_pool)
drivers/md/dm.h +5 −3
@@ -37,6 +37,7 @@
-#define DM_TYPE_NONE		0
-#define DM_TYPE_BIO_BASED	1
-#define DM_TYPE_REQUEST_BASED	2
+#define DM_TYPE_NONE			0
+#define DM_TYPE_BIO_BASED		1
+#define DM_TYPE_REQUEST_BASED		2
+#define DM_TYPE_MQ_REQUEST_BASED	3
 
 /*
  * List of devices that a metadevice uses and should open/close.
@@ -73,6 +74,7 @@ int dm_table_any_busy_target(struct dm_table *t);
 unsigned dm_table_get_type(struct dm_table *t);
 struct target_type *dm_table_get_immutable_target_type(struct dm_table *t);
 bool dm_table_request_based(struct dm_table *t);
+bool dm_table_mq_request_based(struct dm_table *t);
 void dm_table_free_md_mempools(struct dm_table *t);
 struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t);
