
Commit 83d5e5b0 authored by Mikulas Patocka, committed by Alasdair G Kergon

dm: optimize use SRCU and RCU



This patch removes "io_lock" and "map_lock" from struct mapped_device and
"holders" from struct dm_table, and replaces these mechanisms with sleepable
RCU (SRCU).

Previously, the code would call "dm_get_live_table" and "dm_table_put" to get
and release the table. Now, the code is changed to call "dm_get_live_table"
and "dm_put_live_table": dm_get_live_table locks sleepable RCU and
dm_put_live_table unlocks it.
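
As a sketch (illustrative only, mirroring the __hash_remove() change below),
a reader that only inspects the live table now looks like this, where "md" is
a struct mapped_device the caller already holds:

	struct dm_table *table;
	int srcu_idx;

	table = dm_get_live_table(md, &srcu_idx);	/* enter SRCU read side */
	if (table)
		dm_table_event(table);			/* may sleep under SRCU */
	dm_put_live_table(md, srcu_idx);		/* exit SRCU read side */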

dm_get_live_table_fast/dm_put_live_table_fast can be used instead of
dm_get_live_table/dm_put_live_table. These *_fast functions use
non-sleepable RCU, so the caller must not block between them.
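
A minimal sketch of the fast variant, patterned on the dm_lld_busy() change
below (note these helpers are static to dm.c in this patch):

	int r;
	struct dm_table *map = dm_get_live_table_fast(md);	/* rcu_read_lock() */

	if (!map || test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))
		r = 1;
	else
		r = dm_table_any_busy_target(map);	/* must not block here */

	dm_put_live_table_fast(md);			/* rcu_read_unlock() */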

If the code changes the active or inactive dm table, it must call
dm_sync_table before destroying the old table.
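
For example (this is the pattern used by the table_load() hunk below),
replacing the inactive table defers destruction of the old one until all
readers are done:

	old_map = hc->new_map;		/* unhook old inactive table under _hash_lock */
	hc->new_map = t;
	up_write(&_hash_lock);

	if (old_map) {
		dm_sync_table(md);	/* wait until no reader can still see old_map */
		dm_table_destroy(old_map);
	}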

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
parent 2480945c
drivers/md/dm-ioctl.c: +82 −40
@@ -36,6 +36,14 @@ struct hash_cell {
	struct dm_table *new_map;
};

/*
 * A dummy definition to make RCU happy.
 * struct dm_table should never be dereferenced in this file.
 */
struct dm_table {
	int undefined__;
};

struct vers_iter {
    size_t param_size;
    struct dm_target_versions *vers, *old_vers;
@@ -242,9 +250,10 @@ static int dm_hash_insert(const char *name, const char *uuid, struct mapped_devi
	return -EBUSY;
}

static void __hash_remove(struct hash_cell *hc)
static struct dm_table *__hash_remove(struct hash_cell *hc)
{
	struct dm_table *table;
	int srcu_idx;

	/* remove from the dev hash */
	list_del(&hc->uuid_list);
@@ -253,16 +262,18 @@ static void __hash_remove(struct hash_cell *hc)
	dm_set_mdptr(hc->md, NULL);
	mutex_unlock(&dm_hash_cells_mutex);

	table = dm_get_live_table(hc->md);
	if (table) {
	table = dm_get_live_table(hc->md, &srcu_idx);
	if (table)
		dm_table_event(table);
		dm_table_put(table);
	}
	dm_put_live_table(hc->md, srcu_idx);

	table = NULL;
	if (hc->new_map)
		dm_table_destroy(hc->new_map);
		table = hc->new_map;
	dm_put(hc->md);
	free_cell(hc);

	return table;
}

static void dm_hash_remove_all(int keep_open_devices)
@@ -270,6 +281,7 @@ static void dm_hash_remove_all(int keep_open_devices)
	int i, dev_skipped;
	struct hash_cell *hc;
	struct mapped_device *md;
	struct dm_table *t;

retry:
	dev_skipped = 0;
@@ -287,10 +299,14 @@ static void dm_hash_remove_all(int keep_open_devices)
				continue;
			}

			__hash_remove(hc);
			t = __hash_remove(hc);

			up_write(&_hash_lock);

			if (t) {
				dm_sync_table(md);
				dm_table_destroy(t);
			}
			dm_put(md);
			if (likely(keep_open_devices))
				dm_destroy(md);
@@ -356,6 +372,7 @@ static struct mapped_device *dm_hash_rename(struct dm_ioctl *param,
	struct dm_table *table;
	struct mapped_device *md;
	unsigned change_uuid = (param->flags & DM_UUID_FLAG) ? 1 : 0;
	int srcu_idx;

	/*
	 * duplicate new.
@@ -418,11 +435,10 @@ static struct mapped_device *dm_hash_rename(struct dm_ioctl *param,
	/*
	 * Wake up any dm event waiters.
	 */
	table = dm_get_live_table(hc->md);
	if (table) {
	table = dm_get_live_table(hc->md, &srcu_idx);
	if (table)
		dm_table_event(table);
		dm_table_put(table);
	}
	dm_put_live_table(hc->md, srcu_idx);

	if (!dm_kobject_uevent(hc->md, KOBJ_CHANGE, param->event_nr))
		param->flags |= DM_UEVENT_GENERATED_FLAG;
@@ -620,11 +636,14 @@ static int check_name(const char *name)
 * _hash_lock without first calling dm_table_put, because dm_table_destroy
 * waits for this dm_table_put and could be called under this lock.
 */
static struct dm_table *dm_get_inactive_table(struct mapped_device *md)
static struct dm_table *dm_get_inactive_table(struct mapped_device *md, int *srcu_idx)
{
	struct hash_cell *hc;
	struct dm_table *table = NULL;

	/* increment rcu count, we don't care about the table pointer */
	dm_get_live_table(md, srcu_idx);

	down_read(&_hash_lock);
	hc = dm_get_mdptr(md);
	if (!hc || hc->md != md) {
@@ -633,8 +652,6 @@ static struct dm_table *dm_get_inactive_table(struct mapped_device *md)
	}

	table = hc->new_map;
	if (table)
		dm_table_get(table);

out:
	up_read(&_hash_lock);
@@ -643,10 +660,11 @@ static struct dm_table *dm_get_inactive_table(struct mapped_device *md)
}

static struct dm_table *dm_get_live_or_inactive_table(struct mapped_device *md,
						      struct dm_ioctl *param)
						      struct dm_ioctl *param,
						      int *srcu_idx)
{
	return (param->flags & DM_QUERY_INACTIVE_TABLE_FLAG) ?
		dm_get_inactive_table(md) : dm_get_live_table(md);
		dm_get_inactive_table(md, srcu_idx) : dm_get_live_table(md, srcu_idx);
}

/*
@@ -657,6 +675,7 @@ static void __dev_status(struct mapped_device *md, struct dm_ioctl *param)
{
	struct gendisk *disk = dm_disk(md);
	struct dm_table *table;
	int srcu_idx;

	param->flags &= ~(DM_SUSPEND_FLAG | DM_READONLY_FLAG |
			  DM_ACTIVE_PRESENT_FLAG);
@@ -676,26 +695,27 @@ static void __dev_status(struct mapped_device *md, struct dm_ioctl *param)
	param->event_nr = dm_get_event_nr(md);
	param->target_count = 0;

	table = dm_get_live_table(md);
	table = dm_get_live_table(md, &srcu_idx);
	if (table) {
		if (!(param->flags & DM_QUERY_INACTIVE_TABLE_FLAG)) {
			if (get_disk_ro(disk))
				param->flags |= DM_READONLY_FLAG;
			param->target_count = dm_table_get_num_targets(table);
		}
		dm_table_put(table);

		param->flags |= DM_ACTIVE_PRESENT_FLAG;
	}
	dm_put_live_table(md, srcu_idx);

	if (param->flags & DM_QUERY_INACTIVE_TABLE_FLAG) {
		table = dm_get_inactive_table(md);
		int srcu_idx;
		table = dm_get_inactive_table(md, &srcu_idx);
		if (table) {
			if (!(dm_table_get_mode(table) & FMODE_WRITE))
				param->flags |= DM_READONLY_FLAG;
			param->target_count = dm_table_get_num_targets(table);
			dm_table_put(table);
		}
		dm_put_live_table(md, srcu_idx);
	}
}

@@ -796,6 +816,7 @@ static int dev_remove(struct dm_ioctl *param, size_t param_size)
	struct hash_cell *hc;
	struct mapped_device *md;
	int r;
	struct dm_table *t;

	down_write(&_hash_lock);
	hc = __find_device_hash_cell(param);
@@ -819,9 +840,14 @@ static int dev_remove(struct dm_ioctl *param, size_t param_size)
		return r;
	}

	__hash_remove(hc);
	t = __hash_remove(hc);
	up_write(&_hash_lock);

	if (t) {
		dm_sync_table(md);
		dm_table_destroy(t);
	}

	if (!dm_kobject_uevent(md, KOBJ_REMOVE, param->event_nr))
		param->flags |= DM_UEVENT_GENERATED_FLAG;

@@ -986,6 +1012,7 @@ static int do_resume(struct dm_ioctl *param)

		old_map = dm_swap_table(md, new_map);
		if (IS_ERR(old_map)) {
			dm_sync_table(md);
			dm_table_destroy(new_map);
			dm_put(md);
			return PTR_ERR(old_map);
@@ -1003,6 +1030,10 @@ static int do_resume(struct dm_ioctl *param)
			param->flags |= DM_UEVENT_GENERATED_FLAG;
	}

	/*
	 * Since dm_swap_table synchronizes RCU, nobody should be in
	 * read-side critical section already.
	 */
	if (old_map)
		dm_table_destroy(old_map);

@@ -1125,6 +1156,7 @@ static int dev_wait(struct dm_ioctl *param, size_t param_size)
	int r = 0;
	struct mapped_device *md;
	struct dm_table *table;
	int srcu_idx;

	md = find_device(param);
	if (!md)
@@ -1145,11 +1177,10 @@ static int dev_wait(struct dm_ioctl *param, size_t param_size)
	 */
	__dev_status(md, param);

	table = dm_get_live_or_inactive_table(md, param);
	if (table) {
	table = dm_get_live_or_inactive_table(md, param, &srcu_idx);
	if (table)
		retrieve_status(table, param, param_size);
		dm_table_put(table);
	}
	dm_put_live_table(md, srcu_idx);

out:
	dm_put(md);
@@ -1221,7 +1252,7 @@ static int table_load(struct dm_ioctl *param, size_t param_size)
{
	int r;
	struct hash_cell *hc;
	struct dm_table *t;
	struct dm_table *t, *old_map = NULL;
	struct mapped_device *md;
	struct target_type *immutable_target_type;

@@ -1277,14 +1308,14 @@ static int table_load(struct dm_ioctl *param, size_t param_size)
	hc = dm_get_mdptr(md);
	if (!hc || hc->md != md) {
		DMWARN("device has been removed from the dev hash table.");
		dm_table_destroy(t);
		up_write(&_hash_lock);
		dm_table_destroy(t);
		r = -ENXIO;
		goto out;
	}

	if (hc->new_map)
		dm_table_destroy(hc->new_map);
		old_map = hc->new_map;
	hc->new_map = t;
	up_write(&_hash_lock);

@@ -1292,6 +1323,11 @@ static int table_load(struct dm_ioctl *param, size_t param_size)
	__dev_status(md, param);

out:
	if (old_map) {
		dm_sync_table(md);
		dm_table_destroy(old_map);
	}

	dm_put(md);

	return r;
@@ -1301,6 +1337,7 @@ static int table_clear(struct dm_ioctl *param, size_t param_size)
{
	struct hash_cell *hc;
	struct mapped_device *md;
	struct dm_table *old_map = NULL;

	down_write(&_hash_lock);

@@ -1312,7 +1349,7 @@ static int table_clear(struct dm_ioctl *param, size_t param_size)
	}

	if (hc->new_map) {
		dm_table_destroy(hc->new_map);
		old_map = hc->new_map;
		hc->new_map = NULL;
	}

@@ -1321,6 +1358,10 @@ static int table_clear(struct dm_ioctl *param, size_t param_size)
	__dev_status(hc->md, param);
	md = hc->md;
	up_write(&_hash_lock);
	if (old_map) {
		dm_sync_table(md);
		dm_table_destroy(old_map);
	}
	dm_put(md);

	return 0;
@@ -1370,6 +1411,7 @@ static int table_deps(struct dm_ioctl *param, size_t param_size)
{
	struct mapped_device *md;
	struct dm_table *table;
	int srcu_idx;

	md = find_device(param);
	if (!md)
@@ -1377,11 +1419,10 @@ static int table_deps(struct dm_ioctl *param, size_t param_size)

	__dev_status(md, param);

	table = dm_get_live_or_inactive_table(md, param);
	if (table) {
	table = dm_get_live_or_inactive_table(md, param, &srcu_idx);
	if (table)
		retrieve_deps(table, param, param_size);
		dm_table_put(table);
	}
	dm_put_live_table(md, srcu_idx);

	dm_put(md);

@@ -1396,6 +1437,7 @@ static int table_status(struct dm_ioctl *param, size_t param_size)
{
	struct mapped_device *md;
	struct dm_table *table;
	int srcu_idx;

	md = find_device(param);
	if (!md)
@@ -1403,11 +1445,10 @@ static int table_status(struct dm_ioctl *param, size_t param_size)

	__dev_status(md, param);

	table = dm_get_live_or_inactive_table(md, param);
	if (table) {
	table = dm_get_live_or_inactive_table(md, param, &srcu_idx);
	if (table)
		retrieve_status(table, param, param_size);
		dm_table_put(table);
	}
	dm_put_live_table(md, srcu_idx);

	dm_put(md);

@@ -1443,6 +1484,7 @@ static int target_message(struct dm_ioctl *param, size_t param_size)
	struct dm_target_msg *tmsg = (void *) param + param->data_start;
	size_t maxlen;
	char *result = get_result_buffer(param, param_size, &maxlen);
	int srcu_idx;

	md = find_device(param);
	if (!md)
@@ -1470,9 +1512,9 @@ static int target_message(struct dm_ioctl *param, size_t param_size)
	if (r <= 1)
		goto out_argv;

	table = dm_get_live_table(md);
	table = dm_get_live_table(md, &srcu_idx);
	if (!table)
		goto out_argv;
		goto out_table;

	if (dm_deleting_md(md)) {
		r = -ENXIO;
@@ -1491,7 +1533,7 @@ static int target_message(struct dm_ioctl *param, size_t param_size)
	}

 out_table:
	dm_table_put(table);
	dm_put_live_table(md, srcu_idx);
 out_argv:
	kfree(argv);
 out:
drivers/md/dm-table.c: +0 −35
@@ -26,22 +26,8 @@
#define KEYS_PER_NODE (NODE_SIZE / sizeof(sector_t))
#define CHILDREN_PER_NODE (KEYS_PER_NODE + 1)

/*
 * The table has always exactly one reference from either mapped_device->map
 * or hash_cell->new_map. This reference is not counted in table->holders.
 * A pair of dm_create_table/dm_destroy_table functions is used for table
 * creation/destruction.
 *
 * Temporary references from the other code increase table->holders. A pair
 * of dm_table_get/dm_table_put functions is used to manipulate it.
 *
 * When the table is about to be destroyed, we wait for table->holders to
 * drop to zero.
 */

struct dm_table {
	struct mapped_device *md;
	atomic_t holders;
	unsigned type;

	/* btree table */
@@ -208,7 +194,6 @@ int dm_table_create(struct dm_table **result, fmode_t mode,

	INIT_LIST_HEAD(&t->devices);
	INIT_LIST_HEAD(&t->target_callbacks);
	atomic_set(&t->holders, 0);

	if (!num_targets)
		num_targets = KEYS_PER_NODE;
@@ -246,10 +231,6 @@ void dm_table_destroy(struct dm_table *t)
	if (!t)
		return;

	while (atomic_read(&t->holders))
		msleep(1);
	smp_mb();

	/* free the indexes */
	if (t->depth >= 2)
		vfree(t->index[t->depth - 2]);
@@ -274,22 +255,6 @@ void dm_table_destroy(struct dm_table *t)
	kfree(t);
}

void dm_table_get(struct dm_table *t)
{
	atomic_inc(&t->holders);
}
EXPORT_SYMBOL(dm_table_get);

void dm_table_put(struct dm_table *t)
{
	if (!t)
		return;

	smp_mb__before_atomic_dec();
	atomic_dec(&t->holders);
}
EXPORT_SYMBOL(dm_table_put);

/*
 * Checks to see if we need to extend highs or targets.
 */
drivers/md/dm.c: +93 −67
@@ -116,13 +116,20 @@ EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo);
#define DMF_NOFLUSH_SUSPENDING 5
#define DMF_MERGE_IS_OPTIONAL 6

/*
 * A dummy definition to make RCU happy.
 * struct dm_table should never be dereferenced in this file.
 */
struct dm_table {
	int undefined__;
};

/*
 * Work processed by per-device workqueue.
 */
struct mapped_device {
	struct rw_semaphore io_lock;
	struct srcu_struct io_barrier;
	struct mutex suspend_lock;
	rwlock_t map_lock;
	atomic_t holders;
	atomic_t open_count;

@@ -156,6 +163,8 @@ struct mapped_device {

	/*
	 * The current mapping.
	 * Use dm_get_live_table{_fast} or take suspend_lock for
	 * dereference.
	 */
	struct dm_table *map;

@@ -386,12 +395,14 @@ static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode,
			unsigned int cmd, unsigned long arg)
{
	struct mapped_device *md = bdev->bd_disk->private_data;
	int srcu_idx;
	struct dm_table *map;
	struct dm_target *tgt;
	int r = -ENOTTY;

retry:
	map = dm_get_live_table(md);
	map = dm_get_live_table(md, &srcu_idx);

	if (!map || !dm_table_get_size(map))
		goto out;

@@ -410,7 +421,7 @@ static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode,
		r = tgt->type->ioctl(tgt, cmd, arg);

out:
	dm_table_put(map);
	dm_put_live_table(md, srcu_idx);

	if (r == -ENOTCONN) {
		msleep(10);
@@ -509,20 +520,39 @@ static void queue_io(struct mapped_device *md, struct bio *bio)
/*
 * Everyone (including functions in this file), should use this
 * function to access the md->map field, and make sure they call
 * dm_table_put() when finished.
 * dm_put_live_table() when finished.
 */
struct dm_table *dm_get_live_table(struct mapped_device *md)
struct dm_table *dm_get_live_table(struct mapped_device *md, int *srcu_idx) __acquires(md->io_barrier)
{
	struct dm_table *t;
	unsigned long flags;
	*srcu_idx = srcu_read_lock(&md->io_barrier);

	return srcu_dereference(md->map, &md->io_barrier);
}

	read_lock_irqsave(&md->map_lock, flags);
	t = md->map;
	if (t)
		dm_table_get(t);
	read_unlock_irqrestore(&md->map_lock, flags);
void dm_put_live_table(struct mapped_device *md, int srcu_idx) __releases(md->io_barrier)
{
	srcu_read_unlock(&md->io_barrier, srcu_idx);
}

	return t;
void dm_sync_table(struct mapped_device *md)
{
	synchronize_srcu(&md->io_barrier);
	synchronize_rcu_expedited();
}

/*
 * A fast alternative to dm_get_live_table/dm_put_live_table.
 * The caller must not block between these two functions.
 */
static struct dm_table *dm_get_live_table_fast(struct mapped_device *md) __acquires(RCU)
{
	rcu_read_lock();
	return rcu_dereference(md->map);
}

static void dm_put_live_table_fast(struct mapped_device *md) __releases(RCU)
{
	rcu_read_unlock();
}

/*
@@ -1356,17 +1386,18 @@ static int __split_and_process_non_flush(struct clone_info *ci)
/*
 * Entry point to split a bio into clones and submit them to the targets.
 */
static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
static void __split_and_process_bio(struct mapped_device *md,
				    struct dm_table *map, struct bio *bio)
{
	struct clone_info ci;
	int error = 0;

	ci.map = dm_get_live_table(md);
	if (unlikely(!ci.map)) {
	if (unlikely(!map)) {
		bio_io_error(bio);
		return;
	}

	ci.map = map;
	ci.md = md;
	ci.io = alloc_io(md);
	ci.io->error = 0;
@@ -1393,7 +1424,6 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)

	/* drop the extra reference count */
	dec_pending(ci.io, error);
	dm_table_put(ci.map);
}
/*-----------------------------------------------------------------
 * CRUD END
@@ -1404,7 +1434,7 @@ static int dm_merge_bvec(struct request_queue *q,
			 struct bio_vec *biovec)
{
	struct mapped_device *md = q->queuedata;
	struct dm_table *map = dm_get_live_table(md);
	struct dm_table *map = dm_get_live_table_fast(md);
	struct dm_target *ti;
	sector_t max_sectors;
	int max_size = 0;
@@ -1414,7 +1444,7 @@ static int dm_merge_bvec(struct request_queue *q,

	ti = dm_table_find_target(map, bvm->bi_sector);
	if (!dm_target_is_valid(ti))
		goto out_table;
		goto out;

	/*
	 * Find maximum amount of I/O that won't need splitting
@@ -1443,10 +1473,8 @@ static int dm_merge_bvec(struct request_queue *q,

		max_size = 0;

out_table:
	dm_table_put(map);

out:
	dm_put_live_table_fast(md);
	/*
	 * Always allow an entire first page
	 */
@@ -1465,8 +1493,10 @@ static void _dm_request(struct request_queue *q, struct bio *bio)
	int rw = bio_data_dir(bio);
	struct mapped_device *md = q->queuedata;
	int cpu;
	int srcu_idx;
	struct dm_table *map;

	down_read(&md->io_lock);
	map = dm_get_live_table(md, &srcu_idx);

	cpu = part_stat_lock();
	part_stat_inc(cpu, &dm_disk(md)->part0, ios[rw]);
@@ -1475,7 +1505,7 @@ static void _dm_request(struct request_queue *q, struct bio *bio)

	/* if we're suspended, we have to queue this io for later */
	if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))) {
		up_read(&md->io_lock);
		dm_put_live_table(md, srcu_idx);

		if (bio_rw(bio) != READA)
			queue_io(md, bio);
@@ -1484,8 +1514,8 @@ static void _dm_request(struct request_queue *q, struct bio *bio)
		return;
	}

	__split_and_process_bio(md, bio);
	up_read(&md->io_lock);
	__split_and_process_bio(md, map, bio);
	dm_put_live_table(md, srcu_idx);
	return;
}

@@ -1671,7 +1701,8 @@ static struct request *dm_start_request(struct mapped_device *md, struct request
static void dm_request_fn(struct request_queue *q)
{
	struct mapped_device *md = q->queuedata;
	struct dm_table *map = dm_get_live_table(md);
	int srcu_idx;
	struct dm_table *map = dm_get_live_table(md, &srcu_idx);
	struct dm_target *ti;
	struct request *rq, *clone;
	sector_t pos;
@@ -1726,7 +1757,7 @@ static void dm_request_fn(struct request_queue *q)
delay_and_out:
	blk_delay_queue(q, HZ / 10);
out:
	dm_table_put(map);
	dm_put_live_table(md, srcu_idx);
}

int dm_underlying_device_busy(struct request_queue *q)
@@ -1739,14 +1770,14 @@ static int dm_lld_busy(struct request_queue *q)
{
	int r;
	struct mapped_device *md = q->queuedata;
	struct dm_table *map = dm_get_live_table(md);
	struct dm_table *map = dm_get_live_table_fast(md);

	if (!map || test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))
		r = 1;
	else
		r = dm_table_any_busy_target(map);

	dm_table_put(map);
	dm_put_live_table_fast(md);

	return r;
}
@@ -1758,7 +1789,7 @@ static int dm_any_congested(void *congested_data, int bdi_bits)
	struct dm_table *map;

	if (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) {
		map = dm_get_live_table(md);
		map = dm_get_live_table_fast(md);
		if (map) {
			/*
			 * Request-based dm cares about only own queue for
@@ -1769,9 +1800,8 @@ static int dm_any_congested(void *congested_data, int bdi_bits)
				    bdi_bits;
			else
				r = dm_table_any_congested(map, bdi_bits);

			dm_table_put(map);
		}
		dm_put_live_table_fast(md);
	}

	return r;
@@ -1876,12 +1906,14 @@ static struct mapped_device *alloc_dev(int minor)
	if (r < 0)
		goto bad_minor;

	r = init_srcu_struct(&md->io_barrier);
	if (r < 0)
		goto bad_io_barrier;

	md->type = DM_TYPE_NONE;
	init_rwsem(&md->io_lock);
	mutex_init(&md->suspend_lock);
	mutex_init(&md->type_lock);
	spin_lock_init(&md->deferred_lock);
	rwlock_init(&md->map_lock);
	atomic_set(&md->holders, 1);
	atomic_set(&md->open_count, 0);
	atomic_set(&md->event_nr, 0);
@@ -1944,6 +1976,8 @@ static struct mapped_device *alloc_dev(int minor)
bad_disk:
	blk_cleanup_queue(md->queue);
bad_queue:
	cleanup_srcu_struct(&md->io_barrier);
bad_io_barrier:
	free_minor(minor);
bad_minor:
	module_put(THIS_MODULE);
@@ -1967,6 +2001,7 @@ static void free_dev(struct mapped_device *md)
		bioset_free(md->bs);
	blk_integrity_unregister(md->disk);
	del_gendisk(md->disk);
	cleanup_srcu_struct(&md->io_barrier);
	free_minor(minor);

	spin_lock(&_minor_lock);
@@ -2109,7 +2144,6 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
	struct dm_table *old_map;
	struct request_queue *q = md->queue;
	sector_t size;
	unsigned long flags;
	int merge_is_optional;

	size = dm_table_get_size(t);
@@ -2138,9 +2172,8 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,

	merge_is_optional = dm_table_merge_is_optional(t);

	write_lock_irqsave(&md->map_lock, flags);
	old_map = md->map;
	md->map = t;
	rcu_assign_pointer(md->map, t);
	md->immutable_target_type = dm_table_get_immutable_target_type(t);

	dm_table_set_restrictions(t, q, limits);
@@ -2148,7 +2181,7 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
		set_bit(DMF_MERGE_IS_OPTIONAL, &md->flags);
	else
		clear_bit(DMF_MERGE_IS_OPTIONAL, &md->flags);
	write_unlock_irqrestore(&md->map_lock, flags);
	dm_sync_table(md);

	return old_map;
}
@@ -2159,15 +2192,13 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
static struct dm_table *__unbind(struct mapped_device *md)
{
	struct dm_table *map = md->map;
	unsigned long flags;

	if (!map)
		return NULL;

	dm_table_event_callback(map, NULL, NULL);
	write_lock_irqsave(&md->map_lock, flags);
	md->map = NULL;
	write_unlock_irqrestore(&md->map_lock, flags);
	rcu_assign_pointer(md->map, NULL);
	dm_sync_table(md);

	return map;
}
@@ -2319,11 +2350,12 @@ EXPORT_SYMBOL_GPL(dm_device_name);
static void __dm_destroy(struct mapped_device *md, bool wait)
{
	struct dm_table *map;
	int srcu_idx;

	might_sleep();

	spin_lock(&_minor_lock);
	map = dm_get_live_table(md);
	map = dm_get_live_table(md, &srcu_idx);
	idr_replace(&_minor_idr, MINOR_ALLOCED, MINOR(disk_devt(dm_disk(md))));
	set_bit(DMF_FREEING, &md->flags);
	spin_unlock(&_minor_lock);
@@ -2333,6 +2365,9 @@ static void __dm_destroy(struct mapped_device *md, bool wait)
		dm_table_postsuspend_targets(map);
	}

	/* dm_put_live_table must be before msleep, otherwise deadlock is possible */
	dm_put_live_table(md, srcu_idx);

	/*
	 * Rare, but there may be I/O requests still going to complete,
	 * for example.  Wait for all references to disappear.
@@ -2347,7 +2382,6 @@ static void __dm_destroy(struct mapped_device *md, bool wait)
		       dm_device_name(md), atomic_read(&md->holders));

	dm_sysfs_exit(md);
	dm_table_put(map);
	dm_table_destroy(__unbind(md));
	free_dev(md);
}
@@ -2404,8 +2438,10 @@ static void dm_wq_work(struct work_struct *work)
	struct mapped_device *md = container_of(work, struct mapped_device,
						work);
	struct bio *c;
	int srcu_idx;
	struct dm_table *map;

	down_read(&md->io_lock);
	map = dm_get_live_table(md, &srcu_idx);

	while (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) {
		spin_lock_irq(&md->deferred_lock);
@@ -2415,17 +2451,13 @@ static void dm_wq_work(struct work_struct *work)
		if (!c)
			break;

		up_read(&md->io_lock);

		if (dm_request_based(md))
			generic_make_request(c);
		else
			__split_and_process_bio(md, c);

		down_read(&md->io_lock);
			__split_and_process_bio(md, map, c);
	}

	up_read(&md->io_lock);
	dm_put_live_table(md, srcu_idx);
}

static void dm_queue_flush(struct mapped_device *md)
@@ -2457,10 +2489,10 @@ struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table)
	 * reappear.
	 */
	if (dm_table_has_no_data_devices(table)) {
		live_map = dm_get_live_table(md);
		live_map = dm_get_live_table_fast(md);
		if (live_map)
			limits = md->queue->limits;
		dm_table_put(live_map);
		dm_put_live_table_fast(md);
	}

	if (!live_map) {
@@ -2540,7 +2572,7 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
		goto out_unlock;
	}

	map = dm_get_live_table(md);
	map = md->map;

	/*
	 * DMF_NOFLUSH_SUSPENDING must be set before presuspend.
@@ -2561,7 +2593,7 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
	if (!noflush && do_lockfs) {
		r = lock_fs(md);
		if (r)
			goto out;
			goto out_unlock;
	}

	/*
@@ -2576,9 +2608,8 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
	 * (dm_wq_work), we set BMF_BLOCK_IO_FOR_SUSPEND and call
	 * flush_workqueue(md->wq).
	 */
	down_write(&md->io_lock);
	set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
	up_write(&md->io_lock);
	synchronize_srcu(&md->io_barrier);

	/*
	 * Stop md->queue before flushing md->wq in case request-based
@@ -2596,10 +2627,9 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
	 */
	r = dm_wait_for_completion(md, TASK_INTERRUPTIBLE);

	down_write(&md->io_lock);
	if (noflush)
		clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
	up_write(&md->io_lock);
	synchronize_srcu(&md->io_barrier);

	/* were we interrupted ? */
	if (r < 0) {
@@ -2609,7 +2639,7 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
			start_queue(md->queue);

		unlock_fs(md);
		goto out; /* pushback list is already flushed, so skip flush */
		goto out_unlock; /* pushback list is already flushed, so skip flush */
	}

	/*
@@ -2622,9 +2652,6 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)

	dm_table_postsuspend_targets(map);

out:
	dm_table_put(map);

out_unlock:
	mutex_unlock(&md->suspend_lock);
	return r;
@@ -2639,7 +2666,7 @@ int dm_resume(struct mapped_device *md)
	if (!dm_suspended_md(md))
		goto out;

	map = dm_get_live_table(md);
	map = md->map;
	if (!map || !dm_table_get_size(map))
		goto out;

@@ -2663,7 +2690,6 @@ int dm_resume(struct mapped_device *md)

	r = 0;
out:
	dm_table_put(map);
	mutex_unlock(&md->suspend_lock);

	return r;
include/linux/device-mapper.h: +3 −3
@@ -446,9 +446,9 @@ int __must_check dm_set_target_max_io_len(struct dm_target *ti, sector_t len);
/*
 * Table reference counting.
 */
struct dm_table *dm_get_live_table(struct mapped_device *md);
void dm_table_get(struct dm_table *t);
void dm_table_put(struct dm_table *t);
struct dm_table *dm_get_live_table(struct mapped_device *md, int *srcu_idx);
void dm_put_live_table(struct mapped_device *md, int srcu_idx);
void dm_sync_table(struct mapped_device *md);

/*
 * Queries
include/uapi/linux/dm-ioctl.h: +2 −2
@@ -267,9 +267,9 @@ enum {
#define DM_DEV_SET_GEOMETRY	_IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)

#define DM_VERSION_MAJOR	4
#define DM_VERSION_MINOR	24
#define DM_VERSION_MINOR	25
#define DM_VERSION_PATCHLEVEL	0
#define DM_VERSION_EXTRA	"-ioctl (2013-01-15)"
#define DM_VERSION_EXTRA	"-ioctl (2013-06-26)"

/* Status bits */
#define DM_READONLY_FLAG	(1 << 0) /* In/Out */