
Commit 83d5e5b0 authored by Mikulas Patocka, committed by Alasdair G Kergon

dm: optimize use SRCU and RCU



This patch removes "io_lock" and "map_lock" in struct mapped_device and
"holders" in struct dm_table and replaces these mechanisms with
sleepable RCU (SRCU).

Previously, the code called "dm_get_live_table" and "dm_table_put" to
get and release the table. Now the code calls "dm_get_live_table" and
"dm_put_live_table": dm_get_live_table takes the SRCU read lock and
dm_put_live_table releases it.

dm_get_live_table_fast/dm_put_live_table_fast can be used instead of
dm_get_live_table/dm_put_live_table. These *_fast functions use
non-sleepable RCU, so the caller must not block between them.
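
For illustration only (this sketch is not part of the patch; example_reader,
example_fast_reader and the inspect_*() callees are hypothetical, and the
*_fast variants are static to drivers/md/dm.c), the two reader-side patterns
look roughly like this:

/* Sleepable reader: SRCU, so it may block while holding the table. */
static void example_reader(struct mapped_device *md)
{
	int srcu_idx;
	struct dm_table *map = dm_get_live_table(md, &srcu_idx);

	if (map)
		inspect_table(map);		/* hypothetical work; sleeping is allowed */
	dm_put_live_table(md, srcu_idx);
}

/* Fast reader: plain RCU, so the caller must not block. */
static void example_fast_reader(struct mapped_device *md)
{
	struct dm_table *map = dm_get_live_table_fast(md);

	if (map)
		inspect_table_quickly(map);	/* hypothetical work; must not sleep */
	dm_put_live_table_fast(md);
}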

If the code changes the active or inactive dm table, it must call
dm_sync_table before destroying the old table.
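
The writer side then follows the usual RCU publish-then-synchronize pattern.
The sketch below condenses what __bind() and table_load() do in the diff that
follows; example_replace_table() itself is a hypothetical helper:

static void example_replace_table(struct mapped_device *md,
				  struct dm_table *new_map)
{
	struct dm_table *old_map = md->map;

	rcu_assign_pointer(md->map, new_map);	/* publish the new table */
	dm_sync_table(md);			/* wait for all SRCU/RCU readers to finish */
	if (old_map)
		dm_table_destroy(old_map);	/* no reader can still hold it */
}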

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
parent 2480945c
drivers/md/dm-ioctl.c  +82 −40
@@ -36,6 +36,14 @@ struct hash_cell {
 	struct dm_table *new_map;
 };
 
+/*
+ * A dummy definition to make RCU happy.
+ * struct dm_table should never be dereferenced in this file.
+ */
+struct dm_table {
+	int undefined__;
+};
+
 struct vers_iter {
     size_t param_size;
     struct dm_target_versions *vers, *old_vers;
@@ -242,9 +250,10 @@ static int dm_hash_insert(const char *name, const char *uuid, struct mapped_devi
 	return -EBUSY;
 }
 
-static void __hash_remove(struct hash_cell *hc)
+static struct dm_table *__hash_remove(struct hash_cell *hc)
 {
 	struct dm_table *table;
+	int srcu_idx;
 
 	/* remove from the dev hash */
 	list_del(&hc->uuid_list);
@@ -253,16 +262,18 @@ static void __hash_remove(struct hash_cell *hc)
 	dm_set_mdptr(hc->md, NULL);
 	mutex_unlock(&dm_hash_cells_mutex);
 
-	table = dm_get_live_table(hc->md);
-	if (table) {
+	table = dm_get_live_table(hc->md, &srcu_idx);
+	if (table)
 		dm_table_event(table);
-		dm_table_put(table);
-	}
+	dm_put_live_table(hc->md, srcu_idx);
 
+	table = NULL;
 	if (hc->new_map)
-		dm_table_destroy(hc->new_map);
+		table = hc->new_map;
 	dm_put(hc->md);
 	free_cell(hc);
+
+	return table;
 }
 
 static void dm_hash_remove_all(int keep_open_devices)
@@ -270,6 +281,7 @@ static void dm_hash_remove_all(int keep_open_devices)
 	int i, dev_skipped;
 	struct hash_cell *hc;
 	struct mapped_device *md;
+	struct dm_table *t;
 
 retry:
 	dev_skipped = 0;
@@ -287,10 +299,14 @@ static void dm_hash_remove_all(int keep_open_devices)
 				continue;
 			}
 
-			__hash_remove(hc);
+			t = __hash_remove(hc);
 
 			up_write(&_hash_lock);
 
+			if (t) {
+				dm_sync_table(md);
+				dm_table_destroy(t);
+			}
 			dm_put(md);
 			if (likely(keep_open_devices))
 				dm_destroy(md);
@@ -356,6 +372,7 @@ static struct mapped_device *dm_hash_rename(struct dm_ioctl *param,
 	struct dm_table *table;
 	struct mapped_device *md;
 	unsigned change_uuid = (param->flags & DM_UUID_FLAG) ? 1 : 0;
+	int srcu_idx;
 
 	/*
 	 * duplicate new.
@@ -418,11 +435,10 @@ static struct mapped_device *dm_hash_rename(struct dm_ioctl *param,
 	/*
 	 * Wake up any dm event waiters.
 	 */
-	table = dm_get_live_table(hc->md);
-	if (table) {
+	table = dm_get_live_table(hc->md, &srcu_idx);
+	if (table)
 		dm_table_event(table);
-		dm_table_put(table);
-	}
+	dm_put_live_table(hc->md, srcu_idx);
 
 	if (!dm_kobject_uevent(hc->md, KOBJ_CHANGE, param->event_nr))
 		param->flags |= DM_UEVENT_GENERATED_FLAG;
@@ -620,11 +636,14 @@ static int check_name(const char *name)
  * _hash_lock without first calling dm_table_put, because dm_table_destroy
  * waits for this dm_table_put and could be called under this lock.
  */
-static struct dm_table *dm_get_inactive_table(struct mapped_device *md)
+static struct dm_table *dm_get_inactive_table(struct mapped_device *md, int *srcu_idx)
 {
 	struct hash_cell *hc;
 	struct dm_table *table = NULL;
 
+	/* increment rcu count, we don't care about the table pointer */
+	dm_get_live_table(md, srcu_idx);
+
 	down_read(&_hash_lock);
 	hc = dm_get_mdptr(md);
 	if (!hc || hc->md != md) {
@@ -633,8 +652,6 @@ static struct dm_table *dm_get_inactive_table(struct mapped_device *md)
 	}
 
 	table = hc->new_map;
-	if (table)
-		dm_table_get(table);
 
 out:
 	up_read(&_hash_lock);
@@ -643,10 +660,11 @@ static struct dm_table *dm_get_inactive_table(struct mapped_device *md)
 }
 
 static struct dm_table *dm_get_live_or_inactive_table(struct mapped_device *md,
-						      struct dm_ioctl *param)
+						      struct dm_ioctl *param,
+						      int *srcu_idx)
 {
 	return (param->flags & DM_QUERY_INACTIVE_TABLE_FLAG) ?
-		dm_get_inactive_table(md) : dm_get_live_table(md);
+		dm_get_inactive_table(md, srcu_idx) : dm_get_live_table(md, srcu_idx);
 }
 
 /*
@@ -657,6 +675,7 @@ static void __dev_status(struct mapped_device *md, struct dm_ioctl *param)
 {
 	struct gendisk *disk = dm_disk(md);
 	struct dm_table *table;
+	int srcu_idx;
 
 	param->flags &= ~(DM_SUSPEND_FLAG | DM_READONLY_FLAG |
 			  DM_ACTIVE_PRESENT_FLAG);
@@ -676,26 +695,27 @@ static void __dev_status(struct mapped_device *md, struct dm_ioctl *param)
 	param->event_nr = dm_get_event_nr(md);
 	param->target_count = 0;
 
-	table = dm_get_live_table(md);
+	table = dm_get_live_table(md, &srcu_idx);
 	if (table) {
 		if (!(param->flags & DM_QUERY_INACTIVE_TABLE_FLAG)) {
 			if (get_disk_ro(disk))
 				param->flags |= DM_READONLY_FLAG;
 			param->target_count = dm_table_get_num_targets(table);
 		}
-		dm_table_put(table);
 
 		param->flags |= DM_ACTIVE_PRESENT_FLAG;
 	}
+	dm_put_live_table(md, srcu_idx);
 
 	if (param->flags & DM_QUERY_INACTIVE_TABLE_FLAG) {
-		table = dm_get_inactive_table(md);
+		int srcu_idx;
+		table = dm_get_inactive_table(md, &srcu_idx);
 		if (table) {
 			if (!(dm_table_get_mode(table) & FMODE_WRITE))
 				param->flags |= DM_READONLY_FLAG;
 			param->target_count = dm_table_get_num_targets(table);
-			dm_table_put(table);
 		}
+		dm_put_live_table(md, srcu_idx);
 	}
 }
 
@@ -796,6 +816,7 @@ static int dev_remove(struct dm_ioctl *param, size_t param_size)
 	struct hash_cell *hc;
 	struct mapped_device *md;
 	int r;
+	struct dm_table *t;
 
 	down_write(&_hash_lock);
 	hc = __find_device_hash_cell(param);
@@ -819,9 +840,14 @@ static int dev_remove(struct dm_ioctl *param, size_t param_size)
 		return r;
 	}
 
-	__hash_remove(hc);
+	t = __hash_remove(hc);
 	up_write(&_hash_lock);
 
+	if (t) {
+		dm_sync_table(md);
+		dm_table_destroy(t);
+	}
+
 	if (!dm_kobject_uevent(md, KOBJ_REMOVE, param->event_nr))
 		param->flags |= DM_UEVENT_GENERATED_FLAG;
 
@@ -986,6 +1012,7 @@ static int do_resume(struct dm_ioctl *param)
 
 		old_map = dm_swap_table(md, new_map);
 		if (IS_ERR(old_map)) {
+			dm_sync_table(md);
 			dm_table_destroy(new_map);
 			dm_put(md);
 			return PTR_ERR(old_map);
@@ -1003,6 +1030,10 @@ static int do_resume(struct dm_ioctl *param)
 			param->flags |= DM_UEVENT_GENERATED_FLAG;
 	}
 
+	/*
+	 * Since dm_swap_table synchronizes RCU, nobody should be in
+	 * read-side critical section already.
+	 */
 	if (old_map)
 		dm_table_destroy(old_map);
 
@@ -1125,6 +1156,7 @@ static int dev_wait(struct dm_ioctl *param, size_t param_size)
 	int r = 0;
 	struct mapped_device *md;
 	struct dm_table *table;
+	int srcu_idx;
 
 	md = find_device(param);
 	if (!md)
@@ -1145,11 +1177,10 @@ static int dev_wait(struct dm_ioctl *param, size_t param_size)
 	 */
 	__dev_status(md, param);
 
-	table = dm_get_live_or_inactive_table(md, param);
-	if (table) {
+	table = dm_get_live_or_inactive_table(md, param, &srcu_idx);
+	if (table)
 		retrieve_status(table, param, param_size);
-		dm_table_put(table);
-	}
+	dm_put_live_table(md, srcu_idx);
 
 out:
 	dm_put(md);
@@ -1221,7 +1252,7 @@ static int table_load(struct dm_ioctl *param, size_t param_size)
 {
 	int r;
 	struct hash_cell *hc;
-	struct dm_table *t;
+	struct dm_table *t, *old_map = NULL;
 	struct mapped_device *md;
 	struct target_type *immutable_target_type;
 
@@ -1277,14 +1308,14 @@ static int table_load(struct dm_ioctl *param, size_t param_size)
 	hc = dm_get_mdptr(md);
 	if (!hc || hc->md != md) {
 		DMWARN("device has been removed from the dev hash table.");
-		dm_table_destroy(t);
 		up_write(&_hash_lock);
+		dm_table_destroy(t);
 		r = -ENXIO;
 		goto out;
 	}
 
 	if (hc->new_map)
-		dm_table_destroy(hc->new_map);
+		old_map = hc->new_map;
 	hc->new_map = t;
 	up_write(&_hash_lock);
 
@@ -1292,6 +1323,11 @@ static int table_load(struct dm_ioctl *param, size_t param_size)
 	__dev_status(md, param);
 
 out:
+	if (old_map) {
+		dm_sync_table(md);
+		dm_table_destroy(old_map);
+	}
+
 	dm_put(md);
 
 	return r;
@@ -1301,6 +1337,7 @@ static int table_clear(struct dm_ioctl *param, size_t param_size)
 {
 	struct hash_cell *hc;
 	struct mapped_device *md;
+	struct dm_table *old_map = NULL;
 
 	down_write(&_hash_lock);
 
@@ -1312,7 +1349,7 @@ static int table_clear(struct dm_ioctl *param, size_t param_size)
 	}
 
 	if (hc->new_map) {
-		dm_table_destroy(hc->new_map);
+		old_map = hc->new_map;
 		hc->new_map = NULL;
 	}
 
@@ -1321,6 +1358,10 @@ static int table_clear(struct dm_ioctl *param, size_t param_size)
 	__dev_status(hc->md, param);
 	md = hc->md;
 	up_write(&_hash_lock);
+	if (old_map) {
+		dm_sync_table(md);
+		dm_table_destroy(old_map);
+	}
 	dm_put(md);
 
 	return 0;
@@ -1370,6 +1411,7 @@ static int table_deps(struct dm_ioctl *param, size_t param_size)
 {
 	struct mapped_device *md;
 	struct dm_table *table;
+	int srcu_idx;
 
 	md = find_device(param);
 	if (!md)
@@ -1377,11 +1419,10 @@ static int table_deps(struct dm_ioctl *param, size_t param_size)
 
 	__dev_status(md, param);
 
-	table = dm_get_live_or_inactive_table(md, param);
-	if (table) {
+	table = dm_get_live_or_inactive_table(md, param, &srcu_idx);
+	if (table)
 		retrieve_deps(table, param, param_size);
-		dm_table_put(table);
-	}
+	dm_put_live_table(md, srcu_idx);
 
 	dm_put(md);
 
@@ -1396,6 +1437,7 @@ static int table_status(struct dm_ioctl *param, size_t param_size)
 {
 	struct mapped_device *md;
 	struct dm_table *table;
+	int srcu_idx;
 
 	md = find_device(param);
 	if (!md)
@@ -1403,11 +1445,10 @@ static int table_status(struct dm_ioctl *param, size_t param_size)
 
 	__dev_status(md, param);
 
-	table = dm_get_live_or_inactive_table(md, param);
-	if (table) {
+	table = dm_get_live_or_inactive_table(md, param, &srcu_idx);
+	if (table)
 		retrieve_status(table, param, param_size);
-		dm_table_put(table);
-	}
+	dm_put_live_table(md, srcu_idx);
 
 	dm_put(md);
 
@@ -1443,6 +1484,7 @@ static int target_message(struct dm_ioctl *param, size_t param_size)
 	struct dm_target_msg *tmsg = (void *) param + param->data_start;
 	size_t maxlen;
 	char *result = get_result_buffer(param, param_size, &maxlen);
+	int srcu_idx;
 
 	md = find_device(param);
 	if (!md)
@@ -1470,9 +1512,9 @@ static int target_message(struct dm_ioctl *param, size_t param_size)
 	if (r <= 1)
 		goto out_argv;
 
-	table = dm_get_live_table(md);
+	table = dm_get_live_table(md, &srcu_idx);
 	if (!table)
-		goto out_argv;
+		goto out_table;
 
 	if (dm_deleting_md(md)) {
 		r = -ENXIO;
@@ -1491,7 +1533,7 @@ static int target_message(struct dm_ioctl *param, size_t param_size)
 	}
 
 out_table:
-	dm_table_put(table);
+	dm_put_live_table(md, srcu_idx);
 out_argv:
 	kfree(argv);
 out:
drivers/md/dm-table.c  +0 −35
@@ -26,22 +26,8 @@
 #define KEYS_PER_NODE (NODE_SIZE / sizeof(sector_t))
 #define CHILDREN_PER_NODE (KEYS_PER_NODE + 1)
 
-/*
- * The table has always exactly one reference from either mapped_device->map
- * or hash_cell->new_map. This reference is not counted in table->holders.
- * A pair of dm_create_table/dm_destroy_table functions is used for table
- * creation/destruction.
- *
- * Temporary references from the other code increase table->holders. A pair
- * of dm_table_get/dm_table_put functions is used to manipulate it.
- *
- * When the table is about to be destroyed, we wait for table->holders to
- * drop to zero.
- */
-
 struct dm_table {
 	struct mapped_device *md;
-	atomic_t holders;
 	unsigned type;
 
 	/* btree table */
@@ -208,7 +194,6 @@ int dm_table_create(struct dm_table **result, fmode_t mode,
 
 	INIT_LIST_HEAD(&t->devices);
 	INIT_LIST_HEAD(&t->target_callbacks);
-	atomic_set(&t->holders, 0);
 
 	if (!num_targets)
 		num_targets = KEYS_PER_NODE;
@@ -246,10 +231,6 @@ void dm_table_destroy(struct dm_table *t)
 	if (!t)
 		return;
 
-	while (atomic_read(&t->holders))
-		msleep(1);
-	smp_mb();
-
 	/* free the indexes */
 	if (t->depth >= 2)
 		vfree(t->index[t->depth - 2]);
@@ -274,22 +255,6 @@ void dm_table_destroy(struct dm_table *t)
 	kfree(t);
 }
 
-void dm_table_get(struct dm_table *t)
-{
-	atomic_inc(&t->holders);
-}
-EXPORT_SYMBOL(dm_table_get);
-
-void dm_table_put(struct dm_table *t)
-{
-	if (!t)
-		return;
-
-	smp_mb__before_atomic_dec();
-	atomic_dec(&t->holders);
-}
-EXPORT_SYMBOL(dm_table_put);
-
 /*
  * Checks to see if we need to extend highs or targets.
  */
drivers/md/dm.c  +93 −67
@@ -116,13 +116,20 @@ EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo);
 #define DMF_NOFLUSH_SUSPENDING 5
 #define DMF_MERGE_IS_OPTIONAL 6
 
+/*
+ * A dummy definition to make RCU happy.
+ * struct dm_table should never be dereferenced in this file.
+ */
+struct dm_table {
+	int undefined__;
+};
+
 /*
  * Work processed by per-device workqueue.
  */
 struct mapped_device {
-	struct rw_semaphore io_lock;
+	struct srcu_struct io_barrier;
 	struct mutex suspend_lock;
-	rwlock_t map_lock;
 	atomic_t holders;
 	atomic_t open_count;
 
@@ -156,6 +163,8 @@ struct mapped_device {
 
 	/*
 	 * The current mapping.
+	 * Use dm_get_live_table{_fast} or take suspend_lock for
+	 * dereference.
 	 */
 	struct dm_table *map;
 
@@ -386,12 +395,14 @@ static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode,
 			unsigned int cmd, unsigned long arg)
 {
 	struct mapped_device *md = bdev->bd_disk->private_data;
+	int srcu_idx;
 	struct dm_table *map;
 	struct dm_target *tgt;
 	int r = -ENOTTY;
 
 retry:
-	map = dm_get_live_table(md);
+	map = dm_get_live_table(md, &srcu_idx);
+
 	if (!map || !dm_table_get_size(map))
 		goto out;
 
@@ -410,7 +421,7 @@ static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode,
 		r = tgt->type->ioctl(tgt, cmd, arg);
 
 out:
-	dm_table_put(map);
+	dm_put_live_table(md, srcu_idx);
 
 	if (r == -ENOTCONN) {
 		msleep(10);
@@ -509,20 +520,39 @@ static void queue_io(struct mapped_device *md, struct bio *bio)
 /*
  * Everyone (including functions in this file), should use this
  * function to access the md->map field, and make sure they call
- * dm_table_put() when finished.
+ * dm_put_live_table() when finished.
  */
-struct dm_table *dm_get_live_table(struct mapped_device *md)
+struct dm_table *dm_get_live_table(struct mapped_device *md, int *srcu_idx) __acquires(md->io_barrier)
 {
-	struct dm_table *t;
-	unsigned long flags;
+	*srcu_idx = srcu_read_lock(&md->io_barrier);
 
-	read_lock_irqsave(&md->map_lock, flags);
-	t = md->map;
-	if (t)
-		dm_table_get(t);
-	read_unlock_irqrestore(&md->map_lock, flags);
+	return srcu_dereference(md->map, &md->io_barrier);
+}
+
+void dm_put_live_table(struct mapped_device *md, int srcu_idx) __releases(md->io_barrier)
+{
+	srcu_read_unlock(&md->io_barrier, srcu_idx);
+}
 
-	return t;
+void dm_sync_table(struct mapped_device *md)
+{
+	synchronize_srcu(&md->io_barrier);
+	synchronize_rcu_expedited();
+}
+
+/*
+ * A fast alternative to dm_get_live_table/dm_put_live_table.
+ * The caller must not block between these two functions.
+ */
+static struct dm_table *dm_get_live_table_fast(struct mapped_device *md) __acquires(RCU)
+{
+	rcu_read_lock();
+	return rcu_dereference(md->map);
+}
+
+static void dm_put_live_table_fast(struct mapped_device *md) __releases(RCU)
+{
+	rcu_read_unlock();
 }
 
 /*
@@ -1356,17 +1386,18 @@ static int __split_and_process_non_flush(struct clone_info *ci)
 /*
  * Entry point to split a bio into clones and submit them to the targets.
  */
-static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
+static void __split_and_process_bio(struct mapped_device *md,
+				    struct dm_table *map, struct bio *bio)
 {
 	struct clone_info ci;
 	int error = 0;
 
-	ci.map = dm_get_live_table(md);
-	if (unlikely(!ci.map)) {
+	if (unlikely(!map)) {
 		bio_io_error(bio);
 		return;
 	}
 
+	ci.map = map;
 	ci.md = md;
 	ci.io = alloc_io(md);
 	ci.io->error = 0;
@@ -1393,7 +1424,6 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
 
 	/* drop the extra reference count */
 	dec_pending(ci.io, error);
-	dm_table_put(ci.map);
 }
 /*-----------------------------------------------------------------
  * CRUD END
@@ -1404,7 +1434,7 @@ static int dm_merge_bvec(struct request_queue *q,
 			 struct bio_vec *biovec)
 {
 	struct mapped_device *md = q->queuedata;
-	struct dm_table *map = dm_get_live_table(md);
+	struct dm_table *map = dm_get_live_table_fast(md);
 	struct dm_target *ti;
 	sector_t max_sectors;
 	int max_size = 0;
@@ -1414,7 +1444,7 @@
 
 	ti = dm_table_find_target(map, bvm->bi_sector);
 	if (!dm_target_is_valid(ti))
-		goto out_table;
+		goto out;
 
 	/*
 	 * Find maximum amount of I/O that won't need splitting
@@ -1443,10 +1473,8 @@
 
 		max_size = 0;
 
-out_table:
-	dm_table_put(map);
-
 out:
+	dm_put_live_table_fast(md);
 	/*
 	 * Always allow an entire first page
 	 */
@@ -1465,8 +1493,10 @@ static void _dm_request(struct request_queue *q, struct bio *bio)
 	int rw = bio_data_dir(bio);
 	struct mapped_device *md = q->queuedata;
 	int cpu;
+	int srcu_idx;
+	struct dm_table *map;
 
-	down_read(&md->io_lock);
+	map = dm_get_live_table(md, &srcu_idx);
 
 	cpu = part_stat_lock();
 	part_stat_inc(cpu, &dm_disk(md)->part0, ios[rw]);
@@ -1475,7 +1505,7 @@ static void _dm_request(struct request_queue *q, struct bio *bio)
 
 	/* if we're suspended, we have to queue this io for later */
 	if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))) {
-		up_read(&md->io_lock);
+		dm_put_live_table(md, srcu_idx);
 
 		if (bio_rw(bio) != READA)
 			queue_io(md, bio);
@@ -1484,8 +1514,8 @@ static void _dm_request(struct request_queue *q, struct bio *bio)
 		return;
 	}
 
-	__split_and_process_bio(md, bio);
-	up_read(&md->io_lock);
+	__split_and_process_bio(md, map, bio);
+	dm_put_live_table(md, srcu_idx);
 	return;
 }
 
@@ -1671,7 +1701,8 @@ static struct request *dm_start_request(struct mapped_device *md, struct request
 static void dm_request_fn(struct request_queue *q)
 {
 	struct mapped_device *md = q->queuedata;
-	struct dm_table *map = dm_get_live_table(md);
+	int srcu_idx;
+	struct dm_table *map = dm_get_live_table(md, &srcu_idx);
 	struct dm_target *ti;
 	struct request *rq, *clone;
 	sector_t pos;
@@ -1726,7 +1757,7 @@ static void dm_request_fn(struct request_queue *q)
delay_and_out:
 	blk_delay_queue(q, HZ / 10);
out:
-	dm_table_put(map);
+	dm_put_live_table(md, srcu_idx);
 }
 
 int dm_underlying_device_busy(struct request_queue *q)
@@ -1739,14 +1770,14 @@ static int dm_lld_busy(struct request_queue *q)
 {
 	int r;
 	struct mapped_device *md = q->queuedata;
-	struct dm_table *map = dm_get_live_table(md);
+	struct dm_table *map = dm_get_live_table_fast(md);
 
 	if (!map || test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))
 		r = 1;
 	else
 		r = dm_table_any_busy_target(map);
 
-	dm_table_put(map);
+	dm_put_live_table_fast(md);
 
 	return r;
 }
@@ -1758,7 +1789,7 @@ static int dm_any_congested(void *congested_data, int bdi_bits)
 	struct dm_table *map;
 
 	if (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) {
-		map = dm_get_live_table(md);
+		map = dm_get_live_table_fast(md);
 		if (map) {
 			/*
 			 * Request-based dm cares about only own queue for
@@ -1769,9 +1800,8 @@ static int dm_any_congested(void *congested_data, int bdi_bits)
 				    bdi_bits;
 			else
 				r = dm_table_any_congested(map, bdi_bits);
-
-			dm_table_put(map);
 		}
+		dm_put_live_table_fast(md);
 	}
 
 	return r;
@@ -1876,12 +1906,14 @@ static struct mapped_device *alloc_dev(int minor)
 	if (r < 0)
 		goto bad_minor;
 
+	r = init_srcu_struct(&md->io_barrier);
+	if (r < 0)
+		goto bad_io_barrier;
+
 	md->type = DM_TYPE_NONE;
-	init_rwsem(&md->io_lock);
 	mutex_init(&md->suspend_lock);
 	mutex_init(&md->type_lock);
 	spin_lock_init(&md->deferred_lock);
-	rwlock_init(&md->map_lock);
 	atomic_set(&md->holders, 1);
 	atomic_set(&md->open_count, 0);
 	atomic_set(&md->event_nr, 0);
@@ -1944,6 +1976,8 @@ static struct mapped_device *alloc_dev(int minor)
bad_disk:
 	blk_cleanup_queue(md->queue);
bad_queue:
+	cleanup_srcu_struct(&md->io_barrier);
+bad_io_barrier:
 	free_minor(minor);
bad_minor:
 	module_put(THIS_MODULE);
@@ -1967,6 +2001,7 @@ static void free_dev(struct mapped_device *md)
 		bioset_free(md->bs);
 	blk_integrity_unregister(md->disk);
 	del_gendisk(md->disk);
+	cleanup_srcu_struct(&md->io_barrier);
 	free_minor(minor);
 
 	spin_lock(&_minor_lock);
@@ -2109,7 +2144,6 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
 	struct dm_table *old_map;
 	struct request_queue *q = md->queue;
 	sector_t size;
-	unsigned long flags;
 	int merge_is_optional;
 
 	size = dm_table_get_size(t);
@@ -2138,9 +2172,8 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
 
 	merge_is_optional = dm_table_merge_is_optional(t);
 
-	write_lock_irqsave(&md->map_lock, flags);
 	old_map = md->map;
-	md->map = t;
+	rcu_assign_pointer(md->map, t);
 	md->immutable_target_type = dm_table_get_immutable_target_type(t);
 
 	dm_table_set_restrictions(t, q, limits);
@@ -2148,7 +2181,7 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
 		set_bit(DMF_MERGE_IS_OPTIONAL, &md->flags);
 	else
 		clear_bit(DMF_MERGE_IS_OPTIONAL, &md->flags);
-	write_unlock_irqrestore(&md->map_lock, flags);
+	dm_sync_table(md);
 
 	return old_map;
 }
@@ -2159,15 +2192,13 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
 static struct dm_table *__unbind(struct mapped_device *md)
 {
 	struct dm_table *map = md->map;
-	unsigned long flags;
 
 	if (!map)
 		return NULL;
 
 	dm_table_event_callback(map, NULL, NULL);
-	write_lock_irqsave(&md->map_lock, flags);
-	md->map = NULL;
-	write_unlock_irqrestore(&md->map_lock, flags);
+	rcu_assign_pointer(md->map, NULL);
+	dm_sync_table(md);
 
 	return map;
 }
@@ -2319,11 +2350,12 @@ EXPORT_SYMBOL_GPL(dm_device_name);
 static void __dm_destroy(struct mapped_device *md, bool wait)
 {
 	struct dm_table *map;
+	int srcu_idx;
 
 	might_sleep();
 
 	spin_lock(&_minor_lock);
-	map = dm_get_live_table(md);
+	map = dm_get_live_table(md, &srcu_idx);
 	idr_replace(&_minor_idr, MINOR_ALLOCED, MINOR(disk_devt(dm_disk(md))));
 	set_bit(DMF_FREEING, &md->flags);
 	spin_unlock(&_minor_lock);
@@ -2333,6 +2365,9 @@ static void __dm_destroy(struct mapped_device *md, bool wait)
 		dm_table_postsuspend_targets(map);
 	}
 
+	/* dm_put_live_table must be before msleep, otherwise deadlock is possible */
+	dm_put_live_table(md, srcu_idx);
+
 	/*
 	 * Rare, but there may be I/O requests still going to complete,
	 * for example.  Wait for all references to disappear.
@@ -2347,7 +2382,6 @@ static void __dm_destroy(struct mapped_device *md, bool wait)
 		       dm_device_name(md), atomic_read(&md->holders));
 
 	dm_sysfs_exit(md);
-	dm_table_put(map);
 	dm_table_destroy(__unbind(md));
 	free_dev(md);
 }
@@ -2404,8 +2438,10 @@ static void dm_wq_work(struct work_struct *work)
 	struct mapped_device *md = container_of(work, struct mapped_device,
 						work);
 	struct bio *c;
+	int srcu_idx;
+	struct dm_table *map;
 
-	down_read(&md->io_lock);
+	map = dm_get_live_table(md, &srcu_idx);
 
 	while (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) {
 		spin_lock_irq(&md->deferred_lock);
@@ -2415,17 +2451,13 @@ static void dm_wq_work(struct work_struct *work)
 		if (!c)
 			break;
 
-		up_read(&md->io_lock);
-
 		if (dm_request_based(md))
 			generic_make_request(c);
 		else
-			__split_and_process_bio(md, c);
-
-		down_read(&md->io_lock);
+			__split_and_process_bio(md, map, c);
 	}
 
-	up_read(&md->io_lock);
+	dm_put_live_table(md, srcu_idx);
 }
 
 static void dm_queue_flush(struct mapped_device *md)
@@ -2457,10 +2489,10 @@ struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table)
 	 * reappear.
 	 */
 	if (dm_table_has_no_data_devices(table)) {
-		live_map = dm_get_live_table(md);
+		live_map = dm_get_live_table_fast(md);
 		if (live_map)
 			limits = md->queue->limits;
-		dm_table_put(live_map);
+		dm_put_live_table_fast(md);
 	}
 
 	if (!live_map) {
@@ -2540,7 +2572,7 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
 		goto out_unlock;
 	}
 
-	map = dm_get_live_table(md);
+	map = md->map;
 
 	/*
 	 * DMF_NOFLUSH_SUSPENDING must be set before presuspend.
@@ -2561,7 +2593,7 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
 	if (!noflush && do_lockfs) {
 		r = lock_fs(md);
 		if (r)
-			goto out;
+			goto out_unlock;
 	}
 
 	/*
@@ -2576,9 +2608,8 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
 	 * (dm_wq_work), we set BMF_BLOCK_IO_FOR_SUSPEND and call
 	 * flush_workqueue(md->wq).
 	 */
-	down_write(&md->io_lock);
 	set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
-	up_write(&md->io_lock);
+	synchronize_srcu(&md->io_barrier);
 
 	/*
 	 * Stop md->queue before flushing md->wq in case request-based
@@ -2596,10 +2627,9 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
 	 */
 	r = dm_wait_for_completion(md, TASK_INTERRUPTIBLE);
 
-	down_write(&md->io_lock);
 	if (noflush)
 		clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
-	up_write(&md->io_lock);
+	synchronize_srcu(&md->io_barrier);
 
 	/* were we interrupted ? */
 	if (r < 0) {
@@ -2609,7 +2639,7 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
 			start_queue(md->queue);
 
 		unlock_fs(md);
-		goto out; /* pushback list is already flushed, so skip flush */
+		goto out_unlock; /* pushback list is already flushed, so skip flush */
 	}
 
 	/*
@@ -2622,9 +2652,6 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
 
 	dm_table_postsuspend_targets(map);
 
-out:
-	dm_table_put(map);
-
out_unlock:
 	mutex_unlock(&md->suspend_lock);
 	return r;
@@ -2639,7 +2666,7 @@ int dm_resume(struct mapped_device *md)
 	if (!dm_suspended_md(md))
 		goto out;
 
-	map = dm_get_live_table(md);
+	map = md->map;
 	if (!map || !dm_table_get_size(map))
 		goto out;
 
@@ -2663,7 +2690,6 @@ int dm_resume(struct mapped_device *md)
 
 	r = 0;
out:
-	dm_table_put(map);
 	mutex_unlock(&md->suspend_lock);
 
 	return r;
include/linux/device-mapper.h  +3 −3
@@ -446,9 +446,9 @@ int __must_check dm_set_target_max_io_len(struct dm_target *ti, sector_t len);
 /*
  * Table reference counting.
  */
-struct dm_table *dm_get_live_table(struct mapped_device *md);
-void dm_table_get(struct dm_table *t);
-void dm_table_put(struct dm_table *t);
+struct dm_table *dm_get_live_table(struct mapped_device *md, int *srcu_idx);
+void dm_put_live_table(struct mapped_device *md, int srcu_idx);
+void dm_sync_table(struct mapped_device *md);
 
 /*
  * Queries
include/uapi/linux/dm-ioctl.h  +2 −2
@@ -267,9 +267,9 @@ enum {
 #define DM_DEV_SET_GEOMETRY	_IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)
 
 #define DM_VERSION_MAJOR	4
-#define DM_VERSION_MINOR	24
+#define DM_VERSION_MINOR	25
 #define DM_VERSION_PATCHLEVEL	0
-#define DM_VERSION_EXTRA	"-ioctl (2013-01-15)"
+#define DM_VERSION_EXTRA	"-ioctl (2013-06-26)"
 
 /* Status bits */
 #define DM_READONLY_FLAG	(1 << 0) /* In/Out */