Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit c1f0c183 authored by Mike Snitzer's avatar Mike Snitzer Committed by Alasdair G Kergon
Browse files

dm snapshot: allow live exception store handover between tables



Permit in-use snapshot exception data to be 'handed over' from one
snapshot instance to another.  This is a pre-requisite for patches
that allow the changes made in a snapshot device to be merged back into
its origin device and also allows device resizing.

The basic call sequence is:

  dmsetup load new_snapshot (referencing the existing in-use cow device)
     - the ctr code detects that the cow is already in use and allows the
       two snapshot target instances to be linked together
  dmsetup suspend original_snapshot
  dmsetup resume new_snapshot
     - the new_snapshot becomes live, and if anything now tries to access
       the original one it will receive -EIO
  dmsetup remove original_snapshot

(There can only be two snapshot targets referencing the same cow device
simultaneously.)

Signed-off-by: default avatarMike Snitzer <snitzer@redhat.com>
Signed-off-by: default avatarMikulas Patocka <mpatocka@redhat.com>
Signed-off-by: default avatarAlasdair G Kergon <agk@redhat.com>
parent 042d2a9b
Loading
Loading
Loading
Loading
+236 −27
Original line number Diff line number Diff line
@@ -302,23 +302,117 @@ static void __insert_origin(struct origin *o)
	list_add_tail(&o->hash_list, sl);
}

/*
 * _origins_lock must be held when calling this function.
 * Returns number of snapshots registered using the supplied cow device, plus:
 * snap_src - a snapshot suitable for use as a source of exception handover
 * snap_dest - a snapshot capable of receiving exception handover.
 *
 * Possible return values and states:
 *   0: NULL, NULL  - first new snapshot
 *   1: snap_src, NULL - normal snapshot
 *   2: snap_src, snap_dest  - waiting for handover
 *   2: snap_src, NULL - handed over, waiting for old to be deleted
 *   1: NULL, snap_dest - source got destroyed without handover
 */
static int __find_snapshots_sharing_cow(struct dm_snapshot *snap,
					struct dm_snapshot **snap_src,
					struct dm_snapshot **snap_dest)
{
	struct dm_snapshot *s;
	struct origin *o;
	int count = 0;
	int active;

	o = __lookup_origin(snap->origin->bdev);
	if (!o)
		goto out;

	list_for_each_entry(s, &o->snapshots, list) {
		if (!bdev_equal(s->cow->bdev, snap->cow->bdev))
			continue;

		down_read(&s->lock);
		active = s->active;
		up_read(&s->lock);

		if (active) {
			if (snap_src)
				*snap_src = s;
		} else if (snap_dest)
			*snap_dest = s;

		count++;
	}

out:
	return count;
}

/*
 * On success, returns 1 if this snapshot is a handover destination,
 * otherwise returns 0.
 */
static int __validate_exception_handover(struct dm_snapshot *snap)
{
	struct dm_snapshot *snap_src = NULL, *snap_dest = NULL;

	/* Does snapshot need exceptions handed over to it? */
	if ((__find_snapshots_sharing_cow(snap, &snap_src, &snap_dest) == 2) ||
	    snap_dest) {
		snap->ti->error = "Snapshot cow pairing for exception "
				  "table handover failed";
		return -EINVAL;
	}

	/*
	 * If no snap_src was found, snap cannot become a handover
	 * destination.
	 */
	if (!snap_src)
		return 0;

	return 1;
}

static void __insert_snapshot(struct origin *o, struct dm_snapshot *s)
{
	struct dm_snapshot *l;

	/* Sort the list according to chunk size, largest-first smallest-last */
	list_for_each_entry(l, &o->snapshots, list)
		if (l->store->chunk_size < s->store->chunk_size)
			break;
	list_add_tail(&s->list, &l->list);
}

/*
 * Make a note of the snapshot and its origin so we can look it
 * up when the origin has a write on it.
 *
 * Also validate snapshot exception store handovers.
 * On success, returns 1 if this registration is a handover destination,
 * otherwise returns 0.
 */
static int register_snapshot(struct dm_snapshot *snap)
{
	struct dm_snapshot *l;
	struct origin *o, *new_o;
	struct origin *o, *new_o = NULL;
	struct block_device *bdev = snap->origin->bdev;
	int r = 0;

	new_o = kmalloc(sizeof(*new_o), GFP_KERNEL);
	if (!new_o)
		return -ENOMEM;

	down_write(&_origins_lock);
	o = __lookup_origin(bdev);

	r = __validate_exception_handover(snap);
	if (r < 0) {
		kfree(new_o);
		goto out;
	}

	o = __lookup_origin(bdev);
	if (o)
		kfree(new_o);
	else {
@@ -332,14 +426,27 @@ static int register_snapshot(struct dm_snapshot *snap)
		__insert_origin(o);
	}

	/* Sort the list according to chunk size, largest-first smallest-last */
	list_for_each_entry(l, &o->snapshots, list)
		if (l->store->chunk_size < snap->store->chunk_size)
			break;
	list_add_tail(&snap->list, &l->list);
	__insert_snapshot(o, snap);

out:
	up_write(&_origins_lock);

	return r;
}

/*
 * Move snapshot to correct place in list according to chunk size.
 */
static void reregister_snapshot(struct dm_snapshot *s)
{
	struct block_device *bdev = s->origin->bdev;

	down_write(&_origins_lock);

	list_del(&s->list);
	__insert_snapshot(__lookup_origin(bdev), s);

	up_write(&_origins_lock);
	return 0;
}

static void unregister_snapshot(struct dm_snapshot *s)
@@ -350,7 +457,7 @@ static void unregister_snapshot(struct dm_snapshot *s)
	o = __lookup_origin(s->origin->bdev);

	list_del(&s->list);
	if (list_empty(&o->snapshots)) {
	if (o && list_empty(&o->snapshots)) {
		list_del(&o->hash_list);
		kfree(o);
	}
@@ -662,6 +769,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
	s->suspended = 0;
	atomic_set(&s->pending_exceptions_count, 0);
	init_rwsem(&s->lock);
	INIT_LIST_HEAD(&s->list);
	spin_lock_init(&s->pe_lock);

	/* Allocate hash table for COW data */
@@ -696,39 +804,55 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)

	spin_lock_init(&s->tracked_chunk_lock);

	/* Metadata must only be loaded into one table at once */
	bio_list_init(&s->queued_bios);
	INIT_WORK(&s->queued_bios_work, flush_queued_bios);

	ti->private = s;
	ti->num_flush_requests = 1;

	/* Add snapshot to the list of snapshots for this origin */
	/* Exceptions aren't triggered till snapshot_resume() is called */
	r = register_snapshot(s);
	if (r == -ENOMEM) {
		ti->error = "Snapshot origin struct allocation failed";
		goto bad_load_and_register;
	} else if (r < 0) {
		/* invalid handover, register_snapshot has set ti->error */
		goto bad_load_and_register;
	}

	/*
	 * Metadata must only be loaded into one table at once, so skip this
	 * if metadata will be handed over during resume.
	 * Chunk size will be set during the handover - set it to zero to
	 * ensure it's ignored.
	 */
	if (r > 0) {
		s->store->chunk_size = 0;
		return 0;
	}

	r = s->store->type->read_metadata(s->store, dm_add_exception,
					  (void *)s);
	if (r < 0) {
		ti->error = "Failed to read snapshot metadata";
		goto bad_load_and_register;
		goto bad_read_metadata;
	} else if (r > 0) {
		s->valid = 0;
		DMWARN("Snapshot is marked invalid.");
	}

	bio_list_init(&s->queued_bios);
	INIT_WORK(&s->queued_bios_work, flush_queued_bios);

	if (!s->store->chunk_size) {
		ti->error = "Chunk size not set";
		goto bad_load_and_register;
		goto bad_read_metadata;
	}

	/* Add snapshot to the list of snapshots for this origin */
	/* Exceptions aren't triggered till snapshot_resume() is called */
	if (register_snapshot(s)) {
		r = -EINVAL;
		ti->error = "Cannot register snapshot origin";
		goto bad_load_and_register;
	}

	ti->private = s;
	ti->split_io = s->store->chunk_size;
	ti->num_flush_requests = 1;

	return 0;

bad_read_metadata:
	unregister_snapshot(s);

bad_load_and_register:
	mempool_destroy(s->tracked_chunk_pool);

@@ -767,15 +891,58 @@ static void __free_exceptions(struct dm_snapshot *s)
	dm_exception_table_exit(&s->complete, exception_cache);
}

static void __handover_exceptions(struct dm_snapshot *snap_src,
				  struct dm_snapshot *snap_dest)
{
	union {
		struct dm_exception_table table_swap;
		struct dm_exception_store *store_swap;
	} u;

	/*
	 * Swap all snapshot context information between the two instances.
	 */
	u.table_swap = snap_dest->complete;
	snap_dest->complete = snap_src->complete;
	snap_src->complete = u.table_swap;

	u.store_swap = snap_dest->store;
	snap_dest->store = snap_src->store;
	snap_src->store = u.store_swap;

	snap_dest->store->snap = snap_dest;
	snap_src->store->snap = snap_src;

	snap_dest->ti->split_io = snap_dest->store->chunk_size;
	snap_dest->valid = snap_src->valid;

	/*
	 * Set source invalid to ensure it receives no further I/O.
	 */
	snap_src->valid = 0;
}

static void snapshot_dtr(struct dm_target *ti)
{
#ifdef CONFIG_DM_DEBUG
	int i;
#endif
	struct dm_snapshot *s = ti->private;
	struct dm_snapshot *snap_src = NULL, *snap_dest = NULL;

	flush_workqueue(ksnapd);

	down_read(&_origins_lock);
	/* Check whether exception handover must be cancelled */
	(void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest);
	if (snap_src && snap_dest && (s == snap_src)) {
		down_write(&snap_dest->lock);
		snap_dest->valid = 0;
		up_write(&snap_dest->lock);
		DMERR("Cancelling snapshot handover.");
	}
	up_read(&_origins_lock);

	/* Prevent further origin writes from using this snapshot. */
	/* After this returns there can be no new kcopyd jobs. */
	unregister_snapshot(s);
@@ -1188,9 +1355,50 @@ static void snapshot_postsuspend(struct dm_target *ti)
	up_write(&s->lock);
}

static int snapshot_preresume(struct dm_target *ti)
{
	int r = 0;
	struct dm_snapshot *s = ti->private;
	struct dm_snapshot *snap_src = NULL, *snap_dest = NULL;

	down_read(&_origins_lock);
	(void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest);
	if (snap_src && snap_dest) {
		down_read(&snap_src->lock);
		if (s == snap_src) {
			DMERR("Unable to resume snapshot source until "
			      "handover completes.");
			r = -EINVAL;
		} else if (!snap_src->suspended) {
			DMERR("Unable to perform snapshot handover until "
			      "source is suspended.");
			r = -EINVAL;
		}
		up_read(&snap_src->lock);
	}
	up_read(&_origins_lock);

	return r;
}

static void snapshot_resume(struct dm_target *ti)
{
	struct dm_snapshot *s = ti->private;
	struct dm_snapshot *snap_src = NULL, *snap_dest = NULL;

	down_read(&_origins_lock);
	(void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest);
	if (snap_src && snap_dest) {
		down_write(&snap_src->lock);
		down_write_nested(&snap_dest->lock, SINGLE_DEPTH_NESTING);
		__handover_exceptions(snap_src, snap_dest);
		up_write(&snap_dest->lock);
		up_write(&snap_src->lock);
	}
	up_read(&_origins_lock);

	/* Now we have correct chunk size, reregister */
	reregister_snapshot(s);

	down_write(&s->lock);
	s->active = 1;
@@ -1510,6 +1718,7 @@ static struct target_type snapshot_target = {
	.map     = snapshot_map,
	.end_io  = snapshot_end_io,
	.postsuspend = snapshot_postsuspend,
	.preresume  = snapshot_preresume,
	.resume  = snapshot_resume,
	.status  = snapshot_status,
	.iterate_devices = snapshot_iterate_devices,