Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 742c8fdc authored by Joe Thornber's avatar Joe Thornber Committed by Mike Snitzer
Browse files

dm bio prison v2: new interface for the bio prison



The deferred set is gone and all methods have _v2 appended to the end of
their names to allow for continued use of the original bio prison in DM
thin-provisioning.

Signed-off-by: default avatarJoe Thornber <ejt@redhat.com>
Signed-off-by: default avatarMike Snitzer <snitzer@redhat.com>
parent c1ae3cfa
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -11,6 +11,7 @@ dm-snapshot-y += dm-snap.o dm-exception-store.o dm-snap-transient.o \
dm-mirror-y	+= dm-raid1.o
dm-log-userspace-y \
		+= dm-log-userspace-base.o dm-log-userspace-transfer.o
dm-bio-prison-y += dm-bio-prison-v1.o dm-bio-prison-v2.o
dm-thin-pool-y	+= dm-thin.o dm-thin-metadata.o
dm-cache-y	+= dm-cache-target.o dm-cache-metadata.o dm-cache-policy.o
dm-cache-smq-y   += dm-cache-policy-smq.o
+43 −3
Original line number Diff line number Diff line
@@ -5,7 +5,8 @@
 */

#include "dm.h"
#include "dm-bio-prison.h"
#include "dm-bio-prison-v1.h"
#include "dm-bio-prison-v2.h"

#include <linux/spinlock.h>
#include <linux/mempool.h>
@@ -398,7 +399,7 @@ EXPORT_SYMBOL_GPL(dm_deferred_set_add_work);

/*----------------------------------------------------------------*/

static int __init dm_bio_prison_init(void)
static int __init dm_bio_prison_init_v1(void)
{
	_cell_cache = KMEM_CACHE(dm_bio_prison_cell, 0);
	if (!_cell_cache)
@@ -407,12 +408,51 @@ static int __init dm_bio_prison_init(void)
	return 0;
}

static void __exit dm_bio_prison_exit(void)
static void dm_bio_prison_exit_v1(void)
{
	kmem_cache_destroy(_cell_cache);
	_cell_cache = NULL;
}

static int (*_inits[])(void) __initdata = {
	dm_bio_prison_init_v1,
	dm_bio_prison_init_v2,
};

static void (*_exits[])(void) = {
	dm_bio_prison_exit_v1,
	dm_bio_prison_exit_v2,
};

static int __init dm_bio_prison_init(void)
{
	const int count = ARRAY_SIZE(_inits);

	int r, i;

	for (i = 0; i < count; i++) {
		r = _inits[i]();
		if (r)
			goto bad;
	}

	return 0;

      bad:
	while (i--)
		_exits[i]();

	return r;
}

static void __exit dm_bio_prison_exit(void)
{
	int i = ARRAY_SIZE(_exits);

	while (i--)
		_exits[i]();
}

/*
 * module hooks
 */
+1 −1
Original line number Diff line number Diff line
/*
 * Copyright (C) 2011-2012 Red Hat, Inc.
 * Copyright (C) 2011-2017 Red Hat, Inc.
 *
 * This file is released under the GPL.
 */
+369 −0
Original line number Diff line number Diff line
/*
 * Copyright (C) 2012-2017 Red Hat, Inc.
 *
 * This file is released under the GPL.
 */

#include "dm.h"
#include "dm-bio-prison-v2.h"

#include <linux/spinlock.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/rwsem.h>

/*----------------------------------------------------------------*/

#define MIN_CELLS 1024

struct dm_bio_prison_v2 {
	struct workqueue_struct *wq;

	spinlock_t lock;
	mempool_t *cell_pool;
	struct rb_root cells;
};

static struct kmem_cache *_cell_cache;

/*----------------------------------------------------------------*/

/*
 * @nr_cells should be the number of cells you want in use _concurrently_.
 * Don't confuse it with the number of distinct keys.
 */
struct dm_bio_prison_v2 *dm_bio_prison_create_v2(struct workqueue_struct *wq)
{
	struct dm_bio_prison_v2 *prison = kmalloc(sizeof(*prison), GFP_KERNEL);

	if (!prison)
		return NULL;

	prison->wq = wq;
	spin_lock_init(&prison->lock);

	prison->cell_pool = mempool_create_slab_pool(MIN_CELLS, _cell_cache);
	if (!prison->cell_pool) {
		kfree(prison);
		return NULL;
	}

	prison->cells = RB_ROOT;

	return prison;
}
EXPORT_SYMBOL_GPL(dm_bio_prison_create_v2);

void dm_bio_prison_destroy_v2(struct dm_bio_prison_v2 *prison)
{
	mempool_destroy(prison->cell_pool);
	kfree(prison);
}
EXPORT_SYMBOL_GPL(dm_bio_prison_destroy_v2);

struct dm_bio_prison_cell_v2 *dm_bio_prison_alloc_cell_v2(struct dm_bio_prison_v2 *prison, gfp_t gfp)
{
	return mempool_alloc(prison->cell_pool, gfp);
}
EXPORT_SYMBOL_GPL(dm_bio_prison_alloc_cell_v2);

void dm_bio_prison_free_cell_v2(struct dm_bio_prison_v2 *prison,
				struct dm_bio_prison_cell_v2 *cell)
{
	mempool_free(cell, prison->cell_pool);
}
EXPORT_SYMBOL_GPL(dm_bio_prison_free_cell_v2);

static void __setup_new_cell(struct dm_cell_key_v2 *key,
			     struct dm_bio_prison_cell_v2 *cell)
{
	memset(cell, 0, sizeof(*cell));
	memcpy(&cell->key, key, sizeof(cell->key));
	bio_list_init(&cell->bios);
}

static int cmp_keys(struct dm_cell_key_v2 *lhs,
		    struct dm_cell_key_v2 *rhs)
{
	if (lhs->virtual < rhs->virtual)
		return -1;

	if (lhs->virtual > rhs->virtual)
		return 1;

	if (lhs->dev < rhs->dev)
		return -1;

	if (lhs->dev > rhs->dev)
		return 1;

	if (lhs->block_end <= rhs->block_begin)
		return -1;

	if (lhs->block_begin >= rhs->block_end)
		return 1;

	return 0;
}

/*
 * Returns true if node found, otherwise it inserts a new one.
 */
static bool __find_or_insert(struct dm_bio_prison_v2 *prison,
			     struct dm_cell_key_v2 *key,
			     struct dm_bio_prison_cell_v2 *cell_prealloc,
			     struct dm_bio_prison_cell_v2 **result)
{
	int r;
	struct rb_node **new = &prison->cells.rb_node, *parent = NULL;

	while (*new) {
		struct dm_bio_prison_cell_v2 *cell =
			container_of(*new, struct dm_bio_prison_cell_v2, node);

		r = cmp_keys(key, &cell->key);

		parent = *new;
		if (r < 0)
			new = &((*new)->rb_left);

		else if (r > 0)
			new = &((*new)->rb_right);

		else {
			*result = cell;
			return true;
		}
	}

	__setup_new_cell(key, cell_prealloc);
	*result = cell_prealloc;
	rb_link_node(&cell_prealloc->node, parent, new);
	rb_insert_color(&cell_prealloc->node, &prison->cells);

	return false;
}

static bool __get(struct dm_bio_prison_v2 *prison,
		  struct dm_cell_key_v2 *key,
		  unsigned lock_level,
		  struct bio *inmate,
		  struct dm_bio_prison_cell_v2 *cell_prealloc,
		  struct dm_bio_prison_cell_v2 **cell)
{
	if (__find_or_insert(prison, key, cell_prealloc, cell)) {
		if ((*cell)->exclusive_lock) {
			if (lock_level <= (*cell)->exclusive_level) {
				bio_list_add(&(*cell)->bios, inmate);
				return false;
			}
		}

		(*cell)->shared_count++;

	} else
		(*cell)->shared_count = 1;

	return true;
}

bool dm_cell_get_v2(struct dm_bio_prison_v2 *prison,
		    struct dm_cell_key_v2 *key,
		    unsigned lock_level,
		    struct bio *inmate,
		    struct dm_bio_prison_cell_v2 *cell_prealloc,
		    struct dm_bio_prison_cell_v2 **cell_result)
{
	int r;
	unsigned long flags;

	spin_lock_irqsave(&prison->lock, flags);
	r = __get(prison, key, lock_level, inmate, cell_prealloc, cell_result);
	spin_unlock_irqrestore(&prison->lock, flags);

	return r;
}
EXPORT_SYMBOL_GPL(dm_cell_get_v2);

static bool __put(struct dm_bio_prison_v2 *prison,
		  struct dm_bio_prison_cell_v2 *cell)
{
	BUG_ON(!cell->shared_count);
	cell->shared_count--;

	// FIXME: shared locks granted above the lock level could starve this
	if (!cell->shared_count) {
		if (cell->exclusive_lock){
			if (cell->quiesce_continuation) {
				queue_work(prison->wq, cell->quiesce_continuation);
				cell->quiesce_continuation = NULL;
			}
		} else {
			rb_erase(&cell->node, &prison->cells);
			return true;
		}
	}

	return false;
}

bool dm_cell_put_v2(struct dm_bio_prison_v2 *prison,
		    struct dm_bio_prison_cell_v2 *cell)
{
	bool r;
	unsigned long flags;

	spin_lock_irqsave(&prison->lock, flags);
	r = __put(prison, cell);
	spin_unlock_irqrestore(&prison->lock, flags);

	return r;
}
EXPORT_SYMBOL_GPL(dm_cell_put_v2);

static int __lock(struct dm_bio_prison_v2 *prison,
		  struct dm_cell_key_v2 *key,
		  unsigned lock_level,
		  struct dm_bio_prison_cell_v2 *cell_prealloc,
		  struct dm_bio_prison_cell_v2 **cell_result)
{
	struct dm_bio_prison_cell_v2 *cell;

	if (__find_or_insert(prison, key, cell_prealloc, &cell)) {
		if (cell->exclusive_lock)
			return -EBUSY;

		cell->exclusive_lock = true;
		cell->exclusive_level = lock_level;
		*cell_result = cell;

		// FIXME: we don't yet know what level these shared locks
		// were taken at, so have to quiesce them all.
		return cell->shared_count > 0;

	} else {
		cell = cell_prealloc;
		cell->shared_count = 0;
		cell->exclusive_lock = true;
		cell->exclusive_level = lock_level;
		*cell_result = cell;
	}

	return 0;
}

int dm_cell_lock_v2(struct dm_bio_prison_v2 *prison,
		    struct dm_cell_key_v2 *key,
		    unsigned lock_level,
		    struct dm_bio_prison_cell_v2 *cell_prealloc,
		    struct dm_bio_prison_cell_v2 **cell_result)
{
	int r;
	unsigned long flags;

	spin_lock_irqsave(&prison->lock, flags);
	r = __lock(prison, key, lock_level, cell_prealloc, cell_result);
	spin_unlock_irqrestore(&prison->lock, flags);

	return r;
}
EXPORT_SYMBOL_GPL(dm_cell_lock_v2);

static void __quiesce(struct dm_bio_prison_v2 *prison,
		      struct dm_bio_prison_cell_v2 *cell,
		      struct work_struct *continuation)
{
	if (!cell->shared_count)
		queue_work(prison->wq, continuation);
	else
		cell->quiesce_continuation = continuation;
}

void dm_cell_quiesce_v2(struct dm_bio_prison_v2 *prison,
			struct dm_bio_prison_cell_v2 *cell,
			struct work_struct *continuation)
{
	unsigned long flags;

	spin_lock_irqsave(&prison->lock, flags);
	__quiesce(prison, cell, continuation);
	spin_unlock_irqrestore(&prison->lock, flags);
}
EXPORT_SYMBOL_GPL(dm_cell_quiesce_v2);

static int __promote(struct dm_bio_prison_v2 *prison,
		     struct dm_bio_prison_cell_v2 *cell,
		     unsigned new_lock_level)
{
	if (!cell->exclusive_lock)
		return -EINVAL;

	cell->exclusive_level = new_lock_level;
	return cell->shared_count > 0;
}

int dm_cell_lock_promote_v2(struct dm_bio_prison_v2 *prison,
			    struct dm_bio_prison_cell_v2 *cell,
			    unsigned new_lock_level)
{
	int r;
	unsigned long flags;

	spin_lock_irqsave(&prison->lock, flags);
	r = __promote(prison, cell, new_lock_level);
	spin_unlock_irqrestore(&prison->lock, flags);

	return r;
}
EXPORT_SYMBOL_GPL(dm_cell_lock_promote_v2);

static bool __unlock(struct dm_bio_prison_v2 *prison,
		     struct dm_bio_prison_cell_v2 *cell,
		     struct bio_list *bios)
{
	BUG_ON(!cell->exclusive_lock);

	bio_list_merge(bios, &cell->bios);
	bio_list_init(&cell->bios);

	if (cell->shared_count) {
		cell->exclusive_lock = 0;
		return false;
	}

	rb_erase(&cell->node, &prison->cells);
	return true;
}

bool dm_cell_unlock_v2(struct dm_bio_prison_v2 *prison,
		       struct dm_bio_prison_cell_v2 *cell,
		       struct bio_list *bios)
{
	bool r;
	unsigned long flags;

	spin_lock_irqsave(&prison->lock, flags);
	r = __unlock(prison, cell, bios);
	spin_unlock_irqrestore(&prison->lock, flags);

	return r;
}
EXPORT_SYMBOL_GPL(dm_cell_unlock_v2);

/*----------------------------------------------------------------*/

int __init dm_bio_prison_init_v2(void)
{
	_cell_cache = KMEM_CACHE(dm_bio_prison_cell_v2, 0);
	if (!_cell_cache)
		return -ENOMEM;

	return 0;
}

void dm_bio_prison_exit_v2(void)
{
	kmem_cache_destroy(_cell_cache);
	_cell_cache = NULL;
}
+152 −0
Original line number Diff line number Diff line
/*
 * Copyright (C) 2011-2017 Red Hat, Inc.
 *
 * This file is released under the GPL.
 */

#ifndef DM_BIO_PRISON_V2_H
#define DM_BIO_PRISON_V2_H

#include "persistent-data/dm-block-manager.h" /* FIXME: for dm_block_t */
#include "dm-thin-metadata.h" /* FIXME: for dm_thin_id */

#include <linux/bio.h>
#include <linux/rbtree.h>
#include <linux/workqueue.h>

/*----------------------------------------------------------------*/

int dm_bio_prison_init_v2(void);
void dm_bio_prison_exit_v2(void);

/*
 * Sometimes we can't deal with a bio straight away.  We put them in prison
 * where they can't cause any mischief.  Bios are put in a cell identified
 * by a key, multiple bios can be in the same cell.  When the cell is
 * subsequently unlocked the bios become available.
 */
struct dm_bio_prison_v2;

/*
 * Keys define a range of blocks within either a virtual or physical
 * device.
 */
struct dm_cell_key_v2 {
	int virtual;
	dm_thin_id dev;
	dm_block_t block_begin, block_end;
};

/*
 * Treat this as opaque, only in header so callers can manage allocation
 * themselves.
 */
struct dm_bio_prison_cell_v2 {
	// FIXME: pack these
	bool exclusive_lock;
	unsigned exclusive_level;
	unsigned shared_count;
	struct work_struct *quiesce_continuation;

	struct rb_node node;
	struct dm_cell_key_v2 key;
	struct bio_list bios;
};

struct dm_bio_prison_v2 *dm_bio_prison_create_v2(struct workqueue_struct *wq);
void dm_bio_prison_destroy_v2(struct dm_bio_prison_v2 *prison);

/*
 * These two functions just wrap a mempool.  This is a transitory step:
 * Eventually all bio prison clients should manage their own cell memory.
 *
 * Like mempool_alloc(), dm_bio_prison_alloc_cell_v2() can only fail if called
 * in interrupt context or passed GFP_NOWAIT.
 */
struct dm_bio_prison_cell_v2 *dm_bio_prison_alloc_cell_v2(struct dm_bio_prison_v2 *prison,
						    gfp_t gfp);
void dm_bio_prison_free_cell_v2(struct dm_bio_prison_v2 *prison,
				struct dm_bio_prison_cell_v2 *cell);

/*
 * Shared locks have a bio associated with them.
 *
 * If the lock is granted the caller can continue to use the bio, and must
 * call dm_cell_put_v2() to drop the reference count when finished using it.
 *
 * If the lock cannot be granted then the bio will be tracked within the
 * cell, and later given to the holder of the exclusive lock.
 *
 * See dm_cell_lock_v2() for discussion of the lock_level parameter.
 *
 * Compare *cell_result with cell_prealloc to see if the prealloc was used.
 * If cell_prealloc was used then inmate wasn't added to it.
 *
 * Returns true if the lock is granted.
 */
bool dm_cell_get_v2(struct dm_bio_prison_v2 *prison,
		    struct dm_cell_key_v2 *key,
		    unsigned lock_level,
		    struct bio *inmate,
		    struct dm_bio_prison_cell_v2 *cell_prealloc,
		    struct dm_bio_prison_cell_v2 **cell_result);

/*
 * Decrement the shared reference count for the lock.  Returns true if
 * returning ownership of the cell (ie. you should free it).
 */
bool dm_cell_put_v2(struct dm_bio_prison_v2 *prison,
		    struct dm_bio_prison_cell_v2 *cell);

/*
 * Locks a cell.  No associated bio.  Exclusive locks get priority.  These
 * locks constrain whether the io locks are granted according to level.
 *
 * Shared locks will still be granted if the lock_level is > (not = to) the
 * exclusive lock level.
 *
 * If an _exclusive_ lock is already held then -EBUSY is returned.
 *
 * Return values:
 *  < 0 - error
 *  0   - locked; no quiescing needed
 *  1   - locked; quiescing needed
 */
int dm_cell_lock_v2(struct dm_bio_prison_v2 *prison,
		    struct dm_cell_key_v2 *key,
		    unsigned lock_level,
		    struct dm_bio_prison_cell_v2 *cell_prealloc,
		    struct dm_bio_prison_cell_v2 **cell_result);

void dm_cell_quiesce_v2(struct dm_bio_prison_v2 *prison,
			struct dm_bio_prison_cell_v2 *cell,
			struct work_struct *continuation);

/*
 * Promotes an _exclusive_ lock to a higher lock level.
 *
 * Return values:
 *  < 0 - error
 *  0   - promoted; no quiescing needed
 *  1   - promoted; quiescing needed
 */
int dm_cell_lock_promote_v2(struct dm_bio_prison_v2 *prison,
			    struct dm_bio_prison_cell_v2 *cell,
			    unsigned new_lock_level);

/*
 * Adds any held bios to the bio list.
 *
 * There may be shared locks still held at this point even if you quiesced
 * (ie. different lock levels).
 *
 * Returns true if returning ownership of the cell (ie. you should free
 * it).
 */
bool dm_cell_unlock_v2(struct dm_bio_prison_v2 *prison,
		       struct dm_bio_prison_cell_v2 *cell,
		       struct bio_list *bios);

/*----------------------------------------------------------------*/

#endif
Loading