
Commit b91593fa authored by Linus Torvalds
Pull device mapper updates from Mike Snitzer:

 - a few conversions from atomic_t to refcount_t (a minimal sketch of the
   conversion pattern follows this list)

 - a DM core fix for a race during device destruction that could result
   in a BUG_ON

 - a stable@ fix for a DM cache race condition that could lead to data
   corruption when operating in writeback mode (writethrough is default)

 - various DM cache cleanups and improvements

 - add DAX support to the DM log-writes target

 - a fix for the DM zoned target's ability to deal with the last zone of
   the drive being smaller than all others

 - a stable@ DM crypt and DM integrity fix for a negative check that was
   too restrictive (prevented slab debug with XFS on top of DM crypt from
   working)

 - a DM raid target fix for a panic that can occur when forcing a raid
   to sync

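The refcount_t conversions mentioned above follow the usual mechanical pattern. A minimal, illustrative sketch in kernel-style C (the dm-cache-metadata.c hunk further down is the real change; struct md_object and its helpers here are made-up names used only for illustration):

#include <linux/refcount.h>
#include <linux/slab.h>

struct md_object {
	refcount_t ref_count;			/* was: atomic_t ref_count; */
};

static struct md_object *md_object_alloc(void)
{
	struct md_object *obj = kzalloc(sizeof(*obj), GFP_KERNEL);

	if (obj)
		refcount_set(&obj->ref_count, 1);	/* was: atomic_set(..., 1) */
	return obj;
}

static void md_object_get(struct md_object *obj)
{
	refcount_inc(&obj->ref_count);		/* was: atomic_inc() */
}

static void md_object_put(struct md_object *obj)
{
	/* refcount_dec_and_test() additionally warns on underflow/saturation */
	if (refcount_dec_and_test(&obj->ref_count))	/* was: atomic_dec_and_test() */
		kfree(obj);
}

Unlike atomic_t, refcount_t saturates instead of wrapping and warns on misuse, which is why these otherwise mechanical conversions are worthwhile.
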
* tag 'for-4.15/dm' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm: (25 commits)
  dm cache: lift common migration preparation code to alloc_migration()
  dm cache: remove unused deferred_cells member from struct cache
  dm cache policy smq: allocate cache blocks in order
  dm cache policy smq: change max background work from 10240 to 4096 blocks
  dm cache background tracker: limit amount of background work that may be issued at once
  dm cache policy smq: take origin idle status into account when queuing writebacks
  dm cache policy smq: handle races with queuing background_work
  dm raid: fix panic when attempting to force a raid to sync
  dm integrity: allow unaligned bv_offset
  dm crypt: allow unaligned bv_offset
  dm: small cleanup in dm_get_md()
  dm: fix race between dm_get_from_kobject() and __dm_destroy()
  dm: allocate struct mapped_device with kvzalloc
  dm zoned: ignore last smaller runt zone
  dm space map metadata: use ARRAY_SIZE
  dm log writes: add support for DAX
  dm log writes: add support for inline data buffers
  dm cache: simplify get_per_bio_data() by removing data_size argument
  dm cache: remove all obsolete writethrough-specific code
  dm cache: submit writethrough writes in parallel to origin and cache
  ...
parents e2c5923c ef7afb36
+12 −6 drivers/md/dm-cache-background-tracker.c
@@ -161,8 +161,17 @@ EXPORT_SYMBOL_GPL(btracker_nr_demotions_queued);

static bool max_work_reached(struct background_tracker *b)
{
	// FIXME: finish
	return false;
	return atomic_read(&b->pending_promotes) +
		atomic_read(&b->pending_writebacks) +
		atomic_read(&b->pending_demotes) >= b->max_work;
}

struct bt_work *alloc_work(struct background_tracker *b)
{
	if (max_work_reached(b))
		return NULL;

	return kmem_cache_alloc(b->work_cache, GFP_NOWAIT);
}

int btracker_queue(struct background_tracker *b,
@@ -174,10 +183,7 @@ int btracker_queue(struct background_tracker *b,
	if (pwork)
		*pwork = NULL;

	if (max_work_reached(b))
		return -ENOMEM;

	w = kmem_cache_alloc(b->work_cache, GFP_NOWAIT);
	w = alloc_work(b);
	if (!w)
		return -ENOMEM;

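A hedged caller-side sketch, not part of the patch: with the change above, btracker_queue() fails with -ENOMEM once max_work items are already in flight, so policies must be prepared to undo their own bookkeeping and retry later -- which is exactly what the dm-cache-policy-smq.c hunks further down do. The my_* names are illustrative only.

static int my_queue_background_op(struct background_tracker *bt,
				  struct policy_work *work)
{
	int r;

	/*
	 * btracker_queue() now allocates through alloc_work(), which refuses
	 * new work once max_work items are pending, so -ENOMEM is an expected
	 * back-pressure signal rather than something to WARN about.
	 */
	r = btracker_queue(bt, work, NULL);
	if (r)
		my_rollback(work);	/* e.g. clear pending state and requeue the entry */

	return r;
}
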
+5 −4 drivers/md/dm-cache-metadata.c
@@ -13,6 +13,7 @@
#include "persistent-data/dm-transaction-manager.h"

#include <linux/device-mapper.h>
#include <linux/refcount.h>

/*----------------------------------------------------------------*/

@@ -100,7 +101,7 @@ struct cache_disk_superblock {
} __packed;

struct dm_cache_metadata {
	atomic_t ref_count;
	refcount_t ref_count;
	struct list_head list;

	unsigned version;
@@ -753,7 +754,7 @@ static struct dm_cache_metadata *metadata_open(struct block_device *bdev,
	}

	cmd->version = metadata_version;
	atomic_set(&cmd->ref_count, 1);
	refcount_set(&cmd->ref_count, 1);
	init_rwsem(&cmd->root_lock);
	cmd->bdev = bdev;
	cmd->data_block_size = data_block_size;
@@ -791,7 +792,7 @@ static struct dm_cache_metadata *lookup(struct block_device *bdev)

	list_for_each_entry(cmd, &table, list)
		if (cmd->bdev == bdev) {
			atomic_inc(&cmd->ref_count);
			refcount_inc(&cmd->ref_count);
			return cmd;
		}

@@ -862,7 +863,7 @@ struct dm_cache_metadata *dm_cache_metadata_open(struct block_device *bdev,

void dm_cache_metadata_close(struct dm_cache_metadata *cmd)
{
	if (atomic_dec_and_test(&cmd->ref_count)) {
	if (refcount_dec_and_test(&cmd->ref_count)) {
		mutex_lock(&table_lock);
		list_del(&cmd->list);
		mutex_unlock(&table_lock);
+33 −9 drivers/md/dm-cache-policy-smq.c
@@ -213,6 +213,19 @@ static void l_del(struct entry_space *es, struct ilist *l, struct entry *e)
		l->nr_elts--;
}

static struct entry *l_pop_head(struct entry_space *es, struct ilist *l)
{
	struct entry *e;

	for (e = l_head(es, l); e; e = l_next(es, e))
		if (!e->sentinel) {
			l_del(es, l, e);
			return e;
		}

	return NULL;
}

static struct entry *l_pop_tail(struct entry_space *es, struct ilist *l)
{
	struct entry *e;
@@ -719,7 +732,7 @@ static struct entry *alloc_entry(struct entry_alloc *ea)
	if (l_empty(&ea->free))
		return NULL;

	e = l_pop_tail(ea->es, &ea->free);
	e = l_pop_head(ea->es, &ea->free);
	init_entry(e);
	ea->nr_allocated++;

@@ -1158,13 +1171,13 @@ static void clear_pending(struct smq_policy *mq, struct entry *e)
	e->pending_work = false;
}

static void queue_writeback(struct smq_policy *mq)
static void queue_writeback(struct smq_policy *mq, bool idle)
{
	int r;
	struct policy_work work;
	struct entry *e;

	e = q_peek(&mq->dirty, mq->dirty.nr_levels, !mq->migrations_allowed);
	e = q_peek(&mq->dirty, mq->dirty.nr_levels, idle);
	if (e) {
		mark_pending(mq, e);
		q_del(&mq->dirty, e);
@@ -1174,12 +1187,16 @@ static void queue_writeback(struct smq_policy *mq)
		work.cblock = infer_cblock(mq, e);

		r = btracker_queue(mq->bg_work, &work, NULL);
		WARN_ON_ONCE(r); // FIXME: finish, I think we have to get rid of this race.
		if (r) {
			clear_pending(mq, e);
			q_push_front(&mq->dirty, e);
		}
	}
}

static void queue_demotion(struct smq_policy *mq)
{
	int r;
	struct policy_work work;
	struct entry *e;

@@ -1189,7 +1206,7 @@ static void queue_demotion(struct smq_policy *mq)
	e = q_peek(&mq->clean, mq->clean.nr_levels / 2, true);
	if (!e) {
		if (!clean_target_met(mq, true))
			queue_writeback(mq);
			queue_writeback(mq, false);
		return;
	}

@@ -1199,12 +1216,17 @@ static void queue_demotion(struct smq_policy *mq)
	work.op = POLICY_DEMOTE;
	work.oblock = e->oblock;
	work.cblock = infer_cblock(mq, e);
	btracker_queue(mq->bg_work, &work, NULL);
	r = btracker_queue(mq->bg_work, &work, NULL);
	if (r) {
		clear_pending(mq, e);
		q_push_front(&mq->clean, e);
	}
}

static void queue_promotion(struct smq_policy *mq, dm_oblock_t oblock,
			    struct policy_work **workp)
{
	int r;
	struct entry *e;
	struct policy_work work;

@@ -1234,7 +1256,9 @@ static void queue_promotion(struct smq_policy *mq, dm_oblock_t oblock,
	work.op = POLICY_PROMOTE;
	work.oblock = oblock;
	work.cblock = infer_cblock(mq, e);
	btracker_queue(mq->bg_work, &work, workp);
	r = btracker_queue(mq->bg_work, &work, workp);
	if (r)
		free_entry(&mq->cache_alloc, e);
}

/*----------------------------------------------------------------*/
@@ -1418,7 +1442,7 @@ static int smq_get_background_work(struct dm_cache_policy *p, bool idle,
	r = btracker_issue(mq->bg_work, result);
	if (r == -ENODATA) {
		if (!clean_target_met(mq, idle)) {
			queue_writeback(mq);
			queue_writeback(mq, idle);
			r = btracker_issue(mq->bg_work, result);
		}
	}
@@ -1778,7 +1802,7 @@ static struct dm_cache_policy *__smq_create(dm_cblock_t cache_size,
	mq->next_hotspot_period = jiffies;
	mq->next_cache_period = jiffies;

	mq->bg_work = btracker_create(10240); /* FIXME: hard coded value */
	mq->bg_work = btracker_create(4096); /* FIXME: hard coded value */
	if (!mq->bg_work)
		goto bad_btracker;

+132 −194 drivers/md/dm-cache-target.c
@@ -408,9 +408,7 @@ struct cache {
	int sectors_per_block_shift;

	spinlock_t lock;
	struct list_head deferred_cells;
	struct bio_list deferred_bios;
	struct bio_list deferred_writethrough_bios;
	sector_t migration_threshold;
	wait_queue_head_t migration_wait;
	atomic_t nr_allocated_migrations;
@@ -446,10 +444,10 @@ struct cache {
	struct dm_kcopyd_client *copier;
	struct workqueue_struct *wq;
	struct work_struct deferred_bio_worker;
	struct work_struct deferred_writethrough_worker;
	struct work_struct migration_worker;
	struct delayed_work waker;
	struct dm_bio_prison_v2 *prison;
	struct bio_set *bs;

	mempool_t *migration_pool;

@@ -490,15 +488,6 @@ struct per_bio_data {
	struct dm_bio_prison_cell_v2 *cell;
	struct dm_hook_info hook_info;
	sector_t len;

	/*
	 * writethrough fields.  These MUST remain at the end of this
	 * structure and the 'cache' member must be the first as it
	 * is used to determine the offset of the writethrough fields.
	 */
	struct cache *cache;
	dm_cblock_t cblock;
	struct dm_bio_details bio_details;
};

struct dm_cache_migration {
@@ -515,19 +504,19 @@ struct dm_cache_migration {

/*----------------------------------------------------------------*/

static bool writethrough_mode(struct cache_features *f)
static bool writethrough_mode(struct cache *cache)
{
	return f->io_mode == CM_IO_WRITETHROUGH;
	return cache->features.io_mode == CM_IO_WRITETHROUGH;
}

static bool writeback_mode(struct cache_features *f)
static bool writeback_mode(struct cache *cache)
{
	return f->io_mode == CM_IO_WRITEBACK;
	return cache->features.io_mode == CM_IO_WRITEBACK;
}

static inline bool passthrough_mode(struct cache_features *f)
static inline bool passthrough_mode(struct cache *cache)
{
	return unlikely(f->io_mode == CM_IO_PASSTHROUGH);
	return unlikely(cache->features.io_mode == CM_IO_PASSTHROUGH);
}

/*----------------------------------------------------------------*/
@@ -537,14 +526,9 @@ static void wake_deferred_bio_worker(struct cache *cache)
	queue_work(cache->wq, &cache->deferred_bio_worker);
}

static void wake_deferred_writethrough_worker(struct cache *cache)
{
	queue_work(cache->wq, &cache->deferred_writethrough_worker);
}

static void wake_migration_worker(struct cache *cache)
{
	if (passthrough_mode(&cache->features))
	if (passthrough_mode(cache))
		return;

	queue_work(cache->wq, &cache->migration_worker);
@@ -567,10 +551,13 @@ static struct dm_cache_migration *alloc_migration(struct cache *cache)
	struct dm_cache_migration *mg;

	mg = mempool_alloc(cache->migration_pool, GFP_NOWAIT);
	if (mg) {
	if (!mg)
		return NULL;

	memset(mg, 0, sizeof(*mg));

	mg->cache = cache;
		atomic_inc(&mg->cache->nr_allocated_migrations);
	}
	atomic_inc(&cache->nr_allocated_migrations);

	return mg;
}
@@ -618,27 +605,16 @@ static unsigned lock_level(struct bio *bio)
 * Per bio data
 *--------------------------------------------------------------*/

/*
 * If using writeback, leave out struct per_bio_data's writethrough fields.
 */
#define PB_DATA_SIZE_WB (offsetof(struct per_bio_data, cache))
#define PB_DATA_SIZE_WT (sizeof(struct per_bio_data))

static size_t get_per_bio_data_size(struct cache *cache)
{
	return writethrough_mode(&cache->features) ? PB_DATA_SIZE_WT : PB_DATA_SIZE_WB;
}

static struct per_bio_data *get_per_bio_data(struct bio *bio, size_t data_size)
static struct per_bio_data *get_per_bio_data(struct bio *bio)
{
	struct per_bio_data *pb = dm_per_bio_data(bio, data_size);
	struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));
	BUG_ON(!pb);
	return pb;
}

static struct per_bio_data *init_per_bio_data(struct bio *bio, size_t data_size)
static struct per_bio_data *init_per_bio_data(struct bio *bio)
{
	struct per_bio_data *pb = get_per_bio_data(bio, data_size);
	struct per_bio_data *pb = get_per_bio_data(bio);

	pb->tick = false;
	pb->req_nr = dm_bio_get_target_bio_nr(bio);
@@ -678,7 +654,6 @@ static void defer_bios(struct cache *cache, struct bio_list *bios)
static bool bio_detain_shared(struct cache *cache, dm_oblock_t oblock, struct bio *bio)
{
	bool r;
	size_t pb_size;
	struct per_bio_data *pb;
	struct dm_cell_key_v2 key;
	dm_oblock_t end = to_oblock(from_oblock(oblock) + 1ULL);
@@ -703,8 +678,7 @@ static bool bio_detain_shared(struct cache *cache, dm_oblock_t oblock, struct bi
	if (cell != cell_prealloc)
		free_prison_cell(cache, cell_prealloc);

	pb_size = get_per_bio_data_size(cache);
	pb = get_per_bio_data(bio, pb_size);
	pb = get_per_bio_data(bio);
	pb->cell = cell;

	return r;
@@ -856,28 +830,35 @@ static void remap_to_cache(struct cache *cache, struct bio *bio,
static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio)
{
	unsigned long flags;
	size_t pb_data_size = get_per_bio_data_size(cache);
	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
	struct per_bio_data *pb;

	spin_lock_irqsave(&cache->lock, flags);
	if (cache->need_tick_bio && !op_is_flush(bio->bi_opf) &&
	    bio_op(bio) != REQ_OP_DISCARD) {
		pb = get_per_bio_data(bio);
		pb->tick = true;
		cache->need_tick_bio = false;
	}
	spin_unlock_irqrestore(&cache->lock, flags);
}

static void remap_to_origin_clear_discard(struct cache *cache, struct bio *bio,
					  dm_oblock_t oblock)
static void __remap_to_origin_clear_discard(struct cache *cache, struct bio *bio,
					    dm_oblock_t oblock, bool bio_has_pbd)
{
	// FIXME: this is called way too much.
	if (bio_has_pbd)
		check_if_tick_bio_needed(cache, bio);
	remap_to_origin(cache, bio);
	if (bio_data_dir(bio) == WRITE)
		clear_discard(cache, oblock_to_dblock(cache, oblock));
}

static void remap_to_origin_clear_discard(struct cache *cache, struct bio *bio,
					  dm_oblock_t oblock)
{
	// FIXME: check_if_tick_bio_needed() is called way too much through this interface
	__remap_to_origin_clear_discard(cache, bio, oblock, true);
}

static void remap_to_cache_dirty(struct cache *cache, struct bio *bio,
				 dm_oblock_t oblock, dm_cblock_t cblock)
{
@@ -908,10 +889,10 @@ static bool accountable_bio(struct cache *cache, struct bio *bio)

static void accounted_begin(struct cache *cache, struct bio *bio)
{
	size_t pb_data_size = get_per_bio_data_size(cache);
	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
	struct per_bio_data *pb;

	if (accountable_bio(cache, bio)) {
		pb = get_per_bio_data(bio);
		pb->len = bio_sectors(bio);
		iot_io_begin(&cache->tracker, pb->len);
	}
@@ -919,8 +900,7 @@ static void accounted_begin(struct cache *cache, struct bio *bio)

static void accounted_complete(struct cache *cache, struct bio *bio)
{
	size_t pb_data_size = get_per_bio_data_size(cache);
	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
	struct per_bio_data *pb = get_per_bio_data(bio);

	iot_io_end(&cache->tracker, pb->len);
}
@@ -937,57 +917,26 @@ static void issue_op(struct bio *bio, void *context)
	accounted_request(cache, bio);
}

static void defer_writethrough_bio(struct cache *cache, struct bio *bio)
{
	unsigned long flags;

	spin_lock_irqsave(&cache->lock, flags);
	bio_list_add(&cache->deferred_writethrough_bios, bio);
	spin_unlock_irqrestore(&cache->lock, flags);

	wake_deferred_writethrough_worker(cache);
}

static void writethrough_endio(struct bio *bio)
{
	struct per_bio_data *pb = get_per_bio_data(bio, PB_DATA_SIZE_WT);

	dm_unhook_bio(&pb->hook_info, bio);

	if (bio->bi_status) {
		bio_endio(bio);
		return;
	}

	dm_bio_restore(&pb->bio_details, bio);
	remap_to_cache(pb->cache, bio, pb->cblock);

	/*
	 * We can't issue this bio directly, since we're in interrupt
	 * context.  So it gets put on a bio list for processing by the
	 * worker thread.
	 */
	defer_writethrough_bio(pb->cache, bio);
}

/*
 * FIXME: send in parallel, huge latency as is.
 * When running in writethrough mode we need to send writes to clean blocks
 * to both the cache and origin devices.  In future we'd like to clone the
 * bio and send them in parallel, but for now we're doing them in
 * series as this is easier.
 * to both the cache and origin devices.  Clone the bio and send them in parallel.
 */
static void remap_to_origin_then_cache(struct cache *cache, struct bio *bio,
static void remap_to_origin_and_cache(struct cache *cache, struct bio *bio,
				      dm_oblock_t oblock, dm_cblock_t cblock)
{
	struct per_bio_data *pb = get_per_bio_data(bio, PB_DATA_SIZE_WT);
	struct bio *origin_bio = bio_clone_fast(bio, GFP_NOIO, cache->bs);

	pb->cache = cache;
	pb->cblock = cblock;
	dm_hook_bio(&pb->hook_info, bio, writethrough_endio, NULL);
	dm_bio_record(&pb->bio_details, bio);
	BUG_ON(!origin_bio);

	remap_to_origin_clear_discard(pb->cache, bio, oblock);
	bio_chain(origin_bio, bio);
	/*
	 * Passing false to __remap_to_origin_clear_discard() skips
	 * all code that might use per_bio_data (since clone doesn't have it)
	 */
	__remap_to_origin_clear_discard(cache, origin_bio, oblock, false);
	submit_bio(origin_bio);

	remap_to_cache(cache, bio, cblock);
}

/*----------------------------------------------------------------
@@ -1201,6 +1150,18 @@ static void background_work_end(struct cache *cache)

/*----------------------------------------------------------------*/

static bool bio_writes_complete_block(struct cache *cache, struct bio *bio)
{
	return (bio_data_dir(bio) == WRITE) &&
		(bio->bi_iter.bi_size == (cache->sectors_per_block << SECTOR_SHIFT));
}

static bool optimisable_bio(struct cache *cache, struct bio *bio, dm_oblock_t block)
{
	return writeback_mode(cache) &&
		(is_discarded_oblock(cache, block) || bio_writes_complete_block(cache, bio));
}

static void quiesce(struct dm_cache_migration *mg,
		    void (*continuation)(struct work_struct *))
{
@@ -1248,8 +1209,7 @@ static int copy(struct dm_cache_migration *mg, bool promote)

static void bio_drop_shared_lock(struct cache *cache, struct bio *bio)
{
	size_t pb_data_size = get_per_bio_data_size(cache);
	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
	struct per_bio_data *pb = get_per_bio_data(bio);

	if (pb->cell && dm_cell_put_v2(cache->prison, pb->cell))
		free_prison_cell(cache, pb->cell);
@@ -1260,23 +1220,21 @@ static void overwrite_endio(struct bio *bio)
{
	struct dm_cache_migration *mg = bio->bi_private;
	struct cache *cache = mg->cache;
	size_t pb_data_size = get_per_bio_data_size(cache);
	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
	struct per_bio_data *pb = get_per_bio_data(bio);

	dm_unhook_bio(&pb->hook_info, bio);

	if (bio->bi_status)
		mg->k.input = bio->bi_status;

	queue_continuation(mg->cache->wq, &mg->k);
	queue_continuation(cache->wq, &mg->k);
}

static void overwrite(struct dm_cache_migration *mg,
		      void (*continuation)(struct work_struct *))
{
	struct bio *bio = mg->overwrite_bio;
	size_t pb_data_size = get_per_bio_data_size(mg->cache);
	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
	struct per_bio_data *pb = get_per_bio_data(bio);

	dm_hook_bio(&pb->hook_info, bio, overwrite_endio, mg);

@@ -1474,22 +1432,9 @@ static void mg_upgrade_lock(struct work_struct *ws)
	}
}

static void mg_copy(struct work_struct *ws)
static void mg_full_copy(struct work_struct *ws)
{
	int r;
	struct dm_cache_migration *mg = ws_to_mg(ws);

	if (mg->overwrite_bio) {
		/*
		 * It's safe to do this here, even though it's new data
		 * because all IO has been locked out of the block.
		 *
		 * mg_lock_writes() already took READ_WRITE_LOCK_LEVEL
		 * so _not_ using mg_upgrade_lock() as continutation.
		 */
		overwrite(mg, mg_update_metadata_after_copy);

	} else {
	struct cache *cache = mg->cache;
	struct policy_work *op = mg->op;
	bool is_policy_promote = (op->op == POLICY_PROMOTE);
@@ -1502,13 +1447,46 @@ static void mg_copy(struct work_struct *ws)

	init_continuation(&mg->k, mg_upgrade_lock);

		r = copy(mg, is_policy_promote);
		if (r) {
	if (copy(mg, is_policy_promote)) {
		DMERR_LIMIT("%s: migration copy failed", cache_device_name(cache));
		mg->k.input = BLK_STS_IOERR;
		mg_complete(mg, false);
	}
}

static void mg_copy(struct work_struct *ws)
{
	struct dm_cache_migration *mg = ws_to_mg(ws);

	if (mg->overwrite_bio) {
		/*
		 * No exclusive lock was held when we last checked if the bio
		 * was optimisable.  So we have to check again in case things
		 * have changed (eg, the block may no longer be discarded).
		 */
		if (!optimisable_bio(mg->cache, mg->overwrite_bio, mg->op->oblock)) {
			/*
			 * Fallback to a real full copy after doing some tidying up.
			 */
			bool rb = bio_detain_shared(mg->cache, mg->op->oblock, mg->overwrite_bio);
			BUG_ON(rb); /* An exclussive lock must _not_ be held for this block */
			mg->overwrite_bio = NULL;
			inc_io_migrations(mg->cache);
			mg_full_copy(ws);
			return;
		}

		/*
		 * It's safe to do this here, even though it's new data
		 * because all IO has been locked out of the block.
		 *
		 * mg_lock_writes() already took READ_WRITE_LOCK_LEVEL
		 * so _not_ using mg_upgrade_lock() as continutation.
		 */
		overwrite(mg, mg_update_metadata_after_copy);

	} else
		mg_full_copy(ws);
}

static int mg_lock_writes(struct dm_cache_migration *mg)
@@ -1567,9 +1545,6 @@ static int mg_start(struct cache *cache, struct policy_work *op, struct bio *bio
		return -ENOMEM;
	}

	memset(mg, 0, sizeof(*mg));

	mg->cache = cache;
	mg->op = op;
	mg->overwrite_bio = bio;

@@ -1703,9 +1678,6 @@ static int invalidate_start(struct cache *cache, dm_cblock_t cblock,
		return -ENOMEM;
	}

	memset(mg, 0, sizeof(*mg));

	mg->cache = cache;
	mg->overwrite_bio = bio;
	mg->invalidate_cblock = cblock;
	mg->invalidate_oblock = oblock;
@@ -1748,26 +1720,12 @@ static void inc_miss_counter(struct cache *cache, struct bio *bio)

/*----------------------------------------------------------------*/

static bool bio_writes_complete_block(struct cache *cache, struct bio *bio)
{
	return (bio_data_dir(bio) == WRITE) &&
		(bio->bi_iter.bi_size == (cache->sectors_per_block << SECTOR_SHIFT));
}

static bool optimisable_bio(struct cache *cache, struct bio *bio, dm_oblock_t block)
{
	return writeback_mode(&cache->features) &&
		(is_discarded_oblock(cache, block) || bio_writes_complete_block(cache, bio));
}

static int map_bio(struct cache *cache, struct bio *bio, dm_oblock_t block,
		   bool *commit_needed)
{
	int r, data_dir;
	bool rb, background_queued;
	dm_cblock_t cblock;
	size_t pb_data_size = get_per_bio_data_size(cache);
	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);

	*commit_needed = false;

@@ -1816,6 +1774,8 @@ static int map_bio(struct cache *cache, struct bio *bio, dm_oblock_t block,
	}

	if (r == -ENOENT) {
		struct per_bio_data *pb = get_per_bio_data(bio);

		/*
		 * Miss.
		 */
@@ -1823,7 +1783,6 @@ static int map_bio(struct cache *cache, struct bio *bio, dm_oblock_t block,
		if (pb->req_nr == 0) {
			accounted_begin(cache, bio);
			remap_to_origin_clear_discard(cache, bio, block);

		} else {
			/*
			 * This is a duplicate writethrough io that is no
@@ -1842,18 +1801,17 @@ static int map_bio(struct cache *cache, struct bio *bio, dm_oblock_t block,
		 * Passthrough always maps to the origin, invalidating any
		 * cache blocks that are written to.
		 */
		if (passthrough_mode(&cache->features)) {
		if (passthrough_mode(cache)) {
			if (bio_data_dir(bio) == WRITE) {
				bio_drop_shared_lock(cache, bio);
				atomic_inc(&cache->stats.demotion);
				invalidate_start(cache, cblock, block, bio);
			} else
				remap_to_origin_clear_discard(cache, bio, block);

		} else {
			if (bio_data_dir(bio) == WRITE && writethrough_mode(&cache->features) &&
			if (bio_data_dir(bio) == WRITE && writethrough_mode(cache) &&
			    !is_dirty(cache, cblock)) {
				remap_to_origin_then_cache(cache, bio, block, cblock);
				remap_to_origin_and_cache(cache, bio, block, cblock);
				accounted_begin(cache, bio);
			} else
				remap_to_cache_dirty(cache, bio, block, cblock);
@@ -1922,8 +1880,7 @@ static blk_status_t commit_op(void *context)

static bool process_flush_bio(struct cache *cache, struct bio *bio)
{
	size_t pb_data_size = get_per_bio_data_size(cache);
	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
	struct per_bio_data *pb = get_per_bio_data(bio);

	if (!pb->req_nr)
		remap_to_origin(cache, bio);
@@ -1983,28 +1940,6 @@ static void process_deferred_bios(struct work_struct *ws)
		schedule_commit(&cache->committer);
}

static void process_deferred_writethrough_bios(struct work_struct *ws)
{
	struct cache *cache = container_of(ws, struct cache, deferred_writethrough_worker);

	unsigned long flags;
	struct bio_list bios;
	struct bio *bio;

	bio_list_init(&bios);

	spin_lock_irqsave(&cache->lock, flags);
	bio_list_merge(&bios, &cache->deferred_writethrough_bios);
	bio_list_init(&cache->deferred_writethrough_bios);
	spin_unlock_irqrestore(&cache->lock, flags);

	/*
	 * These bios have already been through accounted_begin()
	 */
	while ((bio = bio_list_pop(&bios)))
		generic_make_request(bio);
}

/*----------------------------------------------------------------
 * Main worker loop
 *--------------------------------------------------------------*/
@@ -2112,6 +2047,9 @@ static void destroy(struct cache *cache)
		kfree(cache->ctr_args[i]);
	kfree(cache->ctr_args);

	if (cache->bs)
		bioset_free(cache->bs);

	kfree(cache);
}

@@ -2555,8 +2493,15 @@ static int cache_create(struct cache_args *ca, struct cache **result)
	ti->discards_supported = true;
	ti->split_discard_bios = false;

	ti->per_io_data_size = sizeof(struct per_bio_data);

	cache->features = ca->features;
	ti->per_io_data_size = get_per_bio_data_size(cache);
	if (writethrough_mode(cache)) {
		/* Create bioset for writethrough bios issued to origin */
		cache->bs = bioset_create(BIO_POOL_SIZE, 0, 0);
		if (!cache->bs)
			goto bad;
	}

	cache->callbacks.congested_fn = cache_is_congested;
	dm_table_add_target_callbacks(ti->table, &cache->callbacks);
@@ -2618,7 +2563,7 @@ static int cache_create(struct cache_args *ca, struct cache **result)
		goto bad;
	}

	if (passthrough_mode(&cache->features)) {
	if (passthrough_mode(cache)) {
		bool all_clean;

		r = dm_cache_metadata_all_clean(cache->cmd, &all_clean);
@@ -2637,9 +2582,7 @@ static int cache_create(struct cache_args *ca, struct cache **result)
	}

	spin_lock_init(&cache->lock);
	INIT_LIST_HEAD(&cache->deferred_cells);
	bio_list_init(&cache->deferred_bios);
	bio_list_init(&cache->deferred_writethrough_bios);
	atomic_set(&cache->nr_allocated_migrations, 0);
	atomic_set(&cache->nr_io_migrations, 0);
	init_waitqueue_head(&cache->migration_wait);
@@ -2678,8 +2621,6 @@ static int cache_create(struct cache_args *ca, struct cache **result)
		goto bad;
	}
	INIT_WORK(&cache->deferred_bio_worker, process_deferred_bios);
	INIT_WORK(&cache->deferred_writethrough_worker,
		  process_deferred_writethrough_bios);
	INIT_WORK(&cache->migration_worker, check_migrations);
	INIT_DELAYED_WORK(&cache->waker, do_waker);

@@ -2795,9 +2736,8 @@ static int cache_map(struct dm_target *ti, struct bio *bio)
	int r;
	bool commit_needed;
	dm_oblock_t block = get_bio_block(cache, bio);
	size_t pb_data_size = get_per_bio_data_size(cache);

	init_per_bio_data(bio, pb_data_size);
	init_per_bio_data(bio);
	if (unlikely(from_oblock(block) >= from_oblock(cache->origin_blocks))) {
		/*
		 * This can only occur if the io goes to a partial block at
@@ -2821,13 +2761,11 @@ static int cache_map(struct dm_target *ti, struct bio *bio)
	return r;
}

static int cache_end_io(struct dm_target *ti, struct bio *bio,
		blk_status_t *error)
static int cache_end_io(struct dm_target *ti, struct bio *bio, blk_status_t *error)
{
	struct cache *cache = ti->private;
	unsigned long flags;
	size_t pb_data_size = get_per_bio_data_size(cache);
	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
	struct per_bio_data *pb = get_per_bio_data(bio);

	if (pb->tick) {
		policy_tick(cache->policy, false);
@@ -3243,13 +3181,13 @@ static void cache_status(struct dm_target *ti, status_type_t type,
		else
			DMEMIT("1 ");

		if (writethrough_mode(&cache->features))
		if (writethrough_mode(cache))
			DMEMIT("writethrough ");

		else if (passthrough_mode(&cache->features))
		else if (passthrough_mode(cache))
			DMEMIT("passthrough ");

		else if (writeback_mode(&cache->features))
		else if (writeback_mode(cache))
			DMEMIT("writeback ");

		else {
@@ -3415,7 +3353,7 @@ static int process_invalidate_cblocks_message(struct cache *cache, unsigned coun
	unsigned i;
	struct cblock_range range;

	if (!passthrough_mode(&cache->features)) {
	if (!passthrough_mode(cache)) {
		DMERR("%s: cache has to be in passthrough mode for invalidation",
		      cache_device_name(cache));
		return -EPERM;
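
For reference, a hedged sketch (not taken from the patch) of the bio_chain()-based parallel write that the remap_to_origin_and_cache() hunk above switches writethrough mode to: the clone is chained to the original bio, so the original cannot complete until the clone's write to the origin device has also completed, and the two writes proceed in parallel instead of in series. The my_remap_* helpers are illustrative stand-ins for the real remapping code.

static void my_writethrough_write(struct cache *cache, struct bio *bio,
				  dm_oblock_t oblock, dm_cblock_t cblock)
{
	/* clone from the per-cache bio_set the patch creates in cache_create() */
	struct bio *origin_bio = bio_clone_fast(bio, GFP_NOIO, cache->bs);

	BUG_ON(!origin_bio);

	/*
	 * bio_chain() raises the parent's remaining count: 'bio' will not
	 * complete until 'origin_bio' has completed too, which is what makes
	 * the parallel submission safe.
	 */
	bio_chain(origin_bio, bio);

	my_remap_to_origin(cache, origin_bio, oblock);	/* illustrative stand-in */
	submit_bio(origin_bio);				/* origin copy goes out now */

	my_remap_to_cache(cache, bio, cblock);		/* caller submits the cache copy */
}
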
+2 −1 drivers/md/dm-core.h
@@ -29,7 +29,6 @@ struct dm_kobject_holder {
 * DM targets must _not_ deference a mapped_device to directly access its members!
 */
struct mapped_device {
	struct srcu_struct io_barrier;
	struct mutex suspend_lock;

	/*
@@ -127,6 +126,8 @@ struct mapped_device {
	struct blk_mq_tag_set *tag_set;
	bool use_blk_mq:1;
	bool init_tio_pdu:1;

	struct srcu_struct io_barrier;
};

void dm_init_md_queue(struct mapped_device *md);