
Commit 3f8476fe authored by Linus Torvalds
Pull device mapper fixes from Mike Snitzer:

 - revert a request-based DM core change that caused IO latency to
   increase and adversely impact both throughput and system load

 - fix for a use after free bug in DM core's device cleanup

 - a couple DM btree removal fixes (used by dm-thinp)

 - a DM thinp fix for order-5 allocation failure

 - a DM thinp fix to not degrade to read-only metadata mode when in
   out-of-data-space mode for longer than the 'no_space_timeout'

 - fix a long-standing oversight in both dm-thinp and dm-cache by now
   exporting 'needs_check' in status if it was set in metadata

 - fix an embarrassing dm-cache busy-loop that caused worker threads to
   eat cpu even if no IO was actively being issued to the cache device

* tag 'dm-4.2-fixes-2' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
  dm cache: avoid calls to prealloc_free_structs() if possible
  dm cache: avoid preallocation if no work in writeback_some_dirty_blocks()
  dm cache: do not wake_worker() in free_migration()
  dm cache: display 'needs_check' in status if it is set
  dm thin: display 'needs_check' in status if it is set
  dm thin: stay in out-of-data-space mode once no_space_timeout expires
  dm: fix use after free crash due to incorrect cleanup sequence
  Revert "dm: only run the queue on completion if congested or no requests pending"
  dm btree: silence lockdep lock inversion in dm_btree_del()
  dm thin: allocate the cell_sort_array dynamically
  dm btree remove: fix bug in redistribute3
parents eb254374 665022d7
Documentation/device-mapper/cache.txt  +6 −0
@@ -258,6 +258,12 @@ cache metadata mode : ro if read-only, rw if read-write
 	no further I/O will be permitted and the status will just
 	contain the string 'Fail'.  The userspace recovery tools
 	should then be used.
+needs_check	 : 'needs_check' if set, '-' if not set
+	A metadata operation has failed, resulting in the needs_check
+	flag being set in the metadata's superblock.  The metadata
+	device must be deactivated and checked/repaired before the
+	cache can be made fully operational again.  '-' indicates
+	needs_check is not set.
 
 Messages
 --------
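
For illustration, using the placeholder notation of the documentation above:
a healthy cache now ends its status line

	<policy name> <#policy args> <policy args>* rw -

while one whose superblock carries the flag ends

	<policy name> <#policy args> <policy args>* rw needs_check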
Documentation/device-mapper/thin-provisioning.txt  +8 −1
@@ -296,7 +296,7 @@ ii) Status
 	underlying device.  When this is enabled when loading the table,
 	it can get disabled if the underlying device doesn't support it.
 
-    ro|rw
+    ro|rw|out_of_data_space
 	If the pool encounters certain types of device failures it will
 	drop into a read-only metadata mode in which no changes to
 	the pool metadata (like allocating new blocks) are permitted.
@@ -314,6 +314,13 @@ ii) Status
 	module parameter can be used to change this timeout -- it
 	defaults to 60 seconds but may be disabled using a value of 0.
 
+    needs_check
+	A metadata operation has failed, resulting in the needs_check
+	flag being set in the metadata's superblock.  The metadata
+	device must be deactivated and checked/repaired before the
+	thin-pool can be made fully operational again.  '-' indicates
+	needs_check is not set.
+
 iii) Messages
 
     create_thin <dev id>
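
Both targets now emit the flag as the final status field. A minimal userspace
sketch of consuming it, not part of this commit: it fetches a status line
through libdevmapper and tests the trailing token. The device name "pool" and
the build command are assumptions.

/*
 * Sketch only, not from this commit: fetch a thin-pool (or cache)
 * status line via libdevmapper and test the trailing needs_check
 * field.  The device name "pool" is an assumption.
 * Assumed build: cc needs_check.c -ldevmapper
 */
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <libdevmapper.h>

int main(void)
{
	struct dm_task *dmt;
	uint64_t start, length;
	char *type = NULL, *params = NULL;
	char *last;
	size_t n;

	if (!(dmt = dm_task_create(DM_DEVICE_STATUS)))
		return 1;
	if (!dm_task_set_name(dmt, "pool") || !dm_task_run(dmt)) {
		dm_task_destroy(dmt);
		return 1;
	}
	dm_get_next_target(dmt, NULL, &start, &length, &type, &params);
	if (params) {
		/* DMEMIT() leaves a trailing blank; strip it first */
		for (n = strlen(params); n && params[n - 1] == ' '; n--)
			params[n - 1] = '\0';
		/* needs_check is the last token, '-' when clear */
		last = strrchr(params, ' ');
		last = last ? last + 1 : params;
		printf("needs_check is %s\n",
		       strcmp(last, "needs_check") ? "clear" : "set");
	}
	dm_task_destroy(dmt);
	return 0;
}

Run as root against an existing thin-pool or cache device.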
drivers/md/dm-cache-target.c  +23 −15
@@ -424,7 +424,6 @@ static void free_migration(struct dm_cache_migration *mg)
 		wake_up(&cache->migration_wait);
 
 	mempool_free(mg, cache->migration_pool);
-	wake_worker(cache);
 }
 
 static int prealloc_data_structs(struct cache *cache, struct prealloc *p)
@@ -1947,6 +1946,7 @@ static int commit_if_needed(struct cache *cache)
 
 static void process_deferred_bios(struct cache *cache)
 {
+	bool prealloc_used = false;
 	unsigned long flags;
 	struct bio_list bios;
 	struct bio *bio;
@@ -1981,13 +1981,16 @@ static void process_deferred_bios(struct cache *cache)
 			process_discard_bio(cache, &structs, bio);
 		else
 			process_bio(cache, &structs, bio);
+		prealloc_used = true;
 	}
 
-	prealloc_free_structs(cache, &structs);
+	if (prealloc_used)
+		prealloc_free_structs(cache, &structs);
 }
 
 static void process_deferred_cells(struct cache *cache)
 {
+	bool prealloc_used = false;
 	unsigned long flags;
 	struct dm_bio_prison_cell *cell, *tmp;
 	struct list_head cells;
@@ -2015,8 +2018,10 @@ static void process_deferred_cells(struct cache *cache)
 		}
 
 		process_cell(cache, &structs, cell);
+		prealloc_used = true;
 	}
 
-	prealloc_free_structs(cache, &structs);
+	if (prealloc_used)
+		prealloc_free_structs(cache, &structs);
 }
 
@@ -2062,7 +2067,7 @@ static void process_deferred_writethrough_bios(struct cache *cache)
 
 static void writeback_some_dirty_blocks(struct cache *cache)
 {
-	int r = 0;
+	bool prealloc_used = false;
 	dm_oblock_t oblock;
 	dm_cblock_t cblock;
 	struct prealloc structs;
@@ -2072,22 +2077,20 @@ static void writeback_some_dirty_blocks(struct cache *cache)
 	memset(&structs, 0, sizeof(structs));
 
 	while (spare_migration_bandwidth(cache)) {
-		if (prealloc_data_structs(cache, &structs))
-			break;
+		if (policy_writeback_work(cache->policy, &oblock, &cblock, busy))
+			break; /* no work to do */
 
-		r = policy_writeback_work(cache->policy, &oblock, &cblock, busy);
-		if (r)
-			break;
-
-		r = get_cell(cache, oblock, &structs, &old_ocell);
-		if (r) {
+		if (prealloc_data_structs(cache, &structs) ||
+		    get_cell(cache, oblock, &structs, &old_ocell)) {
 			policy_set_dirty(cache->policy, oblock);
 			break;
 		}
 
 		writeback(cache, &structs, oblock, cblock, old_ocell);
+		prealloc_used = true;
 	}
 
-	prealloc_free_structs(cache, &structs);
+	if (prealloc_used)
+		prealloc_free_structs(cache, &structs);
 }
 
@@ -3496,7 +3499,7 @@ static void cache_resume(struct dm_target *ti)
  * <#demotions> <#promotions> <#dirty>
  * <#features> <features>*
  * <#core args> <core args>
- * <policy name> <#policy args> <policy args>* <cache metadata mode>
+ * <policy name> <#policy args> <policy args>* <cache metadata mode> <needs_check>
  */
 static void cache_status(struct dm_target *ti, status_type_t type,
 			 unsigned status_flags, char *result, unsigned maxlen)
@@ -3582,6 +3585,11 @@ static void cache_status(struct dm_target *ti, status_type_t type,
 		else
 			DMEMIT("rw ");
 
+		if (dm_cache_metadata_needs_check(cache->cmd))
+			DMEMIT("needs_check ");
+		else
+			DMEMIT("- ");
+
 		break;
 
 	case STATUSTYPE_TABLE:
@@ -3820,7 +3828,7 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
 
 static struct target_type cache_target = {
 	.name = "cache",
-	.version = {1, 7, 0},
+	.version = {1, 8, 0},
 	.module = THIS_MODULE,
 	.ctr = cache_ctr,
 	.dtr = cache_dtr,
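
Taken together, the hunks above remove the self-wakeup in free_migration()
that kept worker threads spinning on an idle cache, and defer preallocation
until work is known to exist. The shape of that second fix, as a standalone
sketch with hypothetical names in place of the dm-cache helpers:

#include <stdbool.h>
#include <stddef.h>

/* Hypothetical stand-ins for struct prealloc and the dm-cache helpers. */
struct prealloc { void *reserve; };

static bool have_work(void)                  /* like policy_writeback_work() */
{ static int left = 2; return left-- > 0; }
static int prealloc_get(struct prealloc *p)  /* like prealloc_data_structs() */
{ p->reserve = &p->reserve; return 0; }
static void prealloc_put(struct prealloc *p) /* like prealloc_free_structs() */
{ p->reserve = NULL; }
static void do_one(struct prealloc *p)       /* like writeback() */
{ (void)p; }

void worker_pass(void)
{
	bool prealloc_used = false;
	struct prealloc structs = { NULL };

	while (have_work()) {
		/* allocate only after work is known to exist ... */
		if (prealloc_get(&structs))
			break;
		do_one(&structs);
		prealloc_used = true;
	}

	/* ... and free only when an iteration actually ran */
	if (prealloc_used)
		prealloc_put(&structs);
}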
drivers/md/dm-thin.c  +37 −7
@@ -18,6 +18,7 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/slab.h>
+#include <linux/vmalloc.h>
 #include <linux/sort.h>
 #include <linux/rbtree.h>
 
@@ -268,7 +269,7 @@ struct pool {
 	process_mapping_fn process_prepared_mapping;
 	process_mapping_fn process_prepared_discard;
 
-	struct dm_bio_prison_cell *cell_sort_array[CELL_SORT_ARRAY_SIZE];
+	struct dm_bio_prison_cell **cell_sort_array;
 };
 
 static enum pool_mode get_pool_mode(struct pool *pool);
@@ -2281,18 +2282,23 @@ static void do_waker(struct work_struct *ws)
 	queue_delayed_work(pool->wq, &pool->waker, COMMIT_PERIOD);
 }
 
+static void notify_of_pool_mode_change_to_oods(struct pool *pool);
+
 /*
  * We're holding onto IO to allow userland time to react.  After the
  * timeout either the pool will have been resized (and thus back in
- * PM_WRITE mode), or we degrade to PM_READ_ONLY and start erroring IO.
+ * PM_WRITE mode), or we degrade to PM_OUT_OF_DATA_SPACE w/ error_if_no_space.
  */
 static void do_no_space_timeout(struct work_struct *ws)
 {
 	struct pool *pool = container_of(to_delayed_work(ws), struct pool,
 					 no_space_timeout);
 
-	if (get_pool_mode(pool) == PM_OUT_OF_DATA_SPACE && !pool->pf.error_if_no_space)
-		set_pool_mode(pool, PM_READ_ONLY);
+	if (get_pool_mode(pool) == PM_OUT_OF_DATA_SPACE && !pool->pf.error_if_no_space) {
+		pool->pf.error_if_no_space = true;
+		notify_of_pool_mode_change_to_oods(pool);
+		error_retry_list(pool);
+	}
 }
 
 /*----------------------------------------------------------------*/
@@ -2370,6 +2376,14 @@ static void notify_of_pool_mode_change(struct pool *pool, const char *new_mode)
 	       dm_device_name(pool->pool_md), new_mode);
 }
 
+static void notify_of_pool_mode_change_to_oods(struct pool *pool)
+{
+	if (!pool->pf.error_if_no_space)
+		notify_of_pool_mode_change(pool, "out-of-data-space (queue IO)");
+	else
+		notify_of_pool_mode_change(pool, "out-of-data-space (error IO)");
+}
+
 static bool passdown_enabled(struct pool_c *pt)
 {
 	return pt->adjusted_pf.discard_passdown;
@@ -2454,7 +2468,7 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
 		 * frequently seeing this mode.
 		 */
 		if (old_mode != new_mode)
-			notify_of_pool_mode_change(pool, "out-of-data-space");
+			notify_of_pool_mode_change_to_oods(pool);
 		pool->process_bio = process_bio_read_only;
 		pool->process_discard = process_discard_bio;
 		pool->process_cell = process_cell_read_only;
@@ -2777,6 +2791,7 @@ static void __pool_destroy(struct pool *pool)
 {
 	__pool_table_remove(pool);
 
+	vfree(pool->cell_sort_array);
 	if (dm_pool_metadata_close(pool->pmd) < 0)
 		DMWARN("%s: dm_pool_metadata_close() failed.", __func__);
 
@@ -2889,6 +2904,13 @@ static struct pool *pool_create(struct mapped_device *pool_md,
 		goto bad_mapping_pool;
 	}
 
+	pool->cell_sort_array = vmalloc(sizeof(*pool->cell_sort_array) * CELL_SORT_ARRAY_SIZE);
+	if (!pool->cell_sort_array) {
+		*error = "Error allocating cell sort array";
+		err_p = ERR_PTR(-ENOMEM);
+		goto bad_sort_array;
+	}
+
 	pool->ref_count = 1;
 	pool->last_commit_jiffies = jiffies;
 	pool->pool_md = pool_md;
@@ -2897,6 +2919,8 @@ static struct pool *pool_create(struct mapped_device *pool_md,
 
 	return pool;
 
+bad_sort_array:
+	mempool_destroy(pool->mapping_pool);
 bad_mapping_pool:
 	dm_deferred_set_destroy(pool->all_io_ds);
 bad_all_io_ds:
@@ -3714,6 +3738,7 @@ static void emit_flags(struct pool_features *pf, char *result,
  * Status line is:
  *    <transaction id> <used metadata sectors>/<total metadata sectors>
  *    <used data sectors>/<total data sectors> <held metadata root>
+ *    <pool mode> <discard config> <no space config> <needs_check>
  */
 static void pool_status(struct dm_target *ti, status_type_t type,
 			unsigned status_flags, char *result, unsigned maxlen)
@@ -3815,6 +3840,11 @@ static void pool_status(struct dm_target *ti, status_type_t type,
 		else
 			DMEMIT("queue_if_no_space ");
 
+		if (dm_pool_metadata_needs_check(pool->pmd))
+			DMEMIT("needs_check ");
+		else
+			DMEMIT("- ");
+
 		break;
 
 	case STATUSTYPE_TABLE:
@@ -3918,7 +3948,7 @@ static struct target_type pool_target = {
 	.name = "thin-pool",
 	.features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE |
 		    DM_TARGET_IMMUTABLE,
-	.version = {1, 15, 0},
+	.version = {1, 16, 0},
 	.module = THIS_MODULE,
 	.ctr = pool_ctr,
 	.dtr = pool_dtr,
@@ -4305,7 +4335,7 @@ static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits)
 
 static struct target_type thin_target = {
 	.name = "thin",
-	.version = {1, 15, 0},
+	.version = {1, 16, 0},
 	.module	= THIS_MODULE,
 	.ctr = thin_ctr,
 	.dtr = thin_dtr,
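
The cell_sort_array change is the order-5 fix from the pull summary: 8192
cell pointers embedded in struct pool (64 KB on 64-bit) pushed the
structure's kmalloc into a 128 KB contiguous request that fails once memory
fragments, so the array is now allocated separately with vmalloc(), which
only needs page-sized chunks. A reduced sketch of the pattern, using a
hypothetical struct:

#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

#define CELL_SORT_ARRAY_SIZE 8192	/* same size dm-thin uses */

struct cell;				/* stand-in for dm_bio_prison_cell */

struct pool_like {			/* hypothetical, mirrors struct pool */
	struct cell **cell_sort_array;	/* pointer, not a flat 64 KB array */
};

static int pool_like_alloc_sort_array(struct pool_like *p)
{
	/* vmalloc builds the mapping from single pages: no order-5 needed */
	p->cell_sort_array = vmalloc(sizeof(*p->cell_sort_array) *
				     CELL_SORT_ARRAY_SIZE);
	return p->cell_sort_array ? 0 : -ENOMEM;
}

static void pool_like_free_sort_array(struct pool_like *p)
{
	vfree(p->cell_sort_array);	/* vfree() pairs with vmalloc() */
}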
drivers/md/dm.c  +4 −8
@@ -1067,13 +1067,10 @@ static void rq_end_stats(struct mapped_device *md, struct request *orig)
  */
 static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
 {
-	int nr_requests_pending;
-
 	atomic_dec(&md->pending[rw]);
 
 	/* nudge anyone waiting on suspend queue */
-	nr_requests_pending = md_in_flight(md);
-	if (!nr_requests_pending)
+	if (!md_in_flight(md))
 		wake_up(&md->wait);
 
 	/*
@@ -1085,8 +1082,7 @@ static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
 	if (run_queue) {
 		if (md->queue->mq_ops)
 			blk_mq_run_hw_queues(md->queue, true);
-		else if (!nr_requests_pending ||
-			 (nr_requests_pending >= md->queue->nr_congestion_on))
+		else
 			blk_run_queue_async(md->queue);
 	}
 
@@ -2281,8 +2277,6 @@ static void dm_init_old_md_queue(struct mapped_device *md)
 
 static void cleanup_mapped_device(struct mapped_device *md)
 {
-	cleanup_srcu_struct(&md->io_barrier);
-
 	if (md->wq)
 		destroy_workqueue(md->wq);
 	if (md->kworker_task)
@@ -2294,6 +2288,8 @@ static void cleanup_mapped_device(struct mapped_device *md)
 	if (md->bs)
 		bioset_free(md->bs);
 
+	cleanup_srcu_struct(&md->io_barrier);
+
 	if (md->disk) {
 		spin_lock(&_minor_lock);
 		md->disk->private_data = NULL;
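
dm.c carries two distinct fixes. The first two hunks are the revert from the
pull summary: completion once again kicks the queue unconditionally via
blk_run_queue_async(), instead of consulting a congestion heuristic that hurt
latency, throughput, and system load. The last two hunks fix the use after
free: cleanup_srcu_struct() ran before the workqueue and kworker were
destroyed, yet work executing on md->wq can still sit inside an SRCU read
section on md->io_barrier. The ordering rule in miniature, with hypothetical
names:

#include <linux/srcu.h>
#include <linux/workqueue.h>

/* Hypothetical device, mirroring the md->wq / md->io_barrier pair. */
struct dev_like {
	struct srcu_struct io_barrier;	/* read-locked by queued work */
	struct workqueue_struct *wq;
};

static void dev_like_cleanup(struct dev_like *d)
{
	/*
	 * Destroy the users first: once destroy_workqueue() returns,
	 * no work item can still be inside
	 * srcu_read_lock(&d->io_barrier).
	 */
	if (d->wq)
		destroy_workqueue(d->wq);

	/* Only now is it safe to free the SRCU state they relied on. */
	cleanup_srcu_struct(&d->io_barrier);
}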