
Commit dac94e29 authored by Linus Torvalds


Merge tag 'for-4.12/dm-fixes-2' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm

Pull device mapper fixes from Mike Snitzer:

 - a couple of DM thin provisioning fixes

 - a few request-based DM and DM multipath fixes for issues that were
   introduced when merging Christoph's changes with Bart's changes for 4.12

 - a DM bufio unsigned overflow fix

 - a couple of pure fixes for the DM cache target

 - various very small tweaks to the DM cache target that enable
   considerable speed improvements in the face of continuous IO. Given
   that the cache target was significantly reworked for 4.12, I see no
   reason to sit on these advances until 4.13, considering the favorable
   results associated with such minimalist tweaks.

* tag 'for-4.12/dm-fixes-2' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
  dm cache: handle kmalloc failure allocating background_tracker struct
  dm bufio: make the parameter "retain_bytes" unsigned long
  dm mpath: multipath_clone_and_map must not return -EIO
  dm mpath: don't return -EIO from dm_report_EIO
  dm rq: add a missing break to map_request
  dm space map disk: fix some book keeping in the disk space map
  dm thin metadata: call precommit before saving the roots
  dm cache policy smq: don't do any writebacks unless IDLE
  dm cache: simplify the IDLE vs BUSY state calculation
  dm cache: track all IO to the cache rather than just the origin device's IO
  dm cache policy smq: stop preemptively demoting blocks
  dm cache policy smq: put newly promoted entries at the top of the multiqueue
  dm cache policy smq: be more aggressive about triggering a writeback
  dm cache policy smq: only demote entries in bottom half of the clean multiqueue
  dm cache: fix incorrect 'idle_time' reset in IO tracker
parents 243bfd2c 7e1b9521
drivers/md/dm-bufio.c  +8 −8
@@ -218,7 +218,7 @@ static DEFINE_SPINLOCK(param_spinlock);
  * Buffers are freed after this timeout
  */
 static unsigned dm_bufio_max_age = DM_BUFIO_DEFAULT_AGE_SECS;
-static unsigned dm_bufio_retain_bytes = DM_BUFIO_DEFAULT_RETAIN_BYTES;
+static unsigned long dm_bufio_retain_bytes = DM_BUFIO_DEFAULT_RETAIN_BYTES;
 
 static unsigned long dm_bufio_peak_allocated;
 static unsigned long dm_bufio_allocated_kmem_cache;
@@ -1558,10 +1558,10 @@ static bool __try_evict_buffer(struct dm_buffer *b, gfp_t gfp)
 	return true;
 }
 
-static unsigned get_retain_buffers(struct dm_bufio_client *c)
+static unsigned long get_retain_buffers(struct dm_bufio_client *c)
 {
-	unsigned retain_bytes = ACCESS_ONCE(dm_bufio_retain_bytes);
-	return retain_bytes / c->block_size;
+	unsigned long retain_bytes = ACCESS_ONCE(dm_bufio_retain_bytes);
+	return retain_bytes >> (c->sectors_per_block_bits + SECTOR_SHIFT);
 }
 
 static unsigned long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan,
@@ -1571,7 +1571,7 @@ static unsigned long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan,
 	struct dm_buffer *b, *tmp;
 	unsigned long freed = 0;
 	unsigned long count = nr_to_scan;
-	unsigned retain_target = get_retain_buffers(c);
+	unsigned long retain_target = get_retain_buffers(c);
 
 	for (l = 0; l < LIST_SIZE; l++) {
 		list_for_each_entry_safe_reverse(b, tmp, &c->lru[l], lru_list) {
@@ -1794,8 +1794,8 @@ static bool older_than(struct dm_buffer *b, unsigned long age_hz)
 static void __evict_old_buffers(struct dm_bufio_client *c, unsigned long age_hz)
 {
 	struct dm_buffer *b, *tmp;
-	unsigned retain_target = get_retain_buffers(c);
-	unsigned count;
+	unsigned long retain_target = get_retain_buffers(c);
+	unsigned long count;
 	LIST_HEAD(write_list);
 
 	dm_bufio_lock(c);
@@ -1955,7 +1955,7 @@ MODULE_PARM_DESC(max_cache_size_bytes, "Size of metadata cache");
 module_param_named(max_age_seconds, dm_bufio_max_age, uint, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(max_age_seconds, "Max age of a buffer in seconds");
 
-module_param_named(retain_bytes, dm_bufio_retain_bytes, uint, S_IRUGO | S_IWUSR);
+module_param_named(retain_bytes, dm_bufio_retain_bytes, ulong, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(retain_bytes, "Try to keep at least this many bytes cached in memory");
 
 module_param_named(peak_allocated_bytes, dm_bufio_peak_allocated, ulong, S_IRUGO | S_IWUSR);
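The type change above is the actual overflow fix: retain_bytes is a byte count, and on large-memory systems a value of 4 GiB or more silently truncates to 0 in a 32-bit unsigned, so the shrinker evicts everything it was told to retain. A standalone sketch of the failure mode and of the shift that replaces the division (illustrative values on an LP64 system; not kernel code):

	#include <stdio.h>

	#define SECTOR_SHIFT 9	/* 512-byte sectors, as in the kernel */

	int main(void)
	{
		/* A user asks bufio to retain 4 GiB of buffers. */
		unsigned long requested = 4UL * 1024 * 1024 * 1024;

		unsigned as_uint = requested;		/* old type: wraps to 0 */
		unsigned long as_ulong = requested;	/* new type: preserved */

		/* A 4 KiB block is 8 sectors, so sectors_per_block_bits == 3. */
		unsigned sectors_per_block_bits = 3;
		unsigned long block_size = 1UL << (sectors_per_block_bits + SECTOR_SHIFT);

		/* Shifting by (sectors_per_block_bits + SECTOR_SHIFT) divides by
		 * the power-of-two block size, mirroring get_retain_buffers(). */
		printf("old: %u buffers retained\n", as_uint / (unsigned)block_size);
		printf("new: %lu buffers retained\n",
		       as_ulong >> (sectors_per_block_bits + SECTOR_SHIFT));
		return 0;
	}

Since the parameter is declared with module_param_named(), the new ulong value can be set at runtime through /sys/module/dm_bufio/parameters/retain_bytes as well as at module load time.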
drivers/md/dm-cache-background-tracker.c  +5 −0
@@ -33,6 +33,11 @@ struct background_tracker *btracker_create(unsigned max_work)
 {
 	struct background_tracker *b = kmalloc(sizeof(*b), GFP_KERNEL);
 
+	if (!b) {
+		DMERR("couldn't create background_tracker");
+		return NULL;
+	}
+
 	b->max_work = max_work;
 	atomic_set(&b->pending_promotes, 0);
 	atomic_set(&b->pending_writebacks, 0);
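The five added lines are the whole fix: kmalloc(GFP_KERNEL) can return NULL, and btracker_create() previously dereferenced b unconditionally. A minimal userspace sketch of the same guard (hypothetical toy types; malloc standing in for kmalloc and fprintf for DMERR):

	#include <stdio.h>
	#include <stdlib.h>

	struct background_tracker {
		unsigned max_work;
		/* queues and counters elided */
	};

	/* Mirrors the fixed btracker_create(): validate the allocation before
	 * touching any field, and report failure to the caller via NULL. */
	static struct background_tracker *tracker_create(unsigned max_work)
	{
		struct background_tracker *b = malloc(sizeof(*b));

		if (!b) {
			fprintf(stderr, "couldn't create background_tracker\n");
			return NULL;
		}

		b->max_work = max_work;
		return b;
	}

	int main(void)
	{
		struct background_tracker *t = tracker_create(4096);

		if (!t)
			return 1;	/* callers must handle the NULL as well */
		free(t);
		return 0;
	}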
drivers/md/dm-cache-policy-smq.c  +12 −19
@@ -1120,8 +1120,6 @@ static bool clean_target_met(struct smq_policy *mq, bool idle)
 	 * Cache entries may not be populated.  So we cannot rely on the
 	 * size of the clean queue.
 	 */
-	unsigned nr_clean;
-
 	if (idle) {
 		/*
 		 * We'd like to clean everything.
@@ -1129,18 +1127,16 @@ static bool clean_target_met(struct smq_policy *mq, bool idle)
 		return q_size(&mq->dirty) == 0u;
 	}
 
-	nr_clean = from_cblock(mq->cache_size) - q_size(&mq->dirty);
-	return (nr_clean + btracker_nr_writebacks_queued(mq->bg_work)) >=
-		percent_to_target(mq, CLEAN_TARGET);
+	/*
+	 * If we're busy we don't worry about cleaning at all.
+	 */
+	return true;
 }
 
-static bool free_target_met(struct smq_policy *mq, bool idle)
+static bool free_target_met(struct smq_policy *mq)
 {
 	unsigned nr_free;
 
-	if (!idle)
-		return true;
-
 	nr_free = from_cblock(mq->cache_size) - mq->cache_alloc.nr_allocated;
 	return (nr_free + btracker_nr_demotions_queued(mq->bg_work)) >=
 		percent_to_target(mq, FREE_TARGET);
@@ -1190,9 +1186,9 @@ static void queue_demotion(struct smq_policy *mq)
 	if (unlikely(WARN_ON_ONCE(!mq->migrations_allowed)))
 		return;
 
-	e = q_peek(&mq->clean, mq->clean.nr_levels, true);
+	e = q_peek(&mq->clean, mq->clean.nr_levels / 2, true);
 	if (!e) {
-		if (!clean_target_met(mq, false))
+		if (!clean_target_met(mq, true))
 			queue_writeback(mq);
 		return;
 	}
@@ -1220,7 +1216,7 @@ static void queue_promotion(struct smq_policy *mq, dm_oblock_t oblock,
 		 * We always claim to be 'idle' to ensure some demotions happen
 		 * with continuous loads.
 		 */
-		if (!free_target_met(mq, true))
+		if (!free_target_met(mq))
 			queue_demotion(mq);
 		return;
 	}
@@ -1421,15 +1417,11 @@ static int smq_get_background_work(struct dm_cache_policy *p, bool idle,
 	spin_lock_irqsave(&mq->lock, flags);
 	r = btracker_issue(mq->bg_work, result);
 	if (r == -ENODATA) {
-		/* find some writeback work to do */
-		if (mq->migrations_allowed && !free_target_met(mq, idle))
-			queue_demotion(mq);
-
-		else if (!clean_target_met(mq, idle))
+		if (!clean_target_met(mq, idle)) {
 			queue_writeback(mq);
-
-		r = btracker_issue(mq->bg_work, result);
+			r = btracker_issue(mq->bg_work, result);
+		}
 	}
 	spin_unlock_irqrestore(&mq->lock, flags);
 
 	return r;
@@ -1452,6 +1444,7 @@ static void __complete_background_work(struct smq_policy *mq,
 		clear_pending(mq, e);
 		if (success) {
 			e->oblock = work->oblock;
+			e->level = NR_CACHE_LEVELS - 1;
 			push(mq, e);
 			// h, q, a
 		} else {
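To see what free_target_met() is computing after the cleanup, it helps to sketch percent_to_target(): the policy compares free blocks plus already-queued demotions against a percentage of the cache size, so it never queues redundant background work. A toy model (the 25% figure is illustrative only; the real constants live in dm-cache-policy-smq.c):

	#include <stdbool.h>
	#include <stdio.h>

	#define FREE_TARGET 25u	/* percent; illustrative value only */

	/* Hypothetical stand-in for the smq policy's bookkeeping. */
	struct toy_policy {
		unsigned cache_size;		/* total cache blocks */
		unsigned nr_allocated;		/* blocks currently in use */
		unsigned demotions_queued;	/* background work already issued */
	};

	static unsigned percent_to_target(const struct toy_policy *mq, unsigned p)
	{
		return mq->cache_size * p / 100u;
	}

	/* Mirrors free_target_met() after this merge: demotions that are
	 * already queued count toward the target. */
	static bool free_target_met(const struct toy_policy *mq)
	{
		unsigned nr_free = mq->cache_size - mq->nr_allocated;

		return (nr_free + mq->demotions_queued) >=
			percent_to_target(mq, FREE_TARGET);
	}

	int main(void)
	{
		struct toy_policy mq = {
			.cache_size = 1000,
			.nr_allocated = 900,
			.demotions_queued = 120,
		};

		/* 100 free + 120 queued = 220, below the 250-block target. */
		printf("free target met: %s\n", free_target_met(&mq) ? "yes" : "no");
		return 0;
	}

clean_target_met() follows the same shape, except that after this merge a busy cache simply reports the clean target as met, so writebacks are only issued when the cache is IDLE.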
drivers/md/dm-cache-target.c  +13 −14
@@ -94,6 +94,9 @@ static void iot_io_begin(struct io_tracker *iot, sector_t len)
 
 static void __iot_io_end(struct io_tracker *iot, sector_t len)
 {
+	if (!len)
+		return;
+
 	iot->in_flight -= len;
 	if (!iot->in_flight)
 		iot->idle_time = jiffies;
@@ -474,7 +477,7 @@ struct cache {
 	spinlock_t invalidation_lock;
 	struct list_head invalidation_requests;
 
-	struct io_tracker origin_tracker;
+	struct io_tracker tracker;
 
 	struct work_struct commit_ws;
 	struct batcher committer;
@@ -901,8 +904,7 @@ static dm_oblock_t get_bio_block(struct cache *cache, struct bio *bio)
 
 static bool accountable_bio(struct cache *cache, struct bio *bio)
 {
-	return ((bio->bi_bdev == cache->origin_dev->bdev) &&
-		bio_op(bio) != REQ_OP_DISCARD);
+	return bio_op(bio) != REQ_OP_DISCARD;
 }
 
 static void accounted_begin(struct cache *cache, struct bio *bio)
@@ -912,7 +914,7 @@ static void accounted_begin(struct cache *cache, struct bio *bio)
 
 	if (accountable_bio(cache, bio)) {
 		pb->len = bio_sectors(bio);
-		iot_io_begin(&cache->origin_tracker, pb->len);
+		iot_io_begin(&cache->tracker, pb->len);
 	}
 }
 
@@ -921,7 +923,7 @@ static void accounted_complete(struct cache *cache, struct bio *bio)
 	size_t pb_data_size = get_per_bio_data_size(cache);
 	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
 
-	iot_io_end(&cache->origin_tracker, pb->len);
+	iot_io_end(&cache->tracker, pb->len);
 }
 
 static void accounted_request(struct cache *cache, struct bio *bio)
@@ -1716,20 +1718,19 @@ static int invalidate_start(struct cache *cache, dm_cblock_t cblock,
 
 enum busy {
 	IDLE,
-	MODERATE,
 	BUSY
 };
 
 static enum busy spare_migration_bandwidth(struct cache *cache)
 {
-	bool idle = iot_idle_for(&cache->origin_tracker, HZ);
+	bool idle = iot_idle_for(&cache->tracker, HZ);
 	sector_t current_volume = (atomic_read(&cache->nr_io_migrations) + 1) *
 		cache->sectors_per_block;
 
-	if (current_volume <= cache->migration_threshold)
-		return idle ? IDLE : MODERATE;
+	if (idle && current_volume <= cache->migration_threshold)
+		return IDLE;
 	else
-		return idle ? MODERATE : BUSY;
+		return BUSY;
 }
 
 static void inc_hit_counter(struct cache *cache, struct bio *bio)
@@ -2045,8 +2046,6 @@ static void check_migrations(struct work_struct *ws)
 
 	for (;;) {
 		b = spare_migration_bandwidth(cache);
-		if (b == BUSY)
-			break;
 
 		r = policy_get_background_work(cache->policy, b == IDLE, &op);
 		if (r == -ENODATA)
@@ -2717,7 +2716,7 @@ static int cache_create(struct cache_args *ca, struct cache **result)
 
 	batcher_init(&cache->committer, commit_op, cache,
 		     issue_op, cache, cache->wq);
-	iot_init(&cache->origin_tracker);
+	iot_init(&cache->tracker);
 
 	init_rwsem(&cache->background_work_lock);
 	prevent_background_work(cache);
@@ -2941,7 +2940,7 @@ static void cache_postsuspend(struct dm_target *ti)
 
 	cancel_delayed_work(&cache->waker);
 	flush_workqueue(cache->wq);
-	WARN_ON(cache->origin_tracker.in_flight);
+	WARN_ON(cache->tracker.in_flight);
 
 	/*
 	 * If it's a flush suspend there won't be any deferred bios, so this
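The one-line guard in __iot_io_end() fixes a subtle bug: bios that were never accounted complete with pb->len == 0, and before the guard each such completion refreshed idle_time while in_flight sat at zero, so the tracker never appeared idle and background work was starved. A userspace sketch of the tracker logic (jiffies replaced by a caller-supplied clock; the real code also takes a spinlock):

	#include <stdbool.h>
	#include <stdio.h>

	typedef unsigned long sector_t;

	struct io_tracker {
		sector_t in_flight;
		unsigned long idle_time;
	};

	static void iot_io_begin(struct io_tracker *iot, sector_t len)
	{
		iot->in_flight += len;
	}

	static void iot_io_end(struct io_tracker *iot, sector_t len, unsigned long now)
	{
		/* The fix: a zero-length completion must not reset idle_time. */
		if (!len)
			return;

		iot->in_flight -= len;
		if (!iot->in_flight)
			iot->idle_time = now;
	}

	static bool iot_idle_for(const struct io_tracker *iot, unsigned long period,
				 unsigned long now)
	{
		return !iot->in_flight && now - iot->idle_time >= period;
	}

	int main(void)
	{
		struct io_tracker iot = { 0, 0 };

		iot_io_begin(&iot, 8);
		iot_io_end(&iot, 8, 100);	/* real IO completes at t = 100 */
		iot_io_end(&iot, 0, 150);	/* unaccounted bio: now ignored */

		/* Idle for 60 ticks at t = 160; without the guard, idle_time
		 * would read 150 and the cache would still look busy. */
		printf("idle: %s\n", iot_idle_for(&iot, 50, 160) ? "yes" : "no");
		return 0;
	}

Renaming origin_tracker to tracker reflects the second half of the change: all accountable IO to the cache is tracked now, not just IO aimed at the origin device.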
drivers/md/dm-mpath.c  +11 −8
@@ -447,7 +447,7 @@ static struct pgpath *choose_pgpath(struct multipath *m, size_t nr_bytes)
  * it has been invoked.
  */
 #define dm_report_EIO(m)						\
-({									\
+do {									\
 	struct mapped_device *md = dm_table_get_md((m)->ti->table);	\
 									\
 	pr_debug("%s: returning EIO; QIFNP = %d; SQIFNP = %d; DNFS = %d\n", \
@@ -455,8 +455,7 @@ static struct pgpath *choose_pgpath(struct multipath *m, size_t nr_bytes)
 		 test_bit(MPATHF_QUEUE_IF_NO_PATH, &(m)->flags),	\
 		 test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &(m)->flags),	\
 		 dm_noflush_suspending((m)->ti));			\
-	-EIO;								\
-})
+} while (0)
 
 /*
  * Map cloned requests (request-based multipath)
@@ -481,7 +480,8 @@ static int multipath_clone_and_map(struct dm_target *ti, struct request *rq,
 	if (!pgpath) {
 		if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
 			return DM_MAPIO_DELAY_REQUEUE;
-		return dm_report_EIO(m);	/* Failed */
+		dm_report_EIO(m);	/* Failed */
+		return DM_MAPIO_KILL;
 	} else if (test_bit(MPATHF_QUEUE_IO, &m->flags) ||
 		   test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags)) {
 		if (pg_init_all_paths(m))
@@ -558,7 +558,8 @@ static int __multipath_map_bio(struct multipath *m, struct bio *bio, struct dm_m
 	if (!pgpath) {
 		if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
 			return DM_MAPIO_REQUEUE;
-		return dm_report_EIO(m);
+		dm_report_EIO(m);
+		return -EIO;
 	}
 
 	mpio->pgpath = pgpath;
@@ -1493,7 +1494,7 @@ static int multipath_end_io(struct dm_target *ti, struct request *clone,
 		if (atomic_read(&m->nr_valid_paths) == 0 &&
 		    !test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
 			if (error == -EIO)
-				error = dm_report_EIO(m);
+				dm_report_EIO(m);
 			/* complete with the original error */
 			r = DM_ENDIO_DONE;
 		}
@@ -1524,8 +1525,10 @@ static int do_end_io_bio(struct multipath *m, struct bio *clone,
 		fail_path(mpio->pgpath);
 
 	if (atomic_read(&m->nr_valid_paths) == 0 &&
-	    !test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
-		return dm_report_EIO(m);
+	    !test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
+		dm_report_EIO(m);
+		return -EIO;
+	}
 
 	/* Queue for the daemon to resubmit */
 	dm_bio_restore(get_bio_details_from_bio(clone), clone);
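The macro rewrite deserves a note. ({ ... }) is a GNU C statement expression and yields a value, so the old dm_report_EIO() let callers write "return dm_report_EIO(m);" and always return -EIO, even from multipath_clone_and_map(), which must return a DM_MAPIO_* code. The do { ... } while (0) form is a pure statement, so every call site above now states its return value explicitly. A minimal illustration of the two macro shapes (hypothetical logging macro; statement expressions require GCC or Clang):

	#include <stdio.h>

	/* Statement-expression form: usable as a value, so the macro can
	 * smuggle a return code past the reader (the old dm_report_EIO). */
	#define report_error_expr(name)					\
	({								\
		fprintf(stderr, "%s: returning EIO\n", (name));		\
		-5; /* stand-in for -EIO */				\
	})

	/* do/while(0) form: a pure statement that still nests safely
	 * under a bare "if"; the caller chooses the return value. */
	#define report_error_stmt(name)					\
	do {								\
		fprintf(stderr, "%s: returning EIO\n", (name));		\
	} while (0)

	static int old_style(void)
	{
		return report_error_expr("mpath0");	/* always -5, even if wrong */
	}

	static int new_style(void)
	{
		report_error_stmt("mpath0");
		return -5;	/* or a DM_MAPIO_* code, chosen per call site */
	}

	int main(void)
	{
		printf("%d %d\n", old_style(), new_style());
		return 0;
	}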