Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit ea8c5356 authored by Coly Li's avatar Coly Li Committed by Jens Axboe
Browse files

bcache: set max writeback rate when I/O request is idle



Commit b1092c9a ("bcache: allow quick writeback when backing idle")
allows the writeback rate to be faster if there is no I/O request on a
bcache device. It works well if there is only one bcache device attached
to the cache set. If there are many bcache devices attached to a cache
set, it may introduce performance regression because multiple faster
writeback threads of the idle bcache devices will compete the btree level
locks with the bcache device who have I/O requests coming.

This patch fixes the above issue by only permitting fast writebac when
all bcache devices attached on the cache set are idle. And if one of the
bcache devices has new I/O request coming, minimized all writeback
throughput immediately and let PI controller __update_writeback_rate()
to decide the upcoming writeback rate for each bcache device.

Also when all bcache devices are idle, limited wrieback rate to a small
number is wast of thoughput, especially when backing devices are slower
non-rotation devices (e.g. SATA SSD). This patch sets a max writeback
rate for each backing device if the whole cache set is idle. A faster
writeback rate in idle time means new I/Os may have more available space
for dirty data, and people may observe a better write performance then.

Please note bcache may change its cache mode in run time, and this patch
still works if the cache mode is switched from writeback mode and there
is still dirty data on cache.

Fixes: Commit b1092c9a ("bcache: allow quick writeback when backing idle")
Cc: stable@vger.kernel.org #4.16+
Signed-off-by: default avatarColy Li <colyli@suse.de>
Tested-by: default avatarKai Krakow <kai@kaishome.de>
Tested-by: default avatarStefan Priebe <s.priebe@profihost.ag>
Cc: Michael Lyle <mlyle@lyle.org>
Signed-off-by: default avatarJens Axboe <axboe@kernel.dk>
parent b467a6ac
Loading
Loading
Loading
Loading
+3 −7
Original line number Diff line number Diff line
@@ -328,13 +328,6 @@ struct cached_dev {
	 */
	atomic_t		has_dirty;

	/*
	 * Set to zero by things that touch the backing volume-- except
	 * writeback.  Incremented by writeback.  Used to determine when to
	 * accelerate idle writeback.
	 */
	atomic_t		backing_idle;

	struct bch_ratelimit	writeback_rate;
	struct delayed_work	writeback_rate_update;

@@ -515,6 +508,8 @@ struct cache_set {
	struct cache_accounting accounting;

	unsigned long		flags;
	atomic_t		idle_counter;
	atomic_t		at_max_writeback_rate;

	struct cache_sb		sb;

@@ -524,6 +519,7 @@ struct cache_set {

	struct bcache_device	**devices;
	unsigned		devices_max_used;
	atomic_t		attached_dev_nr;
	struct list_head	cached_devs;
	uint64_t		cached_dev_sectors;
	atomic_long_t		flash_dev_dirty_sectors;
+57 −2
Original line number Diff line number Diff line
@@ -1103,6 +1103,44 @@ static void detached_dev_do_request(struct bcache_device *d, struct bio *bio)
		generic_make_request(bio);
}

static void quit_max_writeback_rate(struct cache_set *c,
				    struct cached_dev *this_dc)
{
	int i;
	struct bcache_device *d;
	struct cached_dev *dc;

	/*
	 * mutex bch_register_lock may compete with other parallel requesters,
	 * or attach/detach operations on other backing device. Waiting to
	 * the mutex lock may increase I/O request latency for seconds or more.
	 * To avoid such situation, if mutext_trylock() failed, only writeback
	 * rate of current cached device is set to 1, and __update_write_back()
	 * will decide writeback rate of other cached devices (remember now
	 * c->idle_counter is 0 already).
	 */
	if (mutex_trylock(&bch_register_lock)) {
		for (i = 0; i < c->devices_max_used; i++) {
			if (!c->devices[i])
				continue;

			if (UUID_FLASH_ONLY(&c->uuids[i]))
				continue;

			d = c->devices[i];
			dc = container_of(d, struct cached_dev, disk);
			/*
			 * set writeback rate to default minimum value,
			 * then let update_writeback_rate() to decide the
			 * upcoming rate.
			 */
			atomic_long_set(&dc->writeback_rate.rate, 1);
		}
		mutex_unlock(&bch_register_lock);
	} else
		atomic_long_set(&this_dc->writeback_rate.rate, 1);
}

/* Cached devices - read & write stuff */

static blk_qc_t cached_dev_make_request(struct request_queue *q,
@@ -1120,8 +1158,25 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q,
		return BLK_QC_T_NONE;
	}

	atomic_set(&dc->backing_idle, 0);
	generic_start_io_acct(q, bio_op(bio), bio_sectors(bio), &d->disk->part0);
	if (likely(d->c)) {
		if (atomic_read(&d->c->idle_counter))
			atomic_set(&d->c->idle_counter, 0);
		/*
		 * If at_max_writeback_rate of cache set is true and new I/O
		 * comes, quit max writeback rate of all cached devices
		 * attached to this cache set, and set at_max_writeback_rate
		 * to false.
		 */
		if (unlikely(atomic_read(&d->c->at_max_writeback_rate) == 1)) {
			atomic_set(&d->c->at_max_writeback_rate, 0);
			quit_max_writeback_rate(d->c, dc);
		}
	}

	generic_start_io_acct(q,
			      bio_op(bio),
			      bio_sectors(bio),
			      &d->disk->part0);

	bio_set_dev(bio, dc->bdev);
	bio->bi_iter.bi_sector += dc->sb.data_offset;
+4 −0
Original line number Diff line number Diff line
@@ -696,6 +696,8 @@ static void bcache_device_detach(struct bcache_device *d)
{
	lockdep_assert_held(&bch_register_lock);

	atomic_dec(&d->c->attached_dev_nr);

	if (test_bit(BCACHE_DEV_DETACHING, &d->flags)) {
		struct uuid_entry *u = d->c->uuids + d->id;

@@ -1144,6 +1146,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,

	bch_cached_dev_run(dc);
	bcache_device_link(&dc->disk, c, "bdev");
	atomic_inc(&c->attached_dev_nr);

	/* Allow the writeback thread to proceed */
	up_write(&dc->writeback_lock);
@@ -1696,6 +1699,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
	c->block_bits		= ilog2(sb->block_size);
	c->nr_uuids		= bucket_bytes(c) / sizeof(struct uuid_entry);
	c->devices_max_used	= 0;
	atomic_set(&c->attached_dev_nr, 0);
	c->btree_pages		= bucket_pages(c);
	if (c->btree_pages > BTREE_MAX_PAGES)
		c->btree_pages = max_t(int, c->btree_pages / 4,
+11 −4
Original line number Diff line number Diff line
@@ -171,7 +171,8 @@ SHOW(__bch_cached_dev)
	var_printf(writeback_running,	"%i");
	var_print(writeback_delay);
	var_print(writeback_percent);
	sysfs_hprint(writeback_rate,	wb ? dc->writeback_rate.rate << 9 : 0);
	sysfs_hprint(writeback_rate,
		     wb ? atomic_long_read(&dc->writeback_rate.rate) << 9 : 0);
	sysfs_hprint(io_errors,		atomic_read(&dc->io_errors));
	sysfs_printf(io_error_limit,	"%i", dc->error_limit);
	sysfs_printf(io_disable,	"%i", dc->io_disable);
@@ -193,7 +194,9 @@ SHOW(__bch_cached_dev)
		 * Except for dirty and target, other values should
		 * be 0 if writeback is not running.
		 */
		bch_hprint(rate, wb ? dc->writeback_rate.rate << 9 : 0);
		bch_hprint(rate,
			   wb ? atomic_long_read(&dc->writeback_rate.rate) << 9
			      : 0);
		bch_hprint(dirty, bcache_dev_sectors_dirty(&dc->disk) << 9);
		bch_hprint(target, dc->writeback_rate_target << 9);
		bch_hprint(proportional,
@@ -261,8 +264,12 @@ STORE(__cached_dev)

	sysfs_strtoul_clamp(writeback_percent, dc->writeback_percent, 0, 40);

	sysfs_strtoul_clamp(writeback_rate,
			    dc->writeback_rate.rate, 1, INT_MAX);
	if (attr == &sysfs_writeback_rate) {
		int v;

		sysfs_strtoul_clamp(writeback_rate, v, 1, INT_MAX);
		atomic_long_set(&dc->writeback_rate.rate, v);
	}

	sysfs_strtoul_clamp(writeback_rate_update_seconds,
			    dc->writeback_rate_update_seconds,
+1 −1
Original line number Diff line number Diff line
@@ -200,7 +200,7 @@ uint64_t bch_next_delay(struct bch_ratelimit *d, uint64_t done)
{
	uint64_t now = local_clock();

	d->next += div_u64(done * NSEC_PER_SEC, d->rate);
	d->next += div_u64(done * NSEC_PER_SEC, atomic_long_read(&d->rate));

	/* Bound the time.  Don't let us fall further than 2 seconds behind
	 * (this prevents unnecessary backlog that would make it impossible
Loading