Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 78365411 authored by Kent Overstreet
Browse files

bcache: Rework allocator reserves



We need a reserve for allocating buckets for new btree nodes - and now that
we've got multiple btrees, it really needs to be per btree.

This reworks the reserves so we've got separate freelists for each reserve
instead of watermarks, which seems to make things a bit cleaner, and it adds
some code so that btree_split() can make sure the reserve is available before it
starts.

Signed-off-by: Kent Overstreet <kmo@daterainc.com>
parent 1dd13c8d
Loading
Loading
Loading
Loading
+44 −28
Original line number Diff line number Diff line
@@ -132,10 +132,16 @@ bool bch_bucket_add_unused(struct cache *ca, struct bucket *b)
{
	BUG_ON(GC_MARK(b) || GC_SECTORS_USED(b));

	if (fifo_used(&ca->free) > ca->watermark[WATERMARK_MOVINGGC] &&
	    CACHE_REPLACEMENT(&ca->sb) == CACHE_REPLACEMENT_FIFO)
		return false;
	if (CACHE_REPLACEMENT(&ca->sb) == CACHE_REPLACEMENT_FIFO) {
		unsigned i;

		for (i = 0; i < RESERVE_NONE; i++)
			if (!fifo_full(&ca->free[i]))
				goto add;

		return false;
	}
add:
	b->prio = 0;

	if (can_inc_bucket_gen(b) &&
@@ -304,6 +310,21 @@ do { \
	__set_current_state(TASK_RUNNING);				\
} while (0)

/*
 * Try to place @bucket on one of @ca's per-reserve free FIFOs.
 *
 * The RESERVE_PRIO FIFO is attempted first; after that every reserve is
 * attempted in index order (RESERVE_PRIO is therefore tried a second time,
 * exactly as before).
 *
 * Returns true as soon as one fifo_push() succeeds, false if none did.
 */
static int bch_allocator_push(struct cache *ca, long bucket)
{
	unsigned idx = 0;

	/* Prios/gens are actually the most important reserve */
	if (fifo_push(&ca->free[RESERVE_PRIO], bucket))
		return true;

	/* idx is advanced on its own line: fifo_push() may be a macro. */
	while (idx < RESERVE_NR) {
		if (fifo_push(&ca->free[idx], bucket))
			return true;
		idx++;
	}

	return false;
}

static int bch_allocator_thread(void *arg)
{
	struct cache *ca = arg;
@@ -336,9 +357,7 @@ static int bch_allocator_thread(void *arg)
				mutex_lock(&ca->set->bucket_lock);
			}

			allocator_wait(ca, !fifo_full(&ca->free));

			fifo_push(&ca->free, bucket);
			allocator_wait(ca, bch_allocator_push(ca, bucket));
			wake_up(&ca->set->bucket_wait);
		}

@@ -365,34 +384,29 @@ static int bch_allocator_thread(void *arg)
	}
}

long bch_bucket_alloc(struct cache *ca, unsigned watermark, bool wait)
long bch_bucket_alloc(struct cache *ca, unsigned reserve, bool wait)
{
	DEFINE_WAIT(w);
	struct bucket *b;
	long r;

	/* fastpath */
	if (fifo_used(&ca->free) > ca->watermark[watermark]) {
		fifo_pop(&ca->free, r);
	if (fifo_pop(&ca->free[RESERVE_NONE], r) ||
	    fifo_pop(&ca->free[reserve], r))
		goto out;
	}

	if (!wait)
		return -1;

	while (1) {
		if (fifo_used(&ca->free) > ca->watermark[watermark]) {
			fifo_pop(&ca->free, r);
			break;
		}

	do {
		prepare_to_wait(&ca->set->bucket_wait, &w,
				TASK_UNINTERRUPTIBLE);

		mutex_unlock(&ca->set->bucket_lock);
		schedule();
		mutex_lock(&ca->set->bucket_lock);
	}
	} while (!fifo_pop(&ca->free[RESERVE_NONE], r) &&
		 !fifo_pop(&ca->free[reserve], r));

	finish_wait(&ca->set->bucket_wait, &w);
out:
@@ -401,11 +415,13 @@ out:
	if (expensive_debug_checks(ca->set)) {
		size_t iter;
		long i;
		unsigned j;

		for (iter = 0; iter < prio_buckets(ca) * 2; iter++)
			BUG_ON(ca->prio_buckets[iter] == (uint64_t) r);

		fifo_for_each(i, &ca->free, iter)
		for (j = 0; j < RESERVE_NR; j++)
			fifo_for_each(i, &ca->free[j], iter)
				BUG_ON(i == r);
		fifo_for_each(i, &ca->free_inc, iter)
			BUG_ON(i == r);
@@ -419,7 +435,7 @@ out:

	SET_GC_SECTORS_USED(b, ca->sb.bucket_size);

	if (watermark <= WATERMARK_METADATA) {
	if (reserve <= RESERVE_PRIO) {
		SET_GC_MARK(b, GC_MARK_METADATA);
		SET_GC_MOVE(b, 0);
		b->prio = BTREE_PRIO;
@@ -445,7 +461,7 @@ void bch_bucket_free(struct cache_set *c, struct bkey *k)
	}
}

int __bch_bucket_alloc_set(struct cache_set *c, unsigned watermark,
int __bch_bucket_alloc_set(struct cache_set *c, unsigned reserve,
			   struct bkey *k, int n, bool wait)
{
	int i;
@@ -459,7 +475,7 @@ int __bch_bucket_alloc_set(struct cache_set *c, unsigned watermark,

	for (i = 0; i < n; i++) {
		struct cache *ca = c->cache_by_alloc[i];
		long b = bch_bucket_alloc(ca, watermark, wait);
		long b = bch_bucket_alloc(ca, reserve, wait);

		if (b == -1)
			goto err;
@@ -478,12 +494,12 @@ err:
	return -1;
}

int bch_bucket_alloc_set(struct cache_set *c, unsigned watermark,
int bch_bucket_alloc_set(struct cache_set *c, unsigned reserve,
			 struct bkey *k, int n, bool wait)
{
	int ret;
	mutex_lock(&c->bucket_lock);
	ret = __bch_bucket_alloc_set(c, watermark, k, n, wait);
	ret = __bch_bucket_alloc_set(c, reserve, k, n, wait);
	mutex_unlock(&c->bucket_lock);
	return ret;
}
@@ -573,8 +589,8 @@ bool bch_alloc_sectors(struct cache_set *c, struct bkey *k, unsigned sectors,

	while (!(b = pick_data_bucket(c, k, write_point, &alloc.key))) {
		unsigned watermark = write_prio
			? WATERMARK_MOVINGGC
			: WATERMARK_NONE;
			? RESERVE_MOVINGGC
			: RESERVE_NONE;

		spin_unlock(&c->data_bucket_lock);

@@ -689,7 +705,7 @@ int bch_cache_allocator_init(struct cache *ca)
	 * Then 8 for btree allocations
	 * Then half for the moving garbage collector
	 */

#if 0
	ca->watermark[WATERMARK_PRIO] = 0;

	ca->watermark[WATERMARK_METADATA] = prio_buckets(ca);
@@ -699,6 +715,6 @@ int bch_cache_allocator_init(struct cache *ca)

	ca->watermark[WATERMARK_NONE] = ca->free.size / 2 +
		ca->watermark[WATERMARK_MOVINGGC];

#endif
	return 0;
}
+7 −9
Original line number Diff line number Diff line
@@ -383,12 +383,12 @@ struct cached_dev {
	unsigned		writeback_rate_p_term_inverse;
};

enum alloc_watermarks {
	WATERMARK_PRIO,
	WATERMARK_METADATA,
	WATERMARK_MOVINGGC,
	WATERMARK_NONE,
	WATERMARK_MAX
enum alloc_reserve {
	RESERVE_BTREE,
	RESERVE_PRIO,
	RESERVE_MOVINGGC,
	RESERVE_NONE,
	RESERVE_NR,
};

struct cache {
@@ -400,8 +400,6 @@ struct cache {
	struct kobject		kobj;
	struct block_device	*bdev;

	unsigned		watermark[WATERMARK_MAX];

	struct task_struct	*alloc_thread;

	struct closure		prio;
@@ -430,7 +428,7 @@ struct cache {
	 * because all the data they contained was overwritten), so we only
	 * need to discard them before they can be moved to the free list.
	 */
	DECLARE_FIFO(long, free);
	DECLARE_FIFO(long, free)[RESERVE_NR];
	DECLARE_FIFO(long, free_inc);
	DECLARE_FIFO(long, unused);

+32 −2
Original line number Diff line number Diff line
@@ -167,6 +167,8 @@ static inline bool should_split(struct btree *b)
			_r = bch_btree_ ## fn(_b, op, ##__VA_ARGS__);	\
		}							\
		rw_unlock(_w, _b);					\
		if (_r == -EINTR)					\
			schedule();					\
		bch_cannibalize_unlock(c);				\
		if (_r == -ENOSPC) {					\
			wait_event((c)->try_wait,			\
@@ -175,6 +177,7 @@ static inline bool should_split(struct btree *b)
		}							\
	} while (_r == -EINTR);						\
									\
	finish_wait(&(c)->bucket_wait, &(op)->wait);			\
	_r;								\
})

@@ -1075,7 +1078,7 @@ struct btree *bch_btree_node_alloc(struct cache_set *c, int level, bool wait)

	mutex_lock(&c->bucket_lock);
retry:
	if (__bch_bucket_alloc_set(c, WATERMARK_METADATA, &k.key, 1, wait))
	if (__bch_bucket_alloc_set(c, RESERVE_BTREE, &k.key, 1, wait))
		goto err;

	bkey_put(c, &k.key);
@@ -1132,6 +1135,28 @@ static void make_btree_freeing_key(struct btree *b, struct bkey *k)
	atomic_inc(&b->c->prio_blocked);
}

static int btree_check_reserve(struct btree *b, struct btree_op *op)
{
	struct cache_set *c = b->c;
	struct cache *ca;
	unsigned i, reserve = c->root->level * 2 + 1;
	int ret = 0;

	mutex_lock(&c->bucket_lock);

	for_each_cache(ca, c, i)
		if (fifo_used(&ca->free[RESERVE_BTREE]) < reserve) {
			if (op)
				prepare_to_wait(&c->bucket_wait, &op->wait,
						TASK_UNINTERRUPTIBLE);
			ret = -EINTR;
			break;
		}

	mutex_unlock(&c->bucket_lock);
	return ret;
}

/* Garbage collection */

uint8_t __bch_btree_mark_key(struct cache_set *c, int level, struct bkey *k)
@@ -1428,7 +1453,8 @@ static int btree_gc_recurse(struct btree *b, struct btree_op *op,

		if (!IS_ERR(last->b)) {
			should_rewrite = btree_gc_mark_node(last->b, gc);
			if (should_rewrite) {
			if (should_rewrite &&
			    !btree_check_reserve(b, NULL)) {
				n = btree_node_alloc_replacement(last->b,
								 false);

@@ -2071,6 +2097,10 @@ static int btree_split(struct btree *b, struct btree_op *op,
	closure_init_stack(&cl);
	bch_keylist_init(&parent_keys);

	if (!b->level &&
	    btree_check_reserve(b, op))
		return -EINTR;

	n1 = btree_node_alloc_replacement(b, true);
	if (IS_ERR(n1))
		goto err;
+4 −0
Original line number Diff line number Diff line
@@ -241,6 +241,9 @@ void bkey_put(struct cache_set *c, struct bkey *k);
/* Recursing down the btree */

struct btree_op {
	/* for waiting on btree reserve in btree_split() */
	wait_queue_t		wait;

	/* Btree level at which we start taking write locks */
	short			lock;

@@ -250,6 +253,7 @@ struct btree_op {
/*
 * Reset @op to all-zero bytes, then record @write_lock_level in op->lock
 * and initialise op->wait with init_wait().
 */
static inline void bch_btree_op_init(struct btree_op *op, int write_lock_level)
{
	memset(op, 0, sizeof(*op));

	/* Both fields are set after the memset so they are not wiped. */
	op->lock = write_lock_level;
	init_wait(&op->wait);
}

+1 −1
Original line number Diff line number Diff line
@@ -211,7 +211,7 @@ void bch_moving_gc(struct cache_set *c)
	for_each_cache(ca, c, i) {
		unsigned sectors_to_move = 0;
		unsigned reserve_sectors = ca->sb.bucket_size *
			min(fifo_used(&ca->free), ca->free.size / 2);
			fifo_used(&ca->free[RESERVE_MOVINGGC]);

		ca->heap.used = 0;

Loading