
Commit 2531d9ee authored by Kent Overstreet

bcache: Kill unused freelist



This was originally added as an optimization that for various reasons isn't
needed anymore, but it does add a lot of nasty corner cases (and it was
responsible for some recently fixed bugs). Just get rid of it now.
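For orientation, a minimal compilable toy model of the pipeline this commit leaves behind (a sketch, not bcache code: toy_fifo and all names below are hypothetical stand-ins for the kernel's DECLARE_FIFO machinery). Buckets are invalidated onto free_inc, the new prios/gens are persisted, and only then are buckets handed out; the killed "unused" list was a second path that skipped the persistence step:

/* toy_pipeline.c - two-stage freelist sketch; build with: cc -std=c99 toy_pipeline.c */
#include <stdbool.h>
#include <stdio.h>

#define NBUCKETS 8

struct toy_fifo { long data[NBUCKETS]; unsigned head, tail; };

static bool toy_push(struct toy_fifo *f, long v)
{
	if (f->head - f->tail == NBUCKETS)
		return false;			/* full */
	f->data[f->head++ % NBUCKETS] = v;
	return true;
}

static bool toy_pop(struct toy_fifo *f, long *v)
{
	if (f->head == f->tail)
		return false;			/* empty */
	*v = f->data[f->tail++ % NBUCKETS];
	return true;
}

int main(void)
{
	struct toy_fifo free_inc = {{0}}, freelist = {{0}};
	long b;

	/* 1) invalidate: bump gens and queue buckets for reuse */
	for (long i = 0; i < 4; i++)
		toy_push(&free_inc, i);

	/* 2) persist the new gens (stands in for bch_prio_write()) */
	printf("prio write: new gens for %u queued buckets hit disk\n",
	       free_inc.head - free_inc.tail);

	/* 3) only now may the buckets be handed back out */
	while (toy_pop(&free_inc, &b))
		toy_push(&freelist, b);
	while (toy_pop(&freelist, &b))
		printf("bucket %ld is allocatable again\n", b);
	return 0;
}

With only one path, every bucket that reaches a freelist is guaranteed to have its new gen on disk, which is what made the corner cases in the old unused-list shortcut go away.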

Signed-off-by: Kent Overstreet <kmo@daterainc.com>
parent 0a63b66d
drivers/md/bcache/alloc.c  +58 −82
@@ -78,12 +78,6 @@ uint8_t bch_inc_gen(struct cache *ca, struct bucket *b)
 	ca->set->need_gc = max(ca->set->need_gc, bucket_gc_gen(b));
 	WARN_ON_ONCE(ca->set->need_gc > BUCKET_GC_GEN_MAX);
 
-	if (CACHE_SYNC(&ca->set->sb)) {
-		ca->need_save_prio = max(ca->need_save_prio,
-					 bucket_disk_gen(b));
-		WARN_ON_ONCE(ca->need_save_prio > BUCKET_DISK_GEN_MAX);
-	}
-
 	return ret;
 }

@@ -120,58 +114,46 @@ void bch_rescale_priorities(struct cache_set *c, int sectors)
 	mutex_unlock(&c->bucket_lock);
 }
 
-/* Allocation */
+/*
+ * Background allocation thread: scans for buckets to be invalidated,
+ * invalidates them, rewrites prios/gens (marking them as invalidated on disk),
+ * then optionally issues discard commands to the newly free buckets, then puts
+ * them on the various freelists.
+ */
 
 static inline bool can_inc_bucket_gen(struct bucket *b)
 {
-	return bucket_gc_gen(b) < BUCKET_GC_GEN_MAX &&
-		bucket_disk_gen(b) < BUCKET_DISK_GEN_MAX;
+	return bucket_gc_gen(b) < BUCKET_GC_GEN_MAX;
 }
 
-bool bch_bucket_add_unused(struct cache *ca, struct bucket *b)
+bool bch_can_invalidate_bucket(struct cache *ca, struct bucket *b)
 {
-	BUG_ON(GC_MARK(b) || GC_SECTORS_USED(b));
-
-	if (CACHE_REPLACEMENT(&ca->sb) == CACHE_REPLACEMENT_FIFO) {
-		unsigned i;
-
-		for (i = 0; i < RESERVE_NONE; i++)
-			if (!fifo_full(&ca->free[i]))
-				goto add;
-
-		return false;
-	}
-add:
-	b->prio = 0;
-
-	if (can_inc_bucket_gen(b) &&
-	    fifo_push(&ca->unused, b - ca->buckets)) {
-		atomic_inc(&b->pin);
-		return true;
-	}
+	BUG_ON(!ca->set->gc_mark_valid);
 
-	return false;
-}
-
-static bool can_invalidate_bucket(struct cache *ca, struct bucket *b)
-{
 	return (!GC_MARK(b) ||
 		GC_MARK(b) == GC_MARK_RECLAIMABLE) &&
 		!atomic_read(&b->pin) &&
 		can_inc_bucket_gen(b);
 }
 
-static void invalidate_one_bucket(struct cache *ca, struct bucket *b)
+void __bch_invalidate_one_bucket(struct cache *ca, struct bucket *b)
 {
-	size_t bucket = b - ca->buckets;
+	lockdep_assert_held(&ca->set->bucket_lock);
+	BUG_ON(GC_MARK(b) && GC_MARK(b) != GC_MARK_RECLAIMABLE);
 
 	if (GC_SECTORS_USED(b))
-		trace_bcache_invalidate(ca, bucket);
+		trace_bcache_invalidate(ca, b - ca->buckets);
 
 	bch_inc_gen(ca, b);
 	b->prio = INITIAL_PRIO;
 	atomic_inc(&b->pin);
-	fifo_push(&ca->free_inc, bucket);
+}
+
+static void bch_invalidate_one_bucket(struct cache *ca, struct bucket *b)
+{
+	__bch_invalidate_one_bucket(ca, b);
+
+	fifo_push(&ca->free_inc, b - ca->buckets);
 }
 
 /*
@@ -201,20 +183,7 @@ static void invalidate_buckets_lru(struct cache *ca)
 	ca->heap.used = 0;
 
 	for_each_bucket(b, ca) {
-		/*
-		 * If we fill up the unused list, if we then return before
-		 * adding anything to the free_inc list we'll skip writing
-		 * prios/gens and just go back to allocating from the unused
-		 * list:
-		 */
-		if (fifo_full(&ca->unused))
-			return;
-
-		if (!can_invalidate_bucket(ca, b))
-			continue;
-
-		if (!GC_SECTORS_USED(b) &&
-		    bch_bucket_add_unused(ca, b))
+		if (!bch_can_invalidate_bucket(ca, b))
 			continue;
 
 		if (!heap_full(&ca->heap))
@@ -239,7 +208,7 @@ static void invalidate_buckets_lru(struct cache *ca)
 			return;
 		}
 
-		invalidate_one_bucket(ca, b);
+		bch_invalidate_one_bucket(ca, b);
 	}
 }

@@ -255,8 +224,8 @@ static void invalidate_buckets_fifo(struct cache *ca)

 		b = ca->buckets + ca->fifo_last_bucket++;
 
-		if (can_invalidate_bucket(ca, b))
-			invalidate_one_bucket(ca, b);
+		if (bch_can_invalidate_bucket(ca, b))
+			bch_invalidate_one_bucket(ca, b);
 
 		if (++checked >= ca->sb.nbuckets) {
 			ca->invalidate_needs_gc = 1;
@@ -280,8 +249,8 @@ static void invalidate_buckets_random(struct cache *ca)

 		b = ca->buckets + n;
 
-		if (can_invalidate_bucket(ca, b))
-			invalidate_one_bucket(ca, b);
+		if (bch_can_invalidate_bucket(ca, b))
+			bch_invalidate_one_bucket(ca, b);
 
 		if (++checked >= ca->sb.nbuckets / 2) {
 			ca->invalidate_needs_gc = 1;
@@ -293,8 +262,7 @@ static void invalidate_buckets_random(struct cache *ca)

 static void invalidate_buckets(struct cache *ca)
 {
-	if (ca->invalidate_needs_gc)
-		return;
+	BUG_ON(ca->invalidate_needs_gc);
 
 	switch (CACHE_REPLACEMENT(&ca->sb)) {
 	case CACHE_REPLACEMENT_LRU:
@@ -354,17 +322,10 @@ static int bch_allocator_thread(void *arg)
 		 * possibly issue discards to them, then we add the bucket to
 		 * the free list:
 		 */
-		while (1) {
+		while (!fifo_empty(&ca->free_inc)) {
 			long bucket;
 
-			if ((!atomic_read(&ca->set->prio_blocked) ||
-			     !CACHE_SYNC(&ca->set->sb)) &&
-			    !fifo_empty(&ca->unused))
-				fifo_pop(&ca->unused, bucket);
-			else if (!fifo_empty(&ca->free_inc))
-				fifo_pop(&ca->free_inc, bucket);
-			else
-				break;
+			fifo_pop(&ca->free_inc, bucket);
 
 			if (ca->discard) {
 				mutex_unlock(&ca->set->bucket_lock);
@@ -385,9 +346,9 @@ static int bch_allocator_thread(void *arg)
 		 * them to the free_inc list:
 		 */
 
+retry_invalidate:
 		allocator_wait(ca, ca->set->gc_mark_valid &&
-			       (ca->need_save_prio > 64 ||
-				!ca->invalidate_needs_gc));
+			       !ca->invalidate_needs_gc);
 		invalidate_buckets(ca);
 
 		/*
@@ -395,12 +356,27 @@ static int bch_allocator_thread(void *arg)
 		 * new stuff to them:
 		 */
 		allocator_wait(ca, !atomic_read(&ca->set->prio_blocked));
-		if (CACHE_SYNC(&ca->set->sb) &&
-		    (!fifo_empty(&ca->free_inc) ||
-		     ca->need_save_prio > 64))
+		if (CACHE_SYNC(&ca->set->sb)) {
+			/*
+			 * This could deadlock if an allocation with a btree
+			 * node locked ever blocked - having the btree node
+			 * locked would block garbage collection, but here we're
+			 * waiting on garbage collection before we invalidate
+			 * and free anything.
+			 *
+			 * But this should be safe since the btree code always
+			 * uses btree_check_reserve() before allocating now, and
+			 * if it fails it blocks without btree nodes locked.
+			 */
+			if (!fifo_full(&ca->free_inc))
+				goto retry_invalidate;
+
 			bch_prio_write(ca);
+		}
 	}
 }
 
+/* Allocation */
+
 long bch_bucket_alloc(struct cache *ca, unsigned reserve, bool wait)
 {
@@ -447,8 +423,6 @@ out:
 				BUG_ON(i == r);
 		fifo_for_each(i, &ca->free_inc, iter)
 			BUG_ON(i == r);
-		fifo_for_each(i, &ca->unused, iter)
-			BUG_ON(i == r);
 	}
 
 	b = ca->buckets + r;
@@ -470,17 +444,19 @@ out:
 	return r;
 }
 
-void bch_bucket_free(struct cache_set *c, struct bkey *k)
+void __bch_bucket_free(struct cache *ca, struct bucket *b)
 {
-	unsigned i;
-
-	for (i = 0; i < KEY_PTRS(k); i++) {
-		struct bucket *b = PTR_BUCKET(c, k, i);
-
-		SET_GC_MARK(b, 0);
-		SET_GC_SECTORS_USED(b, 0);
-		bch_bucket_add_unused(PTR_CACHE(c, k, i), b);
-	}
+	SET_GC_MARK(b, 0);
+	SET_GC_SECTORS_USED(b, 0);
+}
+
+void bch_bucket_free(struct cache_set *c, struct bkey *k)
+{
+	unsigned i;
+
+	for (i = 0; i < KEY_PTRS(k); i++)
+		__bch_bucket_free(PTR_CACHE(c, k, i),
+				  PTR_BUCKET(c, k, i));
 }
 
 int __bch_bucket_alloc_set(struct cache_set *c, unsigned reserve,
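The allocator loop's new shape is worth a worked sketch (toy, compilable C; the fifo helpers, FIFO_SZ, and the two-buckets-per-pass invalidation rate are hypothetical simplifications, not bcache code). Instead of tracking need_save_prio, the thread now just batches invalidations until free_inc is full before each prio write, which is exactly the !fifo_full() / goto retry_invalidate dance in the hunk above:

/* alloc_loop_sketch.c - batching invalidations per prio write */
#include <stdbool.h>
#include <stdio.h>

#define FIFO_SZ 4

static int free_inc[FIFO_SZ], fi_n;	/* toy fifo, index 0 is oldest */
static int next_bucket;

static bool toy_fifo_full(void) { return fi_n == FIFO_SZ; }

static void invalidate_buckets(void)
{
	/* pretend each pass only finds a couple of invalidatable buckets */
	for (int i = 0; i < 2 && !toy_fifo_full(); i++)
		free_inc[fi_n++] = next_bucket++;
}

int main(void)
{
	for (int rounds = 0; rounds < 2; rounds++) {
retry_invalidate:
		invalidate_buckets();
		if (!toy_fifo_full())
			goto retry_invalidate;	/* batch a full fifo per prio write */

		printf("prio write covering %d buckets\n", fi_n);

		for (int i = 0; i < fi_n; i++)	/* now safe to hand out */
			printf("bucket %d -> free list\n", free_inc[i]);
		fi_n = 0;
	}
	return 0;
}

Batching a full free_inc per write amortizes the prio write the same way need_save_prio > 64 used to, but with one state variable fewer and no second freelist to reconcile.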
drivers/md/bcache/bcache.h  +5 −23
@@ -195,7 +195,6 @@ struct bucket {
 	atomic_t	pin;
 	uint16_t	prio;
 	uint8_t		gen;
-	uint8_t		disk_gen;
 	uint8_t		last_gc; /* Most out of date gen in the btree */
 	uint8_t		gc_gen;
 	uint16_t	gc_mark; /* Bitfield used by GC. See below for field */
@@ -426,14 +425,9 @@ struct cache {
 	 * their new gen to disk. After prio_write() finishes writing the new
 	 * gens/prios, they'll be moved to the free list (and possibly discarded
 	 * in the process)
-	 *
-	 * unused: GC found nothing pointing into these buckets (possibly
-	 * because all the data they contained was overwritten), so we only
-	 * need to discard them before they can be moved to the free list.
 	 */
 	DECLARE_FIFO(long, free)[RESERVE_NR];
 	DECLARE_FIFO(long, free_inc);
-	DECLARE_FIFO(long, unused);
 
 	size_t			fifo_last_bucket;

@@ -442,12 +436,6 @@ struct cache {

 	DECLARE_HEAP(struct bucket *, heap);
 
-	/*
-	 * max(gen - disk_gen) for all buckets. When it gets too big we have to
-	 * call prio_write() to keep gens from wrapping.
-	 */
-	uint8_t			need_save_prio;
-
 	/*
 	 * If nonzero, we know we aren't going to find any buckets to invalidate
 	 * until a gc finishes - otherwise we could pointlessly burn a ton of
@@ -848,9 +836,6 @@ static inline bool cached_dev_get(struct cached_dev *dc)
 /*
  * bucket_gc_gen() returns the difference between the bucket's current gen and
  * the oldest gen of any pointer into that bucket in the btree (last_gc).
- *
- * bucket_disk_gen() returns the difference between the current gen and the gen
- * on disk; they're both used to make sure gens don't wrap around.
  */
 
 static inline uint8_t bucket_gc_gen(struct bucket *b)
@@ -858,13 +843,7 @@ static inline uint8_t bucket_gc_gen(struct bucket *b)
 	return b->gen - b->last_gc;
 }
 
-static inline uint8_t bucket_disk_gen(struct bucket *b)
-{
-	return b->gen - b->disk_gen;
-}
-
 #define BUCKET_GC_GEN_MAX	96U
-#define BUCKET_DISK_GEN_MAX	64U
 
 #define kobj_attribute_write(n, fn)					\
 	static struct kobj_attribute ksysfs_##n = __ATTR(n, S_IWUSR, NULL, fn)
@@ -897,11 +876,14 @@ void bch_submit_bbio(struct bio *, struct cache_set *, struct bkey *, unsigned);

 uint8_t bch_inc_gen(struct cache *, struct bucket *);
 void bch_rescale_priorities(struct cache_set *, int);
-bool bch_bucket_add_unused(struct cache *, struct bucket *);
 
-long bch_bucket_alloc(struct cache *, unsigned, bool);
+bool bch_can_invalidate_bucket(struct cache *, struct bucket *);
+void __bch_invalidate_one_bucket(struct cache *, struct bucket *);
+
+void __bch_bucket_free(struct cache *, struct bucket *);
 void bch_bucket_free(struct cache_set *, struct bkey *);
+
+long bch_bucket_alloc(struct cache *, unsigned, bool);
 int __bch_bucket_alloc_set(struct cache_set *, unsigned,
 			   struct bkey *, int, bool);
 int bch_bucket_alloc_set(struct cache_set *, unsigned,
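The surviving wraparound guard deserves a worked example. bucket_gc_gen() is a difference of uint8_t gens, so it stays correct across wraparound as long as the distance is kept small, which is what the BUCKET_GC_GEN_MAX cap (96U, as in the hunk above) enforces; the deleted disk_gen/BUCKET_DISK_GEN_MAX machinery did the same job for on-disk gens and is no longer needed. A standalone demo (toy code; this free function mirrors, but is not, the header's bucket_gc_gen()):

/* gen_wrap_demo.c - modular uint8_t gen arithmetic */
#include <stdint.h>
#include <stdio.h>

#define BUCKET_GC_GEN_MAX 96U

static uint8_t toy_bucket_gc_gen(uint8_t gen, uint8_t last_gc)
{
	return gen - last_gc;	/* modular difference: valid even after wrap */
}

int main(void)
{
	uint8_t last_gc = 250, gen = 250;

	for (int i = 0; i < 10; i++) {
		gen++;		/* wraps 255 -> 0 along the way */
		printf("gen=%3u last_gc=%3u gc_gen=%3u\n",
		       (unsigned)gen, (unsigned)last_gc,
		       (unsigned)toy_bucket_gc_gen(gen, last_gc));
	}

	/* The difference is only meaningful while it stays small; the
	 * allocator refuses to invalidate once it hits the cap: */
	printf("can still invalidate: %s\n",
	       toy_bucket_gc_gen(gen, last_gc) < BUCKET_GC_GEN_MAX ?
	       "yes" : "no");
	return 0;
}

Running it shows gc_gen climbing 1..10 even as gen wraps past 255, which is why a single capped difference suffices once the second (disk) difference is gone.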
drivers/md/bcache/btree.c  +37 −4
@@ -1641,7 +1641,7 @@ static void btree_gc_start(struct cache_set *c)
 	mutex_unlock(&c->bucket_lock);
 }
 
-size_t bch_btree_gc_finish(struct cache_set *c)
+static size_t bch_btree_gc_finish(struct cache_set *c)
 {
 	size_t available = 0;
 	struct bucket *b;
@@ -1703,9 +1703,6 @@ size_t bch_btree_gc_finish(struct cache_set *c)

 			if (!GC_MARK(b) || GC_MARK(b) == GC_MARK_RECLAIMABLE)
 				available++;
-
-			if (!GC_MARK(b))
-				bch_bucket_add_unused(ca, b);
 		}
 	}
 
@@ -1836,6 +1833,42 @@ int bch_btree_check(struct cache_set *c)
 	return btree_root(check_recurse, c, &op);
 }
 
+void bch_initial_gc_finish(struct cache_set *c)
+{
+	struct cache *ca;
+	struct bucket *b;
+	unsigned i;
+
+	bch_btree_gc_finish(c);
+
+	mutex_lock(&c->bucket_lock);
+
+	/*
+	 * We need to put some unused buckets directly on the prio freelist in
+	 * order to get the allocator thread started - it needs freed buckets in
+	 * order to rewrite the prios and gens, and it needs to rewrite prios
+	 * and gens in order to free buckets.
+	 *
+	 * This is only safe for buckets that have no live data in them, which
+	 * there should always be some of.
+	 */
+	for_each_cache(ca, c, i) {
+		for_each_bucket(b, ca) {
+			if (fifo_full(&ca->free[RESERVE_PRIO]))
+				break;
+
+			if (bch_can_invalidate_bucket(ca, b) &&
+			    !GC_MARK(b)) {
+				__bch_invalidate_one_bucket(ca, b);
+				fifo_push(&ca->free[RESERVE_PRIO],
+					  b - ca->buckets);
+			}
+		}
+	}
+
+	mutex_unlock(&c->bucket_lock);
+}
+
 /* Btree insertion */
 
 static bool btree_insert_key(struct btree *b, struct bkey *k,
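The comment in bch_initial_gc_finish() describes a bootstrap cycle that is easy to model: the allocator needs free buckets to write prios/gens, yet buckets normally become free only after prios/gens are written. A compilable toy of the seeding step (the gc_mark field, PRIO_RESERVE sizing, and which buckets are marked live are all invented for illustration):

/* seed_prio_demo.c - breaking the allocator's chicken-and-egg at startup */
#include <stdio.h>

#define NBUCKETS     16
#define PRIO_RESERVE 2

struct toy_bucket { int gc_mark; };	/* 0 = GC found nothing in it */

int main(void)
{
	struct toy_bucket buckets[NBUCKETS] = {{0}};
	int prio_free[PRIO_RESERVE], n = 0;

	buckets[3].gc_mark = 1;		/* pretend GC marked a few live */
	buckets[7].gc_mark = 1;

	/* seed the prio reserve with buckets that provably hold no data */
	for (int i = 0; i < NBUCKETS && n < PRIO_RESERVE; i++)
		if (!buckets[i].gc_mark)
			prio_free[n++] = i;

	printf("seeded prio reserve with buckets %d and %d; "
	       "the allocator thread can now write prios\n",
	       prio_free[0], prio_free[1]);
	return 0;
}

Only empty buckets are safe to hand over before any prio write has happened, which is why the real code also insists on bch_can_invalidate_bucket() && !GC_MARK(b).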
drivers/md/bcache/btree.h  +1 −1
@@ -252,7 +252,7 @@ int bch_btree_insert(struct cache_set *, struct keylist *,
 		     atomic_t *, struct bkey *);
 
 int bch_gc_thread_start(struct cache_set *);
-size_t bch_btree_gc_finish(struct cache_set *);
+void bch_initial_gc_finish(struct cache_set *);
 void bch_moving_gc(struct cache_set *);
 int bch_btree_check(struct cache_set *);
 void bch_initial_mark_key(struct cache_set *, int, struct bkey *);
drivers/md/bcache/super.c  +9 −15
@@ -541,9 +541,6 @@ static void prio_io(struct cache *ca, uint64_t bucket, unsigned long rw)
 	closure_sync(cl);
 }
 
-#define buckets_free(c)	"free %zu, free_inc %zu, unused %zu",		\
-	fifo_used(&c->free), fifo_used(&c->free_inc), fifo_used(&c->unused)
-
 void bch_prio_write(struct cache *ca)
 {
 	int i;
@@ -554,10 +551,6 @@ void bch_prio_write(struct cache *ca)

 	lockdep_assert_held(&ca->set->bucket_lock);
 
-	for (b = ca->buckets;
-	     b < ca->buckets + ca->sb.nbuckets; b++)
-		b->disk_gen = b->gen;
-
 	ca->disk_buckets->seq++;
 
 	atomic_long_add(ca->sb.bucket_size * prio_buckets(ca),
@@ -601,15 +594,18 @@ void bch_prio_write(struct cache *ca)

 	mutex_lock(&ca->set->bucket_lock);
 
-	ca->need_save_prio = 0;
-
 	/*
 	 * Don't want the old priorities to get garbage collected until after we
 	 * finish writing the new ones, and they're journalled
 	 */
-	for (i = 0; i < prio_buckets(ca); i++)
+	for (i = 0; i < prio_buckets(ca); i++) {
+		if (ca->prio_last_buckets[i])
+			__bch_bucket_free(ca,
+				&ca->buckets[ca->prio_last_buckets[i]]);
+
 		ca->prio_last_buckets[i] = ca->prio_buckets[i];
+	}
 }
 
 static void prio_read(struct cache *ca, uint64_t bucket)
 {
@@ -639,7 +635,7 @@ static void prio_read(struct cache *ca, uint64_t bucket)
 		}
 
 		b->prio = le16_to_cpu(d->prio);
-		b->gen = b->disk_gen = b->last_gc = b->gc_gen = d->gen;
+		b->gen = b->last_gc = b->gc_gen = d->gen;
 	}
 }

@@ -1606,7 +1602,7 @@ static void run_cache_set(struct cache_set *c)
 			goto err;
 
 		bch_journal_mark(c, &journal);
-		bch_btree_gc_finish(c);
+		bch_initial_gc_finish(c);
 		pr_debug("btree_check() done");
 
 		/*
@@ -1648,7 +1644,7 @@ static void run_cache_set(struct cache_set *c)
 				ca->sb.d[j] = ca->sb.first_bucket + j;
 		}
 
-		bch_btree_gc_finish(c);
+		bch_initial_gc_finish(c);
 
 		err = "error starting allocator thread";
 		for_each_cache(ca, c, i)
@@ -1794,7 +1790,6 @@ void bch_cache_release(struct kobject *kobj)
 	vfree(ca->buckets);
 
 	free_heap(&ca->heap);
-	free_fifo(&ca->unused);
 	free_fifo(&ca->free_inc);
 
 	for (i = 0; i < RESERVE_NR; i++)
@@ -1831,7 +1826,6 @@ static int cache_alloc(struct cache_sb *sb, struct cache *ca)
 	    !init_fifo(&ca->free[RESERVE_MOVINGGC], free, GFP_KERNEL) ||
 	    !init_fifo(&ca->free[RESERVE_NONE], free, GFP_KERNEL) ||
 	    !init_fifo(&ca->free_inc,	free << 2, GFP_KERNEL) ||
-	    !init_fifo(&ca->unused,	free << 2, GFP_KERNEL) ||
 	    !init_heap(&ca->heap,	free << 3, GFP_KERNEL) ||
 	    !(ca->buckets	= vzalloc(sizeof(struct bucket) *
 					  ca->sb.nbuckets)) ||
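One consequence visible in the super.c hunks: bch_prio_write() now frees the previous generation of prio buckets itself, via __bch_bucket_free(), and only after the new generation is written and journalled, so a crash mid-write always leaves one valid copy on disk. A toy of that double-buffering (bucket numbers and array sizes invented; the printf stands in for the real free):

/* prio_double_buffer_demo.c - retiring the old prio buckets late */
#include <stdio.h>

#define PRIO_BUCKETS 2

int main(void)
{
	long prio_buckets[PRIO_BUCKETS]      = {10, 11};	/* being written now */
	long prio_last_buckets[PRIO_BUCKETS] = {4, 5};		/* previous, still valid copy */

	/* ... new prio buckets are written and journalled here ... */

	for (int i = 0; i < PRIO_BUCKETS; i++) {
		if (prio_last_buckets[i])
			printf("free old prio bucket %ld\n",	/* __bch_bucket_free() */
			       prio_last_buckets[i]);
		prio_last_buckets[i] = prio_buckets[i];
	}
	return 0;
}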