
Commit e84987a1 authored by Jens Axboe

Merge branch 'bcache-for-3.15' of git://evilpiepirate.org/~kent/linux-bcache into for-3.15/drivers

Kent writes:

Jens, here's the bcache changes for 3.15. Lots of bugfixes, and some
refactoring and cleanups.
parents 5eb9291c cb851149
drivers/md/bcache/Kconfig  +0 −8
@@ -24,11 +24,3 @@ config BCACHE_CLOSURES_DEBUG
	Keeps all active closures in a linked list and provides a debugfs
	interface to list them, which makes it possible to see asynchronous
	operations that get stuck.

# cgroup code needs to be updated:
#
#config CGROUP_BCACHE
#	bool "Cgroup controls for bcache"
#	depends on BCACHE && BLK_CGROUP
#	---help---
#	TODO
drivers/md/bcache/alloc.c  +68 −105
@@ -78,12 +78,6 @@ uint8_t bch_inc_gen(struct cache *ca, struct bucket *b)
	ca->set->need_gc = max(ca->set->need_gc, bucket_gc_gen(b));
	WARN_ON_ONCE(ca->set->need_gc > BUCKET_GC_GEN_MAX);

	if (CACHE_SYNC(&ca->set->sb)) {
		ca->need_save_prio = max(ca->need_save_prio,
					 bucket_disk_gen(b));
		WARN_ON_ONCE(ca->need_save_prio > BUCKET_DISK_GEN_MAX);
	}

	return ret;
}

@@ -120,51 +114,45 @@ void bch_rescale_priorities(struct cache_set *c, int sectors)
	mutex_unlock(&c->bucket_lock);
}

/* Allocation */
/*
 * Background allocation thread: scans for buckets to be invalidated,
 * invalidates them, rewrites prios/gens (marking them as invalidated on disk),
 * then optionally issues discard commands to the newly free buckets, then puts
 * them on the various freelists.
 */

static inline bool can_inc_bucket_gen(struct bucket *b)
{
	return bucket_gc_gen(b) < BUCKET_GC_GEN_MAX &&
		bucket_disk_gen(b) < BUCKET_DISK_GEN_MAX;
	return bucket_gc_gen(b) < BUCKET_GC_GEN_MAX;
}

bool bch_bucket_add_unused(struct cache *ca, struct bucket *b)
bool bch_can_invalidate_bucket(struct cache *ca, struct bucket *b)
{
	BUG_ON(GC_MARK(b) || GC_SECTORS_USED(b));

	if (CACHE_REPLACEMENT(&ca->sb) == CACHE_REPLACEMENT_FIFO) {
		unsigned i;

		for (i = 0; i < RESERVE_NONE; i++)
			if (!fifo_full(&ca->free[i]))
				goto add;

		return false;
	}
add:
	b->prio = 0;

	if (can_inc_bucket_gen(b) &&
	    fifo_push(&ca->unused, b - ca->buckets)) {
		atomic_inc(&b->pin);
		return true;
	}
	BUG_ON(!ca->set->gc_mark_valid);

	return false;
}

static bool can_invalidate_bucket(struct cache *ca, struct bucket *b)
{
	return GC_MARK(b) == GC_MARK_RECLAIMABLE &&
	return (!GC_MARK(b) ||
		GC_MARK(b) == GC_MARK_RECLAIMABLE) &&
		!atomic_read(&b->pin) &&
		can_inc_bucket_gen(b);
}

static void invalidate_one_bucket(struct cache *ca, struct bucket *b)
void __bch_invalidate_one_bucket(struct cache *ca, struct bucket *b)
{
	lockdep_assert_held(&ca->set->bucket_lock);
	BUG_ON(GC_MARK(b) && GC_MARK(b) != GC_MARK_RECLAIMABLE);

	if (GC_SECTORS_USED(b))
		trace_bcache_invalidate(ca, b - ca->buckets);

	bch_inc_gen(ca, b);
	b->prio = INITIAL_PRIO;
	atomic_inc(&b->pin);
}

static void bch_invalidate_one_bucket(struct cache *ca, struct bucket *b)
{
	__bch_invalidate_one_bucket(ca, b);

	fifo_push(&ca->free_inc, b - ca->buckets);
}

@@ -195,20 +183,7 @@ static void invalidate_buckets_lru(struct cache *ca)
	ca->heap.used = 0;

	for_each_bucket(b, ca) {
		/*
		 * If we fill up the unused list, if we then return before
		 * adding anything to the free_inc list we'll skip writing
		 * prios/gens and just go back to allocating from the unused
		 * list:
		 */
		if (fifo_full(&ca->unused))
			return;

		if (!can_invalidate_bucket(ca, b))
			continue;

		if (!GC_SECTORS_USED(b) &&
		    bch_bucket_add_unused(ca, b))
		if (!bch_can_invalidate_bucket(ca, b))
			continue;

		if (!heap_full(&ca->heap))
@@ -233,7 +208,7 @@ static void invalidate_buckets_lru(struct cache *ca)
			return;
		}

		invalidate_one_bucket(ca, b);
		bch_invalidate_one_bucket(ca, b);
	}
}

@@ -249,8 +224,8 @@ static void invalidate_buckets_fifo(struct cache *ca)

		b = ca->buckets + ca->fifo_last_bucket++;

		if (can_invalidate_bucket(ca, b))
			invalidate_one_bucket(ca, b);
		if (bch_can_invalidate_bucket(ca, b))
			bch_invalidate_one_bucket(ca, b);

		if (++checked >= ca->sb.nbuckets) {
			ca->invalidate_needs_gc = 1;
@@ -274,8 +249,8 @@ static void invalidate_buckets_random(struct cache *ca)

		b = ca->buckets + n;

		if (can_invalidate_bucket(ca, b))
			invalidate_one_bucket(ca, b);
		if (bch_can_invalidate_bucket(ca, b))
			bch_invalidate_one_bucket(ca, b);

		if (++checked >= ca->sb.nbuckets / 2) {
			ca->invalidate_needs_gc = 1;
@@ -287,8 +262,7 @@ static void invalidate_buckets_random(struct cache *ca)

static void invalidate_buckets(struct cache *ca)
{
	if (ca->invalidate_needs_gc)
		return;
	BUG_ON(ca->invalidate_needs_gc);

	switch (CACHE_REPLACEMENT(&ca->sb)) {
	case CACHE_REPLACEMENT_LRU:
@@ -301,8 +275,6 @@ static void invalidate_buckets(struct cache *ca)
		invalidate_buckets_random(ca);
		break;
	}

	trace_bcache_alloc_invalidate(ca);
}

#define allocator_wait(ca, cond)					\
@@ -350,17 +322,10 @@ static int bch_allocator_thread(void *arg)
		 * possibly issue discards to them, then we add the bucket to
		 * the free list:
		 */
		while (1) {
		while (!fifo_empty(&ca->free_inc)) {
			long bucket;

			if ((!atomic_read(&ca->set->prio_blocked) ||
			     !CACHE_SYNC(&ca->set->sb)) &&
			    !fifo_empty(&ca->unused))
				fifo_pop(&ca->unused, bucket);
			else if (!fifo_empty(&ca->free_inc))
			fifo_pop(&ca->free_inc, bucket);
			else
				break;

			if (ca->discard) {
				mutex_unlock(&ca->set->bucket_lock);
@@ -371,6 +336,7 @@ static int bch_allocator_thread(void *arg)
			}

			allocator_wait(ca, bch_allocator_push(ca, bucket));
			wake_up(&ca->set->btree_cache_wait);
			wake_up(&ca->set->bucket_wait);
		}

@@ -380,9 +346,9 @@ static int bch_allocator_thread(void *arg)
		 * them to the free_inc list:
		 */

retry_invalidate:
		allocator_wait(ca, ca->set->gc_mark_valid &&
			       (ca->need_save_prio > 64 ||
				!ca->invalidate_needs_gc));
			       !ca->invalidate_needs_gc);
		invalidate_buckets(ca);

		/*
@@ -390,12 +356,27 @@ static int bch_allocator_thread(void *arg)
		 * new stuff to them:
		 */
		allocator_wait(ca, !atomic_read(&ca->set->prio_blocked));
		if (CACHE_SYNC(&ca->set->sb) &&
		    (!fifo_empty(&ca->free_inc) ||
		     ca->need_save_prio > 64))
		if (CACHE_SYNC(&ca->set->sb)) {
			/*
			 * This could deadlock if an allocation with a btree
			 * node locked ever blocked - having the btree node
			 * locked would block garbage collection, but here we're
			 * waiting on garbage collection before we invalidate
			 * and free anything.
			 *
			 * But this should be safe since the btree code always
			 * uses btree_check_reserve() before allocating now, and
			 * if it fails it blocks without btree nodes locked.
			 */
			if (!fifo_full(&ca->free_inc))
				goto retry_invalidate;

			bch_prio_write(ca);
		}
	}
}

/* Allocation */

long bch_bucket_alloc(struct cache *ca, unsigned reserve, bool wait)
{
@@ -408,8 +389,10 @@ long bch_bucket_alloc(struct cache *ca, unsigned reserve, bool wait)
	    fifo_pop(&ca->free[reserve], r))
		goto out;

	if (!wait)
	if (!wait) {
		trace_bcache_alloc_fail(ca, reserve);
		return -1;
	}

	do {
		prepare_to_wait(&ca->set->bucket_wait, &w,
@@ -425,6 +408,8 @@ long bch_bucket_alloc(struct cache *ca, unsigned reserve, bool wait)
out:
	wake_up_process(ca->alloc_thread);

	trace_bcache_alloc(ca, reserve);

	if (expensive_debug_checks(ca->set)) {
		size_t iter;
		long i;
@@ -438,8 +423,6 @@ long bch_bucket_alloc(struct cache *ca, unsigned reserve, bool wait)
				BUG_ON(i == r);
		fifo_for_each(i, &ca->free_inc, iter)
			BUG_ON(i == r);
		fifo_for_each(i, &ca->unused, iter)
			BUG_ON(i == r);
	}

	b = ca->buckets + r;
@@ -461,17 +444,19 @@ long bch_bucket_alloc(struct cache *ca, unsigned reserve, bool wait)
	return r;
}

void __bch_bucket_free(struct cache *ca, struct bucket *b)
{
	SET_GC_MARK(b, 0);
	SET_GC_SECTORS_USED(b, 0);
}

void bch_bucket_free(struct cache_set *c, struct bkey *k)
{
	unsigned i;

	for (i = 0; i < KEY_PTRS(k); i++) {
		struct bucket *b = PTR_BUCKET(c, k, i);

		SET_GC_MARK(b, GC_MARK_RECLAIMABLE);
		SET_GC_SECTORS_USED(b, 0);
		bch_bucket_add_unused(PTR_CACHE(c, k, i), b);
	}
	for (i = 0; i < KEY_PTRS(k); i++)
		__bch_bucket_free(PTR_CACHE(c, k, i),
				  PTR_BUCKET(c, k, i));
}

int __bch_bucket_alloc_set(struct cache_set *c, unsigned reserve,
@@ -709,25 +694,3 @@ int bch_cache_allocator_start(struct cache *ca)
	ca->alloc_thread = k;
	return 0;
}

int bch_cache_allocator_init(struct cache *ca)
{
	/*
	 * Reserve:
	 * Prio/gen writes first
	 * Then 8 for btree allocations
	 * Then half for the moving garbage collector
	 */
#if 0
	ca->watermark[WATERMARK_PRIO] = 0;

	ca->watermark[WATERMARK_METADATA] = prio_buckets(ca);

	ca->watermark[WATERMARK_MOVINGGC] = 8 +
		ca->watermark[WATERMARK_METADATA];

	ca->watermark[WATERMARK_NONE] = ca->free.size / 2 +
		ca->watermark[WATERMARK_MOVINGGC];
#endif
	return 0;
}
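
The net effect of the alloc.c rewrite above is easier to see outside the diff: with the unused list gone, the allocator thread has exactly one queue of invalidated buckets (free_inc) to drain before it goes looking for more. Below is a rough userspace model of that loop shape — a sketch only; the toy_fifo type, the stub discard/freelist functions, and the bucket numbers are invented for illustration and are not bcache code.

#include <stdbool.h>
#include <stdio.h>

#define FIFO_SIZE 8

/* Toy stand-in for the kernel's DECLARE_FIFO ring buffers. */
struct toy_fifo {
	long data[FIFO_SIZE];
	unsigned front, back;
};

static bool fifo_empty(struct toy_fifo *f) { return f->front == f->back; }
static bool fifo_full(struct toy_fifo *f)  { return f->back - f->front == FIFO_SIZE; }
static void fifo_push(struct toy_fifo *f, long v) { f->data[f->back++ % FIFO_SIZE] = v; }
static long fifo_pop(struct toy_fifo *f)   { return f->data[f->front++ % FIFO_SIZE]; }

/* Stubs for the steps the real thread performs under bucket_lock. */
static void issue_discard(long bucket)     { printf("discard bucket %ld\n", bucket); }
static void push_to_free_list(long bucket) { printf("free bucket %ld\n", bucket); }

static void invalidate_buckets(struct toy_fifo *free_inc)
{
	/* The real code scans buckets LRU/FIFO/randomly; here we just refill. */
	for (long b = 0; !fifo_full(free_inc); b++)
		fifo_push(free_inc, b);
}

int main(void)
{
	struct toy_fifo free_inc = { { 0 } };
	bool discard_enabled = true;

	for (int iter = 0; iter < 2; iter++) {
		/*
		 * 1) Drain free_inc: optionally discard each bucket, then hand
		 *    it to the freelists (and, in the kernel, wake waiters).
		 */
		while (!fifo_empty(&free_inc)) {
			long bucket = fifo_pop(&free_inc);

			if (discard_enabled)
				issue_discard(bucket);
			push_to_free_list(bucket);
		}

		/*
		 * 2) Find more buckets to invalidate; in SYNC mode the kernel
		 *    then writes prios/gens before the buckets are reused.
		 */
		invalidate_buckets(&free_inc);
	}
	return 0;
}
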
drivers/md/bcache/bcache.h  +16 −40
@@ -195,9 +195,7 @@ struct bucket {
	atomic_t	pin;
	uint16_t	prio;
	uint8_t		gen;
	uint8_t		disk_gen;
	uint8_t		last_gc; /* Most out of date gen in the btree */
	uint8_t		gc_gen;
	uint16_t	gc_mark; /* Bitfield used by GC. See below for field */
};

@@ -207,9 +205,9 @@ struct bucket {
 */

BITMASK(GC_MARK,	 struct bucket, gc_mark, 0, 2);
#define GC_MARK_RECLAIMABLE	0
#define GC_MARK_DIRTY		1
#define GC_MARK_METADATA	2
#define GC_MARK_RECLAIMABLE	1
#define GC_MARK_DIRTY		2
#define GC_MARK_METADATA	3
#define GC_SECTORS_USED_SIZE	13
#define MAX_GC_SECTORS_USED	(~(~0ULL << GC_SECTORS_USED_SIZE))
BITMASK(GC_SECTORS_USED, struct bucket, gc_mark, 2, GC_SECTORS_USED_SIZE);
@@ -426,14 +424,9 @@ struct cache {
	 * their new gen to disk. After prio_write() finishes writing the new
	 * gens/prios, they'll be moved to the free list (and possibly discarded
	 * in the process)
	 *
	 * unused: GC found nothing pointing into these buckets (possibly
	 * because all the data they contained was overwritten), so we only
	 * need to discard them before they can be moved to the free list.
	 */
	DECLARE_FIFO(long, free)[RESERVE_NR];
	DECLARE_FIFO(long, free_inc);
	DECLARE_FIFO(long, unused);

	size_t			fifo_last_bucket;

@@ -442,12 +435,6 @@ struct cache {

	DECLARE_HEAP(struct bucket *, heap);

	/*
	 * max(gen - disk_gen) for all buckets. When it gets too big we have to
	 * call prio_write() to keep gens from wrapping.
	 */
	uint8_t			need_save_prio;

	/*
	 * If nonzero, we know we aren't going to find any buckets to invalidate
	 * until a gc finishes - otherwise we could pointlessly burn a ton of
@@ -562,19 +549,16 @@ struct cache_set {
	struct list_head	btree_cache_freed;

	/* Number of elements in btree_cache + btree_cache_freeable lists */
	unsigned		bucket_cache_used;
	unsigned		btree_cache_used;

	/*
	 * If we need to allocate memory for a new btree node and that
	 * allocation fails, we can cannibalize another node in the btree cache
	 * to satisfy the allocation. However, only one thread can be doing this
	 * at a time, for obvious reasons - try_harder and try_wait are
	 * basically a lock for this that we can wait on asynchronously. The
	 * btree_root() macro releases the lock when it returns.
	 * to satisfy the allocation - lock to guarantee only one thread does
	 * this at a time:
	 */
	struct task_struct	*try_harder;
	wait_queue_head_t	try_wait;
	uint64_t		try_harder_start;
	wait_queue_head_t	btree_cache_wait;
	struct task_struct	*btree_cache_alloc_lock;

	/*
	 * When we free a btree node, we increment the gen of the bucket the
@@ -603,7 +587,7 @@ struct cache_set {
	uint16_t		min_prio;

	/*
	 * max(gen - gc_gen) for all buckets. When it gets too big we have to gc
	 * max(gen - last_gc) for all buckets. When it gets too big we have to gc
	 * to keep gens from wrapping around.
	 */
	uint8_t			need_gc;
@@ -628,6 +612,8 @@ struct cache_set {
	/* Number of moving GC bios in flight */
	struct semaphore	moving_in_flight;

	struct workqueue_struct	*moving_gc_wq;

	struct btree		*root;

#ifdef CONFIG_BCACHE_DEBUG
@@ -667,7 +653,6 @@ struct cache_set {
	struct time_stats	btree_gc_time;
	struct time_stats	btree_split_time;
	struct time_stats	btree_read_time;
	struct time_stats	try_harder_time;

	atomic_long_t		cache_read_races;
	atomic_long_t		writeback_keys_done;
@@ -850,9 +835,6 @@ static inline bool cached_dev_get(struct cached_dev *dc)
/*
 * bucket_gc_gen() returns the difference between the bucket's current gen and
 * the oldest gen of any pointer into that bucket in the btree (last_gc).
 *
 * bucket_disk_gen() returns the difference between the current gen and the gen
 * on disk; they're both used to make sure gens don't wrap around.
 */

static inline uint8_t bucket_gc_gen(struct bucket *b)
@@ -860,13 +842,7 @@ static inline uint8_t bucket_gc_gen(struct bucket *b)
	return b->gen - b->last_gc;
}

static inline uint8_t bucket_disk_gen(struct bucket *b)
{
	return b->gen - b->disk_gen;
}

#define BUCKET_GC_GEN_MAX	96U
#define BUCKET_DISK_GEN_MAX	64U

#define kobj_attribute_write(n, fn)					\
	static struct kobj_attribute ksysfs_##n = __ATTR(n, S_IWUSR, NULL, fn)
@@ -899,11 +875,14 @@ void bch_submit_bbio(struct bio *, struct cache_set *, struct bkey *, unsigned);

uint8_t bch_inc_gen(struct cache *, struct bucket *);
void bch_rescale_priorities(struct cache_set *, int);
bool bch_bucket_add_unused(struct cache *, struct bucket *);

long bch_bucket_alloc(struct cache *, unsigned, bool);
bool bch_can_invalidate_bucket(struct cache *, struct bucket *);
void __bch_invalidate_one_bucket(struct cache *, struct bucket *);

void __bch_bucket_free(struct cache *, struct bucket *);
void bch_bucket_free(struct cache_set *, struct bkey *);

long bch_bucket_alloc(struct cache *, unsigned, bool);
int __bch_bucket_alloc_set(struct cache_set *, unsigned,
			   struct bkey *, int, bool);
int bch_bucket_alloc_set(struct cache_set *, unsigned,
@@ -954,13 +933,10 @@ int bch_open_buckets_alloc(struct cache_set *);
void bch_open_buckets_free(struct cache_set *);

int bch_cache_allocator_start(struct cache *ca);
int bch_cache_allocator_init(struct cache *ca);

void bch_debug_exit(void);
int bch_debug_init(struct kobject *);
void bch_request_exit(void);
int bch_request_init(void);
void bch_btree_exit(void);
int bch_btree_init(void);

#endif /* _BCACHE_H */
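
A subtle piece of the bcache.h changes is the renumbering of the GC_MARK values: 0 now means "not yet marked by GC" rather than GC_MARK_RECLAIMABLE, which is why bch_can_invalidate_bucket in alloc.c accepts either !GC_MARK(b) or GC_MARK(b) == GC_MARK_RECLAIMABLE, and why can_inc_bucket_gen only looks at the in-btree gen distance now that disk_gen is gone. A minimal standalone illustration of those predicates, using a flattened toy_bucket instead of the kernel's packed bitfields:

#include <stdbool.h>
#include <stdint.h>

/* New numbering from this merge: 0 is "not marked yet". */
#define GC_MARK_RECLAIMABLE	1
#define GC_MARK_DIRTY		2
#define GC_MARK_METADATA	3

#define BUCKET_GC_GEN_MAX	96U

/* Simplified bucket: the real one packs gc_mark into a bitfield. */
struct toy_bucket {
	uint16_t gc_mark;
	int pin;
	uint8_t gen, last_gc;
};

static bool can_inc_bucket_gen(struct toy_bucket *b)
{
	/*
	 * Only the in-btree gen distance matters now; the on-disk
	 * distance (disk_gen) was removed in this merge.
	 */
	return (uint8_t)(b->gen - b->last_gc) < BUCKET_GC_GEN_MAX;
}

static bool can_invalidate_bucket(struct toy_bucket *b)
{
	/*
	 * Unmarked (0) or explicitly reclaimable buckets qualify;
	 * dirty and metadata buckets never do.
	 */
	return (!b->gc_mark || b->gc_mark == GC_MARK_RECLAIMABLE) &&
		!b->pin &&
		can_inc_bucket_gen(b);
}

int main(void)
{
	struct toy_bucket b = { .gc_mark = GC_MARK_DIRTY };

	return can_invalidate_bucket(&b) ? 1 : 0; /* dirty -> not invalidatable */
}
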
drivers/md/bcache/bset.c  +2 −2
@@ -23,8 +23,8 @@ void bch_dump_bset(struct btree_keys *b, struct bset *i, unsigned set)
	for (k = i->start; k < bset_bkey_last(i); k = next) {
		next = bkey_next(k);

		printk(KERN_ERR "block %u key %li/%u: ", set,
		       (uint64_t *) k - i->d, i->keys);
		printk(KERN_ERR "block %u key %u/%u: ", set,
		       (unsigned) ((u64 *) k - i->d), i->keys);

		if (b->ops->key_dump)
			b->ops->key_dump(b, k);
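
The two-line bset.c change is a format-string fix: the pointer difference (uint64_t *) k - i->d has type ptrdiff_t, which does not reliably match the old "%li" conversion, so the new code casts it to unsigned and prints it with "%u". A small userspace illustration of the same pitfall and two portable ways around it (plain printf, made-up array):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t d[16];
	uint64_t *k = &d[5];

	/*
	 * Pointer subtraction yields ptrdiff_t, whose width varies by
	 * architecture, so "%li" is not a portable match for it.
	 */

	/* Portable option 1: cast to a known type... */
	printf("key %u/%zu\n", (unsigned)(k - d), sizeof(d) / sizeof(d[0]));

	/* ...or option 2: use the dedicated ptrdiff_t length modifier. */
	printf("key %td/%zu\n", k - d, sizeof(d) / sizeof(d[0]));

	return 0;
}
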
drivers/md/bcache/bset.h  +6 −0
@@ -478,6 +478,12 @@ static inline void bch_keylist_init(struct keylist *l)
	l->top_p = l->keys_p = l->inline_keys;
}

static inline void bch_keylist_init_single(struct keylist *l, struct bkey *k)
{
	l->keys = k;
	l->top = bkey_next(k);
}

static inline void bch_keylist_push(struct keylist *l)
{
	l->top = bkey_next(l->top);
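
The new bch_keylist_init_single() helper points a keylist at one already-built key, so that keys and top bracket exactly that key and single-key callers can presumably reuse the keylist-based insert paths without copying. A rough standalone sketch of the idea, using cut-down stand-ins (toy_bkey, toy_keylist) rather than bcache's real variable-length keys:

#include <stdint.h>
#include <stdio.h>

/* Cut-down stand-ins: real bcache keys are variable-length, sized in u64s. */
struct toy_bkey {
	uint64_t high;		/* header word holding the size, among other bits */
	uint64_t low;
	uint64_t ptr[2];
};

struct toy_keylist {
	struct toy_bkey *keys;	/* first key */
	struct toy_bkey *top;	/* one past the last key */
};

/*
 * In bcache, bkey_next() advances by the key's encoded length; the
 * fixed-size toy key just steps to the next struct.
 */
static struct toy_bkey *toy_bkey_next(struct toy_bkey *k)
{
	return k + 1;
}

/* Mirror of bch_keylist_init_single(): wrap one existing key. */
static void toy_keylist_init_single(struct toy_keylist *l, struct toy_bkey *k)
{
	l->keys = k;
	l->top = toy_bkey_next(k);
}

int main(void)
{
	struct toy_bkey k = { .low = 42 };
	struct toy_keylist l;

	toy_keylist_init_single(&l, &k);
	printf("keys in list: %td\n", l.top - l.keys);	/* prints 1 */
	return 0;
}
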