Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit b9834717 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
* git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable:
  Btrfs: BUG to BUG_ON changes
  Btrfs: remove dead code
  Btrfs: remove dead code
  Btrfs: fix typos in comments
  Btrfs: remove unused ftrace include
  Btrfs: fix __ucmpdi2 compile bug on 32 bit builds
  Btrfs: free inode struct when btrfs_new_inode fails
  Btrfs: fix race in worker_loop
  Btrfs: add flushoncommit mount option
  Btrfs: notreelog mount option
  Btrfs: introduce btrfs_show_options
  Btrfs: rework allocation clustering
  Btrfs: Optimize locking in btrfs_next_leaf()
  Btrfs: break up btrfs_search_slot into smaller pieces
  Btrfs: kill the pinned_mutex
  Btrfs: kill the block group alloc mutex
  Btrfs: clean up find_free_extent
  Btrfs: free space cache cleanups
  Btrfs: unplug in the async bio submission threads
  Btrfs: keep processing bios for a given bdev if our proc is batching
parents 5a3ae276 c293498b
Loading
Loading
Loading
Loading
+5 −2
Original line number Diff line number Diff line
@@ -20,7 +20,6 @@
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/freezer.h>
#include <linux/ftrace.h>
#include "async-thread.h"

#define WORK_QUEUED_BIT 0
@@ -195,6 +194,9 @@ again_locked:
				if (!list_empty(&worker->pending))
					continue;

				if (kthread_should_stop())
					break;

				/* still no more work?, sleep for real */
				spin_lock_irq(&worker->lock);
				set_current_state(TASK_INTERRUPTIBLE);
@@ -208,6 +210,7 @@ again_locked:
				worker->working = 0;
				spin_unlock_irq(&worker->lock);

				if (!kthread_should_stop())
					schedule();
			}
			__set_current_state(TASK_RUNNING);
+197 −115
Original line number Diff line number Diff line
@@ -1244,7 +1244,7 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
 * readahead one full node of leaves, finding things that are close
 * to the block in 'slot', and triggering ra on them.
 */
static noinline void reada_for_search(struct btrfs_root *root,
static void reada_for_search(struct btrfs_root *root,
			     struct btrfs_path *path,
			     int level, int slot, u64 objectid)
{
@@ -1446,6 +1446,117 @@ noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
	}
}

/*
 * helper function for btrfs_search_slot.  The goal is to find a block
 * in cache without setting the path to blocking.  If we find the block
 * we return zero and the path is unchanged.
 *
 * If we can't find the block, we set the path blocking and do some
 * reada.  -EAGAIN is returned and the search must be repeated.
 */
static int
read_block_for_search(struct btrfs_trans_handle *trans,
		       struct btrfs_root *root, struct btrfs_path *p,
		       struct extent_buffer **eb_ret, int level, int slot,
		       struct btrfs_key *key)
{
	u64 blocknr;
	u64 gen;
	u32 blocksize;
	struct extent_buffer *b = *eb_ret;
	struct extent_buffer *tmp;

	blocknr = btrfs_node_blockptr(b, slot);
	gen = btrfs_node_ptr_generation(b, slot);
	blocksize = btrfs_level_size(root, level - 1);

	tmp = btrfs_find_tree_block(root, blocknr, blocksize);
	if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
		*eb_ret = tmp;
		return 0;
	}

	/*
	 * reduce lock contention at high levels
	 * of the btree by dropping locks before
	 * we read.
	 */
	btrfs_release_path(NULL, p);
	if (tmp)
		free_extent_buffer(tmp);
	if (p->reada)
		reada_for_search(root, p, level, slot, key->objectid);

	tmp = read_tree_block(root, blocknr, blocksize, gen);
	if (tmp)
		free_extent_buffer(tmp);
	return -EAGAIN;
}

/*
 * helper function for btrfs_search_slot.  This does all of the checks
 * for node-level blocks and does any balancing required based on
 * the ins_len.
 *
 * If no extra work was required, zero is returned.  If we had to
 * drop the path, -EAGAIN is returned and btrfs_search_slot must
 * start over
 */
static int
setup_nodes_for_search(struct btrfs_trans_handle *trans,
		       struct btrfs_root *root, struct btrfs_path *p,
		       struct extent_buffer *b, int level, int ins_len)
{
	int ret;
	if ((p->search_for_split || ins_len > 0) && btrfs_header_nritems(b) >=
	    BTRFS_NODEPTRS_PER_BLOCK(root) - 3) {
		int sret;

		sret = reada_for_balance(root, p, level);
		if (sret)
			goto again;

		btrfs_set_path_blocking(p);
		sret = split_node(trans, root, p, level);
		btrfs_clear_path_blocking(p, NULL);

		BUG_ON(sret > 0);
		if (sret) {
			ret = sret;
			goto done;
		}
		b = p->nodes[level];
	} else if (ins_len < 0 && btrfs_header_nritems(b) <
		   BTRFS_NODEPTRS_PER_BLOCK(root) / 4) {
		int sret;

		sret = reada_for_balance(root, p, level);
		if (sret)
			goto again;

		btrfs_set_path_blocking(p);
		sret = balance_level(trans, root, p, level);
		btrfs_clear_path_blocking(p, NULL);

		if (sret) {
			ret = sret;
			goto done;
		}
		b = p->nodes[level];
		if (!b) {
			btrfs_release_path(NULL, p);
			goto again;
		}
		BUG_ON(btrfs_header_nritems(b) == 1);
	}
	return 0;

again:
	ret = -EAGAIN;
done:
	return ret;
}

/*
 * look for key in the tree.  path is filled in with nodes along the way
 * if key is found, we return zero and you can find the item in the leaf
@@ -1464,16 +1575,11 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
		      ins_len, int cow)
{
	struct extent_buffer *b;
	struct extent_buffer *tmp;
	int slot;
	int ret;
	int level;
	int should_reada = p->reada;
	int lowest_unlock = 1;
	int blocksize;
	u8 lowest_level = 0;
	u64 blocknr;
	u64 gen;

	lowest_level = p->lowest_level;
	WARN_ON(lowest_level && ins_len > 0);
@@ -1502,7 +1608,11 @@ again:
		if (cow) {
			int wret;

			/* is a cow on this block not required */
			/*
			 * if we don't really need to cow this block
			 * then we don't want to set the path blocking,
			 * so we test it here
			 */
			if (btrfs_header_generation(b) == trans->transid &&
			    btrfs_header_owner(b) == root->root_key.objectid &&
			    !btrfs_header_flag(b, BTRFS_HEADER_FLAG_WRITTEN)) {
@@ -1557,51 +1667,15 @@ cow_done:
			if (ret && slot > 0)
				slot -= 1;
			p->slots[level] = slot;
			if ((p->search_for_split || ins_len > 0) &&
			    btrfs_header_nritems(b) >=
			    BTRFS_NODEPTRS_PER_BLOCK(root) - 3) {
				int sret;

				sret = reada_for_balance(root, p, level);
				if (sret)
			ret = setup_nodes_for_search(trans, root, p, b, level,
						     ins_len);
			if (ret == -EAGAIN)
				goto again;

				btrfs_set_path_blocking(p);
				sret = split_node(trans, root, p, level);
				btrfs_clear_path_blocking(p, NULL);

				BUG_ON(sret > 0);
				if (sret) {
					ret = sret;
			else if (ret)
				goto done;
				}
			b = p->nodes[level];
			slot = p->slots[level];
			} else if (ins_len < 0 &&
				   btrfs_header_nritems(b) <
				   BTRFS_NODEPTRS_PER_BLOCK(root) / 4) {
				int sret;

				sret = reada_for_balance(root, p, level);
				if (sret)
					goto again;

				btrfs_set_path_blocking(p);
				sret = balance_level(trans, root, p, level);
				btrfs_clear_path_blocking(p, NULL);

				if (sret) {
					ret = sret;
					goto done;
				}
				b = p->nodes[level];
				if (!b) {
					btrfs_release_path(NULL, p);
					goto again;
				}
				slot = p->slots[level];
				BUG_ON(btrfs_header_nritems(b) == 1);
			}
			unlock_up(p, level, lowest_unlock);

			/* this is only true while dropping a snapshot */
@@ -1610,44 +1684,11 @@ cow_done:
				goto done;
			}

			blocknr = btrfs_node_blockptr(b, slot);
			gen = btrfs_node_ptr_generation(b, slot);
			blocksize = btrfs_level_size(root, level - 1);

			tmp = btrfs_find_tree_block(root, blocknr, blocksize);
			if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
				b = tmp;
			} else {
				/*
				 * reduce lock contention at high levels
				 * of the btree by dropping locks before
				 * we read.
				 */
				if (level > 0) {
					btrfs_release_path(NULL, p);
					if (tmp)
						free_extent_buffer(tmp);
					if (should_reada)
						reada_for_search(root, p,
								 level, slot,
								 key->objectid);

					tmp = read_tree_block(root, blocknr,
							 blocksize, gen);
					if (tmp)
						free_extent_buffer(tmp);
			ret = read_block_for_search(trans, root, p,
						    &b, level, slot, key);
			if (ret == -EAGAIN)
				goto again;
				} else {
					btrfs_set_path_blocking(p);
					if (tmp)
						free_extent_buffer(tmp);
					if (should_reada)
						reada_for_search(root, p,
								 level, slot,
								 key->objectid);
					b = read_node_slot(root, b, slot);
				}
			}

			if (!p->skip_locking) {
				int lret;

@@ -2116,8 +2157,7 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
	BUG_ON(!path->nodes[level]);
	lower = path->nodes[level];
	nritems = btrfs_header_nritems(lower);
	if (slot > nritems)
		BUG();
	BUG_ON(slot > nritems);
	if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root))
		BUG();
	if (slot != nritems) {
@@ -4086,28 +4126,44 @@ next:
int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
{
	int slot;
	int level = 1;
	int level;
	struct extent_buffer *c;
	struct extent_buffer *next = NULL;
	struct extent_buffer *next;
	struct btrfs_key key;
	u32 nritems;
	int ret;
	int old_spinning = path->leave_spinning;
	int force_blocking = 0;

	nritems = btrfs_header_nritems(path->nodes[0]);
	if (nritems == 0)
		return 1;

	btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1);
	/*
	 * we take the blocks in an order that upsets lockdep.  Using
	 * blocking mode is the only way around it.
	 */
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	force_blocking = 1;
#endif

	btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1);
again:
	level = 1;
	next = NULL;
	btrfs_release_path(root, path);

	path->keep_locks = 1;

	if (!force_blocking)
		path->leave_spinning = 1;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	path->keep_locks = 0;

	if (ret < 0)
		return ret;

	btrfs_set_path_blocking(path);
	nritems = btrfs_header_nritems(path->nodes[0]);
	/*
	 * by releasing the path above we dropped all our locks.  A balance
@@ -4117,19 +4173,24 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
	 */
	if (nritems > 0 && path->slots[0] < nritems - 1) {
		path->slots[0]++;
		ret = 0;
		goto done;
	}

	while (level < BTRFS_MAX_LEVEL) {
		if (!path->nodes[level])
			return 1;
		if (!path->nodes[level]) {
			ret = 1;
			goto done;
		}

		slot = path->slots[level] + 1;
		c = path->nodes[level];
		if (slot >= btrfs_header_nritems(c)) {
			level++;
			if (level == BTRFS_MAX_LEVEL)
				return 1;
			if (level == BTRFS_MAX_LEVEL) {
				ret = 1;
				goto done;
			}
			continue;
		}

@@ -4138,15 +4199,21 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
			free_extent_buffer(next);
		}

		/* the path was set to blocking above */
		if (level == 1 && (path->locks[1] || path->skip_locking) &&
		    path->reada)
			reada_for_search(root, path, level, slot, 0);
		next = c;
		ret = read_block_for_search(NULL, root, path, &next, level,
					    slot, &key);
		if (ret == -EAGAIN)
			goto again;

		next = read_node_slot(root, c, slot);
		if (!path->skip_locking) {
			btrfs_assert_tree_locked(c);
			ret = btrfs_try_spin_lock(next);
			if (!ret) {
				btrfs_set_path_blocking(path);
				btrfs_tree_lock(next);
				if (!force_blocking)
					btrfs_clear_path_blocking(path, next);
			}
			if (force_blocking)
				btrfs_set_lock_blocking(next);
		}
		break;
@@ -4157,27 +4224,42 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
		c = path->nodes[level];
		if (path->locks[level])
			btrfs_tree_unlock(c);

		free_extent_buffer(c);
		path->nodes[level] = next;
		path->slots[level] = 0;
		if (!path->skip_locking)
			path->locks[level] = 1;

		if (!level)
			break;

		btrfs_set_path_blocking(path);
		if (level == 1 && path->locks[1] && path->reada)
			reada_for_search(root, path, level, slot, 0);
		next = read_node_slot(root, next, 0);
		ret = read_block_for_search(NULL, root, path, &next, level,
					    0, &key);
		if (ret == -EAGAIN)
			goto again;

		if (!path->skip_locking) {
			btrfs_assert_tree_locked(path->nodes[level]);
			ret = btrfs_try_spin_lock(next);
			if (!ret) {
				btrfs_set_path_blocking(path);
				btrfs_tree_lock(next);
				if (!force_blocking)
					btrfs_clear_path_blocking(path, next);
			}
			if (force_blocking)
				btrfs_set_lock_blocking(next);
		}
	}
	ret = 0;
done:
	unlock_up(path, 0, 1);
	return 0;
	path->leave_spinning = old_spinning;
	if (!old_spinning)
		btrfs_set_path_blocking(path);

	return ret;
}

/*
+49 −35
Original line number Diff line number Diff line
@@ -143,12 +143,15 @@ static int btrfs_csum_sizes[] = { 4, 0 };
#define BTRFS_FT_MAX		9

/*
 * the key defines the order in the tree, and so it also defines (optimal)
 * block layout.  objectid corresonds to the inode number.  The flags
 * tells us things about the object, and is a kind of stream selector.
 * so for a given inode, keys with flags of 1 might refer to the inode
 * data, flags of 2 may point to file data in the btree and flags == 3
 * may point to extents.
 * The key defines the order in the tree, and so it also defines (optimal)
 * block layout.
 *
 * objectid corresponds to the inode number.
 *
 * type tells us things about the object, and is a kind of stream selector.
 * so for a given inode, keys with type of 1 might refer to the inode data,
 * type of 2 may point to file data in the btree and type == 3 may point to
 * extents.
 *
 * offset is the starting byte offset for this key in the stream.
 *
@@ -200,7 +203,7 @@ struct btrfs_dev_item {

	/*
	 * starting byte of this partition on the device,
	 * to allowr for stripe alignment in the future
	 * to allow for stripe alignment in the future
	 */
	__le64 start_offset;

@@ -633,18 +636,35 @@ struct btrfs_space_info {
	struct rw_semaphore groups_sem;
};

struct btrfs_free_space {
	struct rb_node bytes_index;
	struct rb_node offset_index;
	u64 offset;
	u64 bytes;
/*
 * free clusters are used to claim free space in relatively large chunks,
 * allowing us to do less seeky writes.  They are used for all metadata
 * allocations and data allocations in ssd mode.
 */
struct btrfs_free_cluster {
	spinlock_t lock;
	spinlock_t refill_lock;
	struct rb_root root;

	/* largest extent in this cluster */
	u64 max_size;

	/* first extent starting offset */
	u64 window_start;

	struct btrfs_block_group_cache *block_group;
	/*
	 * when a cluster is allocated from a block group, we put the
	 * cluster onto a list in the block group so that it can
	 * be freed before the block group is freed.
	 */
	struct list_head block_group_list;
};

struct btrfs_block_group_cache {
	struct btrfs_key key;
	struct btrfs_block_group_item item;
	spinlock_t lock;
	struct mutex alloc_mutex;
	struct mutex cache_mutex;
	u64 pinned;
	u64 reserved;
@@ -656,6 +676,7 @@ struct btrfs_block_group_cache {
	struct btrfs_space_info *space_info;

	/* free space cache stuff */
	spinlock_t tree_lock;
	struct rb_root free_space_bytes;
	struct rb_root free_space_offset;

@@ -667,6 +688,11 @@ struct btrfs_block_group_cache {

	/* usage count */
	atomic_t count;

	/* List of struct btrfs_free_clusters for this block group.
	 * Today it will only have one thing on it, but that may change
	 */
	struct list_head cluster_list;
};

struct btrfs_leaf_ref_tree {
@@ -728,7 +754,6 @@ struct btrfs_fs_info {
	struct mutex tree_log_mutex;
	struct mutex transaction_kthread_mutex;
	struct mutex cleaner_mutex;
	struct mutex pinned_mutex;
	struct mutex chunk_mutex;
	struct mutex drop_mutex;
	struct mutex volume_mutex;
@@ -839,8 +864,12 @@ struct btrfs_fs_info {
	spinlock_t delalloc_lock;
	spinlock_t new_trans_lock;
	u64 delalloc_bytes;
	u64 last_alloc;
	u64 last_data_alloc;

	/* data_alloc_cluster is only used in ssd mode */
	struct btrfs_free_cluster data_alloc_cluster;

	/* all metadata allocations go through this cluster */
	struct btrfs_free_cluster meta_alloc_cluster;

	spinlock_t ref_cache_lock;
	u64 total_ref_cache_size;
@@ -932,7 +961,6 @@ struct btrfs_root {
};

/*

 * inode items have the data typically returned from stat and store other
 * info about object characteristics.  There is one for every file and dir in
 * the FS
@@ -963,7 +991,7 @@ struct btrfs_root {
#define BTRFS_EXTENT_CSUM_KEY	128

/*
 * root items point to tree roots.  There are typically in the root
 * root items point to tree roots.  They are typically in the root
 * tree used by the super block to find all the other trees
 */
#define BTRFS_ROOT_ITEM_KEY	132
@@ -1010,6 +1038,8 @@ struct btrfs_root {
#define BTRFS_MOUNT_SSD			(1 << 3)
#define BTRFS_MOUNT_DEGRADED		(1 << 4)
#define BTRFS_MOUNT_COMPRESS		(1 << 5)
#define BTRFS_MOUNT_NOTREELOG           (1 << 6)
#define BTRFS_MOUNT_FLUSHONCOMMIT       (1 << 7)

#define btrfs_clear_opt(o, opt)		((o) &= ~BTRFS_MOUNT_##opt)
#define btrfs_set_opt(o, opt)		((o) |= BTRFS_MOUNT_##opt)
@@ -1748,6 +1778,7 @@ static inline struct dentry *fdentry(struct file *file)
}

/* extent-tree.c */
void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
			   struct btrfs_root *root, unsigned long count);
int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len);
@@ -2174,21 +2205,4 @@ int btrfs_check_acl(struct inode *inode, int mask);
int btrfs_init_acl(struct inode *inode, struct inode *dir);
int btrfs_acl_chmod(struct inode *inode);

/* free-space-cache.c */
int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
			 u64 bytenr, u64 size);
int btrfs_add_free_space_lock(struct btrfs_block_group_cache *block_group,
			      u64 offset, u64 bytes);
int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
			    u64 bytenr, u64 size);
int btrfs_remove_free_space_lock(struct btrfs_block_group_cache *block_group,
				 u64 offset, u64 bytes);
void btrfs_remove_free_space_cache(struct btrfs_block_group_cache
				   *block_group);
struct btrfs_free_space *btrfs_find_free_space(struct btrfs_block_group_cache
					       *block_group, u64 offset,
					       u64 bytes);
void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
			   u64 bytes);
u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group);
#endif
+0 −1
Original line number Diff line number Diff line
@@ -18,7 +18,6 @@

#include <linux/sched.h>
#include <linux/sort.h>
#include <linux/ftrace.h>
#include "ctree.h"
#include "delayed-ref.h"
#include "transaction.h"
+5 −3
Original line number Diff line number Diff line
@@ -38,6 +38,7 @@
#include "locking.h"
#include "ref-cache.h"
#include "tree-log.h"
#include "free-space-cache.h"

static struct extent_io_ops btree_extent_io_ops;
static void end_workqueue_fn(struct btrfs_work *work);
@@ -1412,8 +1413,6 @@ static int bio_ready_for_csum(struct bio *bio)

	ret = extent_range_uptodate(io_tree, start + length,
				    start + buf_len - 1);
	if (ret == 1)
		return ret;
	return ret;
}

@@ -1647,12 +1646,15 @@ struct btrfs_root *open_ctree(struct super_block *sb,
	mutex_init(&fs_info->ordered_operations_mutex);
	mutex_init(&fs_info->tree_log_mutex);
	mutex_init(&fs_info->drop_mutex);
	mutex_init(&fs_info->pinned_mutex);
	mutex_init(&fs_info->chunk_mutex);
	mutex_init(&fs_info->transaction_kthread_mutex);
	mutex_init(&fs_info->cleaner_mutex);
	mutex_init(&fs_info->volume_mutex);
	mutex_init(&fs_info->tree_reloc_mutex);

	btrfs_init_free_cluster(&fs_info->meta_alloc_cluster);
	btrfs_init_free_cluster(&fs_info->data_alloc_cluster);

	init_waitqueue_head(&fs_info->transaction_throttle);
	init_waitqueue_head(&fs_info->transaction_wait);
	init_waitqueue_head(&fs_info->async_submit_wait);
Loading