Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 53c56662 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull btrfs changes from Chris Mason:
 "This is a pretty long stream of bug fixes and performance fixes.

  Qu Wenruo has replaced the btrfs async threads with regular kernel
  workqueues.  We'll keep an eye out for performance differences, but
  it's nice to be using more generic code for this.

  We still have some corruption fixes and other patches coming in for
  the merge window, but this batch is tested and ready to go"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (108 commits)
  Btrfs: fix a crash of clone with inline extents's split
  btrfs: fix uninit variable warning
  Btrfs: take into account total references when doing backref lookup
  Btrfs: part 2, fix incremental send's decision to delay a dir move/rename
  Btrfs: fix incremental send's decision to delay a dir move/rename
  Btrfs: remove unnecessary inode generation lookup in send
  Btrfs: fix race when updating existing ref head
  btrfs: Add trace for btrfs_workqueue alloc/destroy
  Btrfs: less fs tree lock contention when using autodefrag
  Btrfs: return EPERM when deleting a default subvolume
  Btrfs: add missing kfree in btrfs_destroy_workqueue
  Btrfs: cache extent states in defrag code path
  Btrfs: fix deadlock with nested trans handles
  Btrfs: fix possible empty list access when flushing the delalloc inodes
  Btrfs: split the global ordered extents mutex
  Btrfs: don't flush all delalloc inodes when we doesn't get s_umount lock
  Btrfs: reclaim delalloc metadata more aggressively
  Btrfs: remove unnecessary lock in may_commit_transaction()
  Btrfs: remove the unnecessary flush when preparing the pages
  Btrfs: just do dirty page flush for the inode with compression before direct IO
  ...
parents 34917f97 00fdf13a
Loading
Loading
Loading
Loading
+227 −621

File changed.

Preview size limit exceeded, changes collapsed.

+27 −94
Original line number Diff line number Diff line
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 * Copyright (C) 2014 Fujitsu.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
@@ -19,103 +20,35 @@
#ifndef __BTRFS_ASYNC_THREAD_
#define __BTRFS_ASYNC_THREAD_

struct btrfs_worker_thread;
struct btrfs_workqueue;
/* Internal use only */
struct __btrfs_workqueue;
struct btrfs_work;
typedef void (*btrfs_func_t)(struct btrfs_work *arg);

/*
 * This is similar to a workqueue, but it is meant to spread the operations
 * across all available cpus instead of just the CPU that was used to
 * queue the work.  There is also some batching introduced to try and
 * cut down on context switches.
 *
 * By default threads are added on demand up to 2 * the number of cpus.
 * Changing struct btrfs_workers->max_workers is one way to prevent
 * demand creation of kthreads.
 *
 * the basic model of these worker threads is to embed a btrfs_work
 * structure in your own data struct, and use container_of in a
 * work function to get back to your data struct.
 */
struct btrfs_work {
	/*
	 * func should be set to the function you want called
	 * your work struct is passed as the only arg
	 *
	 * ordered_func must be set for work sent to an ordered work queue,
	 * and it is called to complete a given work item in the same
	 * order they were sent to the queue.
	 */
	void (*func)(struct btrfs_work *work);
	void (*ordered_func)(struct btrfs_work *work);
	void (*ordered_free)(struct btrfs_work *work);

	/*
	 * flags should be set to zero.  It is used to make sure the
	 * struct is only inserted once into the list.
	 */
	btrfs_func_t func;
	btrfs_func_t ordered_func;
	btrfs_func_t ordered_free;

	/* Don't touch things below */
	struct work_struct normal_work;
	struct list_head ordered_list;
	struct __btrfs_workqueue *wq;
	unsigned long flags;

	/* don't touch these */
	struct btrfs_worker_thread *worker;
	struct list_head list;
	struct list_head order_list;
};

/*
 * State for one pool of btrfs worker threads.
 *
 * A pool is passed to btrfs_init_workers(), btrfs_start_workers(),
 * btrfs_stop_workers() and btrfs_queue_worker(), declared after this
 * struct.  It holds the thread-count limits, the lists of worker threads,
 * the ordered-completion lists and the locks that guard them.
 */
struct btrfs_workers {
	/* current number of running workers */
	int num_workers;

	/*
	 * NOTE(review): presumably the number of workers whose startup is
	 * still in progress -- confirm against the implementation.
	 */
	int num_workers_starting;

	/* max number of workers allowed.  changed by btrfs_start_workers */
	int max_workers;

	/* once a worker has this many requests or fewer, it is idle */
	int idle_thresh;

	/* force completions in the order they were queued */
	int ordered;

	/* more workers required, but in an interrupt handler */
	int atomic_start_pending;

	/*
	 * are we allowed to sleep while starting workers or are we required
	 * to start them at a later time?  If we can't sleep, this indicates
	 * which queue we need to use to schedule thread creation.
	 */
	struct btrfs_workers *atomic_worker_start;

	/* list with all the work threads.  The workers on the idle list
	 * may be actively servicing jobs, but they haven't yet hit the
	 * idle thresh limit above.
	 */
	struct list_head worker_list;
	struct list_head idle_list;

	/*
	 * when operating in ordered mode, this maintains the list
	 * of work items waiting for completion
	 */
	struct list_head order_list;
	/*
	 * NOTE(review): presumably the ordered list for high-priority work
	 * (see btrfs_set_work_high_prio()) -- confirm against the
	 * implementation.
	 */
	struct list_head prio_order_list;

	/* lock for finding the next worker thread to queue on */
	spinlock_t lock;

	/* lock for the ordered lists */
	spinlock_t order_lock;

	/* extra name for this worker, used for current->name */
	char *name;

	/*
	 * NOTE(review): looks like a flag set while the pool is being shut
	 * down -- confirm against btrfs_stop_workers().
	 */
	int stopping;
};

void btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work);
int btrfs_start_workers(struct btrfs_workers *workers);
void btrfs_stop_workers(struct btrfs_workers *workers);
void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max,
			struct btrfs_workers *async_starter);
void btrfs_requeue_work(struct btrfs_work *work);
void btrfs_set_work_high_prio(struct btrfs_work *work);
struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name,
					      int flags,
					      int max_active,
					      int thresh);
void btrfs_init_work(struct btrfs_work *work,
		     btrfs_func_t func,
		     btrfs_func_t ordered_func,
		     btrfs_func_t ordered_free);
void btrfs_queue_work(struct btrfs_workqueue *wq,
		      struct btrfs_work *work);
void btrfs_destroy_workqueue(struct btrfs_workqueue *wq);
void btrfs_workqueue_set_max(struct btrfs_workqueue *wq, int max);
void btrfs_set_work_high_priority(struct btrfs_work *work);
#endif
+30 −54
Original line number Diff line number Diff line
@@ -220,7 +220,8 @@ static int __add_prelim_ref(struct list_head *head, u64 root_id,

static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
			   struct ulist *parents, struct __prelim_ref *ref,
			   int level, u64 time_seq, const u64 *extent_item_pos)
			   int level, u64 time_seq, const u64 *extent_item_pos,
			   u64 total_refs)
{
	int ret = 0;
	int slot;
@@ -249,7 +250,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
	if (path->slots[0] >= btrfs_header_nritems(path->nodes[0]))
		ret = btrfs_next_old_leaf(root, path, time_seq);

	while (!ret && count < ref->count) {
	while (!ret && count < total_refs) {
		eb = path->nodes[0];
		slot = path->slots[0];

@@ -306,7 +307,7 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
				  struct btrfs_path *path, u64 time_seq,
				  struct __prelim_ref *ref,
				  struct ulist *parents,
				  const u64 *extent_item_pos)
				  const u64 *extent_item_pos, u64 total_refs)
{
	struct btrfs_root *root;
	struct btrfs_key root_key;
@@ -361,7 +362,7 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
	}

	ret = add_all_parents(root, path, parents, ref, level, time_seq,
			      extent_item_pos);
			      extent_item_pos, total_refs);
out:
	path->lowest_level = 0;
	btrfs_release_path(path);
@@ -374,7 +375,7 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
				   struct btrfs_path *path, u64 time_seq,
				   struct list_head *head,
				   const u64 *extent_item_pos)
				   const u64 *extent_item_pos, u64 total_refs)
{
	int err;
	int ret = 0;
@@ -400,7 +401,8 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
		if (ref->count == 0)
			continue;
		err = __resolve_indirect_ref(fs_info, path, time_seq, ref,
					     parents, extent_item_pos);
					     parents, extent_item_pos,
					     total_refs);
		/*
		 * we can only tolerate ENOENT; otherwise, we should catch the
		 * error and return directly.
@@ -557,7 +559,7 @@ static void __merge_refs(struct list_head *head, int mode)
 * smaller or equal that seq to the list
 */
static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
			      struct list_head *prefs)
			      struct list_head *prefs, u64 *total_refs)
{
	struct btrfs_delayed_extent_op *extent_op = head->extent_op;
	struct rb_node *n = &head->node.rb_node;
@@ -593,6 +595,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
		default:
			BUG_ON(1);
		}
		*total_refs += (node->ref_mod * sgn);
		switch (node->type) {
		case BTRFS_TREE_BLOCK_REF_KEY: {
			struct btrfs_delayed_tree_ref *ref;
@@ -653,7 +656,8 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
 */
static int __add_inline_refs(struct btrfs_fs_info *fs_info,
			     struct btrfs_path *path, u64 bytenr,
			     int *info_level, struct list_head *prefs)
			     int *info_level, struct list_head *prefs,
			     u64 *total_refs)
{
	int ret = 0;
	int slot;
@@ -677,6 +681,7 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info,

	ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
	flags = btrfs_extent_flags(leaf, ei);
	*total_refs += btrfs_extent_refs(leaf, ei);
	btrfs_item_key_to_cpu(leaf, &found_key, slot);

	ptr = (unsigned long)(ei + 1);
@@ -859,6 +864,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
	struct list_head prefs;
	struct __prelim_ref *ref;
	struct extent_inode_elem *eie = NULL;
	u64 total_refs = 0;

	INIT_LIST_HEAD(&prefs);
	INIT_LIST_HEAD(&prefs_delayed);
@@ -873,8 +879,10 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	if (!trans)
	if (!trans) {
		path->search_commit_root = 1;
		path->skip_locking = 1;
	}

	/*
	 * grab both a lock on the path and a lock on the delayed ref head.
@@ -915,7 +923,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
			}
			spin_unlock(&delayed_refs->lock);
			ret = __add_delayed_refs(head, time_seq,
						 &prefs_delayed);
						 &prefs_delayed, &total_refs);
			mutex_unlock(&head->mutex);
			if (ret)
				goto out;
@@ -936,7 +944,8 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
		    (key.type == BTRFS_EXTENT_ITEM_KEY ||
		     key.type == BTRFS_METADATA_ITEM_KEY)) {
			ret = __add_inline_refs(fs_info, path, bytenr,
						&info_level, &prefs);
						&info_level, &prefs,
						&total_refs);
			if (ret)
				goto out;
			ret = __add_keyed_refs(fs_info, path, bytenr,
@@ -956,7 +965,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
	__merge_refs(&prefs, 1);

	ret = __resolve_indirect_refs(fs_info, path, time_seq, &prefs,
				      extent_item_pos);
				      extent_item_pos, total_refs);
	if (ret)
		goto out;

@@ -965,7 +974,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
	while (!list_empty(&prefs)) {
		ref = list_first_entry(&prefs, struct __prelim_ref, list);
		WARN_ON(ref->count < 0);
		if (ref->count && ref->root_id && ref->parent == 0) {
		if (roots && ref->count && ref->root_id && ref->parent == 0) {
			/* no parent == root of tree */
			ret = ulist_add(roots, ref->root_id, 0, GFP_NOFS);
			if (ret < 0)
@@ -1061,22 +1070,14 @@ static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
				u64 time_seq, struct ulist **leafs,
				const u64 *extent_item_pos)
{
	struct ulist *tmp;
	int ret;

	tmp = ulist_alloc(GFP_NOFS);
	if (!tmp)
		return -ENOMEM;
	*leafs = ulist_alloc(GFP_NOFS);
	if (!*leafs) {
		ulist_free(tmp);
	if (!*leafs)
		return -ENOMEM;
	}

	ret = find_parent_nodes(trans, fs_info, bytenr,
				time_seq, *leafs, tmp, extent_item_pos);
	ulist_free(tmp);

				time_seq, *leafs, NULL, extent_item_pos);
	if (ret < 0 && ret != -ENOENT) {
		free_leaf_list(*leafs);
		return ret;
@@ -1333,38 +1334,13 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
	if (ret < 0)
		return ret;

	while (1) {
		u32 nritems;
		if (path->slots[0] == 0) {
			btrfs_set_path_blocking(path);
			ret = btrfs_prev_leaf(fs_info->extent_root, path);
			if (ret != 0) {
				if (ret > 0) {
					pr_debug("logical %llu is not within "
						 "any extent\n", logical);
	ret = btrfs_previous_extent_item(fs_info->extent_root, path, 0);
	if (ret) {
		if (ret > 0)
			ret = -ENOENT;
				}
		return ret;
	}
		} else {
			path->slots[0]--;
		}
		nritems = btrfs_header_nritems(path->nodes[0]);
		if (nritems == 0) {
			pr_debug("logical %llu is not within any extent\n",
				 logical);
			return -ENOENT;
		}
		if (path->slots[0] == nritems)
			path->slots[0]--;

		btrfs_item_key_to_cpu(path->nodes[0], found_key,
				      path->slots[0]);
		if (found_key->type == BTRFS_EXTENT_ITEM_KEY ||
		    found_key->type == BTRFS_METADATA_ITEM_KEY)
			break;
	}

	btrfs_item_key_to_cpu(path->nodes[0], found_key, path->slots[0]);
	if (found_key->type == BTRFS_METADATA_ITEM_KEY)
		size = fs_info->extent_root->leafsize;
	else if (found_key->type == BTRFS_EXTENT_ITEM_KEY)
+7 −7
Original line number Diff line number Diff line
@@ -109,14 +109,17 @@ struct btrfs_inode {
	u64 last_trans;

	/*
	 * log transid when this inode was last modified
	 * transid that last logged this inode
	 */
	u64 last_sub_trans;
	u64 logged_trans;

	/*
	 * transid that last logged this inode
	 * log transid when this inode was last modified
	 */
	u64 logged_trans;
	int last_sub_trans;

	/* a local copy of root's last_log_commit */
	int last_log_commit;

	/* total number of bytes pending delalloc, used by stat to calc the
	 * real block usage of the file
@@ -155,9 +158,6 @@ struct btrfs_inode {
	/* flags field from the on disk inode */
	u32 flags;

	/* a local copy of root's last_log_commit */
	unsigned long last_log_commit;

	/*
	 * Counters to keep track of the number of extent items we may use due
	 * to delalloc and such.  outstanding_extents is the number of extent
+10 −1
Original line number Diff line number Diff line
@@ -5376,6 +5376,8 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
	int advance_right;
	u64 left_blockptr;
	u64 right_blockptr;
	u64 left_gen;
	u64 right_gen;
	u64 left_start_ctransid;
	u64 right_start_ctransid;
	u64 ctransid;
@@ -5640,7 +5642,14 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
				right_blockptr = btrfs_node_blockptr(
						right_path->nodes[right_level],
						right_path->slots[right_level]);
				if (left_blockptr == right_blockptr) {
				left_gen = btrfs_node_ptr_generation(
						left_path->nodes[left_level],
						left_path->slots[left_level]);
				right_gen = btrfs_node_ptr_generation(
						right_path->nodes[right_level],
						right_path->slots[right_level]);
				if (left_blockptr == right_blockptr &&
				    left_gen == right_gen) {
					/*
					 * As we're on a shared block, don't
					 * allow to go deeper.
Loading