Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit b7c09ad4 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull btrfs updates from Chris Mason:
 "This is against 3.11-rc7, but was pulled and tested against your tree
  as of yesterday.  We do have two small incrementals queued up, but I
  wanted to get this bunch out the door before I hop on an airplane.

  This is a fairly large batch of fixes, performance improvements, and
  cleanups from the usual Btrfs suspects.

  We've included Stefan Behren's work to index subvolume UUIDs, which is
  targeted at speeding up send/receive with many subvolumes or snapshots
  in place.  It closes a long standing performance issue that was built
  in to the disk format.

  Mark Fasheh's offline dedup work is also here.  In this case offline
  means the FS is mounted and active, but the dedup work is not done
  inline during file IO.  This is a building block where utilities are
  able to ask the FS to dedup a series of extents.  The kernel takes
  care of verifying the data involved really is the same.  Today this
  involves reading both extents, but we'll continue to evolve the
  patches"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (118 commits)
  Btrfs: optimize key searches in btrfs_search_slot
  Btrfs: don't use an async starter for most of our workers
  Btrfs: only update disk_i_size as we remove extents
  Btrfs: fix deadlock in uuid scan kthread
  Btrfs: stop refusing the relocation of chunk 0
  Btrfs: fix memory leak of uuid_root in free_fs_info
  btrfs: reuse kbasename helper
  btrfs: return btrfs error code for dev excl ops err
  Btrfs: allow partial ordered extent completion
  Btrfs: convert all bug_ons in free-space-cache.c
  Btrfs: add support for asserts
  Btrfs: adjust the fs_devices->missing count on unmount
  Btrf: cleanup: don't check for root_refs == 0 twice
  Btrfs: fix for patch "cleanup: don't check the same thing twice"
  Btrfs: get rid of one BUG() in write_all_supers()
  Btrfs: allocate prelim_ref with a slab allocater
  Btrfs: pass gfp_t to __add_prelim_ref() to avoid always using GFP_ATOMIC
  Btrfs: fix race conditions in BTRFS_IOC_FS_INFO ioctl
  Btrfs: fix race between removing a dev and writing sbs
  Btrfs: remove ourselves from the cluster list under lock
  ...
parents 18129977 d7396f07
Loading
Loading
Loading
Loading
+9 −0
Original line number Diff line number Diff line
@@ -72,3 +72,12 @@ config BTRFS_DEBUG
	  performance, or export extra information via sysfs.

	  If unsure, say N.

config BTRFS_ASSERT
	bool "Btrfs assert support"
	depends on BTRFS_FS
	help
	  Enable run-time assertion checking.  This will result in panics if
	  any of the assertions trip.  This is meant for btrfs developers only.

	  If unsure, say N.
+4 −1
Original line number Diff line number Diff line
@@ -8,7 +8,10 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
	   extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
	   export.o tree-log.o free-space-cache.o zlib.o lzo.o \
	   compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
	   reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o
	   reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \
	   uuid-tree.o

btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o

btrfs-$(CONFIG_BTRFS_FS_RUN_SANITY_TESTS) += tests/free-space-tests.o
+55 −38
Original line number Diff line number Diff line
@@ -119,6 +119,26 @@ struct __prelim_ref {
	u64 wanted_disk_byte;
};

static struct kmem_cache *btrfs_prelim_ref_cache;

int __init btrfs_prelim_ref_init(void)
{
	btrfs_prelim_ref_cache = kmem_cache_create("btrfs_prelim_ref",
					sizeof(struct __prelim_ref),
					0,
					SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
					NULL);
	if (!btrfs_prelim_ref_cache)
		return -ENOMEM;
	return 0;
}

void btrfs_prelim_ref_exit(void)
{
	if (btrfs_prelim_ref_cache)
		kmem_cache_destroy(btrfs_prelim_ref_cache);
}

/*
 * the rules for all callers of this function are:
 * - obtaining the parent is the goal
@@ -160,12 +180,12 @@ struct __prelim_ref {

static int __add_prelim_ref(struct list_head *head, u64 root_id,
			    struct btrfs_key *key, int level,
			    u64 parent, u64 wanted_disk_byte, int count)
			    u64 parent, u64 wanted_disk_byte, int count,
			    gfp_t gfp_mask)
{
	struct __prelim_ref *ref;

	/* in case we're adding delayed refs, we're holding the refs spinlock */
	ref = kmalloc(sizeof(*ref), GFP_ATOMIC);
	ref = kmem_cache_alloc(btrfs_prelim_ref_cache, gfp_mask);
	if (!ref)
		return -ENOMEM;

@@ -295,10 +315,9 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
	ret = btrfs_search_old_slot(root, &ref->key_for_search, path, time_seq);
	pr_debug("search slot in root %llu (level %d, ref count %d) returned "
		 "%d for key (%llu %u %llu)\n",
		 (unsigned long long)ref->root_id, level, ref->count, ret,
		 (unsigned long long)ref->key_for_search.objectid,
		 ref->key_for_search.type,
		 (unsigned long long)ref->key_for_search.offset);
		 ref->root_id, level, ref->count, ret,
		 ref->key_for_search.objectid, ref->key_for_search.type,
		 ref->key_for_search.offset);
	if (ret < 0)
		goto out;

@@ -365,11 +384,12 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
		node = ulist_next(parents, &uiter);
		ref->parent = node ? node->val : 0;
		ref->inode_list = node ?
			(struct extent_inode_elem *)(uintptr_t)node->aux : 0;
			(struct extent_inode_elem *)(uintptr_t)node->aux : NULL;

		/* additional parents require new refs being added here */
		while ((node = ulist_next(parents, &uiter))) {
			new_ref = kmalloc(sizeof(*new_ref), GFP_NOFS);
			new_ref = kmem_cache_alloc(btrfs_prelim_ref_cache,
						   GFP_NOFS);
			if (!new_ref) {
				ret = -ENOMEM;
				goto out;
@@ -493,7 +513,7 @@ static void __merge_refs(struct list_head *head, int mode)
			ref1->count += ref2->count;

			list_del(&ref2->list);
			kfree(ref2);
			kmem_cache_free(btrfs_prelim_ref_cache, ref2);
		}

	}
@@ -548,7 +568,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
			ref = btrfs_delayed_node_to_tree_ref(node);
			ret = __add_prelim_ref(prefs, ref->root, &op_key,
					       ref->level + 1, 0, node->bytenr,
					       node->ref_mod * sgn);
					       node->ref_mod * sgn, GFP_ATOMIC);
			break;
		}
		case BTRFS_SHARED_BLOCK_REF_KEY: {
@@ -558,7 +578,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
			ret = __add_prelim_ref(prefs, ref->root, NULL,
					       ref->level + 1, ref->parent,
					       node->bytenr,
					       node->ref_mod * sgn);
					       node->ref_mod * sgn, GFP_ATOMIC);
			break;
		}
		case BTRFS_EXTENT_DATA_REF_KEY: {
@@ -570,7 +590,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
			key.offset = ref->offset;
			ret = __add_prelim_ref(prefs, ref->root, &key, 0, 0,
					       node->bytenr,
					       node->ref_mod * sgn);
					       node->ref_mod * sgn, GFP_ATOMIC);
			break;
		}
		case BTRFS_SHARED_DATA_REF_KEY: {
@@ -583,7 +603,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
			key.offset = ref->offset;
			ret = __add_prelim_ref(prefs, ref->root, &key, 0,
					       ref->parent, node->bytenr,
					       node->ref_mod * sgn);
					       node->ref_mod * sgn, GFP_ATOMIC);
			break;
		}
		default:
@@ -657,7 +677,7 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info,
		case BTRFS_SHARED_BLOCK_REF_KEY:
			ret = __add_prelim_ref(prefs, 0, NULL,
						*info_level + 1, offset,
						bytenr, 1);
						bytenr, 1, GFP_NOFS);
			break;
		case BTRFS_SHARED_DATA_REF_KEY: {
			struct btrfs_shared_data_ref *sdref;
@@ -666,13 +686,13 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info,
			sdref = (struct btrfs_shared_data_ref *)(iref + 1);
			count = btrfs_shared_data_ref_count(leaf, sdref);
			ret = __add_prelim_ref(prefs, 0, NULL, 0, offset,
					       bytenr, count);
					       bytenr, count, GFP_NOFS);
			break;
		}
		case BTRFS_TREE_BLOCK_REF_KEY:
			ret = __add_prelim_ref(prefs, offset, NULL,
					       *info_level + 1, 0,
					       bytenr, 1);
					       bytenr, 1, GFP_NOFS);
			break;
		case BTRFS_EXTENT_DATA_REF_KEY: {
			struct btrfs_extent_data_ref *dref;
@@ -687,7 +707,7 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info,
			key.offset = btrfs_extent_data_ref_offset(leaf, dref);
			root = btrfs_extent_data_ref_root(leaf, dref);
			ret = __add_prelim_ref(prefs, root, &key, 0, 0,
					       bytenr, count);
					       bytenr, count, GFP_NOFS);
			break;
		}
		default:
@@ -738,7 +758,7 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
		case BTRFS_SHARED_BLOCK_REF_KEY:
			ret = __add_prelim_ref(prefs, 0, NULL,
						info_level + 1, key.offset,
						bytenr, 1);
						bytenr, 1, GFP_NOFS);
			break;
		case BTRFS_SHARED_DATA_REF_KEY: {
			struct btrfs_shared_data_ref *sdref;
@@ -748,13 +768,13 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
					      struct btrfs_shared_data_ref);
			count = btrfs_shared_data_ref_count(leaf, sdref);
			ret = __add_prelim_ref(prefs, 0, NULL, 0, key.offset,
						bytenr, count);
						bytenr, count, GFP_NOFS);
			break;
		}
		case BTRFS_TREE_BLOCK_REF_KEY:
			ret = __add_prelim_ref(prefs, key.offset, NULL,
					       info_level + 1, 0,
					       bytenr, 1);
					       bytenr, 1, GFP_NOFS);
			break;
		case BTRFS_EXTENT_DATA_REF_KEY: {
			struct btrfs_extent_data_ref *dref;
@@ -770,7 +790,7 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
			key.offset = btrfs_extent_data_ref_offset(leaf, dref);
			root = btrfs_extent_data_ref_root(leaf, dref);
			ret = __add_prelim_ref(prefs, root, &key, 0, 0,
					       bytenr, count);
					       bytenr, count, GFP_NOFS);
			break;
		}
		default:
@@ -911,7 +931,6 @@ again:

	while (!list_empty(&prefs)) {
		ref = list_first_entry(&prefs, struct __prelim_ref, list);
		list_del(&ref->list);
		WARN_ON(ref->count < 0);
		if (ref->count && ref->root_id && ref->parent == 0) {
			/* no parent == root of tree */
@@ -935,8 +954,10 @@ again:
				}
				ret = find_extent_in_eb(eb, bytenr,
							*extent_item_pos, &eie);
				ref->inode_list = eie;
				free_extent_buffer(eb);
				if (ret < 0)
					goto out;
				ref->inode_list = eie;
			}
			ret = ulist_add_merge(refs, ref->parent,
					      (uintptr_t)ref->inode_list,
@@ -954,7 +975,8 @@ again:
				eie->next = ref->inode_list;
			}
		}
		kfree(ref);
		list_del(&ref->list);
		kmem_cache_free(btrfs_prelim_ref_cache, ref);
	}

out:
@@ -962,13 +984,13 @@ out:
	while (!list_empty(&prefs)) {
		ref = list_first_entry(&prefs, struct __prelim_ref, list);
		list_del(&ref->list);
		kfree(ref);
		kmem_cache_free(btrfs_prelim_ref_cache, ref);
	}
	while (!list_empty(&prefs_delayed)) {
		ref = list_first_entry(&prefs_delayed, struct __prelim_ref,
				       list);
		list_del(&ref->list);
		kfree(ref);
		kmem_cache_free(btrfs_prelim_ref_cache, ref);
	}

	return ret;
@@ -1326,8 +1348,7 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
	     found_key->type != BTRFS_METADATA_ITEM_KEY) ||
	    found_key->objectid > logical ||
	    found_key->objectid + size <= logical) {
		pr_debug("logical %llu is not within any extent\n",
			 (unsigned long long)logical);
		pr_debug("logical %llu is not within any extent\n", logical);
		return -ENOENT;
	}

@@ -1340,11 +1361,8 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,

	pr_debug("logical %llu is at position %llu within the extent (%llu "
		 "EXTENT_ITEM %llu) flags %#llx size %u\n",
		 (unsigned long long)logical,
		 (unsigned long long)(logical - found_key->objectid),
		 (unsigned long long)found_key->objectid,
		 (unsigned long long)found_key->offset,
		 (unsigned long long)flags, item_size);
		 logical, logical - found_key->objectid, found_key->objectid,
		 found_key->offset, flags, item_size);

	WARN_ON(!flags_ret);
	if (flags_ret) {
@@ -1516,7 +1534,7 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
		while (!ret && (root_node = ulist_next(roots, &root_uiter))) {
			pr_debug("root %llu references leaf %llu, data list "
				 "%#llx\n", root_node->val, ref_node->val,
				 (long long)ref_node->aux);
				 ref_node->aux);
			ret = iterate_leaf_refs((struct extent_inode_elem *)
						(uintptr_t)ref_node->aux,
						root_node->val,
@@ -1608,9 +1626,8 @@ static int iterate_inode_refs(u64 inum, struct btrfs_root *fs_root,
			name_len = btrfs_inode_ref_name_len(eb, iref);
			/* path must be released before calling iterate()! */
			pr_debug("following ref at offset %u for inode %llu in "
				 "tree %llu\n", cur,
				 (unsigned long long)found_key.objectid,
				 (unsigned long long)fs_root->objectid);
				 "tree %llu\n", cur, found_key.objectid,
				 fs_root->objectid);
			ret = iterate(parent, name_len,
				      (unsigned long)(iref + 1), eb, ctx);
			if (ret)
+2 −0
Original line number Diff line number Diff line
@@ -72,4 +72,6 @@ int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid,
			  struct btrfs_inode_extref **ret_extref,
			  u64 *found_off);

int __init btrfs_prelim_ref_init(void);
void btrfs_prelim_ref_exit(void);
#endif
+21 −0
Original line number Diff line number Diff line
@@ -218,6 +218,27 @@ static inline int btrfs_inode_in_log(struct inode *inode, u64 generation)
	return 0;
}

struct btrfs_dio_private {
	struct inode *inode;
	u64 logical_offset;
	u64 disk_bytenr;
	u64 bytes;
	void *private;

	/* number of bios pending for this dio */
	atomic_t pending_bios;

	/* IO errors */
	int errors;

	/* orig_bio is our btrfs_io_bio */
	struct bio *orig_bio;

	/* dio_bio came from fs/direct-io.c */
	struct bio *dio_bio;
	u8 csum[0];
};

/*
 * Disable DIO read nolock optimization, so new dio readers will be forced
 * to grab i_mutex. It is used to avoid the endless truncate due to
Loading