
Commit adff377b authored by Linus Torvalds
* git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable: (24 commits)
  Btrfs: fix free space cache leak
  Btrfs: avoid taking the chunk_mutex in do_chunk_alloc
  Btrfs end_bio_extent_readpage should look for locked bits
  Btrfs: don't force chunk allocation in find_free_extent
  Btrfs: Check validity before setting an acl
  Btrfs: Fix incorrect inode nlink in btrfs_link()
  Btrfs: Check if btrfs_next_leaf() returns error in btrfs_real_readdir()
  Btrfs: Check if btrfs_next_leaf() returns error in btrfs_listxattr()
  Btrfs: make uncache_state unconditional
  btrfs: using cached extent_state in set/unlock combinations
  Btrfs: avoid taking the trans_mutex in btrfs_end_transaction
  Btrfs: fix subvolume mount by name problem when default mount subvolume is set
  fix user annotation in ioctl.c
  Btrfs: check for duplicate iov_base's when doing dio reads
  btrfs: properly handle overlapping areas in memmove_extent_buffer
  Btrfs: fix memory leaks in btrfs_new_inode()
  Btrfs: check for duplicate iov_base's when doing dio reads
  Btrfs: reuse the extent_map we found when calling btrfs_get_extent
  Btrfs: do not use async submit for small DIO io's
  Btrfs: don't split dio bios if we don't have to
  ...
parents d8bdc59f f65647c2
fs/btrfs/acl.c +5 −4
@@ -178,16 +178,17 @@ static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name,

	if (value) {
		acl = posix_acl_from_xattr(value, size);
-		if (acl == NULL) {
-			value = NULL;
-			size = 0;
+		if (acl) {
+			ret = posix_acl_valid(acl);
+			if (ret)
+				goto out;
		} else if (IS_ERR(acl)) {
			return PTR_ERR(acl);
		}
	}

	ret = btrfs_set_acl(NULL, dentry->d_inode, acl, type);

+out:
	posix_acl_release(acl);

	return ret;
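
The hunk above closes two holes in btrfs_xattr_acl_set(): a malformed xattr is now rejected with the decoder's error code instead of being silently treated as an ACL removal, and a decoded ACL passes through posix_acl_valid() before it is applied. Below is a compilable userspace sketch of the same validate-before-apply shape; the *_stub names are invented for illustration and are not kernel API.

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

struct acl_stub { int mode; };

/* Decoder stub: NULL plus a negative *err models posix_acl_from_xattr()
 * failing on malformed input. */
static struct acl_stub *acl_from_xattr_stub(const char *value, int *err)
{
	struct acl_stub *acl;

	if (value[0] != 'u') {			/* pretend: parse failure */
		*err = -EINVAL;
		return NULL;
	}
	acl = calloc(1, sizeof(*acl));
	acl->mode = 0644;
	return acl;
}

static int acl_set_stub(const char *value)
{
	struct acl_stub *acl = NULL;
	int ret = 0, err = 0;

	if (value) {
		acl = acl_from_xattr_stub(value, &err);
		if (acl) {
			ret = (acl->mode < 0) ? -EINVAL : 0; /* ~posix_acl_valid */
			if (ret)
				goto out;
		} else if (err) {
			return err;		/* bad input: report it */
		}
	}

	puts("applying acl");			/* stands in for btrfs_set_acl() */
out:
	free(acl);				/* ~posix_acl_release(), NULL-safe */
	return ret;
}

int main(void)
{
	return acl_set_stub("user::rw-") ? 1 : 0;
}
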
fs/btrfs/ctree.h +8 −1
@@ -740,8 +740,10 @@ struct btrfs_space_info {
	 */
	unsigned long reservation_progress;

-	int full;		/* indicates that we cannot allocate any more
+	int full:1;		/* indicates that we cannot allocate any more
				   chunks for this space */
+	int chunk_alloc:1;	/* set if we are allocating a chunk */

	int force_alloc;	/* set if we need to force a chunk alloc for
				   this space */

@@ -2576,6 +2578,11 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
			      struct inode *inode, u64 start, u64 end);
int btrfs_release_file(struct inode *inode, struct file *file);
+void btrfs_drop_pages(struct page **pages, size_t num_pages);
+int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode,
+		      struct page **pages, size_t num_pages,
+		      loff_t pos, size_t write_bytes,
+		      struct extent_state **cached);

/* tree-defrag.c */
int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
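
The header change turns full into a one-bit bitfield to make room for the new chunk_alloc:1 bit without growing struct btrfs_space_info. Both bits live in the same word, so updates must happen under the same lock (space_info->lock in this series). A standalone sketch of the layout, with names invented here:

#include <stdio.h>

/* Sketch only: two 1-bit flags packed into one int, as in the patch. */
struct space_flags {
	int full:1;		/* no more chunks can be allocated */
	int chunk_alloc:1;	/* a chunk allocation is in flight */
};

int main(void)
{
	struct space_flags f = { 0 };

	f.chunk_alloc = 1;
	printf("full=%d chunk_alloc=%d sizeof=%zu\n",
	       f.full ? 1 : 0, f.chunk_alloc ? 1 : 0, sizeof(f));
	return 0;
}
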
fs/btrfs/disk-io.c +1 −1
@@ -3057,7 +3057,7 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root)
		btrfs_destroy_pinned_extent(root,
					    root->fs_info->pinned_extents);

-		t->use_count = 0;
+		atomic_set(&t->use_count, 0);
		list_del_init(&t->list);
		memset(t, 0, sizeof(*t));
		kmem_cache_free(btrfs_transaction_cachep, t);
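
This hunk follows from use_count in struct btrfs_transaction becoming an atomic_t elsewhere in the series ("Btrfs: avoid taking the trans_mutex in btrfs_end_transaction"): once a field is atomic, every access must go through the atomic API, including the error-path reset. The C11 equivalent of the rule, as a sketch with an invented struct name:

#include <stdatomic.h>
#include <stdio.h>

/* Sketch only: a plain t->use_count = 0 store would bypass the atomic
 * API, which is what the hunk above corrects. */
struct transaction_stub {
	atomic_int use_count;
};

int main(void)
{
	struct transaction_stub t;

	atomic_init(&t.use_count, 1);
	atomic_fetch_add(&t.use_count, 1);	/* join the transaction */
	atomic_fetch_sub(&t.use_count, 1);	/* end the transaction */
	atomic_store(&t.use_count, 0);		/* kernel: atomic_set() */
	printf("use_count=%d\n", atomic_load(&t.use_count));
	return 0;
}
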
fs/btrfs/extent-tree.c +98 −27
@@ -33,6 +33,25 @@
#include "locking.h"
#include "free-space-cache.h"

+/* control flags for do_chunk_alloc's force field
+ * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk
+ * if we really need one.
+ *
+ * CHUNK_ALLOC_FORCE means it must try to allocate one
+ *
+ * CHUNK_ALLOC_LIMITED means to only try and allocate one
+ * if we have very few chunks already allocated.  This is
+ * used as part of the clustering code to help make sure
+ * we have a good pool of storage to cluster in, without
+ * filling the FS with empty chunks
+ *
+ */
+enum {
+	CHUNK_ALLOC_NO_FORCE = 0,
+	CHUNK_ALLOC_FORCE = 1,
+	CHUNK_ALLOC_LIMITED = 2,
+};

static int update_block_group(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root,
			      u64 bytenr, u64 num_bytes, int alloc);
@@ -3019,7 +3038,8 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
	found->bytes_readonly = 0;
	found->bytes_may_use = 0;
	found->full = 0;
-	found->force_alloc = 0;
+	found->force_alloc = CHUNK_ALLOC_NO_FORCE;
+	found->chunk_alloc = 0;
	*space_info = found;
	list_add_rcu(&found->list, &info->space_info);
	atomic_set(&found->caching_threads, 0);
@@ -3150,7 +3170,7 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
		if (!data_sinfo->full && alloc_chunk) {
			u64 alloc_target;

-			data_sinfo->force_alloc = 1;
+			data_sinfo->force_alloc = CHUNK_ALLOC_FORCE;
			spin_unlock(&data_sinfo->lock);
alloc:
			alloc_target = btrfs_get_alloc_profile(root, 1);
@@ -3160,7 +3180,8 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes)

			ret = do_chunk_alloc(trans, root->fs_info->extent_root,
					     bytes + 2 * 1024 * 1024,
-					     alloc_target, 0);
+					     alloc_target,
+					     CHUNK_ALLOC_NO_FORCE);
			btrfs_end_transaction(trans, root);
			if (ret < 0) {
				if (ret != -ENOSPC)
@@ -3239,31 +3260,56 @@ static void force_metadata_allocation(struct btrfs_fs_info *info)
	rcu_read_lock();
	list_for_each_entry_rcu(found, head, list) {
		if (found->flags & BTRFS_BLOCK_GROUP_METADATA)
-			found->force_alloc = 1;
+			found->force_alloc = CHUNK_ALLOC_FORCE;
	}
	rcu_read_unlock();
}

static int should_alloc_chunk(struct btrfs_root *root,
-			      struct btrfs_space_info *sinfo, u64 alloc_bytes)
+			      struct btrfs_space_info *sinfo, u64 alloc_bytes,
+			      int force)
{
	u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly;
+	u64 num_allocated = sinfo->bytes_used + sinfo->bytes_reserved;
	u64 thresh;

-	if (sinfo->bytes_used + sinfo->bytes_reserved +
-	    alloc_bytes + 256 * 1024 * 1024 < num_bytes)
+	if (force == CHUNK_ALLOC_FORCE)
+		return 1;

+	/*
+	 * in limited mode, we want to have some free space up to
+	 * about 1% of the FS size.
+	 */
+	if (force == CHUNK_ALLOC_LIMITED) {
+		thresh = btrfs_super_total_bytes(&root->fs_info->super_copy);
+		thresh = max_t(u64, 64 * 1024 * 1024,
+			       div_factor_fine(thresh, 1));
+
+		if (num_bytes - num_allocated < thresh)
+			return 1;
+	}

+	/*
+	 * we have two similar checks here, one based on a percentage
+	 * and one based on a hard number of 256MB.  The idea
+	 * is that if we have a good amount of free
+	 * room, don't allocate a chunk.  A healthy state is
+	 * less than 80% of the chunks we have allocated in use,
+	 * or more than 256MB free
+	 */
+	if (num_allocated + alloc_bytes + 256 * 1024 * 1024 < num_bytes)
		return 0;

-	if (sinfo->bytes_used + sinfo->bytes_reserved +
-	    alloc_bytes < div_factor(num_bytes, 8))
+	if (num_allocated + alloc_bytes < div_factor(num_bytes, 8))
		return 0;

	thresh = btrfs_super_total_bytes(&root->fs_info->super_copy);

	/* 256MB or 5% of the FS */
	thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 5));

	if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 3))
		return 0;

	return 1;
}
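
With forced allocation handled by the caller's new force argument, should_alloc_chunk() is now pure policy. A userspace sketch of its first two headroom checks follows; div_factor is reimplemented here to match its kernel semantics of num * factor / 10, and the final 5%-of-FS check is omitted for brevity. All names are invented for the demo.

#include <stdio.h>

typedef unsigned long long u64;

static u64 div_factor(u64 num, int factor)	/* num * factor / 10 */
{
	return num * factor / 10;
}

/* Sketch only: the first two checks of should_alloc_chunk(). */
static int should_alloc_chunk_stub(u64 total, u64 allocated, u64 alloc_bytes)
{
	/* more than 256MB would still be free afterwards: don't bother */
	if (allocated + alloc_bytes + 256ULL * 1024 * 1024 < total)
		return 0;
	/* under 80% of the space we already own is used: don't bother */
	if (allocated + alloc_bytes < div_factor(total, 8))
		return 0;
	return 1;
}

int main(void)
{
	u64 mb = 1024 * 1024;

	/* mostly empty space_info: no new chunk (prints 0) */
	printf("%d\n", should_alloc_chunk_stub(10240 * mb, 1024 * mb, 64 * mb));
	/* nearly full space_info: allocate (prints 1) */
	printf("%d\n", should_alloc_chunk_stub(10240 * mb, 10000 * mb, 64 * mb));
	return 0;
}
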

@@ -3273,10 +3319,9 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
{
	struct btrfs_space_info *space_info;
	struct btrfs_fs_info *fs_info = extent_root->fs_info;
+	int wait_for_alloc = 0;
	int ret = 0;

-	mutex_lock(&fs_info->chunk_mutex);

	flags = btrfs_reduce_alloc_profile(extent_root, flags);

	space_info = __find_space_info(extent_root->fs_info, flags);
@@ -3287,21 +3332,40 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
	}
	BUG_ON(!space_info);

+again:
	spin_lock(&space_info->lock);
	if (space_info->force_alloc)
-		force = 1;
+		force = space_info->force_alloc;
	if (space_info->full) {
		spin_unlock(&space_info->lock);
-		goto out;
+		return 0;
	}

-	if (!force && !should_alloc_chunk(extent_root, space_info,
-					  alloc_bytes)) {
+	if (!should_alloc_chunk(extent_root, space_info, alloc_bytes, force)) {
		spin_unlock(&space_info->lock);
-		goto out;
+		return 0;
+	} else if (space_info->chunk_alloc) {
+		wait_for_alloc = 1;
+	} else {
+		space_info->chunk_alloc = 1;
	}

+	spin_unlock(&space_info->lock);
+
+	mutex_lock(&fs_info->chunk_mutex);
+
+	/*
+	 * The chunk_mutex is held throughout the entirety of a chunk
+	 * allocation, so once we've acquired the chunk_mutex we know that the
+	 * other guy is done and we need to recheck and see if we should
+	 * allocate.
+	 */
+	if (wait_for_alloc) {
+		mutex_unlock(&fs_info->chunk_mutex);
+		wait_for_alloc = 0;
+		goto again;
+	}

	/*
	 * If we have mixed data/metadata chunks we want to make sure we keep
	 * allocating mixed chunks instead of individual chunks.
@@ -3327,9 +3391,10 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
		space_info->full = 1;
	else
		ret = 1;
-	space_info->force_alloc = 0;
+	space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
+	space_info->chunk_alloc = 0;
	spin_unlock(&space_info->lock);
-out:
	mutex_unlock(&extent_root->fs_info->chunk_mutex);
	return ret;
}
@@ -5303,11 +5368,13 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,

		if (allowed_chunk_alloc) {
			ret = do_chunk_alloc(trans, root, num_bytes +
-					     2 * 1024 * 1024, data, 1);
+					     2 * 1024 * 1024, data,
+					     CHUNK_ALLOC_LIMITED);
			allowed_chunk_alloc = 0;
			done_chunk_alloc = 1;
-		} else if (!done_chunk_alloc) {
-			space_info->force_alloc = 1;
+		} else if (!done_chunk_alloc &&
+			   space_info->force_alloc == CHUNK_ALLOC_NO_FORCE) {
+			space_info->force_alloc = CHUNK_ALLOC_LIMITED;
		}

		if (loop < LOOP_NO_EMPTY_SIZE) {
@@ -5393,7 +5460,8 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
	 */
	if (empty_size || root->ref_cows)
		ret = do_chunk_alloc(trans, root->fs_info->extent_root,
-				     num_bytes + 2 * 1024 * 1024, data, 0);
+				     num_bytes + 2 * 1024 * 1024, data,
+				     CHUNK_ALLOC_NO_FORCE);

	WARN_ON(num_bytes < root->sectorsize);
	ret = find_free_extent(trans, root, num_bytes, empty_size,
@@ -5405,7 +5473,7 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
		num_bytes = num_bytes & ~(root->sectorsize - 1);
		num_bytes = max(num_bytes, min_alloc_size);
		do_chunk_alloc(trans, root->fs_info->extent_root,
-			       num_bytes, data, 1);
+			       num_bytes, data, CHUNK_ALLOC_FORCE);
		goto again;
	}
	if (ret == -ENOSPC && btrfs_test_opt(root, ENOSPC_DEBUG)) {
@@ -8109,13 +8177,15 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,

	alloc_flags = update_block_group_flags(root, cache->flags);
	if (alloc_flags != cache->flags)
-		do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1);
+		do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
+			       CHUNK_ALLOC_FORCE);

	ret = set_block_group_ro(cache);
	if (!ret)
		goto out;
	alloc_flags = get_alloc_profile(root, cache->space_info->flags);
-	ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1);
+	ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
+			     CHUNK_ALLOC_FORCE);
	if (ret < 0)
		goto out;
	ret = set_block_group_ro(cache);
@@ -8128,7 +8198,8 @@ int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
			    struct btrfs_root *root, u64 type)
{
	u64 alloc_flags = get_alloc_profile(root, type);
-	return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1);
+	return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
+			      CHUNK_ALLOC_FORCE);
}

/*
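
The locking change above is the heart of "Btrfs: avoid taking the chunk_mutex in do_chunk_alloc": the cheap checks run under space_info->lock, only the actual allocator takes chunk_mutex, and a loser of the race briefly takes and drops chunk_mutex purely to wait for the winner, then rechecks from the top. A compilable pthread sketch of that shape follows; the names are invented and userspace mutexes stand in for the kernel's spinlock/mutex pair.

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t state_lock = PTHREAD_MUTEX_INITIALIZER;	/* ~space_info->lock */
static pthread_mutex_t chunk_mutex = PTHREAD_MUTEX_INITIALIZER;
static int chunk_alloc_in_flight;
static int space_full;

static int do_chunk_alloc_stub(void)
{
	int wait_for_alloc = 0;
again:
	pthread_mutex_lock(&state_lock);
	if (space_full) {
		pthread_mutex_unlock(&state_lock);
		return 0;
	}
	if (chunk_alloc_in_flight)
		wait_for_alloc = 1;		/* someone else owns the allocation */
	else
		chunk_alloc_in_flight = 1;	/* we own it */
	pthread_mutex_unlock(&state_lock);

	pthread_mutex_lock(&chunk_mutex);
	if (wait_for_alloc) {
		/* the owner holds chunk_mutex for the whole allocation, so
		 * getting here means it finished; recheck from scratch */
		pthread_mutex_unlock(&chunk_mutex);
		wait_for_alloc = 0;
		goto again;
	}

	puts("allocating chunk");		/* the expensive part */

	pthread_mutex_lock(&state_lock);
	space_full = 1;				/* pretend we filled the FS */
	chunk_alloc_in_flight = 0;
	pthread_mutex_unlock(&state_lock);
	pthread_mutex_unlock(&chunk_mutex);
	return 1;
}

int main(void)
{
	printf("first call:  %d\n", do_chunk_alloc_stub());
	printf("second call: %d\n", do_chunk_alloc_stub());
	return 0;
}
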
fs/btrfs/extent_io.c +62 −20
@@ -690,6 +690,15 @@ static void cache_state(struct extent_state *state,
	}
}

+static void uncache_state(struct extent_state **cached_ptr)
+{
+	if (cached_ptr && (*cached_ptr)) {
+		struct extent_state *state = *cached_ptr;
+		*cached_ptr = NULL;
+		free_extent_state(state);
+	}
+}

/*
 * set some bits on a range in the tree.  This may require allocations or
 * sleeping, so the gfp mask is used to indicate what is allowed.
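
uncache_state() is the release half of the caching pattern this series leans on: set_extent_uptodate() can hand back a referenced extent_state through a cached pointer, and unlock_extent_cached() consumes it, saving a second tree search. A refcounting sketch of the pair, using invented stub types rather than the kernel API:

#include <stdio.h>
#include <stdlib.h>

/* Sketch only: a refcounted stand-in for struct extent_state. */
struct state_stub {
	int refs;
	unsigned long start, end;
};

static void free_state_stub(struct state_stub *s)
{
	if (--s->refs == 0)
		free(s);
}

/* ~cache_state(): publish a referenced state through the caller's pointer */
static void cache_state_stub(struct state_stub *s, struct state_stub **cached)
{
	if (cached && !*cached) {
		s->refs++;
		*cached = s;
	}
}

/* ~uncache_state(): drop the cached reference unconditionally */
static void uncache_state_stub(struct state_stub **cached)
{
	if (cached && *cached) {
		struct state_stub *s = *cached;
		*cached = NULL;
		free_state_stub(s);
	}
}

int main(void)
{
	struct state_stub *s = calloc(1, sizeof(*s));
	struct state_stub *cached = NULL;

	s->refs = 1;
	s->end = 4095;
	cache_state_stub(s, &cached);	/* as in set_extent_uptodate(..., &cached) */
	printf("cached range [%lu, %lu]\n", cached->start, cached->end);
	uncache_state_stub(&cached);	/* as in unlock_extent_cached() */
	free_state_stub(s);
	return 0;
}
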
@@ -940,10 +949,10 @@ static int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
}

int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
-			gfp_t mask)
+			struct extent_state **cached_state, gfp_t mask)
{
-	return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL,
-			      NULL, mask);
+	return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0,
+			      NULL, cached_state, mask);
}

static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
@@ -1012,8 +1021,7 @@ int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end,
				mask);
}

-int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end,
-		  gfp_t mask)
+int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask)
{
	return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL,
				mask);
@@ -1735,6 +1743,9 @@ static void end_bio_extent_readpage(struct bio *bio, int err)

	do {
		struct page *page = bvec->bv_page;
+		struct extent_state *cached = NULL;
+		struct extent_state *state;

		tree = &BTRFS_I(page->mapping->host)->io_tree;

		start = ((u64)page->index << PAGE_CACHE_SHIFT) +
@@ -1749,9 +1760,20 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
		if (++bvec <= bvec_end)
			prefetchw(&bvec->bv_page->flags);

+		spin_lock(&tree->lock);
+		state = find_first_extent_bit_state(tree, start, EXTENT_LOCKED);
+		if (state && state->start == start) {
+			/*
+			 * take a reference on the state, unlock will drop
+			 * the ref
+			 */
+			cache_state(state, &cached);
+		}
+		spin_unlock(&tree->lock);

		if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
			ret = tree->ops->readpage_end_io_hook(page, start, end,
-							      NULL);
+							      state);
			if (ret)
				uptodate = 0;
		}
@@ -1764,15 +1786,16 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
					test_bit(BIO_UPTODATE, &bio->bi_flags);
				if (err)
					uptodate = 0;
+				uncache_state(&cached);
				continue;
			}
		}

		if (uptodate) {
-			set_extent_uptodate(tree, start, end,
+			set_extent_uptodate(tree, start, end, &cached,
					    GFP_ATOMIC);
		}
-		unlock_extent(tree, start, end, GFP_ATOMIC);
+		unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);

		if (whole_page) {
			if (uptodate) {
@@ -1811,6 +1834,7 @@ static void end_bio_extent_preparewrite(struct bio *bio, int err)

	do {
		struct page *page = bvec->bv_page;
+		struct extent_state *cached = NULL;
		tree = &BTRFS_I(page->mapping->host)->io_tree;

		start = ((u64)page->index << PAGE_CACHE_SHIFT) +
@@ -1821,13 +1845,14 @@ static void end_bio_extent_preparewrite(struct bio *bio, int err)
			prefetchw(&bvec->bv_page->flags);

		if (uptodate) {
-			set_extent_uptodate(tree, start, end, GFP_ATOMIC);
+			set_extent_uptodate(tree, start, end, &cached,
+					    GFP_ATOMIC);
		} else {
			ClearPageUptodate(page);
			SetPageError(page);
		}

-		unlock_extent(tree, start, end, GFP_ATOMIC);
+		unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);

	} while (bvec >= bio->bi_io_vec);

@@ -2016,14 +2041,17 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
	while (cur <= end) {
		if (cur >= last_byte) {
			char *userpage;
+			struct extent_state *cached = NULL;

			iosize = PAGE_CACHE_SIZE - page_offset;
			userpage = kmap_atomic(page, KM_USER0);
			memset(userpage + page_offset, 0, iosize);
			flush_dcache_page(page);
			kunmap_atomic(userpage, KM_USER0);
			set_extent_uptodate(tree, cur, cur + iosize - 1,
-					    GFP_NOFS);
-			unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
+					    &cached, GFP_NOFS);
+			unlock_extent_cached(tree, cur, cur + iosize - 1,
+					     &cached, GFP_NOFS);
			break;
		}
		em = get_extent(inode, page, page_offset, cur,
@@ -2063,14 +2091,17 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
		/* we've found a hole, just zero and go on */
		if (block_start == EXTENT_MAP_HOLE) {
			char *userpage;
+			struct extent_state *cached = NULL;

			userpage = kmap_atomic(page, KM_USER0);
			memset(userpage + page_offset, 0, iosize);
			flush_dcache_page(page);
			kunmap_atomic(userpage, KM_USER0);

			set_extent_uptodate(tree, cur, cur + iosize - 1,
-					    GFP_NOFS);
-			unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
+					    &cached, GFP_NOFS);
+			unlock_extent_cached(tree, cur, cur + iosize - 1,
+			                     &cached, GFP_NOFS);
			cur = cur + iosize;
			page_offset += iosize;
			continue;
@@ -2789,9 +2820,12 @@ int extent_prepare_write(struct extent_io_tree *tree,
			iocount++;
			block_start = block_start + iosize;
		} else {
-			set_extent_uptodate(tree, block_start, cur_end,
+			struct extent_state *cached = NULL;
+
+			set_extent_uptodate(tree, block_start, cur_end, &cached,
					    GFP_NOFS);
-			unlock_extent(tree, block_start, cur_end, GFP_NOFS);
+			unlock_extent_cached(tree, block_start, cur_end,
+					     &cached, GFP_NOFS);
			block_start = cur_end + 1;
		}
		page_offset = block_start & (PAGE_CACHE_SIZE - 1);
@@ -3457,7 +3491,7 @@ int set_extent_buffer_uptodate(struct extent_io_tree *tree,
	num_pages = num_extent_pages(eb->start, eb->len);

	set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
-			    GFP_NOFS);
+			    NULL, GFP_NOFS);
	for (i = 0; i < num_pages; i++) {
		page = extent_buffer_page(eb, i);
		if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
@@ -3885,6 +3919,12 @@ static void move_pages(struct page *dst_page, struct page *src_page,
	kunmap_atomic(dst_kaddr, KM_USER0);
}

+static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len)
+{
+	unsigned long distance = (src > dst) ? src - dst : dst - src;
+	return distance < len;
+}

static void copy_pages(struct page *dst_page, struct page *src_page,
		       unsigned long dst_off, unsigned long src_off,
		       unsigned long len)
@@ -3892,10 +3932,12 @@ static void copy_pages(struct page *dst_page, struct page *src_page,
	char *dst_kaddr = kmap_atomic(dst_page, KM_USER0);
	char *src_kaddr;

-	if (dst_page != src_page)
+	if (dst_page != src_page) {
		src_kaddr = kmap_atomic(src_page, KM_USER1);
-	else
+	} else {
		src_kaddr = dst_kaddr;
+		BUG_ON(areas_overlap(src_off, dst_off, len));
+	}

	memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
	kunmap_atomic(dst_kaddr, KM_USER0);
@@ -3970,7 +4012,7 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
		       "len %lu len %lu\n", dst_offset, len, dst->len);
		BUG_ON(1);
	}
-	if (dst_offset < src_offset) {
+	if (!areas_overlap(src_offset, dst_offset, len)) {
		memcpy_extent_buffer(dst, dst_offset, src_offset, len);
		return;
	}
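
The old test dst_offset < src_offset only caught copies that overlap in one direction; memmove_extent_buffer() now asks areas_overlap(), which is direction-agnostic, and copy_pages() asserts the same invariant on its fast path. A standalone sketch of the helper choosing between the fast memcpy path and the overlap-safe path (the buffer and offsets are made up for the demo):

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

/* Same helper as in the patch, usable in userspace as-is. */
static bool areas_overlap(unsigned long src, unsigned long dst,
			  unsigned long len)
{
	unsigned long distance = (src > dst) ? src - dst : dst - src;
	return distance < len;
}

int main(void)
{
	char buf[16] = "abcdefgh";
	unsigned long src = 2, dst = 0, len = 6;

	if (!areas_overlap(src, dst, len))
		memcpy(buf + dst, buf + src, len);	/* disjoint: fast path */
	else
		memmove(buf + dst, buf + src, len);	/* overlap-safe path */

	printf("%s\n", buf);	/* prints "cdefghgh": the overlap path ran */
	return 0;
}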