Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 31466f3e authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull btrfs updates from David Sterba:
 "Features or user visible changes:

   - fallocate: implement zero range mode

   - avoid losing data raid profile when deleting a device

   - tree item checker: more checks for directory items and xattrs

  Notable fixes:

   - raid56 recovery: don't use cached stripes, that could be
     potentially changed and a later RMW or recovery would lead to
     corruptions or failures

   - let raid56 try harder to rebuild damaged data, reading from all
     stripes if necessary

   - fix scrub to repair raid56 in a similar way as in the case above

  Other:

   - cleanups: device freeing, removed some call indirections, redundant
     bio_put/_get, unused parameters, refactorings and renames

   - RCU list traversal fixups

   - simplify mount callchain, remove recursing back when mounting a
     subvolume

   - plug for fsync, may improve bio merging on multiple devices

   - compression heurisic: replace heap sort with radix sort, gains some
     performance

   - add extent map selftests, buffered write vs dio"

* tag 'for-4.16-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (155 commits)
  btrfs: drop devid as device_list_add() arg
  btrfs: get device pointer from device_list_add()
  btrfs: set the total_devices in device_list_add()
  btrfs: move pr_info into device_list_add
  btrfs: make btrfs_free_stale_devices() to match the path
  btrfs: rename btrfs_free_stale_devices() arg to skip_dev
  btrfs: make btrfs_free_stale_devices() argument optional
  btrfs: make btrfs_free_stale_device() to iterate all stales
  btrfs: no need to check for btrfs_fs_devices::seeding
  btrfs: Use IS_ALIGNED in btrfs_truncate_block instead of opencoding it
  Btrfs: noinline merge_extent_mapping
  Btrfs: add WARN_ONCE to detect unexpected error from merge_extent_mapping
  Btrfs: extent map selftest: dio write vs dio read
  Btrfs: extent map selftest: buffered write vs dio read
  Btrfs: add extent map selftests
  Btrfs: move extent map specific code to extent_map.c
  Btrfs: add helper for em merge logic
  Btrfs: fix unexpected EEXIST from btrfs_get_extent
  Btrfs: fix incorrect block_len in merge_extent_mapping
  btrfs: Remove unused readahead spinlock
  ...
parents 6787dc24 3acbcbfc
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -19,4 +19,4 @@ btrfs-$(CONFIG_BTRFS_FS_REF_VERIFY) += ref-verify.o
btrfs-$(CONFIG_BTRFS_FS_RUN_SANITY_TESTS) += tests/free-space-tests.o \
	tests/extent-buffer-tests.o tests/btrfs-tests.o \
	tests/extent-io-tests.o tests/inode-tests.o tests/qgroup-tests.o \
	tests/free-space-tree-tests.o
	tests/free-space-tree-tests.o tests/extent-map-tests.o
+2 −1
Original line number Diff line number Diff line
@@ -216,7 +216,8 @@ static int prelim_ref_compare(struct prelim_ref *ref1,
	return 0;
}

void update_share_count(struct share_check *sc, int oldcount, int newcount)
static void update_share_count(struct share_check *sc, int oldcount,
			       int newcount)
{
	if ((!sc) || (oldcount == 0 && newcount < 1))
		return;
+118 −19
Original line number Diff line number Diff line
@@ -33,7 +33,6 @@
#include <linux/bit_spinlock.h>
#include <linux/slab.h>
#include <linux/sched/mm.h>
#include <linux/sort.h>
#include <linux/log2.h>
#include "ctree.h"
#include "disk-io.h"
@@ -45,6 +44,21 @@
#include "extent_io.h"
#include "extent_map.h"

static const char* const btrfs_compress_types[] = { "", "zlib", "lzo", "zstd" };

const char* btrfs_compress_type2str(enum btrfs_compression_type type)
{
	switch (type) {
	case BTRFS_COMPRESS_ZLIB:
	case BTRFS_COMPRESS_LZO:
	case BTRFS_COMPRESS_ZSTD:
	case BTRFS_COMPRESS_NONE:
		return btrfs_compress_types[type];
	}

	return NULL;
}

static int btrfs_decompress_bio(struct compressed_bio *cb);

static inline int compressed_bio_size(struct btrfs_fs_info *fs_info,
@@ -348,8 +362,6 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
		page->mapping = NULL;
		if (submit || bio_add_page(bio, page, PAGE_SIZE, 0) <
		    PAGE_SIZE) {
			bio_get(bio);

			/*
			 * inc the count before we submit the bio so
			 * we know the end IO handler won't happen before
@@ -372,8 +384,6 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
				bio_endio(bio);
			}

			bio_put(bio);

			bio = btrfs_bio_alloc(bdev, first_byte);
			bio->bi_opf = REQ_OP_WRITE | write_flags;
			bio->bi_private = cb;
@@ -389,7 +399,6 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
		first_byte += PAGE_SIZE;
		cond_resched();
	}
	bio_get(bio);

	ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA);
	BUG_ON(ret); /* -ENOMEM */
@@ -405,7 +414,6 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
		bio_endio(bio);
	}

	bio_put(bio);
	return 0;
}

@@ -638,8 +646,6 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
		page->mapping = NULL;
		if (submit || bio_add_page(comp_bio, page, PAGE_SIZE, 0) <
		    PAGE_SIZE) {
			bio_get(comp_bio);

			ret = btrfs_bio_wq_end_io(fs_info, comp_bio,
						  BTRFS_WQ_ENDIO_DATA);
			BUG_ON(ret); /* -ENOMEM */
@@ -666,8 +672,6 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
				bio_endio(comp_bio);
			}

			bio_put(comp_bio);

			comp_bio = btrfs_bio_alloc(bdev, cur_disk_byte);
			bio_set_op_attrs(comp_bio, REQ_OP_READ, 0);
			comp_bio->bi_private = cb;
@@ -677,7 +681,6 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
		}
		cur_disk_byte += PAGE_SIZE;
	}
	bio_get(comp_bio);

	ret = btrfs_bio_wq_end_io(fs_info, comp_bio, BTRFS_WQ_ENDIO_DATA);
	BUG_ON(ret); /* -ENOMEM */
@@ -693,7 +696,6 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
		bio_endio(comp_bio);
	}

	bio_put(comp_bio);
	return 0;

fail2:
@@ -752,6 +754,8 @@ struct heuristic_ws {
	u32 sample_size;
	/* Buckets store counters for each byte value */
	struct bucket_item *bucket;
	/* Sorting buffer */
	struct bucket_item *bucket_b;
	struct list_head list;
};

@@ -763,6 +767,7 @@ static void free_heuristic_ws(struct list_head *ws)

	kvfree(workspace->sample);
	kfree(workspace->bucket);
	kfree(workspace->bucket_b);
	kfree(workspace);
}

@@ -782,6 +787,10 @@ static struct list_head *alloc_heuristic_ws(void)
	if (!ws->bucket)
		goto fail;

	ws->bucket_b = kcalloc(BUCKET_SIZE, sizeof(*ws->bucket_b), GFP_KERNEL);
	if (!ws->bucket_b)
		goto fail;

	INIT_LIST_HEAD(&ws->list);
	return &ws->list;
fail:
@@ -1278,13 +1287,103 @@ static u32 shannon_entropy(struct heuristic_ws *ws)
	return entropy_sum * 100 / entropy_max;
}

/* Compare buckets by size, ascending */
static int bucket_comp_rev(const void *lv, const void *rv)
#define RADIX_BASE		4U
#define COUNTERS_SIZE		(1U << RADIX_BASE)

static u8 get4bits(u64 num, int shift) {
	u8 low4bits;

	num >>= shift;
	/* Reverse order */
	low4bits = (COUNTERS_SIZE - 1) - (num % COUNTERS_SIZE);
	return low4bits;
}

/*
 * Use 4 bits as radix base
 * Use 16 u32 counters for calculating new possition in buf array
 *
 * @array     - array that will be sorted
 * @array_buf - buffer array to store sorting results
 *              must be equal in size to @array
 * @num       - array size
 */
static void radix_sort(struct bucket_item *array, struct bucket_item *array_buf,
		       int num)
{
	const struct bucket_item *l = (const struct bucket_item *)lv;
	const struct bucket_item *r = (const struct bucket_item *)rv;
	u64 max_num;
	u64 buf_num;
	u32 counters[COUNTERS_SIZE];
	u32 new_addr;
	u32 addr;
	int bitlen;
	int shift;
	int i;

	return r->count - l->count;
	/*
	 * Try avoid useless loop iterations for small numbers stored in big
	 * counters.  Example: 48 33 4 ... in 64bit array
	 */
	max_num = array[0].count;
	for (i = 1; i < num; i++) {
		buf_num = array[i].count;
		if (buf_num > max_num)
			max_num = buf_num;
	}

	buf_num = ilog2(max_num);
	bitlen = ALIGN(buf_num, RADIX_BASE * 2);

	shift = 0;
	while (shift < bitlen) {
		memset(counters, 0, sizeof(counters));

		for (i = 0; i < num; i++) {
			buf_num = array[i].count;
			addr = get4bits(buf_num, shift);
			counters[addr]++;
		}

		for (i = 1; i < COUNTERS_SIZE; i++)
			counters[i] += counters[i - 1];

		for (i = num - 1; i >= 0; i--) {
			buf_num = array[i].count;
			addr = get4bits(buf_num, shift);
			counters[addr]--;
			new_addr = counters[addr];
			array_buf[new_addr] = array[i];
		}

		shift += RADIX_BASE;

		/*
		 * Normal radix expects to move data from a temporary array, to
		 * the main one.  But that requires some CPU time. Avoid that
		 * by doing another sort iteration to original array instead of
		 * memcpy()
		 */
		memset(counters, 0, sizeof(counters));

		for (i = 0; i < num; i ++) {
			buf_num = array_buf[i].count;
			addr = get4bits(buf_num, shift);
			counters[addr]++;
		}

		for (i = 1; i < COUNTERS_SIZE; i++)
			counters[i] += counters[i - 1];

		for (i = num - 1; i >= 0; i--) {
			buf_num = array_buf[i].count;
			addr = get4bits(buf_num, shift);
			counters[addr]--;
			new_addr = counters[addr];
			array[new_addr] = array_buf[i];
		}

		shift += RADIX_BASE;
	}
}

/*
@@ -1314,7 +1413,7 @@ static int byte_core_set_size(struct heuristic_ws *ws)
	struct bucket_item *bucket = ws->bucket;

	/* Sort in reverse order */
	sort(bucket, BUCKET_SIZE, sizeof(*bucket), &bucket_comp_rev, NULL);
	radix_sort(ws->bucket, ws->bucket_b, BUCKET_SIZE);

	for (i = 0; i < BYTE_CORE_SET_LOW; i++)
		coreset_sum += bucket[i].count;
+3 −1
Original line number Diff line number Diff line
@@ -75,7 +75,7 @@ struct compressed_bio {
	u32 sums;
};

void btrfs_init_compress(void);
void __init btrfs_init_compress(void);
void btrfs_exit_compress(void);

int btrfs_compress_pages(unsigned int type_level, struct address_space *mapping,
@@ -137,6 +137,8 @@ extern const struct btrfs_compress_op btrfs_zlib_compress;
extern const struct btrfs_compress_op btrfs_lzo_compress;
extern const struct btrfs_compress_op btrfs_zstd_compress;

const char* btrfs_compress_type2str(enum btrfs_compression_type type);

int btrfs_compress_heuristic(struct inode *inode, u64 start, u64 end);

#endif
+34 −22
Original line number Diff line number Diff line
@@ -1807,7 +1807,7 @@ static noinline int generic_bin_search(struct extent_buffer *eb,
 * simple bin_search frontend that does the right thing for
 * leaves vs nodes
 */
static int bin_search(struct extent_buffer *eb, const struct btrfs_key *key,
int btrfs_bin_search(struct extent_buffer *eb, const struct btrfs_key *key,
		     int level, int *slot)
{
	if (level == 0)
@@ -1824,12 +1824,6 @@ static int bin_search(struct extent_buffer *eb, const struct btrfs_key *key,
					  slot);
}

int btrfs_bin_search(struct extent_buffer *eb, const struct btrfs_key *key,
		     int level, int *slot)
{
	return bin_search(eb, key, level, slot);
}

static void root_add_used(struct btrfs_root *root, u32 size)
{
	spin_lock(&root->accounting_lock);
@@ -2614,7 +2608,7 @@ static int key_search(struct extent_buffer *b, const struct btrfs_key *key,
		      int level, int *prev_cmp, int *slot)
{
	if (*prev_cmp != 0) {
		*prev_cmp = bin_search(b, key, level, slot);
		*prev_cmp = btrfs_bin_search(b, key, level, slot);
		return *prev_cmp;
	}

@@ -2660,17 +2654,29 @@ int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *path,
}

/*
 * look for key in the tree.  path is filled in with nodes along the way
 * if key is found, we return zero and you can find the item in the leaf
 * level of the path (level 0)
 * btrfs_search_slot - look for a key in a tree and perform necessary
 * modifications to preserve tree invariants.
 *
 * If the key isn't found, the path points to the slot where it should
 * be inserted, and 1 is returned.  If there are other errors during the
 * search a negative error number is returned.
 * @trans:	Handle of transaction, used when modifying the tree
 * @p:		Holds all btree nodes along the search path
 * @root:	The root node of the tree
 * @key:	The key we are looking for
 * @ins_len:	Indicates purpose of search, for inserts it is 1, for
 *		deletions it's -1. 0 for plain searches
 * @cow:	boolean should CoW operations be performed. Must always be 1
 *		when modifying the tree.
 *
 * if ins_len > 0, nodes and leaves will be split as we walk down the
 * tree.  if ins_len < 0, nodes will be merged as we walk down the tree (if
 * possible)
 * If @ins_len > 0, nodes and leaves will be split as we walk down the tree.
 * If @ins_len < 0, nodes will be merged as we walk down the tree (if possible)
 *
 * If @key is found, 0 is returned and you can find the item in the leaf level
 * of the path (level 0)
 *
 * If @key isn't found, 1 is returned and the leaf level of the path (level 0)
 * points to the slot where it should be inserted
 *
 * If an error is encountered while searching the tree a negative error number
 * is returned
 */
int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		      const struct btrfs_key *key, struct btrfs_path *p,
@@ -2774,6 +2780,8 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		 * contention with the cow code
		 */
		if (cow) {
			bool last_level = (level == (BTRFS_MAX_LEVEL - 1));

			/*
			 * if we don't really need to cow this block
			 * then we don't want to set the path blocking,
@@ -2798,6 +2806,10 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
			}

			btrfs_set_path_blocking(p);
			if (last_level)
				err = btrfs_cow_block(trans, root, b, NULL, 0,
						      &b);
			else
				err = btrfs_cow_block(trans, root, b,
						      p->nodes[level + 1],
						      p->slots[level + 1], &b);
@@ -5175,7 +5187,7 @@ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
	while (1) {
		nritems = btrfs_header_nritems(cur);
		level = btrfs_header_level(cur);
		sret = bin_search(cur, min_key, level, &slot);
		sret = btrfs_bin_search(cur, min_key, level, &slot);

		/* at the lowest level, we're done, setup the path and exit */
		if (level == path->lowest_level) {
Loading