Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit af36d15f authored by Linus Torvalds's avatar Linus Torvalds
Browse files
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs:
  Btrfs: remove free-space-cache.c WARN during log replay
  Btrfs: sectorsize align offsets in fiemap
  Btrfs: clear pages dirty for io and set them extent mapped
  Btrfs: wait on caching if we're loading the free space cache
  Btrfs: prefix resize related printks with btrfs:
  btrfs: fix stat blocks accounting
  Btrfs: avoid unnecessary bitmap search for cluster setup
  Btrfs: fix to search one more bitmap for cluster setup
  btrfs: mirror_num should be int, not u64
  btrfs: Fix up 32/64-bit compatibility for new ioctls
  Btrfs: fix barrier flushes
  Btrfs: fix tree corruption after multi-thread snapshots and inode_cache flush
parents 8ba8ed54 24a70313
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -683,7 +683,7 @@ static int inode_to_path(u64 inum, struct btrfs_inode_ref *iref,
		return PTR_ERR(fspath);

	if (fspath > fspath_min) {
		ipath->fspath->val[i] = (u64)fspath;
		ipath->fspath->val[i] = (u64)(unsigned long)fspath;
		++ipath->fspath->elem_cnt;
		ipath->fspath->bytes_left = fspath - fspath_min;
	} else {
+16 −1
Original line number Diff line number Diff line
@@ -514,10 +514,25 @@ static inline int should_cow_block(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root,
				   struct extent_buffer *buf)
{
	/* ensure we can see the force_cow */
	smp_rmb();

	/*
	 * We do not need to cow a block if
	 * 1) this block is not created or changed in this transaction;
	 * 2) this block does not belong to TREE_RELOC tree;
	 * 3) the root is not forced COW.
	 *
	 * What is forced COW:
	 *    when we create snapshot during commiting the transaction,
	 *    after we've finished coping src root, we must COW the shared
	 *    block to ensure the metadata consistency.
	 */
	if (btrfs_header_generation(buf) == trans->transid &&
	    !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) &&
	    !(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
	      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)))
	      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)) &&
	    !root->force_cow)
		return 0;
	return 1;
}
+4 −1
Original line number Diff line number Diff line
@@ -848,7 +848,8 @@ struct btrfs_free_cluster {
enum btrfs_caching_type {
	BTRFS_CACHE_NO		= 0,
	BTRFS_CACHE_STARTED	= 1,
	BTRFS_CACHE_FINISHED	= 2,
	BTRFS_CACHE_FAST	= 2,
	BTRFS_CACHE_FINISHED	= 3,
};

enum btrfs_disk_cache_state {
@@ -1271,6 +1272,8 @@ struct btrfs_root {
	 * for stat.  It may be used for more later
	 */
	dev_t anon_dev;

	int force_cow;
};

struct btrfs_ioctl_defrag_range_args {
+129 −18
Original line number Diff line number Diff line
@@ -620,7 +620,7 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,

static int btree_io_failed_hook(struct bio *failed_bio,
			 struct page *page, u64 start, u64 end,
			 u64 mirror_num, struct extent_state *state)
			 int mirror_num, struct extent_state *state)
{
	struct extent_io_tree *tree;
	unsigned long len;
@@ -2573,22 +2573,10 @@ static int write_dev_supers(struct btrfs_device *device,
	int errors = 0;
	u32 crc;
	u64 bytenr;
	int last_barrier = 0;

	if (max_mirrors == 0)
		max_mirrors = BTRFS_SUPER_MIRROR_MAX;

	/* make sure only the last submit_bh does a barrier */
	if (do_barriers) {
		for (i = 0; i < max_mirrors; i++) {
			bytenr = btrfs_sb_offset(i);
			if (bytenr + BTRFS_SUPER_INFO_SIZE >=
			    device->total_bytes)
				break;
			last_barrier = i;
		}
	}

	for (i = 0; i < max_mirrors; i++) {
		bytenr = btrfs_sb_offset(i);
		if (bytenr + BTRFS_SUPER_INFO_SIZE >= device->total_bytes)
@@ -2634,17 +2622,136 @@ static int write_dev_supers(struct btrfs_device *device,
			bh->b_end_io = btrfs_end_buffer_write_sync;
		}

		if (i == last_barrier && do_barriers)
			ret = submit_bh(WRITE_FLUSH_FUA, bh);
		else
			ret = submit_bh(WRITE_SYNC, bh);

		/*
		 * we fua the first super.  The others we allow
		 * to go down lazy.
		 */
		ret = submit_bh(WRITE_FUA, bh);
		if (ret)
			errors++;
	}
	return errors < i ? 0 : -1;
}

/*
 * endio for the write_dev_flush, this will wake anyone waiting
 * for the barrier when it is done
 */
static void btrfs_end_empty_barrier(struct bio *bio, int err)
{
	if (err) {
		if (err == -EOPNOTSUPP)
			set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
		clear_bit(BIO_UPTODATE, &bio->bi_flags);
	}
	if (bio->bi_private)
		complete(bio->bi_private);
	bio_put(bio);
}

/*
 * trigger flushes for one the devices.  If you pass wait == 0, the flushes are
 * sent down.  With wait == 1, it waits for the previous flush.
 *
 * any device where the flush fails with eopnotsupp are flagged as not-barrier
 * capable
 */
static int write_dev_flush(struct btrfs_device *device, int wait)
{
	struct bio *bio;
	int ret = 0;

	if (device->nobarriers)
		return 0;

	if (wait) {
		bio = device->flush_bio;
		if (!bio)
			return 0;

		wait_for_completion(&device->flush_wait);

		if (bio_flagged(bio, BIO_EOPNOTSUPP)) {
			printk("btrfs: disabling barriers on dev %s\n",
			       device->name);
			device->nobarriers = 1;
		}
		if (!bio_flagged(bio, BIO_UPTODATE)) {
			ret = -EIO;
		}

		/* drop the reference from the wait == 0 run */
		bio_put(bio);
		device->flush_bio = NULL;

		return ret;
	}

	/*
	 * one reference for us, and we leave it for the
	 * caller
	 */
	device->flush_bio = NULL;;
	bio = bio_alloc(GFP_NOFS, 0);
	if (!bio)
		return -ENOMEM;

	bio->bi_end_io = btrfs_end_empty_barrier;
	bio->bi_bdev = device->bdev;
	init_completion(&device->flush_wait);
	bio->bi_private = &device->flush_wait;
	device->flush_bio = bio;

	bio_get(bio);
	submit_bio(WRITE_FLUSH, bio);

	return 0;
}

/*
 * send an empty flush down to each device in parallel,
 * then wait for them
 */
static int barrier_all_devices(struct btrfs_fs_info *info)
{
	struct list_head *head;
	struct btrfs_device *dev;
	int errors = 0;
	int ret;

	/* send down all the barriers */
	head = &info->fs_devices->devices;
	list_for_each_entry_rcu(dev, head, dev_list) {
		if (!dev->bdev) {
			errors++;
			continue;
		}
		if (!dev->in_fs_metadata || !dev->writeable)
			continue;

		ret = write_dev_flush(dev, 0);
		if (ret)
			errors++;
	}

	/* wait for all the barriers */
	list_for_each_entry_rcu(dev, head, dev_list) {
		if (!dev->bdev) {
			errors++;
			continue;
		}
		if (!dev->in_fs_metadata || !dev->writeable)
			continue;

		ret = write_dev_flush(dev, 1);
		if (ret)
			errors++;
	}
	if (errors)
		return -EIO;
	return 0;
}

int write_all_supers(struct btrfs_root *root, int max_mirrors)
{
	struct list_head *head;
@@ -2666,6 +2773,10 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors)

	mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
	head = &root->fs_info->fs_devices->devices;

	if (do_barriers)
		barrier_all_devices(root->fs_info);

	list_for_each_entry_rcu(dev, head, dev_list) {
		if (!dev->bdev) {
			total_errors++;
+79 −40
Original line number Diff line number Diff line
@@ -467,13 +467,59 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
			     struct btrfs_root *root,
			     int load_cache_only)
{
	DEFINE_WAIT(wait);
	struct btrfs_fs_info *fs_info = cache->fs_info;
	struct btrfs_caching_control *caching_ctl;
	int ret = 0;

	smp_mb();
	if (cache->cached != BTRFS_CACHE_NO)
	caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS);
	BUG_ON(!caching_ctl);

	INIT_LIST_HEAD(&caching_ctl->list);
	mutex_init(&caching_ctl->mutex);
	init_waitqueue_head(&caching_ctl->wait);
	caching_ctl->block_group = cache;
	caching_ctl->progress = cache->key.objectid;
	atomic_set(&caching_ctl->count, 1);
	caching_ctl->work.func = caching_thread;

	spin_lock(&cache->lock);
	/*
	 * This should be a rare occasion, but this could happen I think in the
	 * case where one thread starts to load the space cache info, and then
	 * some other thread starts a transaction commit which tries to do an
	 * allocation while the other thread is still loading the space cache
	 * info.  The previous loop should have kept us from choosing this block
	 * group, but if we've moved to the state where we will wait on caching
	 * block groups we need to first check if we're doing a fast load here,
	 * so we can wait for it to finish, otherwise we could end up allocating
	 * from a block group who's cache gets evicted for one reason or
	 * another.
	 */
	while (cache->cached == BTRFS_CACHE_FAST) {
		struct btrfs_caching_control *ctl;

		ctl = cache->caching_ctl;
		atomic_inc(&ctl->count);
		prepare_to_wait(&ctl->wait, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock(&cache->lock);

		schedule();

		finish_wait(&ctl->wait, &wait);
		put_caching_control(ctl);
		spin_lock(&cache->lock);
	}

	if (cache->cached != BTRFS_CACHE_NO) {
		spin_unlock(&cache->lock);
		kfree(caching_ctl);
		return 0;
	}
	WARN_ON(cache->caching_ctl);
	cache->caching_ctl = caching_ctl;
	cache->cached = BTRFS_CACHE_FAST;
	spin_unlock(&cache->lock);

	/*
	 * We can't do the read from on-disk cache during a commit since we need
@@ -484,56 +530,51 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
	if (trans && (!trans->transaction->in_commit) &&
	    (root && root != root->fs_info->tree_root) &&
	    btrfs_test_opt(root, SPACE_CACHE)) {
		spin_lock(&cache->lock);
		if (cache->cached != BTRFS_CACHE_NO) {
			spin_unlock(&cache->lock);
			return 0;
		}
		cache->cached = BTRFS_CACHE_STARTED;
		spin_unlock(&cache->lock);

		ret = load_free_space_cache(fs_info, cache);

		spin_lock(&cache->lock);
		if (ret == 1) {
			cache->caching_ctl = NULL;
			cache->cached = BTRFS_CACHE_FINISHED;
			cache->last_byte_to_unpin = (u64)-1;
		} else {
			if (load_cache_only) {
				cache->caching_ctl = NULL;
				cache->cached = BTRFS_CACHE_NO;
			} else {
				cache->cached = BTRFS_CACHE_STARTED;
			}
		}
		spin_unlock(&cache->lock);
		wake_up(&caching_ctl->wait);
		if (ret == 1) {
			put_caching_control(caching_ctl);
			free_excluded_extents(fs_info->extent_root, cache);
			return 0;
		}
	}

	if (load_cache_only)
		return 0;

	caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS);
	BUG_ON(!caching_ctl);

	INIT_LIST_HEAD(&caching_ctl->list);
	mutex_init(&caching_ctl->mutex);
	init_waitqueue_head(&caching_ctl->wait);
	caching_ctl->block_group = cache;
	caching_ctl->progress = cache->key.objectid;
	/* one for caching kthread, one for caching block group list */
	atomic_set(&caching_ctl->count, 2);
	caching_ctl->work.func = caching_thread;

	} else {
		/*
		 * We are not going to do the fast caching, set cached to the
		 * appropriate value and wakeup any waiters.
		 */
		spin_lock(&cache->lock);
	if (cache->cached != BTRFS_CACHE_NO) {
		if (load_cache_only) {
			cache->caching_ctl = NULL;
			cache->cached = BTRFS_CACHE_NO;
		} else {
			cache->cached = BTRFS_CACHE_STARTED;
		}
		spin_unlock(&cache->lock);
		kfree(caching_ctl);
		wake_up(&caching_ctl->wait);
	}

	if (load_cache_only) {
		put_caching_control(caching_ctl);
		return 0;
	}
	cache->caching_ctl = caching_ctl;
	cache->cached = BTRFS_CACHE_STARTED;
	spin_unlock(&cache->lock);

	down_write(&fs_info->extent_commit_sem);
	atomic_inc(&caching_ctl->count);
	list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
	up_write(&fs_info->extent_commit_sem);

@@ -5178,13 +5219,15 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
		}

have_block_group:
		if (unlikely(block_group->cached == BTRFS_CACHE_NO)) {
		cached = block_group_cache_done(block_group);
		if (unlikely(!cached)) {
			u64 free_percent;

			found_uncached_bg = true;
			ret = cache_block_group(block_group, trans,
						orig_root, 1);
			if (block_group->cached == BTRFS_CACHE_FINISHED)
				goto have_block_group;
				goto alloc;

			free_percent = btrfs_block_group_used(&block_group->item);
			free_percent *= 100;
@@ -5206,7 +5249,6 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
							orig_root, 0);
				BUG_ON(ret);
			}
			found_uncached_bg = true;

			/*
			 * If loop is set for cached only, try the next block
@@ -5216,10 +5258,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
				goto loop;
		}

		cached = block_group_cache_done(block_group);
		if (unlikely(!cached))
			found_uncached_bg = true;

alloc:
		if (unlikely(block_group->ro))
			goto loop;

Loading