Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 5404525b authored by Linus Torvalds
Browse files
Pull btrfs fixes from David Sterba:

 - fix for improper fsync after hardlink

 - fix for a corruption during file deduplication

 - use after free fixes

 - RCU warning fix

 - fix for buffered write to nodatacow file

* tag 'for-4.19-rc2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: Fix suspicious RCU usage warning in btrfs_debug_in_rcu
  btrfs: use after free in btrfs_quota_enable
  btrfs: btrfs_shrink_device should call commit transaction at the end
  btrfs: fix qgroup_free wrong num_bytes in btrfs_subvolume_reserve_metadata
  Btrfs: fix data corruption when deduplicating between different files
  Btrfs: sync log after logging new name
  Btrfs: fix unexpected failure of nocow buffered writes after snapshotting when low on space
parents b36fdc68 b6fdfbff
Loading
Loading
Loading
Loading
+10 −2
Original line number Diff line number Diff line
@@ -1280,6 +1280,7 @@ struct btrfs_root {
	int send_in_progress;
	struct btrfs_subvolume_writers *subv_writers;
	atomic_t will_be_snapshotted;
	atomic_t snapshot_force_cow;

	/* For qgroup metadata reserved space */
	spinlock_t qgroup_meta_rsv_lock;
@@ -3390,9 +3391,9 @@ do { \
#define btrfs_debug(fs_info, fmt, args...) \
	btrfs_no_printk(fs_info, KERN_DEBUG fmt, ##args)
#define btrfs_debug_in_rcu(fs_info, fmt, args...) \
	btrfs_no_printk(fs_info, KERN_DEBUG fmt, ##args)
	btrfs_no_printk_in_rcu(fs_info, KERN_DEBUG fmt, ##args)
#define btrfs_debug_rl_in_rcu(fs_info, fmt, args...) \
	btrfs_no_printk(fs_info, KERN_DEBUG fmt, ##args)
	btrfs_no_printk_in_rcu(fs_info, KERN_DEBUG fmt, ##args)
#define btrfs_debug_rl(fs_info, fmt, args...) \
	btrfs_no_printk(fs_info, KERN_DEBUG fmt, ##args)
#endif
@@ -3404,6 +3405,13 @@ do { \
	rcu_read_unlock();				\
} while (0)

/*
 * Call btrfs_no_printk() between rcu_read_lock() and rcu_read_unlock(),
 * forwarding fs_info, fmt and any extra arguments unchanged.
 *
 * The body is wrapped in do { } while (0) so the expansion behaves as a
 * single statement at the call site.
 */
#define btrfs_no_printk_in_rcu(fs_info, fmt, args...)	\
do {							\
	rcu_read_lock();				\
	btrfs_no_printk(fs_info, fmt, ##args);		\
	rcu_read_unlock();				\
} while (0)

#define btrfs_printk_ratelimited(fs_info, fmt, args...)		\
do {								\
	static DEFINE_RATELIMIT_STATE(_rs,			\
+1 −0
Original line number Diff line number Diff line
@@ -1187,6 +1187,7 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
	atomic_set(&root->log_batch, 0);
	refcount_set(&root->refs, 1);
	atomic_set(&root->will_be_snapshotted, 0);
	atomic_set(&root->snapshot_force_cow, 0);
	root->log_transid = 0;
	root->log_transid_committed = -1;
	root->last_log_commit = 0;
+8 −9
Original line number Diff line number Diff line
@@ -5800,7 +5800,7 @@ void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans)
 * root: the root of the parent directory
 * rsv: block reservation
 * items: the number of items that we need to reserve space for
 * qgroup_reserved: used to return the reserved size in qgroup
 * use_global_rsv: allow fallback to the global block reservation
 *
 * This function is used to reserve the space for snapshot/subvolume
 * creation and deletion. Those operations are different from the
@@ -5810,10 +5810,10 @@ void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans)
 * the space reservation mechanism in start_transaction().
 */
int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
				     struct btrfs_block_rsv *rsv,
				     int items,
				     struct btrfs_block_rsv *rsv, int items,
				     bool use_global_rsv)
{
	u64 qgroup_num_bytes = 0;
	u64 num_bytes;
	int ret;
	struct btrfs_fs_info *fs_info = root->fs_info;
@@ -5821,12 +5821,11 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,

	if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
		/* One for parent inode, two for dir entries */
		num_bytes = 3 * fs_info->nodesize;
		ret = btrfs_qgroup_reserve_meta_prealloc(root, num_bytes, true);
		qgroup_num_bytes = 3 * fs_info->nodesize;
		ret = btrfs_qgroup_reserve_meta_prealloc(root,
				qgroup_num_bytes, true);
		if (ret)
			return ret;
	} else {
		num_bytes = 0;
	}

	num_bytes = btrfs_calc_trans_metadata_size(fs_info, items);
@@ -5838,8 +5837,8 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
	if (ret == -ENOSPC && use_global_rsv)
		ret = btrfs_block_rsv_migrate(global_rsv, rsv, num_bytes, 1);

	if (ret && num_bytes)
		btrfs_qgroup_free_meta_prealloc(root, num_bytes);
	if (ret && qgroup_num_bytes)
		btrfs_qgroup_free_meta_prealloc(root, qgroup_num_bytes);

	return ret;
}
+84 −33
Original line number Diff line number Diff line
@@ -1271,7 +1271,7 @@ static noinline int run_delalloc_nocow(struct inode *inode,
	u64 disk_num_bytes;
	u64 ram_bytes;
	int extent_type;
	int ret, err;
	int ret;
	int type;
	int nocow;
	int check_prev = 1;
@@ -1403,11 +1403,8 @@ static noinline int run_delalloc_nocow(struct inode *inode,
			 * if there are pending snapshots for this root,
			 * we fall into common COW way.
			 */
			if (!nolock) {
				err = btrfs_start_write_no_snapshotting(root);
				if (!err)
			if (!nolock && atomic_read(&root->snapshot_force_cow))
				goto out_check;
			}
			/*
			 * force cow if csum exists in the range.
			 * this ensure that csum for a given extent are
@@ -1416,9 +1413,6 @@ static noinline int run_delalloc_nocow(struct inode *inode,
			ret = csum_exist_in_range(fs_info, disk_bytenr,
						  num_bytes);
			if (ret) {
				if (!nolock)
					btrfs_end_write_no_snapshotting(root);

				/*
				 * ret could be -EIO if the above fails to read
				 * metadata.
@@ -1431,11 +1425,8 @@ static noinline int run_delalloc_nocow(struct inode *inode,
				WARN_ON_ONCE(nolock);
				goto out_check;
			}
			if (!btrfs_inc_nocow_writers(fs_info, disk_bytenr)) {
				if (!nolock)
					btrfs_end_write_no_snapshotting(root);
			if (!btrfs_inc_nocow_writers(fs_info, disk_bytenr))
				goto out_check;
			}
			nocow = 1;
		} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
			extent_end = found_key.offset +
@@ -1448,8 +1439,6 @@ static noinline int run_delalloc_nocow(struct inode *inode,
out_check:
		if (extent_end <= start) {
			path->slots[0]++;
			if (!nolock && nocow)
				btrfs_end_write_no_snapshotting(root);
			if (nocow)
				btrfs_dec_nocow_writers(fs_info, disk_bytenr);
			goto next_slot;
@@ -1471,8 +1460,6 @@ static noinline int run_delalloc_nocow(struct inode *inode,
					     end, page_started, nr_written, 1,
					     NULL);
			if (ret) {
				if (!nolock && nocow)
					btrfs_end_write_no_snapshotting(root);
				if (nocow)
					btrfs_dec_nocow_writers(fs_info,
								disk_bytenr);
@@ -1492,8 +1479,6 @@ static noinline int run_delalloc_nocow(struct inode *inode,
					  ram_bytes, BTRFS_COMPRESS_NONE,
					  BTRFS_ORDERED_PREALLOC);
			if (IS_ERR(em)) {
				if (!nolock && nocow)
					btrfs_end_write_no_snapshotting(root);
				if (nocow)
					btrfs_dec_nocow_writers(fs_info,
								disk_bytenr);
@@ -1532,8 +1517,6 @@ static noinline int run_delalloc_nocow(struct inode *inode,
					     EXTENT_CLEAR_DATA_RESV,
					     PAGE_UNLOCK | PAGE_SET_PRIVATE2);

		if (!nolock && nocow)
			btrfs_end_write_no_snapshotting(root);
		cur_offset = extent_end;

		/*
@@ -6639,6 +6622,8 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
		drop_inode = 1;
	} else {
		struct dentry *parent = dentry->d_parent;
		int ret;

		err = btrfs_update_inode(trans, root, inode);
		if (err)
			goto fail;
@@ -6652,7 +6637,12 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
				goto fail;
		}
		d_instantiate(dentry, inode);
		btrfs_log_new_name(trans, BTRFS_I(inode), NULL, parent);
		ret = btrfs_log_new_name(trans, BTRFS_I(inode), NULL, parent,
					 true, NULL);
		if (ret == BTRFS_NEED_TRANS_COMMIT) {
			err = btrfs_commit_transaction(trans);
			trans = NULL;
		}
	}

fail:
@@ -9388,14 +9378,21 @@ static int btrfs_rename_exchange(struct inode *old_dir,
	u64 new_idx = 0;
	u64 root_objectid;
	int ret;
	int ret2;
	bool root_log_pinned = false;
	bool dest_log_pinned = false;
	struct btrfs_log_ctx ctx_root;
	struct btrfs_log_ctx ctx_dest;
	bool sync_log_root = false;
	bool sync_log_dest = false;
	bool commit_transaction = false;

	/* we only allow rename subvolume link between subvolumes */
	if (old_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest)
		return -EXDEV;

	btrfs_init_log_ctx(&ctx_root, old_inode);
	btrfs_init_log_ctx(&ctx_dest, new_inode);

	/* close the race window with snapshot create/destroy ioctl */
	if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
		down_read(&fs_info->subvol_sem);
@@ -9542,15 +9539,29 @@ static int btrfs_rename_exchange(struct inode *old_dir,

	if (root_log_pinned) {
		parent = new_dentry->d_parent;
		btrfs_log_new_name(trans, BTRFS_I(old_inode), BTRFS_I(old_dir),
				parent);
		ret = btrfs_log_new_name(trans, BTRFS_I(old_inode),
					 BTRFS_I(old_dir), parent,
					 false, &ctx_root);
		if (ret == BTRFS_NEED_LOG_SYNC)
			sync_log_root = true;
		else if (ret == BTRFS_NEED_TRANS_COMMIT)
			commit_transaction = true;
		ret = 0;
		btrfs_end_log_trans(root);
		root_log_pinned = false;
	}
	if (dest_log_pinned) {
		if (!commit_transaction) {
			parent = old_dentry->d_parent;
		btrfs_log_new_name(trans, BTRFS_I(new_inode), BTRFS_I(new_dir),
				parent);
			ret = btrfs_log_new_name(trans, BTRFS_I(new_inode),
						 BTRFS_I(new_dir), parent,
						 false, &ctx_dest);
			if (ret == BTRFS_NEED_LOG_SYNC)
				sync_log_dest = true;
			else if (ret == BTRFS_NEED_TRANS_COMMIT)
				commit_transaction = true;
			ret = 0;
		}
		btrfs_end_log_trans(dest);
		dest_log_pinned = false;
	}
@@ -9583,8 +9594,26 @@ static int btrfs_rename_exchange(struct inode *old_dir,
			dest_log_pinned = false;
		}
	}
	if (!ret && sync_log_root && !commit_transaction) {
		ret = btrfs_sync_log(trans, BTRFS_I(old_inode)->root,
				     &ctx_root);
		if (ret)
			commit_transaction = true;
	}
	if (!ret && sync_log_dest && !commit_transaction) {
		ret = btrfs_sync_log(trans, BTRFS_I(new_inode)->root,
				     &ctx_dest);
		if (ret)
			commit_transaction = true;
	}
	if (commit_transaction) {
		ret = btrfs_commit_transaction(trans);
	} else {
		int ret2;

		ret2 = btrfs_end_transaction(trans);
		ret = ret ? ret : ret2;
	}
out_notrans:
	if (new_ino == BTRFS_FIRST_FREE_OBJECTID)
		up_read(&fs_info->subvol_sem);
@@ -9661,6 +9690,9 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
	int ret;
	u64 old_ino = btrfs_ino(BTRFS_I(old_inode));
	bool log_pinned = false;
	struct btrfs_log_ctx ctx;
	bool sync_log = false;
	bool commit_transaction = false;

	if (btrfs_ino(BTRFS_I(new_dir)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
		return -EPERM;
@@ -9818,8 +9850,15 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
	if (log_pinned) {
		struct dentry *parent = new_dentry->d_parent;

		btrfs_log_new_name(trans, BTRFS_I(old_inode), BTRFS_I(old_dir),
				parent);
		btrfs_init_log_ctx(&ctx, old_inode);
		ret = btrfs_log_new_name(trans, BTRFS_I(old_inode),
					 BTRFS_I(old_dir), parent,
					 false, &ctx);
		if (ret == BTRFS_NEED_LOG_SYNC)
			sync_log = true;
		else if (ret == BTRFS_NEED_TRANS_COMMIT)
			commit_transaction = true;
		ret = 0;
		btrfs_end_log_trans(root);
		log_pinned = false;
	}
@@ -9856,7 +9895,19 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
		btrfs_end_log_trans(root);
		log_pinned = false;
	}
	btrfs_end_transaction(trans);
	if (!ret && sync_log) {
		ret = btrfs_sync_log(trans, BTRFS_I(old_inode)->root, &ctx);
		if (ret)
			commit_transaction = true;
	}
	if (commit_transaction) {
		ret = btrfs_commit_transaction(trans);
	} else {
		int ret2;

		ret2 = btrfs_end_transaction(trans);
		ret = ret ? ret : ret2;
	}
out_notrans:
	if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
		up_read(&fs_info->subvol_sem);
+35 −0
Original line number Diff line number Diff line
@@ -747,6 +747,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
	struct btrfs_pending_snapshot *pending_snapshot;
	struct btrfs_trans_handle *trans;
	int ret;
	bool snapshot_force_cow = false;

	if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))
		return -EINVAL;
@@ -763,6 +764,11 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
		goto free_pending;
	}

	/*
	 * Force new buffered writes to reserve space even when NOCOW is
	 * possible. This is to prevent later writeback (running delalloc)
	 * from falling back to COW mode and unexpectedly failing with ENOSPC.
	 */
	atomic_inc(&root->will_be_snapshotted);
	smp_mb__after_atomic();
	/* wait for no snapshot writes */
@@ -773,6 +779,14 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
	if (ret)
		goto dec_and_free;

	/*
	 * All previous writes have started writeback in NOCOW mode, so now
	 * we force future writes to fall back to COW mode during snapshot
	 * creation.
	 */
	atomic_inc(&root->snapshot_force_cow);
	snapshot_force_cow = true;

	btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1);

	btrfs_init_block_rsv(&pending_snapshot->block_rsv,
@@ -837,6 +851,8 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
fail:
	btrfs_subvolume_release_metadata(fs_info, &pending_snapshot->block_rsv);
dec_and_free:
	if (snapshot_force_cow)
		atomic_dec(&root->snapshot_force_cow);
	if (atomic_dec_and_test(&root->will_be_snapshotted))
		wake_up_var(&root->will_be_snapshotted);
free_pending:
@@ -3453,6 +3469,25 @@ static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 olen,

		same_lock_start = min_t(u64, loff, dst_loff);
		same_lock_len = max_t(u64, loff, dst_loff) + len - same_lock_start;
	} else {
		/*
		 * If the source and destination inodes are different, the
		 * source's range end offset matches the source's i_size, that
		 * i_size is not a multiple of the sector size, and the
		 * destination range does not go past the destination's i_size,
		 * we must round down the length to the nearest sector size
		 * multiple. If we don't do this adjustment we end up replacing
		 * with zeroes the bytes in the range that starts at the
		 * deduplication range's end offset and ends at the next sector
		 * size multiple.
		 */
		if (loff + olen == i_size_read(src) &&
		    dst_loff + len < i_size_read(dst)) {
			const u64 sz = BTRFS_I(src)->root->fs_info->sectorsize;

			len = round_down(i_size_read(src), sz) - loff;
			olen = len;
		}
	}

again:
Loading