Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 70499656 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull btrfs updates from David Sterba:
 "User visible features:

   - added support for the ioctl FS_IOC_FSGETXATTR, per-inode flags,
     successor of GET/SETFLAGS; now supports only existing flags:
     append, immutable, noatime, nodump, sync

   - 3 new unprivileged ioctls to allow users to enumerate subvolumes

   - dedupe syscall implementation does not restrict the range to 16MiB,
     though it still splits the whole range to 16MiB chunks

   - on user demand, rmdir() is able to delete an empty subvolume,
     export the capability in sysfs

   - fix inode number types in tracepoints, other cleanups

   - send: improved speed when dealing with a large removed directory,
     measurements show decrease from 2000 minutes to 2 minutes on a
     directory with 2 million entries

   - pre-commit check of superblock to detect a mysterious in-memory
     corruption

   - log message updates

  Other changes:

   - orphan inode cleanup improved, does no keep long-standing
     reservations that could lead up to early ENOSPC in some cases

   - slight improvement of handling snapshotted NOCOW files by avoiding
     some unnecessary tree searches

   - avoid OOM when dealing with many unmergeable small extents at flush
     time

   - speedup conversion of free space tree representations from/to
     bitmap/tree

   - code refactoring, deletion, cleanups:
      + delayed refs
      + delayed iput
      + redundant argument removals
      + memory barrier cleanups
      + remove a redundant mutex supposedly excluding several ioctls to
        run in parallel

   - new tracepoints for blockgroup manipulation

   - more sanity checks of compressed headers"

* tag 'for-4.18-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (183 commits)
  btrfs: Add unprivileged version of ino_lookup ioctl
  btrfs: Add unprivileged ioctl which returns subvolume's ROOT_REF
  btrfs: Add unprivileged ioctl which returns subvolume information
  Btrfs: clean up error handling in btrfs_truncate()
  btrfs: Factor out write portion of btrfs_get_blocks_direct
  btrfs: Factor out read portion of btrfs_get_blocks_direct
  btrfs: return ENOMEM if path allocation fails in btrfs_cross_ref_exist
  btrfs: raid56: Remove VLA usage
  btrfs: return error value if create_io_em failed in cow_file_range
  btrfs: drop useless member qgroup_reserved of btrfs_pending_snapshot
  btrfs: drop unused parameter qgroup_reserved
  btrfs: balance dirty metadata pages in btrfs_finish_ordered_io
  btrfs: lift some btrfs_cross_ref_exist checks in nocow path
  btrfs: Remove fs_info argument from btrfs_uuid_tree_rem
  btrfs: Remove fs_info argument from btrfs_uuid_tree_add
  Btrfs: remove unused check of skip_locking
  Btrfs: remove always true check in unlock_up
  Btrfs: grab write lock directly if write_lock_level is the max level
  Btrfs: move get root out of btrfs_search_slot to a helper
  Btrfs: use more straightforward extent_buffer_uptodate check
  ...
parents e3a44fd7 23d0b79d
Loading
Loading
Loading
Loading
+11 −11
Original line number Diff line number Diff line
@@ -19,17 +19,17 @@
 * ordered operations list so that we make sure to flush out any
 * new data the application may have written before commit.
 */
#define BTRFS_INODE_ORDERED_DATA_CLOSE		0
#define BTRFS_INODE_ORPHAN_META_RESERVED	1
#define BTRFS_INODE_DUMMY			2
#define BTRFS_INODE_IN_DEFRAG			3
#define BTRFS_INODE_HAS_ORPHAN_ITEM		4
#define BTRFS_INODE_HAS_ASYNC_EXTENT		5
#define BTRFS_INODE_NEEDS_FULL_SYNC		6
#define BTRFS_INODE_COPY_EVERYTHING		7
#define BTRFS_INODE_IN_DELALLOC_LIST		8
#define BTRFS_INODE_READDIO_NEED_LOCK		9
#define BTRFS_INODE_HAS_PROPS		        10
enum {
	BTRFS_INODE_ORDERED_DATA_CLOSE = 0,
	BTRFS_INODE_DUMMY,
	BTRFS_INODE_IN_DEFRAG,
	BTRFS_INODE_HAS_ASYNC_EXTENT,
	BTRFS_INODE_NEEDS_FULL_SYNC,
	BTRFS_INODE_COPY_EVERYTHING,
	BTRFS_INODE_IN_DELALLOC_LIST,
	BTRFS_INODE_READDIO_NEED_LOCK,
	BTRFS_INODE_HAS_PROPS,
};

/* in memory btrfs inode */
struct btrfs_inode {
+1 −6
Original line number Diff line number Diff line
@@ -990,12 +990,7 @@ static void __free_workspace(int type, struct list_head *workspace,
		btrfs_compress_op[idx]->free_workspace(workspace);
	atomic_dec(total_ws);
wake:
	/*
	 * Make sure counter is updated before we wake up waiters.
	 */
	smp_mb();
	if (waitqueue_active(ws_wait))
		wake_up(ws_wait);
	cond_wake_up(ws_wait);
}

static void free_workspace(int type, struct list_head *ws)
+2 −0
Original line number Diff line number Diff line
@@ -6,6 +6,8 @@
#ifndef BTRFS_COMPRESSION_H
#define BTRFS_COMPRESSION_H

#include <linux/sizes.h>

/*
 * We want to make sure that amount of RAM required to uncompress an extent is
 * reasonable, so we limit the total size in ram of a compressed extent to
+75 −48
Original line number Diff line number Diff line
@@ -2330,7 +2330,7 @@ static noinline void unlock_up(struct btrfs_path *path, int level,
			no_skips = 1;

		t = path->nodes[i];
		if (i >= lowest_unlock && i > skip_level && path->locks[i]) {
		if (i >= lowest_unlock && i > skip_level) {
			btrfs_tree_unlock_rw(t, path->locks[i]);
			path->locks[i] = 0;
			if (write_lock_level &&
@@ -2432,7 +2432,6 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
	btrfs_unlock_up_safe(p, level + 1);
	btrfs_set_path_blocking(p);

	free_extent_buffer(tmp);
	if (p->reada != READA_NONE)
		reada_for_search(fs_info, p, level, slot, key->objectid);

@@ -2446,7 +2445,7 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
		 * and give up so that our caller doesn't loop forever
		 * on our EAGAINs.
		 */
		if (!btrfs_buffer_uptodate(tmp, 0, 0))
		if (!extent_buffer_uptodate(tmp))
			ret = -EIO;
		free_extent_buffer(tmp);
	} else {
@@ -2599,6 +2598,78 @@ int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *path,
	return 0;
}

static struct extent_buffer *btrfs_search_slot_get_root(struct btrfs_root *root,
							struct btrfs_path *p,
							int write_lock_level)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct extent_buffer *b;
	int root_lock;
	int level = 0;

	/* We try very hard to do read locks on the root */
	root_lock = BTRFS_READ_LOCK;

	if (p->search_commit_root) {
		/* The commit roots are read only so we always do read locks */
		if (p->need_commit_sem)
			down_read(&fs_info->commit_root_sem);
		b = root->commit_root;
		extent_buffer_get(b);
		level = btrfs_header_level(b);
		if (p->need_commit_sem)
			up_read(&fs_info->commit_root_sem);
		/*
		 * Ensure that all callers have set skip_locking when
		 * p->search_commit_root = 1.
		 */
		ASSERT(p->skip_locking == 1);

		goto out;
	}

	if (p->skip_locking) {
		b = btrfs_root_node(root);
		level = btrfs_header_level(b);
		goto out;
	}

	/*
	 * If the level is set to maximum, we can skip trying to get the read
	 * lock.
	 */
	if (write_lock_level < BTRFS_MAX_LEVEL) {
		/*
		 * We don't know the level of the root node until we actually
		 * have it read locked
		 */
		b = btrfs_read_lock_root_node(root);
		level = btrfs_header_level(b);
		if (level > write_lock_level)
			goto out;

		/* Whoops, must trade for write lock */
		btrfs_tree_read_unlock(b);
		free_extent_buffer(b);
	}

	b = btrfs_lock_root_node(root);
	root_lock = BTRFS_WRITE_LOCK;

	/* The level might have changed, check again */
	level = btrfs_header_level(b);

out:
	p->nodes[level] = b;
	if (!p->skip_locking)
		p->locks[level] = root_lock;
	/*
	 * Callers are responsible for dropping b's references.
	 */
	return b;
}


/*
 * btrfs_search_slot - look for a key in a tree and perform necessary
 * modifications to preserve tree invariants.
@@ -2635,7 +2706,6 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
	int err;
	int level;
	int lowest_unlock = 1;
	int root_lock;
	/* everything at write_lock_level or lower must be write locked */
	int write_lock_level = 0;
	u8 lowest_level = 0;
@@ -2673,50 +2743,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,

again:
	prev_cmp = -1;
	/*
	 * we try very hard to do read locks on the root
	 */
	root_lock = BTRFS_READ_LOCK;
	level = 0;
	if (p->search_commit_root) {
		/*
		 * the commit roots are read only
		 * so we always do read locks
		 */
		if (p->need_commit_sem)
			down_read(&fs_info->commit_root_sem);
		b = root->commit_root;
		extent_buffer_get(b);
		level = btrfs_header_level(b);
		if (p->need_commit_sem)
			up_read(&fs_info->commit_root_sem);
		if (!p->skip_locking)
			btrfs_tree_read_lock(b);
	} else {
		if (p->skip_locking) {
			b = btrfs_root_node(root);
			level = btrfs_header_level(b);
		} else {
			/* we don't know the level of the root node
			 * until we actually have it read locked
			 */
			b = btrfs_read_lock_root_node(root);
			level = btrfs_header_level(b);
			if (level <= write_lock_level) {
				/* whoops, must trade for write lock */
				btrfs_tree_read_unlock(b);
				free_extent_buffer(b);
				b = btrfs_lock_root_node(root);
				root_lock = BTRFS_WRITE_LOCK;

				/* the level might have changed, check again */
				level = btrfs_header_level(b);
			}
		}
	}
	p->nodes[level] = b;
	if (!p->skip_locking)
		p->locks[level] = root_lock;
	b = btrfs_search_slot_get_root(root, p, write_lock_level);

	while (b) {
		level = btrfs_header_level(b);
+37 −39
Original line number Diff line number Diff line
@@ -739,6 +739,12 @@ struct btrfs_delayed_root;
 */
#define BTRFS_FS_NEED_ASYNC_COMMIT		17

/*
 * Indicate that balance has been set up from the ioctl and is in the main
 * phase. The fs_info::balance_ctl is initialized.
 */
#define BTRFS_FS_BALANCE_RUNNING		18

struct btrfs_fs_info {
	u8 fsid[BTRFS_FSID_SIZE];
	u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
@@ -838,7 +844,6 @@ struct btrfs_fs_info {
	struct mutex transaction_kthread_mutex;
	struct mutex cleaner_mutex;
	struct mutex chunk_mutex;
	struct mutex volume_mutex;

	/*
	 * this is taken to make sure we don't set block groups ro after
@@ -1004,7 +1009,6 @@ struct btrfs_fs_info {
	/* restriper state */
	spinlock_t balance_lock;
	struct mutex balance_mutex;
	atomic_t balance_running;
	atomic_t balance_pause_req;
	atomic_t balance_cancel_req;
	struct btrfs_balance_control *balance_ctl;
@@ -1219,9 +1223,6 @@ struct btrfs_root {
	spinlock_t log_extents_lock[2];
	struct list_head logged_list[2];

	spinlock_t orphan_lock;
	atomic_t orphan_inodes;
	struct btrfs_block_rsv *orphan_block_rsv;
	int orphan_cleanup_state;

	spinlock_t inode_lock;
@@ -2764,13 +2765,9 @@ void btrfs_delalloc_release_space(struct inode *inode,
void btrfs_free_reserved_data_space_noquota(struct inode *inode, u64 start,
					    u64 len);
void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans);
int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
				  struct btrfs_inode *inode);
void btrfs_orphan_release_metadata(struct btrfs_inode *inode);
int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
				     struct btrfs_block_rsv *rsv,
				     int nitems,
				     u64 *qgroup_reserved, bool use_global_rsv);
				     int nitems, bool use_global_rsv);
void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info,
				      struct btrfs_block_rsv *rsv);
void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes,
@@ -2828,7 +2825,7 @@ void btrfs_wait_for_snapshot_creation(struct btrfs_root *root);
void check_system_chunk(struct btrfs_trans_handle *trans,
			struct btrfs_fs_info *fs_info, const u64 type);
u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
		       struct btrfs_fs_info *info, u64 start, u64 end);
		       u64 start, u64 end);

/* ctree.c */
int btrfs_bin_search(struct extent_buffer *eb, const struct btrfs_key *key,
@@ -3042,11 +3039,9 @@ void btrfs_update_root_times(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root);

/* uuid-tree.c */
int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans,
			struct btrfs_fs_info *fs_info, u8 *uuid, u8 type,
int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans, u8 *uuid, u8 type,
			u64 subid);
int btrfs_uuid_tree_rem(struct btrfs_trans_handle *trans,
			struct btrfs_fs_info *fs_info, u8 *uuid, u8 type,
int btrfs_uuid_tree_remove(struct btrfs_trans_handle *trans, u8 *uuid, u8 type,
			u64 subid);
int btrfs_uuid_tree_iterate(struct btrfs_fs_info *fs_info,
			    int (*check_func)(struct btrfs_fs_info *, u8 *, u8,
@@ -3163,18 +3158,6 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
				     struct extent_map *em);

/* inode.c */
struct btrfs_delalloc_work {
	struct inode *inode;
	int delay_iput;
	struct completion completion;
	struct list_head list;
	struct btrfs_work work;
};

struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode,
						    int delay_iput);
void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work);

struct extent_map *btrfs_get_extent_fiemap(struct btrfs_inode *inode,
		struct page *page, size_t pg_offset, u64 start,
		u64 len, int create);
@@ -3193,10 +3176,7 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
int btrfs_add_link(struct btrfs_trans_handle *trans,
		   struct btrfs_inode *parent_inode, struct btrfs_inode *inode,
		   const char *name, int name_len, int add_backref, u64 index);
int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
			struct btrfs_root *root,
			struct inode *dir, u64 objectid,
			const char *name, int name_len);
int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry);
int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len,
			int front);
int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
@@ -3204,9 +3184,8 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
			       struct inode *inode, u64 new_size,
			       u32 min_type);

int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput);
int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput,
			       int nr);
int btrfs_start_delalloc_inodes(struct btrfs_root *root);
int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int nr);
int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
			      unsigned int extra_bits,
			      struct extent_state **cached_state, int dedupe);
@@ -3240,10 +3219,7 @@ int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans,
int btrfs_orphan_add(struct btrfs_trans_handle *trans,
		struct btrfs_inode *inode);
int btrfs_orphan_cleanup(struct btrfs_root *root);
void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root);
int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size);
void btrfs_invalidate_inodes(struct btrfs_root *root);
void btrfs_add_delayed_iput(struct inode *inode);
void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info);
int btrfs_prealloc_file_range(struct inode *inode, int mode,
@@ -3262,14 +3238,14 @@ void btrfs_test_inode_set_ops(struct inode *inode);
long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
long btrfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
int btrfs_ioctl_get_supported_features(void __user *arg);
void btrfs_update_iflags(struct inode *inode);
void btrfs_sync_inode_flags_to_i_flags(struct inode *inode);
int btrfs_is_empty_uuid(u8 *uuid);
int btrfs_defrag_file(struct inode *inode, struct file *file,
		      struct btrfs_ioctl_defrag_range_args *range,
		      u64 newer_than, unsigned long max_pages);
void btrfs_get_block_group_info(struct list_head *groups_list,
				struct btrfs_ioctl_space_info *space);
void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info,
			       struct btrfs_ioctl_balance_args *bargs);
ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen,
			   struct file *dst_file, u64 dst_loff);
@@ -3767,4 +3743,26 @@ static inline int btrfs_is_testing(struct btrfs_fs_info *fs_info)
	return 0;
}

static inline void cond_wake_up(struct wait_queue_head *wq)
{
	/*
	 * This implies a full smp_mb barrier, see comments for
	 * waitqueue_active why.
	 */
	if (wq_has_sleeper(wq))
		wake_up(wq);
}

static inline void cond_wake_up_nomb(struct wait_queue_head *wq)
{
	/*
	 * Special case for conditional wakeup where the barrier required for
	 * waitqueue_active is implied by some of the preceding code. Eg. one
	 * of such atomic operations (atomic_dec_and_return, ...), or a
	 * unlock/lock sequence, etc.
	 */
	if (waitqueue_active(wq))
		wake_up(wq);
}

#endif
Loading