Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 02dbfc99 authored by Omar Sandoval's avatar Omar Sandoval Committed by Chris Mason
Browse files

Btrfs: fix ->iterate_shared() by upgrading i_rwsem for delayed nodes



Commit fe742fd4 ("Revert "btrfs: switch to ->iterate_shared()"")
backed out the conversion to ->iterate_shared() for Btrfs because the
delayed inode handling in btrfs_real_readdir() is racy. However, we can
still do readdir in parallel if there are no delayed nodes.

This is a temporary fix which upgrades the shared inode lock to an
exclusive lock only when we have delayed items until we come up with a
more complete solution. While we're here, rename the
btrfs_{get,put}_delayed_items functions to make it very clear that
they're just for readdir.

Tested with xfstests and by doing a parallel kernel build:

	while make tinyconfig && make -j4 && git clean dqfx; do
		:
	done

along with a bunch of parallel finds in another shell:

	while true; do
		for ((i=0; i<4; i++)); do
			find . >/dev/null &
		done
		wait
	done

Signed-off-by: default avatarOmar Sandoval <osandov@fb.com>
Signed-off-by: default avatarDavid Sterba <dsterba@suse.com>
Signed-off-by: default avatarChris Mason <clm@fb.com>
parent 33688abb
Loading
Loading
Loading
Loading
+22 −5
Original line number Diff line number Diff line
@@ -1606,7 +1606,8 @@ int btrfs_inode_delayed_dir_index_count(struct inode *inode)
	return 0;
}

void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list,
bool btrfs_readdir_get_delayed_items(struct inode *inode,
				     struct list_head *ins_list,
				     struct list_head *del_list)
{
	struct btrfs_delayed_node *delayed_node;
@@ -1614,7 +1615,14 @@ void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list,

	delayed_node = btrfs_get_delayed_node(inode);
	if (!delayed_node)
		return;
		return false;

	/*
	 * We can only do one readdir with delayed items at a time because of
	 * item->readdir_list.
	 */
	inode_unlock_shared(inode);
	inode_lock(inode);

	mutex_lock(&delayed_node->mutex);
	item = __btrfs_first_delayed_insertion_item(delayed_node);
@@ -1641,9 +1649,12 @@ void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list,
	 * requeue or dequeue this delayed node.
	 */
	atomic_dec(&delayed_node->refs);

	return true;
}

void btrfs_put_delayed_items(struct list_head *ins_list,
void btrfs_readdir_put_delayed_items(struct inode *inode,
				     struct list_head *ins_list,
				     struct list_head *del_list)
{
	struct btrfs_delayed_item *curr, *next;
@@ -1659,6 +1670,12 @@ void btrfs_put_delayed_items(struct list_head *ins_list,
		if (atomic_dec_and_test(&curr->refs))
			kfree(curr);
	}

	/*
	 * The VFS is going to do up_read(), so we need to downgrade back to a
	 * read lock.
	 */
	downgrade_write(&inode->i_rwsem);
}

int btrfs_should_delete_dir_index(struct list_head *del_list,
+6 −4
Original line number Diff line number Diff line
@@ -137,9 +137,11 @@ void btrfs_kill_all_delayed_nodes(struct btrfs_root *root);
void btrfs_destroy_delayed_inodes(struct btrfs_root *root);

/* Used for readdir() */
void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list,
bool btrfs_readdir_get_delayed_items(struct inode *inode,
				     struct list_head *ins_list,
				     struct list_head *del_list);
void btrfs_put_delayed_items(struct list_head *ins_list,
void btrfs_readdir_put_delayed_items(struct inode *inode,
				     struct list_head *ins_list,
				     struct list_head *del_list);
int btrfs_should_delete_dir_index(struct list_head *del_list,
				  u64 index);
+6 −4
Original line number Diff line number Diff line
@@ -5757,6 +5757,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
	int name_len;
	int is_curr = 0;	/* ctx->pos points to the current index? */
	bool emitted;
	bool put = false;

	/* FIXME, use a real flag for deciding about the key type */
	if (root->fs_info->tree_root == root)
@@ -5774,7 +5775,8 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
	if (key_type == BTRFS_DIR_INDEX_KEY) {
		INIT_LIST_HEAD(&ins_list);
		INIT_LIST_HEAD(&del_list);
		btrfs_get_delayed_items(inode, &ins_list, &del_list);
		put = btrfs_readdir_get_delayed_items(inode, &ins_list,
						      &del_list);
	}

	key.type = key_type;
@@ -5921,8 +5923,8 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
nopos:
	ret = 0;
err:
	if (key_type == BTRFS_DIR_INDEX_KEY)
		btrfs_put_delayed_items(&ins_list, &del_list);
	if (put)
		btrfs_readdir_put_delayed_items(inode, &ins_list, &del_list);
	btrfs_free_path(path);
	return ret;
}
@@ -10534,7 +10536,7 @@ static const struct inode_operations btrfs_dir_ro_inode_operations = {
static const struct file_operations btrfs_dir_file_operations = {
	.llseek		= generic_file_llseek,
	.read		= generic_read_dir,
	.iterate	= btrfs_real_readdir,
	.iterate_shared	= btrfs_real_readdir,
	.unlocked_ioctl	= btrfs_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= btrfs_compat_ioctl,