Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 24bbcf04 authored by Yan, Zheng; committed by Chris Mason
Browse files

Btrfs: Add delayed iput



iput() can trigger new transactions if we are dropping the
final reference, so calling it in btrfs_commit_transaction
may end up in a deadlock. This patch adds delayed iput to
avoid the issue.

Signed-off-by: Yan Zheng <zheng.yan@oracle.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
parent f34f57a3
Loading
Loading
Loading
Loading
+6 −1
Original line number Diff line number Diff line
@@ -872,6 +872,9 @@ struct btrfs_fs_info {
	struct list_head dead_roots;
	struct list_head caching_block_groups;

	spinlock_t delayed_iput_lock;
	struct list_head delayed_iputs;

	atomic_t nr_async_submits;
	atomic_t async_submit_draining;
	atomic_t nr_async_bios;
@@ -2301,7 +2304,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
			       struct inode *inode, u64 new_size,
			       u32 min_type);

int btrfs_start_delalloc_inodes(struct btrfs_root *root);
int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput);
int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end);
int btrfs_writepages(struct address_space *mapping,
		     struct writeback_control *wbc);
@@ -2341,6 +2344,8 @@ int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode);
void btrfs_orphan_cleanup(struct btrfs_root *root);
int btrfs_cont_expand(struct inode *inode, loff_t size);
int btrfs_invalidate_inodes(struct btrfs_root *root);
void btrfs_add_delayed_iput(struct inode *inode);
void btrfs_run_delayed_iputs(struct btrfs_root *root);
extern const struct dentry_operations btrfs_dentry_operations;

/* ioctl.c */
+4 −0
Original line number Diff line number Diff line
@@ -1476,6 +1476,7 @@ static int cleaner_kthread(void *arg)

		if (!(root->fs_info->sb->s_flags & MS_RDONLY) &&
		    mutex_trylock(&root->fs_info->cleaner_mutex)) {
			btrfs_run_delayed_iputs(root);
			btrfs_clean_old_snapshots(root);
			mutex_unlock(&root->fs_info->cleaner_mutex);
		}
@@ -1605,6 +1606,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
	INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
	INIT_LIST_HEAD(&fs_info->trans_list);
	INIT_LIST_HEAD(&fs_info->dead_roots);
	INIT_LIST_HEAD(&fs_info->delayed_iputs);
	INIT_LIST_HEAD(&fs_info->hashers);
	INIT_LIST_HEAD(&fs_info->delalloc_inodes);
	INIT_LIST_HEAD(&fs_info->ordered_operations);
@@ -1613,6 +1615,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
	spin_lock_init(&fs_info->new_trans_lock);
	spin_lock_init(&fs_info->ref_cache_lock);
	spin_lock_init(&fs_info->fs_roots_radix_lock);
	spin_lock_init(&fs_info->delayed_iput_lock);

	init_completion(&fs_info->kobj_unregister);
	fs_info->tree_root = tree_root;
@@ -2386,6 +2389,7 @@ int btrfs_commit_super(struct btrfs_root *root)
	int ret;

	mutex_lock(&root->fs_info->cleaner_mutex);
	btrfs_run_delayed_iputs(root);
	btrfs_clean_old_snapshots(root);
	mutex_unlock(&root->fs_info->cleaner_mutex);

+4 −4
Original line number Diff line number Diff line
@@ -2880,9 +2880,9 @@ static noinline void flush_delalloc_async(struct btrfs_work *work)
	root = async->root;
	info = async->info;

	btrfs_start_delalloc_inodes(root);
	btrfs_start_delalloc_inodes(root, 0);
	wake_up(&info->flush_wait);
	btrfs_wait_ordered_extents(root, 0);
	btrfs_wait_ordered_extents(root, 0, 0);

	spin_lock(&info->lock);
	info->flushing = 0;
@@ -2956,8 +2956,8 @@ static void flush_delalloc(struct btrfs_root *root,
	return;

flush:
	btrfs_start_delalloc_inodes(root);
	btrfs_wait_ordered_extents(root, 0);
	btrfs_start_delalloc_inodes(root, 0);
	btrfs_wait_ordered_extents(root, 0, 0);

	spin_lock(&info->lock);
	info->flushing = 0;
+53 −2
Original line number Diff line number Diff line
@@ -2022,6 +2022,54 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
	return -EIO;
}

/*
 * One queued inode reference that still has to be dropped with iput().
 * Entries are allocated by btrfs_add_delayed_iput() and released by
 * btrfs_run_delayed_iputs().
 */
struct delayed_iput {
	/* linkage on fs_info->delayed_iputs (or a private list while draining) */
	struct list_head list;
	/* inode whose reference is dropped when the entry is processed */
	struct inode *inode;
};

/*
 * Drop one reference on @inode without performing the final iput() in
 * the caller's context.
 *
 * If the caller does not hold the last reference, i_count is simply
 * decremented.  Otherwise the inode is queued on
 * fs_info->delayed_iputs so that btrfs_run_delayed_iputs() can drop
 * the final reference later; dropping it may start a new transaction,
 * which is not safe from every call site.
 */
void btrfs_add_delayed_iput(struct inode *inode)
{
	struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
	struct delayed_iput *entry;

	/*
	 * atomic_add_unless() refuses to decrement when i_count == 1, i.e.
	 * when this is the last reference.  Only that case needs queueing.
	 */
	if (!atomic_add_unless(&inode->i_count, -1, 1)) {
		/* __GFP_NOFAIL: the allocation is not allowed to fail */
		entry = kmalloc(sizeof(*entry), GFP_NOFS | __GFP_NOFAIL);
		entry->inode = inode;

		spin_lock(&fs_info->delayed_iput_lock);
		list_add_tail(&entry->list, &fs_info->delayed_iputs);
		spin_unlock(&fs_info->delayed_iput_lock);
	}
}

void btrfs_run_delayed_iputs(struct btrfs_root *root)
{
	LIST_HEAD(list);
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct delayed_iput *delayed;
	int empty;

	spin_lock(&fs_info->delayed_iput_lock);
	empty = list_empty(&fs_info->delayed_iputs);
	spin_unlock(&fs_info->delayed_iput_lock);
	if (empty)
		return;

	down_read(&root->fs_info->cleanup_work_sem);
	spin_lock(&fs_info->delayed_iput_lock);
	list_splice_init(&fs_info->delayed_iputs, &list);
	spin_unlock(&fs_info->delayed_iput_lock);

	while (!list_empty(&list)) {
		delayed = list_entry(list.next, struct delayed_iput, list);
		list_del(&delayed->list);
		iput(delayed->inode);
		kfree(delayed);
	}
	up_read(&root->fs_info->cleanup_work_sem);
}

/*
 * This creates an orphan entry for the given inode in case something goes
 * wrong in the middle of an unlink/truncate.
@@ -5568,7 +5616,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 * some fairly slow code that needs optimization. This walks the list
 * of all the inodes with pending delalloc and forces them to disk.
 */
int btrfs_start_delalloc_inodes(struct btrfs_root *root)
int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
{
	struct list_head *head = &root->fs_info->delalloc_inodes;
	struct btrfs_inode *binode;
@@ -5587,6 +5635,9 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root)
		spin_unlock(&root->fs_info->delalloc_lock);
		if (inode) {
			filemap_flush(inode->i_mapping);
			if (delay_iput)
				btrfs_add_delayed_iput(inode);
			else
				iput(inode);
		}
		cond_resched();
+7 −3
Original line number Diff line number Diff line
@@ -352,7 +352,8 @@ int btrfs_remove_ordered_extent(struct inode *inode,
 * wait for all the ordered extents in a root.  This is done when balancing
 * space between drives.
 */
int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only)
int btrfs_wait_ordered_extents(struct btrfs_root *root,
			       int nocow_only, int delay_iput)
{
	struct list_head splice;
	struct list_head *cur;
@@ -389,6 +390,9 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only)
		if (inode) {
			btrfs_start_ordered_extent(inode, ordered, 1);
			btrfs_put_ordered_extent(ordered);
			if (delay_iput)
				btrfs_add_delayed_iput(inode);
			else
				iput(inode);
		} else {
			btrfs_put_ordered_extent(ordered);
@@ -447,7 +451,7 @@ int btrfs_run_ordered_operations(struct btrfs_root *root, int wait)
				btrfs_wait_ordered_range(inode, 0, (u64)-1);
			else
				filemap_flush(inode->i_mapping);
			iput(inode);
			btrfs_add_delayed_iput(inode);
		}

		cond_resched();
Loading