
Commit 5da9d01b authored by Yan, Zheng, committed by Chris Mason

Btrfs: Shrink delay allocated space in a synchronized manner



Shrinking delayed allocation space in a synchronized manner is more
controllable than flushing all delay allocated space in an async
thread.

Signed-off-by: Yan Zheng <zheng.yan@oracle.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
parent 424499db
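
The diffs below drop the old async enospc flush worker and add shrink_delalloc(), a synchronous loop that writes back one delalloc inode at a time and backs off exponentially when no progress is made. As a rough, stand-alone illustration of that loop shape only, here is a user-space C sketch (not part of the commit); flush_one_inode() and shrink_delalloc_sketch() are hypothetical stand-ins for btrfs_start_one_delalloc_inode() and the kernel's space-info accounting.

/*
 * Minimal user-space sketch of the synchronous reclaim loop: flush one
 * delalloc "inode" at a time, re-check progress, and back off
 * exponentially when nothing could be flushed.  All names and sizes
 * here are hypothetical.
 */
#include <stdio.h>
#include <unistd.h>

#define MAX_PAUSE_US 100000		/* rough analogue of HZ / 10 */

/*
 * Pretend reclaim source: flush one "inode" and report bytes freed.
 * Every third call pretends the inode was busy and frees nothing, so
 * the back-off branch below gets exercised.
 */
static unsigned long flush_one_inode(unsigned long *delalloc_bytes)
{
	static int calls;
	unsigned long chunk;

	if (++calls % 3 == 0)
		return 0;
	chunk = *delalloc_bytes > 4096 ? 4096 : *delalloc_bytes;
	*delalloc_bytes -= chunk;
	return chunk;
}

/* Returns 1 once at least to_reclaim bytes have been freed, 0 otherwise. */
static int shrink_delalloc_sketch(unsigned long *delalloc_bytes,
				  unsigned long to_reclaim)
{
	unsigned long reclaimed = 0;
	unsigned int pause_us = 1000;

	while (*delalloc_bytes && reclaimed < to_reclaim) {
		unsigned long freed = flush_one_inode(delalloc_bytes);

		if (!freed) {
			/* no progress: sleep, then retry with a longer pause */
			usleep(pause_us);
			pause_us <<= 1;
			if (pause_us > MAX_PAUSE_US)
				pause_us = MAX_PAUSE_US;
			continue;
		}
		pause_us = 1000;	/* progress made, reset the back-off */
		reclaimed += freed;
	}
	return reclaimed >= to_reclaim;
}

int main(void)
{
	unsigned long delalloc = 1UL << 20;	/* 1 MiB of pending delalloc */

	printf("reclaimed enough: %d\n",
	       shrink_delalloc_sketch(&delalloc, 64 * 1024));
	return 0;
}

In the kernel version, the loop additionally bails out with -EAGAIN when the running transaction becomes blocked, presumably so that reclaim does not stall a pending transaction commit.
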
+1 −5
@@ -700,10 +700,6 @@ struct btrfs_space_info {

	struct list_head list;

	/* for controlling how we free up space for allocations */
	wait_queue_head_t flush_wait;
	int flushing;

	/* for block groups in our same type */
	struct list_head block_groups[BTRFS_NR_RAID_TYPES];
	spinlock_t lock;
@@ -928,7 +924,6 @@ struct btrfs_fs_info {
	struct btrfs_workers endio_meta_write_workers;
	struct btrfs_workers endio_write_workers;
	struct btrfs_workers submit_workers;
	struct btrfs_workers enospc_workers;
	/*
	 * fixup workers take dirty pages that didn't properly go through
	 * the cow mechanism and make them safe to write.  It happens
@@ -2312,6 +2307,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
			       u32 min_type);

int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput);
int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput);
int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
			      struct extent_state **cached_state);
int btrfs_writepages(struct address_space *mapping,
+0 −6
@@ -1759,9 +1759,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
			   min_t(u64, fs_devices->num_devices,
			   fs_info->thread_pool_size),
			   &fs_info->generic_worker);
	btrfs_init_workers(&fs_info->enospc_workers, "enospc",
			   fs_info->thread_pool_size,
			   &fs_info->generic_worker);

	/* a higher idle thresh on the submit workers makes it much more
	 * likely that bios will be send down in a sane order to the
@@ -1809,7 +1806,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
	btrfs_start_workers(&fs_info->endio_meta_workers, 1);
	btrfs_start_workers(&fs_info->endio_meta_write_workers, 1);
	btrfs_start_workers(&fs_info->endio_write_workers, 1);
	btrfs_start_workers(&fs_info->enospc_workers, 1);

	fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
	fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
@@ -2040,7 +2036,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
	btrfs_stop_workers(&fs_info->endio_meta_write_workers);
	btrfs_stop_workers(&fs_info->endio_write_workers);
	btrfs_stop_workers(&fs_info->submit_workers);
	btrfs_stop_workers(&fs_info->enospc_workers);
fail_iput:
	invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
	iput(fs_info->btree_inode);
@@ -2473,7 +2468,6 @@ int close_ctree(struct btrfs_root *root)
	btrfs_stop_workers(&fs_info->endio_meta_write_workers);
	btrfs_stop_workers(&fs_info->endio_write_workers);
	btrfs_stop_workers(&fs_info->submit_workers);
	btrfs_stop_workers(&fs_info->enospc_workers);

	btrfs_close_devices(fs_info->fs_devices);
	btrfs_mapping_tree_free(&fs_info->mapping_tree);
+55 −110
@@ -74,6 +74,9 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
static int maybe_allocate_chunk(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
				struct btrfs_space_info *sinfo, u64 num_bytes);
static int shrink_delalloc(struct btrfs_trans_handle *trans,
			   struct btrfs_root *root,
			   struct btrfs_space_info *sinfo, u64 to_reclaim);

static noinline int
block_group_cache_done(struct btrfs_block_group_cache *cache)
@@ -2693,7 +2696,6 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
	for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
		INIT_LIST_HEAD(&found->block_groups[i]);
	init_rwsem(&found->groups_sem);
	init_waitqueue_head(&found->flush_wait);
	spin_lock_init(&found->lock);
	found->flags = flags & (BTRFS_BLOCK_GROUP_DATA |
				BTRFS_BLOCK_GROUP_SYSTEM |
@@ -2907,105 +2909,6 @@ static void check_force_delalloc(struct btrfs_space_info *meta_sinfo)
		meta_sinfo->force_delalloc = 0;
}

struct async_flush {
	struct btrfs_root *root;
	struct btrfs_space_info *info;
	struct btrfs_work work;
};

static noinline void flush_delalloc_async(struct btrfs_work *work)
{
	struct async_flush *async;
	struct btrfs_root *root;
	struct btrfs_space_info *info;

	async = container_of(work, struct async_flush, work);
	root = async->root;
	info = async->info;

	btrfs_start_delalloc_inodes(root, 0);
	wake_up(&info->flush_wait);
	btrfs_wait_ordered_extents(root, 0, 0);

	spin_lock(&info->lock);
	info->flushing = 0;
	spin_unlock(&info->lock);
	wake_up(&info->flush_wait);

	kfree(async);
}

static void wait_on_flush(struct btrfs_space_info *info)
{
	DEFINE_WAIT(wait);
	u64 used;

	while (1) {
		prepare_to_wait(&info->flush_wait, &wait,
				TASK_UNINTERRUPTIBLE);
		spin_lock(&info->lock);
		if (!info->flushing) {
			spin_unlock(&info->lock);
			break;
		}

		used = info->bytes_used + info->bytes_reserved +
			info->bytes_pinned + info->bytes_readonly +
			info->bytes_super + info->bytes_root +
			info->bytes_may_use + info->bytes_delalloc;
		if (used < info->total_bytes) {
			spin_unlock(&info->lock);
			break;
		}
		spin_unlock(&info->lock);
		schedule();
	}
	finish_wait(&info->flush_wait, &wait);
}

static void flush_delalloc(struct btrfs_root *root,
				 struct btrfs_space_info *info)
{
	struct async_flush *async;
	bool wait = false;

	spin_lock(&info->lock);

	if (!info->flushing)
		info->flushing = 1;
	else
		wait = true;

	spin_unlock(&info->lock);

	if (wait) {
		wait_on_flush(info);
		return;
	}

	async = kzalloc(sizeof(*async), GFP_NOFS);
	if (!async)
		goto flush;

	async->root = root;
	async->info = info;
	async->work.func = flush_delalloc_async;

	btrfs_queue_worker(&root->fs_info->enospc_workers,
			   &async->work);
	wait_on_flush(info);
	return;

flush:
	btrfs_start_delalloc_inodes(root, 0);
	btrfs_wait_ordered_extents(root, 0, 0);

	spin_lock(&info->lock);
	info->flushing = 0;
	spin_unlock(&info->lock);
	wake_up(&info->flush_wait);
}

/*
 * Reserve metadata space for delalloc.
 */
@@ -3058,7 +2961,7 @@ int btrfs_reserve_metadata_for_delalloc(struct btrfs_root *root,
			filemap_flush(inode->i_mapping);
			goto again;
		} else if (flushed == 3) {
			flush_delalloc(root, meta_sinfo);
			shrink_delalloc(NULL, root, meta_sinfo, num_bytes);
			goto again;
		}
		spin_lock(&meta_sinfo->lock);
@@ -3171,7 +3074,7 @@ int btrfs_reserve_metadata_space(struct btrfs_root *root, int num_items)
		}

		if (retries == 2) {
			flush_delalloc(root, meta_sinfo);
			shrink_delalloc(NULL, root, meta_sinfo, num_bytes);
			goto again;
		}
		spin_lock(&meta_sinfo->lock);
@@ -3197,7 +3100,7 @@ int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode,
{
	struct btrfs_space_info *data_sinfo;
	u64 used;
	int ret = 0, committed = 0, flushed = 0;
	int ret = 0, committed = 0;

	/* make sure bytes are sectorsize aligned */
	bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
@@ -3217,13 +3120,6 @@ int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode,
	if (used + bytes > data_sinfo->total_bytes) {
		struct btrfs_trans_handle *trans;

		if (!flushed) {
			spin_unlock(&data_sinfo->lock);
			flush_delalloc(root, data_sinfo);
			flushed = 1;
			goto again;
		}

		/*
		 * if we don't have enough free bytes in this space then we need
		 * to alloc a new chunk.
@@ -3467,6 +3363,55 @@ static int maybe_allocate_chunk(struct btrfs_trans_handle *trans,
	return ret == 1 ? 1 : 0;
}

/*
 * shrink metadata reservation for delalloc
 */
static int shrink_delalloc(struct btrfs_trans_handle *trans,
			   struct btrfs_root *root,
			   struct btrfs_space_info *sinfo, u64 to_reclaim)
{
	u64 reserved;
	u64 max_reclaim;
	u64 reclaimed = 0;
	int pause = 1;
	int ret;

	spin_lock(&sinfo->lock);
	reserved = sinfo->bytes_delalloc;
	spin_unlock(&sinfo->lock);

	if (reserved == 0)
		return 0;

	max_reclaim = min(reserved, to_reclaim);

	while (1) {
		ret = btrfs_start_one_delalloc_inode(root, trans ? 1 : 0);
		if (!ret) {
			__set_current_state(TASK_INTERRUPTIBLE);
			schedule_timeout(pause);
			pause <<= 1;
			if (pause > HZ / 10)
				pause = HZ / 10;
		} else {
			pause = 1;
		}

		spin_lock(&sinfo->lock);
		if (reserved > sinfo->bytes_delalloc)
			reclaimed = reserved - sinfo->bytes_delalloc;
		reserved = sinfo->bytes_delalloc;
		spin_unlock(&sinfo->lock);

		if (reserved == 0 || reclaimed >= max_reclaim)
			break;

		if (trans && trans->transaction->blocked)
			return -EAGAIN;
	}
	return reclaimed >= to_reclaim;
}

static int update_block_group(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root,
			      u64 bytenr, u64 num_bytes, int alloc,
+32 −0
@@ -5611,6 +5611,38 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
	return 0;
}

int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput)
{
	struct btrfs_inode *binode;
	struct inode *inode = NULL;

	spin_lock(&root->fs_info->delalloc_lock);
	while (!list_empty(&root->fs_info->delalloc_inodes)) {
		binode = list_entry(root->fs_info->delalloc_inodes.next,
				    struct btrfs_inode, delalloc_inodes);
		inode = igrab(&binode->vfs_inode);
		if (inode) {
			list_move_tail(&binode->delalloc_inodes,
				       &root->fs_info->delalloc_inodes);
			break;
		}

		list_del_init(&binode->delalloc_inodes);
		cond_resched_lock(&root->fs_info->delalloc_lock);
	}
	spin_unlock(&root->fs_info->delalloc_lock);

	if (inode) {
		write_inode_now(inode, 0);
		if (delay_iput)
			btrfs_add_delayed_iput(inode);
		else
			iput(inode);
		return 1;
	}
	return 0;
}

static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
			 const char *symname)
{