Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit f64b4d79 authored by Jaegeuk Kim's avatar Jaegeuk Kim
Browse files

f2fs: avoid fi->i_gc_rwsem[WRITE] lock in f2fs_gc



The f2fs_gc() called by f2fs_balance_fs() requires to be called outside of
fi->i_gc_rwsem[WRITE], since f2fs_gc() can try to grab it in a loop.

If it hits the miximum retrials in GC, let's give a chance to release
gc_mutex for a short time in order not to go into live lock in the worst
case.

Reviewed-by: default avatarChao Yu <yuchao0@huawei.com>
Signed-off-by: default avatarJaegeuk Kim <jaegeuk@kernel.org>
parent 6d598450
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -2215,14 +2215,14 @@ static void f2fs_write_failed(struct address_space *mapping, loff_t to)
	loff_t i_size = i_size_read(inode);

	if (to > i_size) {
		down_write(&F2FS_I(inode)->i_mmap_sem);
		down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
		down_write(&F2FS_I(inode)->i_mmap_sem);

		truncate_pagecache(inode, i_size);
		f2fs_truncate_blocks(inode, i_size, true);

		up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
		up_write(&F2FS_I(inode)->i_mmap_sem);
		up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
	}
}

+1 −0
Original line number Diff line number Diff line
@@ -1242,6 +1242,7 @@ struct f2fs_sb_info {
	unsigned int gc_mode;			/* current GC state */
	/* for skip statistic */
	unsigned long long skipped_atomic_files[2];	/* FG_GC and BG_GC */
	unsigned long long skipped_gc_rwsem;		/* FG_GC only */

	/* threshold for gc trials on pinned files */
	u64 gc_pin_file_threshold;
+62 −57
Original line number Diff line number Diff line
@@ -800,8 +800,8 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
	if (attr->ia_valid & ATTR_SIZE) {
		bool to_smaller = (attr->ia_size <= i_size_read(inode));

		down_write(&F2FS_I(inode)->i_mmap_sem);
		down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
		down_write(&F2FS_I(inode)->i_mmap_sem);

		truncate_setsize(inode, attr->ia_size);

@@ -811,8 +811,8 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
		 * do not trim all blocks after i_size if target size is
		 * larger than i_size.
		 */
		up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
		up_write(&F2FS_I(inode)->i_mmap_sem);
		up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);

		if (err)
			return err;
@@ -965,8 +965,8 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
			blk_start = (loff_t)pg_start << PAGE_SHIFT;
			blk_end = (loff_t)pg_end << PAGE_SHIFT;

			down_write(&F2FS_I(inode)->i_mmap_sem);
			down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
			down_write(&F2FS_I(inode)->i_mmap_sem);

			truncate_inode_pages_range(mapping, blk_start,
					blk_end - 1);
@@ -975,8 +975,8 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
			ret = f2fs_truncate_hole(inode, pg_start, pg_end);
			f2fs_unlock_op(sbi);

			up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
			up_write(&F2FS_I(inode)->i_mmap_sem);
			up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
		}
	}

@@ -1191,25 +1191,33 @@ static int __exchange_data_block(struct inode *src_inode,
	return ret;
}

static int f2fs_do_collapse(struct inode *inode, pgoff_t start, pgoff_t end)
static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	pgoff_t nrpages = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE;
	pgoff_t start = offset >> PAGE_SHIFT;
	pgoff_t end = (offset + len) >> PAGE_SHIFT;
	int ret;

	f2fs_balance_fs(sbi, true);
	f2fs_lock_op(sbi);

	f2fs_drop_extent_tree(inode);
	/* avoid gc operation during block exchange */
	down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
	down_write(&F2FS_I(inode)->i_mmap_sem);

	f2fs_lock_op(sbi);
	f2fs_drop_extent_tree(inode);
	truncate_pagecache(inode, offset);
	ret = __exchange_data_block(inode, inode, end, start, nrpages - end, true);
	f2fs_unlock_op(sbi);

	up_write(&F2FS_I(inode)->i_mmap_sem);
	up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
	return ret;
}

static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
{
	pgoff_t pg_start, pg_end;
	loff_t new_size;
	int ret;

@@ -1224,25 +1232,17 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
	if (ret)
		return ret;

	pg_start = offset >> PAGE_SHIFT;
	pg_end = (offset + len) >> PAGE_SHIFT;

	/* avoid gc operation during block exchange */
	down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);

	down_write(&F2FS_I(inode)->i_mmap_sem);
	/* write out all dirty pages from offset */
	ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
	if (ret)
		goto out_unlock;

	truncate_pagecache(inode, offset);
		return ret;

	ret = f2fs_do_collapse(inode, pg_start, pg_end);
	ret = f2fs_do_collapse(inode, offset, len);
	if (ret)
		goto out_unlock;
		return ret;

	/* write out all moved pages, if possible */
	down_write(&F2FS_I(inode)->i_mmap_sem);
	filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
	truncate_pagecache(inode, offset);

@@ -1250,11 +1250,9 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
	truncate_pagecache(inode, new_size);

	ret = f2fs_truncate_blocks(inode, new_size, true);
	up_write(&F2FS_I(inode)->i_mmap_sem);
	if (!ret)
		f2fs_i_size_write(inode, new_size);
out_unlock:
	up_write(&F2FS_I(inode)->i_mmap_sem);
	up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
	return ret;
}

@@ -1320,10 +1318,9 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
	if (ret)
		return ret;

	down_write(&F2FS_I(inode)->i_mmap_sem);
	ret = filemap_write_and_wait_range(mapping, offset, offset + len - 1);
	if (ret)
		goto out_sem;
		return ret;

	pg_start = ((unsigned long long) offset) >> PAGE_SHIFT;
	pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT;
@@ -1335,7 +1332,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
		ret = fill_zero(inode, pg_start, off_start,
						off_end - off_start);
		if (ret)
			goto out_sem;
			return ret;

		new_size = max_t(loff_t, new_size, offset + len);
	} else {
@@ -1343,7 +1340,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
			ret = fill_zero(inode, pg_start++, off_start,
						PAGE_SIZE - off_start);
			if (ret)
				goto out_sem;
				return ret;

			new_size = max_t(loff_t, new_size,
					(loff_t)pg_start << PAGE_SHIFT);
@@ -1355,6 +1352,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
			pgoff_t end;

			down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
			down_write(&F2FS_I(inode)->i_mmap_sem);

			truncate_pagecache_range(inode,
				(loff_t)index << PAGE_SHIFT,
@@ -1366,6 +1364,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
			ret = f2fs_get_dnode_of_data(&dn, index, ALLOC_NODE);
			if (ret) {
				f2fs_unlock_op(sbi);
				up_write(&F2FS_I(inode)->i_mmap_sem);
				up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
				goto out;
			}
@@ -1377,6 +1376,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
			f2fs_put_dnode(&dn);

			f2fs_unlock_op(sbi);
			up_write(&F2FS_I(inode)->i_mmap_sem);
			up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);

			f2fs_balance_fs(sbi, dn.node_changed);
@@ -1405,9 +1405,6 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
		else
			f2fs_i_size_write(inode, new_size);
	}
out_sem:
	up_write(&F2FS_I(inode)->i_mmap_sem);

	return ret;
}

@@ -1436,26 +1433,27 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)

	f2fs_balance_fs(sbi, true);

	/* avoid gc operation during block exchange */
	down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);

	down_write(&F2FS_I(inode)->i_mmap_sem);
	ret = f2fs_truncate_blocks(inode, i_size_read(inode), true);
	up_write(&F2FS_I(inode)->i_mmap_sem);
	if (ret)
		goto out;
		return ret;

	/* write out all dirty pages from offset */
	ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
	if (ret)
		goto out;

	truncate_pagecache(inode, offset);
		return ret;

	pg_start = offset >> PAGE_SHIFT;
	pg_end = (offset + len) >> PAGE_SHIFT;
	delta = pg_end - pg_start;
	idx = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE;

	/* avoid gc operation during block exchange */
	down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
	down_write(&F2FS_I(inode)->i_mmap_sem);
	truncate_pagecache(inode, offset);

	while (!ret && idx > pg_start) {
		nr = idx - pg_start;
		if (nr > delta)
@@ -1469,16 +1467,17 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
					idx + delta, nr, false);
		f2fs_unlock_op(sbi);
	}
	up_write(&F2FS_I(inode)->i_mmap_sem);
	up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);

	/* write out all moved pages, if possible */
	down_write(&F2FS_I(inode)->i_mmap_sem);
	filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
	truncate_pagecache(inode, offset);
	up_write(&F2FS_I(inode)->i_mmap_sem);

	if (!ret)
		f2fs_i_size_write(inode, new_size);
out:
	up_write(&F2FS_I(inode)->i_mmap_sem);
	up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
	return ret;
}

@@ -1718,8 +1717,6 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)

	inode_lock(inode);

	down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);

	if (f2fs_is_atomic_file(inode)) {
		if (is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST))
			ret = -EINVAL;
@@ -1730,6 +1727,8 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
	if (ret)
		goto out;

	down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);

	if (!get_dirty_pages(inode))
		goto skip_flush;

@@ -1737,18 +1736,20 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
		"Unexpected flush for atomic writes: ino=%lu, npages=%u",
					inode->i_ino, get_dirty_pages(inode));
	ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
	if (ret)
	if (ret) {
		up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
		goto out;
	}
skip_flush:
	set_inode_flag(inode, FI_ATOMIC_FILE);
	clear_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
	up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);

	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
	F2FS_I(inode)->inmem_task = current;
	stat_inc_atomic_write(inode);
	stat_update_max_atomic_write(inode);
out:
	up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
	inode_unlock(inode);
	mnt_drop_write_file(filp);
	return ret;
@@ -1766,9 +1767,9 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
	if (ret)
		return ret;

	inode_lock(inode);
	f2fs_balance_fs(F2FS_I_SB(inode), true);

	down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
	inode_lock(inode);

	if (f2fs_is_volatile_file(inode)) {
		ret = -EINVAL;
@@ -1794,7 +1795,6 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
		clear_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
		ret = -EINVAL;
	}
	up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
	inode_unlock(inode);
	mnt_drop_write_file(filp);
	return ret;
@@ -2390,15 +2390,10 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
	}

	inode_lock(src);
	down_write(&F2FS_I(src)->i_gc_rwsem[WRITE]);
	if (src != dst) {
		ret = -EBUSY;
		if (!inode_trylock(dst))
			goto out;
		if (!down_write_trylock(&F2FS_I(dst)->i_gc_rwsem[WRITE])) {
			inode_unlock(dst);
			goto out;
		}
	}

	ret = -EINVAL;
@@ -2443,6 +2438,14 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
		goto out_unlock;

	f2fs_balance_fs(sbi, true);

	down_write(&F2FS_I(src)->i_gc_rwsem[WRITE]);
	if (src != dst) {
		ret = -EBUSY;
		if (!down_write_trylock(&F2FS_I(dst)->i_gc_rwsem[WRITE]))
			goto out_src;
	}

	f2fs_lock_op(sbi);
	ret = __exchange_data_block(src, dst, pos_in >> F2FS_BLKSIZE_BITS,
				pos_out >> F2FS_BLKSIZE_BITS,
@@ -2455,13 +2458,15 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
			f2fs_i_size_write(dst, dst_osize);
	}
	f2fs_unlock_op(sbi);
out_unlock:
	if (src != dst) {

	if (src != dst)
		up_write(&F2FS_I(dst)->i_gc_rwsem[WRITE]);
out_src:
	up_write(&F2FS_I(src)->i_gc_rwsem[WRITE]);
out_unlock:
	if (src != dst)
		inode_unlock(dst);
	}
out:
	up_write(&F2FS_I(src)->i_gc_rwsem[WRITE]);
	inode_unlock(src);
	return ret;
}
+20 −6
Original line number Diff line number Diff line
@@ -882,6 +882,7 @@ static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
			if (!down_write_trylock(
				&F2FS_I(inode)->i_gc_rwsem[WRITE])) {
				iput(inode);
				sbi->skipped_gc_rwsem++;
				continue;
			}

@@ -911,6 +912,7 @@ static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
					continue;
				if (!down_write_trylock(
						&fi->i_gc_rwsem[WRITE])) {
					sbi->skipped_gc_rwsem++;
					up_write(&fi->i_gc_rwsem[READ]);
					continue;
				}
@@ -1048,6 +1050,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
		.iroot = RADIX_TREE_INIT(GFP_NOFS),
	};
	unsigned long long last_skipped = sbi->skipped_atomic_files[FG_GC];
	unsigned long long first_skipped;
	unsigned int skipped_round = 0, round = 0;

	trace_f2fs_gc_begin(sbi->sb, sync, background,
@@ -1060,6 +1063,8 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
				prefree_segments(sbi));

	cpc.reason = __get_cp_reason(sbi);
	sbi->skipped_gc_rwsem = 0;
	first_skipped = last_skipped;
gc_more:
	if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE))) {
		ret = -EINVAL;
@@ -1101,7 +1106,8 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
	total_freed += seg_freed;

	if (gc_type == FG_GC) {
		if (sbi->skipped_atomic_files[FG_GC] > last_skipped)
		if (sbi->skipped_atomic_files[FG_GC] > last_skipped ||
						sbi->skipped_gc_rwsem)
			skipped_round++;
		last_skipped = sbi->skipped_atomic_files[FG_GC];
		round++;
@@ -1110,15 +1116,23 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
	if (gc_type == FG_GC)
		sbi->cur_victim_sec = NULL_SEGNO;

	if (!sync) {
	if (sync)
		goto stop;

	if (has_not_enough_free_secs(sbi, sec_freed, 0)) {
			if (skipped_round > MAX_SKIP_ATOMIC_COUNT &&
				skipped_round * 2 >= round)
				f2fs_drop_inmem_pages_all(sbi, true);
		if (skipped_round <= MAX_SKIP_GC_COUNT ||
					skipped_round * 2 < round) {
			segno = NULL_SEGNO;
			goto gc_more;
		}

		if (first_skipped < last_skipped &&
				(last_skipped - first_skipped) >
						sbi->skipped_gc_rwsem) {
			f2fs_drop_inmem_pages_all(sbi, true);
			segno = NULL_SEGNO;
			goto gc_more;
		}
		if (gc_type == FG_GC)
			ret = f2fs_write_checkpoint(sbi, &cpc);
	}
+5 −1
Original line number Diff line number Diff line
@@ -445,8 +445,10 @@ int f2fs_commit_inmem_pages(struct inode *inode)
	int err;

	f2fs_balance_fs(sbi, true);
	f2fs_lock_op(sbi);

	down_write(&fi->i_gc_rwsem[WRITE]);

	f2fs_lock_op(sbi);
	set_inode_flag(inode, FI_ATOMIC_COMMIT);

	mutex_lock(&fi->inmem_lock);
@@ -461,6 +463,8 @@ int f2fs_commit_inmem_pages(struct inode *inode)
	clear_inode_flag(inode, FI_ATOMIC_COMMIT);

	f2fs_unlock_op(sbi);
	up_write(&fi->i_gc_rwsem[WRITE]);

	return err;
}

Loading