Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit fe6f0ed0 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull f2fs updates from Jaegeuk Kim:
 "In this round, we've tuned f2fs to improve general performance by
  serializing block allocation and enhancing discard flows like fstrim
  which avoids user IO contention. And we've added fsync_mode=nobarrier
  which gives an option to user where it skips issuing cache_flush
  commands to underlying flash storage. And there are many bug fixes
  related to fuzzed images, revoked atomic writes, quota ops, and minor
  direct IO.

  Enhancements:
   - add fsync_mode=nobarrier which bypasses cache_flush command
   - enhance the discarding flow which avoids user IOs and issues in
     LBA order
   - readahead some encrypted blocks during GC
   - enable in-memory inode checksum to verify the blocks if
     F2FS_CHECK_FS is set
   - enhance nat_bits behavior
   - set -o discard by default
   - set REQ_RAHEAD to bio in ->readpages

  Bug fixes:
   - fix a corner case to corrupt atomic_writes revoking flow
   - revisit i_gc_rwsem to fix race conditions
   - fix some dio behaviors captured by xfstests
   - correct handling errors given by quota-related failures
   - add many sanity check flows to avoid fuzz test failures
   - add more error number propagation to their callers
   - fix several corner cases to continue fault injection w/ shutdown
     loop"

* tag 'f2fs-for-4.19' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (89 commits)
  f2fs: readahead encrypted block during GC
  f2fs: avoid fi->i_gc_rwsem[WRITE] lock in f2fs_gc
  f2fs: fix performance issue observed with multi-thread sequential read
  f2fs: fix to skip verifying block address for non-regular inode
  f2fs: rework fault injection handling to avoid a warning
  f2fs: support fault_type mount option
  f2fs: fix to return success when trimming meta area
  f2fs: fix use-after-free of dicard command entry
  f2fs: support discard submission error injection
  f2fs: split discard command in prior to block layer
  f2fs: wake up gc thread immediately when gc_urgent is set
  f2fs: fix incorrect range->len in f2fs_trim_fs()
  f2fs: refresh recent accessed nat entry in lru list
  f2fs: fix avoid race between truncate and background GC
  f2fs: avoid race between zero_range and background GC
  f2fs: fix to do sanity check with block address in main area v2
  f2fs: fix to do sanity check with inline flags
  f2fs: fix to reset i_gc_failures correctly
  f2fs: fix invalid memory access
  f2fs: fix to avoid broken of dnode block list
  ...
parents 6faf05c2 6aa58d8a
Loading
Loading
Loading
Loading
+8 −0
Original line number Diff line number Diff line
@@ -51,6 +51,14 @@ Description:
		 Controls the dirty page count condition for the in-place-update
		 policies.

What:		/sys/fs/f2fs/<disk>/min_seq_blocks
Date:		August 2018
Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
Description:
		 Controls the dirty page count condition for batched sequential
		 writes in ->writepages.


What:		/sys/fs/f2fs/<disk>/min_hot_blocks
Date:		March 2017
Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
+18 −0
Original line number Diff line number Diff line
@@ -157,6 +157,24 @@ data_flush Enable data flushing before checkpoint in order to
                       persist data of regular and symlink.
fault_injection=%d     Enable fault injection in all supported types with
                       specified injection rate.
fault_type=%d          Support configuring fault injection type, should be
                       enabled with fault_injection option, fault type value
                       is shown below, it supports single or combined type.
                       Type_Name		Type_Value
                       FAULT_KMALLOC		0x000000001
                       FAULT_KVMALLOC		0x000000002
                       FAULT_PAGE_ALLOC		0x000000004
                       FAULT_PAGE_GET		0x000000008
                       FAULT_ALLOC_BIO		0x000000010
                       FAULT_ALLOC_NID		0x000000020
                       FAULT_ORPHAN		0x000000040
                       FAULT_BLOCK		0x000000080
                       FAULT_DIR_DEPTH		0x000000100
                       FAULT_EVICT_INODE	0x000000200
                       FAULT_TRUNCATE		0x000000400
                       FAULT_IO			0x000000800
                       FAULT_CHECKPOINT		0x000001000
                       FAULT_DISCARD		0x000002000
mode=%s                Control block allocation mode which supports "adaptive"
                       and "lfs". In "lfs" mode, there should be no random
                       writes towards main area.
+106 −53
Original line number Diff line number Diff line
@@ -28,6 +28,7 @@ struct kmem_cache *f2fs_inode_entry_slab;

void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io)
{
	f2fs_build_fault_attr(sbi, 0, 0);
	set_ckpt_flags(sbi, CP_ERROR_FLAG);
	if (!end_io)
		f2fs_flush_merged_writes(sbi);
@@ -70,6 +71,7 @@ static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index,
		.encrypted_page = NULL,
		.is_meta = is_meta,
	};
	int err;

	if (unlikely(!is_meta))
		fio.op_flags &= ~REQ_META;
@@ -84,9 +86,10 @@ static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index,

	fio.page = page;

	if (f2fs_submit_page_bio(&fio)) {
	err = f2fs_submit_page_bio(&fio);
	if (err) {
		f2fs_put_page(page, 1);
		goto repeat;
		return ERR_PTR(err);
	}

	lock_page(page);
@@ -95,14 +98,9 @@ static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index,
		goto repeat;
	}

	/*
	 * if there is any IO error when accessing device, make our filesystem
	 * readonly and make sure do not write checkpoint with non-uptodate
	 * meta page.
	 */
	if (unlikely(!PageUptodate(page))) {
		memset(page_address(page), 0, PAGE_SIZE);
		f2fs_stop_checkpoint(sbi, false);
		f2fs_put_page(page, 1);
		return ERR_PTR(-EIO);
	}
out:
	return page;
@@ -113,13 +111,32 @@ struct page *f2fs_get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
	return __get_meta_page(sbi, index, true);
}

struct page *f2fs_get_meta_page_nofail(struct f2fs_sb_info *sbi, pgoff_t index)
{
	struct page *page;
	int count = 0;

retry:
	page = __get_meta_page(sbi, index, true);
	if (IS_ERR(page)) {
		if (PTR_ERR(page) == -EIO &&
				++count <= DEFAULT_RETRY_IO_COUNT)
			goto retry;

		f2fs_stop_checkpoint(sbi, false);
		f2fs_bug_on(sbi, 1);
	}

	return page;
}

/* for POR only */
struct page *f2fs_get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index)
{
	return __get_meta_page(sbi, index, false);
}

bool f2fs_is_valid_meta_blkaddr(struct f2fs_sb_info *sbi,
bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
					block_t blkaddr, int type)
{
	switch (type) {
@@ -140,8 +157,20 @@ bool f2fs_is_valid_meta_blkaddr(struct f2fs_sb_info *sbi,
			return false;
		break;
	case META_POR:
	case DATA_GENERIC:
		if (unlikely(blkaddr >= MAX_BLKADDR(sbi) ||
			blkaddr < MAIN_BLKADDR(sbi)))
			blkaddr < MAIN_BLKADDR(sbi))) {
			if (type == DATA_GENERIC) {
				f2fs_msg(sbi->sb, KERN_WARNING,
					"access invalid blkaddr:%u", blkaddr);
				WARN_ON(1);
			}
			return false;
		}
		break;
	case META_GENERIC:
		if (unlikely(blkaddr < SEG0_BLKADDR(sbi) ||
			blkaddr >= MAIN_BLKADDR(sbi)))
			return false;
		break;
	default:
@@ -176,7 +205,7 @@ int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
	blk_start_plug(&plug);
	for (; nrpages-- > 0; blkno++) {

		if (!f2fs_is_valid_meta_blkaddr(sbi, blkno, type))
		if (!f2fs_is_valid_blkaddr(sbi, blkno, type))
			goto out;

		switch (type) {
@@ -242,11 +271,8 @@ static int __f2fs_write_meta_page(struct page *page,

	trace_f2fs_writepage(page, META);

	if (unlikely(f2fs_cp_error(sbi))) {
		dec_page_count(sbi, F2FS_DIRTY_META);
		unlock_page(page);
		return 0;
	}
	if (unlikely(f2fs_cp_error(sbi)))
		goto redirty_out;
	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
		goto redirty_out;
	if (wbc->for_reclaim && page->index < GET_SUM_BLOCK(sbi, 0))
@@ -529,13 +555,12 @@ int f2fs_acquire_orphan_inode(struct f2fs_sb_info *sbi)

	spin_lock(&im->ino_lock);

#ifdef CONFIG_F2FS_FAULT_INJECTION
	if (time_to_inject(sbi, FAULT_ORPHAN)) {
		spin_unlock(&im->ino_lock);
		f2fs_show_injection_info(FAULT_ORPHAN);
		return -ENOSPC;
	}
#endif

	if (unlikely(im->ino_num >= sbi->max_orphans))
		err = -ENOSPC;
	else
@@ -572,12 +597,7 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
	struct inode *inode;
	struct node_info ni;
	int err = f2fs_acquire_orphan_inode(sbi);

	if (err)
		goto err_out;

	__add_ino_entry(sbi, ino, 0, ORPHAN_INO);
	int err;

	inode = f2fs_iget_retry(sbi->sb, ino);
	if (IS_ERR(inode)) {
@@ -600,14 +620,15 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
	/* truncate all the data during iput */
	iput(inode);

	f2fs_get_node_info(sbi, ino, &ni);
	err = f2fs_get_node_info(sbi, ino, &ni);
	if (err)
		goto err_out;

	/* ENOMEM was fully retried in f2fs_evict_inode. */
	if (ni.blk_addr != NULL_ADDR) {
		err = -EIO;
		goto err_out;
	}
	__remove_ino_entry(sbi, ino, ORPHAN_INO);
	return 0;

err_out:
@@ -639,7 +660,10 @@ int f2fs_recover_orphan_inodes(struct f2fs_sb_info *sbi)
	/* Needed for iput() to work correctly and not trash data */
	sbi->sb->s_flags |= SB_ACTIVE;

	/* Turn on quotas so that they are updated correctly */
	/*
	 * Turn on quotas which were not enabled for read-only mounts if
	 * filesystem has quota feature, so that they are updated correctly.
	 */
	quota_enabled = f2fs_enable_quota_files(sbi, s_flags & SB_RDONLY);
#endif

@@ -649,9 +673,15 @@ int f2fs_recover_orphan_inodes(struct f2fs_sb_info *sbi)
	f2fs_ra_meta_pages(sbi, start_blk, orphan_blocks, META_CP, true);

	for (i = 0; i < orphan_blocks; i++) {
		struct page *page = f2fs_get_meta_page(sbi, start_blk + i);
		struct page *page;
		struct f2fs_orphan_block *orphan_blk;

		page = f2fs_get_meta_page(sbi, start_blk + i);
		if (IS_ERR(page)) {
			err = PTR_ERR(page);
			goto out;
		}

		orphan_blk = (struct f2fs_orphan_block *)page_address(page);
		for (j = 0; j < le32_to_cpu(orphan_blk->entry_count); j++) {
			nid_t ino = le32_to_cpu(orphan_blk->ino[j]);
@@ -742,10 +772,14 @@ static int get_checkpoint_version(struct f2fs_sb_info *sbi, block_t cp_addr,
	__u32 crc = 0;

	*cp_page = f2fs_get_meta_page(sbi, cp_addr);
	if (IS_ERR(*cp_page))
		return PTR_ERR(*cp_page);

	*cp_block = (struct f2fs_checkpoint *)page_address(*cp_page);

	crc_offset = le32_to_cpu((*cp_block)->checksum_offset);
	if (crc_offset > (blk_size - sizeof(__le32))) {
		f2fs_put_page(*cp_page, 1);
		f2fs_msg(sbi->sb, KERN_WARNING,
			"invalid crc_offset: %zu", crc_offset);
		return -EINVAL;
@@ -753,6 +787,7 @@ static int get_checkpoint_version(struct f2fs_sb_info *sbi, block_t cp_addr,

	crc = cur_cp_crc(*cp_block);
	if (!f2fs_crc_valid(sbi, crc, *cp_block, crc_offset)) {
		f2fs_put_page(*cp_page, 1);
		f2fs_msg(sbi->sb, KERN_WARNING, "invalid crc value");
		return -EINVAL;
	}
@@ -772,14 +807,22 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
	err = get_checkpoint_version(sbi, cp_addr, &cp_block,
					&cp_page_1, version);
	if (err)
		goto invalid_cp1;
		return NULL;

	if (le32_to_cpu(cp_block->cp_pack_total_block_count) >
					sbi->blocks_per_seg) {
		f2fs_msg(sbi->sb, KERN_WARNING,
			"invalid cp_pack_total_block_count:%u",
			le32_to_cpu(cp_block->cp_pack_total_block_count));
		goto invalid_cp;
	}
	pre_version = *version;

	cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1;
	err = get_checkpoint_version(sbi, cp_addr, &cp_block,
					&cp_page_2, version);
	if (err)
		goto invalid_cp2;
		goto invalid_cp;
	cur_version = *version;

	if (cur_version == pre_version) {
@@ -787,9 +830,8 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
		f2fs_put_page(cp_page_2, 1);
		return cp_page_1;
	}
invalid_cp2:
	f2fs_put_page(cp_page_2, 1);
invalid_cp1:
invalid_cp:
	f2fs_put_page(cp_page_1, 1);
	return NULL;
}
@@ -838,15 +880,15 @@ int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi)
	cp_block = (struct f2fs_checkpoint *)page_address(cur_page);
	memcpy(sbi->ckpt, cp_block, blk_size);

	/* Sanity checking of checkpoint */
	if (f2fs_sanity_check_ckpt(sbi))
		goto free_fail_no_cp;

	if (cur_page == cp1)
		sbi->cur_cp_pack = 1;
	else
		sbi->cur_cp_pack = 2;

	/* Sanity checking of checkpoint */
	if (f2fs_sanity_check_ckpt(sbi))
		goto free_fail_no_cp;

	if (cp_blks <= 1)
		goto done;

@@ -859,6 +901,8 @@ int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi)
		unsigned char *ckpt = (unsigned char *)sbi->ckpt;

		cur_page = f2fs_get_meta_page(sbi, cp_blk_no + i);
		if (IS_ERR(cur_page))
			goto free_fail_no_cp;
		sit_bitmap_ptr = page_address(cur_page);
		memcpy(ckpt + i * blk_size, sit_bitmap_ptr, blk_size);
		f2fs_put_page(cur_page, 1);
@@ -980,12 +1024,10 @@ int f2fs_sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type)

		iput(inode);
		/* We need to give cpu to another writers. */
		if (ino == cur_ino) {
			congestion_wait(BLK_RW_ASYNC, HZ/50);
		if (ino == cur_ino)
			cond_resched();
		} else {
		else
			ino = cur_ino;
		}
	} else {
		/*
		 * We should submit bio, since it exists several
@@ -1119,7 +1161,7 @@ static void unblock_operations(struct f2fs_sb_info *sbi)
	f2fs_unlock_all(sbi);
}

static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
void f2fs_wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
{
	DEFINE_WAIT(wait);

@@ -1129,6 +1171,9 @@ static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
		if (!get_pages(sbi, F2FS_WB_CP_DATA))
			break;

		if (unlikely(f2fs_cp_error(sbi)))
			break;

		io_schedule_timeout(5*HZ);
	}
	finish_wait(&sbi->cp_wait, &wait);
@@ -1202,8 +1247,12 @@ static void commit_checkpoint(struct f2fs_sb_info *sbi,

	/* writeout cp pack 2 page */
	err = __f2fs_write_meta_page(page, &wbc, FS_CP_META_IO);
	f2fs_bug_on(sbi, err);
	if (unlikely(err && f2fs_cp_error(sbi))) {
		f2fs_put_page(page, 1);
		return;
	}

	f2fs_bug_on(sbi, err);
	f2fs_put_page(page, 0);

	/* submit checkpoint (with barrier if NOBARRIER is not set) */
@@ -1229,7 +1278,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
	while (get_pages(sbi, F2FS_DIRTY_META)) {
		f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
		if (unlikely(f2fs_cp_error(sbi)))
			return -EIO;
			break;
	}

	/*
@@ -1309,7 +1358,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
			f2fs_sync_meta_pages(sbi, META, LONG_MAX,
							FS_CP_META_IO);
			if (unlikely(f2fs_cp_error(sbi)))
				return -EIO;
				break;
		}
	}

@@ -1348,10 +1397,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
	f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);

	/* wait for previous submitted meta pages writeback */
	wait_on_all_pages_writeback(sbi);

	if (unlikely(f2fs_cp_error(sbi)))
		return -EIO;
	f2fs_wait_on_all_pages_writeback(sbi);

	/* flush all device cache */
	err = f2fs_flush_device_cache(sbi);
@@ -1360,12 +1406,19 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)

	/* barrier and flush checkpoint cp pack 2 page if it can */
	commit_checkpoint(sbi, ckpt, start_blk);
	wait_on_all_pages_writeback(sbi);
	f2fs_wait_on_all_pages_writeback(sbi);

	/*
	 * invalidate intermediate page cache borrowed from meta inode
	 * which are used for migration of encrypted inode's blocks.
	 */
	if (f2fs_sb_has_encrypt(sbi->sb))
		invalidate_mapping_pages(META_MAPPING(sbi),
				MAIN_BLKADDR(sbi), MAX_BLKADDR(sbi) - 1);

	f2fs_release_ino_entry(sbi, false);

	if (unlikely(f2fs_cp_error(sbi)))
		return -EIO;
	f2fs_reset_fsync_node_info(sbi);

	clear_sbi_flag(sbi, SBI_IS_DIRTY);
	clear_sbi_flag(sbi, SBI_NEED_CP);
@@ -1381,7 +1434,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)

	f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_DENTS));

	return 0;
	return unlikely(f2fs_cp_error(sbi)) ? -EIO : 0;
}

/*
+137 −45
Original line number Diff line number Diff line
@@ -126,12 +126,10 @@ static bool f2fs_bio_post_read_required(struct bio *bio)

static void f2fs_read_end_io(struct bio *bio)
{
#ifdef CONFIG_F2FS_FAULT_INJECTION
	if (time_to_inject(F2FS_P_SB(bio_first_page_all(bio)), FAULT_IO)) {
		f2fs_show_injection_info(FAULT_IO);
		bio->bi_status = BLK_STS_IOERR;
	}
#endif

	if (f2fs_bio_post_read_required(bio)) {
		struct bio_post_read_ctx *ctx = bio->bi_private;
@@ -177,6 +175,8 @@ static void f2fs_write_end_io(struct bio *bio)
					page->index != nid_of_node(page));

		dec_page_count(sbi, type);
		if (f2fs_in_warm_node_list(sbi, page))
			f2fs_del_fsync_node_entry(sbi, page);
		clear_cold_data(page);
		end_page_writeback(page);
	}
@@ -264,7 +264,7 @@ static inline void __submit_bio(struct f2fs_sb_info *sbi,
		if (type != DATA && type != NODE)
			goto submit_io;

		if (f2fs_sb_has_blkzoned(sbi->sb) && current->plug)
		if (test_opt(sbi, LFS) && current->plug)
			blk_finish_plug(current->plug);

		start = bio->bi_iter.bi_size >> F2FS_BLKSIZE_BITS;
@@ -441,7 +441,10 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
	struct page *page = fio->encrypted_page ?
			fio->encrypted_page : fio->page;

	verify_block_addr(fio, fio->new_blkaddr);
	if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
			__is_meta_io(fio) ? META_GENERIC : DATA_GENERIC))
		return -EFAULT;

	trace_f2fs_submit_page_bio(page, fio);
	f2fs_trace_ios(fio, 0);

@@ -485,7 +488,7 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
		spin_unlock(&io->io_lock);
	}

	if (is_valid_blkaddr(fio->old_blkaddr))
	if (__is_valid_data_blkaddr(fio->old_blkaddr))
		verify_block_addr(fio, fio->old_blkaddr);
	verify_block_addr(fio, fio->new_blkaddr);

@@ -534,19 +537,22 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
}

static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
							 unsigned nr_pages)
					unsigned nr_pages, unsigned op_flag)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct bio *bio;
	struct bio_post_read_ctx *ctx;
	unsigned int post_read_steps = 0;

	if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC))
		return ERR_PTR(-EFAULT);

	bio = f2fs_bio_alloc(sbi, min_t(int, nr_pages, BIO_MAX_PAGES), false);
	if (!bio)
		return ERR_PTR(-ENOMEM);
	f2fs_target_device(sbi, blkaddr, bio);
	bio->bi_end_io = f2fs_read_end_io;
	bio_set_op_attrs(bio, REQ_OP_READ, 0);
	bio_set_op_attrs(bio, REQ_OP_READ, op_flag);

	if (f2fs_encrypted_file(inode))
		post_read_steps |= 1 << STEP_DECRYPT;
@@ -571,7 +577,7 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
static int f2fs_submit_page_read(struct inode *inode, struct page *page,
							block_t blkaddr)
{
	struct bio *bio = f2fs_grab_read_bio(inode, blkaddr, 1);
	struct bio *bio = f2fs_grab_read_bio(inode, blkaddr, 1, 0);

	if (IS_ERR(bio))
		return PTR_ERR(bio);
@@ -869,6 +875,7 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
	struct f2fs_summary sum;
	struct node_info ni;
	block_t old_blkaddr;
	pgoff_t fofs;
	blkcnt_t count = 1;
	int err;
@@ -876,6 +883,10 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
	if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
		return -EPERM;

	err = f2fs_get_node_info(sbi, dn->nid, &ni);
	if (err)
		return err;

	dn->data_blkaddr = datablock_addr(dn->inode,
				dn->node_page, dn->ofs_in_node);
	if (dn->data_blkaddr == NEW_ADDR)
@@ -885,11 +896,13 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
		return err;

alloc:
	f2fs_get_node_info(sbi, dn->nid, &ni);
	set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);

	f2fs_allocate_data_block(sbi, NULL, dn->data_blkaddr, &dn->data_blkaddr,
	old_blkaddr = dn->data_blkaddr;
	f2fs_allocate_data_block(sbi, NULL, old_blkaddr, &dn->data_blkaddr,
					&sum, seg_type, NULL, false);
	if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
		invalidate_mapping_pages(META_MAPPING(sbi),
					old_blkaddr, old_blkaddr);
	f2fs_set_data_blkaddr(dn);

	/* update i_size */
@@ -1045,7 +1058,13 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
next_block:
	blkaddr = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node);

	if (!is_valid_blkaddr(blkaddr)) {
	if (__is_valid_data_blkaddr(blkaddr) &&
		!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC)) {
		err = -EFAULT;
		goto sync_out;
	}

	if (!is_valid_data_blkaddr(sbi, blkaddr)) {
		if (create) {
			if (unlikely(f2fs_cp_error(sbi))) {
				err = -EIO;
@@ -1282,7 +1301,11 @@ static int f2fs_xattr_fiemap(struct inode *inode,
		if (!page)
			return -ENOMEM;

		f2fs_get_node_info(sbi, inode->i_ino, &ni);
		err = f2fs_get_node_info(sbi, inode->i_ino, &ni);
		if (err) {
			f2fs_put_page(page, 1);
			return err;
		}

		phys = (__u64)blk_to_logical(inode, ni.blk_addr);
		offset = offsetof(struct f2fs_inode, i_addr) +
@@ -1309,7 +1332,11 @@ static int f2fs_xattr_fiemap(struct inode *inode,
		if (!page)
			return -ENOMEM;

		f2fs_get_node_info(sbi, xnid, &ni);
		err = f2fs_get_node_info(sbi, xnid, &ni);
		if (err) {
			f2fs_put_page(page, 1);
			return err;
		}

		phys = (__u64)blk_to_logical(inode, ni.blk_addr);
		len = inode->i_sb->s_blocksize;
@@ -1425,11 +1452,11 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 * Note that the aops->readpages() function is ONLY used for read-ahead. If
 * this function ever deviates from doing just read-ahead, it should either
 * use ->readpage() or do the necessary surgery to decouple ->readpages()
 * readom read-ahead.
 * from read-ahead.
 */
static int f2fs_mpage_readpages(struct address_space *mapping,
			struct list_head *pages, struct page *page,
			unsigned nr_pages)
			unsigned nr_pages, bool is_readahead)
{
	struct bio *bio = NULL;
	sector_t last_block_in_bio = 0;
@@ -1500,6 +1527,10 @@ static int f2fs_mpage_readpages(struct address_space *mapping,
				SetPageUptodate(page);
				goto confused;
			}

			if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr,
								DATA_GENERIC))
				goto set_error_page;
		} else {
			zero_user_segment(page, 0, PAGE_SIZE);
			if (!PageUptodate(page))
@@ -1519,7 +1550,8 @@ static int f2fs_mpage_readpages(struct address_space *mapping,
			bio = NULL;
		}
		if (bio == NULL) {
			bio = f2fs_grab_read_bio(inode, block_nr, nr_pages);
			bio = f2fs_grab_read_bio(inode, block_nr, nr_pages,
					is_readahead ? REQ_RAHEAD : 0);
			if (IS_ERR(bio)) {
				bio = NULL;
				goto set_error_page;
@@ -1563,7 +1595,7 @@ static int f2fs_read_data_page(struct file *file, struct page *page)
	if (f2fs_has_inline_data(inode))
		ret = f2fs_read_inline_data(inode, page);
	if (ret == -EAGAIN)
		ret = f2fs_mpage_readpages(page->mapping, NULL, page, 1);
		ret = f2fs_mpage_readpages(page->mapping, NULL, page, 1, false);
	return ret;
}

@@ -1580,12 +1612,13 @@ static int f2fs_read_data_pages(struct file *file,
	if (f2fs_has_inline_data(inode))
		return 0;

	return f2fs_mpage_readpages(mapping, pages, NULL, nr_pages);
	return f2fs_mpage_readpages(mapping, pages, NULL, nr_pages, true);
}

static int encrypt_one_page(struct f2fs_io_info *fio)
{
	struct inode *inode = fio->page->mapping->host;
	struct page *mpage;
	gfp_t gfp_flags = GFP_NOFS;

	if (!f2fs_encrypted_file(inode))
@@ -1597,9 +1630,7 @@ static int encrypt_one_page(struct f2fs_io_info *fio)
retry_encrypt:
	fio->encrypted_page = fscrypt_encrypt_page(inode, fio->page,
			PAGE_SIZE, 0, fio->page->index, gfp_flags);
	if (!IS_ERR(fio->encrypted_page))
		return 0;

	if (IS_ERR(fio->encrypted_page)) {
		/* flush pending IOs and wait for a while in the ENOMEM case */
		if (PTR_ERR(fio->encrypted_page) == -ENOMEM) {
			f2fs_flush_merged_writes(fio->sbi);
@@ -1610,6 +1641,16 @@ static int encrypt_one_page(struct f2fs_io_info *fio)
		return PTR_ERR(fio->encrypted_page);
	}

	mpage = find_lock_page(META_MAPPING(fio->sbi), fio->old_blkaddr);
	if (mpage) {
		if (PageUptodate(mpage))
			memcpy(page_address(mpage),
				page_address(fio->encrypted_page), PAGE_SIZE);
		f2fs_put_page(mpage, 1);
	}
	return 0;
}

static inline bool check_inplace_update_policy(struct inode *inode,
				struct f2fs_io_info *fio)
{
@@ -1691,6 +1732,7 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio)
	struct inode *inode = page->mapping->host;
	struct dnode_of_data dn;
	struct extent_info ei = {0,0,0};
	struct node_info ni;
	bool ipu_force = false;
	int err = 0;

@@ -1699,12 +1741,14 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio)
			f2fs_lookup_extent_cache(inode, page->index, &ei)) {
		fio->old_blkaddr = ei.blk + page->index - ei.fofs;

		if (is_valid_blkaddr(fio->old_blkaddr)) {
		if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
							DATA_GENERIC))
			return -EFAULT;

		ipu_force = true;
		fio->need_lock = LOCK_DONE;
		goto got_it;
	}
	}

	/* Deadlock due to between page->lock and f2fs_lock_op */
	if (fio->need_lock == LOCK_REQ && !f2fs_trylock_op(fio->sbi))
@@ -1722,11 +1766,17 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio)
		goto out_writepage;
	}
got_it:
	if (__is_valid_data_blkaddr(fio->old_blkaddr) &&
		!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
							DATA_GENERIC)) {
		err = -EFAULT;
		goto out_writepage;
	}
	/*
	 * If current allocation needs SSR,
	 * it had better in-place writes for updated data.
	 */
	if (ipu_force || (is_valid_blkaddr(fio->old_blkaddr) &&
	if (ipu_force || (is_valid_data_blkaddr(fio->sbi, fio->old_blkaddr) &&
					need_inplace_update(fio))) {
		err = encrypt_one_page(fio);
		if (err)
@@ -1751,6 +1801,12 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio)
		fio->need_lock = LOCK_REQ;
	}

	err = f2fs_get_node_info(fio->sbi, dn.nid, &ni);
	if (err)
		goto out_writepage;

	fio->version = ni.version;

	err = encrypt_one_page(fio);
	if (err)
		goto out_writepage;
@@ -2079,6 +2135,18 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
	return ret;
}

static inline bool __should_serialize_io(struct inode *inode,
					struct writeback_control *wbc)
{
	if (!S_ISREG(inode->i_mode))
		return false;
	if (wbc->sync_mode != WB_SYNC_ALL)
		return true;
	if (get_dirty_pages(inode) >= SM_I(F2FS_I_SB(inode))->min_seq_blocks)
		return true;
	return false;
}

static int __f2fs_write_data_pages(struct address_space *mapping,
						struct writeback_control *wbc,
						enum iostat_type io_type)
@@ -2087,6 +2155,7 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct blk_plug plug;
	int ret;
	bool locked = false;

	/* deal with chardevs and other special file */
	if (!mapping->a_ops->writepage)
@@ -2117,10 +2186,18 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
	else if (atomic_read(&sbi->wb_sync_req[DATA]))
		goto skip_write;

	if (__should_serialize_io(inode, wbc)) {
		mutex_lock(&sbi->writepages);
		locked = true;
	}

	blk_start_plug(&plug);
	ret = f2fs_write_cache_pages(mapping, wbc, io_type);
	blk_finish_plug(&plug);

	if (locked)
		mutex_unlock(&sbi->writepages);

	if (wbc->sync_mode == WB_SYNC_ALL)
		atomic_dec(&sbi->wb_sync_req[DATA]);
	/*
@@ -2153,10 +2230,14 @@ static void f2fs_write_failed(struct address_space *mapping, loff_t to)
	loff_t i_size = i_size_read(inode);

	if (to > i_size) {
		down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
		down_write(&F2FS_I(inode)->i_mmap_sem);

		truncate_pagecache(inode, i_size);
		f2fs_truncate_blocks(inode, i_size, true);

		up_write(&F2FS_I(inode)->i_mmap_sem);
		up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
	}
}

@@ -2251,8 +2332,9 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,

	trace_f2fs_write_begin(inode, pos, len, flags);

	if (f2fs_is_atomic_file(inode) &&
			!f2fs_available_free_memory(sbi, INMEM_PAGES)) {
	if ((f2fs_is_atomic_file(inode) &&
			!f2fs_available_free_memory(sbi, INMEM_PAGES)) ||
			is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST)) {
		err = -ENOMEM;
		drop_atomic = true;
		goto fail;
@@ -2376,14 +2458,20 @@ static int f2fs_write_end(struct file *file,
static int check_direct_IO(struct inode *inode, struct iov_iter *iter,
			   loff_t offset)
{
	unsigned blocksize_mask = inode->i_sb->s_blocksize - 1;

	if (offset & blocksize_mask)
	unsigned i_blkbits = READ_ONCE(inode->i_blkbits);
	unsigned blkbits = i_blkbits;
	unsigned blocksize_mask = (1 << blkbits) - 1;
	unsigned long align = offset | iov_iter_alignment(iter);
	struct block_device *bdev = inode->i_sb->s_bdev;

	if (align & blocksize_mask) {
		if (bdev)
			blkbits = blksize_bits(bdev_logical_block_size(bdev));
		blocksize_mask = (1 << blkbits) - 1;
		if (align & blocksize_mask)
			return -EINVAL;

	if (iov_iter_alignment(iter) & blocksize_mask)
		return -EINVAL;

		return 1;
	}
	return 0;
}

@@ -2401,7 +2489,7 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)

	err = check_direct_IO(inode, iter, offset);
	if (err)
		return err;
		return err < 0 ? err : 0;

	if (f2fs_force_buffered_io(inode, rw))
		return 0;
@@ -2495,6 +2583,10 @@ static int f2fs_set_data_page_dirty(struct page *page)
	if (!PageUptodate(page))
		SetPageUptodate(page);

	/* don't remain PG_checked flag which was set during GC */
	if (is_cold_data(page))
		clear_cold_data(page);

	if (f2fs_is_atomic_file(inode) && !f2fs_is_commit_atomic_write(inode)) {
		if (!IS_ATOMIC_WRITTEN_PAGE(page)) {
			f2fs_register_inmem_page(inode, page);
+2 −1
Original line number Diff line number Diff line
@@ -215,7 +215,8 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
	si->base_mem += sizeof(struct f2fs_nm_info);
	si->base_mem += __bitmap_size(sbi, NAT_BITMAP);
	si->base_mem += (NM_I(sbi)->nat_bits_blocks << F2FS_BLKSIZE_BITS);
	si->base_mem += NM_I(sbi)->nat_blocks * NAT_ENTRY_BITMAP_SIZE;
	si->base_mem += NM_I(sbi)->nat_blocks *
				f2fs_bitmap_size(NAT_ENTRY_PER_BLOCK);
	si->base_mem += NM_I(sbi)->nat_blocks / 8;
	si->base_mem += NM_I(sbi)->nat_blocks * sizeof(unsigned short);

Loading