
Commit f9a03ae1 authored by Linus Torvalds
Pull f2fs updates from Jaegeuk Kim:
 "This series adds two ioctls to control cached data and fragmented
  files.  Most of the rest fixes missing error cases and bugs that we
  have not covered so far.  Summary:

  Enhancements:
   - support an ioctl to execute online file defragmentation
   - support an ioctl to flush cached data
   - speed up shrinking of extent_cache entries
   - handle broken superblock
   - refactor dirty inode management infrastructure
   - revisit f2fs_map_blocks to handle more cases
   - reduce global lock coverage
   - add detection of user idle time

  Major bug fixes:
   - fix data race condition on cached nat entries
   - fix error cases of volatile and atomic writes"
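
The two ioctls called out above (online file defragmentation and flushing cached data) are driven from userspace. Below is a hedged sketch of how they might be invoked; the command numbers mirror the definitions this series appears to add to fs/f2fs/f2fs.h (magic 0xf5, checkpoint write as command 7, defragment as command 8 taking a start/len pair). Treat them as assumptions and prefer the headers in your own tree.

/* Hypothetical userspace sketch of the two new ioctls. The command
 * numbers are assumptions based on my reading of this series; take the
 * real definitions from fs/f2fs/f2fs.h in your tree. */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

#define F2FS_IOCTL_MAGIC        0xf5

struct f2fs_defragment {
        uint64_t start;         /* byte offset to start defragmenting */
        uint64_t len;           /* number of bytes to defragment */
};

#define F2FS_IOC_WRITE_CHECKPOINT _IO(F2FS_IOCTL_MAGIC, 7)
#define F2FS_IOC_DEFRAGMENT       _IOW(F2FS_IOCTL_MAGIC, 8, struct f2fs_defragment)

int main(int argc, char **argv)
{
        struct f2fs_defragment df = { .start = 0, .len = 64ULL << 20 };
        int fd;

        if (argc < 2)
                return 1;
        fd = open(argv[1], O_RDWR);
        if (fd < 0)
                return 1;
        /* Ask f2fs to relocate the first 64MB of the file contiguously. */
        if (ioctl(fd, F2FS_IOC_DEFRAGMENT, &df) < 0)
                perror("F2FS_IOC_DEFRAGMENT");
        /* Force a checkpoint, flushing cached data to disk. */
        if (ioctl(fd, F2FS_IOC_WRITE_CHECKPOINT) < 0)
                perror("F2FS_IOC_WRITE_CHECKPOINT");
        close(fd);
        return 0;
}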

* tag 'for-f2fs-4.5' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (87 commits)
  f2fs: should unset atomic flag after successful commit
  f2fs: fix wrong memory condition check
  f2fs: monitor the number of background checkpoints
  f2fs: detect idle time depending on user behavior
  f2fs: introduce time and interval facility
  f2fs: skip releasing nodes in childless extent tree
  f2fs: use atomic type for node count in extent tree
  f2fs: recognize encrypted data in f2fs_fiemap
  f2fs: clean up f2fs_balance_fs
  f2fs: remove redundant calls
  f2fs: avoid unnecessary f2fs_balance_fs calls
  f2fs: check the page status filled from disk
  f2fs: introduce __get_node_page to reuse common code
  f2fs: check node id early when readaheading node page
  f2fs: read isize while holding i_mutex in fiemap
  Revert "f2fs: check the node block address of newly allocated nid"
  f2fs: cover more area with nat_tree_lock
  f2fs: introduce max_file_blocks in sbi
  f2fs crypto: check CONFIG_F2FS_FS_XATTR for encrypted symlink
  f2fs: introduce zombie list for fast shrinking extent trees
  ...
parents 1289ace5 447135a8
Documentation/ABI/testing/sysfs-fs-f2fs: +6 −0
@@ -87,6 +87,12 @@ Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
Description:
		 Controls the checkpoint timing.

What:		/sys/fs/f2fs/<disk>/idle_interval
Date:		January 2016
Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
Description:
		 Controls the idle timing.

What:		/sys/fs/f2fs/<disk>/ra_nid_pages
Date:		October 2015
Contact:	"Chao Yu" <chao2.yu@samsung.com>
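
The new idle_interval entry sits alongside the existing cp_interval knob. Here is a minimal sketch of tuning it from a program; the device name sda1 is hypothetical, and while the series' default value is 5 (which I read as seconds), the unit is worth verifying against your tree.

#include <fcntl.h>
#include <unistd.h>

int main(void)
{
        /* "sda1" is a placeholder; use your f2fs partition's name. */
        int fd = open("/sys/fs/f2fs/sda1/idle_interval", O_WRONLY);

        if (fd < 0)
                return 1;
        /* 5 is the default shipped by this series; larger values make
         * f2fs wait longer before it considers the user idle. */
        if (write(fd, "5", 1) != 1) {
                close(fd);
                return 1;
        }
        close(fd);
        return 0;
}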
Documentation/filesystems/f2fs.txt: +6 −4
@@ -102,7 +102,7 @@ background_gc=%s Turn on/off cleaning operations, namely garbage
                       collection, triggered in background when I/O subsystem is
                       idle. If background_gc=on, it will turn on the garbage
                       collection and if background_gc=off, garbage collection
                       will be truned off. If background_gc=sync, it will turn
                       will be turned off. If background_gc=sync, it will turn
                       on synchronous garbage collection running in background.
                       Default value for this option is on. So garbage
                       collection is on by default.
@@ -145,10 +145,12 @@ extent_cache Enable an extent cache based on rb-tree, it can cache
                       as many as extent which map between contiguous logical
                       address and physical address per inode, resulting in
                       increasing the cache hit ratio. Set by default.
noextent_cache         Diable an extent cache based on rb-tree explicitly, see
noextent_cache         Disable an extent cache based on rb-tree explicitly, see
                       the above extent_cache mount option.
noinline_data          Disable the inline data feature, inline data feature is
                       enabled by default.
data_flush             Enable data flushing before checkpoint in order to
                       persist data of regular and symlink.

================================================================================
DEBUGFS ENTRIES
@@ -192,7 +194,7 @@ Files in /sys/fs/f2fs/<devname>
                              policy for garbage collection. Setting gc_idle = 0
                              (default) will disable this option. Setting
                              gc_idle = 1 will select the Cost Benefit approach
                              & setting gc_idle = 2 will select the greedy aproach.
                              & setting gc_idle = 2 will select the greedy approach.

 reclaim_segments             This parameter controls the number of prefree
                              segments to be reclaimed. If the number of prefree
@@ -298,7 +300,7 @@ The dump.f2fs shows the information of specific inode and dumps SSA and SIT to
file. Each file is dump_ssa and dump_sit.

The dump.f2fs is used to debug on-disk data structures of the f2fs filesystem.
It shows on-disk inode information reconized by a given inode number, and is
It shows on-disk inode information recognized by a given inode number, and is
able to dump all the SSA and SIT entries into predefined files, ./dump_ssa and
./dump_sit respectively.
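
Tying the mount options above together, here is a hedged mount(2) sketch. The device and mountpoint paths are hypothetical, and data_flush is the option this diff introduces, so confirm your kernel carries it before relying on it.

#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
        /* background_gc=sync enables synchronous background GC;
         * data_flush (added in this series) flushes regular and
         * symlink data before each checkpoint. Paths are placeholders. */
        if (mount("/dev/sda1", "/mnt/f2fs", "f2fs", 0,
                  "background_gc=sync,data_flush") < 0) {
                perror("mount");
                return 1;
        }
        return 0;
}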

fs/f2fs/checkpoint.c: +87 −90
@@ -237,7 +237,7 @@ static int f2fs_write_meta_page(struct page *page,
	dec_page_count(sbi, F2FS_DIRTY_META);
	unlock_page(page);

	if (wbc->for_reclaim)
	if (wbc->for_reclaim || unlikely(f2fs_cp_error(sbi)))
		f2fs_submit_merged_bio(sbi, META, WRITE);
	return 0;

@@ -410,13 +410,13 @@ static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
	spin_unlock(&im->ino_lock);
}

void add_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type)
void add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
{
	/* add new dirty ino entry into list */
	__add_ino_entry(sbi, ino, type);
}

void remove_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type)
void remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
{
	/* remove dirty ino entry from list */
	__remove_ino_entry(sbi, ino, type);
@@ -434,7 +434,7 @@ bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode)
	return e ? true : false;
}

void release_dirty_inode(struct f2fs_sb_info *sbi)
void release_ino_entry(struct f2fs_sb_info *sbi)
{
	struct ino_entry *e, *tmp;
	int i;
@@ -722,47 +722,48 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi)
	return -EINVAL;
}

static int __add_dirty_inode(struct inode *inode, struct inode_entry *new)
static void __add_dirty_inode(struct inode *inode, enum inode_type type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	int flag = (type == DIR_INODE) ? FI_DIRTY_DIR : FI_DIRTY_FILE;

	if (is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR))
		return -EEXIST;
	if (is_inode_flag_set(fi, flag))
		return;

	set_inode_flag(F2FS_I(inode), FI_DIRTY_DIR);
	F2FS_I(inode)->dirty_dir = new;
	list_add_tail(&new->list, &sbi->dir_inode_list);
	stat_inc_dirty_dir(sbi);
	return 0;
	set_inode_flag(fi, flag);
	list_add_tail(&fi->dirty_list, &sbi->inode_list[type]);
	stat_inc_dirty_inode(sbi, type);
}

static void __remove_dirty_inode(struct inode *inode, enum inode_type type)
{
	struct f2fs_inode_info *fi = F2FS_I(inode);
	int flag = (type == DIR_INODE) ? FI_DIRTY_DIR : FI_DIRTY_FILE;

	if (get_dirty_pages(inode) ||
			!is_inode_flag_set(F2FS_I(inode), flag))
		return;

	list_del_init(&fi->dirty_list);
	clear_inode_flag(fi, flag);
	stat_dec_dirty_inode(F2FS_I_SB(inode), type);
}

void update_dirty_page(struct inode *inode, struct page *page)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct inode_entry *new;
	int ret = 0;
	enum inode_type type = S_ISDIR(inode->i_mode) ? DIR_INODE : FILE_INODE;

	if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode) &&
			!S_ISLNK(inode->i_mode))
		return;

	if (!S_ISDIR(inode->i_mode)) {
		inode_inc_dirty_pages(inode);
		goto out;
	}

	new = f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
	new->inode = inode;
	INIT_LIST_HEAD(&new->list);

	spin_lock(&sbi->dir_inode_lock);
	ret = __add_dirty_inode(inode, new);
	spin_lock(&sbi->inode_lock[type]);
	__add_dirty_inode(inode, type);
	inode_inc_dirty_pages(inode);
	spin_unlock(&sbi->dir_inode_lock);
	spin_unlock(&sbi->inode_lock[type]);

	if (ret)
		kmem_cache_free(inode_entry_slab, new);
out:
	SetPagePrivate(page);
	f2fs_trace_pid(page);
}
@@ -770,70 +771,60 @@ void update_dirty_page(struct inode *inode, struct page *page)
void add_dirty_dir_inode(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct inode_entry *new =
			f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
	int ret = 0;

	new->inode = inode;
	INIT_LIST_HEAD(&new->list);

	spin_lock(&sbi->dir_inode_lock);
	ret = __add_dirty_inode(inode, new);
	spin_unlock(&sbi->dir_inode_lock);

	if (ret)
		kmem_cache_free(inode_entry_slab, new);
	spin_lock(&sbi->inode_lock[DIR_INODE]);
	__add_dirty_inode(inode, DIR_INODE);
	spin_unlock(&sbi->inode_lock[DIR_INODE]);
}

void remove_dirty_dir_inode(struct inode *inode)
void remove_dirty_inode(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct inode_entry *entry;

	if (!S_ISDIR(inode->i_mode))
		return;
	struct f2fs_inode_info *fi = F2FS_I(inode);
	enum inode_type type = S_ISDIR(inode->i_mode) ? DIR_INODE : FILE_INODE;

	spin_lock(&sbi->dir_inode_lock);
	if (get_dirty_pages(inode) ||
			!is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR)) {
		spin_unlock(&sbi->dir_inode_lock);
	if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode) &&
			!S_ISLNK(inode->i_mode))
		return;
	}

	entry = F2FS_I(inode)->dirty_dir;
	list_del(&entry->list);
	F2FS_I(inode)->dirty_dir = NULL;
	clear_inode_flag(F2FS_I(inode), FI_DIRTY_DIR);
	stat_dec_dirty_dir(sbi);
	spin_unlock(&sbi->dir_inode_lock);
	kmem_cache_free(inode_entry_slab, entry);
	spin_lock(&sbi->inode_lock[type]);
	__remove_dirty_inode(inode, type);
	spin_unlock(&sbi->inode_lock[type]);

	/* Only from the recovery routine */
	if (is_inode_flag_set(F2FS_I(inode), FI_DELAY_IPUT)) {
		clear_inode_flag(F2FS_I(inode), FI_DELAY_IPUT);
	if (is_inode_flag_set(fi, FI_DELAY_IPUT)) {
		clear_inode_flag(fi, FI_DELAY_IPUT);
		iput(inode);
	}
}

void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi)
int sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type)
{
	struct list_head *head;
	struct inode_entry *entry;
	struct inode *inode;
	struct f2fs_inode_info *fi;
	bool is_dir = (type == DIR_INODE);

	trace_f2fs_sync_dirty_inodes_enter(sbi->sb, is_dir,
				get_pages(sbi, is_dir ?
				F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA));
retry:
	if (unlikely(f2fs_cp_error(sbi)))
		return;
		return -EIO;

	spin_lock(&sbi->dir_inode_lock);
	spin_lock(&sbi->inode_lock[type]);

	head = &sbi->dir_inode_list;
	head = &sbi->inode_list[type];
	if (list_empty(head)) {
		spin_unlock(&sbi->dir_inode_lock);
		return;
		spin_unlock(&sbi->inode_lock[type]);
		trace_f2fs_sync_dirty_inodes_exit(sbi->sb, is_dir,
				get_pages(sbi, is_dir ?
				F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA));
		return 0;
	}
	entry = list_entry(head->next, struct inode_entry, list);
	inode = igrab(entry->inode);
	spin_unlock(&sbi->dir_inode_lock);
	fi = list_entry(head->next, struct f2fs_inode_info, dirty_list);
	inode = igrab(&fi->vfs_inode);
	spin_unlock(&sbi->inode_lock[type]);
	if (inode) {
		filemap_fdatawrite(inode->i_mapping);
		iput(inode);
@@ -868,11 +859,9 @@ static int block_operations(struct f2fs_sb_info *sbi)
	/* write all the dirty dentry pages */
	if (get_pages(sbi, F2FS_DIRTY_DENTS)) {
		f2fs_unlock_all(sbi);
		sync_dirty_dir_inodes(sbi);
		if (unlikely(f2fs_cp_error(sbi))) {
			err = -EIO;
		err = sync_dirty_inodes(sbi, DIR_INODE);
		if (err)
			goto out;
		}
		goto retry_flush_dents;
	}

@@ -885,10 +874,9 @@ static int block_operations(struct f2fs_sb_info *sbi)

	if (get_pages(sbi, F2FS_DIRTY_NODES)) {
		up_write(&sbi->node_write);
		sync_node_pages(sbi, 0, &wbc);
		if (unlikely(f2fs_cp_error(sbi))) {
		err = sync_node_pages(sbi, 0, &wbc);
		if (err) {
			f2fs_unlock_all(sbi);
			err = -EIO;
			goto out;
		}
		goto retry_flush_nodes;
@@ -919,7 +907,7 @@ static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
	finish_wait(&sbi->cp_wait, &wait);
}

static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
@@ -945,7 +933,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
	while (get_pages(sbi, F2FS_DIRTY_META)) {
		sync_meta_pages(sbi, META, LONG_MAX);
		if (unlikely(f2fs_cp_error(sbi)))
			return;
			return -EIO;
	}

	next_free_nid(sbi, &last_nid);
@@ -1030,7 +1018,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
	/* need to wait for end_io results */
	wait_on_all_pages_writeback(sbi);
	if (unlikely(f2fs_cp_error(sbi)))
		return;
		return -EIO;

	/* write out checkpoint buffer at block 0 */
	update_meta_page(sbi, ckpt, start_blk++);
@@ -1058,7 +1046,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
	wait_on_all_pages_writeback(sbi);

	if (unlikely(f2fs_cp_error(sbi)))
		return;
		return -EIO;

	filemap_fdatawait_range(NODE_MAPPING(sbi), 0, LONG_MAX);
	filemap_fdatawait_range(META_MAPPING(sbi), 0, LONG_MAX);
@@ -1081,22 +1069,25 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
		invalidate_mapping_pages(META_MAPPING(sbi), discard_blk,
								discard_blk);

	release_dirty_inode(sbi);
	release_ino_entry(sbi);

	if (unlikely(f2fs_cp_error(sbi)))
		return;
		return -EIO;

	clear_prefree_segments(sbi, cpc);
	clear_sbi_flag(sbi, SBI_IS_DIRTY);

	return 0;
}

/*
 * We guarantee that this checkpoint procedure will not fail.
 */
void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
	unsigned long long ckpt_ver;
	int err = 0;

	mutex_lock(&sbi->cp_mutex);

@@ -1104,14 +1095,19 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
		(cpc->reason == CP_FASTBOOT || cpc->reason == CP_SYNC ||
		(cpc->reason == CP_DISCARD && !sbi->discard_blks)))
		goto out;
	if (unlikely(f2fs_cp_error(sbi)))
	if (unlikely(f2fs_cp_error(sbi))) {
		err = -EIO;
		goto out;
	if (f2fs_readonly(sbi->sb))
	}
	if (f2fs_readonly(sbi->sb)) {
		err = -EROFS;
		goto out;
	}

	trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "start block_ops");

	if (block_operations(sbi))
	err = block_operations(sbi);
	if (err)
		goto out;

	trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish block_ops");
@@ -1133,7 +1129,7 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
	flush_sit_entries(sbi, cpc);

	/* unlock all the fs_lock[] in do_checkpoint() */
	do_checkpoint(sbi, cpc);
	err = do_checkpoint(sbi, cpc);

	unblock_operations(sbi);
	stat_inc_cp_count(sbi->stat_info);
@@ -1143,10 +1139,11 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
			"checkpoint: version = %llx", ckpt_ver);

	/* do checkpoint periodically */
	sbi->cp_expires = round_jiffies_up(jiffies + HZ * sbi->cp_interval);
	f2fs_update_time(sbi, CP_TIME);
	trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint");
out:
	mutex_unlock(&sbi->cp_mutex);
	trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint");
	return err;
}

void init_ino_entry_info(struct f2fs_sb_info *sbi)
fs/f2fs/data.c: +215 −162
@@ -225,7 +225,8 @@ void set_data_blkaddr(struct dnode_of_data *dn)
	/* Get physical address of data block */
	addr_array = blkaddr_in_node(rn);
	addr_array[ofs_in_node] = cpu_to_le32(dn->data_blkaddr);
	set_page_dirty(node_page);
	if (set_page_dirty(node_page))
		dn->node_changed = true;
}

int reserve_new_block(struct dnode_of_data *dn)
@@ -412,7 +413,7 @@ struct page *get_new_data_page(struct inode *inode,
	struct page *page;
	struct dnode_of_data dn;
	int err;
repeat:

	page = f2fs_grab_cache_page(mapping, index, true);
	if (!page) {
		/*
@@ -441,12 +442,11 @@ struct page *get_new_data_page(struct inode *inode,
	} else {
		f2fs_put_page(page, 1);

		page = get_read_data_page(inode, index, READ_SYNC, true);
		/* if ipage exists, blkaddr should be NEW_ADDR */
		f2fs_bug_on(F2FS_I_SB(inode), ipage);
		page = get_lock_data_page(inode, index, true);
		if (IS_ERR(page))
			goto repeat;

		/* wait for read completion */
		lock_page(page);
			return page;
	}
got_it:
	if (new_i_size && i_size_read(inode) <
@@ -494,14 +494,10 @@ static int __allocate_data_block(struct dnode_of_data *dn)
	if (i_size_read(dn->inode) < ((loff_t)(fofs + 1) << PAGE_CACHE_SHIFT))
		i_size_write(dn->inode,
				((loff_t)(fofs + 1) << PAGE_CACHE_SHIFT));

	/* direct IO doesn't use extent cache to maximize the performance */
	f2fs_drop_largest_extent(dn->inode, fofs);

	return 0;
}

static void __allocate_data_blocks(struct inode *inode, loff_t offset,
static int __allocate_data_blocks(struct inode *inode, loff_t offset,
							size_t count)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
@@ -510,14 +506,15 @@ static void __allocate_data_blocks(struct inode *inode, loff_t offset,
	u64 len = F2FS_BYTES_TO_BLK(count);
	bool allocated;
	u64 end_offset;
	int err = 0;

	while (len) {
		f2fs_balance_fs(sbi);
		f2fs_lock_op(sbi);

		/* When reading holes, we need its node page */
		set_new_dnode(&dn, inode, NULL, NULL, 0);
		if (get_dnode_of_data(&dn, start, ALLOC_NODE))
		err = get_dnode_of_data(&dn, start, ALLOC_NODE);
		if (err)
			goto out;

		allocated = false;
@@ -526,12 +523,15 @@ static void __allocate_data_blocks(struct inode *inode, loff_t offset,
		while (dn.ofs_in_node < end_offset && len) {
			block_t blkaddr;

			if (unlikely(f2fs_cp_error(sbi)))
			if (unlikely(f2fs_cp_error(sbi))) {
				err = -EIO;
				goto sync_out;
			}

			blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
			if (blkaddr == NULL_ADDR || blkaddr == NEW_ADDR) {
				if (__allocate_data_block(&dn))
				err = __allocate_data_block(&dn);
				if (err)
					goto sync_out;
				allocated = true;
			}
@@ -545,8 +545,10 @@ static void __allocate_data_blocks(struct inode *inode, loff_t offset,

		f2fs_put_dnode(&dn);
		f2fs_unlock_op(sbi);

		f2fs_balance_fs(sbi, dn.node_changed);
	}
	return;
	return err;

sync_out:
	if (allocated)
@@ -554,7 +556,8 @@ static void __allocate_data_blocks(struct inode *inode, loff_t offset,
	f2fs_put_dnode(&dn);
out:
	f2fs_unlock_op(sbi);
	return;
	f2fs_balance_fs(sbi, dn.node_changed);
	return err;
}

/*
@@ -566,7 +569,7 @@ static void __allocate_data_blocks(struct inode *inode, loff_t offset,
 *     b. do not use extent cache for better performance
 *     c. give the block addresses to blockdev
 */
static int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
						int create, int flag)
{
	unsigned int maxblocks = map->m_len;
@@ -577,6 +580,7 @@ static int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
	int err = 0, ofs = 1;
	struct extent_info ei;
	bool allocated = false;
	block_t blkaddr;

	map->m_len = 0;
	map->m_flags = 0;
@@ -592,7 +596,7 @@ static int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
	}

	if (create)
		f2fs_lock_op(F2FS_I_SB(inode));
		f2fs_lock_op(sbi);

	/* When reading holes, we need its node page */
	set_new_dnode(&dn, inode, NULL, NULL, 0);
@@ -640,12 +644,21 @@ static int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
	pgofs++;

get_next:
	if (map->m_len >= maxblocks)
		goto sync_out;

	if (dn.ofs_in_node >= end_offset) {
		if (allocated)
			sync_inode_page(&dn);
		allocated = false;
		f2fs_put_dnode(&dn);

		if (create) {
			f2fs_unlock_op(sbi);
			f2fs_balance_fs(sbi, dn.node_changed);
			f2fs_lock_op(sbi);
		}

		set_new_dnode(&dn, inode, NULL, NULL, 0);
		err = get_dnode_of_data(&dn, pgofs, mode);
		if (err) {
@@ -657,8 +670,7 @@ static int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
		end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
	}

	if (maxblocks > map->m_len) {
		block_t blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
	blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);

	if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) {
		if (create) {
@@ -694,15 +706,17 @@ static int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
		map->m_len++;
		goto get_next;
	}
	}

sync_out:
	if (allocated)
		sync_inode_page(&dn);
put_out:
	f2fs_put_dnode(&dn);
unlock_out:
	if (create)
		f2fs_unlock_op(F2FS_I_SB(inode));
	if (create) {
		f2fs_unlock_op(sbi);
		f2fs_balance_fs(sbi, dn.node_changed);
	}
out:
	trace_f2fs_map_blocks(inode, map, err);
	return err;
@@ -742,6 +756,10 @@ static int get_data_block_dio(struct inode *inode, sector_t iblock,
static int get_data_block_bmap(struct inode *inode, sector_t iblock,
			struct buffer_head *bh_result, int create)
{
	/* Block number less than F2FS MAX BLOCKS */
	if (unlikely(iblock >= F2FS_I_SB(inode)->max_file_blocks))
		return -EFBIG;

	return __get_data_block(inode, iblock, bh_result, create,
						F2FS_GET_BLOCK_BMAP);
}
@@ -761,10 +779,9 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
{
	struct buffer_head map_bh;
	sector_t start_blk, last_blk;
	loff_t isize = i_size_read(inode);
	loff_t isize;
	u64 logical = 0, phys = 0, size = 0;
	u32 flags = 0;
	bool past_eof = false, whole_file = false;
	int ret = 0;

	ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
@@ -779,16 +796,19 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,

	mutex_lock(&inode->i_mutex);

	if (len >= isize) {
		whole_file = true;
		len = isize;
	}
	isize = i_size_read(inode);
	if (start >= isize)
		goto out;

	if (start + len > isize)
		len = isize - start;

	if (logical_to_blk(inode, len) == 0)
		len = blk_to_logical(inode, 1);

	start_blk = logical_to_blk(inode, start);
	last_blk = logical_to_blk(inode, start + len - 1);

next:
	memset(&map_bh, 0, sizeof(struct buffer_head));
	map_bh.b_size = len;
@@ -800,41 +820,26 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,

	/* HOLE */
	if (!buffer_mapped(&map_bh)) {
		start_blk++;

		if (!past_eof && blk_to_logical(inode, start_blk) >= isize)
			past_eof = 1;

		if (past_eof && size) {
		/* Go through holes util pass the EOF */
		if (blk_to_logical(inode, start_blk++) < isize)
			goto prep_next;
		/* Found a hole beyond isize means no more extents.
		 * Note that the premise is that filesystems don't
		 * punch holes beyond isize and keep size unchanged.
		 */
		flags |= FIEMAP_EXTENT_LAST;
			ret = fiemap_fill_next_extent(fieinfo, logical,
					phys, size, flags);
		} else if (size) {
			ret = fiemap_fill_next_extent(fieinfo, logical,
					phys, size, flags);
			size = 0;
	}

		/* if we have holes up to/past EOF then we're done */
		if (start_blk > last_blk || past_eof || ret)
			goto out;
	} else {
		if (start_blk > last_blk && !whole_file) {
	if (size) {
		if (f2fs_encrypted_inode(inode))
			flags |= FIEMAP_EXTENT_DATA_ENCRYPTED;

		ret = fiemap_fill_next_extent(fieinfo, logical,
				phys, size, flags);
			goto out;
	}

		/*
		 * if size != 0 then we know we already have an extent
		 * to add, so add it.
		 */
		if (size) {
			ret = fiemap_fill_next_extent(fieinfo, logical,
					phys, size, flags);
			if (ret)
	if (start_blk > last_blk || ret)
		goto out;
		}

	logical = blk_to_logical(inode, start_blk);
	phys = blk_to_logical(inode, map_bh.b_blocknr);
@@ -845,14 +850,7 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,

	start_blk += logical_to_blk(inode, size);

		/*
		 * If we are past the EOF, then we need to make sure as
		 * soon as we find a hole that the last extent we found
		 * is marked with FIEMAP_EXTENT_LAST
		 */
		if (!past_eof && logical + size >= isize)
			past_eof = true;
	}
prep_next:
	cond_resched();
	if (fatal_signal_pending(current))
		ret = -EINTR;
@@ -1083,6 +1081,7 @@ int do_write_data_page(struct f2fs_io_info *fio)
	 */
	if (unlikely(fio->blk_addr != NEW_ADDR &&
			!is_cold_data(page) &&
			!IS_ATOMIC_WRITTEN_PAGE(page) &&
			need_inplace_update(inode))) {
		rewrite_data_page(fio);
		set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE);
@@ -1179,10 +1178,11 @@ static int f2fs_write_data_page(struct page *page,
	if (err)
		ClearPageUptodate(page);
	unlock_page(page);
	if (need_balance_fs)
		f2fs_balance_fs(sbi);
	if (wbc->for_reclaim)
	f2fs_balance_fs(sbi, need_balance_fs);
	if (wbc->for_reclaim || unlikely(f2fs_cp_error(sbi))) {
		f2fs_submit_merged_bio(sbi, DATA, WRITE);
		remove_dirty_inode(inode);
	}
	return 0;

redirty_out:
@@ -1354,6 +1354,10 @@ static int f2fs_write_data_pages(struct address_space *mapping,
			available_free_memory(sbi, DIRTY_DENTS))
		goto skip_write;

	/* skip writing during file defragment */
	if (is_inode_flag_set(F2FS_I(inode), FI_DO_DEFRAG))
		goto skip_write;

	/* during POR, we don't need to trigger writepage at all. */
	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
		goto skip_write;
@@ -1369,7 +1373,7 @@ static int f2fs_write_data_pages(struct address_space *mapping,
	if (locked)
		mutex_unlock(&sbi->writepages);

	remove_dirty_dir_inode(inode);
	remove_dirty_inode(inode);

	wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff);
	return ret;
@@ -1382,11 +1386,83 @@ static int f2fs_write_data_pages(struct address_space *mapping,
static void f2fs_write_failed(struct address_space *mapping, loff_t to)
{
	struct inode *inode = mapping->host;
	loff_t i_size = i_size_read(inode);

	if (to > i_size) {
		truncate_pagecache(inode, i_size);
		truncate_blocks(inode, i_size, true);
	}
}

static int prepare_write_begin(struct f2fs_sb_info *sbi,
			struct page *page, loff_t pos, unsigned len,
			block_t *blk_addr, bool *node_changed)
{
	struct inode *inode = page->mapping->host;
	pgoff_t index = page->index;
	struct dnode_of_data dn;
	struct page *ipage;
	bool locked = false;
	struct extent_info ei;
	int err = 0;

	if (to > inode->i_size) {
		truncate_pagecache(inode, inode->i_size);
		truncate_blocks(inode, inode->i_size, true);
	if (f2fs_has_inline_data(inode) ||
			(pos & PAGE_CACHE_MASK) >= i_size_read(inode)) {
		f2fs_lock_op(sbi);
		locked = true;
	}
restart:
	/* check inline_data */
	ipage = get_node_page(sbi, inode->i_ino);
	if (IS_ERR(ipage)) {
		err = PTR_ERR(ipage);
		goto unlock_out;
	}

	set_new_dnode(&dn, inode, ipage, ipage, 0);

	if (f2fs_has_inline_data(inode)) {
		if (pos + len <= MAX_INLINE_DATA) {
			read_inline_data(page, ipage);
			set_inode_flag(F2FS_I(inode), FI_DATA_EXIST);
			sync_inode_page(&dn);
		} else {
			err = f2fs_convert_inline_page(&dn, page);
			if (err)
				goto out;
			if (dn.data_blkaddr == NULL_ADDR)
				err = f2fs_get_block(&dn, index);
		}
	} else if (locked) {
		err = f2fs_get_block(&dn, index);
	} else {
		if (f2fs_lookup_extent_cache(inode, index, &ei)) {
			dn.data_blkaddr = ei.blk + index - ei.fofs;
		} else {
			bool restart = false;

			/* hole case */
			err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
			if (err || (!err && dn.data_blkaddr == NULL_ADDR))
				restart = true;
			if (restart) {
				f2fs_put_dnode(&dn);
				f2fs_lock_op(sbi);
				locked = true;
				goto restart;
			}
		}
	}

	/* convert_inline_page can make node_changed */
	*blk_addr = dn.data_blkaddr;
	*node_changed = dn.node_changed;
out:
	f2fs_put_dnode(&dn);
unlock_out:
	if (locked)
		f2fs_unlock_op(sbi);
	return err;
}

static int f2fs_write_begin(struct file *file, struct address_space *mapping,
@@ -1396,15 +1472,13 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
	struct inode *inode = mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct page *page = NULL;
	struct page *ipage;
	pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT;
	struct dnode_of_data dn;
	bool need_balance = false;
	block_t blkaddr = NULL_ADDR;
	int err = 0;

	trace_f2fs_write_begin(inode, pos, len, flags);

	f2fs_balance_fs(sbi);

	/*
	 * We should check this at this moment to avoid deadlock on inode page
	 * and #0 page. The locking rule for inline_data conversion should be:
@@ -1424,41 +1498,27 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,

	*pagep = page;

	f2fs_lock_op(sbi);

	/* check inline_data */
	ipage = get_node_page(sbi, inode->i_ino);
	if (IS_ERR(ipage)) {
		err = PTR_ERR(ipage);
		goto unlock_fail;
	}

	set_new_dnode(&dn, inode, ipage, ipage, 0);
	err = prepare_write_begin(sbi, page, pos, len,
					&blkaddr, &need_balance);
	if (err)
		goto fail;

	if (f2fs_has_inline_data(inode)) {
		if (pos + len <= MAX_INLINE_DATA) {
			read_inline_data(page, ipage);
			set_inode_flag(F2FS_I(inode), FI_DATA_EXIST);
			sync_inode_page(&dn);
			goto put_next;
	if (need_balance && has_not_enough_free_secs(sbi, 0)) {
		unlock_page(page);
		f2fs_balance_fs(sbi, true);
		lock_page(page);
		if (page->mapping != mapping) {
			/* The page got truncated from under us */
			f2fs_put_page(page, 1);
			goto repeat;
		}
		err = f2fs_convert_inline_page(&dn, page);
		if (err)
			goto put_fail;
	}

	err = f2fs_get_block(&dn, index);
	if (err)
		goto put_fail;
put_next:
	f2fs_put_dnode(&dn);
	f2fs_unlock_op(sbi);

	f2fs_wait_on_page_writeback(page, DATA);

	/* wait for GCed encrypted page writeback */
	if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
		f2fs_wait_on_encrypted_page_writeback(sbi, dn.data_blkaddr);
		f2fs_wait_on_encrypted_page_writeback(sbi, blkaddr);

	if (len == PAGE_CACHE_SIZE)
		goto out_update;
@@ -1474,14 +1534,14 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
		goto out_update;
	}

	if (dn.data_blkaddr == NEW_ADDR) {
	if (blkaddr == NEW_ADDR) {
		zero_user_segment(page, 0, PAGE_CACHE_SIZE);
	} else {
		struct f2fs_io_info fio = {
			.sbi = sbi,
			.type = DATA,
			.rw = READ_SYNC,
			.blk_addr = dn.data_blkaddr,
			.blk_addr = blkaddr,
			.page = page,
			.encrypted_page = NULL,
		};
@@ -1512,10 +1572,6 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
	clear_cold_data(page);
	return 0;

put_fail:
	f2fs_put_dnode(&dn);
unlock_fail:
	f2fs_unlock_op(sbi);
fail:
	f2fs_put_page(page, 1);
	f2fs_write_failed(mapping, pos + len);
@@ -1540,6 +1596,7 @@ static int f2fs_write_end(struct file *file,
	}

	f2fs_put_page(page, 1);
	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
	return copied;
}

@@ -1567,11 +1624,9 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
	int err;

	/* we don't need to use inline_data strictly */
	if (f2fs_has_inline_data(inode)) {
	err = f2fs_convert_inline_inode(inode);
	if (err)
		return err;
	}

	if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
		return 0;
@@ -1583,12 +1638,10 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
	trace_f2fs_direct_IO_enter(inode, offset, count, iov_iter_rw(iter));

	if (iov_iter_rw(iter) == WRITE) {
		__allocate_data_blocks(inode, offset, count);
		if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) {
			err = -EIO;
		err = __allocate_data_blocks(inode, offset, count);
		if (err)
			goto out;
	}
	}

	err = blockdev_direct_IO(iocb, inode, iter, offset, get_data_block_dio);
out:
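
The f2fs_fiemap rework above simplifies hole handling and, per "f2fs: recognize encrypted data in f2fs_fiemap", tags extents of encrypted inodes with FIEMAP_EXTENT_DATA_ENCRYPTED. The sketch below walks a file's extents through the generic FS_IOC_FIEMAP interface; nothing in it is f2fs-specific, it merely shows where that flag becomes visible to userspace.

#include <fcntl.h>
#include <linux/fiemap.h>
#include <linux/fs.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(int argc, char **argv)
{
        struct fiemap *fm;
        unsigned int i;
        int fd;

        if (argc < 2)
                return 1;
        fd = open(argv[1], O_RDONLY);
        if (fd < 0)
                return 1;
        /* Room for 32 extents; a real tool would loop until it sees
         * FIEMAP_EXTENT_LAST. */
        fm = calloc(1, sizeof(*fm) + 32 * sizeof(struct fiemap_extent));
        if (!fm)
                return 1;
        fm->fm_length = ~0ULL;                  /* map the whole file */
        fm->fm_extent_count = 32;
        fm->fm_flags = FIEMAP_FLAG_SYNC;        /* sync dirty data first */
        if (ioctl(fd, FS_IOC_FIEMAP, fm) < 0) {
                perror("FS_IOC_FIEMAP");
                return 1;
        }
        for (i = 0; i < fm->fm_mapped_extents; i++) {
                struct fiemap_extent *fe = &fm->fm_extents[i];

                printf("logical %llu phys %llu len %llu%s%s\n",
                       (unsigned long long)fe->fe_logical,
                       (unsigned long long)fe->fe_physical,
                       (unsigned long long)fe->fe_length,
                       (fe->fe_flags & FIEMAP_EXTENT_DATA_ENCRYPTED) ?
                                " [encrypted]" : "",
                       (fe->fe_flags & FIEMAP_EXTENT_LAST) ? " [last]" : "");
        }
        free(fm);
        close(fd);
        return 0;
}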
+19 −10 (one more file changed; preview size limit exceeded, so its diff is collapsed)