Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 59c9081b authored by Yunlei He's avatar Yunlei He Committed by Jaegeuk Kim
Browse files

f2fs: allow write page cache when writting cp



This patch allow write data to normal file when writting
new checkpoint.

We relax three limitations for write_begin path:
1. data allocation
2. node allocation
3. variables in checkpoint

Signed-off-by: default avatarYunlei He <heyunlei@huawei.com>
Signed-off-by: default avatarJaegeuk Kim <jaegeuk@kernel.org>
parent 22588f87
Loading
Loading
Loading
Loading
+28 −12
Original line number Diff line number Diff line
@@ -944,6 +944,19 @@ int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi)
	return 0;
}

static void __prepare_cp_block(struct f2fs_sb_info *sbi)
{
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	nid_t last_nid = nm_i->next_scan_nid;

	next_free_nid(sbi, &last_nid);
	ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi));
	ckpt->valid_node_count = cpu_to_le32(valid_node_count(sbi));
	ckpt->valid_inode_count = cpu_to_le32(valid_inode_count(sbi));
	ckpt->next_free_nid = cpu_to_le32(last_nid);
}

/*
 * Freeze all the FS-operations for checkpoint.
 */
@@ -970,7 +983,14 @@ static int block_operations(struct f2fs_sb_info *sbi)
		goto retry_flush_dents;
	}

	/*
	 * POR: we should ensure that there are no dirty node pages
	 * until finishing nat/sit flush. inode->i_blocks can be updated.
	 */
	down_write(&sbi->node_change);

	if (get_pages(sbi, F2FS_DIRTY_IMETA)) {
		up_write(&sbi->node_change);
		f2fs_unlock_all(sbi);
		err = f2fs_sync_inode_meta(sbi);
		if (err)
@@ -978,10 +998,6 @@ static int block_operations(struct f2fs_sb_info *sbi)
		goto retry_flush_dents;
	}

	/*
	 * POR: we should ensure that there are no dirty node pages
	 * until finishing nat/sit flush.
	 */
retry_flush_nodes:
	down_write(&sbi->node_write);

@@ -989,11 +1005,19 @@ static int block_operations(struct f2fs_sb_info *sbi)
		up_write(&sbi->node_write);
		err = sync_node_pages(sbi, &wbc);
		if (err) {
			up_write(&sbi->node_change);
			f2fs_unlock_all(sbi);
			goto out;
		}
		goto retry_flush_nodes;
	}

	/*
	 * sbi->node_change is used only for AIO write_begin path which produces
	 * dirty node blocks and some checkpoint values by block allocation.
	 */
	__prepare_cp_block(sbi);
	up_write(&sbi->node_change);
out:
	blk_finish_plug(&plug);
	return err;
@@ -1061,7 +1085,6 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num;
	nid_t last_nid = nm_i->next_scan_nid;
	block_t start_blk;
	unsigned int data_sum_blocks, orphan_blocks;
	__u32 crc32 = 0;
@@ -1078,14 +1101,11 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
			return -EIO;
	}

	next_free_nid(sbi, &last_nid);

	/*
	 * modify checkpoint
	 * version number is already updated
	 */
	ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi));
	ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi));
	ckpt->free_segment_count = cpu_to_le32(free_segments(sbi));
	for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) {
		ckpt->cur_node_segno[i] =
@@ -1104,10 +1124,6 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
				curseg_alloc_type(sbi, i + CURSEG_HOT_DATA);
	}

	ckpt->valid_node_count = cpu_to_le32(valid_node_count(sbi));
	ckpt->valid_inode_count = cpu_to_le32(valid_inode_count(sbi));
	ckpt->next_free_nid = cpu_to_le32(last_nid);

	/* 2 cp  + n data seg summary + orphan inode blocks */
	data_sum_blocks = npages_for_summary_flush(sbi, false);
	spin_lock(&sbi->cp_lock);
+22 −6
Original line number Diff line number Diff line
@@ -787,6 +787,21 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
	return err;
}

static inline void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
{
	if (flag == F2FS_GET_BLOCK_PRE_AIO) {
		if (lock)
			down_read(&sbi->node_change);
		else
			up_read(&sbi->node_change);
	} else {
		if (lock)
			f2fs_lock_op(sbi);
		else
			f2fs_unlock_op(sbi);
	}
}

/*
 * f2fs_map_blocks() now supported readahead/bmap/rw direct_IO with
 * f2fs_map_blocks structure.
@@ -829,7 +844,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,

next_dnode:
	if (create)
		f2fs_lock_op(sbi);
		__do_map_lock(sbi, flag, true);

	/* When reading holes, we need its node page */
	set_new_dnode(&dn, inode, NULL, NULL, 0);
@@ -939,7 +954,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
	f2fs_put_dnode(&dn);

	if (create) {
		f2fs_unlock_op(sbi);
		__do_map_lock(sbi, flag, false);
		f2fs_balance_fs(sbi, dn.node_changed);
	}
	goto next_dnode;
@@ -948,7 +963,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
	f2fs_put_dnode(&dn);
unlock_out:
	if (create) {
		f2fs_unlock_op(sbi);
		__do_map_lock(sbi, flag, false);
		f2fs_balance_fs(sbi, dn.node_changed);
	}
out:
@@ -1688,7 +1703,7 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi,

	if (f2fs_has_inline_data(inode) ||
			(pos & PAGE_MASK) >= i_size_read(inode)) {
		f2fs_lock_op(sbi);
		__do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true);
		locked = true;
	}
restart:
@@ -1724,7 +1739,8 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi,
			err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
			if (err || dn.data_blkaddr == NULL_ADDR) {
				f2fs_put_dnode(&dn);
				f2fs_lock_op(sbi);
				__do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO,
								true);
				locked = true;
				goto restart;
			}
@@ -1738,7 +1754,7 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi,
	f2fs_put_dnode(&dn);
unlock_out:
	if (locked)
		f2fs_unlock_op(sbi);
		__do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false);
	return err;
}

+1 −0
Original line number Diff line number Diff line
@@ -830,6 +830,7 @@ struct f2fs_sb_info {
	struct mutex cp_mutex;			/* checkpoint procedure lock */
	struct rw_semaphore cp_rwsem;		/* blocking FS operations */
	struct rw_semaphore node_write;		/* locking node writes */
	struct rw_semaphore node_change;	/* locking node change */
	wait_queue_head_t cp_wait;
	unsigned long last_time[MAX_TIME];	/* to store time in jiffies */
	long interval_time[MAX_TIME];		/* to store thresholds */
+6 −6
Original line number Diff line number Diff line
@@ -2448,11 +2448,12 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
		f2fs_put_page(page, 1);
	}

	f2fs_bug_on(sbi, set->entry_cnt);

	/* Allow dirty nats by node block allocation in write_begin */
	if (!set->entry_cnt) {
		radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set);
		kmem_cache_free(nat_entry_set_slab, set);
	}
}

/*
 * This function is called during the checkpointing process.
@@ -2496,8 +2497,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
		__flush_nat_entry_set(sbi, set, cpc);

	up_write(&nm_i->nat_tree_lock);

	f2fs_bug_on(sbi, nm_i->dirty_nat_cnt);
	/* Allow dirty nats by node block allocation in write_begin */
}

static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
+1 −0
Original line number Diff line number Diff line
@@ -1918,6 +1918,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
	mutex_init(&sbi->gc_mutex);
	mutex_init(&sbi->cp_mutex);
	init_rwsem(&sbi->node_write);
	init_rwsem(&sbi->node_change);

	/* disallow all the data/node/meta page writes */
	set_sbi_flag(sbi, SBI_POR_DOING);