Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit b54ecfb7 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull f2fs updates from Jaegeuk Kim:
 "This series includes patches to:
   - add nobarrier mount option
   - support tmpfile and rename2
   - enhance the fdatasync behavior
   - fix the error path
   - fix the recovery routine
   - refactor a part of the checkpoint procedure
   - reduce some lock contentions"

* tag 'for-f2fs-3.17' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (40 commits)
  f2fs: use for_each_set_bit to simplify the code
  f2fs: add f2fs_balance_fs for expand_inode_data
  f2fs: invalidate xattr node page when evict inode
  f2fs: avoid skipping recover_inline_xattr after recover_inline_data
  f2fs: add tracepoint for f2fs_direct_IO
  f2fs: reduce competition among node page writes
  f2fs: fix coding style
  f2fs: remove redundant lines in allocate_data_block
  f2fs: add tracepoint for f2fs_issue_flush
  f2fs: avoid retrying wrong recovery routine when error was occurred
  f2fs: test before set/clear bits
  f2fs: fix wrong condition for unlikely
  f2fs: enable in-place-update for fdatasync
  f2fs: skip unnecessary data writes during fsync
  f2fs: add info of appended or updated data writes
  f2fs: use radix_tree for ino management
  f2fs: add infra for ino management
  f2fs: punch the core function for inode management
  f2fs: add nobarrier mount option
  f2fs: fix to put root inode in error path of fill_super
  ...
parents ae9b475e b65ee148
Loading
Loading
Loading
Loading
+5 −0
Original line number Diff line number Diff line
@@ -126,6 +126,11 @@ flush_merge Merge concurrent cache_flush commands as much as possible
                       to eliminate redundant command issues. If the underlying
		       device handles the cache_flush command relatively slowly,
		       recommend to enable this option.
nobarrier              This option can be used if underlying storage guarantees
                       its cached data should be written to the non-volatile area.
		       If this option is set, no cache_flush commands are issued
		       but f2fs still guarantees the write ordering of all the
		       data writes.

================================================================================
DEBUGFS ENTRIES
+0 −6
Original line number Diff line number Diff line
@@ -203,12 +203,6 @@ static int __f2fs_set_acl(struct inode *inode, int type,
	size_t size = 0;
	int error;

	if (acl) {
		error = posix_acl_valid(acl);
		if (error < 0)
			return error;
	}

	switch (type) {
	case ACL_TYPE_ACCESS:
		name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS;
+117 −61
Original line number Diff line number Diff line
@@ -22,7 +22,7 @@
#include "segment.h"
#include <trace/events/f2fs.h>

static struct kmem_cache *orphan_entry_slab;
static struct kmem_cache *ino_entry_slab;
static struct kmem_cache *inode_entry_slab;

/*
@@ -282,72 +282,120 @@ const struct address_space_operations f2fs_meta_aops = {
	.set_page_dirty	= f2fs_set_meta_page_dirty,
};

/*
 * Track @ino in the per-type ino set: insert it into the type's radix
 * tree and append it to the type's list. No-op if the ino is already
 * tracked. Retries forever on allocation/insert failure (GFP_ATOMIC
 * under spinlock, so failures are expected to be transient).
 */
static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
{
	struct ino_entry *e;
retry:
	spin_lock(&sbi->ino_lock[type]);

	e = radix_tree_lookup(&sbi->ino_root[type], ino);
	if (!e) {
		/* atomic alloc under the spinlock; drop lock and retry on failure */
		e = kmem_cache_alloc(ino_entry_slab, GFP_ATOMIC);
		if (!e) {
			spin_unlock(&sbi->ino_lock[type]);
			goto retry;
		}
		/*
		 * insert can fail (e.g. preload exhaustion); free the entry
		 * and retry from scratch rather than leaking it
		 */
		if (radix_tree_insert(&sbi->ino_root[type], ino, e)) {
			spin_unlock(&sbi->ino_lock[type]);
			kmem_cache_free(ino_entry_slab, e);
			goto retry;
		}
		/*
		 * entry is initialized after insertion; safe because the
		 * lock is still held, so no lookup can observe it yet
		 */
		memset(e, 0, sizeof(struct ino_entry));
		e->ino = ino;

		list_add_tail(&e->list, &sbi->ino_list[type]);
	}
	spin_unlock(&sbi->ino_lock[type]);
}

/*
 * Stop tracking @ino for @type: unlink it from the type's list, delete
 * it from the radix tree and free the entry. For ORPHAN_INO the orphan
 * counter is decremented as well. No-op if the ino is not tracked.
 */
static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
{
	struct ino_entry *e;

	spin_lock(&sbi->ino_lock[type]);
	e = radix_tree_lookup(&sbi->ino_root[type], ino);
	if (e) {
		list_del(&e->list);
		radix_tree_delete(&sbi->ino_root[type], ino);
		/* orphan accounting shares this path; keep n_orphans in sync */
		if (type == ORPHAN_INO)
			sbi->n_orphans--;
		spin_unlock(&sbi->ino_lock[type]);
		/* free outside the lock; entry is already unreachable */
		kmem_cache_free(ino_entry_slab, e);
		return;
	}
	spin_unlock(&sbi->ino_lock[type]);
}

/* Record @ino as having dirty (appended/updated) data of kind @type. */
void add_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type)
{
	/* add new dirty ino entry into list */
	__add_ino_entry(sbi, ino, type);
}

/* Forget the dirty-data record of kind @type for @ino. */
void remove_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type)
{
	/* remove dirty ino entry from list */
	__remove_ino_entry(sbi, ino, type);
}

/* mode should be APPEND_INO or UPDATE_INO */
/*
 * Return true if @ino is currently tracked in the @mode set, i.e. the
 * inode has appended/updated data writes recorded since the last
 * checkpoint. Lock is only held across the lookup; the result is a
 * snapshot and may be stale by the time the caller uses it.
 */
bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode)
{
	struct ino_entry *e;
	spin_lock(&sbi->ino_lock[mode]);
	e = radix_tree_lookup(&sbi->ino_root[mode], ino);
	spin_unlock(&sbi->ino_lock[mode]);
	return e ? true : false;
}

/*
 * Drop every APPEND_INO and UPDATE_INO entry: empty each list, delete
 * the corresponding radix-tree nodes and free the entries. Called when
 * the tracked state is no longer needed (presumably after a successful
 * checkpoint has made the writes durable — TODO confirm at call site).
 */
static void release_dirty_inode(struct f2fs_sb_info *sbi)
{
	struct ino_entry *e, *tmp;
	int i;

	/* relies on APPEND_INO and UPDATE_INO being adjacent enum values */
	for (i = APPEND_INO; i <= UPDATE_INO; i++) {
		spin_lock(&sbi->ino_lock[i]);
		list_for_each_entry_safe(e, tmp, &sbi->ino_list[i], list) {
			list_del(&e->list);
			radix_tree_delete(&sbi->ino_root[i], e->ino);
			kmem_cache_free(ino_entry_slab, e);
		}
		spin_unlock(&sbi->ino_lock[i]);
	}
}

int acquire_orphan_inode(struct f2fs_sb_info *sbi)
{
	int err = 0;

	spin_lock(&sbi->orphan_inode_lock);
	spin_lock(&sbi->ino_lock[ORPHAN_INO]);
	if (unlikely(sbi->n_orphans >= sbi->max_orphans))
		err = -ENOSPC;
	else
		sbi->n_orphans++;
	spin_unlock(&sbi->orphan_inode_lock);
	spin_unlock(&sbi->ino_lock[ORPHAN_INO]);

	return err;
}

void release_orphan_inode(struct f2fs_sb_info *sbi)
{
	spin_lock(&sbi->orphan_inode_lock);
	spin_lock(&sbi->ino_lock[ORPHAN_INO]);
	f2fs_bug_on(sbi->n_orphans == 0);
	sbi->n_orphans--;
	spin_unlock(&sbi->orphan_inode_lock);
	spin_unlock(&sbi->ino_lock[ORPHAN_INO]);
}

void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
	struct list_head *head;
	struct orphan_inode_entry *new, *orphan;

	new = f2fs_kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC);
	new->ino = ino;

	spin_lock(&sbi->orphan_inode_lock);
	head = &sbi->orphan_inode_list;
	list_for_each_entry(orphan, head, list) {
		if (orphan->ino == ino) {
			spin_unlock(&sbi->orphan_inode_lock);
			kmem_cache_free(orphan_entry_slab, new);
			return;
		}

		if (orphan->ino > ino)
			break;
	}

	/* add new orphan entry into list which is sorted by inode number */
	list_add_tail(&new->list, &orphan->list);
	spin_unlock(&sbi->orphan_inode_lock);
	/* add new orphan ino entry into list */
	__add_ino_entry(sbi, ino, ORPHAN_INO);
}

void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
	struct list_head *head;
	struct orphan_inode_entry *orphan;

	spin_lock(&sbi->orphan_inode_lock);
	head = &sbi->orphan_inode_list;
	list_for_each_entry(orphan, head, list) {
		if (orphan->ino == ino) {
			list_del(&orphan->list);
			f2fs_bug_on(sbi->n_orphans == 0);
			sbi->n_orphans--;
			spin_unlock(&sbi->orphan_inode_lock);
			kmem_cache_free(orphan_entry_slab, orphan);
			return;
		}
	}
	spin_unlock(&sbi->orphan_inode_lock);
	/* remove orphan entry from orphan list */
	__remove_ino_entry(sbi, ino, ORPHAN_INO);
}

static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
@@ -401,14 +449,14 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
	unsigned short orphan_blocks = (unsigned short)((sbi->n_orphans +
		(F2FS_ORPHANS_PER_BLOCK - 1)) / F2FS_ORPHANS_PER_BLOCK);
	struct page *page = NULL;
	struct orphan_inode_entry *orphan = NULL;
	struct ino_entry *orphan = NULL;

	for (index = 0; index < orphan_blocks; index++)
		grab_meta_page(sbi, start_blk + index);

	index = 1;
	spin_lock(&sbi->orphan_inode_lock);
	head = &sbi->orphan_inode_list;
	spin_lock(&sbi->ino_lock[ORPHAN_INO]);
	head = &sbi->ino_list[ORPHAN_INO];

	/* loop for each orphan inode entry and write them in journal block */
	list_for_each_entry(orphan, head, list) {
@@ -448,7 +496,7 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
		f2fs_put_page(page, 1);
	}

	spin_unlock(&sbi->orphan_inode_lock);
	spin_unlock(&sbi->ino_lock[ORPHAN_INO]);
}

static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
@@ -714,10 +762,10 @@ static void block_operations(struct f2fs_sb_info *sbi)
	 * until finishing nat/sit flush.
	 */
retry_flush_nodes:
	mutex_lock(&sbi->node_write);
	down_write(&sbi->node_write);

	if (get_pages(sbi, F2FS_DIRTY_NODES)) {
		mutex_unlock(&sbi->node_write);
		up_write(&sbi->node_write);
		sync_node_pages(sbi, 0, &wbc);
		goto retry_flush_nodes;
	}
@@ -726,7 +774,7 @@ static void block_operations(struct f2fs_sb_info *sbi)

static void unblock_operations(struct f2fs_sb_info *sbi)
{
	mutex_unlock(&sbi->node_write);
	up_write(&sbi->node_write);
	f2fs_unlock_all(sbi);
}

@@ -748,6 +796,7 @@ static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
{
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
	nid_t last_nid = 0;
	block_t start_blk;
	struct page *cp_page;
@@ -761,7 +810,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
	 * This avoids to conduct wrong roll-forward operations and uses
	 * metapages, so should be called prior to sync_meta_pages below.
	 */
	discard_next_dnode(sbi);
	discard_next_dnode(sbi, NEXT_FREE_BLKADDR(sbi, curseg));

	/* Flush all the NAT/SIT pages */
	while (get_pages(sbi, F2FS_DIRTY_META))
@@ -885,8 +934,9 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
	/* Here, we only have one bio having CP pack */
	sync_meta_pages(sbi, META_FLUSH, LONG_MAX);

	if (unlikely(!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG))) {
	if (!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)) {
		clear_prefree_segments(sbi);
		release_dirty_inode(sbi);
		F2FS_RESET_SB_DIRT(sbi);
	}
}
@@ -932,31 +982,37 @@ void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
	trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish checkpoint");
}

void init_orphan_info(struct f2fs_sb_info *sbi)
void init_ino_entry_info(struct f2fs_sb_info *sbi)
{
	spin_lock_init(&sbi->orphan_inode_lock);
	INIT_LIST_HEAD(&sbi->orphan_inode_list);
	sbi->n_orphans = 0;
	int i;

	for (i = 0; i < MAX_INO_ENTRY; i++) {
		INIT_RADIX_TREE(&sbi->ino_root[i], GFP_ATOMIC);
		spin_lock_init(&sbi->ino_lock[i]);
		INIT_LIST_HEAD(&sbi->ino_list[i]);
	}

	/*
	 * Considering 512 blocks in a segment, 8 blocks are needed for the
	 * checkpoint pack and the log segment summaries. The remaining
	 * blocks are used to keep orphan entries. With the limitation of
	 * one reserved segment for the cp pack, we can have at most
	 * 1020 * 504 orphan entries.
	 */
	sbi->n_orphans = 0;
	sbi->max_orphans = (sbi->blocks_per_seg - 2 - NR_CURSEG_TYPE)
				* F2FS_ORPHANS_PER_BLOCK;
}

int __init create_checkpoint_caches(void)
{
	orphan_entry_slab = f2fs_kmem_cache_create("f2fs_orphan_entry",
			sizeof(struct orphan_inode_entry));
	if (!orphan_entry_slab)
	ino_entry_slab = f2fs_kmem_cache_create("f2fs_ino_entry",
			sizeof(struct ino_entry));
	if (!ino_entry_slab)
		return -ENOMEM;
	inode_entry_slab = f2fs_kmem_cache_create("f2fs_dirty_dir_entry",
			sizeof(struct dir_inode_entry));
	if (!inode_entry_slab) {
		kmem_cache_destroy(orphan_entry_slab);
		kmem_cache_destroy(ino_entry_slab);
		return -ENOMEM;
	}
	return 0;
@@ -964,6 +1020,6 @@ int __init create_checkpoint_caches(void)

void destroy_checkpoint_caches(void)
{
	kmem_cache_destroy(orphan_entry_slab);
	kmem_cache_destroy(ino_entry_slab);
	kmem_cache_destroy(inode_entry_slab);
}
+45 −14
Original line number Diff line number Diff line
@@ -139,6 +139,9 @@ void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
	/* change META to META_FLUSH in the checkpoint procedure */
	if (type >= META_FLUSH) {
		io->fio.type = META_FLUSH;
		if (test_opt(sbi, NOBARRIER))
			io->fio.rw = WRITE_FLUSH | REQ_META | REQ_PRIO;
		else
			io->fio.rw = WRITE_FLUSH_FUA | REQ_META | REQ_PRIO;
	}
	__submit_merged_bio(io);
@@ -626,8 +629,10 @@ static int __get_data_block(struct inode *inode, sector_t iblock,
	if (check_extent_cache(inode, pgofs, bh_result))
		goto out;

	if (create)
	if (create) {
		f2fs_balance_fs(sbi);
		f2fs_lock_op(sbi);
	}

	/* When reading holes, we need its node page */
	set_new_dnode(&dn, inode, NULL, NULL, 0);
@@ -784,9 +789,11 @@ int do_write_data_page(struct page *page, struct f2fs_io_info *fio)
			!is_cold_data(page) &&
			need_inplace_update(inode))) {
		rewrite_data_page(page, old_blkaddr, fio);
		set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE);
	} else {
		write_data_page(page, &dn, &new_blkaddr, fio);
		update_extent_cache(new_blkaddr, &dn);
		set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE);
	}
out_writepage:
	f2fs_put_dnode(&dn);
@@ -914,6 +921,16 @@ static int f2fs_write_data_pages(struct address_space *mapping,
	return 0;
}

/*
 * Roll back the effects of a failed write that would have extended the
 * file to @to: drop page-cache pages and truncate allocated blocks back
 * to the current i_size. No-op when the write did not extend the file.
 */
static void f2fs_write_failed(struct address_space *mapping, loff_t to)
{
	struct inode *inode = mapping->host;

	if (to > inode->i_size) {
		truncate_pagecache(inode, inode->i_size);
		truncate_blocks(inode, inode->i_size);
	}
}

static int f2fs_write_begin(struct file *file, struct address_space *mapping,
		loff_t pos, unsigned len, unsigned flags,
		struct page **pagep, void **fsdata)
@@ -931,11 +948,13 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
repeat:
	err = f2fs_convert_inline_data(inode, pos + len);
	if (err)
		return err;
		goto fail;

	page = grab_cache_page_write_begin(mapping, index, flags);
	if (!page)
		return -ENOMEM;
	if (!page) {
		err = -ENOMEM;
		goto fail;
	}

	/* to avoid latency during memory pressure */
	unlock_page(page);
@@ -949,10 +968,9 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
	set_new_dnode(&dn, inode, NULL, NULL, 0);
	err = f2fs_reserve_block(&dn, index);
	f2fs_unlock_op(sbi);

	if (err) {
		f2fs_put_page(page, 0);
		return err;
		goto fail;
	}
inline_data:
	lock_page(page);
@@ -982,19 +1000,20 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
			err = f2fs_read_inline_data(inode, page);
			if (err) {
				page_cache_release(page);
				return err;
				goto fail;
			}
		} else {
			err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr,
							READ_SYNC);
			if (err)
				return err;
				goto fail;
		}

		lock_page(page);
		if (unlikely(!PageUptodate(page))) {
			f2fs_put_page(page, 1);
			return -EIO;
			err = -EIO;
			goto fail;
		}
		if (unlikely(page->mapping != mapping)) {
			f2fs_put_page(page, 1);
@@ -1005,6 +1024,9 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
	SetPageUptodate(page);
	clear_cold_data(page);
	return 0;
fail:
	f2fs_write_failed(mapping, pos + len);
	return err;
}

static int f2fs_write_end(struct file *file,
@@ -1016,7 +1038,6 @@ static int f2fs_write_end(struct file *file,

	trace_f2fs_write_end(inode, pos, len, copied);

	SetPageUptodate(page);
	set_page_dirty(page);

	if (pos + copied > i_size_read(inode)) {
@@ -1050,7 +1071,10 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
		struct iov_iter *iter, loff_t offset)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;
	struct address_space *mapping = file->f_mapping;
	struct inode *inode = mapping->host;
	size_t count = iov_iter_count(iter);
	int err;

	/* Let buffer I/O handle the inline data case. */
	if (f2fs_has_inline_data(inode))
@@ -1062,8 +1086,15 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
	/* clear fsync mark to recover these blocks */
	fsync_mark_clear(F2FS_SB(inode->i_sb), inode->i_ino);

	return blockdev_direct_IO(rw, iocb, inode, iter, offset,
				  get_data_block);
	trace_f2fs_direct_IO_enter(inode, offset, count, rw);

	err = blockdev_direct_IO(rw, iocb, inode, iter, offset, get_data_block);
	if (err < 0 && (rw & WRITE))
		f2fs_write_failed(mapping, offset + count);

	trace_f2fs_direct_IO_exit(inode, offset, count, rw, err);

	return err;
}

static void f2fs_invalidate_data_page(struct page *page, unsigned int offset,
+6 −13
Original line number Diff line number Diff line
@@ -167,7 +167,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
	si->cache_mem += npages << PAGE_CACHE_SHIFT;
	npages = META_MAPPING(sbi)->nrpages;
	si->cache_mem += npages << PAGE_CACHE_SHIFT;
	si->cache_mem += sbi->n_orphans * sizeof(struct orphan_inode_entry);
	si->cache_mem += sbi->n_orphans * sizeof(struct ino_entry);
	si->cache_mem += sbi->n_dirty_dirs * sizeof(struct dir_inode_entry);
}

@@ -345,21 +345,14 @@ void __init f2fs_create_root_stats(void)

	f2fs_debugfs_root = debugfs_create_dir("f2fs", NULL);
	if (!f2fs_debugfs_root)
		goto bail;
		return;

	file = debugfs_create_file("status", S_IRUGO, f2fs_debugfs_root,
			NULL, &stat_fops);
	if (!file)
		goto free_debugfs_dir;

	return;

free_debugfs_dir:
	if (!file) {
		debugfs_remove(f2fs_debugfs_root);

bail:
		f2fs_debugfs_root = NULL;
	return;
	}
}

void f2fs_destroy_root_stats(void)
Loading