Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 605babb9 authored by Ryusuke Konishi, committed by Greg Kroah-Hartman
Browse files

nilfs2: fix lockdep warnings in page operations for btree nodes

[ Upstream commit e897be17a441fa637cd166fc3de1445131e57692 ]

Patch series "nilfs2 lockdep warning fixes".

The first two are to resolve the lockdep warning issue, and the last one
is the accompanying cleanup and low priority.

Based on your comment, this series solves the issue by separating inode
object as needed.  Since I was worried about the impact of the object
composition changes, I tested the series carefully not to cause
regressions, especially for delicate functions such as disk space
reclamation and snapshots.

This patch (of 3):

If CONFIG_LOCKDEP is enabled, nilfs2 hits lockdep warnings at
inode_to_wb() during page/folio operations for btree nodes:

  WARNING: CPU: 0 PID: 6575 at include/linux/backing-dev.h:269 inode_to_wb include/linux/backing-dev.h:269 [inline]
  WARNING: CPU: 0 PID: 6575 at include/linux/backing-dev.h:269 folio_account_dirtied mm/page-writeback.c:2460 [inline]
  WARNING: CPU: 0 PID: 6575 at include/linux/backing-dev.h:269 __folio_mark_dirty+0xa7c/0xe30 mm/page-writeback.c:2509
  Modules linked in:
  ...
  RIP: 0010:inode_to_wb include/linux/backing-dev.h:269 [inline]
  RIP: 0010:folio_account_dirtied mm/page-writeback.c:2460 [inline]
  RIP: 0010:__folio_mark_dirty+0xa7c/0xe30 mm/page-writeback.c:2509
  ...
  Call Trace:
    __set_page_dirty include/linux/pagemap.h:834 [inline]
    mark_buffer_dirty+0x4e6/0x650 fs/buffer.c:1145
    nilfs_btree_propagate_p fs/nilfs2/btree.c:1889 [inline]
    nilfs_btree_propagate+0x4ae/0xea0 fs/nilfs2/btree.c:2085
    nilfs_bmap_propagate+0x73/0x170 fs/nilfs2/bmap.c:337
    nilfs_collect_dat_data+0x45/0xd0 fs/nilfs2/segment.c:625
    nilfs_segctor_apply_buffers+0x14a/0x470 fs/nilfs2/segment.c:1009
    nilfs_segctor_scan_file+0x47a/0x700 fs/nilfs2/segment.c:1048
    nilfs_segctor_collect_blocks fs/nilfs2/segment.c:1224 [inline]
    nilfs_segctor_collect fs/nilfs2/segment.c:1494 [inline]
    nilfs_segctor_do_construct+0x14f3/0x6c60 fs/nilfs2/segment.c:2036
    nilfs_segctor_construct+0x7a7/0xb30 fs/nilfs2/segment.c:2372
    nilfs_segctor_thread_construct fs/nilfs2/segment.c:2480 [inline]
    nilfs_segctor_thread+0x3c3/0xf90 fs/nilfs2/segment.c:2563
    kthread+0x405/0x4f0 kernel/kthread.c:327
    ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:295

This is because nilfs2 uses two page caches for each inode and
inode->i_mapping never points to one of them, the btree node cache.

This causes inode_to_wb(inode) to refer to a different page cache than
the one for which the calling page/folio operations, such as
__folio_start_writeback(), __folio_end_writeback(), or
__folio_mark_dirty(), acquired the lock.

This patch resolves the issue by allocating and using an additional
inode to hold the page cache of btree nodes.  The inode is attached
one-to-one to the traditional nilfs2 inode if it requires a block
mapping with b-tree.  This setup change is in memory only and does not
affect the disk format.

Link: https://lkml.kernel.org/r/1647867427-30498-1-git-send-email-konishi.ryusuke@gmail.com
Link: https://lkml.kernel.org/r/1647867427-30498-2-git-send-email-konishi.ryusuke@gmail.com
Link: https://lore.kernel.org/r/YXrYvIo8YRnAOJCj@casper.infradead.org
Link: https://lore.kernel.org/r/9a20b33d-b38f-b4a2-4742-c1eb5b8e4d6c@redhat.com


Signed-off-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
Reported-by: <syzbot+0d5b462a6f07447991b3@syzkaller.appspotmail.com>
Reported-by: <syzbot+34ef28bb2aeb28724aa0@syzkaller.appspotmail.com>
Reported-by: Hao Sun <sunhao.th@gmail.com>
Reported-by: David Hildenbrand <david@redhat.com>
Tested-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
Cc: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
parent f6206892
Loading
Loading
Loading
Loading
+20 −3
Original line number Diff line number Diff line
@@ -20,6 +20,23 @@
#include "page.h"
#include "btnode.h"


/**
 * nilfs_init_btnc_inode - prepare an inode that holds a B-tree node cache
 * @btnc_inode: inode to be initialized
 *
 * Marks @btnc_inode as a regular file, clears its nilfs flags, zero-fills
 * its i_bmap_data member, and sets the gfp mask of its mapping to GFP_NOFS.
 */
void nilfs_init_btnc_inode(struct inode *btnc_inode)
{
	struct nilfs_inode_info *info = NILFS_I(btnc_inode);

	/* VFS-level setup of the holder inode and its page cache */
	mapping_set_gfp_mask(btnc_inode->i_mapping, GFP_NOFS);
	btnc_inode->i_mode = S_IFREG;

	/* nilfs-private state starts out empty */
	memset(&info->i_bmap_data, 0, sizeof(struct nilfs_bmap));
	info->i_flags = 0;
}

void nilfs_btnode_cache_clear(struct address_space *btnc)
{
	invalidate_mapping_pages(btnc, 0, -1);
@@ -29,7 +46,7 @@ void nilfs_btnode_cache_clear(struct address_space *btnc)
struct buffer_head *
nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr)
{
	struct inode *inode = NILFS_BTNC_I(btnc);
	struct inode *inode = btnc->host;
	struct buffer_head *bh;

	bh = nilfs_grab_buffer(inode, btnc, blocknr, BIT(BH_NILFS_Node));
@@ -57,7 +74,7 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr,
			      struct buffer_head **pbh, sector_t *submit_ptr)
{
	struct buffer_head *bh;
	struct inode *inode = NILFS_BTNC_I(btnc);
	struct inode *inode = btnc->host;
	struct page *page;
	int err;

@@ -157,7 +174,7 @@ int nilfs_btnode_prepare_change_key(struct address_space *btnc,
				    struct nilfs_btnode_chkey_ctxt *ctxt)
{
	struct buffer_head *obh, *nbh;
	struct inode *inode = NILFS_BTNC_I(btnc);
	struct inode *inode = btnc->host;
	__u64 oldkey = ctxt->oldkey, newkey = ctxt->newkey;
	int err;

+1 −0
Original line number Diff line number Diff line
@@ -30,6 +30,7 @@ struct nilfs_btnode_chkey_ctxt {
	struct buffer_head *newbh;
};

void nilfs_init_btnc_inode(struct inode *btnc_inode);
void nilfs_btnode_cache_clear(struct address_space *);
struct buffer_head *nilfs_btnode_create_block(struct address_space *btnc,
					      __u64 blocknr);
+19 −8
Original line number Diff line number Diff line
@@ -58,7 +58,8 @@ static void nilfs_btree_free_path(struct nilfs_btree_path *path)
static int nilfs_btree_get_new_block(const struct nilfs_bmap *btree,
				     __u64 ptr, struct buffer_head **bhp)
{
	struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache;
	struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode;
	struct address_space *btnc = btnc_inode->i_mapping;
	struct buffer_head *bh;

	bh = nilfs_btnode_create_block(btnc, ptr);
@@ -470,7 +471,8 @@ static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
				   struct buffer_head **bhp,
				   const struct nilfs_btree_readahead_info *ra)
{
	struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache;
	struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode;
	struct address_space *btnc = btnc_inode->i_mapping;
	struct buffer_head *bh, *ra_bh;
	sector_t submit_ptr = 0;
	int ret;
@@ -1742,6 +1744,10 @@ nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *btree, __u64 key,
		dat = nilfs_bmap_get_dat(btree);
	}

	ret = nilfs_attach_btree_node_cache(&NILFS_BMAP_I(btree)->vfs_inode);
	if (ret < 0)
		return ret;

	ret = nilfs_bmap_prepare_alloc_ptr(btree, dreq, dat);
	if (ret < 0)
		return ret;
@@ -1914,7 +1920,7 @@ static int nilfs_btree_prepare_update_v(struct nilfs_bmap *btree,
		path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr;
		path[level].bp_ctxt.bh = path[level].bp_bh;
		ret = nilfs_btnode_prepare_change_key(
			&NILFS_BMAP_I(btree)->i_btnode_cache,
			NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
			&path[level].bp_ctxt);
		if (ret < 0) {
			nilfs_dat_abort_update(dat,
@@ -1940,7 +1946,7 @@ static void nilfs_btree_commit_update_v(struct nilfs_bmap *btree,

	if (buffer_nilfs_node(path[level].bp_bh)) {
		nilfs_btnode_commit_change_key(
			&NILFS_BMAP_I(btree)->i_btnode_cache,
			NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
			&path[level].bp_ctxt);
		path[level].bp_bh = path[level].bp_ctxt.bh;
	}
@@ -1959,7 +1965,7 @@ static void nilfs_btree_abort_update_v(struct nilfs_bmap *btree,
			       &path[level].bp_newreq.bpr_req);
	if (buffer_nilfs_node(path[level].bp_bh))
		nilfs_btnode_abort_change_key(
			&NILFS_BMAP_I(btree)->i_btnode_cache,
			NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
			&path[level].bp_ctxt);
}

@@ -2135,7 +2141,8 @@ static void nilfs_btree_add_dirty_buffer(struct nilfs_bmap *btree,
static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree,
					     struct list_head *listp)
{
	struct address_space *btcache = &NILFS_BMAP_I(btree)->i_btnode_cache;
	struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode;
	struct address_space *btcache = btnc_inode->i_mapping;
	struct list_head lists[NILFS_BTREE_LEVEL_MAX];
	struct pagevec pvec;
	struct buffer_head *bh, *head;
@@ -2189,12 +2196,12 @@ static int nilfs_btree_assign_p(struct nilfs_bmap *btree,
		path[level].bp_ctxt.newkey = blocknr;
		path[level].bp_ctxt.bh = *bh;
		ret = nilfs_btnode_prepare_change_key(
			&NILFS_BMAP_I(btree)->i_btnode_cache,
			NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
			&path[level].bp_ctxt);
		if (ret < 0)
			return ret;
		nilfs_btnode_commit_change_key(
			&NILFS_BMAP_I(btree)->i_btnode_cache,
			NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
			&path[level].bp_ctxt);
		*bh = path[level].bp_ctxt.bh;
	}
@@ -2399,6 +2406,10 @@ int nilfs_btree_init(struct nilfs_bmap *bmap)

	if (nilfs_btree_root_broken(nilfs_btree_get_root(bmap), bmap->b_inode))
		ret = -EIO;
	else
		ret = nilfs_attach_btree_node_cache(
			&NILFS_BMAP_I(bmap)->vfs_inode);

	return ret;
}

+4 −3
Original line number Diff line number Diff line
@@ -126,9 +126,10 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff,
int nilfs_gccache_submit_read_node(struct inode *inode, sector_t pbn,
				   __u64 vbn, struct buffer_head **out_bh)
{
	struct inode *btnc_inode = NILFS_I(inode)->i_assoc_inode;
	int ret;

	ret = nilfs_btnode_submit_block(&NILFS_I(inode)->i_btnode_cache,
	ret = nilfs_btnode_submit_block(btnc_inode->i_mapping,
					vbn ? : pbn, pbn, REQ_OP_READ, 0,
					out_bh, &pbn);
	if (ret == -EEXIST) /* internal code (cache hit) */
@@ -170,7 +171,7 @@ int nilfs_init_gcinode(struct inode *inode)
	ii->i_flags = 0;
	nilfs_bmap_init_gc(ii->i_bmap);

	return 0;
	return nilfs_attach_btree_node_cache(inode);
}

/**
@@ -185,7 +186,7 @@ void nilfs_remove_all_gcinodes(struct the_nilfs *nilfs)
		ii = list_first_entry(head, struct nilfs_inode_info, i_dirty);
		list_del_init(&ii->i_dirty);
		truncate_inode_pages(&ii->vfs_inode.i_data, 0);
		nilfs_btnode_cache_clear(&ii->i_btnode_cache);
		nilfs_btnode_cache_clear(ii->i_assoc_inode->i_mapping);
		iput(&ii->vfs_inode);
	}
}
+90 −14
Original line number Diff line number Diff line
@@ -28,12 +28,14 @@
 * @cno: checkpoint number
 * @root: pointer on NILFS root object (mounted checkpoint)
 * @for_gc: inode for GC flag
 * @for_btnc: inode for B-tree node cache flag
 */
struct nilfs_iget_args {
	u64 ino;
	__u64 cno;
	struct nilfs_root *root;
	int for_gc;
	bool for_gc;
	bool for_btnc;
};

static int nilfs_iget_test(struct inode *inode, void *opaque);
@@ -322,7 +324,8 @@ static int nilfs_insert_inode_locked(struct inode *inode,
				     unsigned long ino)
{
	struct nilfs_iget_args args = {
		.ino = ino, .root = root, .cno = 0, .for_gc = 0
		.ino = ino, .root = root, .cno = 0, .for_gc = false,
		.for_btnc = false
	};

	return insert_inode_locked4(inode, ino, nilfs_iget_test, &args);
@@ -534,6 +537,13 @@ static int nilfs_iget_test(struct inode *inode, void *opaque)
		return 0;

	ii = NILFS_I(inode);
	if (test_bit(NILFS_I_BTNC, &ii->i_state)) {
		if (!args->for_btnc)
			return 0;
	} else if (args->for_btnc) {
		return 0;
	}

	if (!test_bit(NILFS_I_GCINODE, &ii->i_state))
		return !args->for_gc;

@@ -545,15 +555,15 @@ static int nilfs_iget_set(struct inode *inode, void *opaque)
	struct nilfs_iget_args *args = opaque;

	inode->i_ino = args->ino;
	if (args->for_gc) {
		NILFS_I(inode)->i_state = BIT(NILFS_I_GCINODE);
	NILFS_I(inode)->i_cno = args->cno;
		NILFS_I(inode)->i_root = NULL;
	} else {
	NILFS_I(inode)->i_root = args->root;
	if (args->root && args->ino == NILFS_ROOT_INO)
		nilfs_get_root(args->root);
		NILFS_I(inode)->i_root = args->root;
	}

	if (args->for_gc)
		NILFS_I(inode)->i_state = BIT(NILFS_I_GCINODE);
	if (args->for_btnc)
		NILFS_I(inode)->i_state |= BIT(NILFS_I_BTNC);
	return 0;
}

@@ -561,7 +571,8 @@ struct inode *nilfs_ilookup(struct super_block *sb, struct nilfs_root *root,
			    unsigned long ino)
{
	struct nilfs_iget_args args = {
		.ino = ino, .root = root, .cno = 0, .for_gc = 0
		.ino = ino, .root = root, .cno = 0, .for_gc = false,
		.for_btnc = false
	};

	return ilookup5(sb, ino, nilfs_iget_test, &args);
@@ -571,7 +582,8 @@ struct inode *nilfs_iget_locked(struct super_block *sb, struct nilfs_root *root,
				unsigned long ino)
{
	struct nilfs_iget_args args = {
		.ino = ino, .root = root, .cno = 0, .for_gc = 0
		.ino = ino, .root = root, .cno = 0, .for_gc = false,
		.for_btnc = false
	};

	return iget5_locked(sb, ino, nilfs_iget_test, nilfs_iget_set, &args);
@@ -602,7 +614,8 @@ struct inode *nilfs_iget_for_gc(struct super_block *sb, unsigned long ino,
				__u64 cno)
{
	struct nilfs_iget_args args = {
		.ino = ino, .root = NULL, .cno = cno, .for_gc = 1
		.ino = ino, .root = NULL, .cno = cno, .for_gc = true,
		.for_btnc = false
	};
	struct inode *inode;
	int err;
@@ -622,6 +635,68 @@ struct inode *nilfs_iget_for_gc(struct super_block *sb, unsigned long ino,
	return inode;
}

/**
 * nilfs_attach_btree_node_cache - attach a B-tree node cache to the inode
 * @inode: inode object
 *
 * nilfs_attach_btree_node_cache() attaches a B-tree node cache to @inode,
 * or does nothing if the inode already has it.  This function allocates
 * an additional inode to maintain page cache of B-tree nodes one-on-one.
 *
 * Return Value: On success, 0 is returned. On errors, one of the following
 * negative error code is returned.
 *
 * %-ENOMEM - Insufficient memory available.
 */
int nilfs_attach_btree_node_cache(struct inode *inode)
{
	struct nilfs_inode_info *ii = NILFS_I(inode);
	struct inode *btnc_inode;
	struct nilfs_iget_args args;

	if (ii->i_assoc_inode)
		return 0;

	args.ino = inode->i_ino;
	args.root = ii->i_root;
	args.cno = ii->i_cno;
	args.for_gc = test_bit(NILFS_I_GCINODE, &ii->i_state) != 0;
	args.for_btnc = true;

	btnc_inode = iget5_locked(inode->i_sb, inode->i_ino, nilfs_iget_test,
				  nilfs_iget_set, &args);
	if (unlikely(!btnc_inode))
		return -ENOMEM;
	if (btnc_inode->i_state & I_NEW) {
		nilfs_init_btnc_inode(btnc_inode);
		unlock_new_inode(btnc_inode);
	}
	NILFS_I(btnc_inode)->i_assoc_inode = inode;
	NILFS_I(btnc_inode)->i_bmap = ii->i_bmap;
	ii->i_assoc_inode = btnc_inode;

	return 0;
}

/**
 * nilfs_detach_btree_node_cache - detach the B-tree node cache from the inode
 * @inode: inode object
 *
 * If @inode has an associated inode, this function clears the
 * i_assoc_inode links in both directions and drops a reference on the
 * associated inode with iput().  It does nothing if @inode has no
 * associated inode.
 */
void nilfs_detach_btree_node_cache(struct inode *inode)
{
	struct nilfs_inode_info *ii = NILFS_I(inode);
	struct inode *btnc_inode = ii->i_assoc_inode;

	if (!btnc_inode)
		return;

	/* Break the link in both directions before dropping the reference. */
	NILFS_I(btnc_inode)->i_assoc_inode = NULL;
	ii->i_assoc_inode = NULL;
	iput(btnc_inode);
}

void nilfs_write_inode_common(struct inode *inode,
			      struct nilfs_inode *raw_inode, int has_bmap)
{
@@ -770,7 +845,8 @@ static void nilfs_clear_inode(struct inode *inode)
	if (test_bit(NILFS_I_BMAP, &ii->i_state))
		nilfs_bmap_clear(ii->i_bmap);

	nilfs_btnode_cache_clear(&ii->i_btnode_cache);
	if (!test_bit(NILFS_I_BTNC, &ii->i_state))
		nilfs_detach_btree_node_cache(inode);

	if (ii->i_root && inode->i_ino == NILFS_ROOT_INO)
		nilfs_put_root(ii->i_root);
Loading