Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 53e87268 authored by Jan Kara's avatar Jan Kara Committed by Theodore Ts'o
Browse files

ext4: fix deadlock in journal_unmap_buffer()



We cannot wait for transaction commit in journal_unmap_buffer()
because we hold page lock which ranks below transaction start.  We
solve the issue by bailing out of journal_unmap_buffer() and
jbd2_journal_invalidatepage() with -EBUSY.  Caller is then responsible
for waiting for transaction commit to finish and try invalidation
again. Since the issue can happen only for page stradding i_size, it
is simple enough to manually call jbd2_journal_invalidatepage() for
such page from ext4_setattr(), check the return value and wait if
necessary.

Signed-off-by: default avatarJan Kara <jack@suse.cz>
Signed-off-by: default avatar"Theodore Ts'o" <tytso@mit.edu>
parent 4520fb3c
Loading
Loading
Loading
Loading
+71 −11
Original line number Diff line number Diff line
@@ -2894,7 +2894,7 @@ static void ext4_invalidatepage(struct page *page, unsigned long offset)
	block_invalidatepage(page, offset);
}

static void ext4_journalled_invalidatepage(struct page *page,
static int __ext4_journalled_invalidatepage(struct page *page,
					    unsigned long offset)
{
	journal_t *journal = EXT4_JOURNAL(page->mapping->host);
@@ -2907,7 +2907,14 @@ static void ext4_journalled_invalidatepage(struct page *page,
	if (offset == 0)
		ClearPageChecked(page);

	jbd2_journal_invalidatepage(journal, page, offset);
	return jbd2_journal_invalidatepage(journal, page, offset);
}

/* Wrapper for aops... */
static void ext4_journalled_invalidatepage(struct page *page,
					   unsigned long offset)
{
	WARN_ON(__ext4_journalled_invalidatepage(page, offset) < 0);
}

static int ext4_releasepage(struct page *page, gfp_t wait)
@@ -4313,6 +4320,47 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
	return err;
}

/*
 * In data=journal mode ext4_journalled_invalidatepage() may fail to invalidate
 * buffers that are attached to a page stradding i_size and are undergoing
 * commit. In that case we have to wait for commit to finish and try again.
 */
static void ext4_wait_for_tail_page_commit(struct inode *inode)
{
	struct page *page;
	unsigned offset;
	journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
	tid_t commit_tid = 0;
	int ret;

	offset = inode->i_size & (PAGE_CACHE_SIZE - 1);
	/*
	 * All buffers in the last page remain valid? Then there's nothing to
	 * do. We do the check mainly to optimize the common PAGE_CACHE_SIZE ==
	 * blocksize case
	 */
	if (offset > PAGE_CACHE_SIZE - (1 << inode->i_blkbits))
		return;
	while (1) {
		page = find_lock_page(inode->i_mapping,
				      inode->i_size >> PAGE_CACHE_SHIFT);
		if (!page)
			return;
		ret = __ext4_journalled_invalidatepage(page, offset);
		unlock_page(page);
		page_cache_release(page);
		if (ret != -EBUSY)
			return;
		commit_tid = 0;
		read_lock(&journal->j_state_lock);
		if (journal->j_committing_transaction)
			commit_tid = journal->j_committing_transaction->t_tid;
		read_unlock(&journal->j_state_lock);
		if (commit_tid)
			jbd2_log_wait_commit(journal, commit_tid);
	}
}

/*
 * ext4_setattr()
 *
@@ -4426,16 +4474,28 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
	}

	if (attr->ia_valid & ATTR_SIZE) {
		if (attr->ia_size != i_size_read(inode)) {
			truncate_setsize(inode, attr->ia_size);
			/* Inode size will be reduced, wait for dio in flight.
			 * Temporarily disable dioread_nolock to prevent
			 * livelock. */
		if (attr->ia_size != inode->i_size) {
			loff_t oldsize = inode->i_size;

			i_size_write(inode, attr->ia_size);
			/*
			 * Blocks are going to be removed from the inode. Wait
			 * for dio in flight.  Temporarily disable
			 * dioread_nolock to prevent livelock.
			 */
			if (orphan) {
				if (!ext4_should_journal_data(inode)) {
					ext4_inode_block_unlocked_dio(inode);
					inode_dio_wait(inode);
					ext4_inode_resume_unlocked_dio(inode);
				} else
					ext4_wait_for_tail_page_commit(inode);
			}
			/*
			 * Truncate pagecache after we've waited for commit
			 * in data=journal mode to make pages freeable.
			 */
			truncate_pagecache(inode, oldsize, inode->i_size);
		}
		ext4_truncate(inode);
	}
+14 −13
Original line number Diff line number Diff line
@@ -1840,7 +1840,6 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh,

	BUFFER_TRACE(bh, "entry");

retry:
	/*
	 * It is safe to proceed here without the j_list_lock because the
	 * buffers cannot be stolen by try_to_free_buffers as long as we are
@@ -1935,14 +1934,11 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh,
		 * for commit and try again.
		 */
		if (partial_page) {
			tid_t tid = journal->j_committing_transaction->t_tid;

			jbd2_journal_put_journal_head(jh);
			spin_unlock(&journal->j_list_lock);
			jbd_unlock_bh_state(bh);
			write_unlock(&journal->j_state_lock);
			jbd2_log_wait_commit(journal, tid);
			goto retry;
			return -EBUSY;
		}
		/*
		 * OK, buffer won't be reachable after truncate. We just set
@@ -2003,21 +1999,23 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh,
 * @page:    page to flush
 * @offset:  length of page to invalidate.
 *
 * Reap page buffers containing data after offset in page.
 *
 * Reap page buffers containing data after offset in page. Can return -EBUSY
 * if buffers are part of the committing transaction and the page is straddling
 * i_size. Caller then has to wait for current commit and try again.
 */
void jbd2_journal_invalidatepage(journal_t *journal,
int jbd2_journal_invalidatepage(journal_t *journal,
				struct page *page,
				unsigned long offset)
{
	struct buffer_head *head, *bh, *next;
	unsigned int curr_off = 0;
	int may_free = 1;
	int ret = 0;

	if (!PageLocked(page))
		BUG();
	if (!page_has_buffers(page))
		return;
		return 0;

	/* We will potentially be playing with lists other than just the
	 * data lists (especially for journaled data mode), so be
@@ -2031,9 +2029,11 @@ void jbd2_journal_invalidatepage(journal_t *journal,
		if (offset <= curr_off) {
			/* This block is wholly outside the truncation point */
			lock_buffer(bh);
			may_free &= journal_unmap_buffer(journal, bh,
							 offset > 0);
			ret = journal_unmap_buffer(journal, bh, offset > 0);
			unlock_buffer(bh);
			if (ret < 0)
				return ret;
			may_free &= ret;
		}
		curr_off = next_off;
		bh = next;
@@ -2044,6 +2044,7 @@ void jbd2_journal_invalidatepage(journal_t *journal,
		if (may_free && try_to_free_buffers(page))
			J_ASSERT(!page_has_buffers(page));
	}
	return 0;
}

/*
+1 −1
Original line number Diff line number Diff line
@@ -1098,7 +1098,7 @@ void jbd2_journal_set_triggers(struct buffer_head *,
extern int	 jbd2_journal_dirty_metadata (handle_t *, struct buffer_head *);
extern int	 jbd2_journal_forget (handle_t *, struct buffer_head *);
extern void	 journal_sync_buffer (struct buffer_head *);
extern void	 jbd2_journal_invalidatepage(journal_t *,
extern int	 jbd2_journal_invalidatepage(journal_t *,
				struct page *, unsigned long);
extern int	 jbd2_journal_try_to_free_buffers(journal_t *, struct page *, gfp_t);
extern int	 jbd2_journal_stop(handle_t *);