Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 009c6e87 authored by Brian Foster's avatar Brian Foster Committed by Dave Chinner
Browse files

xfs: add missing ilock around dio write last extent alignment



The iomap codepath (via get_blocks()) acquires and release the inode
lock in the case of a direct write that requires block allocation. This
is because xfs_iomap_write_direct() allocates a transaction, which means
the ilock must be dropped and reacquired after the transaction is
allocated and reserved.

xfs_iomap_write_direct() invokes xfs_iomap_eof_align_last_fsb() before
the transaction is created and thus before the ilock is reacquired. This
can lead to calls to xfs_iread_extents() and reads of the in-core extent
list without any synchronization (via xfs_bmap_eof() and
xfs_bmap_last_extent()). xfs_iread_extents() assert fails if the ilock
is not held, but this is not currently seen in practice as the current
callers had already invoked xfs_bmapi_read().

What has been seen in practice are reports of crashes down in the
xfs_bmap_eof() codepath on direct writes due to seemingly bogus pointer
references from xfs_iext_get_ext(). While an explicit reproducer is not
currently available to confirm the cause of the problem, crash analysis
and code inspection from David Jeffrey had identified the insufficient
locking.

xfs_iomap_eof_align_last_fsb() is called from other contexts with the
inode lock already held, so we cannot acquire it therein.
__xfs_get_blocks() acquires and drops the ilock with variable flags to
cover the event that the extent list must be read in. The common case is
that __xfs_get_blocks() acquires the shared ilock. To provide locking
around the last extent alignment call without adding more lock cycles to
the dio path, update xfs_iomap_write_direct() to expect the shared ilock
held on entry and do the extent alignment under its protection. Demote
the lock, if necessary, from __xfs_get_blocks() and push the
xfs_qm_dqattach() call outside of the shared lock critical section.
Also, add an assert to document that the extent list is always expected
to be present in this path. Otherwise, we risk a call to
xfs_iread_extents() while under the shared ilock. This is safe as all
current callers have executed an xfs_bmapi_read() call under the current
iolock context.

Reported-by: default avatarDavid Jeffery <djeffery@redhat.com>
Signed-off-by: default avatarBrian Foster <bfoster@redhat.com>
Reviewed-by: default avatarDave Chinner <dchinner@redhat.com>
Signed-off-by: default avatarDave Chinner <david@fromorbit.com>
parent 5cb13dcd
Loading
Loading
Loading
Loading
+5 −5
Original line number Diff line number Diff line
@@ -1408,12 +1408,12 @@ __xfs_get_blocks(
	      imap.br_startblock == DELAYSTARTBLOCK))) {
		if (direct || xfs_get_extsz_hint(ip)) {
			/*
			 * Drop the ilock in preparation for starting the block
			 * allocation transaction.  It will be retaken
			 * exclusively inside xfs_iomap_write_direct for the
			 * actual allocation.
			 * xfs_iomap_write_direct() expects the shared lock. It
			 * is unlocked on return.
			 */
			xfs_iunlock(ip, lockmode);
			if (lockmode == XFS_ILOCK_EXCL)
				xfs_ilock_demote(ip, lockmode);

			error = xfs_iomap_write_direct(ip, offset, size,
						       &imap, nimaps);
			if (error)
+26 −7
Original line number Diff line number Diff line
@@ -131,20 +131,29 @@ xfs_iomap_write_direct(
	uint		qblocks, resblks, resrtextents;
	int		committed;
	int		error;

	error = xfs_qm_dqattach(ip, 0);
	if (error)
		return error;
	int		lockmode;

	rt = XFS_IS_REALTIME_INODE(ip);
	extsz = xfs_get_extsz_hint(ip);
	lockmode = XFS_ILOCK_SHARED;	/* locked by caller */

	ASSERT(xfs_isilocked(ip, lockmode));

	offset_fsb = XFS_B_TO_FSBT(mp, offset);
	last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
	if ((offset + count) > XFS_ISIZE(ip)) {
		/*
		 * Assert that the in-core extent list is present since this can
		 * call xfs_iread_extents() and we only have the ilock shared.
		 * This should be safe because the lock was held around a bmapi
		 * call in the caller and we only need it to access the in-core
		 * list.
		 */
		ASSERT(XFS_IFORK_PTR(ip, XFS_DATA_FORK)->if_flags &
								XFS_IFEXTENTS);
		error = xfs_iomap_eof_align_last_fsb(mp, ip, extsz, &last_fsb);
		if (error)
			return error;
			goto out_unlock;
	} else {
		if (nmaps && (imap->br_startblock == HOLESTARTBLOCK))
			last_fsb = MIN(last_fsb, (xfs_fileoff_t)
@@ -173,6 +182,15 @@ xfs_iomap_write_direct(
		quota_flag = XFS_QMOPT_RES_REGBLKS;
	}

	/*
	 * Drop the shared lock acquired by the caller, attach the dquot if
	 * necessary and move on to transaction setup.
	 */
	xfs_iunlock(ip, lockmode);
	error = xfs_qm_dqattach(ip, 0);
	if (error)
		return error;

	/*
	 * Allocate and setup the transaction
	 */
@@ -187,7 +205,8 @@ xfs_iomap_write_direct(
		return error;
	}

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	lockmode = XFS_ILOCK_EXCL;
	xfs_ilock(ip, lockmode);

	error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks, 0, quota_flag);
	if (error)
@@ -229,7 +248,7 @@ xfs_iomap_write_direct(
		error = xfs_alert_fsblock_zero(ip, imap);

out_unlock:
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	xfs_iunlock(ip, lockmode);
	return error;

out_bmap_cancel:
+5 −0
Original line number Diff line number Diff line
@@ -181,6 +181,11 @@ xfs_fs_map_blocks(
		ASSERT(imap.br_startblock != DELAYSTARTBLOCK);

		if (!nimaps || imap.br_startblock == HOLESTARTBLOCK) {
			/*
			 * xfs_iomap_write_direct() expects to take ownership of
			 * the shared ilock.
			 */
			xfs_ilock(ip, XFS_ILOCK_SHARED);
			error = xfs_iomap_write_direct(ip, offset, length,
						       &imap, nimaps);
			if (error)