Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit d2b3964a authored by Christoph Hellwig's avatar Christoph Hellwig Committed by Darrick J. Wong
Browse files

xfs: fix COW writeback race



Due to the way how xfs_iomap_write_allocate tries to convert the whole
found extents from delalloc to real space we can run into a race
condition with multiple threads doing writes to this same extent.
For the non-COW case that is harmless as the only thing that can happen
is that we call xfs_bmapi_write on an extent that has already been
converted to a real allocation.  For COW writes where we move the extent
from the COW to the data fork after I/O completion the race is, however,
not quite as harmless.  In the worst case we are now calling
xfs_bmapi_write on a region that contains hole in the COW work, which
will trip up an assert in debug builds or lead to file system corruption
in non-debug builds.  This seems to be reproducible with workloads of
small O_DSYNC write, although so far I've not managed to come up with
a with an isolated reproducer.

The fix for the issue is relatively simple:  tell xfs_bmapi_write
that we are only asked to convert delayed allocations and skip holes
in that case.

Signed-off-by: default avatarChristoph Hellwig <hch@lst.de>
Reviewed-by: default avatarBrian Foster <bfoster@redhat.com>
Reviewed-by: default avatarDarrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: default avatarDarrick J. Wong <darrick.wong@oracle.com>
parent 7a308bb3
Loading
Loading
Loading
Loading
+32 −12
Original line number Diff line number Diff line
@@ -4514,8 +4514,6 @@ xfs_bmapi_write(
	int			n;		/* current extent index */
	xfs_fileoff_t		obno;		/* old block number (offset) */
	int			whichfork;	/* data or attr fork */
	char			inhole;		/* current location is hole in file */
	char			wasdelay;	/* old extent was delayed */

#ifdef DEBUG
	xfs_fileoff_t		orig_bno;	/* original block number value */
@@ -4603,22 +4601,44 @@ xfs_bmapi_write(
	bma.firstblock = firstblock;

	while (bno < end && n < *nmap) {
		inhole = eof || bma.got.br_startoff > bno;
		wasdelay = !inhole && isnullstartblock(bma.got.br_startblock);
		bool			need_alloc = false, wasdelay = false;

		/* in hole or beyoned EOF? */
		if (eof || bma.got.br_startoff > bno) {
			if (flags & XFS_BMAPI_DELALLOC) {
				/*
				 * For the COW fork we can reasonably get a
				 * request for converting an extent that races
				 * with other threads already having converted
				 * part of it, as there converting COW to
				 * regular blocks is not protected using the
				 * IOLOCK.
				 */
				ASSERT(flags & XFS_BMAPI_COWFORK);
				if (!(flags & XFS_BMAPI_COWFORK)) {
					error = -EIO;
					goto error0;
				}

				if (eof || bno >= end)
					break;
			} else {
				need_alloc = true;
			}
		} else {
			/*
			 * Make sure we only reflink into a hole.
			 */
		if (flags & XFS_BMAPI_REMAP)
			ASSERT(inhole);
		if (flags & XFS_BMAPI_COWFORK)
			ASSERT(!inhole);
			ASSERT(!(flags & XFS_BMAPI_REMAP));
			if (isnullstartblock(bma.got.br_startblock))
				wasdelay = true;
		}

		/*
		 * First, deal with the hole before the allocated space
		 * that we found, if any.
		 */
		if (inhole || wasdelay) {
		if (need_alloc || wasdelay) {
			bma.eof = eof;
			bma.conv = !!(flags & XFS_BMAPI_CONVERT);
			bma.wasdel = wasdelay;
+5 −1
Original line number Diff line number Diff line
@@ -110,6 +110,9 @@ struct xfs_extent_free_item
/* Map something in the CoW fork. */
#define XFS_BMAPI_COWFORK	0x200

/* Only convert delalloc space, don't allocate entirely new extents */
#define XFS_BMAPI_DELALLOC	0x400

#define XFS_BMAPI_FLAGS \
	{ XFS_BMAPI_ENTIRE,	"ENTIRE" }, \
	{ XFS_BMAPI_METADATA,	"METADATA" }, \
@@ -120,7 +123,8 @@ struct xfs_extent_free_item
	{ XFS_BMAPI_CONVERT,	"CONVERT" }, \
	{ XFS_BMAPI_ZERO,	"ZERO" }, \
	{ XFS_BMAPI_REMAP,	"REMAP" }, \
	{ XFS_BMAPI_COWFORK,	"COWFORK" }
	{ XFS_BMAPI_COWFORK,	"COWFORK" }, \
	{ XFS_BMAPI_DELALLOC,	"DELALLOC" }


static inline int xfs_bmapi_aflag(int w)
+1 −1
Original line number Diff line number Diff line
@@ -681,7 +681,7 @@ xfs_iomap_write_allocate(
	xfs_trans_t	*tp;
	int		nimaps;
	int		error = 0;
	int		flags = 0;
	int		flags = XFS_BMAPI_DELALLOC;
	int		nres;

	if (whichfork == XFS_COW_FORK)