Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit ef473667 authored by Darrick J. Wong's avatar Darrick J. Wong
Browse files

xfs: allocate delayed extents in CoW fork



Modify the writepage handler to find and convert pending delalloc
extents to real allocations.  Furthermore, when we're doing non-cow
writes to a part of a file that already has a CoW reservation (the
cowextsz hint that we set up in a subsequent patch facilitates this),
promote the write to copy-on-write so that the entire extent can get
written out as a single extent on disk, thereby reducing post-CoW
fragmentation.

Christoph moved the CoW support code in _map_blocks to a separate helper
function, refactored other functions, and reduced the number of CoW fork
lookups, so I merged those changes here to reduce churn.

Signed-off-by: default avatarDarrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: default avatarChristoph Hellwig <hch@lst.de>
parent 60b4984f
Loading
Loading
Loading
Loading
+79 −18
Original line number Diff line number Diff line
@@ -31,6 +31,7 @@
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
#include "xfs_bmap_btree.h"
#include "xfs_reflink.h"
#include <linux/gfp.h>
#include <linux/mpage.h>
#include <linux/pagevec.h>
@@ -341,6 +342,7 @@ xfs_map_blocks(
	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	ASSERT(type != XFS_IO_COW);
	if (type == XFS_IO_UNWRITTEN)
		bmapi_flags |= XFS_BMAPI_IGSTATE;

@@ -355,6 +357,13 @@ xfs_map_blocks(
	offset_fsb = XFS_B_TO_FSBT(mp, offset);
	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
				imap, &nimaps, bmapi_flags);
	/*
	 * Truncate an overwrite extent if there's a pending CoW
	 * reservation before the end of this extent.  This forces us
	 * to come back to writepage to take care of the CoW.
	 */
	if (nimaps && type == XFS_IO_OVERWRITE)
		xfs_reflink_trim_irec_to_next_cow(ip, offset_fsb, imap);
	xfs_iunlock(ip, XFS_ILOCK_SHARED);

	if (error)
@@ -365,8 +374,7 @@ xfs_map_blocks(
		error = xfs_iomap_write_allocate(ip, XFS_DATA_FORK, offset,
				imap);
		if (!error)
			trace_xfs_map_blocks_alloc(ip, offset, count, type,
					imap);
			trace_xfs_map_blocks_alloc(ip, offset, count, type, imap);
		return error;
	}

@@ -739,6 +747,56 @@ xfs_aops_discard_page(
	return;
}

static int
xfs_map_cow(
	struct xfs_writepage_ctx *wpc,
	struct inode		*inode,
	loff_t			offset,
	unsigned int		*new_type)
{
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_bmbt_irec	imap;
	bool			is_cow = false, need_alloc = false;
	int			error;

	/*
	 * If we already have a valid COW mapping keep using it.
	 */
	if (wpc->io_type == XFS_IO_COW) {
		wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap, offset);
		if (wpc->imap_valid) {
			*new_type = XFS_IO_COW;
			return 0;
		}
	}

	/*
	 * Else we need to check if there is a COW mapping at this offset.
	 */
	xfs_ilock(ip, XFS_ILOCK_SHARED);
	is_cow = xfs_reflink_find_cow_mapping(ip, offset, &imap, &need_alloc);
	xfs_iunlock(ip, XFS_ILOCK_SHARED);

	if (!is_cow)
		return 0;

	/*
	 * And if the COW mapping has a delayed extent here we need to
	 * allocate real space for it now.
	 */
	if (need_alloc) {
		error = xfs_iomap_write_allocate(ip, XFS_COW_FORK, offset,
				&imap);
		if (error)
			return error;
	}

	wpc->io_type = *new_type = XFS_IO_COW;
	wpc->imap_valid = true;
	wpc->imap = imap;
	return 0;
}

/*
 * We implement an immediate ioend submission policy here to avoid needing to
 * chain multiple ioends and hence nest mempool allocations which can violate
@@ -771,6 +829,7 @@ xfs_writepage_map(
	int			error = 0;
	int			count = 0;
	int			uptodate = 1;
	unsigned int		new_type;

	bh = head = page_buffers(page);
	offset = page_offset(page);
@@ -791,22 +850,13 @@ xfs_writepage_map(
			continue;
		}

		if (buffer_unwritten(bh)) {
			if (wpc->io_type != XFS_IO_UNWRITTEN) {
				wpc->io_type = XFS_IO_UNWRITTEN;
				wpc->imap_valid = false;
			}
		} else if (buffer_delay(bh)) {
			if (wpc->io_type != XFS_IO_DELALLOC) {
				wpc->io_type = XFS_IO_DELALLOC;
				wpc->imap_valid = false;
			}
		} else if (buffer_uptodate(bh)) {
			if (wpc->io_type != XFS_IO_OVERWRITE) {
				wpc->io_type = XFS_IO_OVERWRITE;
				wpc->imap_valid = false;
			}
		} else {
		if (buffer_unwritten(bh))
			new_type = XFS_IO_UNWRITTEN;
		else if (buffer_delay(bh))
			new_type = XFS_IO_DELALLOC;
		else if (buffer_uptodate(bh))
			new_type = XFS_IO_OVERWRITE;
		else {
			if (PageUptodate(page))
				ASSERT(buffer_mapped(bh));
			/*
@@ -819,6 +869,17 @@ xfs_writepage_map(
			continue;
		}

		if (xfs_is_reflink_inode(XFS_I(inode))) {
			error = xfs_map_cow(wpc, inode, offset, &new_type);
			if (error)
				goto out;
		}

		if (wpc->io_type != new_type) {
			wpc->io_type = new_type;
			wpc->imap_valid = false;
		}

		if (wpc->imap_valid)
			wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap,
							 offset);
+3 −1
Original line number Diff line number Diff line
@@ -28,13 +28,15 @@ enum {
	XFS_IO_DELALLOC,	/* covers delalloc region */
	XFS_IO_UNWRITTEN,	/* covers allocated but uninitialized data */
	XFS_IO_OVERWRITE,	/* covers already allocated extent */
	XFS_IO_COW,		/* covers copy-on-write extent */
};

#define XFS_IO_TYPES \
	{ XFS_IO_INVALID,		"invalid" }, \
	{ XFS_IO_DELALLOC,		"delalloc" }, \
	{ XFS_IO_UNWRITTEN,		"unwritten" }, \
	{ XFS_IO_OVERWRITE,		"overwrite" }
	{ XFS_IO_OVERWRITE,		"overwrite" }, \
	{ XFS_IO_COW,			"CoW" }

/*
 * Structure for buffered I/O completions.
+84 −0
Original line number Diff line number Diff line
@@ -328,3 +328,87 @@ xfs_reflink_reserve_cow_range(

	return error;
}

/*
 * Find the CoW reservation (and whether or not it needs block allocation)
 * for a given byte offset of a file.
 */
bool
xfs_reflink_find_cow_mapping(
	struct xfs_inode		*ip,
	xfs_off_t			offset,
	struct xfs_bmbt_irec		*imap,
	bool				*need_alloc)
{
	struct xfs_bmbt_irec		irec;
	struct xfs_ifork		*ifp;
	struct xfs_bmbt_rec_host	*gotp;
	xfs_fileoff_t			bno;
	xfs_extnum_t			idx;

	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED));
	ASSERT(xfs_is_reflink_inode(ip));

	/* Find the extent in the CoW fork. */
	ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
	bno = XFS_B_TO_FSBT(ip->i_mount, offset);
	gotp = xfs_iext_bno_to_ext(ifp, bno, &idx);
	if (!gotp)
		return false;

	xfs_bmbt_get_all(gotp, &irec);
	if (bno >= irec.br_startoff + irec.br_blockcount ||
	    bno < irec.br_startoff)
		return false;

	trace_xfs_reflink_find_cow_mapping(ip, offset, 1, XFS_IO_OVERWRITE,
			&irec);

	/* If it's still delalloc, we must allocate later. */
	*imap = irec;
	*need_alloc = !!(isnullstartblock(irec.br_startblock));

	return true;
}

/*
 * Trim an extent to end at the next CoW reservation past offset_fsb.
 */
int
xfs_reflink_trim_irec_to_next_cow(
	struct xfs_inode		*ip,
	xfs_fileoff_t			offset_fsb,
	struct xfs_bmbt_irec		*imap)
{
	struct xfs_bmbt_irec		irec;
	struct xfs_ifork		*ifp;
	struct xfs_bmbt_rec_host	*gotp;
	xfs_extnum_t			idx;

	if (!xfs_is_reflink_inode(ip))
		return 0;

	/* Find the extent in the CoW fork. */
	ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
	gotp = xfs_iext_bno_to_ext(ifp, offset_fsb, &idx);
	if (!gotp)
		return 0;
	xfs_bmbt_get_all(gotp, &irec);

	/* This is the extent before; try sliding up one. */
	if (irec.br_startoff < offset_fsb) {
		idx++;
		if (idx >= ifp->if_bytes / sizeof(xfs_bmbt_rec_t))
			return 0;
		gotp = xfs_iext_get_ext(ifp, idx);
		xfs_bmbt_get_all(gotp, &irec);
	}

	if (irec.br_startoff >= imap->br_startoff + imap->br_blockcount)
		return 0;

	imap->br_blockcount = irec.br_startoff - imap->br_startoff;
	trace_xfs_reflink_trim_irec(ip, imap);

	return 0;
}
+4 −0
Original line number Diff line number Diff line
@@ -28,5 +28,9 @@ extern int xfs_reflink_trim_around_shared(struct xfs_inode *ip,

extern int xfs_reflink_reserve_cow_range(struct xfs_inode *ip,
		xfs_off_t offset, xfs_off_t count);
extern bool xfs_reflink_find_cow_mapping(struct xfs_inode *ip, xfs_off_t offset,
		struct xfs_bmbt_irec *imap, bool *need_alloc);
extern int xfs_reflink_trim_irec_to_next_cow(struct xfs_inode *ip,
		xfs_fileoff_t offset_fsb, struct xfs_bmbt_irec *imap);

#endif /* __XFS_REFLINK_H */