Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 84009357 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull xfs fixes from Dave Chinner:
 "There are a couple of recently found, long standing remote attribute
  corruption fixes caused by log recovery getting confused after a
  crash, and the new DAX code in XFS (merged in 4.2-rc1) needs to
  actually use the DAX fault path on read faults.

  Summary:

   - remote attribute log recovery corruption fixes

   - DAX page faults need to use direct mappings, not a page cache
     mapping"

* tag 'xfs-for-linus-4.2-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs:
  xfs: remote attributes need to be considered data
  xfs: remote attribute headers contain an invalid LSN
  xfs: call dax_fault on read page faults for DAX
parents dbe08116 df150ed1
Loading
Loading
Loading
Loading
+12 −2
Original line number Diff line number Diff line
@@ -319,6 +319,12 @@ static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
 * @vma: The virtual memory area where the fault occurred
 * @vmf: The description of the fault
 * @get_block: The filesystem method used to translate file offsets to blocks
 * @complete_unwritten: The filesystem method used to convert unwritten blocks
 *	to written so the data written to them is exposed. This is required for
 *	required by write faults for filesystems that will return unwritten
 *	extent mappings from @get_block, but it is optional for reads as
 *	dax_insert_mapping() will always zero unwritten blocks. If the fs does
 *	not support unwritten extents, the it should pass NULL.
 *
 * When a page fault occurs, filesystems may call this helper in their
 * fault handler for DAX files. __dax_fault() assumes the caller has done all
@@ -437,8 +443,12 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
	 * as for normal BH based IO completions.
	 */
	error = dax_insert_mapping(inode, &bh, vma, vmf);
	if (buffer_unwritten(&bh))
	if (buffer_unwritten(&bh)) {
		if (complete_unwritten)
			complete_unwritten(&bh, !error);
		else
			WARN_ON_ONCE(!(vmf->flags & FAULT_FLAG_WRITE));
	}

 out:
	if (error == -ENOMEM)
+34 −10
Original line number Diff line number Diff line
@@ -159,11 +159,10 @@ xfs_attr3_rmt_write_verify(
	struct xfs_buf	*bp)
{
	struct xfs_mount *mp = bp->b_target->bt_mount;
	struct xfs_buf_log_item	*bip = bp->b_fspriv;
	int		blksize = mp->m_attr_geo->blksize;
	char		*ptr;
	int		len;
	xfs_daddr_t	bno;
	int		blksize = mp->m_attr_geo->blksize;

	/* no verification of non-crc buffers */
	if (!xfs_sb_version_hascrc(&mp->m_sb))
@@ -175,16 +174,22 @@ xfs_attr3_rmt_write_verify(
	ASSERT(len >= blksize);

	while (len > 0) {
		struct xfs_attr3_rmt_hdr *rmt = (struct xfs_attr3_rmt_hdr *)ptr;

		if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) {
			xfs_buf_ioerror(bp, -EFSCORRUPTED);
			xfs_verifier_error(bp);
			return;
		}
		if (bip) {
			struct xfs_attr3_rmt_hdr *rmt;

			rmt = (struct xfs_attr3_rmt_hdr *)ptr;
			rmt->rm_lsn = cpu_to_be64(bip->bli_item.li_lsn);
		/*
		 * Ensure we aren't writing bogus LSNs to disk. See
		 * xfs_attr3_rmt_hdr_set() for the explanation.
		 */
		if (rmt->rm_lsn != cpu_to_be64(NULLCOMMITLSN)) {
			xfs_buf_ioerror(bp, -EFSCORRUPTED);
			xfs_verifier_error(bp);
			return;
		}
		xfs_update_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF);

@@ -221,6 +226,18 @@ xfs_attr3_rmt_hdr_set(
	rmt->rm_owner = cpu_to_be64(ino);
	rmt->rm_blkno = cpu_to_be64(bno);

	/*
	 * Remote attribute blocks are written synchronously, so we don't
	 * have an LSN that we can stamp in them that makes any sense to log
	 * recovery. To ensure that log recovery handles overwrites of these
	 * blocks sanely (i.e. once they've been freed and reallocated as some
	 * other type of metadata) we need to ensure that the LSN has a value
	 * that tells log recovery to ignore the LSN and overwrite the buffer
	 * with whatever is in it's log. To do this, we use the magic
	 * NULLCOMMITLSN to indicate that the LSN is invalid.
	 */
	rmt->rm_lsn = cpu_to_be64(NULLCOMMITLSN);

	return sizeof(struct xfs_attr3_rmt_hdr);
}

@@ -434,14 +451,21 @@ xfs_attr_rmtval_set(

		/*
		 * Allocate a single extent, up to the size of the value.
		 *
		 * Note that we have to consider this a data allocation as we
		 * write the remote attribute without logging the contents.
		 * Hence we must ensure that we aren't using blocks that are on
		 * the busy list so that we don't overwrite blocks which have
		 * recently been freed but their transactions are not yet
		 * committed to disk. If we overwrite the contents of a busy
		 * extent and then crash then the block may not contain the
		 * correct metadata after log recovery occurs.
		 */
		xfs_bmap_init(args->flist, args->firstblock);
		nmap = 1;
		error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno,
				  blkcnt,
				  XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
				  args->firstblock, args->total, &map, &nmap,
				  args->flist);
				  blkcnt, XFS_BMAPI_ATTRFORK, args->firstblock,
				  args->total, &map, &nmap, args->flist);
		if (!error) {
			error = xfs_bmap_finish(&args->trans, args->flist,
						&committed);
+15 −6
Original line number Diff line number Diff line
@@ -1514,18 +1514,27 @@ xfs_filemap_fault(
	struct vm_area_struct	*vma,
	struct vm_fault		*vmf)
{
	struct xfs_inode	*ip = XFS_I(file_inode(vma->vm_file));
	struct inode		*inode = file_inode(vma->vm_file);
	int			ret;

	trace_xfs_filemap_fault(ip);
	trace_xfs_filemap_fault(XFS_I(inode));

	/* DAX can shortcut the normal fault path on write faults! */
	if ((vmf->flags & FAULT_FLAG_WRITE) && IS_DAX(VFS_I(ip)))
	if ((vmf->flags & FAULT_FLAG_WRITE) && IS_DAX(inode))
		return xfs_filemap_page_mkwrite(vma, vmf);

	xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
	xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
	if (IS_DAX(inode)) {
		/*
		 * we do not want to trigger unwritten extent conversion on read
		 * faults - that is unnecessary overhead and would also require
		 * changes to xfs_get_blocks_direct() to map unwritten extent
		 * ioend for conversion on read-only mappings.
		 */
		ret = __dax_fault(vma, vmf, xfs_get_blocks_direct, NULL);
	} else
		ret = filemap_fault(vma, vmf);
	xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
	xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);

	return ret;
}
+8 −3
Original line number Diff line number Diff line
@@ -1886,9 +1886,14 @@ xlog_recover_get_buf_lsn(
		uuid = &((struct xfs_dir3_blk_hdr *)blk)->uuid;
		break;
	case XFS_ATTR3_RMT_MAGIC:
		lsn = be64_to_cpu(((struct xfs_attr3_rmt_hdr *)blk)->rm_lsn);
		uuid = &((struct xfs_attr3_rmt_hdr *)blk)->rm_uuid;
		break;
		/*
		 * Remote attr blocks are written synchronously, rather than
		 * being logged. That means they do not contain a valid LSN
		 * (i.e. transactionally ordered) in them, and hence any time we
		 * see a buffer to replay over the top of a remote attribute
		 * block we should simply do so.
		 */
		goto recover_immediately;
	case XFS_SB_MAGIC:
		lsn = be64_to_cpu(((struct xfs_dsb *)blk)->sb_lsn);
		uuid = &((struct xfs_dsb *)blk)->sb_uuid;