Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit abe72ff4 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'xfs-4.20-fixes-2' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull xfs fixes from Darrick Wong:
 "Dave and I have continued our work fixing corruption problems that can
  be found when running long-term burn-in exercisers on xfs. Here are
  some patches fixing most of the problems, but there will likely be
  more. :/

   - Numerous corruption fixes for copy on write

   - Numerous corruption fixes for blocksize < pagesize writes

   - Don't miscalculate AG reservations for small final AGs

   - Fix page cache truncation to work properly for reflink and extent
     shifting

   - Fix use-after-free when retrying failed inode/dquot buffer logging

   - Fix corruptions seen when using copy_file_range in directio mode"

* tag 'xfs-4.20-fixes-2' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
  iomap: readpages doesn't zero page tail beyond EOF
  vfs: vfs_dedupe_file_range() doesn't return EOPNOTSUPP
  iomap: dio data corruption and spurious errors when pipes fill
  iomap: sub-block dio needs to zeroout beyond EOF
  iomap: FUA is wrong for DIO O_DSYNC writes into unwritten extents
  xfs: delalloc -> unwritten COW fork allocation can go wrong
  xfs: flush removing page cache in xfs_reflink_remap_prep
  xfs: extent shifting doesn't fully invalidate page cache
  xfs: finobt AG reserves don't consider last AG can be a runt
  xfs: fix transient reference count error in xfs_buf_resubmit_failed_buffers
  xfs: uncached buffer tracing needs to print bno
  xfs: make xfs_file_remap_range() static
  xfs: fix shared extent data corruption due to missing cow reservation
parents 7c98a426 8c110d43
Loading
Loading
Loading
Loading
+41 −12
Original line number Original line Diff line number Diff line
@@ -142,13 +142,14 @@ static void
iomap_adjust_read_range(struct inode *inode, struct iomap_page *iop,
iomap_adjust_read_range(struct inode *inode, struct iomap_page *iop,
		loff_t *pos, loff_t length, unsigned *offp, unsigned *lenp)
		loff_t *pos, loff_t length, unsigned *offp, unsigned *lenp)
{
{
	loff_t orig_pos = *pos;
	loff_t isize = i_size_read(inode);
	unsigned block_bits = inode->i_blkbits;
	unsigned block_bits = inode->i_blkbits;
	unsigned block_size = (1 << block_bits);
	unsigned block_size = (1 << block_bits);
	unsigned poff = offset_in_page(*pos);
	unsigned poff = offset_in_page(*pos);
	unsigned plen = min_t(loff_t, PAGE_SIZE - poff, length);
	unsigned plen = min_t(loff_t, PAGE_SIZE - poff, length);
	unsigned first = poff >> block_bits;
	unsigned first = poff >> block_bits;
	unsigned last = (poff + plen - 1) >> block_bits;
	unsigned last = (poff + plen - 1) >> block_bits;
	unsigned end = offset_in_page(i_size_read(inode)) >> block_bits;


	/*
	/*
	 * If the block size is smaller than the page size we need to check the
	 * If the block size is smaller than the page size we need to check the
@@ -183,8 +184,12 @@ iomap_adjust_read_range(struct inode *inode, struct iomap_page *iop,
	 * handle both halves separately so that we properly zero data in the
	 * handle both halves separately so that we properly zero data in the
	 * page cache for blocks that are entirely outside of i_size.
	 * page cache for blocks that are entirely outside of i_size.
	 */
	 */
	if (orig_pos <= isize && orig_pos + length > isize) {
		unsigned end = offset_in_page(isize - 1) >> block_bits;

		if (first <= end && last > end)
		if (first <= end && last > end)
			plen -= (last - end) * block_size;
			plen -= (last - end) * block_size;
	}


	*offp = poff;
	*offp = poff;
	*lenp = plen;
	*lenp = plen;
@@ -1580,7 +1585,7 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length,
	struct bio *bio;
	struct bio *bio;
	bool need_zeroout = false;
	bool need_zeroout = false;
	bool use_fua = false;
	bool use_fua = false;
	int nr_pages, ret;
	int nr_pages, ret = 0;
	size_t copied = 0;
	size_t copied = 0;


	if ((pos | length | align) & ((1 << blkbits) - 1))
	if ((pos | length | align) & ((1 << blkbits) - 1))
@@ -1596,12 +1601,13 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length,


	if (iomap->flags & IOMAP_F_NEW) {
	if (iomap->flags & IOMAP_F_NEW) {
		need_zeroout = true;
		need_zeroout = true;
	} else {
	} else if (iomap->type == IOMAP_MAPPED) {
		/*
		/*
		 * Use a FUA write if we need datasync semantics, this
		 * Use a FUA write if we need datasync semantics, this is a pure
		 * is a pure data IO that doesn't require any metadata
		 * data IO that doesn't require any metadata updates (including
		 * updates and the underlying device supports FUA. This
		 * after IO completion such as unwritten extent conversion) and
		 * allows us to avoid cache flushes on IO completion.
		 * the underlying device supports FUA. This allows us to avoid
		 * cache flushes on IO completion.
		 */
		 */
		if (!(iomap->flags & (IOMAP_F_SHARED|IOMAP_F_DIRTY)) &&
		if (!(iomap->flags & (IOMAP_F_SHARED|IOMAP_F_DIRTY)) &&
		    (dio->flags & IOMAP_DIO_WRITE_FUA) &&
		    (dio->flags & IOMAP_DIO_WRITE_FUA) &&
@@ -1644,8 +1650,14 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length,


		ret = bio_iov_iter_get_pages(bio, &iter);
		ret = bio_iov_iter_get_pages(bio, &iter);
		if (unlikely(ret)) {
		if (unlikely(ret)) {
			/*
			 * We have to stop part way through an IO. We must fall
			 * through to the sub-block tail zeroing here, otherwise
			 * this short IO may expose stale data in the tail of
			 * the block we haven't written data to.
			 */
			bio_put(bio);
			bio_put(bio);
			return copied ? copied : ret;
			goto zero_tail;
		}
		}


		n = bio->bi_iter.bi_size;
		n = bio->bi_iter.bi_size;
@@ -1676,13 +1688,21 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length,
		dio->submit.cookie = submit_bio(bio);
		dio->submit.cookie = submit_bio(bio);
	} while (nr_pages);
	} while (nr_pages);


	if (need_zeroout) {
	/*
	 * We need to zeroout the tail of a sub-block write if the extent type
	 * requires zeroing or the write extends beyond EOF. If we don't zero
	 * the block tail in the latter case, we can expose stale data via mmap
	 * reads of the EOF block.
	 */
zero_tail:
	if (need_zeroout ||
	    ((dio->flags & IOMAP_DIO_WRITE) && pos >= i_size_read(inode))) {
		/* zero out from the end of the write to the end of the block */
		/* zero out from the end of the write to the end of the block */
		pad = pos & (fs_block_size - 1);
		pad = pos & (fs_block_size - 1);
		if (pad)
		if (pad)
			iomap_dio_zero(dio, iomap, pos, fs_block_size - pad);
			iomap_dio_zero(dio, iomap, pos, fs_block_size - pad);
	}
	}
	return copied;
	return copied ? copied : ret;
}
}


static loff_t
static loff_t
@@ -1857,6 +1877,15 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
				dio->wait_for_completion = true;
				dio->wait_for_completion = true;
				ret = 0;
				ret = 0;
			}
			}

			/*
			 * Splicing to pipes can fail on a full pipe. We have to
			 * swallow this to make it look like a short IO
			 * otherwise the higher splice layers will completely
			 * mishandle the error and stop moving data.
			 */
			if (ret == -EFAULT)
				ret = 0;
			break;
			break;
		}
		}
		pos += ret;
		pos += ret;
+7 −8
Original line number Original line Diff line number Diff line
@@ -2094,17 +2094,18 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
	off = same->src_offset;
	off = same->src_offset;
	len = same->src_length;
	len = same->src_length;


	ret = -EISDIR;
	if (S_ISDIR(src->i_mode))
	if (S_ISDIR(src->i_mode))
		goto out;
		return -EISDIR;


	ret = -EINVAL;
	if (!S_ISREG(src->i_mode))
	if (!S_ISREG(src->i_mode))
		goto out;
		return -EINVAL;

	if (!file->f_op->remap_file_range)
		return -EOPNOTSUPP;


	ret = remap_verify_area(file, off, len, false);
	ret = remap_verify_area(file, off, len, false);
	if (ret < 0)
	if (ret < 0)
		goto out;
		return ret;
	ret = 0;
	ret = 0;


	if (off + len > i_size_read(src))
	if (off + len > i_size_read(src))
@@ -2147,10 +2148,8 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
		fdput(dst_fd);
		fdput(dst_fd);
next_loop:
next_loop:
		if (fatal_signal_pending(current))
		if (fatal_signal_pending(current))
			goto out;
			break;
	}
	}

out:
	return ret;
	return ret;
}
}
EXPORT_SYMBOL(vfs_dedupe_file_range);
EXPORT_SYMBOL(vfs_dedupe_file_range);
+4 −1
Original line number Original line Diff line number Diff line
@@ -1694,10 +1694,13 @@ xfs_bmap_add_extent_delay_real(
	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
		/*
		/*
		 * Filling in all of a previously delayed allocation extent.
		 * Filling in all of a previously delayed allocation extent.
		 * The right neighbor is contiguous, the left is not.
		 * The right neighbor is contiguous, the left is not. Take care
		 * with delay -> unwritten extent allocation here because the
		 * delalloc record we are overwriting is always written.
		 */
		 */
		PREV.br_startblock = new->br_startblock;
		PREV.br_startblock = new->br_startblock;
		PREV.br_blockcount += RIGHT.br_blockcount;
		PREV.br_blockcount += RIGHT.br_blockcount;
		PREV.br_state = new->br_state;


		xfs_iext_next(ifp, &bma->icur);
		xfs_iext_next(ifp, &bma->icur);
		xfs_iext_remove(bma->ip, &bma->icur, state);
		xfs_iext_remove(bma->ip, &bma->icur, state);
+7 −4
Original line number Original line Diff line number Diff line
@@ -538,14 +538,17 @@ xfs_inobt_rec_check_count(


static xfs_extlen_t
static xfs_extlen_t
xfs_inobt_max_size(
xfs_inobt_max_size(
	struct xfs_mount	*mp)
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno)
{
{
	xfs_agblock_t		agblocks = xfs_ag_block_count(mp, agno);

	/* Bail out if we're uninitialized, which can happen in mkfs. */
	/* Bail out if we're uninitialized, which can happen in mkfs. */
	if (mp->m_inobt_mxr[0] == 0)
	if (mp->m_inobt_mxr[0] == 0)
		return 0;
		return 0;


	return xfs_btree_calc_size(mp->m_inobt_mnr,
	return xfs_btree_calc_size(mp->m_inobt_mnr,
		(uint64_t)mp->m_sb.sb_agblocks * mp->m_sb.sb_inopblock /
				(uint64_t)agblocks * mp->m_sb.sb_inopblock /
					XFS_INODES_PER_CHUNK);
					XFS_INODES_PER_CHUNK);
}
}


@@ -594,7 +597,7 @@ xfs_finobt_calc_reserves(
	if (error)
	if (error)
		return error;
		return error;


	*ask += xfs_inobt_max_size(mp);
	*ask += xfs_inobt_max_size(mp, agno);
	*used += tree_len;
	*used += tree_len;
	return 0;
	return 0;
}
}
+2 −8
Original line number Original line Diff line number Diff line
@@ -1042,7 +1042,7 @@ xfs_unmap_extent(
	goto out_unlock;
	goto out_unlock;
}
}


static int
int
xfs_flush_unmap_range(
xfs_flush_unmap_range(
	struct xfs_inode	*ip,
	struct xfs_inode	*ip,
	xfs_off_t		offset,
	xfs_off_t		offset,
@@ -1195,13 +1195,7 @@ xfs_prepare_shift(
	 * Writeback and invalidate cache for the remainder of the file as we're
	 * Writeback and invalidate cache for the remainder of the file as we're
	 * about to shift down every extent from offset to EOF.
	 * about to shift down every extent from offset to EOF.
	 */
	 */
	error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, offset, -1);
	error = xfs_flush_unmap_range(ip, offset, XFS_ISIZE(ip));
	if (error)
		return error;
	error = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
					offset >> PAGE_SHIFT, -1);
	if (error)
		return error;


	/*
	/*
	 * Clean out anything hanging around in the cow fork now that
	 * Clean out anything hanging around in the cow fork now that
Loading