
Commit 781fca5b authored by Linus Torvalds

Merge tag 'xfs-4.19-merge-6' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull xfs updates from Darrick Wong:
 "This is the second part of the XFS changes for 4.19.

  The biggest changes are the removal of buffer heads from XFS, a massive
  reworking of the deferred transaction operations handling code, the
  removal of the long defunct barrier/nobarrier mount options, and the
  addition of a few more online repair functions.

  Summary:

   - Use extent maps to track pagecache page status instead of
     bufferhead state.

   - Refactor pagecache read and write paths to use the new iomap
     library functions, which enable us to drop the old bufferhead code
     for pagesize == blocksize filesystems.

   - Set up parallel per-block-per-page metadata to track subpage
     information that was tracked by buffer heads, which enables us to
     drop the old bufferhead code for pagesize > blocksize filesystems.

   - Tie a deferred ops control structure to a transaction so that we
     can take advantage of an upper-level dfops without having to plumb
     pointer passing through the code.

   - Refactor the deferred ops code to track deferred ops as part of the
     transaction structure (instead of as a separate data structure) so
     that we can simplify the scoping rules around defer_ops.

   - Refactor twisty delwri buffer submission code to avoid deadlocks.

   - Shorten and fix indenting problems in the scrub code.

   - Detect obviously bad summary counts at mount and fix them.

   - Directly associate deferred ops control structure with a
     transaction so that callers no longer have to manage it themselves.

   - Remove a couple of IRIX-era inode macros.

   - Remove the long-deprecated 'barrier' and 'nobarrier' mount options.

   - Clean up the inode fork structure a bit.

   - Check for bad fs summary counter values in the superblock.

   - Reduce COW fork lookups during writeback.

   - Refactor the deferred ops control structures into the transaction
     structure, thereby eliminating the need for transaction users to
     handle the deferred ops as a separate data structure.

   - Add the ability to repair AG headers online.

   - Fix a crash due to insufficient return value checking.

   - Various fixes and cleanups"
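
The per-block uptodate tracking mentioned above is the heart of the
bufferhead removal: for blocksize < pagesize filesystems, a small
per-page structure records which blocks of the page hold valid data,
and the page is marked fully uptodate only once every block is. What
follows is a minimal userspace sketch of that bookkeeping, with
invented names throughout; it illustrates the idea and is not the
kernel code.

	#include <stdbool.h>
	#include <stdio.h>
	#include <string.h>

	/* Toy model: 4096-byte page, 512-byte blocks, so 8 uptodate
	 * flags per page.  All names here are invented. */
	#define PAGE_SZ		4096u
	#define BLK_BITS	9u
	#define BLK_SZ		(1u << BLK_BITS)
	#define NBLKS		(PAGE_SZ / BLK_SZ)

	struct toy_page {
		bool uptodate[NBLKS];	/* per-block valid-data flags */
		bool page_uptodate;	/* PageUptodate() analogue */
	};

	/* Mark the blocks covered by [off, off + len) uptodate and
	 * promote the whole page once every block is covered, the same
	 * decision iomap_set_range_uptodate() makes in the diff below. */
	static void toy_set_range_uptodate(struct toy_page *p,
			unsigned off, unsigned len)
	{
		unsigned first = off >> BLK_BITS;
		unsigned last = (off + len - 1) >> BLK_BITS;
		bool all = true;
		unsigned i;

		for (i = 0; i < NBLKS; i++) {
			if (i >= first && i <= last)
				p->uptodate[i] = true;
			else if (!p->uptodate[i])
				all = false;
		}
		if (all)
			p->page_uptodate = true;
	}

	int main(void)
	{
		struct toy_page p;

		memset(&p, 0, sizeof(p));
		toy_set_range_uptodate(&p, 0, 2048);
		printf("uptodate after first half: %d\n", p.page_uptodate);
		toy_set_range_uptodate(&p, 2048, 2048);
		printf("uptodate after second half: %d\n", p.page_uptodate);
		return 0;
	}

The first call leaves the page partially uptodate and the second
promotes it; reads and writes then only need to touch blocks whose
flags are still clear.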

* tag 'xfs-4.19-merge-6' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: (155 commits)
  xfs: fix a null pointer dereference in xfs_bmap_extents_to_btree
  xfs: remove b_last_holder & associated macros
  iomap: Switch to offset_in_page for clarity
  xfs: Close race between direct IO and xfs_break_layouts()
  xfs: repair the AGI
  xfs: repair the AGFL
  xfs: repair the AGF
  xfs: remove dead error handling code in xfs_dquot_disk_alloc()
  xfs: use WRITE_ONCE to update if_seq
  xfs: fix a comment in xfs_log_reserve
  xfs: only validate summary counts on primary superblock
  xfs: substitute spaces with tabs
  xfs: fold dfops into the transaction
  xfs: always defer agfl block frees
  xfs: pass transaction to xfs_defer_add()
  xfs: replace xfs_defer_ops ->dop_pending with on-stack list
  xfs: cancel dfops on xfs_defer_finish() error
  xfs: clean out superfluous dfops dop params/vars
  xfs: drop dop param from xfs_defer_op_type ->finish_item() callback
  xfs: automatic dfops inode relogging
  ...
parents 10f3e23f 01239d77
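
Several of the commits above ("xfs: fold dfops into the transaction",
"xfs: pass transaction to xfs_defer_add()") amount to an ownership
change: the transaction itself carries the list of pending deferred
operations instead of callers allocating and threading a separate
xfs_defer_ops structure. A minimal sketch of that shape, with invented
names and none of the kernel's logging or rollback machinery:

	#include <stdio.h>
	#include <stdlib.h>

	struct toy_defer {			/* one piece of deferred work */
		const char *what;
		struct toy_defer *next;
	};

	struct toy_trans {			/* the transaction owns its dfops */
		struct toy_defer *dfops;
	};

	/* Compare xfs_defer_add(tp, ...): callers hand work to the
	 * transaction rather than to a separately managed structure. */
	static void toy_defer_add(struct toy_trans *tp, const char *what)
	{
		struct toy_defer *d = malloc(sizeof(*d));

		if (!d)
			abort();
		d->what = what;
		d->next = tp->dfops;
		tp->dfops = d;
	}

	/* Commit drains the transaction's own list; no caller cleanup. */
	static void toy_trans_commit(struct toy_trans *tp)
	{
		while (tp->dfops) {
			struct toy_defer *d = tp->dfops;

			tp->dfops = d->next;
			printf("finishing deferred op: %s\n", d->what);
			free(d);
		}
	}

	int main(void)
	{
		struct toy_trans tp = { NULL };

		toy_defer_add(&tp, "free AGFL block");
		toy_trans_commit(&tp);
		return 0;
	}

The simplification the series is after falls out of this shape:
helpers that already receive the transaction no longer need a second
dfops parameter, and the cancel and commit paths have one obvious
owner to drain.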
+2 −2
@@ -223,8 +223,6 @@ Deprecated Mount Options

  Name				Removal Schedule
  ----				----------------
-  barrier			no earlier than v4.15
-  nobarrier			no earlier than v4.15


Removed Mount Options
@@ -236,6 +234,8 @@ Removed Mount Options
  ihashsize			v4.0
  irixsgid			v4.0
  osyncisdsync/osyncisosync	v4.0
+  barrier			v4.19
+  nobarrier			v4.19


sysctls
+265 −27
@@ -17,6 +17,7 @@
#include <linux/iomap.h>
#include <linux/uaccess.h>
#include <linux/gfp.h>
+#include <linux/migrate.h>
#include <linux/mm.h>
#include <linux/mm_inline.h>
#include <linux/swap.h>
@@ -104,6 +105,138 @@ iomap_sector(struct iomap *iomap, loff_t pos)
	return (iomap->addr + pos - iomap->offset) >> SECTOR_SHIFT;
}

static struct iomap_page *
iomap_page_create(struct inode *inode, struct page *page)
{
	struct iomap_page *iop = to_iomap_page(page);

	if (iop || i_blocksize(inode) == PAGE_SIZE)
		return iop;

	iop = kmalloc(sizeof(*iop), GFP_NOFS | __GFP_NOFAIL);
	atomic_set(&iop->read_count, 0);
	atomic_set(&iop->write_count, 0);
	bitmap_zero(iop->uptodate, PAGE_SIZE / SECTOR_SIZE);
	set_page_private(page, (unsigned long)iop);
	SetPagePrivate(page);
	return iop;
}

static void
iomap_page_release(struct page *page)
{
	struct iomap_page *iop = to_iomap_page(page);

	if (!iop)
		return;
	WARN_ON_ONCE(atomic_read(&iop->read_count));
	WARN_ON_ONCE(atomic_read(&iop->write_count));
	ClearPagePrivate(page);
	set_page_private(page, 0);
	kfree(iop);
}

/*
 * Calculate the range inside the page that we actually need to read.
 */
static void
iomap_adjust_read_range(struct inode *inode, struct iomap_page *iop,
		loff_t *pos, loff_t length, unsigned *offp, unsigned *lenp)
{
	unsigned block_bits = inode->i_blkbits;
	unsigned block_size = (1 << block_bits);
	unsigned poff = offset_in_page(*pos);
	unsigned plen = min_t(loff_t, PAGE_SIZE - poff, length);
	unsigned first = poff >> block_bits;
	unsigned last = (poff + plen - 1) >> block_bits;
	unsigned end = offset_in_page(i_size_read(inode)) >> block_bits;

	/*
	 * If the block size is smaller than the page size we need to check the
	 * per-block uptodate status and adjust the offset and length if needed
	 * to avoid reading in already uptodate ranges.
	 */
	if (iop) {
		unsigned int i;

		/* move forward for each leading block marked uptodate */
		for (i = first; i <= last; i++) {
			if (!test_bit(i, iop->uptodate))
				break;
			*pos += block_size;
			poff += block_size;
			plen -= block_size;
			first++;
		}

		/* truncate len if we find any trailing uptodate block(s) */
		for ( ; i <= last; i++) {
			if (test_bit(i, iop->uptodate)) {
				plen -= (last - i + 1) * block_size;
				last = i - 1;
				break;
			}
		}
	}

	/*
	 * If the extent spans the block that contains the i_size we need to
	 * handle both halves separately so that we properly zero data in the
	 * page cache for blocks that are entirely outside of i_size.
	 */
	if (first <= end && last > end)
		plen -= (last - end) * block_size;

	*offp = poff;
	*lenp = plen;
}

static void
iomap_set_range_uptodate(struct page *page, unsigned off, unsigned len)
{
	struct iomap_page *iop = to_iomap_page(page);
	struct inode *inode = page->mapping->host;
	unsigned first = off >> inode->i_blkbits;
	unsigned last = (off + len - 1) >> inode->i_blkbits;
	unsigned int i;
	bool uptodate = true;

	if (iop) {
		for (i = 0; i < PAGE_SIZE / i_blocksize(inode); i++) {
			if (i >= first && i <= last)
				set_bit(i, iop->uptodate);
			else if (!test_bit(i, iop->uptodate))
				uptodate = false;
		}
	}

	if (uptodate && !PageError(page))
		SetPageUptodate(page);
}

static void
iomap_read_finish(struct iomap_page *iop, struct page *page)
{
	if (!iop || atomic_dec_and_test(&iop->read_count))
		unlock_page(page);
}

static void
iomap_read_page_end_io(struct bio_vec *bvec, int error)
{
	struct page *page = bvec->bv_page;
	struct iomap_page *iop = to_iomap_page(page);

	if (unlikely(error)) {
		ClearPageUptodate(page);
		SetPageError(page);
	} else {
		iomap_set_range_uptodate(page, bvec->bv_offset, bvec->bv_len);
	}

	iomap_read_finish(iop, page);
}

static void
iomap_read_inline_data(struct inode *inode, struct page *page,
		struct iomap *iomap)
@@ -132,7 +265,7 @@ iomap_read_end_io(struct bio *bio)
	int i;

	bio_for_each_segment_all(bvec, bio, i)
-		page_endio(bvec->bv_page, false, error);
+		iomap_read_page_end_io(bvec, error);
	bio_put(bio);
}

@@ -150,9 +283,10 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
{
	struct iomap_readpage_ctx *ctx = data;
	struct page *page = ctx->cur_page;
-	unsigned poff = pos & (PAGE_SIZE - 1);
-	unsigned plen = min_t(loff_t, PAGE_SIZE - poff, length);
+	struct iomap_page *iop = iomap_page_create(inode, page);
	bool is_contig = false;
+	loff_t orig_pos = pos;
+	unsigned poff, plen;
	sector_t sector;

	if (iomap->type == IOMAP_INLINE) {
@@ -161,13 +295,14 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
		return PAGE_SIZE;
	}

-	/* we don't support blocksize < PAGE_SIZE quite yet. */
-	WARN_ON_ONCE(pos != page_offset(page));
-	WARN_ON_ONCE(plen != PAGE_SIZE);
+	/* zero post-eof blocks as the page may be mapped */
+	iomap_adjust_read_range(inode, iop, &pos, length, &poff, &plen);
+	if (plen == 0)
+		goto done;

	if (iomap->type != IOMAP_MAPPED || pos >= i_size_read(inode)) {
		zero_user(page, poff, plen);
-		SetPageUptodate(page);
+		iomap_set_range_uptodate(page, poff, plen);
		goto done;
	}

@@ -183,6 +318,14 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
		is_contig = true;
	}

+	/*
+	 * If we start a new segment we need to increase the read count, and we
+	 * need to do so before submitting any previous full bio to make sure
+	 * that we don't prematurely unlock the page.
+	 */
+	if (iop)
+		atomic_inc(&iop->read_count);

	if (!ctx->bio || !is_contig || bio_full(ctx->bio)) {
		gfp_t gfp = mapping_gfp_constraint(page->mapping, GFP_KERNEL);
		int nr_vecs = (length + PAGE_SIZE - 1) >> PAGE_SHIFT;
@@ -203,7 +346,13 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,

	__bio_add_page(ctx->bio, page, plen, poff);
done:
-	return plen;
+	/*
+	 * Move the caller beyond our range so that it keeps making progress.
+	 * For that we have to include any leading non-uptodate ranges, but
+	 * we can skip trailing ones as they will be handled in the next
+	 * iteration.
+	 */
+	return pos - orig_pos + plen;
}

int
@@ -214,8 +363,6 @@ iomap_readpage(struct page *page, const struct iomap_ops *ops)
	unsigned poff;
	loff_t ret;

-	WARN_ON_ONCE(page_has_buffers(page));
-
	for (poff = 0; poff < PAGE_SIZE; poff += ret) {
		ret = iomap_apply(inode, page_offset(page) + poff,
				PAGE_SIZE - poff, 0, ops, &ctx,
@@ -280,7 +427,7 @@ iomap_readpages_actor(struct inode *inode, loff_t pos, loff_t length,
	loff_t done, ret;

	for (done = 0; done < length; done += ret) {
-		if (ctx->cur_page && ((pos + done) & (PAGE_SIZE - 1)) == 0) {
+		if (ctx->cur_page && offset_in_page(pos + done) == 0) {
			if (!ctx->cur_page_in_bio)
				unlock_page(ctx->cur_page);
			put_page(ctx->cur_page);
@@ -341,6 +488,84 @@ iomap_readpages(struct address_space *mapping, struct list_head *pages,
}
EXPORT_SYMBOL_GPL(iomap_readpages);

int
iomap_is_partially_uptodate(struct page *page, unsigned long from,
		unsigned long count)
{
	struct iomap_page *iop = to_iomap_page(page);
	struct inode *inode = page->mapping->host;
	unsigned first = from >> inode->i_blkbits;
	unsigned last = (from + count - 1) >> inode->i_blkbits;
	unsigned i;

	if (iop) {
		for (i = first; i <= last; i++)
			if (!test_bit(i, iop->uptodate))
				return 0;
		return 1;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(iomap_is_partially_uptodate);

int
iomap_releasepage(struct page *page, gfp_t gfp_mask)
{
	/*
	 * mm accommodates an old ext3 case where clean pages might not have had
	 * the dirty bit cleared. Thus, it can send actual dirty pages to
	 * ->releasepage() via shrink_active_list(), skip those here.
	 */
	if (PageDirty(page) || PageWriteback(page))
		return 0;
	iomap_page_release(page);
	return 1;
}
EXPORT_SYMBOL_GPL(iomap_releasepage);

void
iomap_invalidatepage(struct page *page, unsigned int offset, unsigned int len)
{
	/*
	 * If we are invalidating the entire page, clear the dirty state from it
	 * and release it to avoid unnecessary buildup of the LRU.
	 */
	if (offset == 0 && len == PAGE_SIZE) {
		WARN_ON_ONCE(PageWriteback(page));
		cancel_dirty_page(page);
		iomap_page_release(page);
	}
}
EXPORT_SYMBOL_GPL(iomap_invalidatepage);

#ifdef CONFIG_MIGRATION
int
iomap_migrate_page(struct address_space *mapping, struct page *newpage,
		struct page *page, enum migrate_mode mode)
{
	int ret;

	ret = migrate_page_move_mapping(mapping, newpage, page, NULL, mode, 0);
	if (ret != MIGRATEPAGE_SUCCESS)
		return ret;

	if (page_has_private(page)) {
		ClearPagePrivate(page);
		set_page_private(newpage, page_private(page));
		set_page_private(page, 0);
		SetPagePrivate(newpage);
	}

	if (mode != MIGRATE_SYNC_NO_COPY)
		migrate_page_copy(newpage, page);
	else
		migrate_page_states(newpage, page);
	return MIGRATEPAGE_SUCCESS;
}
EXPORT_SYMBOL_GPL(iomap_migrate_page);
#endif /* CONFIG_MIGRATION */

static void
iomap_write_failed(struct inode *inode, loff_t pos, unsigned len)
{
@@ -364,6 +589,7 @@ iomap_read_page_sync(struct inode *inode, loff_t block_start, struct page *page,

	if (iomap->type != IOMAP_MAPPED || block_start >= i_size_read(inode)) {
		zero_user_segments(page, poff, from, to, poff + plen);
+		iomap_set_range_uptodate(page, poff, plen);
		return 0;
	}

@@ -379,21 +605,33 @@ static int
__iomap_write_begin(struct inode *inode, loff_t pos, unsigned len,
		struct page *page, struct iomap *iomap)
{
+	struct iomap_page *iop = iomap_page_create(inode, page);
	loff_t block_size = i_blocksize(inode);
	loff_t block_start = pos & ~(block_size - 1);
	loff_t block_end = (pos + len + block_size - 1) & ~(block_size - 1);
-	unsigned poff = block_start & (PAGE_SIZE - 1);
-	unsigned plen = min_t(loff_t, PAGE_SIZE - poff, block_end - block_start);
-	unsigned from = pos & (PAGE_SIZE - 1), to = from + len;
-
-	WARN_ON_ONCE(i_blocksize(inode) < PAGE_SIZE);
+	unsigned from = offset_in_page(pos), to = from + len, poff, plen;
+	int status = 0;

	if (PageUptodate(page))
		return 0;
-	if (from <= poff && to >= poff + plen)
-		return 0;
-	return iomap_read_page_sync(inode, block_start, page,
+
+	do {
+		iomap_adjust_read_range(inode, iop, &block_start,
+				block_end - block_start, &poff, &plen);
+		if (plen == 0)
+			break;
+
+		if ((from > poff && from < poff + plen) ||
+		    (to > poff && to < poff + plen)) {
+			status = iomap_read_page_sync(inode, block_start, page,
					poff, plen, from, to, iomap);
+			if (status)
+				break;
+		}
+
+	} while ((block_start += plen) < block_end);
+
+	return status;
}

static int
@@ -476,7 +714,7 @@ __iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
	if (unlikely(copied < len && !PageUptodate(page))) {
		copied = 0;
	} else {
-		SetPageUptodate(page);
+		iomap_set_range_uptodate(page, offset_in_page(pos), len);
		iomap_set_page_dirty(page);
	}
	return __generic_write_end(inode, pos, copied, page);
@@ -538,7 +776,7 @@ iomap_write_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
		unsigned long bytes;	/* Bytes to write to page */
		size_t copied;		/* Bytes copied from user */

-		offset = (pos & (PAGE_SIZE - 1));
+		offset = offset_in_page(pos);
		bytes = min_t(unsigned long, PAGE_SIZE - offset,
						iov_iter_count(i));
again:
@@ -652,7 +890,7 @@ iomap_dirty_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
		unsigned long offset;	/* Offset into pagecache page */
		unsigned long bytes;	/* Bytes to write to page */

-		offset = (pos & (PAGE_SIZE - 1));
+		offset = offset_in_page(pos);
		bytes = min_t(loff_t, PAGE_SIZE - offset, length);

		rpage = __iomap_read_page(inode, pos);
@@ -744,7 +982,7 @@ iomap_zero_range_actor(struct inode *inode, loff_t pos, loff_t count,
	do {
		unsigned offset, bytes;

-		offset = pos & (PAGE_SIZE - 1); /* Within page */
+		offset = offset_in_page(pos);
		bytes = min_t(loff_t, PAGE_SIZE - offset, count);

		if (IS_DAX(inode))
@@ -812,7 +1050,7 @@ iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length,
		block_commit_write(page, 0, length);
	} else {
		WARN_ON_ONCE(!PageUptodate(page));
-		WARN_ON_ONCE(i_blocksize(inode) < PAGE_SIZE);
+		iomap_page_create(inode, page);
	}

	return length;
@@ -837,7 +1075,7 @@ int iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops)

	/* page is wholly or partially inside EOF */
	if (((page->index + 1) << PAGE_SHIFT) > size)
-		length = size & ~PAGE_MASK;
+		length = offset_in_page(size);
	else
		length = PAGE_SIZE;

@@ -1000,7 +1238,7 @@ page_seek_hole_data(struct inode *inode, struct page *page, loff_t *lastoff,
		goto out_unlock_not_found;

	for (off = 0; off < PAGE_SIZE; off += bsize) {
-		if ((*lastoff & ~PAGE_MASK) >= off + bsize)
+		if (offset_in_page(*lastoff) >= off + bsize)
			continue;
		if (ops->is_partially_uptodate(page, off, bsize) == seek_data) {
			unlock_page(page);
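
The new exports above (iomap_is_partially_uptodate, iomap_releasepage,
iomap_invalidatepage and, under CONFIG_MIGRATION, iomap_migrate_page)
are page-management hooks for filesystems that adopt the iomap read
path. A hedged sketch of the wiring, using 4.19-era
address_space_operations field names; example_readpage and
example_readpages are placeholders, not functions from this series:

	static int example_readpage(struct file *file, struct page *page);
	static int example_readpages(struct file *file,
			struct address_space *mapping,
			struct list_head *pages, unsigned nr_pages);

	static const struct address_space_operations example_aops = {
		.readpage		= example_readpage,	/* calls iomap_readpage() */
		.readpages		= example_readpages,	/* calls iomap_readpages() */
		.is_partially_uptodate	= iomap_is_partially_uptodate,
		.releasepage		= iomap_releasepage,
		.invalidatepage		= iomap_invalidatepage,
	#ifdef CONFIG_MIGRATION
		.migratepage		= iomap_migrate_page,
	#endif
	};

XFS reworks its own address_space operations along these lines in this
series; consult the full diff for the exact upstream wiring rather
than treating the sketch above as it.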
+1 −0
@@ -158,6 +158,7 @@ xfs-$(CONFIG_XFS_QUOTA) += scrub/quota.o
ifeq ($(CONFIG_XFS_ONLINE_REPAIR),y)
xfs-y				+= $(addprefix scrub/, \
				   agheader_repair.o \
+				   bitmap.o \
				   repair.o \
				   )
endif
+7 −6
@@ -248,7 +248,8 @@ __xfs_ag_resv_init(
/* Create a per-AG block reservation. */
int
xfs_ag_resv_init(
-	struct xfs_perag		*pag)
+	struct xfs_perag		*pag,
+	struct xfs_trans		*tp)
{
	struct xfs_mount		*mp = pag->pag_mount;
	xfs_agnumber_t			agno = pag->pag_agno;
@@ -260,11 +261,11 @@ xfs_ag_resv_init(
	if (pag->pag_meta_resv.ar_asked == 0) {
		ask = used = 0;

-		error = xfs_refcountbt_calc_reserves(mp, agno, &ask, &used);
+		error = xfs_refcountbt_calc_reserves(mp, tp, agno, &ask, &used);
		if (error)
			goto out;

-		error = xfs_finobt_calc_reserves(mp, agno, &ask, &used);
+		error = xfs_finobt_calc_reserves(mp, tp, agno, &ask, &used);
		if (error)
			goto out;

@@ -282,7 +283,7 @@ xfs_ag_resv_init(

			mp->m_inotbt_nores = true;

-			error = xfs_refcountbt_calc_reserves(mp, agno, &ask,
+			error = xfs_refcountbt_calc_reserves(mp, tp, agno, &ask,
					&used);
			if (error)
				goto out;
@@ -298,7 +299,7 @@ xfs_ag_resv_init(
	if (pag->pag_rmapbt_resv.ar_asked == 0) {
		ask = used = 0;

-		error = xfs_rmapbt_calc_reserves(mp, agno, &ask, &used);
+		error = xfs_rmapbt_calc_reserves(mp, tp, agno, &ask, &used);
		if (error)
			goto out;

@@ -309,7 +310,7 @@ xfs_ag_resv_init(

#ifdef DEBUG
	/* need to read in the AGF for the ASSERT below to work */
-	error = xfs_alloc_pagf_init(pag->pag_mount, NULL, pag->pag_agno, 0);
+	error = xfs_alloc_pagf_init(pag->pag_mount, tp, pag->pag_agno, 0);
	if (error)
		return error;
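
This signature change threads the caller's transaction down to the
per-AG reserve calculations (and, in the DEBUG hunk, to
xfs_alloc_pagf_init()), so the AGF can be read under a transaction the
caller already holds, as online repair needs. At each call site the
visible effect is one extra argument; a hypothetical caller, not an
exact upstream hunk:

	/* tp may be NULL outside of repair; repair passes its own transaction */
	error = xfs_ag_resv_init(pag, tp);
	if (error)
		return error;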

+2 −2
@@ -7,7 +7,7 @@
#define	__XFS_AG_RESV_H__

int xfs_ag_resv_free(struct xfs_perag *pag);
-int xfs_ag_resv_init(struct xfs_perag *pag);
+int xfs_ag_resv_init(struct xfs_perag *pag, struct xfs_trans *tp);

bool xfs_ag_resv_critical(struct xfs_perag *pag, enum xfs_ag_resv_type type);
xfs_extlen_t xfs_ag_resv_needed(struct xfs_perag *pag,
@@ -28,7 +28,7 @@ xfs_ag_resv_rmapbt_alloc(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno)
{
-	struct xfs_alloc_arg	args = {0};
+	struct xfs_alloc_arg	args = { NULL };
	struct xfs_perag	*pag;

	args.len = 1;
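
The final hunk's switch from "= {0}" to "= { NULL }" is small but
deliberate: the first member of struct xfs_alloc_arg is a pointer, and
initializing a pointer member with a plain 0 draws complaints from
some static checkers (sparse's "Using plain integer as NULL pointer"
is the usual suspect; that motivation is inferred here, not quoted
from the commit). Both spellings zero the whole struct. A standalone
illustration with a toy struct:

	#include <stdio.h>

	struct toy_args {
		void *tp;	/* pointer first, like xfs_alloc_arg's tp */
		int len;
	};

	int main(void)
	{
		struct toy_args args = { NULL };	/* tp = NULL, len = 0 */

		args.len = 1;
		printf("tp=%p len=%d\n", args.tp, args.len);
		return 0;
	}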