NFS: Fix a race when doing NFS write coalescing (c63c7b05) · Commits · e / devices / android_kernel_fairphone_FP4

fs/nfs/pagelist.c

+0 −92

Original line number	Diff line number	Diff line
		@@ -17,7 +17,6 @@
		#include <linux/nfs_page.h>
		#include <linux/nfs_fs.h>
		#include <linux/nfs_mount.h>
		#include <linux/writeback.h>

		#define NFS_PARANOIA 1

		@@ -353,25 +352,6 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
		return 1;
		}

		/**
		* nfs_pageio_add_list - Split coalesced requests out from a list.
		* @desc: destination io descriptor
		* @head: source list
		*
		* Moves a maximum of 'nmax' elements from one list to another.
		* The elements are checked to ensure that they form a contiguous set
		* of pages, and that the RPC credentials are the same.
		*/
		void nfs_pageio_add_list(struct nfs_pageio_descriptor *desc,
		struct list_head *head)
		{
		while (!list_empty(head)) {
		struct nfs_page *req = nfs_list_entry(head->next);
		if (!nfs_pageio_add_request(desc, req))
		break;
		}
		}

		/**
		* nfs_pageio_complete - Complete I/O on an nfs_pageio_descriptor
		* @desc: pointer to io descriptor
		@@ -382,78 +362,6 @@ void nfs_pageio_complete(struct nfs_pageio_descriptor *desc)
		}

		#define NFS_SCAN_MAXENTRIES 16
		/**
		* nfs_scan_dirty - Scan the radix tree for dirty requests
		* @mapping: pointer to address space
		* @wbc: writeback_control structure
		* @dst: Destination list
		*
		* Moves elements from one of the inode request lists.
		* If the number of requests is set to 0, the entire address_space
		* starting at index idx_start, is scanned.
		* The requests are not checked to ensure that they form a contiguous set.
		* You must be holding the inode's req_lock when calling this function
		*/
		long nfs_scan_dirty(struct address_space *mapping,
		struct writeback_control *wbc,
		struct list_head *dst)
		{
		struct nfs_inode *nfsi = NFS_I(mapping->host);
		struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES];
		struct nfs_page *req;
		pgoff_t idx_start, idx_end;
		long res = 0;
		int found, i;

		if (nfsi->ndirty == 0)
		return 0;
		if (wbc->range_cyclic) {
		idx_start = 0;
		idx_end = ULONG_MAX;
		} else if (wbc->range_end == 0) {
		idx_start = wbc->range_start >> PAGE_CACHE_SHIFT;
		idx_end = ULONG_MAX;
		} else {
		idx_start = wbc->range_start >> PAGE_CACHE_SHIFT;
		idx_end = wbc->range_end >> PAGE_CACHE_SHIFT;
		}

		for (;;) {
		unsigned int toscan = NFS_SCAN_MAXENTRIES;

		found = radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree,
		(void **)&pgvec[0], idx_start, toscan,
		NFS_PAGE_TAG_DIRTY);

		/* Did we make progress? */
		if (found <= 0)
		break;

		for (i = 0; i < found; i++) {
		req = pgvec[i];
		if (!wbc->range_cyclic && req->wb_index > idx_end)
		goto out;

		/* Try to lock request and mark it for writeback */
		if (!nfs_set_page_writeback_locked(req))
		goto next;
		radix_tree_tag_clear(&nfsi->nfs_page_tree,
		req->wb_index, NFS_PAGE_TAG_DIRTY);
		nfsi->ndirty--;
		nfs_list_remove_request(req);
		nfs_list_add_request(req, dst);
		res++;
		if (res == LONG_MAX)
		goto out;
		next:
		idx_start = req->wb_index + 1;
		}
		}
		out:
		WARN_ON ((nfsi->ndirty == 0) != list_empty(&nfsi->dirty));
		return res;
		}

		/**
		* nfs_scan_list - Scan a list for matching requests
		* @nfsi: NFS inode

fs/nfs/write.c

+47 −102

Original line number	Diff line number	Diff line
		@@ -38,7 +38,8 @@
		static struct nfs_page * nfs_update_request(struct nfs_open_context*,
		struct page *,
		unsigned int, unsigned int);
		static long nfs_flush_mapping(struct address_space *mapping, struct writeback_control *wbc, int how);
		static void nfs_pageio_init_write(struct nfs_pageio_descriptor *desc,
		struct inode *inode, int ioflags);
		static const struct rpc_call_ops nfs_write_partial_ops;
		static const struct rpc_call_ops nfs_write_full_ops;
		static const struct rpc_call_ops nfs_commit_ops;
		@@ -201,7 +202,7 @@ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
		static int wb_priority(struct writeback_control *wbc)
		{
		if (wbc->for_reclaim)
		return FLUSH_HIGHPRI;
		return FLUSH_HIGHPRI | FLUSH_STABLE;
		if (wbc->for_kupdate)
		return FLUSH_LOWPRI;
		return 0;
		@@ -251,7 +252,8 @@ static void nfs_end_page_writeback(struct page *page)
		* was not tagged.
		* May also return an error if the user signalled nfs_wait_on_request().
		*/
		static int nfs_page_mark_flush(struct page *page)
		static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
		struct page *page)
		{
		struct nfs_page *req;
		struct nfs_inode *nfsi = NFS_I(page->mapping->host);
		@@ -273,6 +275,8 @@ static int nfs_page_mark_flush(struct page *page)
		* request as dirty (in which case we don't care).
		*/
		spin_unlock(req_lock);
		/* Prevent deadlock! */
		nfs_pageio_complete(pgio);
		ret = nfs_wait_on_request(req);
		nfs_release_request(req);
		if (ret != 0)
		@@ -283,21 +287,18 @@ static int nfs_page_mark_flush(struct page *page)
		/* This request is marked for commit */
		spin_unlock(req_lock);
		nfs_unlock_request(req);
		nfs_pageio_complete(pgio);
		return 1;
		}
		if (nfs_set_page_writeback(page) == 0) {
		nfs_list_remove_request(req);
		/* add the request to the inode's dirty list. */
		radix_tree_tag_set(&nfsi->nfs_page_tree,
		req->wb_index, NFS_PAGE_TAG_DIRTY);
		nfs_list_add_request(req, &nfsi->dirty);
		nfsi->ndirty++;
		spin_unlock(req_lock);
		__mark_inode_dirty(page->mapping->host, I_DIRTY_PAGES);
		} else
		if (nfs_set_page_writeback(page) != 0) {
		spin_unlock(req_lock);
		BUG();
		}
		radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index,
		NFS_PAGE_TAG_WRITEBACK);
		ret = test_bit(PG_NEED_FLUSH, &req->wb_flags);
		nfs_unlock_request(req);
		spin_unlock(req_lock);
		nfs_pageio_add_request(pgio, req);
		return ret;
		}

		@@ -306,6 +307,7 @@ static int nfs_page_mark_flush(struct page *page)
		*/
		static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc)
		{
		struct nfs_pageio_descriptor mypgio, *pgio;
		struct nfs_open_context *ctx;
		struct inode *inode = page->mapping->host;
		unsigned offset;
		@@ -314,7 +316,14 @@ static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc
		nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
		nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1);

		err = nfs_page_mark_flush(page);
		if (wbc->for_writepages)
		pgio = wbc->fs_private;
		else {
		nfs_pageio_init_write(&mypgio, inode, wb_priority(wbc));
		pgio = &mypgio;
		}

		err = nfs_page_async_flush(pgio, page);
		if (err <= 0)
		goto out;
		err = 0;
		@@ -331,12 +340,12 @@ static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc
		put_nfs_open_context(ctx);
		if (err != 0)
		goto out;
		err = nfs_page_mark_flush(page);
		err = nfs_page_async_flush(pgio, page);
		if (err > 0)
		err = 0;
		out:
		if (!wbc->for_writepages)
		nfs_flush_mapping(page->mapping, wbc, FLUSH_STABLE|wb_priority(wbc));
		nfs_pageio_complete(pgio);
		return err;
		}

		@@ -352,20 +361,20 @@ int nfs_writepage(struct page *page, struct writeback_control *wbc)
		int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
		{
		struct inode *inode = mapping->host;
		struct nfs_pageio_descriptor pgio;
		int err;

		nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);

		nfs_pageio_init_write(&pgio, inode, wb_priority(wbc));
		wbc->fs_private = &pgio;
		err = generic_writepages(mapping, wbc);
		nfs_pageio_complete(&pgio);
		if (err)
		return err;
		err = nfs_flush_mapping(mapping, wbc, wb_priority(wbc));
		if (err < 0)
		goto out;
		nfs_add_stats(inode, NFSIOS_WRITEPAGES, err);
		err = 0;
		out:
		return err;
		if (pgio.pg_error)
		return pgio.pg_error;
		return 0;
		}

		/*
		@@ -536,18 +545,6 @@ static int nfs_wait_on_requests_locked(struct inode *inode, unsigned long idx_st
		return res;
		}

		static void nfs_cancel_dirty_list(struct list_head *head)
		{
		struct nfs_page *req;
		while(!list_empty(head)) {
		req = nfs_list_entry(head->next);
		nfs_list_remove_request(req);
		nfs_end_page_writeback(req->wb_page);
		nfs_inode_remove_request(req);
		nfs_clear_page_writeback(req);
		}
		}

		static void nfs_cancel_commit_list(struct list_head *head)
		{
		struct nfs_page *req;
		@@ -936,33 +933,15 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, size_t cou
		return -ENOMEM;
		}

		static int nfs_flush_list(struct inode *inode, struct list_head *head, int npages, int how)
		static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
		struct inode *inode, int ioflags)
		{
		struct nfs_pageio_descriptor desc;
		int wpages = NFS_SERVER(inode)->wpages;
		int wsize = NFS_SERVER(inode)->wsize;

		/* For single writes, FLUSH_STABLE is more efficient */
		if (npages <= wpages && npages == NFS_I(inode)->npages
		&& nfs_list_entry(head->next)->wb_bytes <= wsize)
		how |= FLUSH_STABLE;

		if (wsize < PAGE_CACHE_SIZE)
		nfs_pageio_init(&desc, inode, nfs_flush_multi, wsize, how);
		nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags);
		else
		nfs_pageio_init(&desc, inode, nfs_flush_one, wsize, how);
		nfs_pageio_add_list(&desc, head);
		nfs_pageio_complete(&desc);
		if (desc.pg_error == 0)
		return 0;
		while (!list_empty(head)) {
		struct nfs_page *req = nfs_list_entry(head->next);
		nfs_list_remove_request(req);
		nfs_redirty_request(req);
		nfs_end_page_writeback(req->wb_page);
		nfs_clear_page_writeback(req);
		}
		return desc.pg_error;
		nfs_pageio_init(pgio, inode, nfs_flush_one, wsize, ioflags);
		}

		/*
		@@ -1286,31 +1265,7 @@ static const struct rpc_call_ops nfs_commit_ops = {
		.rpc_call_done = nfs_commit_done,
		.rpc_release = nfs_commit_release,
		};
		#else
		static inline int nfs_commit_list(struct inode *inode, struct list_head *head, int how)
		{
		return 0;
		}
		#endif

		static long nfs_flush_mapping(struct address_space *mapping, struct writeback_control *wbc, int how)
		{
		struct nfs_inode *nfsi = NFS_I(mapping->host);
		LIST_HEAD(head);
		long res;

		spin_lock(&nfsi->req_lock);
		res = nfs_scan_dirty(mapping, wbc, &head);
		spin_unlock(&nfsi->req_lock);
		if (res) {
		int error = nfs_flush_list(mapping->host, &head, res, how);
		if (error < 0)
		return error;
		}
		return res;
		}

		#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
		int nfs_commit_inode(struct inode *inode, int how)
		{
		struct nfs_inode *nfsi = NFS_I(inode);
		@@ -1327,6 +1282,11 @@ int nfs_commit_inode(struct inode *inode, int how)
		}
		return res;
		}
		#else
		static inline int nfs_commit_list(struct inode *inode, struct list_head *head, int how)
		{
		return 0;
		}
		#endif

		long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_control *wbc, int how)
		@@ -1360,19 +1320,6 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr
		ret = nfs_wait_on_requests_locked(inode, idx_start, npages);
		if (ret != 0)
		continue;
		pages = nfs_scan_dirty(mapping, wbc, &head);
		if (pages != 0) {
		spin_unlock(&nfsi->req_lock);
		if (how & FLUSH_INVALIDATE) {
		nfs_cancel_dirty_list(&head);
		ret = pages;
		} else
		ret = nfs_flush_list(inode, &head, pages, how);
		spin_lock(&nfsi->req_lock);
		continue;
		}
		if (wbc->pages_skipped != 0)
		continue;
		if (nocommit)
		break;
		pages = nfs_scan_commit(inode, &head, idx_start, npages);
		@@ -1412,7 +1359,7 @@ int nfs_wb_all(struct inode *inode)
		};
		int ret;

		ret = generic_writepages(mapping, &wbc);
		ret = nfs_writepages(mapping, &wbc);
		if (ret < 0)
		goto out;
		ret = nfs_sync_mapping_wait(mapping, &wbc, 0);
		@@ -1435,11 +1382,9 @@ int nfs_sync_mapping_range(struct address_space *mapping, loff_t range_start, lo
		};
		int ret;

		if (!(how & FLUSH_NOWRITEPAGE)) {
		ret = generic_writepages(mapping, &wbc);
		ret = nfs_writepages(mapping, &wbc);
		if (ret < 0)
		goto out;
		}
		ret = nfs_sync_mapping_wait(mapping, &wbc, how);
		if (ret >= 0)
		return 0;
		@@ -1462,7 +1407,7 @@ int nfs_wb_page_priority(struct inode *inode, struct page *page, int how)
		int ret;

		BUG_ON(!PageLocked(page));
		if (!(how & FLUSH_NOWRITEPAGE) && clear_page_dirty_for_io(page)) {
		if (clear_page_dirty_for_io(page)) {
		ret = nfs_writepage_locked(page, &wbc);
		if (ret < 0)
		goto out;

include/linux/nfs_page.h

+1 −7

Original line number	Diff line number	Diff line
		@@ -21,8 +21,7 @@
		/*
		* Valid flags for the radix tree
		*/
		#define NFS_PAGE_TAG_DIRTY 0
		#define NFS_PAGE_TAG_WRITEBACK 1
		#define NFS_PAGE_TAG_WRITEBACK 0

		/*
		* Valid flags for a dirty buffer
		@@ -72,9 +71,6 @@ extern void nfs_clear_request(struct nfs_page *req);
		extern void nfs_release_request(struct nfs_page *req);


		extern long nfs_scan_dirty(struct address_space *mapping,
		struct writeback_control *wbc,
		struct list_head *dst);
		extern int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head, struct list_head *dst,
		unsigned long idx_start, unsigned int npages);
		extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
		@@ -84,8 +80,6 @@ extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
		int how);
		extern int nfs_pageio_add_request(struct nfs_pageio_descriptor *,
		struct nfs_page *);
		extern void nfs_pageio_add_list(struct nfs_pageio_descriptor *,
		struct list_head *);
		extern void nfs_pageio_complete(struct nfs_pageio_descriptor *desc);
		extern int nfs_wait_on_request(struct nfs_page *);
		extern void nfs_unlock_request(struct nfs_page *req);

include/linux/writeback.h

+2 −0

Original line number	Diff line number	Diff line
		@@ -59,6 +59,8 @@ struct writeback_control {
		unsigned for_reclaim:1; /* Invoked from the page allocator */
		unsigned for_writepages:1; /* This is a writepages() call */
		unsigned range_cyclic:1; /* range_start is cyclic */

		void *fs_private; /* For use by ->writepages() */
		};

		/*