Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit a564b8f0 authored by Mel Gorman, committed by Linus Torvalds
Browse files

nfs: enable swap on NFS



Implement the new swapfile a_ops for NFS and hook up ->direct_IO.  This
will set the NFS socket to SOCK_MEMALLOC and run socket reconnect under
PF_MEMALLOC as well as reset SOCK_MEMALLOC before engaging the protocol
->connect() method.

PF_MEMALLOC should allow the allocation of struct socket and related
objects and the early (re)setting of SOCK_MEMALLOC should allow us to
receive the packets required for the TCP connection buildup.

[jlayton@redhat.com: Restore PF_MEMALLOC task flags in all cases]
[dfeng@redhat.com: Fix handling of multiple swap files]
[a.p.zijlstra@chello.nl: Original patch]
Signed-off-by: Mel Gorman <mgorman@suse.de>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: David S. Miller <davem@davemloft.net>
Cc: Eric B Munson <emunson@mgebm.net>
Cc: Eric Paris <eparis@redhat.com>
Cc: James Morris <jmorris@namei.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Mike Christie <michaelc@cs.wisc.edu>
Cc: Neil Brown <neilb@suse.de>
Cc: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
Cc: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: Xiaotian Feng <dfeng@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 29418aa4
Loading
Loading
Loading
Loading
+8 −0
Original line number Diff line number Diff line
@@ -86,6 +86,14 @@ config NFS_V4

	  If unsure, say Y.

# Swap-over-NFS support. Boolean option, off by default; it can only be
# enabled when NFS_FS is enabled, and enabling it also selects SUNRPC_SWAP.
config NFS_SWAP
	bool "Provide swap over NFS support"
	default n
	depends on NFS_FS
	select SUNRPC_SWAP
	help
	  This option enables swapon to work on files located on NFS mounts.

config NFS_V4_1
	bool "NFS client support for NFSv4.1 (EXPERIMENTAL)"
	depends on NFS_V4 && EXPERIMENTAL
+54 −28
Original line number Diff line number Diff line
@@ -115,17 +115,28 @@ static inline int put_dreq(struct nfs_direct_req *dreq)
 * @nr_segs: size of iovec array
 *
 * The presence of this routine in the address space ops vector means
 * the NFS client supports direct I/O.  However, we shunt off direct
 * read and write requests before the VFS gets them, so this method
 * should never be called.
 * the NFS client supports direct I/O. However, for most direct IO, we
 * shunt off direct read and write requests before the VFS gets them,
 * so this method is only ever called for swap.
 */
ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t pos, unsigned long nr_segs)
{
#ifdef CONFIG_NFS_SWAP
	/* Requests reaching this point must cover exactly one page. */
	VM_BUG_ON(iocb->ki_left != PAGE_SIZE);
	VM_BUG_ON(iocb->ki_nbytes != PAGE_SIZE);

	/*
	 * The final argument handed to the direct read/write helpers is
	 * true for plain READ/WRITE and false for the KERNEL_* variants.
	 */
	if (rw == READ || rw == KERNEL_READ)
		return nfs_file_direct_read(iocb, iov, nr_segs, pos,
				rw == READ);

	/* Anything that is not a read is dispatched as a write. */
	return nfs_file_direct_write(iocb, iov, nr_segs, pos,
				rw == WRITE);
#else
	/* Without CONFIG_NFS_SWAP this method only logs and rejects. */
	dprintk("NFS: nfs_direct_IO (%s) off/no(%Ld/%lu) EINVAL\n",
			iocb->ki_filp->f_path.dentry->d_name.name,
			(long long) pos, nr_segs);

	return -EINVAL;
#endif /* CONFIG_NFS_SWAP */
}

static void nfs_direct_release_pages(struct page **pages, unsigned int npages)
@@ -303,7 +314,7 @@ static const struct nfs_pgio_completion_ops nfs_direct_read_completion_ops = {
 */
static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *desc,
						const struct iovec *iov,
						loff_t pos)
						loff_t pos, bool uio)
{
	struct nfs_direct_req *dreq = desc->pg_dreq;
	struct nfs_open_context *ctx = dreq->ctx;
@@ -331,12 +342,20 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de
					  GFP_KERNEL);
		if (!pagevec)
			break;
		if (uio) {
			down_read(&current->mm->mmap_sem);
			result = get_user_pages(current, current->mm, user_addr,
					npages, 1, 0, pagevec, NULL);
			up_read(&current->mm->mmap_sem);
			if (result < 0)
				break;
		} else {
			WARN_ON(npages != 1);
			result = get_kernel_page(user_addr, 1, pagevec);
			if (WARN_ON(result != 1))
				break;
		}

		if ((unsigned)result < npages) {
			bytes = result * PAGE_SIZE;
			if (bytes <= pgbase) {
@@ -386,7 +405,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de
static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
					      const struct iovec *iov,
					      unsigned long nr_segs,
					      loff_t pos)
					      loff_t pos, bool uio)
{
	struct nfs_pageio_descriptor desc;
	ssize_t result = -EINVAL;
@@ -400,7 +419,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,

	for (seg = 0; seg < nr_segs; seg++) {
		const struct iovec *vec = &iov[seg];
		result = nfs_direct_read_schedule_segment(&desc, vec, pos);
		result = nfs_direct_read_schedule_segment(&desc, vec, pos, uio);
		if (result < 0)
			break;
		requested_bytes += result;
@@ -426,7 +445,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
}

static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov,
			       unsigned long nr_segs, loff_t pos)
			       unsigned long nr_segs, loff_t pos, bool uio)
{
	ssize_t result = -ENOMEM;
	struct inode *inode = iocb->ki_filp->f_mapping->host;
@@ -444,7 +463,7 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov,
	if (!is_sync_kiocb(iocb))
		dreq->iocb = iocb;

	result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos);
	result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos, uio);
	if (!result)
		result = nfs_direct_wait(dreq);
	NFS_I(inode)->read_io += result;
@@ -610,7 +629,7 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode
 */
static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *desc,
						 const struct iovec *iov,
						 loff_t pos)
						 loff_t pos, bool uio)
{
	struct nfs_direct_req *dreq = desc->pg_dreq;
	struct nfs_open_context *ctx = dreq->ctx;
@@ -638,12 +657,19 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *d
		if (!pagevec)
			break;

		if (uio) {
			down_read(&current->mm->mmap_sem);
			result = get_user_pages(current, current->mm, user_addr,
						npages, 0, 0, pagevec, NULL);
			up_read(&current->mm->mmap_sem);
			if (result < 0)
				break;
		} else {
			WARN_ON(npages != 1);
			result = get_kernel_page(user_addr, 0, pagevec);
			if (WARN_ON(result != 1))
				break;
		}

		if ((unsigned)result < npages) {
			bytes = result * PAGE_SIZE;
@@ -774,7 +800,7 @@ static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = {
static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
					       const struct iovec *iov,
					       unsigned long nr_segs,
					       loff_t pos)
					       loff_t pos, bool uio)
{
	struct nfs_pageio_descriptor desc;
	struct inode *inode = dreq->inode;
@@ -790,7 +816,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,

	for (seg = 0; seg < nr_segs; seg++) {
		const struct iovec *vec = &iov[seg];
		result = nfs_direct_write_schedule_segment(&desc, vec, pos);
		result = nfs_direct_write_schedule_segment(&desc, vec, pos, uio);
		if (result < 0)
			break;
		requested_bytes += result;
@@ -818,7 +844,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,

static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
				unsigned long nr_segs, loff_t pos,
				size_t count)
				size_t count, bool uio)
{
	ssize_t result = -ENOMEM;
	struct inode *inode = iocb->ki_filp->f_mapping->host;
@@ -836,7 +862,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
	if (!is_sync_kiocb(iocb))
		dreq->iocb = iocb;

	result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos);
	result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos, uio);
	if (!result)
		result = nfs_direct_wait(dreq);
out_release:
@@ -867,7 +893,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
 * cache.
 */
ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
				unsigned long nr_segs, loff_t pos)
				unsigned long nr_segs, loff_t pos, bool uio)
{
	ssize_t retval = -EINVAL;
	struct file *file = iocb->ki_filp;
@@ -892,7 +918,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,

	task_io_account_read(count);

	retval = nfs_direct_read(iocb, iov, nr_segs, pos);
	retval = nfs_direct_read(iocb, iov, nr_segs, pos, uio);
	if (retval > 0)
		iocb->ki_pos = pos + retval;

@@ -923,7 +949,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
 * is no atomic O_APPEND write facility in the NFS protocol.
 */
ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
				unsigned long nr_segs, loff_t pos)
				unsigned long nr_segs, loff_t pos, bool uio)
{
	ssize_t retval = -EINVAL;
	struct file *file = iocb->ki_filp;
@@ -955,7 +981,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,

	task_io_account_write(count);

	retval = nfs_direct_write(iocb, iov, nr_segs, pos, count);
	retval = nfs_direct_write(iocb, iov, nr_segs, pos, count, uio);
	if (retval > 0) {
		struct inode *inode = mapping->host;

+20 −2
Original line number Diff line number Diff line
@@ -175,7 +175,7 @@ nfs_file_read(struct kiocb *iocb, const struct iovec *iov,
	ssize_t result;

	if (iocb->ki_filp->f_flags & O_DIRECT)
		return nfs_file_direct_read(iocb, iov, nr_segs, pos);
		return nfs_file_direct_read(iocb, iov, nr_segs, pos, true);

	dprintk("NFS: read(%s/%s, %lu@%lu)\n",
		dentry->d_parent->d_name.name, dentry->d_name.name,
@@ -482,6 +482,20 @@ static int nfs_launder_page(struct page *page)
	return nfs_wb_page(inode, page);
}

#ifdef CONFIG_NFS_SWAP
/*
 * swap_activate hook: report sis->pages through @span and call
 * xs_swapper() with enable = 1 on the transport of the NFS client that
 * owns @file's inode.  Returns whatever xs_swapper() returns.
 */
static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file,
						sector_t *span)
{
	struct inode *inode = file->f_mapping->host;

	*span = sis->pages;
	return xs_swapper(NFS_CLIENT(inode)->cl_xprt, 1);
}

static void nfs_swap_deactivate(struct file *file)
{
	xs_swapper(NFS_CLIENT(file->f_mapping->host)->cl_xprt, 0);
}
#endif

const struct address_space_operations nfs_file_aops = {
	.readpage = nfs_readpage,
	.readpages = nfs_readpages,
@@ -496,6 +510,10 @@ const struct address_space_operations nfs_file_aops = {
	.migratepage = nfs_migrate_page,
	.launder_page = nfs_launder_page,
	.error_remove_page = generic_error_remove_page,
#ifdef CONFIG_NFS_SWAP
	.swap_activate = nfs_swap_activate,
	.swap_deactivate = nfs_swap_deactivate,
#endif
};

/*
@@ -570,7 +588,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
	size_t count = iov_length(iov, nr_segs);

	if (iocb->ki_filp->f_flags & O_DIRECT)
		return nfs_file_direct_write(iocb, iov, nr_segs, pos);
		return nfs_file_direct_write(iocb, iov, nr_segs, pos, true);

	dprintk("NFS: write(%s/%s, %lu@%Ld)\n",
		dentry->d_parent->d_name.name, dentry->d_name.name,
+2 −2
Original line number Diff line number Diff line
@@ -473,10 +473,10 @@ extern ssize_t nfs_direct_IO(int, struct kiocb *, const struct iovec *, loff_t,
			unsigned long);
extern ssize_t nfs_file_direct_read(struct kiocb *iocb,
			const struct iovec *iov, unsigned long nr_segs,
			loff_t pos);
			loff_t pos, bool uio);
extern ssize_t nfs_file_direct_write(struct kiocb *iocb,
			const struct iovec *iov, unsigned long nr_segs,
			loff_t pos);
			loff_t pos, bool uio);

/*
 * linux/fs/nfs/dir.c
+3 −0
Original line number Diff line number Diff line
@@ -174,6 +174,8 @@ struct rpc_xprt {
	unsigned long		state;		/* transport state */
	unsigned char		shutdown   : 1,	/* being shut down */
				resvport   : 1; /* use a reserved port */
	unsigned int		swapper;	/* we're swapping over this
						   transport */
	unsigned int		bind_index;	/* bind function index */

	/*
@@ -316,6 +318,7 @@ void xprt_release_rqst_cong(struct rpc_task *task);
void			xprt_disconnect_done(struct rpc_xprt *xprt);
void			xprt_force_disconnect(struct rpc_xprt *xprt);
void			xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie);
int			xs_swapper(struct rpc_xprt *xprt, int enable);

/*
 * Reserved bit positions in xprt->state
Loading