Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 40859d7e authored by Chuck Lever's avatar Chuck Lever Committed by Trond Myklebust
Browse files

NFS: support large reads and writes on the wire



 Most NFS server implementations allow up to 64KB reads and writes on the
 wire.  The Solaris NFS server allows up to a megabyte, for instance.

 Now the Linux NFS client supports transfer sizes up to 1MB, too.  This will
 help reduce protocol and context switch overhead on read/write intensive NFS
 workloads, and support larger atomic read and write operations on servers
 that support them.

 Test-plan:
 Connectathon and iozone on mount point with wsize=rsize>32768 over TCP.
 Tests with NFS over UDP to verify the maximum RPC payload size cap.

 Signed-off-by: default avatarChuck Lever <cel@netapp.com>
 Signed-off-by: default avatarTrond Myklebust <Trond.Myklebust@netapp.com>
parent 325cfed9
Loading
Loading
Loading
Loading
+3 −2
Original line number Diff line number Diff line
@@ -154,6 +154,7 @@ static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, unsigned int
	struct list_head *list;
	struct nfs_direct_req *dreq;
	unsigned int reads = 0;
	unsigned int rpages = (rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;

	dreq = kmem_cache_alloc(nfs_direct_cachep, SLAB_KERNEL);
	if (!dreq)
@@ -167,7 +168,7 @@ static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, unsigned int

	list = &dreq->list;
	for(;;) {
		struct nfs_read_data *data = nfs_readdata_alloc();
		struct nfs_read_data *data = nfs_readdata_alloc(rpages);

		if (unlikely(!data)) {
			while (!list_empty(list)) {
@@ -431,7 +432,7 @@ static ssize_t nfs_direct_write_seg(struct inode *inode,
	struct nfs_writeverf first_verf;
	struct nfs_write_data *wdata;

	wdata = nfs_writedata_alloc();
	wdata = nfs_writedata_alloc(NFS_SERVER(inode)->wpages);
	if (!wdata)
		return -ENOMEM;

+10 −15
Original line number Diff line number Diff line
@@ -221,10 +221,10 @@ nfs_calc_block_size(u64 tsize)
static inline unsigned long
nfs_block_size(unsigned long bsize, unsigned char *nrbitsp)
{
	if (bsize < 1024)
		bsize = NFS_DEF_FILE_IO_BUFFER_SIZE;
	else if (bsize >= NFS_MAX_FILE_IO_BUFFER_SIZE)
		bsize = NFS_MAX_FILE_IO_BUFFER_SIZE;
	if (bsize < NFS_MIN_FILE_IO_SIZE)
		bsize = NFS_DEF_FILE_IO_SIZE;
	else if (bsize >= NFS_MAX_FILE_IO_SIZE)
		bsize = NFS_MAX_FILE_IO_SIZE;

	return nfs_block_bits(bsize, nrbitsp);
}
@@ -307,20 +307,15 @@ nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor)
	max_rpc_payload = nfs_block_size(rpc_max_payload(server->client), NULL);
	if (server->rsize > max_rpc_payload)
		server->rsize = max_rpc_payload;
	if (server->wsize > max_rpc_payload)
		server->wsize = max_rpc_payload;

	if (server->rsize > NFS_MAX_FILE_IO_SIZE)
		server->rsize = NFS_MAX_FILE_IO_SIZE;
	server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
	if (server->rpages > NFS_READ_MAXIOV) {
		server->rpages = NFS_READ_MAXIOV;
		server->rsize = server->rpages << PAGE_CACHE_SHIFT;
	}

	if (server->wsize > max_rpc_payload)
		server->wsize = max_rpc_payload;
	if (server->wsize > NFS_MAX_FILE_IO_SIZE)
		server->wsize = NFS_MAX_FILE_IO_SIZE;
	server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
        if (server->wpages > NFS_WRITE_MAXIOV) {
		server->wpages = NFS_WRITE_MAXIOV;
                server->wsize = server->wpages << PAGE_CACHE_SHIFT;
	}

	if (sb->s_blocksize == 0)
		sb->s_blocksize = nfs_block_bits(server->wsize,
+2 −2
Original line number Diff line number Diff line
@@ -296,8 +296,8 @@ static int __init root_nfs_name(char *name)
	nfs_port          = -1;
	nfs_data.version  = NFS_MOUNT_VERSION;
	nfs_data.flags    = NFS_MOUNT_NONLM;	/* No lockd in nfs root yet */
	nfs_data.rsize    = NFS_DEF_FILE_IO_BUFFER_SIZE;
	nfs_data.wsize    = NFS_DEF_FILE_IO_BUFFER_SIZE;
	nfs_data.rsize    = NFS_DEF_FILE_IO_SIZE;
	nfs_data.wsize    = NFS_DEF_FILE_IO_SIZE;
	nfs_data.acregmin = 3;
	nfs_data.acregmax = 60;
	nfs_data.acdirmin = 30;
+3 −3
Original line number Diff line number Diff line
@@ -83,7 +83,7 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode,
	int		result;
	struct nfs_read_data *rdata;

	rdata = nfs_readdata_alloc();
	rdata = nfs_readdata_alloc(1);
	if (!rdata)
		return -ENOMEM;

@@ -283,7 +283,7 @@ static int nfs_pagein_multi(struct list_head *head, struct inode *inode)

	nbytes = req->wb_bytes;
	for(;;) {
		data = nfs_readdata_alloc();
		data = nfs_readdata_alloc(1);
		if (!data)
			goto out_bad;
		INIT_LIST_HEAD(&data->pages);
@@ -339,7 +339,7 @@ static int nfs_pagein_one(struct list_head *head, struct inode *inode)
	if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE)
		return nfs_pagein_multi(head, inode);

	data = nfs_readdata_alloc();
	data = nfs_readdata_alloc(NFS_SERVER(inode)->rpages);
	if (!data)
		goto out_bad;

+22 −7
Original line number Diff line number Diff line
@@ -89,18 +89,33 @@ static mempool_t *nfs_commit_mempool;

static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion);

static inline struct nfs_write_data *nfs_commit_alloc(void)
static inline struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount)
{
	struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, SLAB_NOFS);

	if (p) {
		memset(p, 0, sizeof(*p));
		INIT_LIST_HEAD(&p->pages);
		if (pagecount < NFS_PAGEVEC_SIZE)
			p->pagevec = &p->page_array[0];
		else {
			size_t size = ++pagecount * sizeof(struct page *);
			p->pagevec = kmalloc(size, GFP_NOFS);
			if (p->pagevec) {
				memset(p->pagevec, 0, size);
			} else {
				mempool_free(p, nfs_commit_mempool);
				p = NULL;
			}
		}
	}
	return p;
}

static inline void nfs_commit_free(struct nfs_write_data *p)
{
	if (p && (p->pagevec != &p->page_array[0]))
		kfree(p->pagevec);
	mempool_free(p, nfs_commit_mempool);
}

@@ -167,7 +182,7 @@ static int nfs_writepage_sync(struct nfs_open_context *ctx, struct inode *inode,
	int		result, written = 0;
	struct nfs_write_data *wdata;

	wdata = nfs_writedata_alloc();
	wdata = nfs_writedata_alloc(1);
	if (!wdata)
		return -ENOMEM;

@@ -909,7 +924,7 @@ static int nfs_flush_multi(struct list_head *head, struct inode *inode, int how)

	nbytes = req->wb_bytes;
	for (;;) {
		data = nfs_writedata_alloc();
		data = nfs_writedata_alloc(1);
		if (!data)
			goto out_bad;
		list_add(&data->pages, &list);
@@ -973,7 +988,7 @@ static int nfs_flush_one(struct list_head *head, struct inode *inode, int how)
	if (NFS_SERVER(inode)->wsize < PAGE_CACHE_SIZE)
		return nfs_flush_multi(head, inode, how);

	data = nfs_writedata_alloc();
	data = nfs_writedata_alloc(NFS_SERVER(inode)->wpages);
	if (!data)
		goto out_bad;

@@ -1241,12 +1256,12 @@ static void nfs_commit_rpcsetup(struct list_head *head,
 * Commit dirty pages
 */
static int
nfs_commit_list(struct list_head *head, int how)
nfs_commit_list(struct inode *inode, struct list_head *head, int how)
{
	struct nfs_write_data	*data;
	struct nfs_page         *req;

	data = nfs_commit_alloc();
	data = nfs_commit_alloc(NFS_SERVER(inode)->wpages);

	if (!data)
		goto out_bad;
@@ -1351,7 +1366,7 @@ int nfs_commit_inode(struct inode *inode, int how)
	res = nfs_scan_commit(inode, &head, 0, 0);
	spin_unlock(&nfsi->req_lock);
	if (res) {
		error = nfs_commit_list(&head, how);
		error = nfs_commit_list(inode, &head, how);
		if (error < 0)
			return error;
	}
Loading