Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 561f0ed4 authored by J. Bruce Fields
Browse files

nfsd4: allow large readdirs



Currently we limit readdir results to a single page.  This can result in
a performance regression compared to NFSv3 when reading large
directories.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
parent 32aaa62e
Loading
Loading
Loading
Loading
+5 −4
Original line number Original line Diff line number Diff line
@@ -1500,13 +1500,14 @@ static inline u32 nfsd4_read_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)


static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
{
{
	u32 maxcount = svc_max_payload(rqstp);
	u32 rlen = op->u.readdir.rd_maxcount;
	u32 rlen = op->u.readdir.rd_maxcount;


	if (rlen > PAGE_SIZE)
	if (rlen > maxcount)
		rlen = PAGE_SIZE;
		rlen = maxcount;


	return (op_encode_hdr_size + op_encode_verifier_maxsz)
	return (op_encode_hdr_size + op_encode_verifier_maxsz +
		 * sizeof(__be32) + rlen;
		XDR_QUADLEN(rlen)) * sizeof(__be32);
}
}


static inline u32 nfsd4_remove_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
static inline u32 nfsd4_remove_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+75 −62
Original line number Original line Diff line number Diff line
@@ -2575,8 +2575,8 @@ static inline int attributes_need_mount(u32 *bmval)
}
}


static __be32
static __be32
nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd,
nfsd4_encode_dirent_fattr(struct xdr_stream *xdr, struct nfsd4_readdir *cd,
		const char *name, int namlen, __be32 **p, int buflen)
			const char *name, int namlen)
{
{
	struct svc_export *exp = cd->rd_fhp->fh_export;
	struct svc_export *exp = cd->rd_fhp->fh_export;
	struct dentry *dentry;
	struct dentry *dentry;
@@ -2628,8 +2628,7 @@ nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd,


	}
	}
out_encode:
out_encode:
	nfserr = nfsd4_encode_fattr_to_buf(p, buflen, NULL, exp, dentry,
	nfserr = nfsd4_encode_fattr(xdr, NULL, exp, dentry, cd->rd_bmval,
					cd->rd_bmval,
					cd->rd_rqstp, ignore_crossmnt);
					cd->rd_rqstp, ignore_crossmnt);
out_put:
out_put:
	dput(dentry);
	dput(dentry);
@@ -2638,9 +2637,12 @@ nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd,
}
}


static __be32 *
static __be32 *
nfsd4_encode_rdattr_error(__be32 *p, int buflen, __be32 nfserr)
nfsd4_encode_rdattr_error(struct xdr_stream *xdr, __be32 nfserr)
{
{
	if (buflen < 6)
	__be32 *p;

	p = xdr_reserve_space(xdr, 6);
	if (!p)
		return NULL;
		return NULL;
	*p++ = htonl(2);
	*p++ = htonl(2);
	*p++ = htonl(FATTR4_WORD0_RDATTR_ERROR); /* bmval0 */
	*p++ = htonl(FATTR4_WORD0_RDATTR_ERROR); /* bmval0 */
@@ -2657,10 +2659,13 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
{
{
	struct readdir_cd *ccd = ccdv;
	struct readdir_cd *ccd = ccdv;
	struct nfsd4_readdir *cd = container_of(ccd, struct nfsd4_readdir, common);
	struct nfsd4_readdir *cd = container_of(ccd, struct nfsd4_readdir, common);
	int buflen;
	struct xdr_stream *xdr = cd->xdr;
	__be32 *p = cd->buffer;
	int start_offset = xdr->buf->len;
	__be32 *cookiep;
	int cookie_offset;
	int entry_bytes;
	__be32 nfserr = nfserr_toosmall;
	__be32 nfserr = nfserr_toosmall;
	__be64 wire_offset;
	__be32 *p;


	/* In nfsv4, "." and ".." never make it onto the wire.. */
	/* In nfsv4, "." and ".." never make it onto the wire.. */
	if (name && isdotent(name, namlen)) {
	if (name && isdotent(name, namlen)) {
@@ -2668,19 +2673,24 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
		return 0;
		return 0;
	}
	}


	if (cd->offset)
	if (cd->cookie_offset) {
		xdr_encode_hyper(cd->offset, (u64) offset);
		wire_offset = cpu_to_be64(offset);
		write_bytes_to_xdr_buf(xdr->buf, cd->cookie_offset,
							&wire_offset, 8);
	}


	buflen = cd->buflen - 4 - XDR_QUADLEN(namlen);
	p = xdr_reserve_space(xdr, 4);
	if (buflen < 0)
	if (!p)
		goto fail;
		goto fail;

	*p++ = xdr_one;                             /* mark entry present */
	*p++ = xdr_one;                             /* mark entry present */
	cookiep = p;
	cookie_offset = xdr->buf->len;
	p = xdr_reserve_space(xdr, 3*4 + namlen);
	if (!p)
		goto fail;
	p = xdr_encode_hyper(p, NFS_OFFSET_MAX);    /* offset of next entry */
	p = xdr_encode_hyper(p, NFS_OFFSET_MAX);    /* offset of next entry */
	p = xdr_encode_array(p, name, namlen);      /* name length & name */
	p = xdr_encode_array(p, name, namlen);      /* name length & name */


	nfserr = nfsd4_encode_dirent_fattr(cd, name, namlen, &p, buflen);
	nfserr = nfsd4_encode_dirent_fattr(xdr, cd, name, namlen);
	switch (nfserr) {
	switch (nfserr) {
	case nfs_ok:
	case nfs_ok:
		break;
		break;
@@ -2699,19 +2709,23 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
		 */
		 */
		if (!(cd->rd_bmval[0] & FATTR4_WORD0_RDATTR_ERROR))
		if (!(cd->rd_bmval[0] & FATTR4_WORD0_RDATTR_ERROR))
			goto fail;
			goto fail;
		p = nfsd4_encode_rdattr_error(p, buflen, nfserr);
		p = nfsd4_encode_rdattr_error(xdr, nfserr);
		if (p == NULL) {
		if (p == NULL) {
			nfserr = nfserr_toosmall;
			nfserr = nfserr_toosmall;
			goto fail;
			goto fail;
		}
		}
	}
	}
	cd->buflen -= (p - cd->buffer);
	nfserr = nfserr_toosmall;
	cd->buffer = p;
	entry_bytes = xdr->buf->len - start_offset;
	cd->offset = cookiep;
	if (entry_bytes > cd->rd_maxcount)
		goto fail;
	cd->rd_maxcount -= entry_bytes;
	cd->cookie_offset = cookie_offset;
skip_entry:
skip_entry:
	cd->common.err = nfs_ok;
	cd->common.err = nfs_ok;
	return 0;
	return 0;
fail:
fail:
	xdr_truncate_encode(xdr, start_offset);
	cd->common.err = nfserr;
	cd->common.err = nfserr;
	return -EINVAL;
	return -EINVAL;
}
}
@@ -3206,10 +3220,11 @@ static __be32
nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readdir *readdir)
nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readdir *readdir)
{
{
	int maxcount;
	int maxcount;
	int bytes_left;
	loff_t offset;
	loff_t offset;
	__be64 wire_offset;
	struct xdr_stream *xdr = &resp->xdr;
	struct xdr_stream *xdr = &resp->xdr;
	int starting_len = xdr->buf->len;
	int starting_len = xdr->buf->len;
	__be32 *page, *tailbase;
	__be32 *p;
	__be32 *p;


	if (nfserr)
	if (nfserr)
@@ -3219,38 +3234,38 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
	if (!p)
	if (!p)
		return nfserr_resource;
		return nfserr_resource;


	if (resp->xdr.buf->page_len)
		return nfserr_resource;
	if (!*resp->rqstp->rq_next_page)
		return nfserr_resource;

	/* XXX: Following NFSv3, we ignore the READDIR verifier for now. */
	/* XXX: Following NFSv3, we ignore the READDIR verifier for now. */
	WRITE32(0);
	WRITE32(0);
	WRITE32(0);
	WRITE32(0);
	resp->xdr.buf->head[0].iov_len = ((char *)resp->xdr.p)
	resp->xdr.buf->head[0].iov_len = ((char *)resp->xdr.p)
				- (char *)resp->xdr.buf->head[0].iov_base;
				- (char *)resp->xdr.buf->head[0].iov_base;
	tailbase = p;

	maxcount = PAGE_SIZE;
	if (maxcount > readdir->rd_maxcount)
		maxcount = readdir->rd_maxcount;


	/*
	/*
	 * Convert from bytes to words, account for the two words already
	 * Number of bytes left for directory entries allowing for the
	 * written, make sure to leave two words at the end for the next
	 * final 8 bytes of the readdir and a following failed op:
	 * pointer and eof field.
	 */
	bytes_left = xdr->buf->buflen - xdr->buf->len
			- COMPOUND_ERR_SLACK_SPACE - 8;
	if (bytes_left < 0) {
		nfserr = nfserr_resource;
		goto err_no_verf;
	}
	maxcount = min_t(u32, readdir->rd_maxcount, INT_MAX);
	/*
	 * Note the rfc defines rd_maxcount as the size of the
	 * READDIR4resok structure, which includes the verifier above
	 * and the 8 bytes encoded at the end of this function:
	 */
	 */
	maxcount = (maxcount >> 2) - 4;
	if (maxcount < 16) {
	if (maxcount < 0) {
		nfserr = nfserr_toosmall;
		nfserr = nfserr_toosmall;
		goto err_no_verf;
		goto err_no_verf;
	}
	}
	maxcount = min_t(int, maxcount-16, bytes_left);


	page = page_address(*(resp->rqstp->rq_next_page++));
	readdir->xdr = xdr;
	readdir->rd_maxcount = maxcount;
	readdir->common.err = 0;
	readdir->common.err = 0;
	readdir->buflen = maxcount;
	readdir->cookie_offset = 0;
	readdir->buffer = page;
	readdir->offset = NULL;


	offset = readdir->rd_cookie;
	offset = readdir->rd_cookie;
	nfserr = nfsd_readdir(readdir->rd_rqstp, readdir->rd_fhp,
	nfserr = nfsd_readdir(readdir->rd_rqstp, readdir->rd_fhp,
@@ -3258,33 +3273,31 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
			      &readdir->common, nfsd4_encode_dirent);
			      &readdir->common, nfsd4_encode_dirent);
	if (nfserr == nfs_ok &&
	if (nfserr == nfs_ok &&
	    readdir->common.err == nfserr_toosmall &&
	    readdir->common.err == nfserr_toosmall &&
	    readdir->buffer == page) 
	    xdr->buf->len == starting_len + 8) {
		/* nothing encoded; which limit did we hit?: */
		if (maxcount - 16 < bytes_left)
			/* It was the fault of rd_maxcount: */
			nfserr = nfserr_toosmall;
			nfserr = nfserr_toosmall;
		else
			/* We ran out of buffer space: */
			nfserr = nfserr_resource;
	}
	if (nfserr)
	if (nfserr)
		goto err_no_verf;
		goto err_no_verf;


	if (readdir->offset)
	if (readdir->cookie_offset) {
		xdr_encode_hyper(readdir->offset, offset);
		wire_offset = cpu_to_be64(offset);
		write_bytes_to_xdr_buf(xdr->buf, readdir->cookie_offset,
							&wire_offset, 8);
	}


	p = readdir->buffer;
	p = xdr_reserve_space(xdr, 8);
	if (!p) {
		WARN_ON_ONCE(1);
		goto err_no_verf;
	}
	*p++ = 0;	/* no more entries */
	*p++ = 0;	/* no more entries */
	*p++ = htonl(readdir->common.err == nfserr_eof);
	*p++ = htonl(readdir->common.err == nfserr_eof);
	resp->xdr.buf->page_len = ((char *)p) -
		(char*)page_address(*(resp->rqstp->rq_next_page-1));
	xdr->buf->len += xdr->buf->page_len;

	xdr->iov = xdr->buf->tail;

	xdr->page_ptr++;
	xdr->buf->buflen -= PAGE_SIZE;
	xdr->iov = xdr->buf->tail;

	/* Use rest of head for padding and remaining ops: */
	resp->xdr.buf->tail[0].iov_base = tailbase;
	resp->xdr.buf->tail[0].iov_len = 0;
	resp->xdr.p = resp->xdr.buf->tail[0].iov_base;
	resp->xdr.end = resp->xdr.p +
			(PAGE_SIZE - resp->xdr.buf->head[0].iov_len)/4;


	return 0;
	return 0;
err_no_verf:
err_no_verf:
+2 −3
Original line number Original line Diff line number Diff line
@@ -287,9 +287,8 @@ struct nfsd4_readdir {
	struct svc_fh * rd_fhp;             /* response */
	struct svc_fh * rd_fhp;             /* response */


	struct readdir_cd	common;
	struct readdir_cd	common;
	__be32 *		buffer;
	struct xdr_stream	*xdr;
	int			buflen;
	int			cookie_offset;
	__be32 *		offset;
};
};


struct nfsd4_release_lockowner {
struct nfsd4_release_lockowner {