
Commit 82b145c5 authored by Chuck Lever, committed by Trond Myklebust

NFS: alloc nfs_read/write_data as direct I/O is scheduled



Re-arrange the logic in the NFS direct I/O path so that nfs_read/write_data
structs are allocated just before they are scheduled, rather than
allocating them all at once before we start scheduling requests.

Signed-off-by: Chuck Lever <cel@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
parent 06cf6f2e
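
In short: instead of building the full list of nfs_read_data/nfs_write_data structs up front (and failing the whole call if any allocation fails), each struct is now allocated immediately before its RPC is dispatched, and an allocation failure merely stops further scheduling. A minimal user-space sketch of the idea — alloc_one() and dispatch() are hypothetical stand-ins, not the kernel code:

#include <stdio.h>
#include <stdlib.h>

/* Hypothetical stand-in for nfs_read_data/nfs_write_data. */
struct io_data { char buf[512]; };

static struct io_data *alloc_one(void) { return malloc(sizeof(struct io_data)); }
static void dispatch(struct io_data *data, size_t chunk)
{
	printf("sent chunk %zu via %p\n", chunk, (void *)data);
}

int main(void)
{
	size_t nchunks = 5;

	/* New scheme: allocate each request struct just before scheduling it.
	 * A failed allocation only stops further scheduling; requests that
	 * were already dispatched still run to completion. */
	for (size_t chunk = 0; chunk < nchunks; chunk++) {
		struct io_data *data = alloc_one();
		if (!data)
			break;		/* bail, stop sending more */
		dispatch(data, chunk);
		free(data);		/* the real code frees on I/O completion */
	}
	return 0;
}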
fs/nfs/direct.c: +65 −145

--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -68,8 +68,6 @@ struct nfs_direct_req {
 	struct kref		kref;		/* release manager */
 
 	/* I/O parameters */
-	struct list_head	list,		/* nfs_read/write_data structs */
-				rewrite_list;	/* saved nfs_write_data structs */
 	struct nfs_open_context	*ctx;		/* file open context info */
 	struct kiocb *		iocb;		/* controlling i/o request */
 	struct inode *		inode;		/* target file of i/o */
@@ -82,6 +80,7 @@ struct nfs_direct_req {
 	struct completion	completion;	/* wait for i/o completion */
 
 	/* commit state */
+	struct list_head	rewrite_list;	/* saved nfs_write_data structs */
 	struct nfs_write_data *	commit_data;	/* special write_data for commits */
 	int			flags;
 #define NFS_ODIRECT_DO_COMMIT		(1)	/* an unstable reply was received */
@@ -116,6 +115,11 @@ static inline int nfs_direct_count_pages(unsigned long user_addr, size_t size)
 	return page_count;
 }
 
+static inline unsigned int nfs_max_pages(unsigned int size)
+{
+	return (size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+}
+
 /**
  * nfs_direct_IO - NFS address space operation for direct I/O
  * @rw: direction (read or write)
@@ -164,8 +168,8 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
 		return NULL;
 
 	kref_init(&dreq->kref);
+	kref_get(&dreq->kref);
 	init_completion(&dreq->completion);
-	INIT_LIST_HEAD(&dreq->list);
 	INIT_LIST_HEAD(&dreq->rewrite_list);
 	dreq->iocb = NULL;
 	dreq->ctx = NULL;
@@ -227,49 +231,6 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq)
 	kref_put(&dreq->kref, nfs_direct_req_release);
 }
 
-/*
- * Note we also set the number of requests we have in the dreq when we are
- * done.  This prevents races with I/O completion so we will always wait
- * until all requests have been dispatched and completed.
- */
-static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, size_t rsize)
-{
-	struct list_head *list;
-	struct nfs_direct_req *dreq;
-	unsigned int rpages = (rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-
-	dreq = nfs_direct_req_alloc();
-	if (!dreq)
-		return NULL;
-
-	list = &dreq->list;
-	for(;;) {
-		struct nfs_read_data *data = nfs_readdata_alloc(rpages);
-
-		if (unlikely(!data)) {
-			while (!list_empty(list)) {
-				data = list_entry(list->next,
-						  struct nfs_read_data, pages);
-				list_del(&data->pages);
-				nfs_readdata_free(data);
-			}
-			kref_put(&dreq->kref, nfs_direct_req_release);
-			return NULL;
-		}
-
-		INIT_LIST_HEAD(&data->pages);
-		list_add(&data->pages, list);
-
-		data->req = (struct nfs_page *) dreq;
-		get_dreq(dreq);
-		if (nbytes <= rsize)
-			break;
-		nbytes -= rsize;
-	}
-	kref_get(&dreq->kref);
-	return dreq;
-}
-
 /*
  * We must hold a reference to all the pages in this direct read request
  * until the RPCs complete.  This could be long *after* we are woken up in
@@ -305,42 +266,53 @@ static const struct rpc_call_ops nfs_read_direct_ops = {
 };
 
 /*
- * For each nfs_read_data struct that was allocated on the list, dispatch
- * an NFS READ operation.  If get_user_pages() fails, we stop sending reads.
- * Read length accounting is handled by nfs_direct_read_result().
- * Otherwise, if no requests have been sent, just return an error.
+ * For each rsize'd chunk of the user's buffer, dispatch an NFS READ
+ * operation.  If nfs_readdata_alloc() or get_user_pages() fails,
+ * bail and stop sending more reads.  Read length accounting is
+ * handled automatically by nfs_direct_read_result().  Otherwise, if
+ * no requests have been sent, just return an error.
  */
 static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos)
 {
 	struct nfs_open_context *ctx = dreq->ctx;
 	struct inode *inode = ctx->dentry->d_inode;
-	struct list_head *list = &dreq->list;
 	size_t rsize = NFS_SERVER(inode)->rsize;
+	unsigned int rpages = nfs_max_pages(rsize);
 	unsigned int pgbase;
 	int result;
 	ssize_t started = 0;
-	struct nfs_read_data *data;
 
+	get_dreq(dreq);
+
 	pgbase = user_addr & ~PAGE_MASK;
 	do {
+		struct nfs_read_data *data;
 		size_t bytes;
 
+		result = -ENOMEM;
+		data = nfs_readdata_alloc(rpages);
+		if (unlikely(!data))
+			break;
+
 		bytes = rsize;
 		if (count < rsize)
 			bytes = count;
 
-		BUG_ON(list_empty(list));
-		data = list_entry(list->next, struct nfs_read_data, pages);
-		list_del_init(&data->pages);
-
 		data->npages = nfs_direct_count_pages(user_addr, bytes);
 		down_read(&current->mm->mmap_sem);
 		result = get_user_pages(current, current->mm, user_addr,
 					data->npages, 1, 0, data->pagevec, NULL);
 		up_read(&current->mm->mmap_sem);
-		if (unlikely(result < data->npages))
-			goto out_err;
+		if (unlikely(result < data->npages)) {
+			if (result > 0)
+				nfs_direct_release_pages(data->pagevec, result);
+			nfs_readdata_release(data);
+			break;
+		}
+
+		get_dreq(dreq);
 
+		data->req = (struct nfs_page *) dreq;
 		data->inode = inode;
 		data->cred = ctx->cred;
 		data->args.fh = NFS_FH(inode);
@@ -378,21 +350,9 @@ static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned lo
 
 		count -= bytes;
 	} while (count != 0);
-	BUG_ON(!list_empty(list));
-	return 0;
 
-out_err:
-	if (result > 0)
-		nfs_direct_release_pages(data->pagevec, result);
-
-	list_add(&data->pages, list);
-	while (!list_empty(list)) {
-		data = list_entry(list->next, struct nfs_read_data, pages);
-		list_del(&data->pages);
-		nfs_readdata_free(data);
-	}
+	if (put_dreq(dreq))
+		nfs_direct_complete(dreq);
 
 	if (started)
 		return 0;
@@ -401,13 +361,13 @@ out_err:
 
 static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos)
 {
-	ssize_t result;
+	ssize_t result = 0;
 	sigset_t oldset;
 	struct inode *inode = iocb->ki_filp->f_mapping->host;
 	struct rpc_clnt *clnt = NFS_CLIENT(inode);
 	struct nfs_direct_req *dreq;
 
-	dreq = nfs_direct_read_alloc(count, NFS_SERVER(inode)->rsize);
+	dreq = nfs_direct_req_alloc();
 	if (!dreq)
 		return -ENOMEM;
 
@@ -428,9 +388,8 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size
 
 static void nfs_direct_free_writedata(struct nfs_direct_req *dreq)
 {
-	list_splice_init(&dreq->rewrite_list, &dreq->list);
-	while (!list_empty(&dreq->list)) {
-		struct nfs_write_data *data = list_entry(dreq->list.next, struct nfs_write_data, pages);
+	while (!list_empty(&dreq->rewrite_list)) {
+		struct nfs_write_data *data = list_entry(dreq->rewrite_list.next, struct nfs_write_data, pages);
 		list_del(&data->pages);
 		nfs_direct_release_pages(data->pagevec, data->npages);
 		nfs_writedata_release(data);
@@ -584,47 +543,6 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode
 }
 #endif
 
-static struct nfs_direct_req *nfs_direct_write_alloc(size_t nbytes, size_t wsize)
-{
-	struct list_head *list;
-	struct nfs_direct_req *dreq;
-	unsigned int wpages = (wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-
-	dreq = nfs_direct_req_alloc();
-	if (!dreq)
-		return NULL;
-
-	list = &dreq->list;
-	for(;;) {
-		struct nfs_write_data *data = nfs_writedata_alloc(wpages);
-
-		if (unlikely(!data)) {
-			while (!list_empty(list)) {
-				data = list_entry(list->next,
-						  struct nfs_write_data, pages);
-				list_del(&data->pages);
-				nfs_writedata_free(data);
-			}
-			kref_put(&dreq->kref, nfs_direct_req_release);
-			return NULL;
-		}
-
-		INIT_LIST_HEAD(&data->pages);
-		list_add(&data->pages, list);
-
-		data->req = (struct nfs_page *) dreq;
-		get_dreq(dreq);
-		if (nbytes <= wsize)
-			break;
-		nbytes -= wsize;
-	}
-
-	nfs_alloc_commit_data(dreq);
-
-	kref_get(&dreq->kref);
-	return dreq;
-}
-
 static void nfs_direct_write_result(struct rpc_task *task, void *calldata)
 {
 	struct nfs_write_data *data = calldata;
@@ -677,43 +595,55 @@ static const struct rpc_call_ops nfs_write_direct_ops = {
 };
 
 /*
- * For each nfs_write_data struct that was allocated on the list, dispatch
- * an NFS WRITE operation.  If get_user_pages() fails, we stop sending writes.
- * Write length accounting is handled by nfs_direct_write_result().
- * Otherwise, if no requests have been sent, just return an error.
+ * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE
+ * operation.  If nfs_writedata_alloc() or get_user_pages() fails,
+ * bail and stop sending more writes.  Write length accounting is
+ * handled automatically by nfs_direct_write_result().  Otherwise, if
+ * no requests have been sent, just return an error.
  */
 static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos, int sync)
 {
 	struct nfs_open_context *ctx = dreq->ctx;
 	struct inode *inode = ctx->dentry->d_inode;
-	struct list_head *list = &dreq->list;
 	size_t wsize = NFS_SERVER(inode)->wsize;
+	unsigned int wpages = nfs_max_pages(wsize);
 	unsigned int pgbase;
 	int result;
 	ssize_t started = 0;
-	struct nfs_write_data *data;
 
+	get_dreq(dreq);
+
 	pgbase = user_addr & ~PAGE_MASK;
 	do {
+		struct nfs_write_data *data;
 		size_t bytes;
 
+		result = -ENOMEM;
+		data = nfs_writedata_alloc(wpages);
+		if (unlikely(!data))
+			break;
+
 		bytes = wsize;
 		if (count < wsize)
 			bytes = count;
 
-		BUG_ON(list_empty(list));
-		data = list_entry(list->next, struct nfs_write_data, pages);
-
 		data->npages = nfs_direct_count_pages(user_addr, bytes);
 		down_read(&current->mm->mmap_sem);
 		result = get_user_pages(current, current->mm, user_addr,
 					data->npages, 0, 0, data->pagevec, NULL);
 		up_read(&current->mm->mmap_sem);
-		if (unlikely(result < data->npages))
-			goto out_err;
+		if (unlikely(result < data->npages)) {
+			if (result > 0)
+				nfs_direct_release_pages(data->pagevec, result);
+			nfs_writedata_release(data);
+			break;
+		}
+
+		get_dreq(dreq);
 
 		list_move_tail(&data->pages, &dreq->rewrite_list);
 
+		data->req = (struct nfs_page *) dreq;
 		data->inode = inode;
 		data->cred = ctx->cred;
 		data->args.fh = NFS_FH(inode);
@@ -752,21 +682,9 @@ static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned l
 
 		count -= bytes;
 	} while (count != 0);
-	BUG_ON(!list_empty(list));
-	return 0;
 
-out_err:
-	if (result > 0)
-		nfs_direct_release_pages(data->pagevec, result);
-
-	list_add(&data->pages, list);
-	while (!list_empty(list)) {
-		data = list_entry(list->next, struct nfs_write_data, pages);
-		list_del(&data->pages);
-		nfs_writedata_free(data);
-	}
+	if (put_dreq(dreq))
+		nfs_direct_write_complete(dreq, inode);
 
 	if (started)
 		return 0;
@@ -775,7 +693,7 @@ out_err:
 
 static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos)
 {
-	ssize_t result;
+	ssize_t result = 0;
 	sigset_t oldset;
 	struct inode *inode = iocb->ki_filp->f_mapping->host;
 	struct rpc_clnt *clnt = NFS_CLIENT(inode);
@@ -783,9 +701,11 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz
 	size_t wsize = NFS_SERVER(inode)->wsize;
 	int sync = 0;
 
-	dreq = nfs_direct_write_alloc(count, wsize);
+	dreq = nfs_direct_req_alloc();
 	if (!dreq)
 		return -ENOMEM;
+	nfs_alloc_commit_data(dreq);
+
 	if (dreq->commit_data == NULL || count < wsize)
 		sync = FLUSH_STABLE;
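
Note the reference counting that replaces the old scheme of counting requests at allocation time: nfs_direct_read_schedule() and nfs_direct_write_schedule() now take one reference before the loop, one per dispatched request, and drop the scheduler's reference at the end, so completion cannot fire until scheduling is finished and every request has completed. A compilable sketch of that pattern — the get_dreq()/put_dreq() bodies here are simplified stand-ins (in the kernel they wrap an atomic counter), not the real API:

#include <stdio.h>

struct dreq { int refs; };

static void get_dreq(struct dreq *d) { d->refs++; }
static int put_dreq(struct dreq *d) { return --d->refs == 0; }
static void complete(struct dreq *d) { printf("dreq %p: all I/O done\n", (void *)d); }

static void schedule_requests(struct dreq *d, int nchunks)
{
	get_dreq(d);			/* hold off completion while scheduling */
	for (int i = 0; i < nchunks; i++)
		get_dreq(d);		/* one reference per in-flight request */
	if (put_dreq(d))		/* drop the scheduler's reference */
		complete(d);		/* fires only if every request finished */
}

static void request_done(struct dreq *d)
{
	if (put_dreq(d))
		complete(d);
}

int main(void)
{
	struct dreq d = { 0 };
	schedule_requests(&d, 3);
	for (int i = 0; i < 3; i++)
		request_done(&d);	/* the last completion triggers complete() */
	return 0;
}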