Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit e9f7bee1, authored by Trond Myklebust, committed by Linus Torvalds
Browse files

[PATCH] NFS: large non-page-aligned direct I/O clobbers memory



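The logic in nfs_direct_read_schedule and nfs_direct_write_schedule can
allow data->npages to be one larger than rpages (or wpages): the pagevec
is sized from rsize (or wsize) alone, but a request of that many bytes
that does not start on a page boundary spans one additional page.  This
causes a page pointer to be written beyond the end of the pagevec in
nfs_read_data (or nfs_write_data).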

Fix this by making nfs_readdata_alloc() and nfs_writedata_alloc() take a
byte length, calculate the size of the pagevec array from it, and
initialise data->npages.

Also get rid of the redundant argument to nfs_commit_alloc().

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 016eb4a0
Loading
Loading
Loading
Loading
+14 −36
Original line number Original line Diff line number Diff line
@@ -100,25 +100,6 @@ static inline int put_dreq(struct nfs_direct_req *dreq)
	return atomic_dec_and_test(&dreq->io_count);
	return atomic_dec_and_test(&dreq->io_count);
}
}


/*
 * "size" is never larger than rsize or wsize.
 */
static inline int nfs_direct_count_pages(unsigned long user_addr, size_t size)
{
	int page_count;

	page_count = (user_addr + size + PAGE_SIZE - 1) >> PAGE_SHIFT;
	page_count -= user_addr >> PAGE_SHIFT;
	BUG_ON(page_count < 0);

	return page_count;
}

static inline unsigned int nfs_max_pages(unsigned int size)
{
	return (size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
}

/**
/**
 * nfs_direct_IO - NFS address space operation for direct I/O
 * nfs_direct_IO - NFS address space operation for direct I/O
 * @rw: direction (read or write)
 * @rw: direction (read or write)
@@ -276,28 +257,24 @@ static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned lo
	struct nfs_open_context *ctx = dreq->ctx;
	struct nfs_open_context *ctx = dreq->ctx;
	struct inode *inode = ctx->dentry->d_inode;
	struct inode *inode = ctx->dentry->d_inode;
	size_t rsize = NFS_SERVER(inode)->rsize;
	size_t rsize = NFS_SERVER(inode)->rsize;
	unsigned int rpages = nfs_max_pages(rsize);
	unsigned int pgbase;
	unsigned int pgbase;
	int result;
	int result;
	ssize_t started = 0;
	ssize_t started = 0;


	get_dreq(dreq);
	get_dreq(dreq);


	pgbase = user_addr & ~PAGE_MASK;
	do {
	do {
		struct nfs_read_data *data;
		struct nfs_read_data *data;
		size_t bytes;
		size_t bytes;


		pgbase = user_addr & ~PAGE_MASK;
		bytes = min(rsize,count);

		result = -ENOMEM;
		result = -ENOMEM;
		data = nfs_readdata_alloc(rpages);
		data = nfs_readdata_alloc(pgbase + bytes);
		if (unlikely(!data))
		if (unlikely(!data))
			break;
			break;


		bytes = rsize;
		if (count < rsize)
			bytes = count;

		data->npages = nfs_direct_count_pages(user_addr, bytes);
		down_read(&current->mm->mmap_sem);
		down_read(&current->mm->mmap_sem);
		result = get_user_pages(current, current->mm, user_addr,
		result = get_user_pages(current, current->mm, user_addr,
					data->npages, 1, 0, data->pagevec, NULL);
					data->npages, 1, 0, data->pagevec, NULL);
@@ -344,8 +321,10 @@ static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned lo
		started += bytes;
		started += bytes;
		user_addr += bytes;
		user_addr += bytes;
		pos += bytes;
		pos += bytes;
		/* FIXME: Remove this unnecessary math from final patch */
		pgbase += bytes;
		pgbase += bytes;
		pgbase &= ~PAGE_MASK;
		pgbase &= ~PAGE_MASK;
		BUG_ON(pgbase != (user_addr & ~PAGE_MASK));


		count -= bytes;
		count -= bytes;
	} while (count != 0);
	} while (count != 0);
@@ -524,7 +503,7 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode


static void nfs_alloc_commit_data(struct nfs_direct_req *dreq)
static void nfs_alloc_commit_data(struct nfs_direct_req *dreq)
{
{
	dreq->commit_data = nfs_commit_alloc(0);
	dreq->commit_data = nfs_commit_alloc();
	if (dreq->commit_data != NULL)
	if (dreq->commit_data != NULL)
		dreq->commit_data->req = (struct nfs_page *) dreq;
		dreq->commit_data->req = (struct nfs_page *) dreq;
}
}
@@ -605,28 +584,24 @@ static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned l
	struct nfs_open_context *ctx = dreq->ctx;
	struct nfs_open_context *ctx = dreq->ctx;
	struct inode *inode = ctx->dentry->d_inode;
	struct inode *inode = ctx->dentry->d_inode;
	size_t wsize = NFS_SERVER(inode)->wsize;
	size_t wsize = NFS_SERVER(inode)->wsize;
	unsigned int wpages = nfs_max_pages(wsize);
	unsigned int pgbase;
	unsigned int pgbase;
	int result;
	int result;
	ssize_t started = 0;
	ssize_t started = 0;


	get_dreq(dreq);
	get_dreq(dreq);


	pgbase = user_addr & ~PAGE_MASK;
	do {
	do {
		struct nfs_write_data *data;
		struct nfs_write_data *data;
		size_t bytes;
		size_t bytes;


		pgbase = user_addr & ~PAGE_MASK;
		bytes = min(wsize,count);

		result = -ENOMEM;
		result = -ENOMEM;
		data = nfs_writedata_alloc(wpages);
		data = nfs_writedata_alloc(pgbase + bytes);
		if (unlikely(!data))
		if (unlikely(!data))
			break;
			break;


		bytes = wsize;
		if (count < wsize)
			bytes = count;

		data->npages = nfs_direct_count_pages(user_addr, bytes);
		down_read(&current->mm->mmap_sem);
		down_read(&current->mm->mmap_sem);
		result = get_user_pages(current, current->mm, user_addr,
		result = get_user_pages(current, current->mm, user_addr,
					data->npages, 0, 0, data->pagevec, NULL);
					data->npages, 0, 0, data->pagevec, NULL);
@@ -676,8 +651,11 @@ static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned l
		started += bytes;
		started += bytes;
		user_addr += bytes;
		user_addr += bytes;
		pos += bytes;
		pos += bytes;

		/* FIXME: Remove this useless math from the final patch */
		pgbase += bytes;
		pgbase += bytes;
		pgbase &= ~PAGE_MASK;
		pgbase &= ~PAGE_MASK;
		BUG_ON(pgbase != (user_addr & ~PAGE_MASK));


		count -= bytes;
		count -= bytes;
	} while (count != 0);
	} while (count != 0);
+13 −11
Original line number Original line Diff line number Diff line
@@ -43,13 +43,15 @@ static mempool_t *nfs_rdata_mempool;


#define MIN_POOL_READ	(32)
#define MIN_POOL_READ	(32)


struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
struct nfs_read_data *nfs_readdata_alloc(size_t len)
{
{
	unsigned int pagecount = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
	struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, SLAB_NOFS);
	struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, SLAB_NOFS);


	if (p) {
	if (p) {
		memset(p, 0, sizeof(*p));
		memset(p, 0, sizeof(*p));
		INIT_LIST_HEAD(&p->pages);
		INIT_LIST_HEAD(&p->pages);
		p->npages = pagecount;
		if (pagecount <= ARRAY_SIZE(p->page_array))
		if (pagecount <= ARRAY_SIZE(p->page_array))
			p->pagevec = p->page_array;
			p->pagevec = p->page_array;
		else {
		else {
@@ -140,7 +142,7 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode,
	int		result;
	int		result;
	struct nfs_read_data *rdata;
	struct nfs_read_data *rdata;


	rdata = nfs_readdata_alloc(1);
	rdata = nfs_readdata_alloc(count);
	if (!rdata)
	if (!rdata)
		return -ENOMEM;
		return -ENOMEM;


@@ -336,25 +338,25 @@ static int nfs_pagein_multi(struct list_head *head, struct inode *inode)
	struct nfs_page *req = nfs_list_entry(head->next);
	struct nfs_page *req = nfs_list_entry(head->next);
	struct page *page = req->wb_page;
	struct page *page = req->wb_page;
	struct nfs_read_data *data;
	struct nfs_read_data *data;
	unsigned int rsize = NFS_SERVER(inode)->rsize;
	size_t rsize = NFS_SERVER(inode)->rsize, nbytes;
	unsigned int nbytes, offset;
	unsigned int offset;
	int requests = 0;
	int requests = 0;
	LIST_HEAD(list);
	LIST_HEAD(list);


	nfs_list_remove_request(req);
	nfs_list_remove_request(req);


	nbytes = req->wb_bytes;
	nbytes = req->wb_bytes;
	for(;;) {
	do {
		data = nfs_readdata_alloc(1);
		size_t len = min(nbytes,rsize);

		data = nfs_readdata_alloc(len);
		if (!data)
		if (!data)
			goto out_bad;
			goto out_bad;
		INIT_LIST_HEAD(&data->pages);
		INIT_LIST_HEAD(&data->pages);
		list_add(&data->pages, &list);
		list_add(&data->pages, &list);
		requests++;
		requests++;
		if (nbytes <= rsize)
		nbytes -= len;
			break;
	} while(nbytes != 0);
		nbytes -= rsize;
	}
	atomic_set(&req->wb_complete, requests);
	atomic_set(&req->wb_complete, requests);


	ClearPageError(page);
	ClearPageError(page);
@@ -402,7 +404,7 @@ static int nfs_pagein_one(struct list_head *head, struct inode *inode)
	if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE)
	if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE)
		return nfs_pagein_multi(head, inode);
		return nfs_pagein_multi(head, inode);


	data = nfs_readdata_alloc(NFS_SERVER(inode)->rpages);
	data = nfs_readdata_alloc(NFS_SERVER(inode)->rsize);
	if (!data)
	if (!data)
		goto out_bad;
		goto out_bad;


+15 −22
Original line number Original line Diff line number Diff line
@@ -90,22 +90,13 @@ static mempool_t *nfs_commit_mempool;


static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion);
static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion);


struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount)
struct nfs_write_data *nfs_commit_alloc(void)
{
{
	struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, SLAB_NOFS);
	struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, SLAB_NOFS);


	if (p) {
	if (p) {
		memset(p, 0, sizeof(*p));
		memset(p, 0, sizeof(*p));
		INIT_LIST_HEAD(&p->pages);
		INIT_LIST_HEAD(&p->pages);
		if (pagecount <= ARRAY_SIZE(p->page_array))
			p->pagevec = p->page_array;
		else {
			p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS);
			if (!p->pagevec) {
				mempool_free(p, nfs_commit_mempool);
				p = NULL;
			}
		}
	}
	}
	return p;
	return p;
}
}
@@ -117,13 +108,15 @@ void nfs_commit_free(struct nfs_write_data *p)
	mempool_free(p, nfs_commit_mempool);
	mempool_free(p, nfs_commit_mempool);
}
}


struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
struct nfs_write_data *nfs_writedata_alloc(size_t len)
{
{
	unsigned int pagecount = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
	struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, SLAB_NOFS);
	struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, SLAB_NOFS);


	if (p) {
	if (p) {
		memset(p, 0, sizeof(*p));
		memset(p, 0, sizeof(*p));
		INIT_LIST_HEAD(&p->pages);
		INIT_LIST_HEAD(&p->pages);
		p->npages = pagecount;
		if (pagecount <= ARRAY_SIZE(p->page_array))
		if (pagecount <= ARRAY_SIZE(p->page_array))
			p->pagevec = p->page_array;
			p->pagevec = p->page_array;
		else {
		else {
@@ -208,7 +201,7 @@ static int nfs_writepage_sync(struct nfs_open_context *ctx, struct inode *inode,
	int		result, written = 0;
	int		result, written = 0;
	struct nfs_write_data *wdata;
	struct nfs_write_data *wdata;


	wdata = nfs_writedata_alloc(1);
	wdata = nfs_writedata_alloc(wsize);
	if (!wdata)
	if (!wdata)
		return -ENOMEM;
		return -ENOMEM;


@@ -999,24 +992,24 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, int how)
	struct nfs_page *req = nfs_list_entry(head->next);
	struct nfs_page *req = nfs_list_entry(head->next);
	struct page *page = req->wb_page;
	struct page *page = req->wb_page;
	struct nfs_write_data *data;
	struct nfs_write_data *data;
	unsigned int wsize = NFS_SERVER(inode)->wsize;
	size_t wsize = NFS_SERVER(inode)->wsize, nbytes;
	unsigned int nbytes, offset;
	unsigned int offset;
	int requests = 0;
	int requests = 0;
	LIST_HEAD(list);
	LIST_HEAD(list);


	nfs_list_remove_request(req);
	nfs_list_remove_request(req);


	nbytes = req->wb_bytes;
	nbytes = req->wb_bytes;
	for (;;) {
	do {
		data = nfs_writedata_alloc(1);
		size_t len = min(nbytes, wsize);

		data = nfs_writedata_alloc(len);
		if (!data)
		if (!data)
			goto out_bad;
			goto out_bad;
		list_add(&data->pages, &list);
		list_add(&data->pages, &list);
		requests++;
		requests++;
		if (nbytes <= wsize)
		nbytes -= len;
			break;
	} while (nbytes != 0);
		nbytes -= wsize;
	}
	atomic_set(&req->wb_complete, requests);
	atomic_set(&req->wb_complete, requests);


	ClearPageError(page);
	ClearPageError(page);
@@ -1070,7 +1063,7 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, int how)
	struct nfs_write_data	*data;
	struct nfs_write_data	*data;
	unsigned int		count;
	unsigned int		count;


	data = nfs_writedata_alloc(NFS_SERVER(inode)->wpages);
	data = nfs_writedata_alloc(NFS_SERVER(inode)->wsize);
	if (!data)
	if (!data)
		goto out_bad;
		goto out_bad;


@@ -1378,7 +1371,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
	struct nfs_write_data	*data;
	struct nfs_write_data	*data;
	struct nfs_page         *req;
	struct nfs_page         *req;


	data = nfs_commit_alloc(NFS_SERVER(inode)->wpages);
	data = nfs_commit_alloc();


	if (!data)
	if (!data)
		goto out_bad;
		goto out_bad;
+3 −3
Original line number Original line Diff line number Diff line
@@ -427,7 +427,7 @@ extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *);
extern void nfs_writedata_release(void *);
extern void nfs_writedata_release(void *);


#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount);
struct nfs_write_data *nfs_commit_alloc(void);
void nfs_commit_free(struct nfs_write_data *p);
void nfs_commit_free(struct nfs_write_data *p);
#endif
#endif


@@ -478,7 +478,7 @@ static inline int nfs_wb_page(struct inode *inode, struct page* page)
/*
/*
 * Allocate nfs_write_data structures
 * Allocate nfs_write_data structures
 */
 */
extern struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount);
extern struct nfs_write_data *nfs_writedata_alloc(size_t len);


/*
/*
 * linux/fs/nfs/read.c
 * linux/fs/nfs/read.c
@@ -492,7 +492,7 @@ extern void nfs_readdata_release(void *data);
/*
/*
 * Allocate nfs_read_data structures
 * Allocate nfs_read_data structures
 */
 */
extern struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount);
extern struct nfs_read_data *nfs_readdata_alloc(size_t len);


/*
/*
 * linux/fs/nfs3proc.c
 * linux/fs/nfs3proc.c
+2 −2
Original line number Original line Diff line number Diff line
@@ -729,7 +729,7 @@ struct nfs_read_data {
	struct list_head	pages;	/* Coalesced read requests */
	struct list_head	pages;	/* Coalesced read requests */
	struct nfs_page		*req;	/* multi ops per nfs_page */
	struct nfs_page		*req;	/* multi ops per nfs_page */
	struct page		**pagevec;
	struct page		**pagevec;
	unsigned int		npages;	/* active pages in pagevec */
	unsigned int		npages;	/* Max length of pagevec */
	struct nfs_readargs args;
	struct nfs_readargs args;
	struct nfs_readres  res;
	struct nfs_readres  res;
#ifdef CONFIG_NFS_V4
#ifdef CONFIG_NFS_V4
@@ -748,7 +748,7 @@ struct nfs_write_data {
	struct list_head	pages;		/* Coalesced requests we wish to flush */
	struct list_head	pages;		/* Coalesced requests we wish to flush */
	struct nfs_page		*req;		/* multi ops per nfs_page */
	struct nfs_page		*req;		/* multi ops per nfs_page */
	struct page		**pagevec;
	struct page		**pagevec;
	unsigned int		npages;		/* active pages in pagevec */
	unsigned int		npages;		/* Max length of pagevec */
	struct nfs_writeargs	args;		/* argument struct */
	struct nfs_writeargs	args;		/* argument struct */
	struct nfs_writeres	res;		/* result struct */
	struct nfs_writeres	res;		/* result struct */
#ifdef CONFIG_NFS_V4
#ifdef CONFIG_NFS_V4