Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 03158cd7 authored by Nick Piggin, committed by Linus Torvalds
Browse files

fs: restore nobh



Implement nobh in new aops.  This is a bit tricky.  FWIW, nobh_truncate is
now implemented in a way that does not create blocks in sparse regions,
which is a silly thing for it to have been doing (isn't it?)

ext2 survives fsx and fsstress. jfs is converted as well... ext3
should be easy to do (but not done yet).

[akpm@linux-foundation.org: coding-style fixes]
Cc: Badari Pulavarty <pbadari@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent b6af1bcd
Loading
Loading
Loading
Loading
+150 −79
Original line number Original line Diff line number Diff line
@@ -2369,7 +2369,7 @@ out_unlock:
}
}


/*
/*
 * nobh_prepare_write()'s prereads are special: the buffer_heads are freed
 * nobh_write_begin()'s prereads are special: the buffer_heads are freed
 * immediately, while under the page lock.  So it needs a special end_io
 * immediately, while under the page lock.  So it needs a special end_io
 * handler which does not touch the bh after unlocking it.
 * handler which does not touch the bh after unlocking it.
 */
 */
@@ -2378,17 +2378,46 @@ static void end_buffer_read_nobh(struct buffer_head *bh, int uptodate)
	__end_buffer_read_notouch(bh, uptodate);
	__end_buffer_read_notouch(bh, uptodate);
}
}


/*
 * Hand the buffers built by nobh_write_begin over to the page.
 *
 * @page: the page that receives the buffers; it must be locked.
 * @head: first buffer of a singly-linked, NULL-terminated b_this_page
 *        chain.  It is dereferenced unconditionally, so it must not be NULL.
 *
 * The chain is closed into the usual circular list and then attached to
 * the page.  All of this runs under the mapping's private_lock, and every
 * buffer is marked dirty if the page is dirty at the time it is visited,
 * which takes care of page dirty races.
 */
static void attach_nobh_buffers(struct page *page, struct buffer_head *head)
{
	struct buffer_head *cur = head;
	struct buffer_head *next;

	BUG_ON(!PageLocked(page));

	spin_lock(&page->mapping->private_lock);
	for (;;) {
		if (PageDirty(page))
			set_buffer_dirty(cur);

		next = cur->b_this_page;
		if (!next) {
			/* End of the linear chain: link the tail back to head. */
			cur->b_this_page = head;
			next = head;
		}
		if (next == head)
			break;
		cur = next;
	}
	attach_page_buffers(page, head);
	spin_unlock(&page->mapping->private_lock);
}

/*
/*
 * On entry, the page is fully not uptodate.
 * On entry, the page is fully not uptodate.
 * On exit the page is fully uptodate in the areas outside (from,to)
 * On exit the page is fully uptodate in the areas outside (from,to)
 */
 */
int nobh_prepare_write(struct page *page, unsigned from, unsigned to,
int nobh_write_begin(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata,
			get_block_t *get_block)
			get_block_t *get_block)
{
{
	struct inode *inode = page->mapping->host;
	struct inode *inode = mapping->host;
	const unsigned blkbits = inode->i_blkbits;
	const unsigned blkbits = inode->i_blkbits;
	const unsigned blocksize = 1 << blkbits;
	const unsigned blocksize = 1 << blkbits;
	struct buffer_head *head, *bh;
	struct buffer_head *head, *bh;
	struct page *page;
	pgoff_t index;
	unsigned from, to;
	unsigned block_in_page;
	unsigned block_in_page;
	unsigned block_start, block_end;
	unsigned block_start, block_end;
	sector_t block_in_file;
	sector_t block_in_file;
@@ -2397,8 +2426,23 @@ int nobh_prepare_write(struct page *page, unsigned from, unsigned to,
	int ret = 0;
	int ret = 0;
	int is_mapped_to_disk = 1;
	int is_mapped_to_disk = 1;


	if (page_has_buffers(page))
	index = pos >> PAGE_CACHE_SHIFT;
		return block_prepare_write(page, from, to, get_block);
	from = pos & (PAGE_CACHE_SIZE - 1);
	to = from + len;

	page = __grab_cache_page(mapping, index);
	if (!page)
		return -ENOMEM;
	*pagep = page;
	*fsdata = NULL;

	if (page_has_buffers(page)) {
		unlock_page(page);
		page_cache_release(page);
		*pagep = NULL;
		return block_write_begin(file, mapping, pos, len, flags, pagep,
					fsdata, get_block);
	}


	if (PageMappedToDisk(page))
	if (PageMappedToDisk(page))
		return 0;
		return 0;
@@ -2413,8 +2457,10 @@ int nobh_prepare_write(struct page *page, unsigned from, unsigned to,
	 * than the circular one we're used to.
	 * than the circular one we're used to.
	 */
	 */
	head = alloc_page_buffers(page, blocksize, 0);
	head = alloc_page_buffers(page, blocksize, 0);
	if (!head)
	if (!head) {
		return -ENOMEM;
		ret = -ENOMEM;
		goto out_release;
	}


	block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits);
	block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits);


@@ -2483,15 +2529,12 @@ int nobh_prepare_write(struct page *page, unsigned from, unsigned to,
	if (is_mapped_to_disk)
	if (is_mapped_to_disk)
		SetPageMappedToDisk(page);
		SetPageMappedToDisk(page);


	do {
	*fsdata = head; /* to be released by nobh_write_end */
		bh = head;
		head = head->b_this_page;
		free_buffer_head(bh);
	} while (head);


	return 0;
	return 0;


failed:
failed:
	BUG_ON(!ret);
	/*
	/*
	 * Error recovery is a bit difficult. We need to zero out blocks that
	 * Error recovery is a bit difficult. We need to zero out blocks that
	 * were newly allocated, and dirty them to ensure they get written out.
	 * were newly allocated, and dirty them to ensure they get written out.
@@ -2499,64 +2542,57 @@ failed:
	 * the handling of potential IO errors during writeout would be hard
	 * the handling of potential IO errors during writeout would be hard
	 * (could try doing synchronous writeout, but what if that fails too?)
	 * (could try doing synchronous writeout, but what if that fails too?)
	 */
	 */
	spin_lock(&page->mapping->private_lock);
	attach_nobh_buffers(page, head);
	bh = head;
	page_zero_new_buffers(page, from, to);
	block_start = 0;
	do {
		if (PageUptodate(page))
			set_buffer_uptodate(bh);
		if (PageDirty(page))
			set_buffer_dirty(bh);


		block_end = block_start+blocksize;
out_release:
		if (block_end <= from)
	unlock_page(page);
			goto next;
	page_cache_release(page);
		if (block_start >= to)
	*pagep = NULL;
			goto next;


		if (buffer_new(bh)) {
	if (pos + len > inode->i_size)
			clear_buffer_new(bh);
		vmtruncate(inode, inode->i_size);
			if (!buffer_uptodate(bh)) {
				zero_user_page(page, block_start, bh->b_size, KM_USER0);
				set_buffer_uptodate(bh);
			}
			mark_buffer_dirty(bh);
		}
next:
		block_start = block_end;
		if (!bh->b_this_page)
			bh->b_this_page = head;
		bh = bh->b_this_page;
	} while (bh != head);
	attach_page_buffers(page, head);
	spin_unlock(&page->mapping->private_lock);


	return ret;
	return ret;
}
}
EXPORT_SYMBOL(nobh_prepare_write);
EXPORT_SYMBOL(nobh_write_begin);


/*
int nobh_write_end(struct file *file, struct address_space *mapping,
 * Make sure any changes to nobh_commit_write() are reflected in
			loff_t pos, unsigned len, unsigned copied,
 * nobh_truncate_page(), since it doesn't call commit_write().
			struct page *page, void *fsdata)
 */
int nobh_commit_write(struct file *file, struct page *page,
		unsigned from, unsigned to)
{
{
	struct inode *inode = page->mapping->host;
	struct inode *inode = page->mapping->host;
	loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
	struct buffer_head *head = NULL;
	struct buffer_head *bh;


	if (!PageMappedToDisk(page)) {
		if (unlikely(copied < len) && !page_has_buffers(page))
			attach_nobh_buffers(page, head);
		if (page_has_buffers(page))
		if (page_has_buffers(page))
		return generic_commit_write(file, page, from, to);
			return generic_write_end(file, mapping, pos, len,
						copied, page, fsdata);
	}


	SetPageUptodate(page);
	SetPageUptodate(page);
	set_page_dirty(page);
	set_page_dirty(page);
	if (pos > inode->i_size) {
	if (pos+copied > inode->i_size) {
		i_size_write(inode, pos);
		i_size_write(inode, pos+copied);
		mark_inode_dirty(inode);
		mark_inode_dirty(inode);
	}
	}
	return 0;

	unlock_page(page);
	page_cache_release(page);

	head = fsdata;
	while (head) {
		bh = head;
		head = head->b_this_page;
		free_buffer_head(bh);
	}

	return copied;
}
}
EXPORT_SYMBOL(nobh_commit_write);
EXPORT_SYMBOL(nobh_write_end);


/*
/*
 * nobh_writepage() - based on block_full_write_page() except
 * nobh_writepage() - based on block_full_write_page() except
@@ -2609,44 +2645,79 @@ out:
}
}
EXPORT_SYMBOL(nobh_writepage);
EXPORT_SYMBOL(nobh_writepage);


/*
int nobh_truncate_page(struct address_space *mapping,
 * This function assumes that ->prepare_write() uses nobh_prepare_write().
			loff_t from, get_block_t *get_block)
 */
int nobh_truncate_page(struct address_space *mapping, loff_t from)
{
{
	struct inode *inode = mapping->host;
	unsigned blocksize = 1 << inode->i_blkbits;
	pgoff_t index = from >> PAGE_CACHE_SHIFT;
	pgoff_t index = from >> PAGE_CACHE_SHIFT;
	unsigned offset = from & (PAGE_CACHE_SIZE-1);
	unsigned offset = from & (PAGE_CACHE_SIZE-1);
	unsigned to;
	unsigned blocksize;
	sector_t iblock;
	unsigned length, pos;
	struct inode *inode = mapping->host;
	struct page *page;
	struct page *page;
	const struct address_space_operations *a_ops = mapping->a_ops;
	struct buffer_head map_bh;
	int ret = 0;
	int err;


	if ((offset & (blocksize - 1)) == 0)
	blocksize = 1 << inode->i_blkbits;
		goto out;
	length = offset & (blocksize - 1);

	/* Block boundary? Nothing to do */
	if (!length)
		return 0;

	length = blocksize - length;
	iblock = (sector_t)index << (PAGE_CACHE_SHIFT - inode->i_blkbits);


	ret = -ENOMEM;
	page = grab_cache_page(mapping, index);
	page = grab_cache_page(mapping, index);
	err = -ENOMEM;
	if (!page)
	if (!page)
		goto out;
		goto out;


	to = (offset + blocksize) & ~(blocksize - 1);
	if (page_has_buffers(page)) {
	ret = a_ops->prepare_write(NULL, page, offset, to);
has_buffers:
	if (ret == 0) {
		unlock_page(page);
		zero_user_page(page, offset, PAGE_CACHE_SIZE - offset,
		page_cache_release(page);
				KM_USER0);
		return block_truncate_page(mapping, from, get_block);
		/*
	}
		 * It would be more correct to call aops->commit_write()

		 * here, but this is more efficient.
	/* Find the buffer that contains "offset" */
		 */
	pos = blocksize;
		SetPageUptodate(page);
	while (offset >= pos) {
		set_page_dirty(page);
		iblock++;
		pos += blocksize;
	}

	err = get_block(inode, iblock, &map_bh, 0);
	if (err)
		goto unlock;
	/* unmapped? It's a hole - nothing to do */
	if (!buffer_mapped(&map_bh))
		goto unlock;

	/* Ok, it's mapped. Make sure it's up-to-date */
	if (!PageUptodate(page)) {
		err = mapping->a_ops->readpage(NULL, page);
		if (err) {
			page_cache_release(page);
			goto out;
		}
		lock_page(page);
		if (!PageUptodate(page)) {
			err = -EIO;
			goto unlock;
		}
		if (page_has_buffers(page))
			goto has_buffers;
	}
	}
	zero_user_page(page, offset, length, KM_USER0);
	set_page_dirty(page);
	err = 0;

unlock:
	unlock_page(page);
	unlock_page(page);
	page_cache_release(page);
	page_cache_release(page);
out:
out:
	return ret;
	return err;
}
}
EXPORT_SYMBOL(nobh_truncate_page);
EXPORT_SYMBOL(nobh_truncate_page);


+18 −2
Original line number Original line Diff line number Diff line
@@ -659,6 +659,20 @@ ext2_write_begin(struct file *file, struct address_space *mapping,
	return __ext2_write_begin(file, mapping, pos, len, flags, pagep,fsdata);
	return __ext2_write_begin(file, mapping, pos, len, flags, pagep,fsdata);
}
}


/*
 * ->write_begin for ext2's nobh address space operations: forwards all
 * arguments to nobh_write_begin() with ext2_get_block as the block mapper
 * and returns its result unchanged.
 *
 * Dir-in-pagecache still goes through ext2_write_begin.  Making it work
 * here easily would mean reworking the directory handling code to pass
 * around offsets rather than struct pages.
 */
static int
ext2_nobh_write_begin(struct file *file, struct address_space *mapping,
		loff_t pos, unsigned len, unsigned flags,
		struct page **pagep, void **fsdata)
{
	int err;

	err = nobh_write_begin(file, mapping, pos, len, flags,
				pagep, fsdata, ext2_get_block);
	return err;
}

static int ext2_nobh_writepage(struct page *page,
static int ext2_nobh_writepage(struct page *page,
			struct writeback_control *wbc)
			struct writeback_control *wbc)
{
{
@@ -710,7 +724,8 @@ const struct address_space_operations ext2_nobh_aops = {
	.readpages		= ext2_readpages,
	.readpages		= ext2_readpages,
	.writepage		= ext2_nobh_writepage,
	.writepage		= ext2_nobh_writepage,
	.sync_page		= block_sync_page,
	.sync_page		= block_sync_page,
	/* XXX: todo */
	.write_begin		= ext2_nobh_write_begin,
	.write_end		= nobh_write_end,
	.bmap			= ext2_bmap,
	.bmap			= ext2_bmap,
	.direct_IO		= ext2_direct_IO,
	.direct_IO		= ext2_direct_IO,
	.writepages		= ext2_writepages,
	.writepages		= ext2_writepages,
@@ -927,7 +942,8 @@ void ext2_truncate (struct inode * inode)
	if (mapping_is_xip(inode->i_mapping))
	if (mapping_is_xip(inode->i_mapping))
		xip_truncate_page(inode->i_mapping, inode->i_size);
		xip_truncate_page(inode->i_mapping, inode->i_size);
	else if (test_opt(inode->i_sb, NOBH))
	else if (test_opt(inode->i_sb, NOBH))
		nobh_truncate_page(inode->i_mapping, inode->i_size);
		nobh_truncate_page(inode->i_mapping,
				inode->i_size, ext2_get_block);
	else
	else
		block_truncate_page(inode->i_mapping,
		block_truncate_page(inode->i_mapping,
				inode->i_size, ext2_get_block);
				inode->i_size, ext2_get_block);
+3 −4
Original line number Original line Diff line number Diff line
@@ -279,8 +279,7 @@ static int jfs_write_begin(struct file *file, struct address_space *mapping,
				loff_t pos, unsigned len, unsigned flags,
				loff_t pos, unsigned len, unsigned flags,
				struct page **pagep, void **fsdata)
				struct page **pagep, void **fsdata)
{
{
	*pagep = NULL;
	return nobh_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
	return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
				jfs_get_block);
				jfs_get_block);
}
}


@@ -306,7 +305,7 @@ const struct address_space_operations jfs_aops = {
	.writepages	= jfs_writepages,
	.writepages	= jfs_writepages,
	.sync_page	= block_sync_page,
	.sync_page	= block_sync_page,
	.write_begin	= jfs_write_begin,
	.write_begin	= jfs_write_begin,
	.write_end	= generic_write_end,
	.write_end	= nobh_write_end,
	.bmap		= jfs_bmap,
	.bmap		= jfs_bmap,
	.direct_IO	= jfs_direct_IO,
	.direct_IO	= jfs_direct_IO,
};
};
@@ -359,7 +358,7 @@ void jfs_truncate(struct inode *ip)
{
{
	jfs_info("jfs_truncate: size = 0x%lx", (ulong) ip->i_size);
	jfs_info("jfs_truncate: size = 0x%lx", (ulong) ip->i_size);


	block_truncate_page(ip->i_mapping, ip->i_size, jfs_get_block);
	nobh_truncate_page(ip->i_mapping, ip->i_size, jfs_get_block);


	IWRITE_LOCK(ip, RDWRLOCK_NORMAL);
	IWRITE_LOCK(ip, RDWRLOCK_NORMAL);
	jfs_truncate_nolock(ip, ip->i_size);
	jfs_truncate_nolock(ip, ip->i_size);
+7 −3
Original line number Original line Diff line number Diff line
@@ -226,9 +226,13 @@ sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *);
int generic_commit_write(struct file *, struct page *, unsigned, unsigned);
int generic_commit_write(struct file *, struct page *, unsigned, unsigned);
int block_truncate_page(struct address_space *, loff_t, get_block_t *);
int block_truncate_page(struct address_space *, loff_t, get_block_t *);
int file_fsync(struct file *, struct dentry *, int);
int file_fsync(struct file *, struct dentry *, int);
int nobh_prepare_write(struct page*, unsigned, unsigned, get_block_t*);
int nobh_write_begin(struct file *, struct address_space *,
int nobh_commit_write(struct file *, struct page *, unsigned, unsigned);
				loff_t, unsigned, unsigned,
int nobh_truncate_page(struct address_space *, loff_t);
				struct page **, void **, get_block_t*);
int nobh_write_end(struct file *, struct address_space *,
				loff_t, unsigned, unsigned,
				struct page *, void *);
int nobh_truncate_page(struct address_space *, loff_t, get_block_t *);
int nobh_writepage(struct page *page, get_block_t *get_block,
int nobh_writepage(struct page *page, get_block_t *get_block,
                        struct writeback_control *wbc);
                        struct writeback_control *wbc);