
Commit 9ebefb18 authored by Chris Mason, committed by David Woodhouse

Btrfs: patch queue: page_mkwrite

parent f1ace244
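Summary of the change: file-data checksum verification moves out of the custom read actor in file.c (btrfs_read_actor() and btrfs_file_aio_read(), deleted below) and into a per-buffer I/O completion handler in inode.c, with reads going back through generic_file_aio_read. The new btrfs_get_block_csum() stashes the expected crc32 for each block in bh->b_private so that btrfs_end_buffer_async_read() can verify it when the read completes, and the write path now checksums each dirty block in __btrfs_write_full_page(). Finally, btrfs_file_mmap() installs a vm_operations_struct whose page_mkwrite hook pushes pages dirtied through shared mappings through prepare/commit_write, so mmap writes get checksummed as well.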
fs/btrfs/ctree.h +2 −0
@@ -1142,6 +1142,8 @@ int btrfs_csum_truncate(struct btrfs_trans_handle *trans,
 			struct btrfs_root *root, struct btrfs_path *path,
 			u64 isize);
 /* inode.c */
+int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page);
+int btrfs_readpage(struct file *file, struct page *page);
 void btrfs_delete_inode(struct inode *inode);
 void btrfs_read_locked_inode(struct inode *inode);
 int btrfs_write_inode(struct inode *inode, int wait);
fs/btrfs/disk-io.c +2 −2
@@ -155,9 +155,9 @@ int btrfs_csum_data(struct btrfs_root * root, char *data, size_t len,
 	desc.tfm = tfm;
 	desc.flags = 0;
 	sg_init_one(&sg, data, len);
-	spin_lock(&root->fs_info->hash_lock);
+	spin_lock_irq(&root->fs_info->hash_lock);
 	ret = crypto_hash_digest(&desc, &sg, 1, result);
-	spin_unlock(&root->fs_info->hash_lock);
+	spin_unlock_irq(&root->fs_info->hash_lock);
 	if (ret) {
 		printk("digest failed\n");
 	}
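The switch to the irq-disabling lock variants here pairs with the new completion handler later in this commit: btrfs_csum_data() is now also called from btrfs_end_buffer_async_read() in the read-completion (interrupt) path, so a process-context holder of hash_lock must keep interrupts off, or completion running on the same CPU could try to retake the lock and spin forever. A minimal sketch of the pattern, not part of the commit (demo_lock and both function names are made up):

	#include <linux/spinlock.h>

	static DEFINE_SPINLOCK(demo_lock);

	/* process context: disable irqs while holding the lock, because the
	 * completion path below can take the same lock from irq context */
	static void checksum_from_syscall(void)
	{
		spin_lock_irq(&demo_lock);
		/* ... crypto_hash_digest() under the lock ... */
		spin_unlock_irq(&demo_lock);
	}

	/* I/O completion context: interrupts are already disabled here */
	static void checksum_from_end_io(void)
	{
		spin_lock(&demo_lock);
		/* ... verify the block's csum ... */
		spin_unlock(&demo_lock);
	}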
fs/btrfs/file-item.c +1 −1
@@ -228,6 +228,7 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans,
 			   path->nodes[0]->b_data,
 			   root->fs_info->sb->s_blocksize);
 	ret = btrfs_csum_data(root, data, len, &item->csum);
+// printk("file %lu offset %llu csum %X\n", objectid, (unsigned long long)offset, *(int *)(&item->csum));
 	btrfs_mark_buffer_dirty(path->nodes[0]);
 fail:
 	btrfs_release_path(root, path);
@@ -298,4 +299,3 @@ int btrfs_csum_verify_file_block(struct btrfs_root *root,
 	mutex_unlock(&root->fs_info->fs_mutex);
 	return ret;
 }
-
fs/btrfs/file.c +26 −150
@@ -103,10 +103,6 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans,
 		this_write = min((size_t)PAGE_CACHE_SIZE - offset, write_bytes);
 		/* FIXME, one block at a time */
 
-		mutex_lock(&root->fs_info->fs_mutex);
-		trans = btrfs_start_transaction(root, 1);
-		btrfs_set_trans_block_group(trans, inode);
-
 		bh = page_buffers(pages[i]);
 
 		if (buffer_mapped(bh) && bh->b_blocknr == 0) {
@@ -115,6 +111,10 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans,
 			char *ptr, *kaddr;
 			u32 datasize;
 
+			mutex_lock(&root->fs_info->fs_mutex);
+			trans = btrfs_start_transaction(root, 1);
+			btrfs_set_trans_block_group(trans, inode);
+
 			/* create an inline extent, and copy the data in */
 			path = btrfs_alloc_path();
 			BUG_ON(!path);
@@ -135,24 +135,19 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans,
 			btrfs_set_file_extent_type(ei,
 						   BTRFS_FILE_EXTENT_INLINE);
 			ptr = btrfs_file_extent_inline_start(ei);
+
 			kaddr = kmap_atomic(bh->b_page, KM_USER0);
 			btrfs_memcpy(root, path->nodes[0]->b_data,
 				     ptr, kaddr + bh_offset(bh),
 				     offset + write_bytes);
 			kunmap_atomic(kaddr, KM_USER0);
+
 			mark_buffer_dirty(path->nodes[0]);
 			btrfs_free_path(path);
-		} else if (buffer_mapped(bh)) {
-			/* csum the file data */
-			btrfs_csum_file_block(trans, root, inode->i_ino,
-				      pages[i]->index << PAGE_CACHE_SHIFT,
-				      kmap(pages[i]), PAGE_CACHE_SIZE);
-			kunmap(pages[i]);
-		}
-		SetPageChecked(pages[i]);
-		ret = btrfs_end_transaction(trans, root);
-		BUG_ON(ret);
-		mutex_unlock(&root->fs_info->fs_mutex);
+			ret = btrfs_end_transaction(trans, root);
+			BUG_ON(ret);
+			mutex_unlock(&root->fs_info->fs_mutex);
+		}
 
 		ret = btrfs_commit_write(file, pages[i], offset,
 					 offset + this_write);
@@ -503,7 +498,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
 	if ((pos & (PAGE_CACHE_SIZE - 1))) {
 		pinned[0] = grab_cache_page(inode->i_mapping, first_index);
 		if (!PageUptodate(pinned[0])) {
-			ret = mpage_readpage(pinned[0], btrfs_get_block);
+			ret = btrfs_readpage(NULL, pinned[0]);
 			BUG_ON(ret);
 			wait_on_page_locked(pinned[0]);
 		} else {
@@ -513,7 +508,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
 	if ((pos + count) & (PAGE_CACHE_SIZE - 1)) {
 		pinned[1] = grab_cache_page(inode->i_mapping, last_index);
 		if (!PageUptodate(pinned[1])) {
-			ret = mpage_readpage(pinned[1], btrfs_get_block);
+			ret = btrfs_readpage(NULL, pinned[1]);
 			BUG_ON(ret);
 			wait_on_page_locked(pinned[1]);
 		} else {
@@ -633,138 +628,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
 	return num_written ? num_written : err;
 }
 
-/*
- * FIXME, do this by stuffing the csum we want in the info hanging off
- * page->private.  For now, verify file csums on read
- */
-static int btrfs_read_actor(read_descriptor_t *desc, struct page *page,
-			unsigned long offset, unsigned long size)
-{
-	char *kaddr;
-	unsigned long left, count = desc->count;
-	struct inode *inode = page->mapping->host;
-
-	if (size > count)
-		size = count;
-
-	if (!PageChecked(page)) {
-		/* FIXME, do it per block */
-		struct btrfs_root *root = BTRFS_I(inode)->root;
-		int ret;
-		struct buffer_head *bh;
-
-		if (page_has_buffers(page)) {
-			bh = page_buffers(page);
-			if (!buffer_mapped(bh)) {
-				SetPageChecked(page);
-				goto checked;
-			}
-		}
-
-		ret = btrfs_csum_verify_file_block(root,
-				  page->mapping->host->i_ino,
-				  page->index << PAGE_CACHE_SHIFT,
-				  kmap(page), PAGE_CACHE_SIZE);
-		if (ret) {
-			if (ret != -ENOENT) {
-				printk("failed to verify ino %lu page %lu ret %d\n",
-				       page->mapping->host->i_ino,
-				       page->index, ret);
-				memset(page_address(page), 1, PAGE_CACHE_SIZE);
-				flush_dcache_page(page);
-			}
-		}
-		SetPageChecked(page);
-		kunmap(page);
-	}
-checked:
-	/*
-	 * Faults on the destination of a read are common, so do it before
-	 * taking the kmap.
-	 */
-	if (!fault_in_pages_writeable(desc->arg.buf, size)) {
-		kaddr = kmap_atomic(page, KM_USER0);
-		left = __copy_to_user_inatomic(desc->arg.buf,
-						kaddr + offset, size);
-		kunmap_atomic(kaddr, KM_USER0);
-		if (left == 0)
-			goto success;
-	}
-
-	/* Do it the slow way */
-	kaddr = kmap(page);
-	left = __copy_to_user(desc->arg.buf, kaddr + offset, size);
-	kunmap(page);
-
-	if (left) {
-		size -= left;
-		desc->error = -EFAULT;
-	}
-success:
-	desc->count = count - size;
-	desc->written += size;
-	desc->arg.buf += size;
-	return size;
-}
-
-/**
- * btrfs_file_aio_read - filesystem read routine, with a mod to csum verify
- * @iocb:	kernel I/O control block
- * @iov:	io vector request
- * @nr_segs:	number of segments in the iovec
- * @pos:	current file position
- */
-static ssize_t btrfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
-				   unsigned long nr_segs, loff_t pos)
-{
-	struct file *filp = iocb->ki_filp;
-	ssize_t retval;
-	unsigned long seg;
-	size_t count;
-	loff_t *ppos = &iocb->ki_pos;
-
-	count = 0;
-	for (seg = 0; seg < nr_segs; seg++) {
-		const struct iovec *iv = &iov[seg];
-
-		/*
-		 * If any segment has a negative length, or the cumulative
-		 * length ever wraps negative then return -EINVAL.
-		 */
-		count += iv->iov_len;
-		if (unlikely((ssize_t)(count|iv->iov_len) < 0))
-			return -EINVAL;
-		if (access_ok(VERIFY_WRITE, iv->iov_base, iv->iov_len))
-			continue;
-		if (seg == 0)
-			return -EFAULT;
-		nr_segs = seg;
-		count -= iv->iov_len;	/* This segment is no good */
-		break;
-	}
-	retval = 0;
-	if (count) {
-		for (seg = 0; seg < nr_segs; seg++) {
-			read_descriptor_t desc;
-
-			desc.written = 0;
-			desc.arg.buf = iov[seg].iov_base;
-			desc.count = iov[seg].iov_len;
-			if (desc.count == 0)
-				continue;
-			desc.error = 0;
-			do_generic_file_read(filp, ppos, &desc,
-					     btrfs_read_actor);
-			retval += desc.written;
-			if (desc.error) {
-				retval = retval ?: desc.error;
-				break;
-			}
-		}
-	}
-	return retval;
-}
-
 static int btrfs_sync_file(struct file *file,
 			   struct dentry *dentry, int datasync)
 {
@@ -789,12 +652,25 @@ static int btrfs_sync_file(struct file *file,
 	return ret > 0 ? EIO : ret;
 }
 
+static struct vm_operations_struct btrfs_file_vm_ops = {
+	.nopage		= filemap_nopage,
+	.populate	= filemap_populate,
+	.page_mkwrite	= btrfs_page_mkwrite,
+};
+
+static int btrfs_file_mmap(struct file	*filp, struct vm_area_struct *vma)
+{
+	vma->vm_ops = &btrfs_file_vm_ops;
+	file_accessed(filp);
+	return 0;
+}
+
 struct file_operations btrfs_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= do_sync_read,
-	.aio_read       = btrfs_file_aio_read,
+	.aio_read       = generic_file_aio_read,
 	.write		= btrfs_file_write,
-	.mmap		= generic_file_mmap,
+	.mmap		= btrfs_file_mmap,
 	.open		= generic_file_open,
 	.ioctl		= btrfs_ioctl,
 	.fsync		= btrfs_sync_file,
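The .mmap swap from generic_file_mmap to btrfs_file_mmap is what activates the new handler: the vm_ops installed above still fault pages in through filemap_nopage, but the first store into a shared file-backed mapping now calls btrfs_page_mkwrite() before the page is made writable. A small userspace demonstration of the path being exercised (illustrative only; the file name is an assumption, any file on a btrfs mount works):

	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <sys/mman.h>
	#include <unistd.h>

	int main(void)
	{
		/* /mnt/btrfs/demo is an assumed path on the fs under test */
		int fd = open("/mnt/btrfs/demo", O_RDWR | O_CREAT, 0644);
		if (fd < 0 || ftruncate(fd, 4096) < 0) {
			perror("setup");
			return 1;
		}
		char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
			       MAP_SHARED, fd, 0);
		if (p == MAP_FAILED) {
			perror("mmap");
			return 1;
		}
		/* the first store faults; with this commit the fault path
		 * calls btrfs_page_mkwrite() before making the pte writable */
		memcpy(p, "hello", 5);
		msync(p, 4096, MS_SYNC);	/* writeback csums the block */
		munmap(p, 4096);
		close(fd);
		return 0;
	}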
fs/btrfs/inode.c +297 −6
@@ -31,6 +31,7 @@
 #include <linux/writeback.h>
 #include <linux/statfs.h>
 #include <linux/compat.h>
+#include <linux/bit_spinlock.h>
 #include "ctree.h"
 #include "disk-io.h"
 #include "transaction.h"
@@ -561,7 +562,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
 		goto out;
 
 	if (!PageUptodate(page)) {
-		ret = mpage_readpage(page, btrfs_get_block);
+		ret = btrfs_readpage(NULL, page);
 		lock_page(page);
 		if (!PageUptodate(page)) {
 			ret = -EIO;
@@ -1257,8 +1258,10 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock,
 	path = btrfs_alloc_path();
 	BUG_ON(!path);
 	if (create & BTRFS_GET_BLOCK_CREATE) {
-		WARN_ON(1);
-		/* this almost but not quite works */
+		/*
+		 * danger!, this only works if the page is properly up
+		 * to date somehow
+		 */
 		trans = btrfs_start_transaction(root, 1);
 		if (!trans) {
 			err = -ENOMEM;
@@ -1353,7 +1356,6 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock,
 					       ins.objectid, ins.offset,
 					       ins.offset);
 		BUG_ON(ret);
-		SetPageChecked(result->b_page);
 		btrfs_map_bh_to_logical(root, result, ins.objectid);
 	}
 out:
@@ -1374,6 +1376,40 @@ int btrfs_get_block(struct inode *inode, sector_t iblock,
 	return err;
 }
 
+int btrfs_get_block_csum(struct inode *inode, sector_t iblock,
+		    struct buffer_head *result, int create)
+{
+	int ret;
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	struct page *page = result->b_page;
+	u64 offset = (page->index << PAGE_CACHE_SHIFT) + bh_offset(result);
+	struct btrfs_csum_item *item;
+	struct btrfs_path *path = NULL;
+
+	mutex_lock(&root->fs_info->fs_mutex);
+	ret = btrfs_get_block_lock(inode, iblock, result, create);
+	if (ret)
+		goto out;
+
+	path = btrfs_alloc_path();
+	item = btrfs_lookup_csum(NULL, root, path, inode->i_ino, offset, 0);
+	if (IS_ERR(item)) {
+		ret = PTR_ERR(item);
+		/* a csum that isn't present is a preallocated region. */
+		if (ret == -ENOENT || ret == -EFBIG)
+			ret = 0;
+		result->b_private = 0;
+		goto out;
+	}
+	memcpy((char *)&result->b_private, &item->csum, BTRFS_CRC32_SIZE);
+printk("get_block_sum file %lu offset %llu csum %X\n", inode->i_ino, (unsigned long long)offset, *(int *)(&item->csum));
+out:
+	if (path)
+		btrfs_free_path(path);
+	mutex_unlock(&root->fs_info->fs_mutex);
+	return ret;
+}
+
 static int btrfs_get_block_bmap(struct inode *inode, sector_t iblock,
 			   struct buffer_head *result, int create)
 {
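A detail worth noting in btrfs_get_block_csum(): the expected checksum is not stored in memory that b_private points at; the four crc32 bytes are copied into the pointer field itself (memcpy((char *)&result->b_private, ...)), and the completion handler later compares against &bh->b_private the same way. This works because BTRFS_CRC32_SIZE (4) is no larger than sizeof(void *). A standalone sketch of the trick, with made-up names:

	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	#define CRC_SIZE 4	/* BTRFS_CRC32_SIZE in the patch */

	int main(void)
	{
		void *slot = NULL;		/* plays the role of bh->b_private */
		uint32_t csum_on_disk = 0xdeadbeef;
		uint32_t csum_computed = 0xdeadbeef;

		/* store: the csum bytes go into the pointer value itself */
		memcpy(&slot, &csum_on_disk, CRC_SIZE);

		/* verify at "I/O completion": compare against the slot's bytes */
		if (memcmp(&csum_computed, &slot, CRC_SIZE) == 0)
			printf("csum ok\n");
		else
			printf("csum mismatch\n");
		return 0;
	}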
@@ -1395,9 +1431,198 @@ static int btrfs_prepare_write(struct file *file, struct page *page,
 	return block_prepare_write(page, from, to, btrfs_get_block);
 }
 
-static int btrfs_readpage(struct file *file, struct page *page)
+static void buffer_io_error(struct buffer_head *bh)
 {
-	return mpage_readpage(page, btrfs_get_block);
+	char b[BDEVNAME_SIZE];
+
+	printk(KERN_ERR "Buffer I/O error on device %s, logical block %Lu\n",
+			bdevname(bh->b_bdev, b),
+			(unsigned long long)bh->b_blocknr);
 }
+
+/*
+ * I/O completion handler for block_read_full_page() - pages
+ * which come unlocked at the end of I/O.
+ */
+static void btrfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
+{
+	unsigned long flags;
+	struct buffer_head *first;
+	struct buffer_head *tmp;
+	struct page *page;
+	int page_uptodate = 1;
+	struct inode *inode;
+	int ret;
+
+	BUG_ON(!buffer_async_read(bh));
+
+	page = bh->b_page;
+	inode = page->mapping->host;
+	if (uptodate) {
+		void *kaddr;
+		struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
+		if (bh->b_private) {
+			char csum[BTRFS_CRC32_SIZE];
+			kaddr = kmap_atomic(page, KM_IRQ0);
+			ret = btrfs_csum_data(root, kaddr + bh_offset(bh),
+					      bh->b_size, csum);
+			BUG_ON(ret);
+			if (memcmp(csum, &bh->b_private, BTRFS_CRC32_SIZE)) {
+				u64 offset;
+				offset = (page->index << PAGE_CACHE_SHIFT) +
+					bh_offset(bh);
+				printk("btrfs csum failed ino %lu off %llu\n",
+				       page->mapping->host->i_ino,
+				       (unsigned long long)offset);
+				memset(kaddr + bh_offset(bh), 1, bh->b_size);
+				flush_dcache_page(page);
+printk("bad verify file %lu offset %llu bh_private %lX csum %X\n", inode->i_ino, (unsigned long long)offset, (unsigned long)(bh->b_private), *(int *)csum);
+			}
+			kunmap_atomic(kaddr, KM_IRQ0);
+		}
+		set_buffer_uptodate(bh);
+	} else {
+		clear_buffer_uptodate(bh);
+		if (printk_ratelimit())
+			buffer_io_error(bh);
+		SetPageError(page);
+	}
+
+	/*
+	 * Be _very_ careful from here on. Bad things can happen if
+	 * two buffer heads end IO at almost the same time and both
+	 * decide that the page is now completely done.
+	 */
+	first = page_buffers(page);
+	local_irq_save(flags);
+	bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
+	clear_buffer_async_read(bh);
+	unlock_buffer(bh);
+	tmp = bh;
+	do {
+		if (!buffer_uptodate(tmp))
+			page_uptodate = 0;
+		if (buffer_async_read(tmp)) {
+			BUG_ON(!buffer_locked(tmp));
+			goto still_busy;
+		}
+		tmp = tmp->b_this_page;
+	} while (tmp != bh);
+	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
+	local_irq_restore(flags);
+
+	/*
+	 * If none of the buffers had errors and they are all
+	 * uptodate then we can set the page uptodate.
+	 */
+	if (page_uptodate && !PageError(page))
+		SetPageUptodate(page);
+	unlock_page(page);
+	return;
+
+still_busy:
+	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
+	local_irq_restore(flags);
+	return;
+}
+
+/*
+ * Generic "read page" function for block devices that have the normal
+ * get_block functionality. This is most of the block device filesystems.
+ * Reads the page asynchronously --- the unlock_buffer() and
+ * set/clear_buffer_uptodate() functions propagate buffer state into the
+ * page struct once IO has completed.
+ */
+int btrfs_readpage(struct file *file, struct page *page)
+{
+	struct inode *inode = page->mapping->host;
+	sector_t iblock, lblock;
+	struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
+	unsigned int blocksize;
+	int nr, i;
+	int fully_mapped = 1;
+
+	BUG_ON(!PageLocked(page));
+	blocksize = 1 << inode->i_blkbits;
+	if (!page_has_buffers(page))
+		create_empty_buffers(page, blocksize, 0);
+	head = page_buffers(page);
+
+	iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
+	lblock = (i_size_read(inode)+blocksize-1) >> inode->i_blkbits;
+	bh = head;
+	nr = 0;
+	i = 0;
+
+	do {
+		if (buffer_uptodate(bh))
+			continue;
+
+		if (!buffer_mapped(bh)) {
+			int err = 0;
+
+			fully_mapped = 0;
+			if (iblock < lblock) {
+				WARN_ON(bh->b_size != blocksize);
+				err = btrfs_get_block_csum(inode, iblock,
+							   bh, 0);
+				if (err)
+					SetPageError(page);
+			}
+			if (!buffer_mapped(bh)) {
+				void *kaddr = kmap_atomic(page, KM_USER0);
+				memset(kaddr + i * blocksize, 0, blocksize);
+				flush_dcache_page(page);
+				kunmap_atomic(kaddr, KM_USER0);
+				if (!err)
+					set_buffer_uptodate(bh);
+				continue;
+			}
+			/*
+			 * get_block() might have updated the buffer
+			 * synchronously
+			 */
+			if (buffer_uptodate(bh))
+				continue;
+		}
+		arr[nr++] = bh;
+	} while (i++, iblock++, (bh = bh->b_this_page) != head);
+
+	if (fully_mapped)
+		SetPageMappedToDisk(page);
+
+	if (!nr) {
+		/*
+		 * All buffers are uptodate - we can set the page uptodate
+		 * as well. But not if get_block() returned an error.
+		 */
+		if (!PageError(page))
+			SetPageUptodate(page);
+		unlock_page(page);
+		return 0;
+	}
+
+	/* Stage two: lock the buffers */
+	for (i = 0; i < nr; i++) {
+		bh = arr[i];
+		lock_buffer(bh);
+		bh->b_end_io = btrfs_end_buffer_async_read;
+		set_buffer_async_read(bh);
+	}
+
+	/*
+	 * Stage 3: start the IO.  Check for uptodateness
+	 * inside the buffer lock in case another process reading
+	 * the underlying blockdev brought it uptodate (the sct fix).
+	 */
+	for (i = 0; i < nr; i++) {
+		bh = arr[i];
+		if (buffer_uptodate(bh))
+			btrfs_end_buffer_async_read(bh, 1);
+		else
+			submit_bh(READ, bh);
+	}
+	return 0;
+}
 
 /*
@@ -1424,6 +1649,7 @@ static int __btrfs_write_full_page(struct inode *inode, struct page *page,
 	struct buffer_head *bh, *head;
 	const unsigned blocksize = 1 << inode->i_blkbits;
 	int nr_underway = 0;
+	struct btrfs_root *root = BTRFS_I(inode)->root;
 
 	BUG_ON(!PageLocked(page));
 
@@ -1496,6 +1722,24 @@ static int __btrfs_write_full_page(struct inode *inode, struct page *page,
 			continue;
 		}
 		if (test_clear_buffer_dirty(bh) && bh->b_blocknr != 0) {
+			struct btrfs_trans_handle *trans;
+			int ret;
+			u64 off = page->index << PAGE_CACHE_SHIFT;
+			char *kaddr;
+
+			off += bh_offset(bh);
+			mutex_lock(&root->fs_info->fs_mutex);
+			trans = btrfs_start_transaction(root, 1);
+			btrfs_set_trans_block_group(trans, inode);
+			kaddr = kmap(page);
+			ret = btrfs_csum_file_block(trans, root, inode->i_ino,
+						    off, kaddr + bh_offset(bh),
+						    bh->b_size);
+			kunmap(page);
+			BUG_ON(ret);
+			ret = btrfs_end_transaction(trans, root);
+			BUG_ON(ret);
+			mutex_unlock(&root->fs_info->fs_mutex);
 			mark_buffer_async_write(bh);
 		} else {
 			unlock_buffer(bh);
@@ -1617,6 +1861,53 @@ static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
 	return __btrfs_write_full_page(inode, page, wbc);
 }
 
+/*
+ * btrfs_page_mkwrite() is not allowed to change the file size as it gets
+ * called from a page fault handler when a page is first dirtied. Hence we must
+ * be careful to check for EOF conditions here. We set the page up correctly
+ * for a written page which means we get ENOSPC checking when writing into
+ * holes and correct delalloc and unwritten extent mapping on filesystems that
+ * support these features.
+ *
+ * We are not allowed to take the i_mutex here so we have to play games to
+ * protect against truncate races as the page could now be beyond EOF.  Because
+ * vmtruncate() writes the inode size before removing pages, once we have the
+ * page lock we can determine safely if the page is beyond EOF. If it is not
+ * beyond EOF, then the page is guaranteed safe against truncation until we
+ * unlock the page.
+ */
+int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page)
+{
+	struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
+	unsigned long end;
+	loff_t size;
+	int ret = -EINVAL;
+
+	lock_page(page);
+	wait_on_page_writeback(page);
+printk("btrfs_page_mkwrite %lu %lu\n", page->mapping->host->i_ino, page->index);
+	size = i_size_read(inode);
+	if ((page->mapping != inode->i_mapping) ||
+	    ((page->index << PAGE_CACHE_SHIFT) > size)) {
+		/* page got truncated out from underneath us */
+		goto out_unlock;
+	}
+
+	/* page is wholly or partially inside EOF */
+	if (((page->index + 1) << PAGE_CACHE_SHIFT) > size)
+		end = size & ~PAGE_CACHE_MASK;
+	else
+		end = PAGE_CACHE_SIZE;
+
+	ret = btrfs_prepare_write(NULL, page, 0, end);
+	if (!ret)
+		ret = btrfs_commit_write(NULL, page, 0, end);
+
+out_unlock:
+	unlock_page(page);
+	return ret;
+}
+
 static void btrfs_truncate(struct inode *inode)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
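To make the EOF clamp in btrfs_page_mkwrite() concrete: with 4K pages, a file of i_size 10000 spans pages 0..2, and a fault on page 2 (bytes 8192..12287) is only partially inside EOF, since ((2 + 1) << 12) = 12288 > 10000. end = size & ~PAGE_CACHE_MASK keeps only the low bits, 10000 mod 4096 = 1808, so just bytes 0..1807 of that page are prepared and committed. A small userspace check of the arithmetic (the PAGE_CACHE_* macros reduced to plain 4K-page constants):

	#include <stdio.h>

	#define PAGE_SHIFT	12			/* assumes 4K pages */
	#define PAGE_SIZE	(1UL << PAGE_SHIFT)
	#define PAGE_MASK	(~(PAGE_SIZE - 1))

	int main(void)
	{
		unsigned long size = 10000;	/* i_size */
		unsigned long index = 2;	/* faulting page */
		unsigned long end;

		if (((index + 1) << PAGE_SHIFT) > size)
			end = size & ~PAGE_MASK;	/* partial page: 1808 */
		else
			end = PAGE_SIZE;		/* whole page */
		printf("write out bytes [0, %lu) of page %lu\n", end, index);
		return 0;
	}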