Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 8c2383c3, authored by Chris Mason, committed by David Woodhouse
Browse files

Subject: Rework btrfs_file_write to only allocate while page locks are held

parent 9ebefb18
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -541,6 +541,8 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root
	else
		ret = submit_bh(WRITE, bh);
	if (ret == -EOPNOTSUPP) {
		get_bh(bh);
		lock_buffer(bh);
		set_buffer_uptodate(bh);
		root->fs_info->do_barriers = 0;
		ret = submit_bh(WRITE, bh);
+0 −1
Original line number Diff line number Diff line
@@ -1477,7 +1477,6 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
			break;
		if (wret < 0)
			ret = wret;
		btrfs_btree_balance_dirty(root);
	}
	for (i = 0; i <= orig_level; i++) {
		if (path->nodes[i]) {
+0 −1
Original line number Diff line number Diff line
@@ -228,7 +228,6 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans,
			   path->nodes[0]->b_data,
			   root->fs_info->sb->s_blocksize);
	ret = btrfs_csum_data(root, data, len, &item->csum);
// printk("file %lu offset %llu csum %X\n", objectid, (unsigned long long)offset, *(int *)(&item->csum));
	btrfs_mark_buffer_dirty(path->nodes[0]);
fail:
	btrfs_release_path(root, path);
+133 −111
Original line number Diff line number Diff line
@@ -207,6 +207,7 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans,
			}
			path->slots[0]--;
		}
next_slot:
		keep = 0;
		bookend = 0;
		found_extent = 0;
@@ -214,15 +215,15 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans,
		extent = NULL;
		leaf = btrfs_buffer_leaf(path->nodes[0]);
		slot = path->slots[0];
		ret = 0;
		btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key);
		if (key.offset >= end || key.objectid != inode->i_ino) {
			ret = 0;
			goto out;
		}
		if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) {
			ret = 0;
		if (btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY) {
			goto out;
		}
		if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) {
			extent = btrfs_item_ptr(leaf, slot,
						struct btrfs_file_extent_item);
			found_type = btrfs_file_extent_type(extent);
@@ -234,19 +235,28 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans,
			} else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
				found_inline = 1;
				extent_end = key.offset +
			     btrfs_file_extent_inline_len(leaf->items + slot);
				     btrfs_file_extent_inline_len(leaf->items +
								  slot);
			}
		} else {
			extent_end = search_start;
		}

		/* we found nothing we can drop */
		if (!found_extent && !found_inline) {
			ret = 0;
		if ((!found_extent && !found_inline) ||
		    search_start >= extent_end) {
			int nextret;
			u32 nritems;
			nritems = btrfs_header_nritems(
					btrfs_buffer_header(path->nodes[0]));
			if (slot >= nritems - 1) {
				nextret = btrfs_next_leaf(root, path);
				if (nextret)
					goto out;
			} else {
				path->slots[0]++;
			}

		/* we found nothing inside the range */
		if (search_start >= extent_end) {
			ret = 0;
			goto out;
			goto next_slot;
		}

		/* FIXME, there's only one inline extent allowed right now */
@@ -272,7 +282,6 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans,
			WARN_ON(found_inline);
			bookend = 1;
		}

		/* truncate existing extent */
		if (start > key.offset) {
			u64 new_num;
@@ -337,10 +346,14 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans,
			ins.offset = end;
			ins.flags = 0;
			btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY);

			btrfs_release_path(root, path);
			ret = btrfs_insert_empty_item(trans, root, path, &ins,
						      sizeof(*extent));

			if (ret) {
				btrfs_print_leaf(root, btrfs_buffer_leaf(path->nodes[0]));
				printk("got %d on inserting %Lu %u %Lu start %Lu end %Lu found %Lu %Lu\n", ret , ins.objectid, ins.flags, ins.offset, start, end, key.offset, extent_end);
			}
			BUG_ON(ret);
			extent = btrfs_item_ptr(
				    btrfs_buffer_leaf(path->nodes[0]),
@@ -387,8 +400,7 @@ static int prepare_pages(struct btrfs_root *root,
			 loff_t pos,
			 unsigned long first_index,
			 unsigned long last_index,
			 size_t write_bytes,
			 u64 alloc_extent_start)
			 size_t write_bytes)
{
	int i;
	unsigned long index = pos >> PAGE_CACHE_SHIFT;
@@ -399,6 +411,16 @@ static int prepare_pages(struct btrfs_root *root,
	struct buffer_head *bh;
	struct buffer_head *head;
	loff_t isize = i_size_read(inode);
	struct btrfs_trans_handle *trans;
	u64 hint_block;
	u64 num_blocks;
	u64 alloc_extent_start;
	u64 start_pos;
	struct btrfs_key ins;

	start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1);
	num_blocks = (write_bytes + pos - start_pos + root->blocksize - 1) >>
			inode->i_blkbits;

	memset(pages, 0, num_pages * sizeof(struct page *));

@@ -408,6 +430,72 @@ static int prepare_pages(struct btrfs_root *root,
			err = -ENOMEM;
			goto failed_release;
		}
	}

	mutex_lock(&root->fs_info->fs_mutex);
	trans = btrfs_start_transaction(root, 1);
	if (!trans) {
		err = -ENOMEM;
		mutex_unlock(&root->fs_info->fs_mutex);
		goto out_unlock;
	}
	btrfs_set_trans_block_group(trans, inode);
	/* FIXME blocksize != 4096 */
	inode->i_blocks += num_blocks << 3;
	hint_block = 0;

	/* FIXME...EIEIO, ENOSPC and more */

	/* step one, delete the existing extents in this range */
	/* FIXME blocksize != pagesize */
	if (start_pos < inode->i_size) {
		err = btrfs_drop_extents(trans, root, inode,
			 start_pos, (pos + write_bytes + root->blocksize -1) &
			 ~((u64)root->blocksize - 1), &hint_block);
		BUG_ON(err);
	}

	/* insert any holes we need to create */
	if (inode->i_size < start_pos) {
		u64 last_pos_in_file;
		u64 hole_size;
		u64 mask = root->blocksize - 1;
		last_pos_in_file = (isize + mask) & ~mask;
		hole_size = (start_pos - last_pos_in_file + mask) & ~mask;
		hole_size >>= inode->i_blkbits;
		if (last_pos_in_file < start_pos) {
			err = btrfs_insert_file_extent(trans, root,
						       inode->i_ino,
						       last_pos_in_file,
						       0, 0, hole_size);
		}
		BUG_ON(err);
	}

	/*
	 * either allocate an extent for the new bytes or setup the key
	 * to show we are doing inline data in the extent
	 */
	if (isize >= PAGE_CACHE_SIZE || pos + write_bytes < inode->i_size ||
	    pos + write_bytes - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) {
		err = btrfs_alloc_extent(trans, root, inode->i_ino,
					 num_blocks, hint_block, (u64)-1,
					 &ins, 1);
		BUG_ON(err);
		err = btrfs_insert_file_extent(trans, root, inode->i_ino,
				       start_pos, ins.objectid, ins.offset,
				       ins.offset);
		BUG_ON(err);
	} else {
		ins.offset = 0;
		ins.objectid = 0;
	}
	BUG_ON(err);
	alloc_extent_start = ins.objectid;
	err = btrfs_end_transaction(trans, root);
	mutex_unlock(&root->fs_info->fs_mutex);

	for (i = 0; i < num_pages; i++) {
		cancel_dirty_page(pages[i], PAGE_CACHE_SIZE);
		wait_on_page_writeback(pages[i]);
		offset = pos & (PAGE_CACHE_SIZE -1);
@@ -444,6 +532,11 @@ static int prepare_pages(struct btrfs_root *root,
	if (pos > isize)
		vmtruncate(inode, isize);
	return err;

out_unlock:
	mutex_unlock(&root->fs_info->fs_mutex);
	goto failed_release;

}

static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
@@ -455,16 +548,14 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
	int ret = 0;
	struct inode *inode = file->f_path.dentry->d_inode;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct page *pages[8];
	struct page **pages = NULL;
	int nrptrs;
	struct page *pinned[2];
	unsigned long first_index;
	unsigned long last_index;
	u64 start_pos;
	u64 num_blocks;
	u64 alloc_extent_start;
	u64 hint_block;
	struct btrfs_trans_handle *trans;
	struct btrfs_key ins;

	nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE,
		     PAGE_CACHE_SIZE / (sizeof(struct page *)));
	pinned[0] = NULL;
	pinned[1] = NULL;
	if (file->f_flags & O_DIRECT)
@@ -482,9 +573,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
		goto out;
	file_update_time(file);

	start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1);
	num_blocks = (count + pos - start_pos + root->blocksize - 1) >>
			inode->i_blkbits;
	pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL);

	mutex_lock(&inode->i_mutex);
	first_index = pos >> PAGE_CACHE_SHIFT;
@@ -516,87 +605,20 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
		}
	}

	mutex_lock(&root->fs_info->fs_mutex);
	trans = btrfs_start_transaction(root, 1);
	if (!trans) {
		err = -ENOMEM;
		mutex_unlock(&root->fs_info->fs_mutex);
		goto out_unlock;
	}
	btrfs_set_trans_block_group(trans, inode);
	/* FIXME blocksize != 4096 */
	inode->i_blocks += num_blocks << 3;
	hint_block = 0;

	/* FIXME...EIEIO, ENOSPC and more */

	/* step one, delete the existing extents in this range */
	if (start_pos < inode->i_size) {
		/* FIXME blocksize != pagesize */
		ret = btrfs_drop_extents(trans, root, inode,
					 start_pos,
					 (pos + count + root->blocksize -1) &
					 ~((u64)root->blocksize - 1),
					 &hint_block);
		BUG_ON(ret);
	}

	/* insert any holes we need to create */
	if (inode->i_size < start_pos) {
		u64 last_pos_in_file;
		u64 hole_size;
		u64 mask = root->blocksize - 1;
		last_pos_in_file = (inode->i_size + mask) & ~mask;
		hole_size = (start_pos - last_pos_in_file + mask) & ~mask;
		hole_size >>= inode->i_blkbits;
		if (last_pos_in_file < start_pos) {
			ret = btrfs_insert_file_extent(trans, root,
						       inode->i_ino,
						       last_pos_in_file,
						       0, 0, hole_size);
		}
		BUG_ON(ret);
	}

	/*
	 * either allocate an extent for the new bytes or setup the key
	 * to show we are doing inline data in the extent
	 */
	if (inode->i_size >= PAGE_CACHE_SIZE || pos + count < inode->i_size ||
	    pos + count - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) {
		ret = btrfs_alloc_extent(trans, root, inode->i_ino,
					 num_blocks, hint_block, (u64)-1,
					 &ins, 1);
		BUG_ON(ret);
		ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
				       start_pos, ins.objectid, ins.offset,
				       ins.offset);
		BUG_ON(ret);
	} else {
		ins.offset = 0;
		ins.objectid = 0;
	}
	BUG_ON(ret);
	alloc_extent_start = ins.objectid;
	ret = btrfs_end_transaction(trans, root);
	mutex_unlock(&root->fs_info->fs_mutex);

	while(count > 0) {
		size_t offset = pos & (PAGE_CACHE_SIZE - 1);
		size_t write_bytes = min(count,
					 (size_t)PAGE_CACHE_SIZE - offset);
		size_t write_bytes = min(count, nrptrs * PAGE_CACHE_SIZE -
					 offset);
		size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >>
					PAGE_CACHE_SHIFT;

		WARN_ON(num_pages > nrptrs);
		memset(pages, 0, sizeof(pages));
		ret = prepare_pages(root, file, pages, num_pages,
				    pos, first_index, last_index,
				    write_bytes, alloc_extent_start);
				    write_bytes);
		BUG_ON(ret);

		/* FIXME blocks != pagesize */
		if (alloc_extent_start)
			alloc_extent_start += num_pages;
		ret = btrfs_copy_from_user(pos, num_pages,
					   write_bytes, pages, buf);
		BUG_ON(ret);
@@ -611,13 +633,13 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
		pos += write_bytes;
		num_written += write_bytes;

		balance_dirty_pages_ratelimited(inode->i_mapping);
		balance_dirty_pages_ratelimited_nr(inode->i_mapping, num_pages);
		btrfs_btree_balance_dirty(root);
		cond_resched();
	}
out_unlock:
	mutex_unlock(&inode->i_mutex);
out:
	kfree(pages);
	if (pinned[0])
		page_cache_release(pinned[0]);
	if (pinned[1])
+7 −4
Original line number Diff line number Diff line
@@ -962,7 +962,6 @@ void btrfs_dirty_inode(struct inode *inode)
	btrfs_update_inode(trans, root, inode);
	btrfs_end_transaction(trans, root);
	mutex_unlock(&root->fs_info->fs_mutex);
	btrfs_btree_balance_dirty(root);
}

static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
@@ -1402,7 +1401,6 @@ int btrfs_get_block_csum(struct inode *inode, sector_t iblock,
		goto out;
	}
	memcpy((char *)&result->b_private, &item->csum, BTRFS_CRC32_SIZE);
printk("get_block_sum file %lu offset %llu csum %X\n", inode->i_ino, (unsigned long long)offset, *(int *)(&item->csum));
out:
	if (path)
		btrfs_free_path(path);
@@ -1476,7 +1474,6 @@ static void btrfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
				       (unsigned long long)offset);
				memset(kaddr + bh_offset(bh), 1, bh->b_size);
				flush_dcache_page(page);
printk("bad verify file %lu offset %llu bh_private %lX csum %X\n", inode->i_ino, (unsigned long long)offset, (unsigned long)(bh->b_private), *(int *)csum);
			}
			kunmap_atomic(kaddr, KM_IRQ0);
		}
@@ -1655,6 +1652,13 @@ static int __btrfs_write_full_page(struct inode *inode, struct page *page,

	last_block = (i_size_read(inode) - 1) >> inode->i_blkbits;

	/* no csumming allowed when from PF_MEMALLOC */
	if (current->flags & PF_MEMALLOC) {
		redirty_page_for_writepage(wbc, page);
		unlock_page(page);
		return 0;
	}

	if (!page_has_buffers(page)) {
		create_empty_buffers(page, blocksize,
					(1 << BH_Dirty)|(1 << BH_Uptodate));
@@ -1885,7 +1889,6 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page)

	lock_page(page);
	wait_on_page_writeback(page);
printk("btrfs_page_mkwrite %lu %lu\n", page->mapping->host->i_ino, page->index);
	size = i_size_read(inode);
	if ((page->mapping != inode->i_mapping) ||
	    ((page->index << PAGE_CACHE_SHIFT) > size)) {
Loading