Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 2aaa6655, authored by Josef Bacik, committed by Chris Mason
Browse files

Btrfs: add hole punching



This patch adds hole punching via fallocate.  Thanks,

Signed-off-by: Josef Bacik <jbacik@fusionio.com>
parent 2671485d
Loading
Loading
Loading
Loading
+3 −1
Original line number Diff line number Diff line
@@ -3250,6 +3250,8 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
			struct btrfs_root *root,
			struct inode *dir, u64 objectid,
			const char *name, int name_len);
int btrfs_truncate_page(struct inode *inode, loff_t from, loff_t len,
			int front);
int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
			       struct inode *inode, u64 new_size,
@@ -3323,7 +3325,7 @@ extern const struct file_operations btrfs_file_operations;
int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
			 struct btrfs_root *root, struct inode *inode,
			 struct btrfs_path *path, u64 start, u64 end,
			 int drop_cache);
			 u64 *drop_end, int drop_cache);
int btrfs_drop_extents(struct btrfs_trans_handle *trans,
		       struct btrfs_root *root, struct inode *inode, u64 start,
		       u64 end, int drop_cache);
+2 −0
Original line number Diff line number Diff line
@@ -4132,6 +4132,8 @@ struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root)
/*
 * Tear down a block reservation: pass @rsv to btrfs_block_rsv_release()
 * with a byte count of (u64)-1 and then kfree() the structure.
 *
 * A NULL @rsv is accepted and nothing is done for it, so error paths can
 * call this unconditionally.
 */
void btrfs_free_block_rsv(struct btrfs_root *root,
			  struct btrfs_block_rsv *rsv)
{
	if (rsv) {
		btrfs_block_rsv_release(root, rsv, (u64)-1);
		kfree(rsv);
	}
}
+328 −4
Original line number Diff line number Diff line
@@ -39,6 +39,7 @@
#include "tree-log.h"
#include "locking.h"
#include "compat.h"
#include "volumes.h"

/*
 * when auto defrag is enabled we
@@ -584,7 +585,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
			 struct btrfs_root *root, struct inode *inode,
			 struct btrfs_path *path, u64 start, u64 end,
			 int drop_cache)
			 u64 *drop_end, int drop_cache)
{
	struct extent_buffer *leaf;
	struct btrfs_file_extent_item *fi;
@@ -822,6 +823,8 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
			btrfs_abort_transaction(trans, root, ret);
	}

	if (drop_end)
		*drop_end = min(end, extent_end);
	btrfs_release_path(path);
	return ret;
}
@@ -836,7 +839,7 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans,
	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	ret = __btrfs_drop_extents(trans, root, inode, path, start, end,
	ret = __btrfs_drop_extents(trans, root, inode, path, start, end, NULL,
				   drop_cache);
	btrfs_free_path(path);
	return ret;
@@ -1645,6 +1648,324 @@ static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma)
	return 0;
}

/*
 * Check whether the item at @slot in @leaf can be merged with the range
 * [@start, @end) of @inode.
 *
 * Returns 1 only when all of the following hold, 0 otherwise:
 *  - @slot is a valid index into @leaf,
 *  - the item is a BTRFS_EXTENT_DATA_KEY item belonging to @inode,
 *  - the file extent is of type BTRFS_FILE_EXTENT_REG,
 *  - its disk bytenr is zero,
 *  - it either begins exactly at @end or finishes exactly at @start.
 */
static int hole_mergeable(struct inode *inode, struct extent_buffer *leaf,
			  int slot, u64 start, u64 end)
{
	struct btrfs_file_extent_item *extent;
	struct btrfs_key found;
	u64 extent_end;

	if (slot < 0 || slot >= btrfs_header_nritems(leaf))
		return 0;

	btrfs_item_key_to_cpu(leaf, &found, slot);
	if (found.objectid != btrfs_ino(inode))
		return 0;
	if (found.type != BTRFS_EXTENT_DATA_KEY)
		return 0;

	extent = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
	if (btrfs_file_extent_type(leaf, extent) != BTRFS_FILE_EXTENT_REG ||
	    btrfs_file_extent_disk_bytenr(leaf, extent) != 0)
		return 0;

	/* the item starts right where the range stops */
	if (found.offset == end)
		return 1;

	/* the item stops right where the range starts */
	extent_end = found.offset + btrfs_file_extent_num_bytes(leaf, extent);
	return extent_end == start;
}

/*
 * fill_holes - record [@offset, @end) of @inode as a hole
 * @trans - running transaction handle
 * @inode - inode whose range is being turned into a hole
 * @path - caller supplied path; it is released before returning on every
 *	path except an early btrfs_search_slot() failure
 * @offset - start of the hole
 * @end - first byte after the hole
 *
 * The on-disk side is handled by either growing a neighbouring hole extent
 * item (see hole_mergeable()) or, when neither neighbour qualifies, by
 * inserting a new file extent item with a zero disk bytenr.  Afterwards a
 * matching EXTENT_MAP_HOLE extent map is put into the inode's extent map
 * tree and moved onto its modified_extents list.
 *
 * Returns a negative value when btrfs_search_slot() or
 * btrfs_insert_file_extent() fails and 0 otherwise.  Failing to allocate or
 * add the extent map is not reported as an error; the inode is flagged
 * with BTRFS_INODE_NEEDS_FULL_SYNC instead.
 */
static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode,
		      struct btrfs_path *path, u64 offset, u64 end)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct extent_buffer *leaf;
	struct btrfs_file_extent_item *fi;
	struct extent_map *hole_em;
	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
	struct btrfs_key key;
	int ret;

	key.objectid = btrfs_ino(inode);
	key.type = BTRFS_EXTENT_DATA_KEY;
	key.offset = offset;

	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	if (ret < 0)
		return ret;
	/* an exact match would mean an extent item still starts at @offset */
	BUG_ON(!ret);

	leaf = path->nodes[0];

	/* try to grow the hole extent that ends where our range begins */
	if (hole_mergeable(inode, leaf, path->slots[0]-1, offset, end)) {
		u64 num_bytes;

		path->slots[0]--;
		fi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);
		num_bytes = btrfs_file_extent_num_bytes(leaf, fi) +
			end - offset;
		btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
		btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
		btrfs_set_file_extent_offset(leaf, fi, 0);
		btrfs_mark_buffer_dirty(leaf);
		goto out;
	}

	/*
	 * The key was not found, so path->slots[0] is the slot the key would
	 * be inserted at, i.e. it already refers to the item that follows
	 * our range.  Checking slots[0] + 1 here would skip that neighbour
	 * and the merge would never trigger.
	 */
	if (hole_mergeable(inode, leaf, path->slots[0], offset, end)) {
		u64 num_bytes;

		/* pull the start of the following hole back to @offset */
		key.offset = offset;
		btrfs_set_item_key_safe(trans, root, path, &key);
		fi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);
		num_bytes = btrfs_file_extent_num_bytes(leaf, fi) + end -
			offset;
		btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
		btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
		btrfs_set_file_extent_offset(leaf, fi, 0);
		btrfs_mark_buffer_dirty(leaf);
		goto out;
	}
	btrfs_release_path(path);

	/* no neighbour to extend, insert a stand-alone hole extent item */
	ret = btrfs_insert_file_extent(trans, root, btrfs_ino(inode), offset,
				       0, 0, end - offset, 0, end - offset,
				       0, 0, 0);
	if (ret)
		return ret;

out:
	btrfs_release_path(path);

	hole_em = alloc_extent_map();
	if (!hole_em) {
		/*
		 * Without an extent map we can only drop the cached range
		 * and flag the inode as needing a full sync.
		 */
		btrfs_drop_extent_cache(inode, offset, end - 1, 0);
		set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
			&BTRFS_I(inode)->runtime_flags);
	} else {
		hole_em->start = offset;
		hole_em->len = end - offset;
		hole_em->orig_start = offset;

		hole_em->block_start = EXTENT_MAP_HOLE;
		hole_em->block_len = 0;
		hole_em->bdev = root->fs_info->fs_devices->latest_bdev;
		hole_em->compress_type = BTRFS_COMPRESS_NONE;
		hole_em->generation = trans->transid;

		/*
		 * Drop whatever is cached for the range and retry the insert
		 * for as long as add_extent_mapping() reports -EEXIST.
		 */
		do {
			btrfs_drop_extent_cache(inode, offset, end - 1, 0);
			write_lock(&em_tree->lock);
			ret = add_extent_mapping(em_tree, hole_em);
			if (!ret)
				list_move(&hole_em->list,
					  &em_tree->modified_extents);
			write_unlock(&em_tree->lock);
		} while (ret == -EEXIST);
		free_extent_map(hole_em);
		if (ret)
			set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
				&BTRFS_I(inode)->runtime_flags);
	}

	return 0;
}

/*
 * btrfs_punch_hole - turn [@offset, @offset + @len) of @inode into a hole
 * @inode - inode to punch the hole in
 * @offset - first byte of the hole
 * @len - number of bytes to punch
 *
 * Partial pages at both ends of the range are zeroed through
 * btrfs_truncate_page().  The sector aligned interior
 * [lockstart, lockend] is then locked in the io tree, its extents are
 * dropped with __btrfs_drop_extents() and replaced by hole extents via
 * fill_holes().  The drop runs against a private, failfast block
 * reservation; every -ENOSPC from it ends the current transaction and
 * continues in a fresh one from where the drop stopped.
 *
 * i_mutex is held from the i_size check until the function returns.
 *
 * Returns 0 on success, also when @offset is at or beyond i_size or the
 * range contains no complete sector, and a negative errno otherwise.
 */
static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct extent_state *cached_state = NULL;
	struct btrfs_path *path;
	struct btrfs_block_rsv *rsv;
	struct btrfs_trans_handle *trans;
	u64 mask = BTRFS_I(inode)->root->sectorsize - 1;
	/* round the start up and the end down to whole sectors */
	u64 lockstart = (offset + mask) & ~mask;
	u64 lockend = ((offset + len) & ~mask) - 1;
	u64 cur_offset = lockstart;
	u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
	u64 drop_end;
	unsigned long nr;
	int ret = 0;
	int err = 0;
	bool same_page = (offset >> PAGE_CACHE_SHIFT) ==
		((offset + len) >> PAGE_CACHE_SHIFT);

	btrfs_wait_ordered_range(inode, offset, len);

	mutex_lock(&inode->i_mutex);
	if (offset >= inode->i_size) {
		mutex_unlock(&inode->i_mutex);
		return 0;
	}

	/*
	 * Only do this if we are in the same page and we aren't doing the
	 * entire page.
	 */
	if (same_page && len < PAGE_CACHE_SIZE) {
		ret = btrfs_truncate_page(inode, offset, len, 0);
		mutex_unlock(&inode->i_mutex);
		return ret;
	}

	/* zero back part of the first page */
	ret = btrfs_truncate_page(inode, offset, 0, 0);
	if (ret) {
		mutex_unlock(&inode->i_mutex);
		return ret;
	}

	/* zero the front end of the last page */
	ret = btrfs_truncate_page(inode, offset + len, 0, 1);
	if (ret) {
		mutex_unlock(&inode->i_mutex);
		return ret;
	}

	/* no complete sector inside the range, the zeroing was all of it */
	if (lockend < lockstart) {
		mutex_unlock(&inode->i_mutex);
		return 0;
	}

	while (1) {
		struct btrfs_ordered_extent *ordered;

		truncate_pagecache_range(inode, lockstart, lockend);

		lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
				 0, &cached_state);
		ordered = btrfs_lookup_first_ordered_extent(inode, lockend);

		/*
		 * We need to make sure we have no ordered extents in this range
		 * and nobody raced in and read a page in this range, if we did
		 * we need to try again.
		 *
		 * file_offset + len is the first byte after the ordered
		 * extent, so one that ends exactly at lockstart does not
		 * overlap the locked range and must not force a retry.
		 */
		if ((!ordered ||
		    (ordered->file_offset + ordered->len <= lockstart ||
		     ordered->file_offset > lockend)) &&
		     !test_range_bit(&BTRFS_I(inode)->io_tree, lockstart,
				     lockend, EXTENT_UPTODATE, 0,
				     cached_state)) {
			if (ordered)
				btrfs_put_ordered_extent(ordered);
			break;
		}
		if (ordered)
			btrfs_put_ordered_extent(ordered);
		unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart,
				     lockend, &cached_state, GFP_NOFS);
		btrfs_wait_ordered_range(inode, lockstart,
					 lockend - lockstart + 1);
	}

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto out;
	}

	rsv = btrfs_alloc_block_rsv(root);
	if (!rsv) {
		/* out_free copes with the NULL rsv */
		ret = -ENOMEM;
		goto out_free;
	}
	rsv->size = btrfs_calc_trunc_metadata_size(root, 1);
	rsv->failfast = 1;

	/*
	 * 1 - update the inode
	 * 1 - removing the extents in the range
	 * 1 - adding the hole extent
	 */
	trans = btrfs_start_transaction(root, 3);
	if (IS_ERR(trans)) {
		err = PTR_ERR(trans);
		goto out_free;
	}

	ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, rsv,
				      min_size);
	BUG_ON(ret);
	trans->block_rsv = rsv;

	while (cur_offset < lockend) {
		ret = __btrfs_drop_extents(trans, root, inode, path,
					   cur_offset, lockend + 1,
					   &drop_end, 1);
		/* anything but -ENOSPC means we are done or have failed */
		if (ret != -ENOSPC)
			break;

		/* the hole item is charged to the transaction's reservation */
		trans->block_rsv = &root->fs_info->trans_block_rsv;

		ret = fill_holes(trans, inode, path, cur_offset, drop_end);
		if (ret) {
			err = ret;
			break;
		}

		cur_offset = drop_end;

		ret = btrfs_update_inode(trans, root, inode);
		if (ret) {
			err = ret;
			break;
		}

		nr = trans->blocks_used;
		btrfs_end_transaction(trans, root);
		btrfs_btree_balance_dirty(root, nr);

		trans = btrfs_start_transaction(root, 3);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
			/* tells out_trans there is nothing left to end */
			trans = NULL;
			break;
		}

		ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv,
					      rsv, min_size);
		BUG_ON(ret);	/* shouldn't happen */
		trans->block_rsv = rsv;
	}

	if (ret) {
		err = ret;
		goto out_trans;
	}

	/* cover what the final, successful drop removed */
	trans->block_rsv = &root->fs_info->trans_block_rsv;
	ret = fill_holes(trans, inode, path, cur_offset, drop_end);
	if (ret) {
		err = ret;
		goto out_trans;
	}

out_trans:
	if (!trans)
		goto out_free;

	trans->block_rsv = &root->fs_info->trans_block_rsv;
	ret = btrfs_update_inode(trans, root, inode);
	nr = trans->blocks_used;
	btrfs_end_transaction(trans, root);
	btrfs_btree_balance_dirty(root, nr);
out_free:
	btrfs_free_path(path);
	btrfs_free_block_rsv(root, rsv);
out:
	unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
			     &cached_state, GFP_NOFS);
	mutex_unlock(&inode->i_mutex);
	/* the first error recorded wins over a later one */
	if (ret && !err)
		err = ret;
	return err;
}

static long btrfs_fallocate(struct file *file, int mode,
			    loff_t offset, loff_t len)
{
@@ -1663,10 +1984,13 @@ static long btrfs_fallocate(struct file *file, int mode,
	alloc_start = offset & ~mask;
	alloc_end =  (offset + len + mask) & ~mask;

	/* We only support the FALLOC_FL_KEEP_SIZE mode */
	if (mode & ~FALLOC_FL_KEEP_SIZE)
	/* Make sure we aren't being give some crap mode */
	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
		return -EOPNOTSUPP;

	if (mode & FALLOC_FL_PUNCH_HOLE)
		return btrfs_punch_hole(inode, offset, len);

	/*
	 * Make sure we have enough space before we do the
	 * allocation.
+21 −7
Original line number Diff line number Diff line
@@ -3475,12 +3475,20 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
}

/*
 * taken from block_truncate_page, but does cow as it zeros out
 * any bytes left in the last page in the file.
 * btrfs_truncate_page - read, zero a chunk and write a page
 * @inode - inode that we're zeroing
 * @from - the offset to start zeroing
 * @len - the length to zero, 0 to zero the entire range respective to the
 *	offset
 * @front - zero up to the offset instead of from the offset on
 *
 * This will find the page for the "from" offset and cow the page and zero the
 * part we want to zero.  This is used with truncate and hole punching.
 */
static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
int btrfs_truncate_page(struct inode *inode, loff_t from, loff_t len,
			int front)
{
	struct inode *inode = mapping->host;
	struct address_space *mapping = inode->i_mapping;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct btrfs_ordered_extent *ordered;
@@ -3495,7 +3503,8 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
	u64 page_start;
	u64 page_end;

	if ((offset & (blocksize - 1)) == 0)
	if ((offset & (blocksize - 1)) == 0 &&
	    (!len || ((len & (blocksize - 1)) == 0)))
		goto out;
	ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
	if (ret)
@@ -3555,8 +3564,13 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from)

	ret = 0;
	if (offset != PAGE_CACHE_SIZE) {
		if (!len)
			len = PAGE_CACHE_SIZE - offset;
		kaddr = kmap(page);
		memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
		if (front)
			memset(kaddr, 0, offset);
		else
			memset(kaddr + offset, 0, len);
		flush_dcache_page(page);
		kunmap(page);
	}
@@ -6796,7 +6810,7 @@ static int btrfs_truncate(struct inode *inode)
	u64 mask = root->sectorsize - 1;
	u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);

	ret = btrfs_truncate_page(inode->i_mapping, inode->i_size);
	ret = btrfs_truncate_page(inode, inode->i_size, 0, 0);
	if (ret)
		return ret;

+1 −1
Original line number Diff line number Diff line
@@ -2842,7 +2842,7 @@ static int log_one_extent(struct btrfs_trans_handle *trans,

	if (BTRFS_I(inode)->logged_trans == trans->transid) {
		ret = __btrfs_drop_extents(trans, log, inode, dst_path, start,
					   start + len, 0);
					   start + len, NULL, 0);
		if (ret)
			return ret;
	}