Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 2ab28f32 authored by Josef Bacik's avatar Josef Bacik
Browse files

Btrfs: wait on ordered extents at the last possible moment



Since we don't actually copy the extent information from the source tree in
the fast case we don't need to wait for ordered io to be completed in order
to fsync, we just need to wait for the io to be completed.  So when we're
logging our file just attach all of the ordered extents to the log, and then
when the log syncs just wait for IO_DONE on the ordered extents and then
write the super.  Thanks,

Signed-off-by: default avatarJosef Bacik <jbacik@fusionio.com>
parent dfd79829
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -1623,6 +1623,9 @@ struct btrfs_root {

	struct list_head root_list;

	spinlock_t log_extents_lock[2];
	struct list_head logged_list[2];

	spinlock_t orphan_lock;
	atomic_t orphan_inodes;
	struct btrfs_block_rsv *orphan_block_rsv;
+4 −0
Original line number Diff line number Diff line
@@ -1178,9 +1178,13 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,

	INIT_LIST_HEAD(&root->dirty_list);
	INIT_LIST_HEAD(&root->root_list);
	INIT_LIST_HEAD(&root->logged_list[0]);
	INIT_LIST_HEAD(&root->logged_list[1]);
	spin_lock_init(&root->orphan_lock);
	spin_lock_init(&root->inode_lock);
	spin_lock_init(&root->accounting_lock);
	spin_lock_init(&root->log_extents_lock[0]);
	spin_lock_init(&root->log_extents_lock[1]);
	mutex_init(&root->objectid_mutex);
	mutex_init(&root->log_mutex);
	init_waitqueue_head(&root->log_writer_wait);
+25 −5
Original line number Diff line number Diff line
@@ -1655,16 +1655,21 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
	struct btrfs_root *root = BTRFS_I(inode)->root;
	int ret = 0;
	struct btrfs_trans_handle *trans;
	bool full_sync = 0;

	trace_btrfs_sync_file(file, datasync);

	/*
	 * We write the dirty pages in the range and wait until they complete
	 * out of the ->i_mutex. If so, we can flush the dirty pages by
	 * multi-task, and make the performance up.
	 * multi-task, and make the performance up.  See
	 * btrfs_wait_ordered_range for an explanation of the ASYNC check.
	 */
	atomic_inc(&BTRFS_I(inode)->sync_writers);
	ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
	ret = filemap_fdatawrite_range(inode->i_mapping, start, end);
	if (!ret && test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
			     &BTRFS_I(inode)->runtime_flags))
		ret = filemap_fdatawrite_range(inode->i_mapping, start, end);
	atomic_dec(&BTRFS_I(inode)->sync_writers);
	if (ret)
		return ret;
@@ -1676,6 +1681,9 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
	 * range being left.
	 */
	atomic_inc(&root->log_batch);
	full_sync = test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
			     &BTRFS_I(inode)->runtime_flags);
	if (full_sync)
		btrfs_wait_ordered_range(inode, start, end - start + 1);
	atomic_inc(&root->log_batch);

@@ -1743,14 +1751,26 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)

	if (ret != BTRFS_NO_LOG_SYNC) {
		if (ret > 0) {
			/*
			 * If we didn't already wait for ordered extents we need
			 * to do that now.
			 */
			if (!full_sync)
				btrfs_wait_ordered_range(inode, start,
							 end - start + 1);
			ret = btrfs_commit_transaction(trans, root);
		} else {
			ret = btrfs_sync_log(trans, root);
			if (ret == 0)
			if (ret == 0) {
				ret = btrfs_end_transaction(trans, root);
			else
			} else {
				if (!full_sync)
					btrfs_wait_ordered_range(inode, start,
								 end -
								 start + 1);
				ret = btrfs_commit_transaction(trans, root);
			}
		}
	} else {
		ret = btrfs_end_transaction(trans, root);
	}
+8 −0
Original line number Diff line number Diff line
@@ -700,6 +700,8 @@ retry:
		em->start = async_extent->start;
		em->len = async_extent->ram_size;
		em->orig_start = em->start;
		em->mod_start = em->start;
		em->mod_len = em->len;

		em->block_start = ins.objectid;
		em->block_len = ins.offset;
@@ -892,6 +894,8 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
		em->orig_start = em->start;
		ram_size = ins.offset;
		em->len = ins.offset;
		em->mod_start = em->start;
		em->mod_len = em->len;

		em->block_start = ins.objectid;
		em->block_len = ins.offset;
@@ -1338,6 +1342,8 @@ out_check:
			em->block_start = disk_bytenr;
			em->orig_block_len = disk_num_bytes;
			em->bdev = root->fs_info->fs_devices->latest_bdev;
			em->mod_start = em->start;
			em->mod_len = em->len;
			set_bit(EXTENT_FLAG_PINNED, &em->flags);
			set_bit(EXTENT_FLAG_FILLING, &em->flags);
			em->generation = -1;
@@ -5966,6 +5972,8 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start,

	em->start = start;
	em->orig_start = orig_start;
	em->mod_start = start;
	em->mod_len = len;
	em->len = len;
	em->block_len = block_len;
	em->block_start = block_start;
+68 −0
Original line number Diff line number Diff line
@@ -196,6 +196,9 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
	entry->file_offset = file_offset;
	entry->start = start;
	entry->len = len;
	if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) &&
	    !(type == BTRFS_ORDERED_NOCOW))
		entry->csum_bytes_left = disk_len;
	entry->disk_len = disk_len;
	entry->bytes_left = len;
	entry->inode = igrab(inode);
@@ -213,6 +216,7 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
	INIT_LIST_HEAD(&entry->root_extent_list);
	INIT_LIST_HEAD(&entry->work_list);
	init_completion(&entry->completion);
	INIT_LIST_HEAD(&entry->log_list);

	trace_btrfs_ordered_extent_add(inode, entry);

@@ -270,6 +274,10 @@ void btrfs_add_ordered_sum(struct inode *inode,
	tree = &BTRFS_I(inode)->ordered_tree;
	spin_lock_irq(&tree->lock);
	list_add_tail(&sum->list, &entry->list);
	WARN_ON(entry->csum_bytes_left < sum->len);
	entry->csum_bytes_left -= sum->len;
	if (entry->csum_bytes_left == 0)
		wake_up(&entry->wait);
	spin_unlock_irq(&tree->lock);
}

@@ -405,6 +413,66 @@ out:
	return ret == 0;
}

/* Needs to either be called under a log transaction or the log_mutex */
void btrfs_get_logged_extents(struct btrfs_root *log, struct inode *inode)
{
	struct btrfs_ordered_inode_tree *tree;
	struct btrfs_ordered_extent *ordered;
	struct rb_node *n;
	int index = log->log_transid % 2;

	tree = &BTRFS_I(inode)->ordered_tree;
	spin_lock_irq(&tree->lock);
	for (n = rb_first(&tree->tree); n; n = rb_next(n)) {
		ordered = rb_entry(n, struct btrfs_ordered_extent, rb_node);
		spin_lock(&log->log_extents_lock[index]);
		if (list_empty(&ordered->log_list)) {
			list_add_tail(&ordered->log_list, &log->logged_list[index]);
			atomic_inc(&ordered->refs);
		}
		spin_unlock(&log->log_extents_lock[index]);
	}
	spin_unlock_irq(&tree->lock);
}

void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid)
{
	struct btrfs_ordered_extent *ordered;
	int index = transid % 2;

	spin_lock_irq(&log->log_extents_lock[index]);
	while (!list_empty(&log->logged_list[index])) {
		ordered = list_first_entry(&log->logged_list[index],
					   struct btrfs_ordered_extent,
					   log_list);
		list_del_init(&ordered->log_list);
		spin_unlock_irq(&log->log_extents_lock[index]);
		wait_event(ordered->wait, test_bit(BTRFS_ORDERED_IO_DONE,
						   &ordered->flags));
		btrfs_put_ordered_extent(ordered);
		spin_lock_irq(&log->log_extents_lock[index]);
	}
	spin_unlock_irq(&log->log_extents_lock[index]);
}

void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid)
{
	struct btrfs_ordered_extent *ordered;
	int index = transid % 2;

	spin_lock_irq(&log->log_extents_lock[index]);
	while (!list_empty(&log->logged_list[index])) {
		ordered = list_first_entry(&log->logged_list[index],
					   struct btrfs_ordered_extent,
					   log_list);
		list_del_init(&ordered->log_list);
		spin_unlock_irq(&log->log_extents_lock[index]);
		btrfs_put_ordered_extent(ordered);
		spin_lock_irq(&log->log_extents_lock[index]);
	}
	spin_unlock_irq(&log->log_extents_lock[index]);
}

/*
 * used to drop a reference on an ordered extent.  This will free
 * the extent if the last reference is dropped
Loading