Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit a4abeea4 authored by Josef Bacik
Browse files

Btrfs: kill trans_mutex



We use trans_mutex for lots of things; here's a basic list:

1) To serialize trans_handles joining the currently running transaction
2) To make sure that no new trans handles are started while we are committing
3) To protect the dead_roots list and the transaction lists

Serializing trans_handles joining is really not too hard, but it can get badly
bogged down in acquiring a reference to the transaction.  So replace the
trans_mutex with a trans_lock spinlock and use it (plus a few related changes)
to do the following:

1) Protect fs_info->running_transaction.  All trans handles have to do is check
this, and then take a reference of the transaction and keep on going.
2) Protect the fs_info->trans_list.  This doesn't get used too much, basically
it just holds the current transactions, which will usually just be the currently
committing transaction and the currently running transaction at most.
3) Protect the dead roots list.  This is only ever processed by splicing the
list so this is relatively simple.
4) Protect the fs_info->reloc_ctl stuff.  This is very lightweight and was using
the trans_mutex before, so this is a pretty straightforward change.
5) Protect fs_info->trans_no_join.  Because we don't hold the trans_lock over
the entirety of the commit we need to have a way to block new people from
creating a new transaction while we're doing our work.  So we set trans_no_join
and in join_transaction we test to see if that is set, and if it is we do a
wait_on_commit.
6) Make the transaction use count atomic so we don't need to take locks to
modify it when we're dropping references.
7) Add a commit_lock to the transaction to make sure multiple people trying to
commit the same transaction don't race and commit at the same time.
8) Make open_ioctl_trans an atomic so we don't have to take any locks for ioctl
trans.

I have tested this with xfstests, but obviously it is a pretty hairy change so
lots of testing is greatly appreciated.  Thanks,

Signed-off-by: Josef Bacik <josef@redhat.com>
parent 2a1eb461
Loading
Loading
Loading
Loading
+3 −3
Original line number Diff line number Diff line
@@ -919,7 +919,6 @@ struct btrfs_fs_info {
	 * is required instead of the faster short fsync log commits
	 */
	u64 last_trans_log_full_commit;
	u64 open_ioctl_trans;
	unsigned long mount_opt:20;
	unsigned long compress_type:4;
	u64 max_inline;
@@ -936,7 +935,6 @@ struct btrfs_fs_info {
	struct super_block *sb;
	struct inode *btree_inode;
	struct backing_dev_info bdi;
	struct mutex trans_mutex;
	struct mutex tree_log_mutex;
	struct mutex transaction_kthread_mutex;
	struct mutex cleaner_mutex;
@@ -957,6 +955,7 @@ struct btrfs_fs_info {
	struct rw_semaphore subvol_sem;
	struct srcu_struct subvol_srcu;

	spinlock_t trans_lock;
	struct list_head trans_list;
	struct list_head hashers;
	struct list_head dead_roots;
@@ -969,6 +968,7 @@ struct btrfs_fs_info {
	atomic_t async_submit_draining;
	atomic_t nr_async_bios;
	atomic_t async_delalloc_pages;
	atomic_t open_ioctl_trans;

	/*
	 * this is used by the balancing code to wait for all the pending
@@ -1032,6 +1032,7 @@ struct btrfs_fs_info {
	int closing;
	int log_root_recovering;
	int enospc_unlink;
	int trans_no_join;

	u64 total_pinned;

@@ -1053,7 +1054,6 @@ struct btrfs_fs_info {
	struct reloc_control *reloc_ctl;

	spinlock_t delalloc_lock;
	spinlock_t new_trans_lock;
	u64 delalloc_bytes;

	/* data_alloc_cluster is only used in ssd mode */
+15 −15
Original line number Diff line number Diff line
@@ -1551,22 +1551,22 @@ static int transaction_kthread(void *arg)
		vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);
		mutex_lock(&root->fs_info->transaction_kthread_mutex);

		spin_lock(&root->fs_info->new_trans_lock);
		spin_lock(&root->fs_info->trans_lock);
		cur = root->fs_info->running_transaction;
		if (!cur) {
			spin_unlock(&root->fs_info->new_trans_lock);
			spin_unlock(&root->fs_info->trans_lock);
			goto sleep;
		}

		now = get_seconds();
		if (!cur->blocked &&
		    (now < cur->start_time || now - cur->start_time < 30)) {
			spin_unlock(&root->fs_info->new_trans_lock);
			spin_unlock(&root->fs_info->trans_lock);
			delay = HZ * 5;
			goto sleep;
		}
		transid = cur->transid;
		spin_unlock(&root->fs_info->new_trans_lock);
		spin_unlock(&root->fs_info->trans_lock);

		trans = btrfs_join_transaction(root);
		BUG_ON(IS_ERR(trans));
@@ -1658,7 +1658,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
	INIT_LIST_HEAD(&fs_info->ordered_operations);
	INIT_LIST_HEAD(&fs_info->caching_block_groups);
	spin_lock_init(&fs_info->delalloc_lock);
	spin_lock_init(&fs_info->new_trans_lock);
	spin_lock_init(&fs_info->trans_lock);
	spin_lock_init(&fs_info->ref_cache_lock);
	spin_lock_init(&fs_info->fs_roots_radix_lock);
	spin_lock_init(&fs_info->delayed_iput_lock);
@@ -1687,6 +1687,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
	fs_info->sb = sb;
	fs_info->max_inline = 8192 * 1024;
	fs_info->metadata_ratio = 0;
	fs_info->trans_no_join = 0;

	fs_info->thread_pool_size = min_t(unsigned long,
					  num_online_cpus() + 2, 8);
@@ -1735,7 +1736,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
	fs_info->do_barriers = 1;


	mutex_init(&fs_info->trans_mutex);
	mutex_init(&fs_info->ordered_operations_mutex);
	mutex_init(&fs_info->tree_log_mutex);
	mutex_init(&fs_info->chunk_mutex);
@@ -3006,10 +3006,13 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root)

	WARN_ON(1);

	mutex_lock(&root->fs_info->trans_mutex);
	mutex_lock(&root->fs_info->transaction_kthread_mutex);

	spin_lock(&root->fs_info->trans_lock);
	list_splice_init(&root->fs_info->trans_list, &list);
	root->fs_info->trans_no_join = 1;
	spin_unlock(&root->fs_info->trans_lock);

	while (!list_empty(&list)) {
		t = list_entry(list.next, struct btrfs_transaction, list);
		if (!t)
@@ -3034,23 +3037,18 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root)
		t->blocked = 0;
		if (waitqueue_active(&root->fs_info->transaction_wait))
			wake_up(&root->fs_info->transaction_wait);
		mutex_unlock(&root->fs_info->trans_mutex);

		mutex_lock(&root->fs_info->trans_mutex);
		t->commit_done = 1;
		if (waitqueue_active(&t->commit_wait))
			wake_up(&t->commit_wait);
		mutex_unlock(&root->fs_info->trans_mutex);

		mutex_lock(&root->fs_info->trans_mutex);

		btrfs_destroy_pending_snapshots(t);

		btrfs_destroy_delalloc_inodes(root);

		spin_lock(&root->fs_info->new_trans_lock);
		spin_lock(&root->fs_info->trans_lock);
		root->fs_info->running_transaction = NULL;
		spin_unlock(&root->fs_info->new_trans_lock);
		spin_unlock(&root->fs_info->trans_lock);

		btrfs_destroy_marked_extents(root, &t->dirty_pages,
					     EXTENT_DIRTY);
@@ -3064,8 +3062,10 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root)
		kmem_cache_free(btrfs_transaction_cachep, t);
	}

	spin_lock(&root->fs_info->trans_lock);
	root->fs_info->trans_no_join = 0;
	spin_unlock(&root->fs_info->trans_lock);
	mutex_unlock(&root->fs_info->transaction_kthread_mutex);
	mutex_unlock(&root->fs_info->trans_mutex);

	return 0;
}
+2 −1
Original line number Diff line number Diff line
@@ -3200,7 +3200,8 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes)

		/* commit the current transaction and try again */
commit_trans:
		if (!committed && !root->fs_info->open_ioctl_trans) {
		if (!committed &&
		    !atomic_read(&root->fs_info->open_ioctl_trans)) {
			committed = 1;
			trans = btrfs_join_transaction(root);
			if (IS_ERR(trans))
+1 −3
Original line number Diff line number Diff line
@@ -1222,14 +1222,12 @@ int btrfs_sync_file(struct file *file, int datasync)
	 * the current transaction, we can bail out now without any
	 * syncing
	 */
	mutex_lock(&root->fs_info->trans_mutex);
	smp_mb();
	if (BTRFS_I(inode)->last_trans <=
	    root->fs_info->last_trans_committed) {
		BTRFS_I(inode)->last_trans = 0;
		mutex_unlock(&root->fs_info->trans_mutex);
		goto out;
	}
	mutex_unlock(&root->fs_info->trans_mutex);

	/*
	 * ok we haven't committed the transaction yet, lets do a commit
+3 −9
Original line number Diff line number Diff line
@@ -2177,9 +2177,7 @@ static long btrfs_ioctl_trans_start(struct file *file)
	if (ret)
		goto out;

	mutex_lock(&root->fs_info->trans_mutex);
	root->fs_info->open_ioctl_trans++;
	mutex_unlock(&root->fs_info->trans_mutex);
	atomic_inc(&root->fs_info->open_ioctl_trans);

	ret = -ENOMEM;
	trans = btrfs_start_ioctl_transaction(root);
@@ -2190,9 +2188,7 @@ static long btrfs_ioctl_trans_start(struct file *file)
	return 0;

out_drop:
	mutex_lock(&root->fs_info->trans_mutex);
	root->fs_info->open_ioctl_trans--;
	mutex_unlock(&root->fs_info->trans_mutex);
	atomic_dec(&root->fs_info->open_ioctl_trans);
	mnt_drop_write(file->f_path.mnt);
out:
	return ret;
@@ -2426,9 +2422,7 @@ long btrfs_ioctl_trans_end(struct file *file)

	btrfs_end_transaction(trans, root);

	mutex_lock(&root->fs_info->trans_mutex);
	root->fs_info->open_ioctl_trans--;
	mutex_unlock(&root->fs_info->trans_mutex);
	atomic_dec(&root->fs_info->open_ioctl_trans);

	mnt_drop_write(file->f_path.mnt);
	return 0;
Loading