Btrfs: fix file loss on log replay after renaming a file and fsync (2be63d5c) · Commits · e / devices / android_kernel_fairphone_FP4

fs/btrfs/ioctl.c

+2 −2

Original line number	Diff line number	Diff line
		@@ -2471,6 +2471,8 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
		trans->block_rsv = &block_rsv;
		trans->bytes_reserved = block_rsv.size;

		btrfs_record_snapshot_destroy(trans, dir);

		ret = btrfs_unlink_subvol(trans, root, dir,
		dest->root_key.objectid,
		dentry->d_name.name,
		@@ -2522,8 +2524,6 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
		out_end_trans:
		trans->block_rsv = NULL;
		trans->bytes_reserved = 0;
		if (!err)
		btrfs_record_snapshot_destroy(trans, dir);
		ret = btrfs_end_transaction(trans, root);
		if (ret && !err)
		err = ret;

fs/btrfs/tree-log.c

+57 −10

Original line number	Diff line number	Diff line
		@@ -4771,6 +4771,42 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
		return err;
		}

		/*
		* Check if we must fall back to a transaction commit when logging an inode.
		* This must be called after logging the inode and is used only in the context
		* when fsyncing an inode requires the need to log some other inode - in which
		* case we can't lock the i_mutex of each other inode we need to log as that
		* can lead to deadlocks with concurrent fsync against other inodes (as we can
		* log inodes up or down in the hierarchy) or rename operations for example. So
		* we take the log_mutex of the inode after we have logged it and then check for
		* its last_unlink_trans value - this is safe because any task setting
		* last_unlink_trans must take the log_mutex and it must do this before it does
		* the actual unlink operation, so if we do this check before a concurrent task
		* sets last_unlink_trans it means we've logged a consistent version/state of
		* all the inode items, otherwise we are not sure and must do a transaction
		* commit (the concurrent task might have only updated last_unlink_trans before
		* we logged the inode or it might have also done the unlink).
		*/
		static bool btrfs_must_commit_transaction(struct btrfs_trans_handle *trans,
							  struct inode *inode)
		{
			struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
			bool must_commit;

			/*
			 * The comparison is made while holding the inode's log_mutex, the
			 * same mutex that is held around the stores to last_unlink_trans.
			 */
			mutex_lock(&BTRFS_I(inode)->log_mutex);
			must_commit = BTRFS_I(inode)->last_unlink_trans >
				      fs_info->last_trans_committed;
			/*
			 * An unlink was recorded after the last committed transaction:
			 * force any commits to the log to be full commits.
			 */
			if (must_commit)
				btrfs_set_log_full_commit(fs_info, trans);
			mutex_unlock(&BTRFS_I(inode)->log_mutex);

			/* true means the caller has to fall back to a transaction commit. */
			return must_commit;
		}

		/*
		* follow the dentry parent pointers up the chain and see if any
		* of the directories in it require a full commit before they can
		@@ -4784,7 +4820,6 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
		u64 last_committed)
		{
		int ret = 0;
		struct btrfs_root *root;
		struct dentry *old_parent = NULL;
		struct inode *orig_inode = inode;

		@@ -4816,14 +4851,7 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
		BTRFS_I(inode)->logged_trans = trans->transid;
		smp_mb();

		if (BTRFS_I(inode)->last_unlink_trans > last_committed) {
		root = BTRFS_I(inode)->root;

		/*
		* make sure any commits to the log are forced
		* to be full commits
		*/
		btrfs_set_log_full_commit(root->fs_info, trans);
		if (btrfs_must_commit_transaction(trans, inode)) {
		ret = 1;
		break;
		}
		@@ -4982,6 +5010,9 @@ static int log_new_dir_dentries(struct btrfs_trans_handle *trans,
		btrfs_release_path(path);
		ret = btrfs_log_inode(trans, root, di_inode,
		log_mode, 0, LLONG_MAX, ctx);
		if (!ret &&
		btrfs_must_commit_transaction(trans, di_inode))
		ret = 1;
		iput(di_inode);
		if (ret)
		goto next_dir_inode;
		@@ -5096,6 +5127,9 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans,

		ret = btrfs_log_inode(trans, root, dir_inode,
		LOG_INODE_ALL, 0, LLONG_MAX, ctx);
		if (!ret &&
		btrfs_must_commit_transaction(trans, dir_inode))
		ret = 1;
		iput(dir_inode);
		if (ret)
		goto out;
		@@ -5447,6 +5481,9 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree)
		* They revolve around files that were unlinked from the directory, and
		* this function updates the parent directory so that a full commit is
		* properly done if it is fsync'd later after the unlinks are done.
		*
		* Must be called before the unlink operations (updates to the subvolume tree,
		* inodes, etc) are done.
		*/
		void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
		struct inode *dir, struct inode *inode,
		@@ -5462,8 +5499,11 @@ void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
		* into the file. When the file is logged we check it and
		* don't log the parents if the file is fully on disk.
		*/
		if (S_ISREG(inode->i_mode))
		if (S_ISREG(inode->i_mode)) {
		mutex_lock(&BTRFS_I(inode)->log_mutex);
		BTRFS_I(inode)->last_unlink_trans = trans->transid;
		mutex_unlock(&BTRFS_I(inode)->log_mutex);
		}

		/*
		* if this directory was already logged any new
		@@ -5494,7 +5534,9 @@ void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
		return;

		record:
		mutex_lock(&BTRFS_I(dir)->log_mutex);
		BTRFS_I(dir)->last_unlink_trans = trans->transid;
		mutex_unlock(&BTRFS_I(dir)->log_mutex);
		}

		/*
		@@ -5505,11 +5547,16 @@ void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
		* corresponding to the deleted snapshot's root, which could lead to replaying
		* it after replaying the log tree of the parent directory (which would replay
		* the snapshot delete operation).
		*
		* Must be called before the actual snapshot destroy operation (updates to the
		* parent root and tree of tree roots trees, etc) are done.
		*/
		void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans,
						   struct inode *dir)
		{
			/*
			 * Stamp the directory with the id of the transaction handle,
			 * doing the store while holding the directory's log_mutex.
			 */
			mutex_lock(&BTRFS_I(dir)->log_mutex);
			BTRFS_I(dir)->last_unlink_trans = trans->transid;
			mutex_unlock(&BTRFS_I(dir)->log_mutex);
		}

		/*