Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 5eecb9cc authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull btrfs updates from Chris Mason:
 "I held off on my rc5 pull because I hit an oops during log recovery
  after a crash.  I wanted to make sure it wasn't a regression because
  we have some logging fixes in here.

  It turns out that a commit during the merge window just made it much
  more likely to trigger directory logging instead of full commits,
  which exposed an old bug.

  The new backref walking code got some additional fixes.  This should
  be the final set of them.

  Josef fixed up a corner where our O_DIRECT writes and buffered reads
  could expose old file contents (not stale, just not the most recent).
  He and Liu Bo fixed crashes during tree log recover as well.

  Ilya fixed errors while we resume disk balancing operations on
  readonly mounts."

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs:
  Btrfs: run delayed directory updates during log replay
  Btrfs: hold a ref on the inode during writepages
  Btrfs: fix tree log remove space corner case
  Btrfs: fix wrong check during log recovery
  Btrfs: use _IOR for BTRFS_IOC_SUBVOL_GETFLAGS
  Btrfs: resume balance on rw (re)mounts properly
  Btrfs: restore restriper state on all mounts
  Btrfs: fix dio write vs buffered read race
  Btrfs: don't count I/O statistic read errors for missing devices
  Btrfs: resolve tree mod log locking issue in btrfs_next_leaf
  Btrfs: fix tree mod log rewind of ADD operations
  Btrfs: leave critical region in btrfs_find_all_roots as soon as possible
  Btrfs: always put insert_ptr modifications into the tree mod log
  Btrfs: fix tree mod log for root replacements at leaf level
  Btrfs: support root level changes in __resolve_indirect_ref
  Btrfs: avoid waiting for delayed refs when we must not
parents 62ad6449 b6305567
Loading
Loading
Loading
Loading
+9 −6
Original line number Diff line number Diff line
@@ -301,11 +301,15 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
		goto out;

	eb = path->nodes[level];
	if (!eb) {
	while (!eb) {
		if (!level) {
			WARN_ON(1);
			ret = 1;
			goto out;
		}
		level--;
		eb = path->nodes[level];
	}

	ret = add_all_parents(root, path, parents, level, &ref->key_for_search,
				time_seq, ref->wanted_disk_byte,
@@ -835,6 +839,7 @@ again:
			}
			ret = __add_delayed_refs(head, delayed_ref_seq,
						 &prefs_delayed);
			mutex_unlock(&head->mutex);
			if (ret) {
				spin_unlock(&delayed_refs->lock);
				goto out;
@@ -928,8 +933,6 @@ again:
	}

out:
	if (head)
		mutex_unlock(&head->mutex);
	btrfs_free_path(path);
	while (!list_empty(&prefs)) {
		ref = list_first_entry(&prefs, struct __prelim_ref, list);
+35 −25
Original line number Diff line number Diff line
@@ -1024,11 +1024,18 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info,
		if (!looped && !tm)
			return 0;
		/*
		 * we must have key remove operations in the log before the
		 * replace operation.
		 * if there are no tree operation for the oldest root, we simply
		 * return it. this should only happen if that (old) root is at
		 * level 0.
		 */
		BUG_ON(!tm);
		if (!tm)
			break;

		/*
		 * if there's an operation that's not a root replacement, we
		 * found the oldest version of our root. normally, we'll find a
		 * MOD_LOG_KEY_REMOVE_WHILE_FREEING operation here.
		 */
		if (tm->op != MOD_LOG_ROOT_REPLACE)
			break;

@@ -1087,11 +1094,7 @@ __tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq,
						      tm->generation);
			break;
		case MOD_LOG_KEY_ADD:
			if (tm->slot != n - 1) {
				o_dst = btrfs_node_key_ptr_offset(tm->slot);
				o_src = btrfs_node_key_ptr_offset(tm->slot + 1);
				memmove_extent_buffer(eb, o_dst, o_src, p_size);
			}
			/* if a move operation is needed it's in the log */
			n--;
			break;
		case MOD_LOG_MOVE_KEYS:
@@ -1192,16 +1195,8 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
	}

	tm = tree_mod_log_search(root->fs_info, logical, time_seq);
	/*
	 * there was an item in the log when __tree_mod_log_oldest_root
	 * returned. this one must not go away, because the time_seq passed to
	 * us must be blocking its removal.
	 */
	BUG_ON(!tm);

	if (old_root)
		eb = alloc_dummy_extent_buffer(tm->index << PAGE_CACHE_SHIFT,
					       root->nodesize);
		eb = alloc_dummy_extent_buffer(logical, root->nodesize);
	else
		eb = btrfs_clone_extent_buffer(root->node);
	btrfs_tree_read_unlock(root->node);
@@ -1216,7 +1211,10 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
		btrfs_set_header_level(eb, old_root->level);
		btrfs_set_header_generation(eb, old_generation);
	}
	if (tm)
		__tree_mod_log_rewind(eb, time_seq, tm);
	else
		WARN_ON(btrfs_header_level(eb) != 0);
	extent_buffer_get(eb);

	return eb;
@@ -2995,7 +2993,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
static void insert_ptr(struct btrfs_trans_handle *trans,
		       struct btrfs_root *root, struct btrfs_path *path,
		       struct btrfs_disk_key *key, u64 bytenr,
		       int slot, int level, int tree_mod_log)
		       int slot, int level)
{
	struct extent_buffer *lower;
	int nritems;
@@ -3008,7 +3006,7 @@ static void insert_ptr(struct btrfs_trans_handle *trans,
	BUG_ON(slot > nritems);
	BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(root));
	if (slot != nritems) {
		if (tree_mod_log && level)
		if (level)
			tree_mod_log_eb_move(root->fs_info, lower, slot + 1,
					     slot, nritems - slot);
		memmove_extent_buffer(lower,
@@ -3016,7 +3014,7 @@ static void insert_ptr(struct btrfs_trans_handle *trans,
			      btrfs_node_key_ptr_offset(slot),
			      (nritems - slot) * sizeof(struct btrfs_key_ptr));
	}
	if (tree_mod_log && level) {
	if (level) {
		ret = tree_mod_log_insert_key(root->fs_info, lower, slot,
					      MOD_LOG_KEY_ADD);
		BUG_ON(ret < 0);
@@ -3104,7 +3102,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
	btrfs_mark_buffer_dirty(split);

	insert_ptr(trans, root, path, &disk_key, split->start,
		   path->slots[level + 1] + 1, level + 1, 1);
		   path->slots[level + 1] + 1, level + 1);

	if (path->slots[level] >= mid) {
		path->slots[level] -= mid;
@@ -3641,7 +3639,7 @@ static noinline void copy_for_split(struct btrfs_trans_handle *trans,
	btrfs_set_header_nritems(l, mid);
	btrfs_item_key(right, &disk_key, 0);
	insert_ptr(trans, root, path, &disk_key, right->start,
		   path->slots[1] + 1, 1, 0);
		   path->slots[1] + 1, 1);

	btrfs_mark_buffer_dirty(right);
	btrfs_mark_buffer_dirty(l);
@@ -3848,7 +3846,7 @@ again:
		if (mid <= slot) {
			btrfs_set_header_nritems(right, 0);
			insert_ptr(trans, root, path, &disk_key, right->start,
				   path->slots[1] + 1, 1, 0);
				   path->slots[1] + 1, 1);
			btrfs_tree_unlock(path->nodes[0]);
			free_extent_buffer(path->nodes[0]);
			path->nodes[0] = right;
@@ -3857,7 +3855,7 @@ again:
		} else {
			btrfs_set_header_nritems(right, 0);
			insert_ptr(trans, root, path, &disk_key, right->start,
					  path->slots[1], 1, 0);
					  path->slots[1], 1);
			btrfs_tree_unlock(path->nodes[0]);
			free_extent_buffer(path->nodes[0]);
			path->nodes[0] = right;
@@ -5121,6 +5119,18 @@ again:

		if (!path->skip_locking) {
			ret = btrfs_try_tree_read_lock(next);
			if (!ret && time_seq) {
				/*
				 * If we don't get the lock, we may be racing
				 * with push_leaf_left, holding that lock while
				 * itself waiting for the leaf we've currently
				 * locked. To solve this situation, we give up
				 * on our lock and cycle.
				 */
				btrfs_release_path(path);
				cond_resched();
				goto again;
			}
			if (!ret) {
				btrfs_set_path_blocking(path);
				btrfs_tree_read_lock(next);
+21 −13
Original line number Diff line number Diff line
@@ -2354,12 +2354,17 @@ retry_root_backup:
				  BTRFS_CSUM_TREE_OBJECTID, csum_root);
	if (ret)
		goto recovery_tree_root;

	csum_root->track_dirty = 1;

	fs_info->generation = generation;
	fs_info->last_trans_committed = generation;

	ret = btrfs_recover_balance(fs_info);
	if (ret) {
		printk(KERN_WARNING "btrfs: failed to recover balance\n");
		goto fail_block_groups;
	}

	ret = btrfs_init_dev_stats(fs_info);
	if (ret) {
		printk(KERN_ERR "btrfs: failed to init dev_stats: %d\n",
@@ -2485,20 +2490,23 @@ retry_root_backup:
		goto fail_trans_kthread;
	}

	if (!(sb->s_flags & MS_RDONLY)) {
	if (sb->s_flags & MS_RDONLY)
		return 0;

	down_read(&fs_info->cleanup_work_sem);
		err = btrfs_orphan_cleanup(fs_info->fs_root);
		if (!err)
			err = btrfs_orphan_cleanup(fs_info->tree_root);
	if ((ret = btrfs_orphan_cleanup(fs_info->fs_root)) ||
	    (ret = btrfs_orphan_cleanup(fs_info->tree_root))) {
		up_read(&fs_info->cleanup_work_sem);

		if (!err)
			err = btrfs_recover_balance(fs_info->tree_root);

		if (err) {
		close_ctree(tree_root);
			return err;
		return ret;
	}
	up_read(&fs_info->cleanup_work_sem);

	ret = btrfs_resume_balance_async(fs_info);
	if (ret) {
		printk(KERN_WARNING "btrfs: failed to resume balance\n");
		close_ctree(tree_root);
		return ret;
	}

	return 0;
+6 −5
Original line number Diff line number Diff line
@@ -2347,12 +2347,10 @@ next:
	return count;
}


static void wait_for_more_refs(struct btrfs_delayed_ref_root *delayed_refs,
			unsigned long num_refs)
			       unsigned long num_refs,
			       struct list_head *first_seq)
{
	struct list_head *first_seq = delayed_refs->seq_head.next;

	spin_unlock(&delayed_refs->lock);
	pr_debug("waiting for more refs (num %ld, first %p)\n",
		 num_refs, first_seq);
@@ -2381,6 +2379,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
	struct btrfs_delayed_ref_root *delayed_refs;
	struct btrfs_delayed_ref_node *ref;
	struct list_head cluster;
	struct list_head *first_seq = NULL;
	int ret;
	u64 delayed_start;
	int run_all = count == (unsigned long)-1;
@@ -2436,8 +2435,10 @@ again:
				 */
				consider_waiting = 1;
				num_refs = delayed_refs->num_entries;
				first_seq = root->fs_info->tree_mod_seq_list.next;
			} else {
				wait_for_more_refs(delayed_refs, num_refs);
				wait_for_more_refs(delayed_refs,
						   num_refs, first_seq);
				/*
				 * after waiting, things have changed. we
				 * dropped the lock and someone else might have
+14 −0
Original line number Diff line number Diff line
@@ -3324,6 +3324,7 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
			     writepage_t writepage, void *data,
			     void (*flush_fn)(void *))
{
	struct inode *inode = mapping->host;
	int ret = 0;
	int done = 0;
	int nr_to_write_done = 0;
@@ -3334,6 +3335,18 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
	int scanned = 0;
	int tag;

	/*
	 * We have to hold onto the inode so that ordered extents can do their
	 * work when the IO finishes.  The alternative to this is failing to add
	 * an ordered extent if the igrab() fails there and that is a huge pain
	 * to deal with, so instead just hold onto the inode throughout the
	 * writepages operation.  If it fails here we are freeing up the inode
	 * anyway and we'd rather not waste our time writing out stuff that is
	 * going to be truncated anyway.
	 */
	if (!igrab(inode))
		return 0;

	pagevec_init(&pvec, 0);
	if (wbc->range_cyclic) {
		index = mapping->writeback_index; /* Start from prev offset */
@@ -3428,6 +3441,7 @@ retry:
		index = 0;
		goto retry;
	}
	btrfs_add_delayed_iput(inode);
	return ret;
}

Loading