Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 07be1337 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull btrfs updates from Chris Mason:
 "This has our merge window series of cleanups and fixes.  These target
  a wide range of issues, but do include some important fixes for
  qgroups, O_DIRECT, and fsync handling.  Jeff Mahoney moved around a
  few definitions to make them easier for userland to consume.

  Also whiteout support is included now that issues with overlayfs have
  been cleared up.

  I have one more fix pending for page faults during btrfs_copy_from_user,
  but I wanted to get this bulk out the door first"

* 'for-linus-4.7' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (90 commits)
  btrfs: fix memory leak during RAID 5/6 device replacement
  Btrfs: add semaphore to synchronize direct IO writes with fsync
  Btrfs: fix race between block group relocation and nocow writes
  Btrfs: fix race between fsync and direct IO writes for prealloc extents
  Btrfs: fix number of transaction units for renames with whiteout
  Btrfs: pin logs earlier when doing a rename exchange operation
  Btrfs: unpin logs if rename exchange operation fails
  Btrfs: fix inode leak on failure to setup whiteout inode in rename
  btrfs: add support for RENAME_EXCHANGE and RENAME_WHITEOUT
  Btrfs: pin log earlier when renaming
  Btrfs: unpin log if rename operation fails
  Btrfs: don't do unnecessary delalloc flushes when relocating
  Btrfs: don't wait for unrelated IO to finish before relocation
  Btrfs: fix empty symlink after creating symlink and fsync parent dir
  Btrfs: fix for incorrect directory entries after fsync log replay
  btrfs: build fixup for qgroup_account_snapshot
  btrfs: qgroup: Fix qgroup accounting when creating snapshot
  Btrfs: fix fspath error deallocation
  btrfs: make find_workspace warn if there are no workspaces
  btrfs: make find_workspace always succeed
  ...
parents 63d222b9 c315ef8d
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -1991,7 +1991,7 @@ struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root,

	ifp = kmalloc(sizeof(*ifp), GFP_NOFS);
	if (!ifp) {
		kfree(fspath);
		vfree(fspath);
		return ERR_PTR(-ENOMEM);
	}

+10 −0
Original line number Diff line number Diff line
@@ -196,6 +196,16 @@ struct btrfs_inode {
	struct list_head delayed_iput;
	long delayed_iput_count;

	/*
	 * To avoid races between lockless (i_mutex not held) direct IO writes
	 * and concurrent fsync requests. Direct IO writes must acquire read
	 * access on this semaphore for creating an extent map and its
	 * corresponding ordered extent. The fast fsync path must acquire write
	 * access on this semaphore before it collects ordered extents and
	 * extent maps.
	 */
	struct rw_semaphore dio_sem;

	struct inode vfs_inode;
};

+61 −24
Original line number Diff line number Diff line
@@ -743,8 +743,11 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
static struct {
	struct list_head idle_ws;
	spinlock_t ws_lock;
	int num_ws;
	atomic_t alloc_ws;
	/* Number of free workspaces */
	int free_ws;
	/* Total number of allocated workspaces */
	atomic_t total_ws;
	/* Waiters for a free workspace */
	wait_queue_head_t ws_wait;
} btrfs_comp_ws[BTRFS_COMPRESS_TYPES];

@@ -758,16 +761,34 @@ void __init btrfs_init_compress(void)
	int i;

	for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) {
		struct list_head *workspace;

		INIT_LIST_HEAD(&btrfs_comp_ws[i].idle_ws);
		spin_lock_init(&btrfs_comp_ws[i].ws_lock);
		atomic_set(&btrfs_comp_ws[i].alloc_ws, 0);
		atomic_set(&btrfs_comp_ws[i].total_ws, 0);
		init_waitqueue_head(&btrfs_comp_ws[i].ws_wait);

		/*
		 * Preallocate one workspace for each compression type so
		 * we can guarantee forward progress in the worst case
		 */
		workspace = btrfs_compress_op[i]->alloc_workspace();
		if (IS_ERR(workspace)) {
			printk(KERN_WARNING
	"BTRFS: cannot preallocate compression workspace, will try later");
		} else {
			atomic_set(&btrfs_comp_ws[i].total_ws, 1);
			btrfs_comp_ws[i].free_ws = 1;
			list_add(workspace, &btrfs_comp_ws[i].idle_ws);
		}
	}
}

/*
 * this finds an available workspace or allocates a new one
 * ERR_PTR is returned if things go bad.
 * This finds an available workspace or allocates a new one.
 * If it's not possible to allocate a new one, waits until there's one.
 * Preallocation makes a forward progress guarantees and we do not return
 * errors.
 */
static struct list_head *find_workspace(int type)
{
@@ -777,36 +798,58 @@ static struct list_head *find_workspace(int type)

	struct list_head *idle_ws	= &btrfs_comp_ws[idx].idle_ws;
	spinlock_t *ws_lock		= &btrfs_comp_ws[idx].ws_lock;
	atomic_t *alloc_ws		= &btrfs_comp_ws[idx].alloc_ws;
	atomic_t *total_ws		= &btrfs_comp_ws[idx].total_ws;
	wait_queue_head_t *ws_wait	= &btrfs_comp_ws[idx].ws_wait;
	int *num_ws			= &btrfs_comp_ws[idx].num_ws;
	int *free_ws			= &btrfs_comp_ws[idx].free_ws;
again:
	spin_lock(ws_lock);
	if (!list_empty(idle_ws)) {
		workspace = idle_ws->next;
		list_del(workspace);
		(*num_ws)--;
		(*free_ws)--;
		spin_unlock(ws_lock);
		return workspace;

	}
	if (atomic_read(alloc_ws) > cpus) {
	if (atomic_read(total_ws) > cpus) {
		DEFINE_WAIT(wait);

		spin_unlock(ws_lock);
		prepare_to_wait(ws_wait, &wait, TASK_UNINTERRUPTIBLE);
		if (atomic_read(alloc_ws) > cpus && !*num_ws)
		if (atomic_read(total_ws) > cpus && !*free_ws)
			schedule();
		finish_wait(ws_wait, &wait);
		goto again;
	}
	atomic_inc(alloc_ws);
	atomic_inc(total_ws);
	spin_unlock(ws_lock);

	workspace = btrfs_compress_op[idx]->alloc_workspace();
	if (IS_ERR(workspace)) {
		atomic_dec(alloc_ws);
		atomic_dec(total_ws);
		wake_up(ws_wait);

		/*
		 * Do not return the error but go back to waiting. There's a
		 * workspace preallocated for each type and the compression
		 * time is bounded so we get to a workspace eventually. This
		 * makes our caller's life easier.
		 *
		 * To prevent silent and low-probability deadlocks (when the
		 * initial preallocation fails), check if there are any
		 * workspaces at all.
		 */
		if (atomic_read(total_ws) == 0) {
			static DEFINE_RATELIMIT_STATE(_rs,
					/* once per minute */ 60 * HZ,
					/* no burst */ 1);

			if (__ratelimit(&_rs)) {
				printk(KERN_WARNING
			    "no compression workspaces, low memory, retrying");
			}
		}
		goto again;
	}
	return workspace;
}
@@ -820,21 +863,21 @@ static void free_workspace(int type, struct list_head *workspace)
	int idx = type - 1;
	struct list_head *idle_ws	= &btrfs_comp_ws[idx].idle_ws;
	spinlock_t *ws_lock		= &btrfs_comp_ws[idx].ws_lock;
	atomic_t *alloc_ws		= &btrfs_comp_ws[idx].alloc_ws;
	atomic_t *total_ws		= &btrfs_comp_ws[idx].total_ws;
	wait_queue_head_t *ws_wait	= &btrfs_comp_ws[idx].ws_wait;
	int *num_ws			= &btrfs_comp_ws[idx].num_ws;
	int *free_ws			= &btrfs_comp_ws[idx].free_ws;

	spin_lock(ws_lock);
	if (*num_ws < num_online_cpus()) {
	if (*free_ws < num_online_cpus()) {
		list_add(workspace, idle_ws);
		(*num_ws)++;
		(*free_ws)++;
		spin_unlock(ws_lock);
		goto wake;
	}
	spin_unlock(ws_lock);

	btrfs_compress_op[idx]->free_workspace(workspace);
	atomic_dec(alloc_ws);
	atomic_dec(total_ws);
wake:
	/*
	 * Make sure counter is updated before we wake up waiters.
@@ -857,7 +900,7 @@ static void free_workspaces(void)
			workspace = btrfs_comp_ws[i].idle_ws.next;
			list_del(workspace);
			btrfs_compress_op[i]->free_workspace(workspace);
			atomic_dec(&btrfs_comp_ws[i].alloc_ws);
			atomic_dec(&btrfs_comp_ws[i].total_ws);
		}
	}
}
@@ -894,8 +937,6 @@ int btrfs_compress_pages(int type, struct address_space *mapping,
	int ret;

	workspace = find_workspace(type);
	if (IS_ERR(workspace))
		return PTR_ERR(workspace);

	ret = btrfs_compress_op[type-1]->compress_pages(workspace, mapping,
						      start, len, pages,
@@ -930,8 +971,6 @@ static int btrfs_decompress_biovec(int type, struct page **pages_in,
	int ret;

	workspace = find_workspace(type);
	if (IS_ERR(workspace))
		return PTR_ERR(workspace);

	ret = btrfs_compress_op[type-1]->decompress_biovec(workspace, pages_in,
							 disk_start,
@@ -952,8 +991,6 @@ int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,
	int ret;

	workspace = find_workspace(type);
	if (IS_ERR(workspace))
		return PTR_ERR(workspace);

	ret = btrfs_compress_op[type-1]->decompress(workspace, data_in,
						  dest_page, start_byte,
+3 −3
Original line number Diff line number Diff line
@@ -1011,7 +1011,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
			return ret;
		if (refs == 0) {
			ret = -EROFS;
			btrfs_std_error(root->fs_info, ret, NULL);
			btrfs_handle_fs_error(root->fs_info, ret, NULL);
			return ret;
		}
	} else {
@@ -1928,7 +1928,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
		child = read_node_slot(root, mid, 0);
		if (!child) {
			ret = -EROFS;
			btrfs_std_error(root->fs_info, ret, NULL);
			btrfs_handle_fs_error(root->fs_info, ret, NULL);
			goto enospc;
		}

@@ -2031,7 +2031,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
		 */
		if (!left) {
			ret = -EROFS;
			btrfs_std_error(root->fs_info, ret, NULL);
			btrfs_handle_fs_error(root->fs_info, ret, NULL);
			goto enospc;
		}
		wret = balance_node_right(trans, root, mid, left);
+70 −1053

File changed.

Preview size limit exceeded, changes collapsed.

Loading