
Commit ccc5ff94 authored by Linus Torvalds
Merge git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable

* git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable:
  Btrfs: fix btrfs fallocate oops and deadlock
  Btrfs: use the right node in reada_for_balance
  Btrfs: fix oops on page->mapping->host during writepage
  Btrfs: add a priority queue to the async thread helpers
  Btrfs: use WRITE_SYNC for synchronous writes
parents c19c6c32 546888da
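
The thread-pool change is the core of this series: each worker gains a second list, prio_pending, that is always drained before the regular pending list, and synchronous bios are routed onto it so fsync-style writes stop waiting behind bulk async checksum work. A toy userspace model of that two-list scheme (illustrative names only, not the kernel API):

#include <stdio.h>

struct work { int id; int high_prio; struct work *next; };
struct worker { struct work *pending; struct work *prio_pending; };

static void queue_work(struct worker *w, struct work *item)
{
	/* mirror btrfs_queue_worker(): route on the priority flag */
	struct work **list = item->high_prio ? &w->prio_pending : &w->pending;

	item->next = NULL;
	while (*list)
		list = &(*list)->next;
	*list = item;
}

static struct work *next_work(struct worker *w)
{
	/* mirror worker_loop(): the prio list is always checked first */
	struct work **list = w->prio_pending ? &w->prio_pending : &w->pending;
	struct work *item = *list;

	if (item)
		*list = item->next;
	return item;
}

int main(void)
{
	struct worker w = { NULL, NULL };
	struct work items[] = {
		{ 1, 0, NULL }, { 2, 1, NULL }, { 3, 0, NULL }, { 4, 1, NULL },
	};
	struct work *cur;
	unsigned int i;

	for (i = 0; i < sizeof(items) / sizeof(items[0]); i++)
		queue_work(&w, &items[i]);

	/* prints 2, 4, 1, 3: high-priority work jumps the queue */
	while ((cur = next_work(&w)))
		printf("work %d\n", cur->id);
	return 0;
}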
fs/btrfs/async-thread.c +47 −13
@@ -25,6 +25,7 @@
#define WORK_QUEUED_BIT 0
#define WORK_DONE_BIT 1
#define WORK_ORDER_DONE_BIT 2
+#define WORK_HIGH_PRIO_BIT 3

/*
 * container for the kthread task pointer and the list of pending work
@@ -36,6 +37,7 @@ struct btrfs_worker_thread {

	/* list of struct btrfs_work that are waiting for service */
	struct list_head pending;
+	struct list_head prio_pending;

	/* list of worker threads from struct btrfs_workers */
	struct list_head worker_list;
@@ -103,10 +105,16 @@ static noinline int run_ordered_completions(struct btrfs_workers *workers,

	spin_lock_irqsave(&workers->lock, flags);

-	while (!list_empty(&workers->order_list)) {
+	while (1) {
+		if (!list_empty(&workers->prio_order_list)) {
+			work = list_entry(workers->prio_order_list.next,
+					  struct btrfs_work, order_list);
+		} else if (!list_empty(&workers->order_list)) {
+			work = list_entry(workers->order_list.next,
+					  struct btrfs_work, order_list);
+		} else {
+			break;
+		}
		if (!test_bit(WORK_DONE_BIT, &work->flags))
			break;

@@ -143,8 +151,14 @@ static int worker_loop(void *arg)
	do {
		spin_lock_irq(&worker->lock);
again_locked:
-		while (!list_empty(&worker->pending)) {
+		while (1) {
+			if (!list_empty(&worker->prio_pending))
+				cur = worker->prio_pending.next;
+			else if (!list_empty(&worker->pending))
+				cur = worker->pending.next;
+			else
+				break;

			work = list_entry(cur, struct btrfs_work, list);
			list_del(&work->list);
			clear_bit(WORK_QUEUED_BIT, &work->flags);
@@ -163,7 +177,6 @@ static int worker_loop(void *arg)

			spin_lock_irq(&worker->lock);
			check_idle_worker(worker);

		}
		if (freezing(current)) {
			worker->working = 0;
@@ -178,7 +191,8 @@ static int worker_loop(void *arg)
				 * jump_in?
				 */
				smp_mb();
-				if (!list_empty(&worker->pending))
+				if (!list_empty(&worker->pending) ||
+				    !list_empty(&worker->prio_pending))
					continue;

				/*
@@ -191,7 +205,8 @@ static int worker_loop(void *arg)
				 */
				schedule_timeout(1);
				smp_mb();
-				if (!list_empty(&worker->pending))
+				if (!list_empty(&worker->pending) ||
+				    !list_empty(&worker->prio_pending))
					continue;

				if (kthread_should_stop())
@@ -200,7 +215,8 @@ static int worker_loop(void *arg)
				/* still no more work?, sleep for real */
				spin_lock_irq(&worker->lock);
				set_current_state(TASK_INTERRUPTIBLE);
-				if (!list_empty(&worker->pending))
+				if (!list_empty(&worker->pending) ||
+				    !list_empty(&worker->prio_pending))
					goto again_locked;

				/*
@@ -248,6 +264,7 @@ void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max)
	INIT_LIST_HEAD(&workers->worker_list);
	INIT_LIST_HEAD(&workers->idle_list);
	INIT_LIST_HEAD(&workers->order_list);
+	INIT_LIST_HEAD(&workers->prio_order_list);
	spin_lock_init(&workers->lock);
	workers->max_workers = max;
	workers->idle_thresh = 32;
@@ -273,6 +290,7 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers)
		}

		INIT_LIST_HEAD(&worker->pending);
+		INIT_LIST_HEAD(&worker->prio_pending);
		INIT_LIST_HEAD(&worker->worker_list);
		spin_lock_init(&worker->lock);
		atomic_set(&worker->num_pending, 0);
@@ -396,6 +414,9 @@ int btrfs_requeue_work(struct btrfs_work *work)
		goto out;

	spin_lock_irqsave(&worker->lock, flags);
+	if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags))
+		list_add_tail(&work->list, &worker->prio_pending);
+	else
		list_add_tail(&work->list, &worker->pending);
	atomic_inc(&worker->num_pending);

@@ -422,6 +443,11 @@ int btrfs_requeue_work(struct btrfs_work *work)
	return 0;
}

+void btrfs_set_work_high_prio(struct btrfs_work *work)
+{
+	set_bit(WORK_HIGH_PRIO_BIT, &work->flags);
+}
+
/*
 * places a struct btrfs_work into the pending queue of one of the kthreads
 */
@@ -438,7 +464,12 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
	worker = find_worker(workers);
	if (workers->ordered) {
		spin_lock_irqsave(&workers->lock, flags);
+		if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags)) {
+			list_add_tail(&work->order_list,
+				      &workers->prio_order_list);
+		} else {
+			list_add_tail(&work->order_list, &workers->order_list);
+		}
		spin_unlock_irqrestore(&workers->lock, flags);
	} else {
		INIT_LIST_HEAD(&work->order_list);
@@ -446,6 +477,9 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)

	spin_lock_irqsave(&worker->lock, flags);

+	if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags))
+		list_add_tail(&work->list, &worker->prio_pending);
+	else
		list_add_tail(&work->list, &worker->pending);
	atomic_inc(&worker->num_pending);
	check_busy_worker(worker);
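
A detail worth noting in the hunks above: both btrfs_queue_worker() and btrfs_requeue_work() test WORK_HIGH_PRIO_BIT, so priority is carried on the work item itself and survives a requeue. A minimal sketch of that flag routing, with demo names standing in for the kernel's set_bit/test_bit:

#include <stdio.h>

#define DEMO_WORK_HIGH_PRIO_BIT 3

struct demo_work {
	unsigned long flags;
};

/* analogue of btrfs_set_work_high_prio(): set the bit once */
static void demo_set_high_prio(struct demo_work *w)
{
	w->flags |= 1UL << DEMO_WORK_HIGH_PRIO_BIT;
}

/* analogue of the queue/requeue routing: pick a list by the bit */
static const char *demo_pick_list(const struct demo_work *w)
{
	return (w->flags & (1UL << DEMO_WORK_HIGH_PRIO_BIT)) ?
		"prio_pending" : "pending";
}

int main(void)
{
	struct demo_work w = { 0 };

	printf("queued on %s\n", demo_pick_list(&w));   /* pending */
	demo_set_high_prio(&w);
	printf("requeued on %s\n", demo_pick_list(&w)); /* prio_pending */
	return 0;
}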
fs/btrfs/async-thread.h +2 −0
@@ -85,6 +85,7 @@ struct btrfs_workers {
	 * of work items waiting for completion
	 */
	struct list_head order_list;
+	struct list_head prio_order_list;

	/* lock for finding the next worker thread to queue on */
	spinlock_t lock;
@@ -98,4 +99,5 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers);
int btrfs_stop_workers(struct btrfs_workers *workers);
void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max);
int btrfs_requeue_work(struct btrfs_work *work);
+void btrfs_set_work_high_prio(struct btrfs_work *work);
#endif
fs/btrfs/ctree.c +12 −5
@@ -1325,12 +1325,12 @@ static noinline int reada_for_balance(struct btrfs_root *root,
	int ret = 0;
	int blocksize;

-	parent = path->nodes[level - 1];
+	parent = path->nodes[level + 1];
	if (!parent)
		return 0;

	nritems = btrfs_header_nritems(parent);
-	slot = path->slots[level];
+	slot = path->slots[level + 1];
	blocksize = btrfs_level_size(root, level);

	if (slot > 0) {
@@ -1341,7 +1341,7 @@ static noinline int reada_for_balance(struct btrfs_root *root,
			block1 = 0;
		free_extent_buffer(eb);
	}
-	if (slot < nritems) {
+	if (slot + 1 < nritems) {
		block2 = btrfs_node_blockptr(parent, slot + 1);
		gen = btrfs_node_ptr_generation(parent, slot + 1);
		eb = btrfs_find_tree_block(root, block2, blocksize);
@@ -1351,7 +1351,11 @@ static noinline int reada_for_balance(struct btrfs_root *root,
	}
	if (block1 || block2) {
		ret = -EAGAIN;
+
+		/* release the whole path */
		btrfs_release_path(root, path);
+
+		/* read the blocks */
		if (block1)
			readahead_tree_block(root, block1, blocksize, 0);
		if (block2)
@@ -1361,7 +1365,7 @@ static noinline int reada_for_balance(struct btrfs_root *root,
			eb = read_tree_block(root, block1, blocksize, 0);
			free_extent_buffer(eb);
		}
-		if (block1) {
+		if (block2) {
			eb = read_tree_block(root, block2, blocksize, 0);
			free_extent_buffer(eb);
		}
@@ -1481,12 +1485,15 @@ read_block_for_search(struct btrfs_trans_handle *trans,
	 * of the btree by dropping locks before
	 * we read.
	 */
-	btrfs_release_path(NULL, p);
	btrfs_unlock_up_safe(p, level + 1);
	btrfs_set_path_blocking(p);

+	if (tmp)
+		free_extent_buffer(tmp);
	if (p->reada)
		reada_for_search(root, p, level, slot, key->objectid);
+
+	btrfs_release_path(NULL, p);
	tmp = read_tree_block(root, blocknr, blocksize, gen);
	if (tmp)
		free_extent_buffer(tmp);
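
The reada_for_balance() fix turns on the btrfs_path layout: nodes[level] is the buffer being balanced and nodes[level + 1] is its parent, whose slots are scanned for siblings to read ahead; the old code indexed one level down, toward the children. A sketch of that invariant, with toy types in place of the kernel's extent buffers:

#include <stddef.h>

#define TOY_MAX_LEVEL 8

struct toy_buffer { int dummy; };

/* nodes[0] is the leaf; nodes[l + 1] is the parent of nodes[l];
 * slots[l + 1] is the slot of that parent pointing at nodes[l].
 * Siblings therefore sit at slots[l + 1] +/- 1 in nodes[l + 1],
 * never in nodes[l - 1] (the child level).
 */
struct toy_path {
	struct toy_buffer *nodes[TOY_MAX_LEVEL];
	int slots[TOY_MAX_LEVEL];
};

static struct toy_buffer *toy_parent(struct toy_path *p, int level,
				     int *slot_in_parent)
{
	if (level + 1 >= TOY_MAX_LEVEL || !p->nodes[level + 1])
		return NULL; /* at or above the root: no parent */
	*slot_in_parent = p->slots[level + 1];
	return p->nodes[level + 1];
}

int main(void)
{
	struct toy_buffer leaf = { 0 }, node = { 0 }, root = { 0 };
	struct toy_path path = { { &leaf, &node, &root }, { 3, 1, 0 } };
	int slot;

	/* the parent of the level-1 node is the root; it sits in slot 0 */
	return (toy_parent(&path, 1, &slot) == &root && slot == 0) ? 0 : 1;
}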
fs/btrfs/disk-io.c +7 −2
@@ -579,6 +579,10 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
	async->bio_flags = bio_flags;

	atomic_inc(&fs_info->nr_async_submits);
+
+	if (rw & (1 << BIO_RW_SYNCIO))
+		btrfs_set_work_high_prio(&async->work);
+
	btrfs_queue_worker(&fs_info->workers, &async->work);
#if 0
	int limit = btrfs_async_submit_limit(fs_info);
@@ -656,6 +660,7 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
		return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio,
				     mirror_num, 0);
	}

	/*
	 * kthread helpers are used to submit writes so that checksumming
	 * can happen in parallel across all CPUs
@@ -2095,10 +2100,10 @@ static int write_dev_supers(struct btrfs_device *device,
				device->barriers = 0;
				get_bh(bh);
				lock_buffer(bh);
-				ret = submit_bh(WRITE, bh);
+				ret = submit_bh(WRITE_SYNC, bh);
			}
		} else {
-			ret = submit_bh(WRITE, bh);
+			ret = submit_bh(WRITE_SYNC, bh);
		}

		if (!ret && wait) {
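
write_dev_supers() always waits on the buffer heads it submits, so tagging the I/O WRITE_SYNC tells the block layer a caller is blocked on it and it should be dispatched ahead of background writeback. A userspace toy of that "pick the sync flavor when the caller will wait" pattern (demo names only, not the kernel's submit_bh):

#include <stdio.h>

enum demo_rw { DEMO_WRITE, DEMO_WRITE_SYNC };

/* stand-in for submit_bh(): just report how the write was tagged */
static int demo_submit(enum demo_rw rw, const char *what)
{
	printf("%s submitted as %s\n", what,
	       rw == DEMO_WRITE_SYNC ? "WRITE_SYNC" : "WRITE");
	return 0;
}

static int demo_write_super(int will_wait)
{
	/* the fix above: a write we are about to wait on is sync */
	return demo_submit(will_wait ? DEMO_WRITE_SYNC : DEMO_WRITE,
			   "super block");
}

int main(void)
{
	return demo_write_super(1);
}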
fs/btrfs/extent_io.c +63 −23
@@ -50,7 +50,10 @@ struct extent_page_data {
	/* tells writepage not to lock the state bits for this range
	 * it still does the unlocking
	 */
-	int extent_locked;
+	unsigned int extent_locked:1;
+
+	/* tells the submit_bio code to use a WRITE_SYNC */
+	unsigned int sync_io:1;
};

int __init extent_io_init(void)
@@ -2101,6 +2104,16 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
	return ret;
}

+static noinline void update_nr_written(struct page *page,
+				      struct writeback_control *wbc,
+				      unsigned long nr_written)
+{
+	wbc->nr_to_write -= nr_written;
+	if (wbc->range_cyclic || (wbc->nr_to_write > 0 &&
+	    wbc->range_start == 0 && wbc->range_end == LLONG_MAX))
+		page->mapping->writeback_index = page->index + nr_written;
+}
+
/*
 * the writepage semantics are similar to regular writepage.  extent
 * records are inserted to lock ranges in the tree, and as dirty areas
@@ -2136,8 +2149,14 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
	u64 delalloc_end;
	int page_started;
	int compressed;
+	int write_flags;
	unsigned long nr_written = 0;

+	if (wbc->sync_mode == WB_SYNC_ALL)
+		write_flags = WRITE_SYNC_PLUG;
+	else
+		write_flags = WRITE;
+
	WARN_ON(!PageLocked(page));
	pg_offset = i_size & (PAGE_CACHE_SIZE - 1);
	if (page->index > end_index ||
@@ -2164,6 +2183,12 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
	delalloc_end = 0;
	page_started = 0;
	if (!epd->extent_locked) {
+		/*
+		 * make sure the wbc mapping index is at least updated
+		 * to this page.
+		 */
+		update_nr_written(page, wbc, 0);
+
		while (delalloc_end < page_end) {
			nr_delalloc = find_lock_delalloc_range(inode, tree,
						       page,
@@ -2185,7 +2210,13 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
		 */
		if (page_started) {
			ret = 0;
-			goto update_nr_written;
+			/*
+			 * we've unlocked the page, so we can't update
+			 * the mapping's writeback index, just update
+			 * nr_to_write.
+			 */
+			wbc->nr_to_write -= nr_written;
+			goto done_unlocked;
		}
	}
	lock_extent(tree, start, page_end, GFP_NOFS);
@@ -2198,13 +2229,18 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
		if (ret == -EAGAIN) {
			unlock_extent(tree, start, page_end, GFP_NOFS);
			redirty_page_for_writepage(wbc, page);
+			update_nr_written(page, wbc, nr_written);
			unlock_page(page);
			ret = 0;
-			goto update_nr_written;
+			goto done_unlocked;
		}
	}

-	nr_written++;
+	/*
+	 * we don't want to touch the inode after unlocking the page,
+	 * so we update the mapping writeback index now
+	 */
+	update_nr_written(page, wbc, nr_written + 1);

	end = page_end;
	if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0))
@@ -2314,9 +2350,9 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
				       (unsigned long long)end);
			}

-			ret = submit_extent_page(WRITE, tree, page, sector,
-						 iosize, pg_offset, bdev,
-						 &epd->bio, max_nr,
+			ret = submit_extent_page(write_flags, tree, page,
+						 sector, iosize, pg_offset,
+						 bdev, &epd->bio, max_nr,
						 end_bio_extent_writepage,
						 0, 0, 0);
			if (ret)
@@ -2336,11 +2372,8 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
		unlock_extent(tree, unlock_start, page_end, GFP_NOFS);
	unlock_page(page);

-update_nr_written:
-	wbc->nr_to_write -= nr_written;
-	if (wbc->range_cyclic || (wbc->nr_to_write > 0 &&
-	    wbc->range_start == 0 && wbc->range_end == LLONG_MAX))
-		page->mapping->writeback_index = page->index + nr_written;
+done_unlocked:

	return 0;
}

@@ -2460,15 +2493,23 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
	return ret;
}

-static noinline void flush_write_bio(void *data)
+static void flush_epd_write_bio(struct extent_page_data *epd)
{
-	struct extent_page_data *epd = data;
	if (epd->bio) {
+		if (epd->sync_io)
+			submit_one_bio(WRITE_SYNC, epd->bio, 0, 0);
+		else
+			submit_one_bio(WRITE, epd->bio, 0, 0);
		epd->bio = NULL;
	}
}

+static noinline void flush_write_bio(void *data)
+{
+	struct extent_page_data *epd = data;
+	flush_epd_write_bio(epd);
+}

int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
			  get_extent_t *get_extent,
			  struct writeback_control *wbc)
@@ -2480,23 +2521,22 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
		.tree = tree,
		.get_extent = get_extent,
		.extent_locked = 0,
+		.sync_io = wbc->sync_mode == WB_SYNC_ALL,
	};
	struct writeback_control wbc_writepages = {
		.bdi		= wbc->bdi,
-		.sync_mode	= WB_SYNC_NONE,
+		.sync_mode	= wbc->sync_mode,
		.older_than_this = NULL,
		.nr_to_write	= 64,
		.range_start	= page_offset(page) + PAGE_CACHE_SIZE,
		.range_end	= (loff_t)-1,
	};

	ret = __extent_writepage(page, wbc, &epd);

	extent_write_cache_pages(tree, mapping, &wbc_writepages,
				 __extent_writepage, &epd, flush_write_bio);
-	if (epd.bio)
-		submit_one_bio(WRITE, epd.bio, 0, 0);
+	flush_epd_write_bio(&epd);
	return ret;
}

@@ -2515,6 +2555,7 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
		.tree = tree,
		.get_extent = get_extent,
		.extent_locked = 1,
+		.sync_io = mode == WB_SYNC_ALL,
	};
	struct writeback_control wbc_writepages = {
		.bdi		= inode->i_mapping->backing_dev_info,
@@ -2540,8 +2581,7 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
		start += PAGE_CACHE_SIZE;
	}

-	if (epd.bio)
-		submit_one_bio(WRITE, epd.bio, 0, 0);
+	flush_epd_write_bio(&epd);
	return ret;
}

@@ -2556,13 +2596,13 @@ int extent_writepages(struct extent_io_tree *tree,
		.tree = tree,
		.get_extent = get_extent,
		.extent_locked = 0,
+		.sync_io = wbc->sync_mode == WB_SYNC_ALL,
	};

	ret = extent_write_cache_pages(tree, mapping, wbc,
				       __extent_writepage, &epd,
				       flush_write_bio);
-	if (epd.bio)
-		submit_one_bio(WRITE, epd.bio, 0, 0);
+	flush_epd_write_bio(&epd);
	return ret;
}
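
The extent_page_data rework threads wbc->sync_mode through to bio submission: sync_io is captured once when the epd is set up, and every exit path now funnels through flush_epd_write_bio(), which picks WRITE_SYNC over WRITE in one place instead of at three call sites. A compact userspace model of that single flush point (illustrative names, not the kernel structures):

#include <stdio.h>

struct demo_epd {
	int bio;                /* stand-in for the cached struct bio * */
	unsigned int sync_io:1; /* captured from wbc->sync_mode */
};

/* one flush helper instead of open-coded submit calls at each caller:
 * the flag decides the write flavor in a single place */
static void demo_flush(struct demo_epd *epd)
{
	if (!epd->bio)
		return;
	printf("submit_one_bio(%s)\n",
	       epd->sync_io ? "WRITE_SYNC" : "WRITE");
	epd->bio = 0; /* bio handed off, forget it */
}

int main(void)
{
	struct demo_epd sync_writeback = { 1, 1 };  /* WB_SYNC_ALL */
	struct demo_epd async_writeback = { 1, 0 }; /* WB_SYNC_NONE */

	demo_flush(&sync_writeback);  /* -> WRITE_SYNC */
	demo_flush(&async_writeback); /* -> WRITE */
	return 0;
}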
