Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 9be3395b authored by Chris Mason
Browse files

Btrfs: use a btrfs bioset instead of abusing bio internals



Btrfs has been pointer tagging bi_private and using bi_bdev
to store the stripe index and mirror number of failed IOs.

As bios bubble back up through the call chain, we use these
to decide if and how to retry our IOs.  They are also used
to count IO failures on a per device basis.

Recently a bio tracepoint was added that led to crashes because
we were abusing bi_bdev.

This commit adds a btrfs bioset, and creates explicit fields
for the mirror number and stripe index.  The plan is to
extend this structure for all of the fields currently in
struct btrfs_bio, which will mean one less kmalloc in
our IO path.

Signed-off-by: Chris Mason <chris.mason@fusionio.com>
Reported-by: Tejun Heo <tj@kernel.org>
parent 667e7d94
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -1700,7 +1700,7 @@ static int btrfsic_read_block(struct btrfsic_state *state,
		unsigned int j;
		DECLARE_COMPLETION_ONSTACK(complete);

		bio = bio_alloc(GFP_NOFS, num_pages - i);
		bio = btrfs_io_bio_alloc(GFP_NOFS, num_pages - i);
		if (!bio) {
			printk(KERN_INFO
			       "btrfsic: bio_alloc() for %u pages failed!\n",
+1 −1
Original line number Diff line number Diff line
@@ -3128,7 +3128,7 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
	 * caller
	 */
	device->flush_bio = NULL;
	bio = bio_alloc(GFP_NOFS, 0);
	bio = btrfs_io_bio_alloc(GFP_NOFS, 0);
	if (!bio)
		return -ENOMEM;

+41 −8
Original line number Diff line number Diff line
@@ -23,6 +23,7 @@

static struct kmem_cache *extent_state_cache;
static struct kmem_cache *extent_buffer_cache;
static struct bio_set *btrfs_bioset;

#ifdef CONFIG_BTRFS_DEBUG
static LIST_HEAD(buffers);
@@ -125,10 +126,20 @@ int __init extent_io_init(void)
			SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
	if (!extent_buffer_cache)
		goto free_state_cache;

	btrfs_bioset = bioset_create(BIO_POOL_SIZE,
				     offsetof(struct btrfs_io_bio, bio));
	if (!btrfs_bioset)
		goto free_buffer_cache;
	return 0;

free_buffer_cache:
	kmem_cache_destroy(extent_buffer_cache);
	extent_buffer_cache = NULL;

free_state_cache:
	kmem_cache_destroy(extent_state_cache);
	extent_state_cache = NULL;
	return -ENOMEM;
}

@@ -145,6 +156,8 @@ void extent_io_exit(void)
		kmem_cache_destroy(extent_state_cache);
	if (extent_buffer_cache)
		kmem_cache_destroy(extent_buffer_cache);
	if (btrfs_bioset)
		bioset_free(btrfs_bioset);
}

void extent_io_tree_init(struct extent_io_tree *tree,
@@ -2046,7 +2059,7 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start,
	if (btrfs_is_parity_mirror(map_tree, logical, length, mirror_num))
		return 0;

	bio = bio_alloc(GFP_NOFS, 1);
	bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
	if (!bio)
		return -EIO;
	bio->bi_private = &compl;
@@ -2336,7 +2349,7 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
		return -EIO;
	}

	bio = bio_alloc(GFP_NOFS, 1);
	bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
	if (!bio) {
		free_io_failure(inode, failrec, 0);
		return -EIO;
@@ -2457,10 +2470,11 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
		struct page *page = bvec->bv_page;
		struct extent_state *cached = NULL;
		struct extent_state *state;
		struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);

		pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, "
			 "mirror=%ld\n", (u64)bio->bi_sector, err,
			 (long int)bio->bi_bdev);
			 "mirror=%lu\n", (u64)bio->bi_sector, err,
			 io_bio->mirror_num);
		tree = &BTRFS_I(page->mapping->host)->io_tree;

		start = page_offset(page) + bvec->bv_offset;
@@ -2485,7 +2499,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
		}
		spin_unlock(&tree->lock);

		mirror = (int)(unsigned long)bio->bi_bdev;
		mirror = io_bio->mirror_num;
		if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
			ret = tree->ops->readpage_end_io_hook(page, start, end,
							      state, mirror);
@@ -2550,17 +2564,23 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
	bio_put(bio);
}

/*
 * this allocates from the btrfs_bioset.  We're returning a bio right now
 * but you can call btrfs_io_bio for the appropriate container_of magic
 */
struct bio *
btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
		gfp_t gfp_flags)
{
	struct bio *bio;

	bio = bio_alloc(gfp_flags, nr_vecs);
	bio = bio_alloc_bioset(gfp_flags, nr_vecs, btrfs_bioset);

	if (bio == NULL && (current->flags & PF_MEMALLOC)) {
		while (!bio && (nr_vecs /= 2))
			bio = bio_alloc(gfp_flags, nr_vecs);
		while (!bio && (nr_vecs /= 2)) {
			bio = bio_alloc_bioset(gfp_flags,
					       nr_vecs, btrfs_bioset);
		}
	}

	if (bio) {
@@ -2571,6 +2591,19 @@ btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
	return bio;
}

/*
 * Clone @bio using @gfp_mask, with the copy allocated out of
 * btrfs_bioset.  The result of bio_clone_bioset() is handed back
 * unchanged.
 */
struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask)
{
	struct bio *clone;

	clone = bio_clone_bioset(bio, gfp_mask, btrfs_bioset);
	return clone;
}


/*
 * Allocate a bio with room for @nr_iovecs vectors from btrfs_bioset,
 * using @gfp_mask.  The result of bio_alloc_bioset() is handed back
 * unchanged.
 */
struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
{
	struct bio *new_bio;

	new_bio = bio_alloc_bioset(gfp_mask, nr_iovecs, btrfs_bioset);
	return new_bio;
}


static int __must_check submit_one_bio(int rw, struct bio *bio,
				       int mirror_num, unsigned long bio_flags)
{
+2 −0
Original line number Diff line number Diff line
@@ -336,6 +336,8 @@ int extent_clear_unlock_delalloc(struct inode *inode,
struct bio *
btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
		gfp_t gfp_flags);
struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs);
struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask);

struct btrfs_fs_info;

+43 −21
Original line number Diff line number Diff line
@@ -6927,7 +6927,11 @@ struct btrfs_dio_private {
	/* IO errors */
	int errors;

	/* orig_bio is our btrfs_io_bio */
	struct bio *orig_bio;

	/* dio_bio came from fs/direct-io.c */
	struct bio *dio_bio;
};

static void btrfs_endio_direct_read(struct bio *bio, int err)
@@ -6937,6 +6941,7 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
	struct bio_vec *bvec = bio->bi_io_vec;
	struct inode *inode = dip->inode;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct bio *dio_bio;
	u64 start;

	start = dip->logical_offset;
@@ -6976,14 +6981,15 @@ failed:

	unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset,
		      dip->logical_offset + dip->bytes - 1);
	bio->bi_private = dip->private;
	dio_bio = dip->dio_bio;

	kfree(dip);

	/* If we had a csum failure make sure to clear the uptodate flag */
	if (err)
		clear_bit(BIO_UPTODATE, &bio->bi_flags);
	dio_end_io(bio, err);
		clear_bit(BIO_UPTODATE, &dio_bio->bi_flags);
	dio_end_io(dio_bio, err);
	bio_put(bio);
}

static void btrfs_endio_direct_write(struct bio *bio, int err)
@@ -6994,6 +7000,7 @@ static void btrfs_endio_direct_write(struct bio *bio, int err)
	struct btrfs_ordered_extent *ordered = NULL;
	u64 ordered_offset = dip->logical_offset;
	u64 ordered_bytes = dip->bytes;
	struct bio *dio_bio;
	int ret;

	if (err)
@@ -7021,14 +7028,15 @@ out_test:
		goto again;
	}
out_done:
	bio->bi_private = dip->private;
	dio_bio = dip->dio_bio;

	kfree(dip);

	/* If we had an error make sure to clear the uptodate flag */
	if (err)
		clear_bit(BIO_UPTODATE, &bio->bi_flags);
	dio_end_io(bio, err);
		clear_bit(BIO_UPTODATE, &dio_bio->bi_flags);
	dio_end_io(dio_bio, err);
	bio_put(bio);
}

static int __btrfs_submit_bio_start_direct_io(struct inode *inode, int rw,
@@ -7064,10 +7072,10 @@ static void btrfs_end_dio_bio(struct bio *bio, int err)
	if (!atomic_dec_and_test(&dip->pending_bios))
		goto out;

	if (dip->errors)
	if (dip->errors) {
		bio_io_error(dip->orig_bio);
	else {
		set_bit(BIO_UPTODATE, &dip->orig_bio->bi_flags);
	} else {
		set_bit(BIO_UPTODATE, &dip->dio_bio->bi_flags);
		bio_endio(dip->orig_bio, 0);
	}
out:
@@ -7242,25 +7250,34 @@ out_err:
	return 0;
}

static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
				loff_t file_offset)
static void btrfs_submit_direct(int rw, struct bio *dio_bio,
				struct inode *inode, loff_t file_offset)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_dio_private *dip;
	struct bio_vec *bvec = bio->bi_io_vec;
	struct bio_vec *bvec = dio_bio->bi_io_vec;
	struct bio *io_bio;
	int skip_sum;
	int write = rw & REQ_WRITE;
	int ret = 0;

	skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;

	io_bio = btrfs_bio_clone(dio_bio, GFP_NOFS);

	if (!io_bio) {
		ret = -ENOMEM;
		goto free_ordered;
	}

	dip = kmalloc(sizeof(*dip), GFP_NOFS);
	if (!dip) {
		ret = -ENOMEM;
		goto free_ordered;
		goto free_io_bio;
	}

	dip->private = bio->bi_private;
	dip->private = dio_bio->bi_private;
	io_bio->bi_private = dio_bio->bi_private;
	dip->inode = inode;
	dip->logical_offset = file_offset;

@@ -7268,22 +7285,27 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
	do {
		dip->bytes += bvec->bv_len;
		bvec++;
	} while (bvec <= (bio->bi_io_vec + bio->bi_vcnt - 1));
	} while (bvec <= (dio_bio->bi_io_vec + dio_bio->bi_vcnt - 1));

	dip->disk_bytenr = (u64)bio->bi_sector << 9;
	bio->bi_private = dip;
	dip->disk_bytenr = (u64)dio_bio->bi_sector << 9;
	io_bio->bi_private = dip;
	dip->errors = 0;
	dip->orig_bio = bio;
	dip->orig_bio = io_bio;
	dip->dio_bio = dio_bio;
	atomic_set(&dip->pending_bios, 0);

	if (write)
		bio->bi_end_io = btrfs_endio_direct_write;
		io_bio->bi_end_io = btrfs_endio_direct_write;
	else
		bio->bi_end_io = btrfs_endio_direct_read;
		io_bio->bi_end_io = btrfs_endio_direct_read;

	ret = btrfs_submit_direct_hook(rw, dip, skip_sum);
	if (!ret)
		return;

free_io_bio:
	bio_put(io_bio);

free_ordered:
	/*
	 * If this is a write, we need to clean up the reserved space and kill
@@ -7299,7 +7321,7 @@ free_ordered:
		btrfs_put_ordered_extent(ordered);
		btrfs_put_ordered_extent(ordered);
	}
	bio_endio(bio, ret);
	bio_endio(dio_bio, ret);
}

static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *iocb,
Loading