
Commit 20d0189b authored by Kent Overstreet

block: Introduce new bio_split()



The new bio_split() can split arbitrary bios - it's not restricted to
single page bios, like the old bio_split() (previously renamed to
bio_pair_split()). It also has different semantics - it doesn't allocate
a struct bio_pair, leaving it up to the caller to handle completions.

Then convert the existing bio_pair_split() users to the new bio_split()
- and also nvme, which was open coding bio splitting.

(We have to take that BUG_ON() out of bio_integrity_trim() because this
bio_split() needs to use it, and there's no reason it has to be used on
bios marked as cloned; BIO_CLONED doesn't seem to have clearly
documented semantics anyway.)
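
Note: the calling convention the conversions below adopt looks roughly
like this (an illustrative sketch, not code from this commit; `sectors`
and the use of generic_make_request() stand in for whatever the caller
actually does with each piece):

	/*
	 * Split the first `sectors` off @bio and chain completions, so
	 * the parent bio completes only after both pieces have completed.
	 */
	struct bio *split = bio_split(bio, sectors, GFP_NOIO, fs_bio_set);
	if (!split)
		return -ENOMEM;

	bio_chain(split, bio);		/* parent waits on the front piece */
	generic_make_request(split);	/* submit the front piece */
	generic_make_request(bio);	/* @bio is now just the remainder */

bio_split() splits off a proper prefix; for the "maybe no split is
needed" case the commit also adds bio_next_split() (see the note near
the bcache hunks below).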

Signed-off-by: Kent Overstreet <kmo@daterainc.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Cc: Matthew Wilcox <matthew.r.wilcox@intel.com>
Cc: Keith Busch <keith.busch@intel.com>
Cc: Vishal Verma <vishal.l.verma@intel.com>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: Neil Brown <neilb@suse.de>
parent ee67891b
drivers/block/nvme-core.c  +9 −97
@@ -441,104 +441,19 @@ int nvme_setup_prps(struct nvme_dev *dev, struct nvme_common_command *cmd,
 	return total_len;
 }
 
-struct nvme_bio_pair {
-	struct bio b1, b2, *parent;
-	struct bio_vec *bv1, *bv2;
-	int err;
-	atomic_t cnt;
-};
-
-static void nvme_bio_pair_endio(struct bio *bio, int err)
-{
-	struct nvme_bio_pair *bp = bio->bi_private;
-
-	if (err)
-		bp->err = err;
-
-	if (atomic_dec_and_test(&bp->cnt)) {
-		bio_endio(bp->parent, bp->err);
-		kfree(bp->bv1);
-		kfree(bp->bv2);
-		kfree(bp);
-	}
-}
-
-static struct nvme_bio_pair *nvme_bio_split(struct bio *bio, int idx,
-							int len, int offset)
-{
-	struct nvme_bio_pair *bp;
-
-	BUG_ON(len > bio->bi_iter.bi_size);
-	BUG_ON(idx > bio->bi_vcnt);
-
-	bp = kmalloc(sizeof(*bp), GFP_ATOMIC);
-	if (!bp)
-		return NULL;
-	bp->err = 0;
-
-	bp->b1 = *bio;
-	bp->b2 = *bio;
-
-	bp->b1.bi_iter.bi_size = len;
-	bp->b2.bi_iter.bi_size -= len;
-	bp->b1.bi_vcnt = idx;
-	bp->b2.bi_iter.bi_idx = idx;
-	bp->b2.bi_iter.bi_sector += len >> 9;
-
-	if (offset) {
-		bp->bv1 = kmalloc(bio->bi_max_vecs * sizeof(struct bio_vec),
-								GFP_ATOMIC);
-		if (!bp->bv1)
-			goto split_fail_1;
-
-		bp->bv2 = kmalloc(bio->bi_max_vecs * sizeof(struct bio_vec),
-								GFP_ATOMIC);
-		if (!bp->bv2)
-			goto split_fail_2;
-
-		memcpy(bp->bv1, bio->bi_io_vec,
-			bio->bi_max_vecs * sizeof(struct bio_vec));
-		memcpy(bp->bv2, bio->bi_io_vec,
-			bio->bi_max_vecs * sizeof(struct bio_vec));
-
-		bp->b1.bi_io_vec = bp->bv1;
-		bp->b2.bi_io_vec = bp->bv2;
-		bp->b2.bi_io_vec[idx].bv_offset += offset;
-		bp->b2.bi_io_vec[idx].bv_len -= offset;
-		bp->b1.bi_io_vec[idx].bv_len = offset;
-		bp->b1.bi_vcnt++;
-	} else
-		bp->bv1 = bp->bv2 = NULL;
-
-	bp->b1.bi_private = bp;
-	bp->b2.bi_private = bp;
-
-	bp->b1.bi_end_io = nvme_bio_pair_endio;
-	bp->b2.bi_end_io = nvme_bio_pair_endio;
-
-	bp->parent = bio;
-	atomic_set(&bp->cnt, 2);
-
-	return bp;
-
- split_fail_2:
-	kfree(bp->bv1);
- split_fail_1:
-	kfree(bp);
-	return NULL;
-}
-
 static int nvme_split_and_submit(struct bio *bio, struct nvme_queue *nvmeq,
-						int idx, int len, int offset)
+				 int len)
 {
-	struct nvme_bio_pair *bp = nvme_bio_split(bio, idx, len, offset);
-	if (!bp)
+	struct bio *split = bio_split(bio, len >> 9, GFP_ATOMIC, NULL);
+	if (!split)
 		return -ENOMEM;
 
+	bio_chain(split, bio);
+
 	if (bio_list_empty(&nvmeq->sq_cong))
 		add_wait_queue(&nvmeq->sq_full, &nvmeq->sq_cong_wait);
-	bio_list_add(&nvmeq->sq_cong, &bp->b1);
-	bio_list_add(&nvmeq->sq_cong, &bp->b2);
+	bio_list_add(&nvmeq->sq_cong, split);
+	bio_list_add(&nvmeq->sq_cong, bio);
 
 	return 0;
 }
@@ -568,8 +483,7 @@ static int nvme_map_bio(struct nvme_queue *nvmeq, struct nvme_iod *iod,
 		} else {
 			if (!first && BIOVEC_NOT_VIRT_MERGEABLE(&bvprv, &bvec))
 				return nvme_split_and_submit(bio, nvmeq,
-							     iter.bi_idx,
-							     length, 0);
+							     length);
 
 			sg = sg ? sg + 1 : iod->sg;
 			sg_set_page(sg, bvec.bv_page,
@@ -578,9 +492,7 @@ static int nvme_map_bio(struct nvme_queue *nvmeq, struct nvme_iod *iod,
 		}
 
 		if (split_len - length < bvec.bv_len)
-			return nvme_split_and_submit(bio, nvmeq, iter.bi_idx,
-						     split_len,
-						     split_len - length);
+			return nvme_split_and_submit(bio, nvmeq, split_len);
 		length += bvec.bv_len;
 		bvprv = bvec;
 		first = 0;
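
Note: bio_chain() is what lets nvme drop the hand-rolled nvme_bio_pair
refcounting removed above. bio_chain() itself was added a few commits
earlier in this series; from memory, it amounts to roughly:

	static void bio_chain_endio(struct bio *bio, int err)
	{
		bio_endio(bio->bi_private, err);	/* complete the parent */
		bio_put(bio);
	}

	void bio_chain(struct bio *bio, struct bio *parent)
	{
		BUG_ON(bio->bi_private || bio->bi_end_io);

		bio->bi_private = parent;
		bio->bi_end_io	= bio_chain_endio;
		atomic_inc(&parent->bi_remaining);	/* parent waits for child */
	}

bio_endio() on the parent only runs the parent's real completion once
bi_remaining drops to zero, which is exactly what nvme_bio_pair's
atomic cnt used to do by hand.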
drivers/block/pktcdvd.c  +74 −62
@@ -2338,26 +2338,8 @@ static void pkt_end_io_read_cloned(struct bio *bio, int err)
 	pkt_bio_finished(pd);
 }
 
-static void pkt_make_request(struct request_queue *q, struct bio *bio)
+static void pkt_make_request_read(struct pktcdvd_device *pd, struct bio *bio)
 {
-	struct pktcdvd_device *pd;
-	char b[BDEVNAME_SIZE];
-	sector_t zone;
-	struct packet_data *pkt;
-	int was_empty, blocked_bio;
-	struct pkt_rb_node *node;
-
-	pd = q->queuedata;
-	if (!pd) {
-		pr_err("%s incorrect request queue\n",
-		       bdevname(bio->bi_bdev, b));
-		goto end_io;
-	}
-
-	/*
-	 * Clone READ bios so we can have our own bi_end_io callback.
-	 */
-	if (bio_data_dir(bio) == READ) {
 	struct bio *cloned_bio = bio_clone(bio, GFP_NOIO);
 	struct packet_stacked_data *psd = mempool_alloc(psd_pool, GFP_NOIO);
 
@@ -2368,45 +2350,17 @@ static void pkt_make_request(struct request_queue *q, struct bio *bio)
 	cloned_bio->bi_end_io = pkt_end_io_read_cloned;
 	pd->stats.secs_r += bio_sectors(bio);
 	pkt_queue_bio(pd, cloned_bio);
-		return;
-	}
-
-	if (!test_bit(PACKET_WRITABLE, &pd->flags)) {
-		pkt_notice(pd, "WRITE for ro device (%llu)\n",
-			   (unsigned long long)bio->bi_iter.bi_sector);
-		goto end_io;
-	}
-
-	if (!bio->bi_iter.bi_size || (bio->bi_iter.bi_size % CD_FRAMESIZE)) {
-		pkt_err(pd, "wrong bio size\n");
-		goto end_io;
 }
 
-	blk_queue_bounce(q, &bio);
-
-	zone = get_zone(bio->bi_iter.bi_sector, pd);
-	pkt_dbg(2, pd, "start = %6llx stop = %6llx\n",
-		(unsigned long long)bio->bi_iter.bi_sector,
-		(unsigned long long)bio_end_sector(bio));
-
-	/* Check if we have to split the bio */
+static void pkt_make_request_write(struct request_queue *q, struct bio *bio)
 {
-		struct bio_pair *bp;
-		sector_t last_zone;
-		int first_sectors;
+	struct pktcdvd_device *pd = q->queuedata;
+	sector_t zone;
+	struct packet_data *pkt;
+	int was_empty, blocked_bio;
+	struct pkt_rb_node *node;
 
-		last_zone = get_zone(bio_end_sector(bio) - 1, pd);
-		if (last_zone != zone) {
-			BUG_ON(last_zone != zone + pd->settings.size);
-			first_sectors = last_zone - bio->bi_iter.bi_sector;
-			bp = bio_pair_split(bio, first_sectors);
-			BUG_ON(!bp);
-			pkt_make_request(q, &bp->bio1);
-			pkt_make_request(q, &bp->bio2);
-			bio_pair_release(bp);
-			return;
-		}
-	}
+	zone = get_zone(bio->bi_iter.bi_sector, pd);
 
 	/*
 	 * If we find a matching packet in state WAITING or READ_WAIT, we can
@@ -2480,6 +2434,64 @@ static void pkt_make_request(struct request_queue *q, struct bio *bio)
 		 */
 		wake_up(&pd->wqueue);
 	}
+}
+
+static void pkt_make_request(struct request_queue *q, struct bio *bio)
+{
+	struct pktcdvd_device *pd;
+	char b[BDEVNAME_SIZE];
+	struct bio *split;
+
+	pd = q->queuedata;
+	if (!pd) {
+		pr_err("%s incorrect request queue\n",
+		       bdevname(bio->bi_bdev, b));
+		goto end_io;
+	}
+
+	pkt_dbg(2, pd, "start = %6llx stop = %6llx\n",
+		(unsigned long long)bio->bi_iter.bi_sector,
+		(unsigned long long)bio_end_sector(bio));
+
+	/*
+	 * Clone READ bios so we can have our own bi_end_io callback.
+	 */
+	if (bio_data_dir(bio) == READ) {
+		pkt_make_request_read(pd, bio);
+		return;
+	}
+
+	if (!test_bit(PACKET_WRITABLE, &pd->flags)) {
+		pkt_notice(pd, "WRITE for ro device (%llu)\n",
+			   (unsigned long long)bio->bi_iter.bi_sector);
+		goto end_io;
+	}
+
+	if (!bio->bi_iter.bi_size || (bio->bi_iter.bi_size % CD_FRAMESIZE)) {
+		pkt_err(pd, "wrong bio size\n");
+		goto end_io;
+	}
+
+	blk_queue_bounce(q, &bio);
+
+	do {
+		sector_t zone = get_zone(bio->bi_iter.bi_sector, pd);
+		sector_t last_zone = get_zone(bio_end_sector(bio) - 1, pd);
+
+		if (last_zone != zone) {
+			BUG_ON(last_zone != zone + pd->settings.size);
+
+			split = bio_split(bio, last_zone -
+					  bio->bi_iter.bi_sector,
+					  GFP_NOIO, fs_bio_set);
+			bio_chain(split, bio);
+		} else {
+			split = bio;
+		}
+
+		pkt_make_request_write(q, split);
+	} while (split != bio);
+
+	return;
 end_io:
 	bio_io_error(bio);
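
Note: the do/while above is the generic consumer pattern for the new
interface: bio_split() advances @bio past the split point, so each pass
peels one zone off the front until the remainder fits in a single zone
and is submitted as itself. Distilled (an illustrative sketch only;
need_split(), piece_sectors() and submit_piece() are hypothetical
stand-ins, not kernel functions):

	struct bio *split;

	do {
		if (need_split(bio)) {			/* hypothetical test */
			split = bio_split(bio, piece_sectors(bio),
					  GFP_NOIO, fs_bio_set);
			bio_chain(split, bio);		/* @bio is now the tail */
		} else {
			split = bio;			/* last piece */
		}

		submit_piece(split);			/* hypothetical submit */
	} while (split != bio);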
drivers/md/bcache/bcache.h  +0 −1
@@ -901,7 +901,6 @@ void bch_bbio_endio(struct cache_set *, struct bio *, int, const char *);
 void bch_bbio_free(struct bio *, struct cache_set *);
 struct bio *bch_bbio_alloc(struct cache_set *);
 
-struct bio *bch_bio_split(struct bio *, int, gfp_t, struct bio_set *);
 void bch_generic_make_request(struct bio *, struct bio_split_pool *);
 void __bch_submit_bbio(struct bio *, struct cache_set *);
 void bch_submit_bbio(struct bio *, struct cache_set *, struct bkey *, unsigned);
drivers/md/bcache/io.c  +2 −80
@@ -11,84 +11,6 @@
 
 #include <linux/blkdev.h>
 
-/**
- * bch_bio_split - split a bio
- * @bio:	bio to split
- * @sectors:	number of sectors to split from the front of @bio
- * @gfp:	gfp mask
- * @bs:		bio set to allocate from
- *
- * Allocates and returns a new bio which represents @sectors from the start of
- * @bio, and updates @bio to represent the remaining sectors.
- *
- * If bio_sectors(@bio) was less than or equal to @sectors, returns @bio
- * unchanged.
- *
- * The newly allocated bio will point to @bio's bi_io_vec, if the split was on a
- * bvec boundry; it is the caller's responsibility to ensure that @bio is not
- * freed before the split.
- */
-struct bio *bch_bio_split(struct bio *bio, int sectors,
-			  gfp_t gfp, struct bio_set *bs)
-{
-	unsigned vcnt = 0, nbytes = sectors << 9;
-	struct bio_vec bv;
-	struct bvec_iter iter;
-	struct bio *ret = NULL;
-
-	BUG_ON(sectors <= 0);
-
-	if (sectors >= bio_sectors(bio))
-		return bio;
-
-	if (bio->bi_rw & REQ_DISCARD) {
-		ret = bio_alloc_bioset(gfp, 1, bs);
-		if (!ret)
-			return NULL;
-		goto out;
-	}
-
-	bio_for_each_segment(bv, bio, iter) {
-		vcnt++;
-
-		if (nbytes <= bv.bv_len)
-			break;
-
-		nbytes -= bv.bv_len;
-	}
-
-	ret = bio_alloc_bioset(gfp, vcnt, bs);
-	if (!ret)
-		return NULL;
-
-	bio_for_each_segment(bv, bio, iter) {
-		ret->bi_io_vec[ret->bi_vcnt++] = bv;
-
-		if (ret->bi_vcnt == vcnt)
-			break;
-	}
-
-	ret->bi_io_vec[ret->bi_vcnt - 1].bv_len = nbytes;
-out:
-	ret->bi_bdev	= bio->bi_bdev;
-	ret->bi_iter.bi_sector	= bio->bi_iter.bi_sector;
-	ret->bi_iter.bi_size	= sectors << 9;
-	ret->bi_rw	= bio->bi_rw;
-
-	if (bio_integrity(bio)) {
-		if (bio_integrity_clone(ret, bio, gfp)) {
-			bio_put(ret);
-			return NULL;
-		}
-
-		bio_integrity_trim(ret, 0, bio_sectors(ret));
-	}
-
-	bio_advance(bio, ret->bi_iter.bi_size);
-
-	return ret;
-}
-
 static unsigned bch_bio_max_sectors(struct bio *bio)
 {
 	struct request_queue *q = bdev_get_queue(bio->bi_bdev);
@@ -172,7 +94,7 @@ void bch_generic_make_request(struct bio *bio, struct bio_split_pool *p)
 	bio_get(bio);
 
 	do {
-		n = bch_bio_split(bio, bch_bio_max_sectors(bio),
+		n = bio_next_split(bio, bch_bio_max_sectors(bio),
 				   GFP_NOIO, s->p->bio_split);
 
 		n->bi_end_io	= bch_bio_submit_split_endio;
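
Note: bio_next_split(), which replaces bch_bio_split() here and below,
is added to include/linux/bio.h by this commit (that hunk is not shown
on this page). It keeps bch_bio_split()'s convenient semantics, namely
that a split covering the whole bio returns @bio itself instead of
allocating, and is essentially:

	static inline struct bio *bio_next_split(struct bio *bio, int sectors,
						 gfp_t gfp, struct bio_set *bs)
	{
		if (sectors >= bio_sectors(bio))
			return bio;	/* no split needed: whole bio */

		return bio_split(bio, sectors, gfp, bs);
	}

That is why callers such as cached_dev_cache_miss() below can test
`miss == bio` to detect the "no split happened" case.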
drivers/md/bcache/request.c  +6 −6
@@ -371,7 +371,7 @@ static void bch_data_insert_start(struct closure *cl)
 				       op->writeback))
 			goto err;
 
-		n = bch_bio_split(bio, KEY_SIZE(k), GFP_NOIO, split);
+		n = bio_next_split(bio, KEY_SIZE(k), GFP_NOIO, split);
 
 		n->bi_end_io	= bch_data_insert_endio;
 		n->bi_private	= cl;
@@ -679,7 +679,7 @@ static int cache_lookup_fn(struct btree_op *op, struct btree *b, struct bkey *k)
 	if (KEY_DIRTY(k))
 		s->read_dirty_data = true;
 
-	n = bch_bio_split(bio, min_t(uint64_t, INT_MAX,
+	n = bio_next_split(bio, min_t(uint64_t, INT_MAX,
 				      KEY_OFFSET(k) - bio->bi_iter.bi_sector),
 			   GFP_NOIO, s->d->bio_split);
 
@@ -920,7 +920,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
 	struct bio *miss, *cache_bio;
 
 	if (s->cache_miss || s->iop.bypass) {
-		miss = bch_bio_split(bio, sectors, GFP_NOIO, s->d->bio_split);
+		miss = bio_next_split(bio, sectors, GFP_NOIO, s->d->bio_split);
 		ret = miss == bio ? MAP_DONE : MAP_CONTINUE;
 		goto out_submit;
 	}
@@ -943,7 +943,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
 
 	s->iop.replace = true;
 
-	miss = bch_bio_split(bio, sectors, GFP_NOIO, s->d->bio_split);
+	miss = bio_next_split(bio, sectors, GFP_NOIO, s->d->bio_split);
 
 	/* btree_search_recurse()'s btree iterator is no good anymore */
 	ret = miss == bio ? MAP_DONE : -EINTR;