Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit f9d03f96 authored by Christoph Hellwig's avatar Christoph Hellwig Committed by Jens Axboe
Browse files

block: improve handling of the magic discard payload



Instead of allocating a single unused biovec for discard requests, send
them down without any payload.  Instead we allow the driver to add a
"special" payload using a biovec embedded into struct request (unioned
over other fields never used while in the driver), and overloading
the number of segments for this case.

This has a couple of advantages:

 - we don't have to allocate the bio_vec
 - the amount of special casing for discard requests in the block
   layer is significantly reduced
 - using this same scheme for other request types is trivial,
   which will be important for implementing the new WRITE_ZEROES
   op on devices where it actually requires a payload (e.g. SCSI)
 - we can get rid of playing games with the request length, as
   we'll never touch it and completions will work just fine
 - it will allow us to support ranged discard operations in the
   future by merging non-contiguous discard bios into a single
   request
 - last but not least it removes a lot of code

This patch is the common base for my WIP series for ranges discards and to
remove discard_zeroes_data in favor of always using REQ_OP_WRITE_ZEROES,
so it would be good to get it in quickly.

Signed-off-by: default avatarChristoph Hellwig <hch@lst.de>
Signed-off-by: default avatarJens Axboe <axboe@fb.com>
parent be07e14f
Loading
Loading
Loading
Loading
+1 −9
Original line number Diff line number Diff line
@@ -1840,15 +1840,7 @@ struct bio *bio_split(struct bio *bio, int sectors,
	BUG_ON(sectors <= 0);
	BUG_ON(sectors >= bio_sectors(bio));

	/*
	 * Discards need a mutable bio_vec to accommodate the payload
	 * required by the DSM TRIM and UNMAP commands.
	 */
	if (bio_op(bio) == REQ_OP_DISCARD || bio_op(bio) == REQ_OP_SECURE_ERASE)
		split = bio_clone_bioset(bio, gfp, bs);
	else
	split = bio_clone_fast(bio, gfp, bs);

	if (!split)
		return NULL;

+2 −32
Original line number Diff line number Diff line
@@ -1475,38 +1475,6 @@ void blk_put_request(struct request *req)
}
EXPORT_SYMBOL(blk_put_request);

/**
 * blk_add_request_payload - add a payload to a request
 * @rq: request to update
 * @page: page backing the payload
 * @offset: offset in page
 * @len: length of the payload.
 *
 * This allows to later add a payload to an already submitted request by
 * a block driver.  The driver needs to take care of freeing the payload
 * itself.
 *
 * Note that this is a quite horrible hack and nothing but handling of
 * discard requests should ever use it.
 */
void blk_add_request_payload(struct request *rq, struct page *page,
		int offset, unsigned int len)
{
	struct bio *bio = rq->bio;

	bio->bi_io_vec->bv_page = page;
	bio->bi_io_vec->bv_offset = offset;
	bio->bi_io_vec->bv_len = len;

	bio->bi_iter.bi_size = len;
	bio->bi_vcnt = 1;
	bio->bi_phys_segments = 1;

	rq->__data_len = rq->resid_len = len;
	rq->nr_phys_segments = 1;
}
EXPORT_SYMBOL_GPL(blk_add_request_payload);

bool bio_attempt_back_merge(struct request_queue *q, struct request *req,
			    struct bio *bio)
{
@@ -2642,6 +2610,8 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
		return false;
	}

	WARN_ON_ONCE(req->rq_flags & RQF_SPECIAL_PAYLOAD);

	req->__data_len -= total_bytes;

	/* update sector only for requests with clear definition of sector */
+1 −1
Original line number Diff line number Diff line
@@ -80,7 +80,7 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
			req_sects = end_sect - sector;
		}

		bio = next_bio(bio, 1, gfp_mask);
		bio = next_bio(bio, 0, gfp_mask);
		bio->bi_iter.bi_sector = sector;
		bio->bi_bdev = bdev;
		bio_set_op_attrs(bio, op, 0);
+17 −36
Original line number Diff line number Diff line
@@ -241,18 +241,13 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
	if (!bio)
		return 0;

	/*
	 * This should probably be returning 0, but blk_add_request_payload()
	 * (Christoph!!!!)
	 */
	switch (bio_op(bio)) {
	case REQ_OP_DISCARD:
	case REQ_OP_SECURE_ERASE:
	case REQ_OP_WRITE_SAME:
	case REQ_OP_WRITE_ZEROES:
		return 0;
	case REQ_OP_WRITE_SAME:
		return 1;
	default:
		break;
	}

	fbio = bio;
@@ -410,39 +405,21 @@ __blk_segment_map_sg(struct request_queue *q, struct bio_vec *bvec,
	*bvprv = *bvec;
}

static inline int __blk_bvec_map_sg(struct request_queue *q, struct bio_vec bv,
		struct scatterlist *sglist, struct scatterlist **sg)
{
	*sg = sglist;
	sg_set_page(*sg, bv.bv_page, bv.bv_len, bv.bv_offset);
	return 1;
}

static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio,
			     struct scatterlist *sglist,
			     struct scatterlist **sg)
{
	struct bio_vec bvec, bvprv = { NULL };
	struct bvec_iter iter;
	int nsegs, cluster;

	nsegs = 0;
	cluster = blk_queue_cluster(q);

	switch (bio_op(bio)) {
	case REQ_OP_DISCARD:
	case REQ_OP_SECURE_ERASE:
	case REQ_OP_WRITE_ZEROES:
		/*
		 * This is a hack - drivers should be neither modifying the
		 * biovec, nor relying on bi_vcnt - but because of
		 * blk_add_request_payload(), a discard bio may or may not have
		 * a payload we need to set up here (thank you Christoph) and
		 * bi_vcnt is really the only way of telling if we need to.
		 */
		if (!bio->bi_vcnt)
			return 0;
		/* Fall through */
	case REQ_OP_WRITE_SAME:
		*sg = sglist;
		bvec = bio_iovec(bio);
		sg_set_page(*sg, bvec.bv_page, bvec.bv_len, bvec.bv_offset);
		return 1;
	default:
		break;
	}
	int cluster = blk_queue_cluster(q), nsegs = 0;

	for_each_bio(bio)
		bio_for_each_segment(bvec, bio, iter)
@@ -462,7 +439,11 @@ int blk_rq_map_sg(struct request_queue *q, struct request *rq,
	struct scatterlist *sg = NULL;
	int nsegs = 0;

	if (rq->bio)
	if (rq->rq_flags & RQF_SPECIAL_PAYLOAD)
		nsegs = __blk_bvec_map_sg(q, rq->special_vec, sglist, &sg);
	else if (rq->bio && bio_op(rq->bio) == REQ_OP_WRITE_SAME)
		nsegs = __blk_bvec_map_sg(q, bio_iovec(rq->bio), sglist, &sg);
	else if (rq->bio)
		nsegs = __blk_bios_map_sg(q, rq->bio, sglist, &sg);

	if (unlikely(rq->rq_flags & RQF_COPY_USER) &&
@@ -495,7 +476,7 @@ int blk_rq_map_sg(struct request_queue *q, struct request *rq,
	 * Something must have been wrong if the figured number of
	 * segment is bigger than number of req's physical segments
	 */
	WARN_ON(nsegs > rq->nr_phys_segments);
	WARN_ON(nsegs > blk_rq_nr_phys_segments(rq));

	return nsegs;
}
+4 −13
Original line number Diff line number Diff line
@@ -239,8 +239,6 @@ static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req,
		struct nvme_command *cmnd)
{
	struct nvme_dsm_range *range;
	struct page *page;
	int offset;
	unsigned int nr_bytes = blk_rq_bytes(req);

	range = kmalloc(sizeof(*range), GFP_ATOMIC);
@@ -257,17 +255,10 @@ static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req,
	cmnd->dsm.nr = 0;
	cmnd->dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD);

	req->completion_data = range;
	page = virt_to_page(range);
	offset = offset_in_page(range);
	blk_add_request_payload(req, page, offset, sizeof(*range));

	/*
	 * we set __data_len back to the size of the area to be discarded
	 * on disk. This allows us to report completion on the full amount
	 * of blocks described by the request.
	 */
	req->__data_len = nr_bytes;
	req->special_vec.bv_page = virt_to_page(range);
	req->special_vec.bv_offset = offset_in_page(range);
	req->special_vec.bv_len = sizeof(*range);
	req->rq_flags |= RQF_SPECIAL_PAYLOAD;

	return BLK_MQ_RQ_QUEUE_OK;
}
Loading