
Commit 4363ac7c authored by Martin K. Petersen, committed by Jens Axboe

block: Implement support for WRITE SAME



The WRITE SAME command supported on some SCSI devices allows the same
block to be efficiently replicated throughout a block range. Only a
single logical block is transferred from the host and the storage device
writes the same data to all blocks described by the I/O.

This patch implements support for WRITE SAME in the block layer. The
blkdev_issue_write_same() function can be used by filesystems and block
drivers to replicate a buffer across a block range. This can be used to
efficiently initialize software RAID devices, etc.
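
As a rough illustration (not part of this commit), a caller such as a software RAID driver could wipe a region by replicating a single zeroed page. The wipe_region() helper below is hypothetical; only blkdev_issue_write_same() itself comes from this patch:

#include <linux/blkdev.h>
#include <linux/gfp.h>

/* Hypothetical caller: replicate one zeroed block across a sector range. */
static int wipe_region(struct block_device *bdev, sector_t start,
		       sector_t nr_sects)
{
	struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);
	int ret;

	if (!page)
		return -ENOMEM;

	/* Fails with -EOPNOTSUPP when the device lacks WRITE SAME. */
	ret = blkdev_issue_write_same(bdev, start, nr_sects,
				      GFP_KERNEL, page);
	__free_page(page);
	return ret;
}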

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Acked-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent f31dc1cd
Documentation/ABI/testing/sysfs-block +14 −0
@@ -206,3 +206,17 @@ Description:
		when a discarded area is read the discard_zeroes_data
		parameter will be set to one. Otherwise it will be 0 and
		the result of reading a discarded area is undefined.

What:		/sys/block/<disk>/queue/write_same_max_bytes
Date:		January 2012
Contact:	Martin K. Petersen <martin.petersen@oracle.com>
Description:
		Some devices support a write same operation in which a
		single data block can be written to a range of several
		contiguous blocks on storage. This can be used to wipe
		areas on disk or to initialize drives in a RAID
		configuration. write_same_max_bytes indicates how many
		bytes can be written in a single write same command. If
		write_same_max_bytes is 0, write same is not supported
		by the device.
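
A hedged userspace sketch (not part of this commit; sda is just an example disk) that probes the new attribute:

#include <stdio.h>

int main(void)
{
	unsigned long long max = 0;
	FILE *f = fopen("/sys/block/sda/queue/write_same_max_bytes", "r");

	if (!f)
		return 1;
	if (fscanf(f, "%llu", &max) == 1)
		printf("write same: %s (max %llu bytes per command)\n",
		       max ? "supported" : "not supported", max);
	fclose(f);
	return 0;
}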
block/blk-core.c +12 −2
@@ -1704,6 +1704,11 @@ generic_make_request_checks(struct bio *bio)
		goto end_io;
	}

	if (bio->bi_rw & REQ_WRITE_SAME && !bdev_write_same(bio->bi_bdev)) {
		err = -EOPNOTSUPP;
		goto end_io;
	}

	/*
	 * Various block parts want %current->io_context and lazy ioc
	 * allocation ends up trading a lot of pain for a small amount of
@@ -1809,8 +1814,6 @@ EXPORT_SYMBOL(generic_make_request);
 */
void submit_bio(int rw, struct bio *bio)
{
	int count = bio_sectors(bio);

	bio->bi_rw |= rw;

	/*
@@ -1818,6 +1821,13 @@ void submit_bio(int rw, struct bio *bio)
	 * go through the normal accounting stuff before submission.
	 */
	if (bio_has_data(bio)) {
		unsigned int count;

		if (unlikely(rw & REQ_WRITE_SAME))
			count = bdev_logical_block_size(bio->bi_bdev) >> 9;
		else
			count = bio_sectors(bio);

		if (rw & WRITE) {
			count_vm_events(PGPGOUT, count);
		} else {
block/blk-lib.c +74 −0
@@ -129,6 +129,80 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
}
EXPORT_SYMBOL(blkdev_issue_discard);

/**
 * blkdev_issue_write_same - queue a write same operation
 * @bdev:	target blockdev
 * @sector:	start sector
 * @nr_sects:	number of sectors to write
 * @gfp_mask:	memory allocation flags (for bio_alloc)
 * @page:	page containing data to write
 *
 * Description:
 *    Issue a write same request for the sectors in question.
 */
int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
			    sector_t nr_sects, gfp_t gfp_mask,
			    struct page *page)
{
	DECLARE_COMPLETION_ONSTACK(wait);
	struct request_queue *q = bdev_get_queue(bdev);
	unsigned int max_write_same_sectors;
	struct bio_batch bb;
	struct bio *bio;
	int ret = 0;

	if (!q)
		return -ENXIO;

	max_write_same_sectors = q->limits.max_write_same_sectors;

	if (max_write_same_sectors == 0)
		return -EOPNOTSUPP;

	atomic_set(&bb.done, 1);
	bb.flags = 1 << BIO_UPTODATE;
	bb.wait = &wait;

	while (nr_sects) {
		bio = bio_alloc(gfp_mask, 1);
		if (!bio) {
			ret = -ENOMEM;
			break;
		}

		bio->bi_sector = sector;
		bio->bi_end_io = bio_batch_end_io;
		bio->bi_bdev = bdev;
		bio->bi_private = &bb;
		bio->bi_vcnt = 1;
		bio->bi_io_vec->bv_page = page;
		bio->bi_io_vec->bv_offset = 0;
		bio->bi_io_vec->bv_len = bdev_logical_block_size(bdev);

		if (nr_sects > max_write_same_sectors) {
			bio->bi_size = max_write_same_sectors << 9;
			nr_sects -= max_write_same_sectors;
			sector += max_write_same_sectors;
		} else {
			bio->bi_size = nr_sects << 9;
			nr_sects = 0;
		}

		atomic_inc(&bb.done);
		submit_bio(REQ_WRITE | REQ_WRITE_SAME, bio);
	}

	/* Wait for bios in-flight */
	if (!atomic_dec_and_test(&bb.done))
		wait_for_completion(&wait);

	if (!test_bit(BIO_UPTODATE, &bb.flags))
		ret = -ENOTSUPP;

	return ret;
}
EXPORT_SYMBOL(blkdev_issue_write_same);

/**
 * blkdev_issue_zeroout - generate number of zero filed write bios
 * @bdev:	blockdev to issue
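
blkdev_issue_write_same() above splits the range into bios of at most max_write_same_sectors each, while every bio carries only a single logical block of payload. A standalone sketch of that arithmetic (the numbers are assumed examples, not from this commit):

#include <stdio.h>

int main(void)
{
	unsigned long long nr_sects = 2097152ULL; /* 1 GiB in 512 B sectors */
	unsigned int max_ws = 65535;              /* example queue limit */

	/* One bio per chunk, rounded up: 33 bios here, yet only 33
	 * logical blocks of payload are transferred from the host. */
	printf("%llu bios\n", (nr_sects + max_ws - 1) / max_ws);
	return 0;
}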
block/blk-merge.c +9 −0
@@ -419,6 +419,10 @@ static int attempt_merge(struct request_queue *q, struct request *req,
	    || next->special)
		return 0;

	if (req->cmd_flags & REQ_WRITE_SAME &&
	    !blk_write_same_mergeable(req->bio, next->bio))
		return 0;

	/*
	 * If we are allowed to merge, then append bio list
	 * from next to rq and release next. merge_requests_fn
@@ -518,6 +522,11 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
	if (bio_integrity(bio) != blk_integrity_rq(rq))
		return false;

	/* must be using the same buffer */
	if (rq->cmd_flags & REQ_WRITE_SAME &&
	    !blk_write_same_mergeable(rq->bio, bio))
		return false;

	return true;
}
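
The blk_write_same_mergeable() helper used in both hunks is added elsewhere in this commit (in include/linux/blkdev.h, not shown in this excerpt); in essence it only allows two WRITE SAME bios to merge when they point at the same data buffer, roughly:

static inline bool blk_write_same_mergeable(struct bio *a, struct bio *b)
{
	/* Merged WRITE SAME requests must replicate identical data. */
	return bio_data(a) == bio_data(b);
}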

block/blk-settings.c +16 −0
@@ -113,6 +113,7 @@ void blk_set_default_limits(struct queue_limits *lim)
	lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
	lim->max_segment_size = BLK_MAX_SEGMENT_SIZE;
	lim->max_sectors = lim->max_hw_sectors = BLK_SAFE_MAX_SECTORS;
	lim->max_write_same_sectors = 0;
	lim->max_discard_sectors = 0;
	lim->discard_granularity = 0;
	lim->discard_alignment = 0;
@@ -144,6 +145,7 @@ void blk_set_stacking_limits(struct queue_limits *lim)
	lim->max_segments = USHRT_MAX;
	lim->max_hw_sectors = UINT_MAX;
	lim->max_sectors = UINT_MAX;
	lim->max_write_same_sectors = UINT_MAX;
}
EXPORT_SYMBOL(blk_set_stacking_limits);

@@ -285,6 +287,18 @@ void blk_queue_max_discard_sectors(struct request_queue *q,
}
EXPORT_SYMBOL(blk_queue_max_discard_sectors);

/**
 * blk_queue_max_write_same_sectors - set max sectors for a single write same
 * @q:  the request queue for the device
 * @max_write_same_sectors: maximum number of sectors to write per command
 **/
void blk_queue_max_write_same_sectors(struct request_queue *q,
				      unsigned int max_write_same_sectors)
{
	q->limits.max_write_same_sectors = max_write_same_sectors;
}
EXPORT_SYMBOL(blk_queue_max_write_same_sectors);

/**
 * blk_queue_max_segments - set max hw segments for a request for this queue
 * @q:  the request queue for the device
@@ -510,6 +524,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,

	t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors);
	t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors);
	t->max_write_same_sectors = min(t->max_write_same_sectors,
					b->max_write_same_sectors);
	t->bounce_pfn = min_not_zero(t->bounce_pfn, b->bounce_pfn);

	t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask,
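
A low-level driver opts in by advertising a non-zero limit, typically from its probe path. A hypothetical fragment (the driver name and the 32 MiB cap are assumptions, not from this commit):

#include <linux/blkdev.h>

static void mydrv_configure_queue(struct request_queue *q)
{
	/* Advertise WRITE SAME support, capped at 32 MiB per command,
	 * expressed in 512-byte sectors. */
	blk_queue_max_write_same_sectors(q, (32 * 1024 * 1024) >> 9);
}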