Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit fb2dce86 authored by David Woodhouse, committed by Jens Axboe
Browse files

Add 'discard' request handling



Some block devices benefit from a hint that they can forget the contents
of certain sectors. Add basic support for this to the block core, along
with a 'blkdev_issue_discard()' helper function which issues such
requests.

The caller doesn't get to provide an end_io function, since
blkdev_issue_discard() will automatically split the request up into
multiple bios if appropriate. Neither does the function wait for
completion -- it's expected that callers won't care about when, or even
_if_, the request completes. It's only a hint to the device anyway. By
definition, the file system doesn't _care_ about these sectors any more.

[With feedback from OGAWA Hirofumi <hirofumi@mail.parknet.co.jp> and
Jens Axboe <jens.axboe@oracle.com>]

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
parent d628eaef
Loading
Loading
Loading
Loading
+69 −0
Original line number Diff line number Diff line
@@ -315,3 +315,72 @@ int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
	return ret;
}
EXPORT_SYMBOL(blkdev_issue_flush);

/*
 * Completion callback installed on every bio built by
 * blkdev_issue_discard().
 *
 * A non-zero @err clears BIO_UPTODATE on the bio; -EOPNOTSUPP is
 * additionally recorded via BIO_EOPNOTSUPP so the submitter can tell
 * "not supported" apart from a generic failure.  The reference held on
 * behalf of the in-flight I/O is dropped on the way out.
 */
static void blkdev_discard_end_io(struct bio *bio, int err)
{
	/* -EOPNOTSUPP is non-zero, so it also falls into the check below. */
	if (err == -EOPNOTSUPP)
		set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
	if (err)
		clear_bit(BIO_UPTODATE, &bio->bi_flags);

	bio_put(bio);
}

/**
 * blkdev_issue_discard - queue a discard
 * @bdev:	blockdev to issue discard for
 * @sector:	start sector
 * @nr_sects:	number of sectors to discard
 *
 * Description:
 *    Submit discard bios covering @nr_sects sectors starting at @sector.
 *    The range is cut into pieces of at most q->max_hw_sectors sectors,
 *    one bio per piece.  The function does not wait for completion; it
 *    only reports a failure that is already visible on the bio when
 *    submit_bio() returns.
 *
 *    Returns 0 on success, -ENXIO if @bdev has no disk or no queue,
 *    -EOPNOTSUPP if the queue has no prepare_discard_fn (or a bio came
 *    back flagged BIO_EOPNOTSUPP), -ENOMEM if a bio could not be
 *    allocated, and -EIO if a bio came back without BIO_UPTODATE.
 */
int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
			 unsigned nr_sects)
{
	struct request_queue *q;
	int ret = 0;

	if (!bdev->bd_disk)
		return -ENXIO;

	q = bdev_get_queue(bdev);
	if (!q)
		return -ENXIO;

	if (!q->prepare_discard_fn)
		return -EOPNOTSUPP;

	while (nr_sects && !ret) {
		struct bio *bio;
		unsigned chunk = nr_sects;

		bio = bio_alloc(GFP_KERNEL, 0);
		if (!bio)
			return -ENOMEM;

		/* Never hand the queue more than it takes in one request. */
		if (chunk > q->max_hw_sectors)
			chunk = q->max_hw_sectors;

		bio->bi_end_io = blkdev_discard_end_io;
		bio->bi_bdev = bdev;
		bio->bi_sector = sector;
		bio->bi_size = chunk << 9;

		sector += chunk;
		nr_sects -= chunk;

		/*
		 * Take a second reference: the end_io handler drops one,
		 * and the flags are still inspected below after submission.
		 */
		bio_get(bio);
		submit_bio(WRITE_DISCARD, bio);

		/* Check if it failed immediately */
		if (bio_flagged(bio, BIO_EOPNOTSUPP))
			ret = -EOPNOTSUPP;
		else if (!bio_flagged(bio, BIO_UPTODATE))
			ret = -EIO;
		bio_put(bio);
	}

	return ret;
}
EXPORT_SYMBOL(blkdev_issue_discard);
+20 −8
Original line number Diff line number Diff line
@@ -1079,6 +1079,10 @@ void init_request_from_bio(struct request *req, struct bio *bio)
	 */
	if (unlikely(bio_barrier(bio)))
		req->cmd_flags |= (REQ_HARDBARRIER | REQ_NOMERGE);
	if (unlikely(bio_discard(bio))) {
		req->cmd_flags |= (REQ_SOFTBARRIER | REQ_DISCARD);
		req->q->prepare_discard_fn(req->q, req);
	}

	if (bio_sync(bio))
		req->cmd_flags |= REQ_RW_SYNC;
@@ -1095,7 +1099,7 @@ void init_request_from_bio(struct request *req, struct bio *bio)
static int __make_request(struct request_queue *q, struct bio *bio)
{
	struct request *req;
	int el_ret, nr_sectors, barrier, err;
	int el_ret, nr_sectors, barrier, discard, err;
	const unsigned short prio = bio_prio(bio);
	const int sync = bio_sync(bio);
	int rw_flags;
@@ -1115,6 +1119,12 @@ static int __make_request(struct request_queue *q, struct bio *bio)
		goto end_io;
	}

	discard = bio_discard(bio);
	if (unlikely(discard) && !q->prepare_discard_fn) {
		err = -EOPNOTSUPP;
		goto end_io;
	}

	spin_lock_irq(q->queue_lock);

	if (unlikely(barrier) || elv_queue_empty(q))
@@ -1405,7 +1415,8 @@ static inline void __generic_make_request(struct bio *bio)

		if (bio_check_eod(bio, nr_sectors))
			goto end_io;
		if (bio_empty_barrier(bio) && !q->prepare_flush_fn) {
		if ((bio_empty_barrier(bio) && !q->prepare_flush_fn) ||
		    (bio_discard(bio) && !q->prepare_discard_fn)) {
			err = -EOPNOTSUPP;
			goto end_io;
		}
@@ -1487,7 +1498,6 @@ void submit_bio(int rw, struct bio *bio)
	 * go through the normal accounting stuff before submission.
	 */
	if (bio_has_data(bio)) {

		if (rw & WRITE) {
			count_vm_events(PGPGOUT, count);
		} else {
@@ -1881,7 +1891,7 @@ static int blk_end_io(struct request *rq, int error, unsigned int nr_bytes,
	struct request_queue *q = rq->q;
	unsigned long flags = 0UL;

	if (bio_has_data(rq->bio)) {
	if (bio_has_data(rq->bio) || blk_discard_rq(rq)) {
		if (__end_that_request_first(rq, error, nr_bytes))
			return 1;

@@ -1939,7 +1949,7 @@ EXPORT_SYMBOL_GPL(blk_end_request);
 **/
int __blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
{
	if (bio_has_data(rq->bio) &&
	if ((bio_has_data(rq->bio) || blk_discard_rq(rq)) &&
	    __end_that_request_first(rq, error, nr_bytes))
		return 1;

@@ -2012,12 +2022,14 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
	   we want BIO_RW_AHEAD (bit 1) to imply REQ_FAILFAST (bit 1). */
	rq->cmd_flags |= (bio->bi_rw & 3);

	if (bio_has_data(bio)) {
		rq->nr_phys_segments = bio_phys_segments(q, bio);
		rq->nr_hw_segments = bio_hw_segments(q, bio);
		rq->buffer = bio_data(bio);
	}
	rq->current_nr_sectors = bio_cur_sectors(bio);
	rq->hard_cur_sectors = rq->current_nr_sectors;
	rq->hard_nr_sectors = rq->nr_sectors = bio_sectors(bio);
	rq->buffer = bio_data(bio);
	rq->data_len = bio->bi_size;

	rq->bio = rq->biotail = bio;
+17 −0
Original line number Diff line number Diff line
@@ -32,6 +32,23 @@ void blk_queue_prep_rq(struct request_queue *q, prep_rq_fn *pfn)
}
EXPORT_SYMBOL(blk_queue_prep_rq);

/**
 * blk_queue_set_discard - register a prepare_discard callback on a queue
 * @q:		queue
 * @dfn:	prepare_discard function
 *
 * Stores @dfn in @q->prepare_discard_fn.  The block core invokes this
 * callback on each discard request so the driver can turn it into the
 * form its hardware expects.  While no callback is registered, discard
 * requests on @q are failed with %EOPNOTSUPP.
 *
 */
void blk_queue_set_discard(struct request_queue *q, prepare_discard_fn *dfn)
{
	q->prepare_discard_fn = dfn;
}
EXPORT_SYMBOL(blk_queue_set_discard);

/**
 * blk_queue_merge_bvec - set a merge_bvec function for queue
 * @q:		queue
+6 −2
Original line number Diff line number Diff line
@@ -149,6 +149,8 @@ struct bio {
 * bit 2 -- barrier
 * bit 3 -- fail fast, don't want low level driver retries
 * bit 4 -- synchronous I/O hint: the block layer will unplug immediately
 * bit 5 -- metadata request
 * bit 6 -- discard sectors
 */
#define BIO_RW		0	/* Must match RW in req flags (blkdev.h) */
#define BIO_RW_AHEAD	1	/* Must match FAILFAST in req flags */
@@ -156,6 +158,7 @@ struct bio {
#define BIO_RW_FAILFAST	3
#define BIO_RW_SYNC	4
#define BIO_RW_META	5
#define BIO_RW_DISCARD	6

/*
 * upper 16 bits of bi_rw define the io priority of this bio
@@ -186,13 +189,14 @@ struct bio {
#define bio_rw_ahead(bio)	((bio)->bi_rw & (1 << BIO_RW_AHEAD))
#define bio_rw_meta(bio)	((bio)->bi_rw & (1 << BIO_RW_META))
#define bio_empty_barrier(bio)	(bio_barrier(bio) && !bio_has_data(bio))
#define bio_discard(bio)	((bio)->bi_rw & (1 << BIO_RW_DISCARD))

static inline unsigned int bio_cur_sectors(struct bio *bio)
{
	if (bio->bi_vcnt)
		return bio_iovec(bio)->bv_len >> 9;

	return 0;
	else /* dataless requests such as discard */
		return bio->bi_size >> 9;
}

static inline void *bio_data(struct bio *bio)
+16 −0
Original line number Diff line number Diff line
@@ -89,6 +89,7 @@ enum {
enum rq_flag_bits {
	__REQ_RW,		/* not set, read. set, write */
	__REQ_FAILFAST,		/* no low level driver retries */
	__REQ_DISCARD,		/* request to discard sectors */
	__REQ_SORTED,		/* elevator knows about this request */
	__REQ_SOFTBARRIER,	/* may not be passed by ioscheduler */
	__REQ_HARDBARRIER,	/* may not be passed by drive either */
@@ -111,6 +112,7 @@ enum rq_flag_bits {
};

#define REQ_RW		(1 << __REQ_RW)
#define REQ_DISCARD	(1 << __REQ_DISCARD)
#define REQ_FAILFAST	(1 << __REQ_FAILFAST)
#define REQ_SORTED	(1 << __REQ_SORTED)
#define REQ_SOFTBARRIER	(1 << __REQ_SOFTBARRIER)
@@ -252,6 +254,7 @@ typedef void (request_fn_proc) (struct request_queue *q);
typedef int (make_request_fn) (struct request_queue *q, struct bio *bio);
typedef int (prep_rq_fn) (struct request_queue *, struct request *);
typedef void (unplug_fn) (struct request_queue *);
typedef int (prepare_discard_fn) (struct request_queue *, struct request *);

struct bio_vec;
struct bvec_merge_data {
@@ -307,6 +310,7 @@ struct request_queue
	make_request_fn		*make_request_fn;
	prep_rq_fn		*prep_rq_fn;
	unplug_fn		*unplug_fn;
	prepare_discard_fn	*prepare_discard_fn;
	merge_bvec_fn		*merge_bvec_fn;
	prepare_flush_fn	*prepare_flush_fn;
	softirq_done_fn		*softirq_done_fn;
@@ -546,6 +550,7 @@ enum {
#define blk_sorted_rq(rq)	((rq)->cmd_flags & REQ_SORTED)
#define blk_barrier_rq(rq)	((rq)->cmd_flags & REQ_HARDBARRIER)
#define blk_fua_rq(rq)		((rq)->cmd_flags & REQ_FUA)
#define blk_discard_rq(rq)	((rq)->cmd_flags & REQ_DISCARD)
#define blk_bidi_rq(rq)		((rq)->next_rq != NULL)
#define blk_empty_barrier(rq)	(blk_barrier_rq(rq) && blk_fs_request(rq) && !(rq)->hard_nr_sectors)
/* rq->queuelist of dequeued request must be list_empty() */
@@ -796,6 +801,7 @@ extern void blk_queue_merge_bvec(struct request_queue *, merge_bvec_fn *);
extern void blk_queue_dma_alignment(struct request_queue *, int);
extern void blk_queue_update_dma_alignment(struct request_queue *, int);
extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *);
extern void blk_queue_set_discard(struct request_queue *, prepare_discard_fn *);
extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
extern int blk_queue_ordered(struct request_queue *, unsigned, prepare_flush_fn *);
extern int blk_do_ordered(struct request_queue *, struct request **);
@@ -837,6 +843,16 @@ static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt,
}

extern int blkdev_issue_flush(struct block_device *, sector_t *);
extern int blkdev_issue_discard(struct block_device *, sector_t sector,
				unsigned nr_sects);

/*
 * Filesystem-block flavour of blkdev_issue_discard(): @block and
 * @nr_blocks are given in units of the superblock's block size and are
 * scaled to 512-byte sectors (shift by s_blocksize_bits - 9) before the
 * request is passed on for sb->s_bdev.  The return value is that of
 * blkdev_issue_discard().
 */
static inline int sb_issue_discard(struct super_block *sb,
				   sector_t block, unsigned nr_blocks)
{
	return blkdev_issue_discard(sb->s_bdev,
				    block << (sb->s_blocksize_bits - 9),
				    nr_blocks << (sb->s_blocksize_bits - 9));
}

/*
* command filter functions
Loading