
Commit 6cc77e9c authored by Christoph Hellwig, committed by Jens Axboe

block: introduce zoned block devices zone write locking



Components relying only on the request_queue structure for accessing
block devices (e.g. I/O schedulers) have limited knowledge of the
device characteristics. In particular, the device capacity cannot be
easily discovered, which for a zoned block device also results in the
inability to easily know the number of zones of the device (the zone
size is indicated by the chunk_sectors field of the queue limits).

Introduce the nr_zones field to the request_queue structure to simplify
access to this information. Also, add the bitmap seq_zones_bitmap which
indicates which zones of the device are sequential zones (write
preferred or write required), and the bitmap seq_zones_wlock which
indicates if a zone is write locked, that is, if a write request
targeting a zone was dispatched to the device. These fields are
initialized by the low level block device driver (sd.c for ZBC/ZAC
disks). They are not initialized by stacking drivers (device mappers)
handling zoned block devices (e.g. dm-linear).
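
The sd.c changes that actually populate these fields are part of a separate
patch; purely as an illustration, a low level driver could size and fill the
two bitmaps roughly as follows (example_init_zone_info and its capacity-based
rounding are hypothetical, not code from this commit):

#include <linux/blkdev.h>
#include <linux/slab.h>

/*
 * Hypothetical sketch only: one way a low level driver could allocate and
 * publish the new request_queue zone fields after revalidating a zoned disk.
 */
static int example_init_zone_info(struct request_queue *q, sector_t capacity)
{
	sector_t zone_sectors = blk_queue_zone_sectors(q); /* = chunk_sectors */
	unsigned int nr_zones;

	if (!blk_queue_is_zoned(q) || !zone_sectors)
		return 0;

	/* The zone size is a power of two; a smaller last zone still counts. */
	nr_zones = (capacity + zone_sectors - 1) >> ilog2(zone_sectors);

	q->seq_zones_bitmap = kcalloc(BITS_TO_LONGS(nr_zones),
				      sizeof(unsigned long), GFP_KERNEL);
	q->seq_zones_wlock = kcalloc(BITS_TO_LONGS(nr_zones),
				     sizeof(unsigned long), GFP_KERNEL);
	if (!q->seq_zones_bitmap || !q->seq_zones_wlock) {
		kfree(q->seq_zones_bitmap);
		kfree(q->seq_zones_wlock);
		q->seq_zones_bitmap = NULL;
		q->seq_zones_wlock = NULL;
		return -ENOMEM;
	}

	/*
	 * The driver would then set one bit per sequential zone from a zone
	 * report, e.g. set_bit(zone_no, q->seq_zones_bitmap), and finally
	 * expose the zone count.
	 */
	q->nr_zones = nr_zones;
	return 0;
}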

Using this, I/O schedulers can introduce zone write locking to control
request dispatching to a zoned block device and avoid write request
reordering by allowing at most a single write request per zone outside
of the scheduler at any time.
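
For illustration only, a scheduler's dispatch and completion paths could use
the new helpers along these lines (the example_* functions are hypothetical;
the actual scheduler users come in follow-up patches):

#include <linux/blkdev.h>

/*
 * Hypothetical sketch only: hold the per-zone write lock while a write to a
 * sequential zone is outside of the scheduler, and release it on completion.
 */
static struct request *example_dispatch(struct request *rq)
{
	/*
	 * Reads, passthrough requests and writes to conventional zones never
	 * need the zone write lock and can always be dispatched.
	 */
	if (!blk_req_can_dispatch_to_zone(rq))
		return NULL;	/* target zone already write locked, pick another request */

	/* Mark the zone write locked before the request leaves the scheduler. */
	blk_req_zone_write_lock(rq);
	return rq;
}

static void example_request_completed(struct request *rq)
{
	/* Allow the next write to this zone to be dispatched. */
	blk_req_zone_write_unlock(rq);
}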

Based on previous patches from Damien Le Moal.

Signed-off-by: Christoph Hellwig <hch@lst.de>
[Damien]
* Fixed comments and indentation in blkdev.h
* Changed helper functions
* Fixed this commit message
Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent 882d4171
block/blk-core.c  +1 −0
@@ -1641,6 +1641,7 @@ void __blk_put_request(struct request_queue *q, struct request *req)
 
 	lockdep_assert_held(q->queue_lock);
 
+	blk_req_zone_write_unlock(req);
 	blk_pm_put_request(req);
 
 	elv_completed_request(q, req);
block/blk-zoned.c  +42 −0
@@ -21,6 +21,48 @@ static inline sector_t blk_zone_start(struct request_queue *q,
 	return sector & ~zone_mask;
 }
 
+/*
+ * Return true if a request is a write request that needs zone write locking.
+ */
+bool blk_req_needs_zone_write_lock(struct request *rq)
+{
+	if (!rq->q->seq_zones_wlock)
+		return false;
+
+	if (blk_rq_is_passthrough(rq))
+		return false;
+
+	switch (req_op(rq)) {
+	case REQ_OP_WRITE_ZEROES:
+	case REQ_OP_WRITE_SAME:
+	case REQ_OP_WRITE:
+		return blk_rq_zone_is_seq(rq);
+	default:
+		return false;
+	}
+}
+EXPORT_SYMBOL_GPL(blk_req_needs_zone_write_lock);
+
+void __blk_req_zone_write_lock(struct request *rq)
+{
+	if (WARN_ON_ONCE(test_and_set_bit(blk_rq_zone_no(rq),
+					  rq->q->seq_zones_wlock)))
+		return;
+
+	WARN_ON_ONCE(rq->rq_flags & RQF_ZONE_WRITE_LOCKED);
+	rq->rq_flags |= RQF_ZONE_WRITE_LOCKED;
+}
+EXPORT_SYMBOL_GPL(__blk_req_zone_write_lock);
+
+void __blk_req_zone_write_unlock(struct request *rq)
+{
+	rq->rq_flags &= ~RQF_ZONE_WRITE_LOCKED;
+	if (rq->q->seq_zones_wlock)
+		WARN_ON_ONCE(!test_and_clear_bit(blk_rq_zone_no(rq),
+						 rq->q->seq_zones_wlock));
+}
+EXPORT_SYMBOL_GPL(__blk_req_zone_write_unlock);
+
 /*
  * Check that a zone report belongs to the partition.
  * If yes, fix its start sector and write pointer, copy it in the
include/linux/blkdev.h  +111 −0
@@ -121,6 +121,8 @@ typedef __u32 __bitwise req_flags_t;
 /* Look at ->special_vec for the actual data payload instead of the
    bio chain. */
 #define RQF_SPECIAL_PAYLOAD	((__force req_flags_t)(1 << 18))
+/* The per-zone write lock is held for this request */
+#define RQF_ZONE_WRITE_LOCKED	((__force req_flags_t)(1 << 19))
 
 /* flags that prevent us from merging requests: */
 #define RQF_NOMERGE_FLAGS \
@@ -546,6 +548,22 @@ struct request_queue {
 
 	struct queue_limits	limits;
 
+	/*
+	 * Zoned block device information for request dispatch control.
+	 * nr_zones is the total number of zones of the device. This is always
+	 * 0 for regular block devices. seq_zones_bitmap is a bitmap of nr_zones
+	 * bits which indicates if a zone is conventional (bit clear) or
+	 * sequential (bit set). seq_zones_wlock is a bitmap of nr_zones
+	 * bits which indicates if a zone is write locked, that is, if a write
+	 * request targeting the zone was dispatched. All three fields are
+	 * initialized by the low level device driver (e.g. scsi/sd.c).
+	 * Stacking drivers (device mappers) may or may not initialize
+	 * these fields.
+	 */
+	unsigned int		nr_zones;
+	unsigned long		*seq_zones_bitmap;
+	unsigned long		*seq_zones_wlock;
+
 	/*
 	 * sg stuff
 	 */
@@ -790,6 +808,27 @@ static inline unsigned int blk_queue_zone_sectors(struct request_queue *q)
 	return blk_queue_is_zoned(q) ? q->limits.chunk_sectors : 0;
 }
 
+static inline unsigned int blk_queue_nr_zones(struct request_queue *q)
+{
+	return q->nr_zones;
+}
+
+static inline unsigned int blk_queue_zone_no(struct request_queue *q,
+					     sector_t sector)
+{
+	if (!blk_queue_is_zoned(q))
+		return 0;
+	return sector >> ilog2(q->limits.chunk_sectors);
+}
+
+static inline bool blk_queue_zone_is_seq(struct request_queue *q,
+					 sector_t sector)
+{
+	if (!blk_queue_is_zoned(q) || !q->seq_zones_bitmap)
+		return false;
+	return test_bit(blk_queue_zone_no(q, sector), q->seq_zones_bitmap);
+}
+
 static inline bool rq_is_sync(struct request *rq)
 {
 	return op_is_sync(rq->cmd_flags);
@@ -1029,6 +1068,16 @@ static inline unsigned int blk_rq_cur_sectors(const struct request *rq)
 	return blk_rq_cur_bytes(rq) >> 9;
 }
 
+static inline unsigned int blk_rq_zone_no(struct request *rq)
+{
+	return blk_queue_zone_no(rq->q, blk_rq_pos(rq));
+}
+
+static inline unsigned int blk_rq_zone_is_seq(struct request *rq)
+{
+	return blk_queue_zone_is_seq(rq->q, blk_rq_pos(rq));
+}
+
 /*
  * Some commands like WRITE SAME have a payload or data transfer size which
  * is different from the size of the request.  Any driver that supports such
@@ -1578,7 +1627,15 @@ static inline unsigned int bdev_zone_sectors(struct block_device *bdev)
 
 	if (q)
 		return blk_queue_zone_sectors(q);
+	return 0;
+}
 
+static inline unsigned int bdev_nr_zones(struct block_device *bdev)
+{
+	struct request_queue *q = bdev_get_queue(bdev);
+
+	if (q)
+		return blk_queue_nr_zones(q);
 	return 0;
 }
 
@@ -1954,6 +2011,60 @@ extern int __blkdev_driver_ioctl(struct block_device *, fmode_t, unsigned int,
 extern int bdev_read_page(struct block_device *, sector_t, struct page *);
 extern int bdev_write_page(struct block_device *, sector_t, struct page *,
 						struct writeback_control *);
+
+#ifdef CONFIG_BLK_DEV_ZONED
+bool blk_req_needs_zone_write_lock(struct request *rq);
+void __blk_req_zone_write_lock(struct request *rq);
+void __blk_req_zone_write_unlock(struct request *rq);
+
+static inline void blk_req_zone_write_lock(struct request *rq)
+{
+	if (blk_req_needs_zone_write_lock(rq))
+		__blk_req_zone_write_lock(rq);
+}
+
+static inline void blk_req_zone_write_unlock(struct request *rq)
+{
+	if (rq->rq_flags & RQF_ZONE_WRITE_LOCKED)
+		__blk_req_zone_write_unlock(rq);
+}
+
+static inline bool blk_req_zone_is_write_locked(struct request *rq)
+{
+	return rq->q->seq_zones_wlock &&
+		test_bit(blk_rq_zone_no(rq), rq->q->seq_zones_wlock);
+}
+
+static inline bool blk_req_can_dispatch_to_zone(struct request *rq)
+{
+	if (!blk_req_needs_zone_write_lock(rq))
+		return true;
+	return !blk_req_zone_is_write_locked(rq);
+}
+#else
+static inline bool blk_req_needs_zone_write_lock(struct request *rq)
+{
+	return false;
+}
+
+static inline void blk_req_zone_write_lock(struct request *rq)
+{
+}
+
+static inline void blk_req_zone_write_unlock(struct request *rq)
+{
+}
+static inline bool blk_req_zone_is_write_locked(struct request *rq)
+{
+	return false;
+}
+
+static inline bool blk_req_can_dispatch_to_zone(struct request *rq)
+{
+	return true;
+}
+#endif /* CONFIG_BLK_DEV_ZONED */
+
 #else /* CONFIG_BLOCK */
 
 struct block_device;