Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit ccce20fc authored by Bart Van Assche's avatar Bart Van Assche Committed by Martin K. Petersen
Browse files

scsi: sd_zbc: Avoid that resetting a zone fails sporadically



Since SCSI scanning occurs asynchronously, since sd_revalidate_disk() is
called from sd_probe_async() and since sd_revalidate_disk() calls
sd_zbc_read_zones() it can happen that sd_zbc_read_zones() is called
concurrently with blkdev_report_zones() and/or blkdev_reset_zones().  That can
cause these functions to fail with -EIO because sd_zbc_read_zones() e.g. sets
q->nr_zones to zero before restoring it to the actual value, even if no drive
characteristics have changed.  Avoid that this can happen by making the
following changes:

- Protect the code that updates zone information with blk_queue_enter()
  and blk_queue_exit().
- Modify sd_zbc_setup_seq_zones_bitmap() and sd_zbc_setup() such that
  these functions do not modify struct scsi_disk before all zone
  information has been obtained.

Note: since commit 055f6e18 ("block: Make q_usage_counter also track
legacy requests"; kernel v4.15) the request queue freezing mechanism also
affects legacy request queues.

Fixes: 89d94756 ("sd: Implement support for ZBC devices")
Signed-off-by: default avatarBart Van Assche <bart.vanassche@wdc.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Damien Le Moal <damien.lemoal@wdc.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Hannes Reinecke <hare@suse.com>
Cc: stable@vger.kernel.org # v4.16
Reviewed-by: default avatarDamien Le Moal <damien.lemoal@wdc.com>
Signed-off-by: default avatarMartin K. Petersen <martin.petersen@oracle.com>
parent 505aa4b6
Loading
Loading
Loading
Loading
+82 −58
Original line number Original line Diff line number Diff line
@@ -400,8 +400,10 @@ static int sd_zbc_check_capacity(struct scsi_disk *sdkp, unsigned char *buf)
 *
 *
 * Check that all zones of the device are equal. The last zone can however
 * Check that all zones of the device are equal. The last zone can however
 * be smaller. The zone size must also be a power of two number of LBAs.
 * be smaller. The zone size must also be a power of two number of LBAs.
 *
 * Returns the zone size in bytes upon success or an error code upon failure.
 */
 */
static int sd_zbc_check_zone_size(struct scsi_disk *sdkp)
static s64 sd_zbc_check_zone_size(struct scsi_disk *sdkp)
{
{
	u64 zone_blocks = 0;
	u64 zone_blocks = 0;
	sector_t block = 0;
	sector_t block = 0;
@@ -412,8 +414,6 @@ static int sd_zbc_check_zone_size(struct scsi_disk *sdkp)
	int ret;
	int ret;
	u8 same;
	u8 same;


	sdkp->zone_blocks = 0;

	/* Get a buffer */
	/* Get a buffer */
	buf = kmalloc(SD_ZBC_BUF_SIZE, GFP_KERNEL);
	buf = kmalloc(SD_ZBC_BUF_SIZE, GFP_KERNEL);
	if (!buf)
	if (!buf)
@@ -445,16 +445,17 @@ static int sd_zbc_check_zone_size(struct scsi_disk *sdkp)


		/* Parse zone descriptors */
		/* Parse zone descriptors */
		while (rec < buf + buf_len) {
		while (rec < buf + buf_len) {
			zone_blocks = get_unaligned_be64(&rec[8]);
			u64 this_zone_blocks = get_unaligned_be64(&rec[8]);
			if (sdkp->zone_blocks == 0) {

				sdkp->zone_blocks = zone_blocks;
			if (zone_blocks == 0) {
			} else if (zone_blocks != sdkp->zone_blocks &&
				zone_blocks = this_zone_blocks;
				   (block + zone_blocks < sdkp->capacity
			} else if (this_zone_blocks != zone_blocks &&
				    || zone_blocks > sdkp->zone_blocks)) {
				   (block + this_zone_blocks < sdkp->capacity
				zone_blocks = 0;
				    || this_zone_blocks > zone_blocks)) {
				this_zone_blocks = 0;
				goto out;
				goto out;
			}
			}
			block += zone_blocks;
			block += this_zone_blocks;
			rec += 64;
			rec += 64;
		}
		}


@@ -467,8 +468,6 @@ static int sd_zbc_check_zone_size(struct scsi_disk *sdkp)


	} while (block < sdkp->capacity);
	} while (block < sdkp->capacity);


	zone_blocks = sdkp->zone_blocks;

out:
out:
	if (!zone_blocks) {
	if (!zone_blocks) {
		if (sdkp->first_scan)
		if (sdkp->first_scan)
@@ -488,8 +487,7 @@ static int sd_zbc_check_zone_size(struct scsi_disk *sdkp)
				  "Zone size too large\n");
				  "Zone size too large\n");
		ret = -ENODEV;
		ret = -ENODEV;
	} else {
	} else {
		sdkp->zone_blocks = zone_blocks;
		ret = zone_blocks;
		sdkp->zone_shift = ilog2(zone_blocks);
	}
	}


out_free:
out_free:
@@ -500,15 +498,14 @@ static int sd_zbc_check_zone_size(struct scsi_disk *sdkp)


/**
/**
 * sd_zbc_alloc_zone_bitmap - Allocate a zone bitmap (one bit per zone).
 * sd_zbc_alloc_zone_bitmap - Allocate a zone bitmap (one bit per zone).
 * @sdkp: The disk of the bitmap
 * @nr_zones: Number of zones to allocate space for.
 * @numa_node: NUMA node to allocate the memory from.
 */
 */
static inline unsigned long *sd_zbc_alloc_zone_bitmap(struct scsi_disk *sdkp)
static inline unsigned long *
sd_zbc_alloc_zone_bitmap(u32 nr_zones, int numa_node)
{
{
	struct request_queue *q = sdkp->disk->queue;
	return kzalloc_node(BITS_TO_LONGS(nr_zones) * sizeof(unsigned long),

			    GFP_KERNEL, numa_node);
	return kzalloc_node(BITS_TO_LONGS(sdkp->nr_zones)
			    * sizeof(unsigned long),
			    GFP_KERNEL, q->node);
}
}


/**
/**
@@ -516,6 +513,7 @@ static inline unsigned long *sd_zbc_alloc_zone_bitmap(struct scsi_disk *sdkp)
 * @sdkp: disk used
 * @sdkp: disk used
 * @buf: report reply buffer
 * @buf: report reply buffer
 * @buflen: length of @buf
 * @buflen: length of @buf
 * @zone_shift: logarithm base 2 of the number of blocks in a zone
 * @seq_zones_bitmap: bitmap of sequential zones to set
 * @seq_zones_bitmap: bitmap of sequential zones to set
 *
 *
 * Parse reported zone descriptors in @buf to identify sequential zones and
 * Parse reported zone descriptors in @buf to identify sequential zones and
@@ -525,7 +523,7 @@ static inline unsigned long *sd_zbc_alloc_zone_bitmap(struct scsi_disk *sdkp)
 * Return the LBA after the last zone reported.
 * Return the LBA after the last zone reported.
 */
 */
static sector_t sd_zbc_get_seq_zones(struct scsi_disk *sdkp, unsigned char *buf,
static sector_t sd_zbc_get_seq_zones(struct scsi_disk *sdkp, unsigned char *buf,
				     unsigned int buflen,
				     unsigned int buflen, u32 zone_shift,
				     unsigned long *seq_zones_bitmap)
				     unsigned long *seq_zones_bitmap)
{
{
	sector_t lba, next_lba = sdkp->capacity;
	sector_t lba, next_lba = sdkp->capacity;
@@ -544,7 +542,7 @@ static sector_t sd_zbc_get_seq_zones(struct scsi_disk *sdkp, unsigned char *buf,
		if (type != ZBC_ZONE_TYPE_CONV &&
		if (type != ZBC_ZONE_TYPE_CONV &&
		    cond != ZBC_ZONE_COND_READONLY &&
		    cond != ZBC_ZONE_COND_READONLY &&
		    cond != ZBC_ZONE_COND_OFFLINE)
		    cond != ZBC_ZONE_COND_OFFLINE)
			set_bit(lba >> sdkp->zone_shift, seq_zones_bitmap);
			set_bit(lba >> zone_shift, seq_zones_bitmap);
		next_lba = lba + get_unaligned_be64(&rec[8]);
		next_lba = lba + get_unaligned_be64(&rec[8]);
		rec += 64;
		rec += 64;
	}
	}
@@ -553,12 +551,16 @@ static sector_t sd_zbc_get_seq_zones(struct scsi_disk *sdkp, unsigned char *buf,
}
}


/**
/**
 * sd_zbc_setup_seq_zones_bitmap - Initialize the disk seq zone bitmap.
 * sd_zbc_setup_seq_zones_bitmap - Initialize a seq zone bitmap.
 * @sdkp: target disk
 * @sdkp: target disk
 * @zone_shift: logarithm base 2 of the number of blocks in a zone
 * @nr_zones: number of zones to set up a seq zone bitmap for
 *
 *
 * Allocate a zone bitmap and initialize it by identifying sequential zones.
 * Allocate a zone bitmap and initialize it by identifying sequential zones.
 */
 */
static int sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp)
static unsigned long *
sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp, u32 zone_shift,
			      u32 nr_zones)
{
{
	struct request_queue *q = sdkp->disk->queue;
	struct request_queue *q = sdkp->disk->queue;
	unsigned long *seq_zones_bitmap;
	unsigned long *seq_zones_bitmap;
@@ -566,9 +568,9 @@ static int sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp)
	unsigned char *buf;
	unsigned char *buf;
	int ret = -ENOMEM;
	int ret = -ENOMEM;


	seq_zones_bitmap = sd_zbc_alloc_zone_bitmap(sdkp);
	seq_zones_bitmap = sd_zbc_alloc_zone_bitmap(nr_zones, q->node);
	if (!seq_zones_bitmap)
	if (!seq_zones_bitmap)
		return -ENOMEM;
		return ERR_PTR(-ENOMEM);


	buf = kmalloc(SD_ZBC_BUF_SIZE, GFP_KERNEL);
	buf = kmalloc(SD_ZBC_BUF_SIZE, GFP_KERNEL);
	if (!buf)
	if (!buf)
@@ -579,7 +581,7 @@ static int sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp)
		if (ret)
		if (ret)
			goto out;
			goto out;
		lba = sd_zbc_get_seq_zones(sdkp, buf, SD_ZBC_BUF_SIZE,
		lba = sd_zbc_get_seq_zones(sdkp, buf, SD_ZBC_BUF_SIZE,
					   seq_zones_bitmap);
					   zone_shift, seq_zones_bitmap);
	}
	}


	if (lba != sdkp->capacity) {
	if (lba != sdkp->capacity) {
@@ -591,12 +593,9 @@ static int sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp)
	kfree(buf);
	kfree(buf);
	if (ret) {
	if (ret) {
		kfree(seq_zones_bitmap);
		kfree(seq_zones_bitmap);
		return ret;
		return ERR_PTR(ret);
	}
	}

	return seq_zones_bitmap;
	q->seq_zones_bitmap = seq_zones_bitmap;

	return 0;
}
}


static void sd_zbc_cleanup(struct scsi_disk *sdkp)
static void sd_zbc_cleanup(struct scsi_disk *sdkp)
@@ -612,44 +611,64 @@ static void sd_zbc_cleanup(struct scsi_disk *sdkp)
	q->nr_zones = 0;
	q->nr_zones = 0;
}
}


static int sd_zbc_setup(struct scsi_disk *sdkp)
static int sd_zbc_setup(struct scsi_disk *sdkp, u32 zone_blocks)
{
{
	struct request_queue *q = sdkp->disk->queue;
	struct request_queue *q = sdkp->disk->queue;
	u32 zone_shift = ilog2(zone_blocks);
	u32 nr_zones;
	int ret;
	int ret;


	/* READ16/WRITE16 is mandatory for ZBC disks */
	sdkp->device->use_16_for_rw = 1;
	sdkp->device->use_10_for_rw = 0;

	/* chunk_sectors indicates the zone size */
	/* chunk_sectors indicates the zone size */
	blk_queue_chunk_sectors(sdkp->disk->queue,
	blk_queue_chunk_sectors(q,
			logical_to_sectors(sdkp->device, sdkp->zone_blocks));
			logical_to_sectors(sdkp->device, zone_blocks));
	sdkp->nr_zones =
	nr_zones = round_up(sdkp->capacity, zone_blocks) >> zone_shift;
		round_up(sdkp->capacity, sdkp->zone_blocks) >> sdkp->zone_shift;


	/*
	/*
	 * Initialize the device request queue information if the number
	 * Initialize the device request queue information if the number
	 * of zones changed.
	 * of zones changed.
	 */
	 */
	if (sdkp->nr_zones != q->nr_zones) {
	if (nr_zones != sdkp->nr_zones || nr_zones != q->nr_zones) {

		unsigned long *seq_zones_wlock = NULL, *seq_zones_bitmap = NULL;
		sd_zbc_cleanup(sdkp);
		size_t zone_bitmap_size;


		q->nr_zones = sdkp->nr_zones;
		if (nr_zones) {
		if (sdkp->nr_zones) {
			seq_zones_wlock = sd_zbc_alloc_zone_bitmap(nr_zones,
			q->seq_zones_wlock = sd_zbc_alloc_zone_bitmap(sdkp);
								   q->node);
			if (!q->seq_zones_wlock) {
			if (!seq_zones_wlock) {
				ret = -ENOMEM;
				ret = -ENOMEM;
				goto err;
				goto err;
			}
			}


			ret = sd_zbc_setup_seq_zones_bitmap(sdkp);
			seq_zones_bitmap = sd_zbc_setup_seq_zones_bitmap(sdkp,
			if (ret) {
							zone_shift, nr_zones);
				sd_zbc_cleanup(sdkp);
			if (IS_ERR(seq_zones_bitmap)) {
				ret = PTR_ERR(seq_zones_bitmap);
				kfree(seq_zones_wlock);
				goto err;
				goto err;
			}
			}
		}
		}
		zone_bitmap_size = BITS_TO_LONGS(nr_zones) *
			sizeof(unsigned long);
		blk_mq_freeze_queue(q);
		if (q->nr_zones != nr_zones) {
			/* READ16/WRITE16 is mandatory for ZBC disks */
			sdkp->device->use_16_for_rw = 1;
			sdkp->device->use_10_for_rw = 0;


			sdkp->zone_blocks = zone_blocks;
			sdkp->zone_shift = zone_shift;
			sdkp->nr_zones = nr_zones;
			q->nr_zones = nr_zones;
			swap(q->seq_zones_wlock, seq_zones_wlock);
			swap(q->seq_zones_bitmap, seq_zones_bitmap);
		} else if (memcmp(q->seq_zones_bitmap, seq_zones_bitmap,
				  zone_bitmap_size) != 0) {
			memcpy(q->seq_zones_bitmap, seq_zones_bitmap,
			       zone_bitmap_size);
		}
		blk_mq_unfreeze_queue(q);
		kfree(seq_zones_wlock);
		kfree(seq_zones_bitmap);
	}
	}


	return 0;
	return 0;
@@ -661,6 +680,7 @@ static int sd_zbc_setup(struct scsi_disk *sdkp)


int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf)
int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf)
{
{
	int64_t zone_blocks;
	int ret;
	int ret;


	if (!sd_is_zoned(sdkp))
	if (!sd_is_zoned(sdkp))
@@ -697,12 +717,16 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf)
	 * Check zone size: only devices with a constant zone size (except
	 * Check zone size: only devices with a constant zone size (except
	 * an eventual last runt zone) that is a power of 2 are supported.
	 * an eventual last runt zone) that is a power of 2 are supported.
	 */
	 */
	ret = sd_zbc_check_zone_size(sdkp);
	zone_blocks = sd_zbc_check_zone_size(sdkp);
	if (ret)
	ret = -EFBIG;
	if (zone_blocks != (u32)zone_blocks)
		goto err;
	ret = zone_blocks;
	if (ret < 0)
		goto err;
		goto err;


	/* The drive satisfies the kernel restrictions: set it up */
	/* The drive satisfies the kernel restrictions: set it up */
	ret = sd_zbc_setup(sdkp);
	ret = sd_zbc_setup(sdkp, zone_blocks);
	if (ret)
	if (ret)
		goto err;
		goto err;


+5 −0
Original line number Original line Diff line number Diff line
@@ -605,6 +605,11 @@ struct request_queue {
	 * initialized by the low level device driver (e.g. scsi/sd.c).
	 * initialized by the low level device driver (e.g. scsi/sd.c).
	 * Stacking drivers (device mappers) may or may not initialize
	 * Stacking drivers (device mappers) may or may not initialize
	 * these fields.
	 * these fields.
	 *
	 * Reads of this information must be protected with blk_queue_enter() /
	 * blk_queue_exit(). Modifying this information is only allowed while
	 * no requests are being processed. See also blk_mq_freeze_queue() and
	 * blk_mq_unfreeze_queue().
	 */
	 */
	unsigned int		nr_zones;
	unsigned int		nr_zones;
	unsigned long		*seq_zones_bitmap;
	unsigned long		*seq_zones_bitmap;