Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 56a83024 authored by Dmitry Fomichev, committed by Greg Kroah-Hartman
Browse files

dm zoned: reduce overhead of backing device checks



commit e7fad909b68aa37470d9f2d2731b5bec355ee5d6 upstream.

Commit 75d66ffb48efb3 added backing device health checks. As part of
these checks, the check_events() block device operation is invoked in
the dm-zoned mapping path as well as in the reclaim and flush paths.
With ATA or SCSI backing devices, calling check_events() results in a
blocking scsi_test_unit_ready() call in sd_check_events(). Even though
the overhead of calling scsi_test_unit_ready() is small for ATA zoned
devices, it is much larger for SCSI devices and severely degrades
performance.

Fix this performance regression by executing check_events() only in case
of any I/O errors. The function dmz_bdev_is_dying() is modified to call
only blk_queue_dying(), while calls to check_events() are made in a new
helper function, dmz_check_bdev().

Reported-by: zhangxiaoxu <zhangxiaoxu5@huawei.com>
Fixes: 75d66ffb48efb3 ("dm zoned: properly handle backing device failure")
Cc: stable@vger.kernel.org
Signed-off-by: Dmitry Fomichev <dmitry.fomichev@wdc.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
parent 10b9bf59
Loading
Loading
Loading
Loading
+19 −10
Original line number Diff line number Diff line
@@ -552,6 +552,7 @@ static struct dmz_mblock *dmz_get_mblock(struct dmz_metadata *zmd,
		       TASK_UNINTERRUPTIBLE);
	if (test_bit(DMZ_META_ERROR, &mblk->state)) {
		dmz_release_mblock(zmd, mblk);
		dmz_check_bdev(zmd->dev);
		return ERR_PTR(-EIO);
	}

@@ -623,6 +624,8 @@ static int dmz_rdwr_block(struct dmz_metadata *zmd, int op, sector_t block,
	ret = submit_bio_wait(bio);
	bio_put(bio);

	if (ret)
		dmz_check_bdev(zmd->dev);
	return ret;
}

@@ -689,6 +692,7 @@ static int dmz_write_dirty_mblocks(struct dmz_metadata *zmd,
			       TASK_UNINTERRUPTIBLE);
		if (test_bit(DMZ_META_ERROR, &mblk->state)) {
			clear_bit(DMZ_META_ERROR, &mblk->state);
			dmz_check_bdev(zmd->dev);
			ret = -EIO;
		}
		nr_mblks_submitted--;
@@ -766,7 +770,7 @@ int dmz_flush_metadata(struct dmz_metadata *zmd)
	/* If there are no dirty metadata blocks, just flush the device cache */
	if (list_empty(&write_list)) {
		ret = blkdev_issue_flush(zmd->dev->bdev, GFP_NOIO, NULL);
		goto out;
		goto err;
	}

	/*
@@ -776,7 +780,7 @@ int dmz_flush_metadata(struct dmz_metadata *zmd)
	 */
	ret = dmz_log_dirty_mblocks(zmd, &write_list);
	if (ret)
		goto out;
		goto err;

	/*
	 * The log is on disk. It is now safe to update in place
@@ -784,11 +788,11 @@ int dmz_flush_metadata(struct dmz_metadata *zmd)
	 */
	ret = dmz_write_dirty_mblocks(zmd, &write_list, zmd->mblk_primary);
	if (ret)
		goto out;
		goto err;

	ret = dmz_write_sb(zmd, zmd->mblk_primary);
	if (ret)
		goto out;
		goto err;

	while (!list_empty(&write_list)) {
		mblk = list_first_entry(&write_list, struct dmz_mblock, link);
@@ -803,16 +807,20 @@ int dmz_flush_metadata(struct dmz_metadata *zmd)

	zmd->sb_gen++;
out:
	if (ret && !list_empty(&write_list)) {
		spin_lock(&zmd->mblk_lock);
		list_splice(&write_list, &zmd->mblk_dirty_list);
		spin_unlock(&zmd->mblk_lock);
	}

	dmz_unlock_flush(zmd);
	up_write(&zmd->mblk_sem);

	return ret;

err:
	if (!list_empty(&write_list)) {
		spin_lock(&zmd->mblk_lock);
		list_splice(&write_list, &zmd->mblk_dirty_list);
		spin_unlock(&zmd->mblk_lock);
	}
	if (!dmz_check_bdev(zmd->dev))
		ret = -EIO;
	goto out;
}

/*
@@ -1235,6 +1243,7 @@ static int dmz_update_zone(struct dmz_metadata *zmd, struct dm_zone *zone)
	if (ret) {
		dmz_dev_err(zmd->dev, "Get zone %u report failed",
			    dmz_id(zmd, zone));
		dmz_check_bdev(zmd->dev);
		return ret;
	}

+2 −6
Original line number Diff line number Diff line
@@ -81,6 +81,7 @@ static int dmz_reclaim_align_wp(struct dmz_reclaim *zrc, struct dm_zone *zone,
			    "Align zone %u wp %llu to %llu (wp+%u) blocks failed %d",
			    dmz_id(zmd, zone), (unsigned long long)wp_block,
			    (unsigned long long)block, nr_blocks, ret);
		dmz_check_bdev(zrc->dev);
		return ret;
	}

@@ -488,12 +489,7 @@ static void dmz_reclaim_work(struct work_struct *work)
	ret = dmz_do_reclaim(zrc);
	if (ret) {
		dmz_dev_debug(zrc->dev, "Reclaim error %d\n", ret);
		if (ret == -EIO)
			/*
			 * LLD might be performing some error handling sequence
			 * at the underlying device. To not interfere, do not
			 * attempt to schedule the next reclaim run immediately.
			 */
		if (!dmz_check_bdev(zrc->dev))
			return;
	}

+38 −16
Original line number Diff line number Diff line
@@ -79,6 +79,8 @@ static inline void dmz_bio_endio(struct bio *bio, blk_status_t status)

	if (status != BLK_STS_OK && bio->bi_status == BLK_STS_OK)
		bio->bi_status = status;
	if (bio->bi_status != BLK_STS_OK)
		bioctx->target->dev->flags |= DMZ_CHECK_BDEV;

	if (atomic_dec_and_test(&bioctx->ref)) {
		struct dm_zone *zone = bioctx->zone;
@@ -564,29 +566,49 @@ static int dmz_queue_chunk_work(struct dmz_target *dmz, struct bio *bio)
}

/*
 * Check the backing device availability. If it's on the way out,
 * Check if the backing device is being removed. If it's on the way out,
 * start failing I/O. Reclaim and metadata components also call this
 * function to cleanly abort operation in the event of such failure.
 */
bool dmz_bdev_is_dying(struct dmz_dev *dmz_dev)
{
	struct gendisk *disk;
	if (dmz_dev->flags & DMZ_BDEV_DYING)
		return true;

	if (dmz_dev->flags & DMZ_CHECK_BDEV)
		return !dmz_check_bdev(dmz_dev);

	if (!(dmz_dev->flags & DMZ_BDEV_DYING)) {
		disk = dmz_dev->bdev->bd_disk;
	if (blk_queue_dying(bdev_get_queue(dmz_dev->bdev))) {
		dmz_dev_warn(dmz_dev, "Backing device queue dying");
		dmz_dev->flags |= DMZ_BDEV_DYING;
		} else if (disk->fops->check_events) {
			if (disk->fops->check_events(disk, 0) &
					DISK_EVENT_MEDIA_CHANGE) {
				dmz_dev_warn(dmz_dev, "Backing device offline");
				dmz_dev->flags |= DMZ_BDEV_DYING;
	}

	return dmz_dev->flags & DMZ_BDEV_DYING;
}

/*
 * Check the backing device availability. This detects such events as
 * backing device going offline due to errors, media removals, etc.
 * This check is less efficient than dmz_bdev_is_dying() and should
 * only be performed as a part of error handling.
 */
bool dmz_check_bdev(struct dmz_dev *dmz_dev)
{
	struct gendisk *disk;

	dmz_dev->flags &= ~DMZ_CHECK_BDEV;

	if (dmz_bdev_is_dying(dmz_dev))
		return false;

	disk = dmz_dev->bdev->bd_disk;
	if (disk->fops->check_events &&
	    disk->fops->check_events(disk, 0) & DISK_EVENT_MEDIA_CHANGE) {
		dmz_dev_warn(dmz_dev, "Backing device offline");
		dmz_dev->flags |= DMZ_BDEV_DYING;
	}

	return dmz_dev->flags & DMZ_BDEV_DYING;
	return !(dmz_dev->flags & DMZ_BDEV_DYING);
}

/*
@@ -902,8 +924,8 @@ static int dmz_prepare_ioctl(struct dm_target *ti, struct block_device **bdev)
{
	struct dmz_target *dmz = ti->private;

	if (dmz_bdev_is_dying(dmz->dev))
		return -ENODEV;
	if (!dmz_check_bdev(dmz->dev))
		return -EIO;

	*bdev = dmz->dev->bdev;

+2 −0
Original line number Diff line number Diff line
@@ -71,6 +71,7 @@ struct dmz_dev {

/* Device flags. */
#define DMZ_BDEV_DYING		(1 << 0)
#define DMZ_CHECK_BDEV		(2 << 0)

/*
 * Zone descriptor.
@@ -254,5 +255,6 @@ void dmz_schedule_reclaim(struct dmz_reclaim *zrc);
 * Functions defined in dm-zoned-target.c
 */
bool dmz_bdev_is_dying(struct dmz_dev *dmz_dev);
bool dmz_check_bdev(struct dmz_dev *dmz_dev);

#endif /* DM_ZONED_H */