Commit 130568d5 authored by Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.dk/linux-block

Pull more block updates from Jens Axboe:
 "This is a followup for block changes, that didn't make the initial
  pull request. It's a bit of a mixed bag, this contains:

   - A followup pull request from Sagi for NVMe. Outside of fixups for
     NVMe, it also includes a series for ensuring that we properly
     quiesce hardware queues when browsing live tags.

   - Set of integrity fixes from Dmitry (mostly), fixing various issues
     for folks using DIF/DIX.

   - Fix for a bug introduced in cciss, with the req init changes. From
     Christoph.

   - Fix for a bug in BFQ, from Paolo.

   - Two followup fixes for lightnvm/pblk from Javier.

   - Depth fix from Ming for blk-mq-sched.

   - Also from Ming, a fix for an mtip32xx performance regression
     introduced with the dynamic initialization of commands"

* 'for-linus' of git://git.kernel.dk/linux-block: (44 commits)
  block: call bio_uninit in bio_endio
  nvmet: avoid unneeded assignment of submit_bio return value
  nvme-pci: add module parameter for io queue depth
  nvme-pci: compile warnings in nvme_alloc_host_mem()
  nvmet_fc: Accept variable pad lengths on Create Association LS
  nvme_fc/nvmet_fc: revise Create Association descriptor length
  lightnvm: pblk: remove unnecessary checks
  lightnvm: pblk: control I/O flow also on tear down
  cciss: initialize struct scsi_req
  null_blk: fix error flow for shared tags during module_init
  block: Fix __blkdev_issue_zeroout loop
  nvme-rdma: unconditionally recycle the request mr
  nvme: split nvme_uninit_ctrl into stop and uninit
  virtio_blk: quiesce/unquiesce live IO when entering PM states
  mtip32xx: quiesce request queues to make sure no submissions are inflight
  nbd: quiesce request queues to make sure no submissions are inflight
  nvme: kick requeue list when requeueing a request instead of when starting the queues
  nvme-pci: quiesce/unquiesce admin_q instead of start/stop its hw queues
  nvme-loop: quiesce/unquiesce admin_q instead of start/stop its hw queues
  nvme-fc: quiesce/unquiesce admin_q instead of start/stop its hw queues
  ...
parents 908b852d b222dd2f
+2 −4
@@ -192,7 +192,7 @@ will require extra work due to the application tag.
    supported by the block device.


-    int bio_integrity_prep(bio);
+    bool bio_integrity_prep(bio);

      To generate IMD for WRITE and to set up buffers for READ, the
      filesystem must call bio_integrity_prep(bio).
@@ -201,9 +201,7 @@ will require extra work due to the application tag.
      sector must be set, and the bio should have all data pages
      added.  It is up to the caller to ensure that the bio does not
      change while I/O is in progress.
-
-      bio_integrity_prep() should only be called if
-      bio_integrity_enabled() returned 1.
+      Complete bio with error if prepare failed for some reason.


5.3 PASSING EXISTING INTEGRITY METADATA
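
The new contract is easiest to see from a caller's point of view: instead of pairing bio_integrity_prep() with bio_integrity_enabled(), a submitter now makes a single call and checks the boolean result. A minimal sketch under 4.13-era block APIs; submit_bio_checked() is a hypothetical name, not something added by this merge:

    /* Hypothetical caller: bio_integrity_prep() now performs the checks
     * bio_integrity_enabled() used to do, returns true when the bio may
     * proceed (integrity attached, or none needed), and on failure
     * completes the bio itself and returns false.
     */
    static void submit_bio_checked(struct bio *bio)
    {
    	if (!bio_integrity_prep(bio))
    		return;	/* bio was already completed with an error */

    	generic_make_request(bio);	/* safe to submit */
    }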
+10 −4
@@ -3483,11 +3483,17 @@ static void bfq_update_wr_data(struct bfq_data *bfqd, struct bfq_queue *bfqq)
			}
		}
	}
-	/* Update weight both if it must be raised and if it must be lowered */
+	/*
+	 * To improve latency (for this or other queues), immediately
+	 * update weight both if it must be raised and if it must be
+	 * lowered. Since entity may be on some active tree here, and
+	 * might have a pending change of its ioprio class, invoke
+	 * next function with the last parameter unset (see the
+	 * comments on the function).
+	 */
	if ((entity->weight > entity->orig_weight) != (bfqq->wr_coeff > 1))
-		__bfq_entity_update_weight_prio(
-			bfq_entity_service_tree(entity),
-			entity);
+		__bfq_entity_update_weight_prio(bfq_entity_service_tree(entity),
+						entity, false);
}

/*
+2 −1
@@ -892,7 +892,8 @@ void bfq_put_idle_entity(struct bfq_service_tree *st,
			 struct bfq_entity *entity);
struct bfq_service_tree *
__bfq_entity_update_weight_prio(struct bfq_service_tree *old_st,
-				struct bfq_entity *entity);
+				struct bfq_entity *entity,
+				bool update_class_too);
void bfq_bfqq_served(struct bfq_queue *bfqq, int served);
void bfq_bfqq_charge_time(struct bfq_data *bfqd, struct bfq_queue *bfqq,
			  unsigned long time_ms);
+34 −5
@@ -694,10 +694,28 @@ struct bfq_service_tree *bfq_entity_service_tree(struct bfq_entity *entity)
	return sched_data->service_tree + idx;
}


+/*
+ * Update weight and priority of entity. If update_class_too is true,
+ * then update the ioprio_class of entity too.
+ *
+ * The reason why the update of ioprio_class is controlled through the
+ * last parameter is as follows. Changing the ioprio class of an
+ * entity implies changing the destination service trees for that
+ * entity. If such a change occurred when the entity is already on one
+ * of the service trees for its previous class, then the state of the
+ * entity would become more complex: none of the new possible service
+ * trees for the entity, according to bfq_entity_service_tree(), would
+ * match any of the possible service trees on which the entity
+ * is. Complex operations involving these trees, such as entity
+ * activations and deactivations, should take into account this
+ * additional complexity.  To avoid this issue, this function is
+ * invoked with update_class_too unset in the points in the code where
+ * entity may happen to be on some tree.
+ */
struct bfq_service_tree *
__bfq_entity_update_weight_prio(struct bfq_service_tree *old_st,
-				struct bfq_entity *entity)
+				struct bfq_entity *entity,
+				bool update_class_too)
{
	struct bfq_service_tree *new_st = old_st;

@@ -739,8 +757,14 @@ __bfq_entity_update_weight_prio(struct bfq_service_tree *old_st,
				  bfq_weight_to_ioprio(entity->orig_weight);
		}

-		if (bfqq)
+		if (bfqq && update_class_too)
			bfqq->ioprio_class = bfqq->new_ioprio_class;
-		entity->prio_changed = 0;
+
+		/*
+		 * Reset prio_changed only if the ioprio_class change
+		 * is not pending any longer.
+		 */
+		if (!bfqq || bfqq->ioprio_class == bfqq->new_ioprio_class)
+			entity->prio_changed = 0;

		/*
@@ -867,7 +891,12 @@ static void bfq_update_fin_time_enqueue(struct bfq_entity *entity,
{
	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);

-	st = __bfq_entity_update_weight_prio(st, entity);
+	/*
+	 * When this function is invoked, entity is not in any service
+	 * tree, so it is safe to invoke the next function with the last
+	 * parameter set (see the comments on the function).
+	 */
+	st = __bfq_entity_update_weight_prio(st, entity, true);
	bfq_calc_finish(entity, entity->budget);

	/*
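
To make the convention concrete, here is a hypothetical wrapper, invented for illustration only (BFQ itself passes a literal true or false at each call site), that encodes the rule stated in the comment above:

    /* Illustration: pass update_class_too == true only when the entity is
     * known not to sit on any service tree, so bfq_entity_service_tree()
     * keeps matching the tree the entity is actually on.
     */
    static struct bfq_service_tree *
    bfq_update_weight_prio_checked(struct bfq_service_tree *st,
    			       struct bfq_entity *entity,
    			       bool entity_may_be_on_tree)
    {
    	return __bfq_entity_update_weight_prio(st, entity,
    					       !entity_may_be_on_tree);
    }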
+74 −91
@@ -102,7 +102,7 @@ EXPORT_SYMBOL(bio_integrity_alloc);
 * Description: Used to free the integrity portion of a bio. Usually
 * called from bio_free().
 */
-void bio_integrity_free(struct bio *bio)
+static void bio_integrity_free(struct bio *bio)
{
	struct bio_integrity_payload *bip = bio_integrity(bio);
	struct bio_set *bs = bio->bi_pool;
@@ -120,8 +120,8 @@ void bio_integrity_free(struct bio *bio)
	}

	bio->bi_integrity = NULL;
+	bio->bi_opf &= ~REQ_INTEGRITY;
}
-EXPORT_SYMBOL(bio_integrity_free);

/**
 * bio_integrity_add_page - Attach integrity metadata
@@ -159,44 +159,6 @@ int bio_integrity_add_page(struct bio *bio, struct page *page,
}
EXPORT_SYMBOL(bio_integrity_add_page);

-/**
- * bio_integrity_enabled - Check whether integrity can be passed
- * @bio:	bio to check
- *
- * Description: Determines whether bio_integrity_prep() can be called
- * on this bio or not.	bio data direction and target device must be
- * set prior to calling.  The functions honors the write_generate and
- * read_verify flags in sysfs.
- */
-bool bio_integrity_enabled(struct bio *bio)
-{
-	struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
-
-	if (bio_op(bio) != REQ_OP_READ && bio_op(bio) != REQ_OP_WRITE)
-		return false;
-
-	if (!bio_sectors(bio))
-		return false;
-
-	/* Already protected? */
-	if (bio_integrity(bio))
-		return false;
-
-	if (bi == NULL)
-		return false;
-
-	if (bio_data_dir(bio) == READ && bi->profile->verify_fn != NULL &&
-	    (bi->flags & BLK_INTEGRITY_VERIFY))
-		return true;
-
-	if (bio_data_dir(bio) == WRITE && bi->profile->generate_fn != NULL &&
-	    (bi->flags & BLK_INTEGRITY_GENERATE))
-		return true;
-
-	return false;
-}
-EXPORT_SYMBOL(bio_integrity_enabled);
-
/**
 * bio_integrity_intervals - Return number of integrity intervals for a bio
 * @bi:		blk_integrity profile for device
@@ -222,10 +184,11 @@ static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi,
/**
 * bio_integrity_process - Process integrity metadata for a bio
 * @bio:	bio to generate/verify integrity metadata for
+ * @proc_iter:  iterator to process
 * @proc_fn:	Pointer to the relevant processing function
 */
static blk_status_t bio_integrity_process(struct bio *bio,
-				 integrity_processing_fn *proc_fn)
+		struct bvec_iter *proc_iter, integrity_processing_fn *proc_fn)
{
	struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
	struct blk_integrity_iter iter;
@@ -238,10 +201,10 @@ static blk_status_t bio_integrity_process(struct bio *bio,

	iter.disk_name = bio->bi_bdev->bd_disk->disk_name;
	iter.interval = 1 << bi->interval_exp;
-	iter.seed = bip_get_seed(bip);
+	iter.seed = proc_iter->bi_sector;
	iter.prot_buf = prot_buf;

-	bio_for_each_segment(bv, bio, bviter) {
+	__bio_for_each_segment(bv, bio, bviter, *proc_iter) {
		void *kaddr = kmap_atomic(bv.bv_page);

		iter.data_buf = kaddr + bv.bv_offset;
@@ -262,14 +225,15 @@ static blk_status_t bio_integrity_process(struct bio *bio,
 * bio_integrity_prep - Prepare bio for integrity I/O
 * @bio:	bio to prepare
 *
- * Description: Allocates a buffer for integrity metadata, maps the
- * pages and attaches them to a bio.  The bio must have data
- * direction, target device and start sector set prior to calling.  In
- * the WRITE case, integrity metadata will be generated using the
- * block device's integrity function.  In the READ case, the buffer
+ * Description: Checks if the bio already has an integrity payload attached.
+ * If it does, the payload has been generated by another kernel subsystem,
+ * and we just pass it through. Otherwise allocates integrity payload.
+ * The bio must have data direction, target device and start sector set prior
+ * to calling.  In the WRITE case, integrity metadata will be generated using
+ * the block device's integrity function.  In the READ case, the buffer
 * will be prepared for DMA and a suitable end_io handler set up.
 */
-int bio_integrity_prep(struct bio *bio)
+bool bio_integrity_prep(struct bio *bio)
{
	struct bio_integrity_payload *bip;
	struct blk_integrity *bi;
@@ -279,20 +243,41 @@ int bio_integrity_prep(struct bio *bio)
	unsigned int len, nr_pages;
	unsigned int bytes, offset, i;
	unsigned int intervals;
+	blk_status_t status;

	bi = bdev_get_integrity(bio->bi_bdev);
	q = bdev_get_queue(bio->bi_bdev);
-	BUG_ON(bi == NULL);
-	BUG_ON(bio_integrity(bio));
-
+	if (bio_op(bio) != REQ_OP_READ && bio_op(bio) != REQ_OP_WRITE)
+		return true;
+
+	if (!bio_sectors(bio))
+		return true;
+
+	/* Already protected? */
+	if (bio_integrity(bio))
+		return true;
+
+	if (bi == NULL)
+		return true;
+
+	if (bio_data_dir(bio) == READ) {
+		if (!bi->profile->verify_fn ||
+		    !(bi->flags & BLK_INTEGRITY_VERIFY))
+			return true;
+	} else {
+		if (!bi->profile->generate_fn ||
+		    !(bi->flags & BLK_INTEGRITY_GENERATE))
+			return true;
+	}
	intervals = bio_integrity_intervals(bi, bio_sectors(bio));

	/* Allocate kernel buffer for protection data */
	len = intervals * bi->tuple_size;
	buf = kmalloc(len, GFP_NOIO | q->bounce_gfp);
+	status = BLK_STS_RESOURCE;
	if (unlikely(buf == NULL)) {
		printk(KERN_ERR "could not allocate integrity buffer\n");
-		return -ENOMEM;
+		goto err_end_io;
	}

	end = (((unsigned long) buf) + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
@@ -304,7 +289,8 @@ int bio_integrity_prep(struct bio *bio)
	if (IS_ERR(bip)) {
		printk(KERN_ERR "could not allocate data integrity bioset\n");
		kfree(buf);
-		return PTR_ERR(bip);
+		status = BLK_STS_RESOURCE;
+		goto err_end_io;
	}

	bip->bip_flags |= BIP_BLOCK_INTEGRITY;
@@ -330,7 +316,7 @@ int bio_integrity_prep(struct bio *bio)
					     bytes, offset);

		if (ret == 0)
-			return 0;
+			return false;

		if (ret < bytes)
			break;
@@ -340,17 +326,18 @@ int bio_integrity_prep(struct bio *bio)
		offset = 0;
	}

-	/* Install custom I/O completion handler if read verify is enabled */
-	if (bio_data_dir(bio) == READ) {
-		bip->bip_end_io = bio->bi_end_io;
-		bio->bi_end_io = bio_integrity_endio;
+	/* Auto-generate integrity metadata if this is a write */
+	if (bio_data_dir(bio) == WRITE) {
+		bio_integrity_process(bio, &bio->bi_iter,
+				      bi->profile->generate_fn);
	}
+	return true;

-	/* Auto-generate integrity metadata if this is a write */
-	if (bio_data_dir(bio) == WRITE)
-		bio_integrity_process(bio, bi->profile->generate_fn);
-	return 0;
+err_end_io:
+	bio->bi_status = status;
+	bio_endio(bio);
+	return false;

}
EXPORT_SYMBOL(bio_integrity_prep);

@@ -368,16 +355,26 @@ static void bio_integrity_verify_fn(struct work_struct *work)
		container_of(work, struct bio_integrity_payload, bip_work);
	struct bio *bio = bip->bip_bio;
	struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
+	struct bvec_iter iter = bio->bi_iter;

-	bio->bi_status = bio_integrity_process(bio, bi->profile->verify_fn);
+	/*
+	 * At the moment verify is called, the bio's iterator has already
+	 * been advanced during split and completion; we need to rewind
+	 * the iterator to its original position.
+	 */
+	if (bio_rewind_iter(bio, &iter, iter.bi_done)) {
+		bio->bi_status = bio_integrity_process(bio, &iter,
+						       bi->profile->verify_fn);
+	} else {
+		bio->bi_status = BLK_STS_IOERR;
+	}

-	/* Restore original bio completion handler */
-	bio->bi_end_io = bip->bip_end_io;
+	bio_integrity_free(bio);
	bio_endio(bio);
}

/**
- * bio_integrity_endio - Integrity I/O completion function
+ * __bio_integrity_endio - Integrity I/O completion function
 * @bio:	Protected bio
 * @error:	Pointer to errno
 *
@@ -388,27 +385,19 @@ static void bio_integrity_verify_fn(struct work_struct *work)
 * in process context.	This function postpones completion
 * accordingly.
 */
-void bio_integrity_endio(struct bio *bio)
+bool __bio_integrity_endio(struct bio *bio)
{
-	struct bio_integrity_payload *bip = bio_integrity(bio);
-
-	BUG_ON(bip->bip_bio != bio);
-
-	/* In case of an I/O error there is no point in verifying the
-	 * integrity metadata.  Restore original bio end_io handler
-	 * and run it.
-	 */
-	if (bio->bi_status) {
-		bio->bi_end_io = bip->bip_end_io;
-		bio_endio(bio);
-
-		return;
-	}
-
-	INIT_WORK(&bip->bip_work, bio_integrity_verify_fn);
-	queue_work(kintegrityd_wq, &bip->bip_work);
+	if (bio_op(bio) == REQ_OP_READ && !bio->bi_status) {
+		struct bio_integrity_payload *bip = bio_integrity(bio);
+
+		INIT_WORK(&bip->bip_work, bio_integrity_verify_fn);
+		queue_work(kintegrityd_wq, &bip->bip_work);
+		return false;
+	}
+
+	bio_integrity_free(bio);
+	return true;
}
-EXPORT_SYMBOL(bio_integrity_endio);

/**
 * bio_integrity_advance - Advance integrity vector
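
With the rename, the decision of whether completion must be postponed moves to bio_endio(). The companion wiring is not part of this hunk; elsewhere in this series the block-internal header grows an inline wrapper roughly like the sketch below:

    /* Sketch of the block/blk.h side (shown for orientation, not part of
     * the diff above): only bios that carry an integrity payload take the
     * slow path; returning false tells bio_endio() that completion has
     * been deferred to the kintegrityd workqueue.
     */
    bool __bio_integrity_endio(struct bio *bio);

    static inline bool bio_integrity_endio(struct bio *bio)
    {
    	if (bio_integrity(bio))
    		return __bio_integrity_endio(bio);
    	return true;
    }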
@@ -425,6 +414,7 @@ void bio_integrity_advance(struct bio *bio, unsigned int bytes_done)
	struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
	unsigned bytes = bio_integrity_bytes(bi, bytes_done >> 9);

+	bip->bip_iter.bi_sector += bytes_done >> 9;
	bvec_iter_advance(bip->bip_vec, &bip->bip_iter, bytes);
}
EXPORT_SYMBOL(bio_integrity_advance);
@@ -432,22 +422,15 @@ EXPORT_SYMBOL(bio_integrity_advance);
/**
 * bio_integrity_trim - Trim integrity vector
 * @bio:	bio whose integrity vector to update
- * @offset:	offset to first data sector
- * @sectors:	number of data sectors
 *
 * Description: Used to trim the integrity vector in a cloned bio.
- * The ivec will be advanced corresponding to 'offset' data sectors
- * and the length will be truncated corresponding to 'len' data
- * sectors.
 */
-void bio_integrity_trim(struct bio *bio, unsigned int offset,
-			unsigned int sectors)
+void bio_integrity_trim(struct bio *bio)
{
	struct bio_integrity_payload *bip = bio_integrity(bio);
	struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);

-	bio_integrity_advance(bio, offset << 9);
-	bip->bip_iter.bi_size = bio_integrity_bytes(bi, sectors);
+	bip->bip_iter.bi_size = bio_integrity_bytes(bi, bio_sectors(bio));
}
EXPORT_SYMBOL(bio_integrity_trim);
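
With offset and sectors dropped from the interface, the caller is expected to trim the data bio first and only then fix up the integrity vector. A sketch of the intended call pattern, modeled loosely on bio_trim() in block/bio.c (the exact body there differs):

    /* Sketch: advance and resize the data iterator first;
     * bio_integrity_trim() then sizes the integrity vector from the
     * bio's own remaining sector count.
     */
    void bio_trim(struct bio *bio, int offset, int size)
    {
    	bio_advance(bio, offset << 9);
    	bio->bi_iter.bi_size = size << 9;

    	if (bio_integrity(bio))
    		bio_integrity_trim(bio);
    }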
