Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 8fde2832 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'for-linus-2019-08-17' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:
 "A collection of fixes that should go into this series. This contains:

   - Revert of the REQ_NOWAIT_INLINE and associated dio changes. There
     were still corner cases there, and even though I had a solution for
     it, it's too involved for this stage. (me)

   - Set of NVMe fixes (via Sagi)

   - io_uring fix for fixed buffers (Anthony)

   - io_uring defer issue fix (Jackie)

   - Regression fix for queue sync at exit time (zhengbin)

   - xen blk-back memory leak fix (Wenwen)"

* tag 'for-linus-2019-08-17' of git://git.kernel.dk/linux-block:
  io_uring: fix an issue when IOSQE_IO_LINK is inserted into defer list
  block: remove REQ_NOWAIT_INLINE
  io_uring: fix manual setup of iov_iter for fixed buffers
  xen/blkback: fix memory leaks
  blk-mq: move cancel of requeue_work to the front of blk_exit_queue
  nvme-pci: Fix async probe remove race
  nvme: fix controller removal race with scan work
  nvme-rdma: fix possible use-after-free in connect error flow
  nvme: fix a possible deadlock when passthru commands sent to a multipath device
  nvme-core: Fix extra device_put() call on error path
  nvmet-file: fix nvmet_file_flush() always returning an error
  nvmet-loop: Flush nvme_delete_wq when removing the port
  nvmet: Fix use-after-free bug when a port is removed
  nvme-multipath: revalidate nvme_ns_head gendisk in nvme_validate_ns
parents 85d8d3b1 a982eeb0
Loading
Loading
Loading
Loading
+2 −8
Original line number Diff line number Diff line
@@ -1958,13 +1958,9 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
	rq = blk_mq_get_request(q, bio, &data);
	if (unlikely(!rq)) {
		rq_qos_cleanup(q, bio);

		cookie = BLK_QC_T_NONE;
		if (bio->bi_opf & REQ_NOWAIT_INLINE)
			cookie = BLK_QC_T_EAGAIN;
		else if (bio->bi_opf & REQ_NOWAIT)
		if (bio->bi_opf & REQ_NOWAIT)
			bio_wouldblock_error(bio);
		return cookie;
		return BLK_QC_T_NONE;
	}

	trace_block_getrq(q, bio, bio->bi_opf);
@@ -2666,8 +2662,6 @@ void blk_mq_release(struct request_queue *q)
	struct blk_mq_hw_ctx *hctx, *next;
	int i;

	cancel_delayed_work_sync(&q->requeue_work);

	queue_for_each_hw_ctx(q, hctx, i)
		WARN_ON_ONCE(hctx && list_empty(&hctx->hctx_list));

+3 −0
Original line number Diff line number Diff line
@@ -892,6 +892,9 @@ static void __blk_release_queue(struct work_struct *work)

	blk_free_queue_stats(q->stats);

	if (queue_is_mq(q))
		cancel_delayed_work_sync(&q->requeue_work);

	blk_exit_queue(q);

	blk_queue_free_zone_bitmaps(q);
+3 −3
Original line number Diff line number Diff line
@@ -965,6 +965,7 @@ static int read_per_ring_refs(struct xen_blkif_ring *ring, const char *dir)
		}
	}

	err = -ENOMEM;
	for (i = 0; i < nr_grefs * XEN_BLKIF_REQS_PER_PAGE; i++) {
		req = kzalloc(sizeof(*req), GFP_KERNEL);
		if (!req)
@@ -987,7 +988,7 @@ static int read_per_ring_refs(struct xen_blkif_ring *ring, const char *dir)
	err = xen_blkif_map(ring, ring_ref, nr_grefs, evtchn);
	if (err) {
		xenbus_dev_fatal(dev, err, "mapping ring-ref port %u", evtchn);
		return err;
		goto fail;
	}

	return 0;
@@ -1007,8 +1008,7 @@ static int read_per_ring_refs(struct xen_blkif_ring *ring, const char *dir)
		}
		kfree(req);
	}
	return -ENOMEM;

	return err;
}

static int connect_ring(struct backend_info *be)
+14 −1
Original line number Diff line number Diff line
@@ -1286,6 +1286,9 @@ static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
	 */
	if (effects & (NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK)) {
		mutex_lock(&ctrl->scan_lock);
		mutex_lock(&ctrl->subsys->lock);
		nvme_mpath_start_freeze(ctrl->subsys);
		nvme_mpath_wait_freeze(ctrl->subsys);
		nvme_start_freeze(ctrl);
		nvme_wait_freeze(ctrl);
	}
@@ -1316,6 +1319,8 @@ static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects)
		nvme_update_formats(ctrl);
	if (effects & (NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK)) {
		nvme_unfreeze(ctrl);
		nvme_mpath_unfreeze(ctrl->subsys);
		mutex_unlock(&ctrl->subsys->lock);
		mutex_unlock(&ctrl->scan_lock);
	}
	if (effects & NVME_CMD_EFFECTS_CCC)
@@ -1715,6 +1720,7 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
	if (ns->head->disk) {
		nvme_update_disk_info(ns->head->disk, ns, id);
		blk_queue_stack_limits(ns->head->disk->queue, ns->queue);
		revalidate_disk(ns->head->disk);
	}
#endif
}
@@ -2487,6 +2493,7 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
		if (ret) {
			dev_err(ctrl->device,
				"failed to register subsystem device.\n");
			put_device(&subsys->dev);
			goto out_unlock;
		}
		ida_init(&subsys->ns_ida);
@@ -2509,7 +2516,6 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
	nvme_put_subsystem(subsys);
out_unlock:
	mutex_unlock(&nvme_subsystems_lock);
	put_device(&subsys->dev);
	return ret;
}

@@ -3571,6 +3577,13 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
	struct nvme_ns *ns, *next;
	LIST_HEAD(ns_list);

	/*
	 * make sure to requeue I/O to all namespaces as these
	 * might result from the scan itself and must complete
	 * for the scan_work to make progress
	 */
	nvme_mpath_clear_ctrl_paths(ctrl);

	/* prevent racing with ns scanning */
	flush_work(&ctrl->scan_work);

+70 −6
Original line number Diff line number Diff line
@@ -12,6 +12,36 @@ module_param(multipath, bool, 0444);
MODULE_PARM_DESC(multipath,
	"turn on native support for multiple controllers per subsystem");

void nvme_mpath_unfreeze(struct nvme_subsystem *subsys)
{
	struct nvme_ns_head *h;

	lockdep_assert_held(&subsys->lock);
	list_for_each_entry(h, &subsys->nsheads, entry)
		if (h->disk)
			blk_mq_unfreeze_queue(h->disk->queue);
}

void nvme_mpath_wait_freeze(struct nvme_subsystem *subsys)
{
	struct nvme_ns_head *h;

	lockdep_assert_held(&subsys->lock);
	list_for_each_entry(h, &subsys->nsheads, entry)
		if (h->disk)
			blk_mq_freeze_queue_wait(h->disk->queue);
}

void nvme_mpath_start_freeze(struct nvme_subsystem *subsys)
{
	struct nvme_ns_head *h;

	lockdep_assert_held(&subsys->lock);
	list_for_each_entry(h, &subsys->nsheads, entry)
		if (h->disk)
			blk_freeze_queue_start(h->disk->queue);
}

/*
 * If multipathing is enabled we need to always use the subsystem instance
 * number for numbering our devices to avoid conflicts between subsystems that
@@ -104,18 +134,34 @@ static const char *nvme_ana_state_names[] = {
	[NVME_ANA_CHANGE]		= "change",
};

void nvme_mpath_clear_current_path(struct nvme_ns *ns)
bool nvme_mpath_clear_current_path(struct nvme_ns *ns)
{
	struct nvme_ns_head *head = ns->head;
	bool changed = false;
	int node;

	if (!head)
		return;
		goto out;

	for_each_node(node) {
		if (ns == rcu_access_pointer(head->current_path[node]))
		if (ns == rcu_access_pointer(head->current_path[node])) {
			rcu_assign_pointer(head->current_path[node], NULL);
			changed = true;
		}
	}
out:
	return changed;
}

void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl)
{
	struct nvme_ns *ns;

	mutex_lock(&ctrl->scan_lock);
	list_for_each_entry(ns, &ctrl->namespaces, list)
		if (nvme_mpath_clear_current_path(ns))
			kblockd_schedule_work(&ns->head->requeue_work);
	mutex_unlock(&ctrl->scan_lock);
}

static bool nvme_path_is_disabled(struct nvme_ns *ns)
@@ -226,6 +272,24 @@ inline struct nvme_ns *nvme_find_path(struct nvme_ns_head *head)
	return ns;
}

static bool nvme_available_path(struct nvme_ns_head *head)
{
	struct nvme_ns *ns;

	list_for_each_entry_rcu(ns, &head->list, siblings) {
		switch (ns->ctrl->state) {
		case NVME_CTRL_LIVE:
		case NVME_CTRL_RESETTING:
		case NVME_CTRL_CONNECTING:
			/* fallthru */
			return true;
		default:
			break;
		}
	}
	return false;
}

static blk_qc_t nvme_ns_head_make_request(struct request_queue *q,
		struct bio *bio)
{
@@ -252,14 +316,14 @@ static blk_qc_t nvme_ns_head_make_request(struct request_queue *q,
				      disk_devt(ns->head->disk),
				      bio->bi_iter.bi_sector);
		ret = direct_make_request(bio);
	} else if (!list_empty_careful(&head->list)) {
		dev_warn_ratelimited(dev, "no path available - requeuing I/O\n");
	} else if (nvme_available_path(head)) {
		dev_warn_ratelimited(dev, "no usable path - requeuing I/O\n");

		spin_lock_irq(&head->requeue_lock);
		bio_list_add(&head->requeue_list, bio);
		spin_unlock_irq(&head->requeue_lock);
	} else {
		dev_warn_ratelimited(dev, "no path - failing I/O\n");
		dev_warn_ratelimited(dev, "no available path - failing I/O\n");

		bio->bi_status = BLK_STS_IOERR;
		bio_endio(bio);
Loading