Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 8aa63829 authored by Jens Axboe's avatar Jens Axboe
Browse files

Merge branch 'nvme-4.12' of git://git.infradead.org/nvme into for-linus

Christoph writes:

"A couple of fixes for the next rc on the nvme front. Various FC fixes
from James, controller removal fixes from Ming (including a block layer
patch), an APST-related device quirk from Andy, an RDMA fix for small
queue depth devices from Marta, as well as fixes for the lack of
metadata support in non-PCIe drivers and the printk logging format from
me."
parents a8ecdd71 50af47d0
Loading
Loading
Loading
Loading
+0 −19
Original line number Diff line number Diff line
@@ -628,25 +628,6 @@ void blk_mq_delay_kick_requeue_list(struct request_queue *q,
}
EXPORT_SYMBOL(blk_mq_delay_kick_requeue_list);

void blk_mq_abort_requeue_list(struct request_queue *q)
{
	unsigned long flags;
	LIST_HEAD(rq_list);

	spin_lock_irqsave(&q->requeue_lock, flags);
	list_splice_init(&q->requeue_list, &rq_list);
	spin_unlock_irqrestore(&q->requeue_lock, flags);

	while (!list_empty(&rq_list)) {
		struct request *rq;

		rq = list_first_entry(&rq_list, struct request, queuelist);
		list_del_init(&rq->queuelist);
		blk_mq_end_request(rq, -EIO);
	}
}
EXPORT_SYMBOL(blk_mq_abort_requeue_list);

struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
{
	if (tag < tags->nr_tags) {
+42 −23
Original line number Diff line number Diff line
@@ -925,6 +925,29 @@ static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo)
}

#ifdef CONFIG_BLK_DEV_INTEGRITY
/*
 * Refresh the namespace metadata settings (ns->ms, ns->ext and
 * ns->pi_type) from the Identify Namespace data in @id, and unregister
 * an integrity profile already present on @disk when it no longer fits.
 *
 * @bs is compared against the queue's current logical block size; a
 * mismatch is one of the reasons for dropping the existing profile.
 */
static void nvme_prep_integrity(struct gendisk *disk, struct nvme_id_ns *id,
		u16 bs)
{
	struct nvme_ns *ns = disk->private_data;
	const u16 prev_ms = ns->ms;
	const u8 fmt = id->flbas & NVME_NS_FLBAS_LBA_MASK;
	u8 pi_type;

	/* Metadata size comes from the currently selected LBA format. */
	ns->ms = le16_to_cpu(id->lbaf[fmt].ms);
	ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT);

	/*
	 * The PI implementation requires the metadata size to equal the
	 * T10 PI tuple size; any other size is treated as "no PI".
	 */
	pi_type = (ns->ms == sizeof(struct t10_pi_tuple)) ?
			(id->dps & NVME_NS_DPS_PI_MASK) : 0;

	/*
	 * Only an already registered profile can be stale. Drop it when the
	 * PI type, metadata size or block size changed, or when metadata is
	 * present with the NVME_NS_FLBAS_META_EXT flag set.
	 */
	if (blk_get_integrity(disk)) {
		if (ns->pi_type != pi_type || ns->ms != prev_ms ||
		    bs != queue_logical_block_size(disk->queue) ||
		    (ns->ms && ns->ext))
			blk_integrity_unregister(disk);
	}

	ns->pi_type = pi_type;
}

static void nvme_init_integrity(struct nvme_ns *ns)
{
	struct blk_integrity integrity;
@@ -951,6 +974,10 @@ static void nvme_init_integrity(struct nvme_ns *ns)
	blk_queue_max_integrity_segments(ns->queue, 1);
}
#else
/*
 * Stub for builds without CONFIG_BLK_DEV_INTEGRITY: intentionally does
 * nothing, so none of the namespace metadata fields are touched here.
 */
static void nvme_prep_integrity(struct gendisk *disk, struct nvme_id_ns *id,
		u16 bs)
{
}
/*
 * Stub for builds without CONFIG_BLK_DEV_INTEGRITY: intentionally does
 * nothing, so no integrity profile is registered for @ns.
 */
static void nvme_init_integrity(struct nvme_ns *ns)
{
}
@@ -997,37 +1024,22 @@ static int nvme_revalidate_ns(struct nvme_ns *ns, struct nvme_id_ns **id)
static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
{
	struct nvme_ns *ns = disk->private_data;
	u8 lbaf, pi_type;
	u16 old_ms;
	unsigned short bs;

	old_ms = ns->ms;
	lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK;
	ns->lba_shift = id->lbaf[lbaf].ds;
	ns->ms = le16_to_cpu(id->lbaf[lbaf].ms);
	ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT);
	u16 bs;

	/*
	 * If identify namespace failed, use default 512 byte block size so
	 * block layer can use before failing read/write for 0 capacity.
	 */
	ns->lba_shift = id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ds;
	if (ns->lba_shift == 0)
		ns->lba_shift = 9;
	bs = 1 << ns->lba_shift;
	/* XXX: PI implementation requires metadata equal t10 pi tuple size */
	pi_type = ns->ms == sizeof(struct t10_pi_tuple) ?
					id->dps & NVME_NS_DPS_PI_MASK : 0;

	blk_mq_freeze_queue(disk->queue);
	if (blk_get_integrity(disk) && (ns->pi_type != pi_type ||
				ns->ms != old_ms ||
				bs != queue_logical_block_size(disk->queue) ||
				(ns->ms && ns->ext)))
		blk_integrity_unregister(disk);

	ns->pi_type = pi_type;
	if (ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)
		nvme_prep_integrity(disk, id, bs);
	blk_queue_logical_block_size(ns->queue, bs);

	if (ns->ms && !blk_get_integrity(disk) && !ns->ext)
		nvme_init_integrity(ns);
	if (ns->ms && !(ns->ms == 8 && ns->pi_type) && !blk_get_integrity(disk))
@@ -1605,7 +1617,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
	}
	memcpy(ctrl->psd, id->psd, sizeof(ctrl->psd));

	if (ctrl->ops->is_fabrics) {
	if (ctrl->ops->flags & NVME_F_FABRICS) {
		ctrl->icdoff = le16_to_cpu(id->icdoff);
		ctrl->ioccsz = le32_to_cpu(id->ioccsz);
		ctrl->iorcsz = le32_to_cpu(id->iorcsz);
@@ -2098,7 +2110,6 @@ static void nvme_ns_remove(struct nvme_ns *ns)
		if (ns->ndev)
			nvme_nvm_unregister_sysfs(ns);
		del_gendisk(ns->disk);
		blk_mq_abort_requeue_list(ns->queue);
		blk_cleanup_queue(ns->queue);
	}

@@ -2436,8 +2447,16 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
			continue;
		revalidate_disk(ns->disk);
		blk_set_queue_dying(ns->queue);
		blk_mq_abort_requeue_list(ns->queue);
		blk_mq_start_stopped_hw_queues(ns->queue, true);

		/*
		 * Forcibly start all queues to avoid having stuck requests.
		 * Note that we must ensure the queues are not stopped
		 * when the final removal happens.
		 */
		blk_mq_start_hw_queues(ns->queue);

		/* draining requests in requeue list */
		blk_mq_kick_requeue_list(ns->queue);
	}
	mutex_unlock(&ctrl->namespaces_mutex);
}
+60 −87
Original line number Diff line number Diff line
@@ -45,8 +45,6 @@ enum nvme_fc_queue_flags {

#define NVMEFC_QUEUE_DELAY	3		/* ms units */

#define NVME_FC_MAX_CONNECT_ATTEMPTS	1

struct nvme_fc_queue {
	struct nvme_fc_ctrl	*ctrl;
	struct device		*dev;
@@ -165,8 +163,6 @@ struct nvme_fc_ctrl {
	struct work_struct	delete_work;
	struct work_struct	reset_work;
	struct delayed_work	connect_work;
	int			reconnect_delay;
	int			connect_attempts;

	struct kref		ref;
	u32			flags;
@@ -1376,9 +1372,9 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
	complete_rq = __nvme_fc_fcpop_chk_teardowns(ctrl, op);
	if (!complete_rq) {
		if (unlikely(op->flags & FCOP_FLAGS_TERMIO)) {
			status = cpu_to_le16(NVME_SC_ABORT_REQ);
			status = cpu_to_le16(NVME_SC_ABORT_REQ << 1);
			if (blk_queue_dying(rq->q))
				status |= cpu_to_le16(NVME_SC_DNR);
				status |= cpu_to_le16(NVME_SC_DNR << 1);
		}
		nvme_end_request(rq, status, result);
	} else
@@ -1751,7 +1747,7 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg)
	dev_warn(ctrl->ctrl.device,
		"NVME-FC{%d}: transport association error detected: %s\n",
		ctrl->cnum, errmsg);
	dev_info(ctrl->ctrl.device,
	dev_warn(ctrl->ctrl.device,
		"NVME-FC{%d}: resetting controller\n", ctrl->cnum);

	/* stop the queues on error, cleanup is in reset thread */
@@ -2195,9 +2191,6 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl)
	if (!opts->nr_io_queues)
		return 0;

	dev_info(ctrl->ctrl.device, "creating %d I/O queues.\n",
			opts->nr_io_queues);

	nvme_fc_init_io_queues(ctrl);

	memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set));
@@ -2268,9 +2261,6 @@ nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl)
	if (ctrl->queue_count == 1)
		return 0;

	dev_info(ctrl->ctrl.device, "Recreating %d I/O queues.\n",
			opts->nr_io_queues);

	nvme_fc_init_io_queues(ctrl);

	ret = blk_mq_reinit_tagset(&ctrl->tag_set);
@@ -2306,7 +2296,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
	int ret;
	bool changed;

	ctrl->connect_attempts++;
	++ctrl->ctrl.opts->nr_reconnects;

	/*
	 * Create the admin queue
@@ -2403,9 +2393,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
	WARN_ON_ONCE(!changed);

	ctrl->connect_attempts = 0;

	kref_get(&ctrl->ctrl.kref);
	ctrl->ctrl.opts->nr_reconnects = 0;

	if (ctrl->queue_count > 1) {
		nvme_start_queues(&ctrl->ctrl);
@@ -2536,26 +2524,32 @@ nvme_fc_delete_ctrl_work(struct work_struct *work)

	/*
	 * tear down the controller
	 * This will result in the last reference on the nvme ctrl to
	 * expire, calling the transport nvme_fc_nvme_ctrl_freed() callback.
	 * From there, the transport will tear down its logical queues and
	 * association.
	 * After the last reference on the nvme ctrl is removed,
	 * the transport nvme_fc_nvme_ctrl_freed() callback will be
	 * invoked. From there, the transport will tear down its
	 * logical queues and association.
	 */
	nvme_uninit_ctrl(&ctrl->ctrl);

	nvme_put_ctrl(&ctrl->ctrl);
}

static int
__nvme_fc_del_ctrl(struct nvme_fc_ctrl *ctrl)
static bool
__nvme_fc_schedule_delete_work(struct nvme_fc_ctrl *ctrl)
{
	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING))
		return -EBUSY;
		return true;

	if (!queue_work(nvme_fc_wq, &ctrl->delete_work))
		return -EBUSY;
		return true;

	return 0;
	return false;
}

/*
 * Request deletion of @ctrl by scheduling its delete work.
 *
 * Returns 0 when __nvme_fc_schedule_delete_work() reports success (a
 * false return value), or -EBUSY when it reports failure.
 */
static int
__nvme_fc_del_ctrl(struct nvme_fc_ctrl *ctrl)
{
	if (__nvme_fc_schedule_delete_work(ctrl))
		return -EBUSY;

	return 0;
}

/*
@@ -2580,6 +2574,35 @@ nvme_fc_del_nvme_ctrl(struct nvme_ctrl *nctrl)
	return ret;
}

/*
 * Decide what to do after a failed attempt to create the association.
 *
 * @ctrl:   FC controller whose connect attempt failed
 * @status: error code of the failed attempt (only used for logging)
 *
 * Nothing happens unless the controller is in the RECONNECTING state.
 * Otherwise either another connect attempt is queued after the
 * configured reconnect delay, or, once nvmf_should_reconnect() says no
 * further attempts should be made, deletion of the controller is
 * scheduled.
 */
static void
nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status)
{
	/* If we are resetting/deleting then do nothing */
	if (ctrl->ctrl.state != NVME_CTRL_RECONNECTING) {
		/* NEW or LIVE is not expected on this path; warn once. */
		WARN_ON_ONCE(ctrl->ctrl.state == NVME_CTRL_NEW ||
			ctrl->ctrl.state == NVME_CTRL_LIVE);
		return;
	}

	dev_info(ctrl->ctrl.device,
		"NVME-FC{%d}: reset: Reconnect attempt failed (%d)\n",
		ctrl->cnum, status);

	/* No more retries: give up and schedule removal of the controller. */
	if (!nvmf_should_reconnect(&ctrl->ctrl)) {
		dev_warn(ctrl->ctrl.device,
				"NVME-FC{%d}: Max reconnect attempts (%d) "
				"reached. Removing controller\n",
				ctrl->cnum, ctrl->ctrl.opts->nr_reconnects);
		WARN_ON(__nvme_fc_schedule_delete_work(ctrl));
		return;
	}

	/* Retry later; reconnect_delay is in seconds, hence the HZ scaling. */
	dev_info(ctrl->ctrl.device,
		"NVME-FC{%d}: Reconnect attempt in %d seconds.\n",
		ctrl->cnum, ctrl->ctrl.opts->reconnect_delay);
	queue_delayed_work(nvme_fc_wq, &ctrl->connect_work,
			ctrl->ctrl.opts->reconnect_delay * HZ);
}

static void
nvme_fc_reset_ctrl_work(struct work_struct *work)
{
@@ -2591,34 +2614,9 @@ nvme_fc_reset_ctrl_work(struct work_struct *work)
	nvme_fc_delete_association(ctrl);

	ret = nvme_fc_create_association(ctrl);
	if (ret) {
		dev_warn(ctrl->ctrl.device,
			"NVME-FC{%d}: reset: Reconnect attempt failed (%d)\n",
			ctrl->cnum, ret);
		if (ctrl->connect_attempts >= NVME_FC_MAX_CONNECT_ATTEMPTS) {
			dev_warn(ctrl->ctrl.device,
				"NVME-FC{%d}: Max reconnect attempts (%d) "
				"reached. Removing controller\n",
				ctrl->cnum, ctrl->connect_attempts);

			if (!nvme_change_ctrl_state(&ctrl->ctrl,
				NVME_CTRL_DELETING)) {
				dev_err(ctrl->ctrl.device,
					"NVME-FC{%d}: failed to change state "
					"to DELETING\n", ctrl->cnum);
				return;
			}

			WARN_ON(!queue_work(nvme_fc_wq, &ctrl->delete_work));
			return;
		}

		dev_warn(ctrl->ctrl.device,
			"NVME-FC{%d}: Reconnect attempt in %d seconds.\n",
			ctrl->cnum, ctrl->reconnect_delay);
		queue_delayed_work(nvme_fc_wq, &ctrl->connect_work,
				ctrl->reconnect_delay * HZ);
	} else
	if (ret)
		nvme_fc_reconnect_or_delete(ctrl, ret);
	else
		dev_info(ctrl->ctrl.device,
			"NVME-FC{%d}: controller reset complete\n", ctrl->cnum);
}
@@ -2632,7 +2630,7 @@ nvme_fc_reset_nvme_ctrl(struct nvme_ctrl *nctrl)
{
	struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);

	dev_warn(ctrl->ctrl.device,
	dev_info(ctrl->ctrl.device,
		"NVME-FC{%d}: admin requested controller reset\n", ctrl->cnum);

	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING))
@@ -2649,7 +2647,7 @@ nvme_fc_reset_nvme_ctrl(struct nvme_ctrl *nctrl)
static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = {
	.name			= "fc",
	.module			= THIS_MODULE,
	.is_fabrics		= true,
	.flags			= NVME_F_FABRICS,
	.reg_read32		= nvmf_reg_read32,
	.reg_read64		= nvmf_reg_read64,
	.reg_write32		= nvmf_reg_write32,
@@ -2671,34 +2669,9 @@ nvme_fc_connect_ctrl_work(struct work_struct *work)
				struct nvme_fc_ctrl, connect_work);

	ret = nvme_fc_create_association(ctrl);
	if (ret) {
		dev_warn(ctrl->ctrl.device,
			"NVME-FC{%d}: Reconnect attempt failed (%d)\n",
			ctrl->cnum, ret);
		if (ctrl->connect_attempts >= NVME_FC_MAX_CONNECT_ATTEMPTS) {
			dev_warn(ctrl->ctrl.device,
				"NVME-FC{%d}: Max reconnect attempts (%d) "
				"reached. Removing controller\n",
				ctrl->cnum, ctrl->connect_attempts);

			if (!nvme_change_ctrl_state(&ctrl->ctrl,
				NVME_CTRL_DELETING)) {
				dev_err(ctrl->ctrl.device,
					"NVME-FC{%d}: failed to change state "
					"to DELETING\n", ctrl->cnum);
				return;
			}

			WARN_ON(!queue_work(nvme_fc_wq, &ctrl->delete_work));
			return;
		}

		dev_warn(ctrl->ctrl.device,
			"NVME-FC{%d}: Reconnect attempt in %d seconds.\n",
			ctrl->cnum, ctrl->reconnect_delay);
		queue_delayed_work(nvme_fc_wq, &ctrl->connect_work,
				ctrl->reconnect_delay * HZ);
	} else
	if (ret)
		nvme_fc_reconnect_or_delete(ctrl, ret);
	else
		dev_info(ctrl->ctrl.device,
			"NVME-FC{%d}: controller reconnect complete\n",
			ctrl->cnum);
@@ -2755,7 +2728,6 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
	INIT_WORK(&ctrl->delete_work, nvme_fc_delete_ctrl_work);
	INIT_WORK(&ctrl->reset_work, nvme_fc_reset_ctrl_work);
	INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work);
	ctrl->reconnect_delay = opts->reconnect_delay;
	spin_lock_init(&ctrl->lock);

	/* io queue count */
@@ -2819,7 +2791,6 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
		ctrl->ctrl.opts = NULL;
		/* initiate nvme ctrl ref counting teardown */
		nvme_uninit_ctrl(&ctrl->ctrl);
		nvme_put_ctrl(&ctrl->ctrl);

		/* as we're past the point where we transition to the ref
		 * counting teardown path, if we return a bad pointer here,
@@ -2835,6 +2806,8 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
		return ERR_PTR(ret);
	}

	kref_get(&ctrl->ctrl.kref);

	dev_info(ctrl->ctrl.device,
		"NVME-FC{%d}: new ctrl: NQN \"%s\"\n",
		ctrl->cnum, ctrl->ctrl.opts->subsysnqn);
@@ -2971,7 +2944,7 @@ nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts)
static struct nvmf_transport_ops nvme_fc_transport = {
	.name		= "fc",
	.required_opts	= NVMF_OPT_TRADDR | NVMF_OPT_HOST_TRADDR,
	.allowed_opts	= NVMF_OPT_RECONNECT_DELAY,
	.allowed_opts	= NVMF_OPT_RECONNECT_DELAY | NVMF_OPT_CTRL_LOSS_TMO,
	.create_ctrl	= nvme_fc_create_ctrl,
};

+3 −1
Original line number Diff line number Diff line
@@ -208,7 +208,9 @@ struct nvme_ns {
struct nvme_ctrl_ops {
	const char *name;
	struct module *module;
	bool is_fabrics;
	unsigned int flags;
#define NVME_F_FABRICS			(1 << 0)
#define NVME_F_METADATA_SUPPORTED	(1 << 1)
	int (*reg_read32)(struct nvme_ctrl *ctrl, u32 off, u32 *val);
	int (*reg_write32)(struct nvme_ctrl *ctrl, u32 off, u32 val);
	int (*reg_read64)(struct nvme_ctrl *ctrl, u32 off, u64 *val);
+9 −6
Original line number Diff line number Diff line
@@ -263,7 +263,7 @@ static void nvme_dbbuf_set(struct nvme_dev *dev)
	c.dbbuf.prp2 = cpu_to_le64(dev->dbbuf_eis_dma_addr);

	if (nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0)) {
		dev_warn(dev->dev, "unable to set dbbuf\n");
		dev_warn(dev->ctrl.device, "unable to set dbbuf\n");
		/* Free memory and continue on */
		nvme_dbbuf_dma_free(dev);
	}
@@ -1394,11 +1394,11 @@ static void nvme_warn_reset(struct nvme_dev *dev, u32 csts)
	result = pci_read_config_word(to_pci_dev(dev->dev), PCI_STATUS,
				      &pci_status);
	if (result == PCIBIOS_SUCCESSFUL)
		dev_warn(dev->dev,
		dev_warn(dev->ctrl.device,
			 "controller is down; will reset: CSTS=0x%x, PCI_STATUS=0x%hx\n",
			 csts, pci_status);
	else
		dev_warn(dev->dev,
		dev_warn(dev->ctrl.device,
			 "controller is down; will reset: CSTS=0x%x, PCI_STATUS read failed (%d)\n",
			 csts, result);
}
@@ -1740,8 +1740,8 @@ static int nvme_pci_enable(struct nvme_dev *dev)
	 */
	if (pdev->vendor == PCI_VENDOR_ID_APPLE && pdev->device == 0x2001) {
		dev->q_depth = 2;
		dev_warn(dev->dev, "detected Apple NVMe controller, set "
			"queue depth=%u to work around controller resets\n",
		dev_warn(dev->ctrl.device, "detected Apple NVMe controller, "
			"set queue depth=%u to work around controller resets\n",
			dev->q_depth);
	}

@@ -1759,7 +1759,7 @@ static int nvme_pci_enable(struct nvme_dev *dev)
		if (dev->cmbsz) {
			if (sysfs_add_file_to_group(&dev->ctrl.device->kobj,
						    &dev_attr_cmb.attr, NULL))
				dev_warn(dev->dev,
				dev_warn(dev->ctrl.device,
					 "failed to add sysfs attribute for CMB\n");
		}
	}
@@ -2047,6 +2047,7 @@ static int nvme_pci_reset_ctrl(struct nvme_ctrl *ctrl)
static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
	.name			= "pcie",
	.module			= THIS_MODULE,
	.flags			= NVME_F_METADATA_SUPPORTED,
	.reg_read32		= nvme_pci_reg_read32,
	.reg_write32		= nvme_pci_reg_write32,
	.reg_read64		= nvme_pci_reg_read64,
@@ -2293,6 +2294,8 @@ static const struct pci_device_id nvme_id_table[] = {
	{ PCI_VDEVICE(INTEL, 0x0a54),
		.driver_data = NVME_QUIRK_STRIPE_SIZE |
				NVME_QUIRK_DEALLOCATE_ZEROES, },
	{ PCI_VDEVICE(INTEL, 0xf1a5),	/* Intel 600P/P3100 */
		.driver_data = NVME_QUIRK_NO_DEEPEST_PS },
	{ PCI_VDEVICE(INTEL, 0x5845),	/* Qemu emulated controller */
		.driver_data = NVME_QUIRK_IDENTIFY_CNS, },
	{ PCI_DEVICE(0x1c58, 0x0003),	/* HGST adapter */
Loading