Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit b7403066 authored by Jens Axboe's avatar Jens Axboe
Browse files

Merge branch 'nvme-5.3' of git://git.infradead.org/nvme into for-linus

Pull NVMe fixes from Christoph:

"Lots of fixes all over the place, and two very minor features that
 were in the nvme tree by the end of the merge window, but hadn't made
 it out to Jens yet."

* 'nvme-5.3' of git://git.infradead.org/nvme:
  nvme: fix regression upon hot device removal and insertion
  nvme-fc: fix module unloads while lports still pending
  nvme-tcp: don't use sendpage for SLAB pages
  nvme-tcp: set the STABLE_WRITES flag when data digests are enabled
  nvmet: print a hint while rejecting NSID 0 or 0xffffffff
  nvme-multipath: do not select namespaces which are about to be removed
  nvme-multipath: also check for a disabled path if there is a single sibling
  nvme-multipath: factor out a nvme_path_is_disabled helper
  nvme: set physical block size and optimal I/O size
  nvme: add I/O characteristics fields
  nvmet: export I/O characteristics attributes in Identify
  nvme-trace: add delete completion and submission queue to admin cmds tracer
  nvme-trace: fix spelling mistake "spcecific" -> "specific"
  nvme-pci: limit max_hw_sectors based on the DMA max mapping size
  nvme-pci: check for NULL return from pci_alloc_p2pmem()
  nvme-pci: don't create a read hctx mapping without read queues
  nvme-pci: don't fall back to a 32-bit DMA mask
  nvme-pci: make nvme_dev_pm_ops static
  nvme-fcloop: resolve warnings on RCU usage and sleep warnings
  nvme-fcloop: fix inconsistent lock state warnings
parents 4ddeaae8 420dc733
Loading
Loading
Loading
Loading
+39 −4
Original line number Diff line number Diff line
@@ -11,6 +11,7 @@
#include <linux/hdreg.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/backing-dev.h>
#include <linux/list_sort.h>
#include <linux/slab.h>
#include <linux/types.h>
@@ -1626,6 +1627,7 @@ static void nvme_update_disk_info(struct gendisk *disk,
{
	sector_t capacity = le64_to_cpu(id->nsze) << (ns->lba_shift - 9);
	unsigned short bs = 1 << ns->lba_shift;
	u32 atomic_bs, phys_bs, io_opt;

	if (ns->lba_shift > PAGE_SHIFT) {
		/* unsupported block size, set capacity to 0 later */
@@ -1634,9 +1636,37 @@ static void nvme_update_disk_info(struct gendisk *disk,
	blk_mq_freeze_queue(disk->queue);
	blk_integrity_unregister(disk);

	if (id->nabo == 0) {
		/*
		 * Bit 1 indicates whether NAWUPF is defined for this namespace
		 * and whether it should be used instead of AWUPF. If NAWUPF ==
		 * 0 then AWUPF must be used instead.
		 */
		if (id->nsfeat & (1 << 1) && id->nawupf)
			atomic_bs = (1 + le16_to_cpu(id->nawupf)) * bs;
		else
			atomic_bs = (1 + ns->ctrl->subsys->awupf) * bs;
	} else {
		atomic_bs = bs;
	}
	phys_bs = bs;
	io_opt = bs;
	if (id->nsfeat & (1 << 4)) {
		/* NPWG = Namespace Preferred Write Granularity */
		phys_bs *= 1 + le16_to_cpu(id->npwg);
		/* NOWS = Namespace Optimal Write Size */
		io_opt *= 1 + le16_to_cpu(id->nows);
	}

	blk_queue_logical_block_size(disk->queue, bs);
	blk_queue_physical_block_size(disk->queue, bs);
	blk_queue_io_min(disk->queue, bs);
	/*
	 * Linux filesystems assume writing a single physical block is
	 * an atomic operation. Hence limit the physical block size to the
	 * value of the Atomic Write Unit Power Fail parameter.
	 */
	blk_queue_physical_block_size(disk->queue, min(phys_bs, atomic_bs));
	blk_queue_io_min(disk->queue, phys_bs);
	blk_queue_io_opt(disk->queue, io_opt);

	if (ns->ms && !ns->ext &&
	    (ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED))
@@ -2386,8 +2416,8 @@ static bool nvme_validate_cntlid(struct nvme_subsystem *subsys,
	lockdep_assert_held(&nvme_subsystems_lock);

	list_for_each_entry(tmp, &subsys->ctrls, subsys_entry) {
		if (ctrl->state == NVME_CTRL_DELETING ||
		    ctrl->state == NVME_CTRL_DEAD)
		if (tmp->state == NVME_CTRL_DELETING ||
		    tmp->state == NVME_CTRL_DEAD)
			continue;

		if (tmp->cntlid == ctrl->cntlid) {
@@ -2433,6 +2463,7 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
	memcpy(subsys->firmware_rev, id->fr, sizeof(subsys->firmware_rev));
	subsys->vendor_id = le16_to_cpu(id->vid);
	subsys->cmic = id->cmic;
	subsys->awupf = le16_to_cpu(id->awupf);
#ifdef CONFIG_NVME_MULTIPATH
	subsys->iopolicy = NVME_IOPOLICY_NUMA;
#endif
@@ -3274,6 +3305,10 @@ static int nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
		goto out_free_ns;
	}

	if (ctrl->opts->data_digest)
		ns->queue->backing_dev_info->capabilities
			|= BDI_CAP_STABLE_WRITES;

	blk_queue_flag_set(QUEUE_FLAG_NONROT, ns->queue);
	if (ctrl->ops->flags & NVME_F_PCI_P2PDMA)
		blk_queue_flag_set(QUEUE_FLAG_PCI_P2PDMA, ns->queue);
+48 −3
Original line number Diff line number Diff line
@@ -204,6 +204,9 @@ static DEFINE_IDA(nvme_fc_ctrl_cnt);

static struct workqueue_struct *nvme_fc_wq;

static bool nvme_fc_waiting_to_unload;
static DECLARE_COMPLETION(nvme_fc_unload_proceed);

/*
 * These items are short-term. They will eventually be moved into
 * a generic FC class. See comments in module init.
@@ -229,6 +232,8 @@ nvme_fc_free_lport(struct kref *ref)
	/* remove from transport list */
	spin_lock_irqsave(&nvme_fc_lock, flags);
	list_del(&lport->port_list);
	if (nvme_fc_waiting_to_unload && list_empty(&nvme_fc_lport_list))
		complete(&nvme_fc_unload_proceed);
	spin_unlock_irqrestore(&nvme_fc_lock, flags);

	ida_simple_remove(&nvme_fc_local_port_cnt, lport->localport.port_num);
@@ -3456,11 +3461,51 @@ static int __init nvme_fc_init_module(void)
	return ret;
}

/*
 * Schedule deletion of every controller attached to @rport.
 *
 * Walks the rport's controller list under rport->lock and hands each
 * controller to nvme_delete_ctrl(), which flips its state and queues the
 * actual teardown asynchronously — so holding the spinlock across the
 * loop is safe.  Used on module unload to drain lingering associations.
 */
static void
nvme_fc_delete_controllers(struct nvme_fc_rport *rport)
{
	struct nvme_fc_ctrl *fc_ctrl;

	spin_lock(&rport->lock);
	list_for_each_entry(fc_ctrl, &rport->ctrl_list, ctrl_list) {
		/* Log which association is being torn down by the unload. */
		dev_warn(fc_ctrl->ctrl.device,
			"NVME-FC{%d}: transport unloading: deleting ctrl\n",
			fc_ctrl->cnum);
		nvme_delete_ctrl(&fc_ctrl->ctrl);
	}
	spin_unlock(&rport->lock);
}

static void
nvme_fc_cleanup_for_unload(void)
{
	struct nvme_fc_lport *lport;
	struct nvme_fc_rport *rport;

	list_for_each_entry(lport, &nvme_fc_lport_list, port_list) {
		list_for_each_entry(rport, &lport->endp_list, endp_list) {
			nvme_fc_delete_controllers(rport);
		}
	}
}

static void __exit nvme_fc_exit_module(void)
{
	/* sanity check - all lports should be removed */
	if (!list_empty(&nvme_fc_lport_list))
		pr_warn("%s: localport list not empty\n", __func__);
	unsigned long flags;
	bool need_cleanup = false;

	spin_lock_irqsave(&nvme_fc_lock, flags);
	nvme_fc_waiting_to_unload = true;
	if (!list_empty(&nvme_fc_lport_list)) {
		need_cleanup = true;
		nvme_fc_cleanup_for_unload();
	}
	spin_unlock_irqrestore(&nvme_fc_lock, flags);
	if (need_cleanup) {
		pr_info("%s: waiting for ctlr deletes\n", __func__);
		wait_for_completion(&nvme_fc_unload_proceed);
		pr_info("%s: ctrl deletes complete\n", __func__);
	}

	nvmf_unregister_transport(&nvme_fc_transport);

+13 −5
Original line number Diff line number Diff line
@@ -123,14 +123,20 @@ void nvme_mpath_clear_current_path(struct nvme_ns *ns)
	}
}

static bool nvme_path_is_disabled(struct nvme_ns *ns)
{
	return ns->ctrl->state != NVME_CTRL_LIVE ||
		test_bit(NVME_NS_ANA_PENDING, &ns->flags) ||
		test_bit(NVME_NS_REMOVING, &ns->flags);
}

static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head, int node)
{
	int found_distance = INT_MAX, fallback_distance = INT_MAX, distance;
	struct nvme_ns *found = NULL, *fallback = NULL, *ns;

	list_for_each_entry_rcu(ns, &head->list, siblings) {
		if (ns->ctrl->state != NVME_CTRL_LIVE ||
		    test_bit(NVME_NS_ANA_PENDING, &ns->flags))
		if (nvme_path_is_disabled(ns))
			continue;

		if (READ_ONCE(head->subsys->iopolicy) == NVME_IOPOLICY_NUMA)
@@ -178,14 +184,16 @@ static struct nvme_ns *nvme_round_robin_path(struct nvme_ns_head *head,
{
	struct nvme_ns *ns, *found, *fallback = NULL;

	if (list_is_singular(&head->list))
	if (list_is_singular(&head->list)) {
		if (nvme_path_is_disabled(old))
			return NULL;
		return old;
	}

	for (ns = nvme_next_ns(head, old);
	     ns != old;
	     ns = nvme_next_ns(head, ns)) {
		if (ns->ctrl->state != NVME_CTRL_LIVE ||
		    test_bit(NVME_NS_ANA_PENDING, &ns->flags))
		if (nvme_path_is_disabled(ns))
			continue;

		if (ns->ana_state == NVME_ANA_OPTIMIZED) {
+1 −0
Original line number Diff line number Diff line
@@ -283,6 +283,7 @@ struct nvme_subsystem {
	char			firmware_rev[8];
	u8			cmic;
	u16			vendor_id;
	u16			awupf;	/* 0's based awupf value. */
	struct ida		ns_ida;
#ifdef CONFIG_NVME_MULTIPATH
	enum nvme_iopolicy	iopolicy;
+16 −10
Original line number Diff line number Diff line
@@ -1439,12 +1439,16 @@ static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq,

	if (qid && dev->cmb_use_sqes && (dev->cmbsz & NVME_CMBSZ_SQS)) {
		nvmeq->sq_cmds = pci_alloc_p2pmem(pdev, SQ_SIZE(depth));
		if (nvmeq->sq_cmds) {
			nvmeq->sq_dma_addr = pci_p2pmem_virt_to_bus(pdev,
							nvmeq->sq_cmds);
			if (nvmeq->sq_dma_addr) {
				set_bit(NVMEQ_SQ_CMB, &nvmeq->flags);
				return 0;
			}

			pci_free_p2pmem(pdev, nvmeq->sq_cmds, SQ_SIZE(depth));
		}
	}

	nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth),
@@ -2250,7 +2254,9 @@ static int nvme_dev_add(struct nvme_dev *dev)
	if (!dev->ctrl.tagset) {
		dev->tagset.ops = &nvme_mq_ops;
		dev->tagset.nr_hw_queues = dev->online_queues - 1;
		dev->tagset.nr_maps = 2; /* default + read */
		dev->tagset.nr_maps = 1; /* default */
		if (dev->io_queues[HCTX_TYPE_READ])
			dev->tagset.nr_maps++;
		if (dev->io_queues[HCTX_TYPE_POLL])
			dev->tagset.nr_maps++;
		dev->tagset.timeout = NVME_IO_TIMEOUT;
@@ -2289,8 +2295,7 @@ static int nvme_pci_enable(struct nvme_dev *dev)

	pci_set_master(pdev);

	if (dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(64)) &&
	    dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(32)))
	if (dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(64)))
		goto disable;

	if (readl(dev->bar + NVME_REG_CSTS) == -1) {
@@ -2498,7 +2503,8 @@ static void nvme_reset_work(struct work_struct *work)
	 * Limit the max command size to prevent iod->sg allocations going
	 * over a single page.
	 */
	dev->ctrl.max_hw_sectors = NVME_MAX_KB_SZ << 1;
	dev->ctrl.max_hw_sectors = min_t(u32,
		NVME_MAX_KB_SZ << 1, dma_max_mapping_size(dev->dev) >> 9);
	dev->ctrl.max_segments = NVME_MAX_SEGS;

	/*
@@ -2923,7 +2929,7 @@ static int nvme_simple_resume(struct device *dev)
	return 0;
}

const struct dev_pm_ops nvme_dev_pm_ops = {
static const struct dev_pm_ops nvme_dev_pm_ops = {
	.suspend	= nvme_suspend,
	.resume		= nvme_resume,
	.freeze		= nvme_simple_suspend,
Loading