Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit b55b3902 authored by Linus Torvalds
Browse files
Pull NVMe update from Matthew Wilcox:
 "Mostly bugfixes again for the NVMe driver.  I'd like to call out the
  exported tracepoint in the block layer; I believe Keith has cleared
  this with Jens.

  We've had a few reports from people who're really pounding on NVMe
  devices at scale, hence the timeout changes (and new module
  parameters), hotplug cpu deadlock, tracepoints, and minor performance
  tweaks"

[ Jens hadn't seen that tracepoint thing, but is ok with it - it will
  end up going away when mq conversion happens ]

* git://git.infradead.org/users/willy/linux-nvme: (22 commits)
  NVMe: Fix START_STOP_UNIT Scsi->NVMe translation.
  NVMe: Use Log Page constants in SCSI emulation
  NVMe: Define Log Page constants
  NVMe: Fix hot cpu notification dead lock
  NVMe: Rename io_timeout to nvme_io_timeout
  NVMe: Use last bytes of f/w rev SCSI Inquiry
  NVMe: Adhere to request queue block accounting enable/disable
  NVMe: Fix nvme get/put queue semantics
  NVMe: Delete NVME_GET_FEAT_TEMP_THRESH
  NVMe: Make admin timeout a module parameter
  NVMe: Make iod bio timeout a parameter
  NVMe: Prevent possible NULL pointer dereference
  NVMe: Fix the buffer size passed in GetLogPage(CDW10.NUMD)
  NVMe: Update data structures for NVMe 1.2
  NVMe: Enable BUILD_BUG_ON checks
  NVMe: Update namespace and controller identify structures to the 1.1a spec
  NVMe: Flush with data support
  NVMe: Configure support for block flush
  NVMe: Add tracepoints
  NVMe: Protect against badly formatted CQEs
  ...
parents abf04af7 b8e08084
Loading
Loading
Loading
Loading
+1 −0
Original line number Original line Diff line number Diff line
@@ -43,6 +43,7 @@
EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_split);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_unplug);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_unplug);


DEFINE_IDA(blk_queue_ida);
DEFINE_IDA(blk_queue_ida);
+132 −71
Original line number Original line Diff line number Diff line
@@ -10,10 +10,6 @@
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 */
 */


#include <linux/nvme.h>
#include <linux/nvme.h>
@@ -46,16 +42,26 @@
#include <scsi/sg.h>
#include <scsi/sg.h>
#include <asm-generic/io-64-nonatomic-lo-hi.h>
#include <asm-generic/io-64-nonatomic-lo-hi.h>


#include <trace/events/block.h>

#define NVME_Q_DEPTH		1024
#define NVME_Q_DEPTH		1024
#define SQ_SIZE(depth)		(depth * sizeof(struct nvme_command))
#define SQ_SIZE(depth)		(depth * sizeof(struct nvme_command))
#define CQ_SIZE(depth)		(depth * sizeof(struct nvme_completion))
#define CQ_SIZE(depth)		(depth * sizeof(struct nvme_completion))
#define ADMIN_TIMEOUT	(60 * HZ)
#define ADMIN_TIMEOUT		(admin_timeout * HZ)
#define IOD_TIMEOUT	(4 * NVME_IO_TIMEOUT)
#define IOD_TIMEOUT		(retry_time * HZ)

static unsigned char admin_timeout = 60;
module_param(admin_timeout, byte, 0644);
MODULE_PARM_DESC(admin_timeout, "timeout in seconds for admin commands");


unsigned char io_timeout = 30;
unsigned char nvme_io_timeout = 30;
module_param(io_timeout, byte, 0644);
module_param_named(io_timeout, nvme_io_timeout, byte, 0644);
MODULE_PARM_DESC(io_timeout, "timeout in seconds for I/O");
MODULE_PARM_DESC(io_timeout, "timeout in seconds for I/O");


static unsigned char retry_time = 30;
module_param(retry_time, byte, 0644);
MODULE_PARM_DESC(retry_time, "time in seconds to retry failed I/O");

static int nvme_major;
static int nvme_major;
module_param(nvme_major, int, 0);
module_param(nvme_major, int, 0);


@@ -67,6 +73,7 @@ static LIST_HEAD(dev_list);
static struct task_struct *nvme_thread;
static struct task_struct *nvme_thread;
static struct workqueue_struct *nvme_workq;
static struct workqueue_struct *nvme_workq;
static wait_queue_head_t nvme_kthread_wait;
static wait_queue_head_t nvme_kthread_wait;
static struct notifier_block nvme_nb;


static void nvme_reset_failed_dev(struct work_struct *ws);
static void nvme_reset_failed_dev(struct work_struct *ws);


@@ -199,16 +206,13 @@ static int alloc_cmdid_killable(struct nvme_queue *nvmeq, void *ctx,
#define CMD_CTX_CANCELLED	(0x30C + CMD_CTX_BASE)
#define CMD_CTX_CANCELLED	(0x30C + CMD_CTX_BASE)
#define CMD_CTX_COMPLETED	(0x310 + CMD_CTX_BASE)
#define CMD_CTX_COMPLETED	(0x310 + CMD_CTX_BASE)
#define CMD_CTX_INVALID		(0x314 + CMD_CTX_BASE)
#define CMD_CTX_INVALID		(0x314 + CMD_CTX_BASE)
#define CMD_CTX_FLUSH		(0x318 + CMD_CTX_BASE)
#define CMD_CTX_ABORT		(0x318 + CMD_CTX_BASE)
#define CMD_CTX_ABORT		(0x31C + CMD_CTX_BASE)


static void special_completion(struct nvme_queue *nvmeq, void *ctx,
static void special_completion(struct nvme_queue *nvmeq, void *ctx,
						struct nvme_completion *cqe)
						struct nvme_completion *cqe)
{
{
	if (ctx == CMD_CTX_CANCELLED)
	if (ctx == CMD_CTX_CANCELLED)
		return;
		return;
	if (ctx == CMD_CTX_FLUSH)
		return;
	if (ctx == CMD_CTX_ABORT) {
	if (ctx == CMD_CTX_ABORT) {
		++nvmeq->dev->abort_limit;
		++nvmeq->dev->abort_limit;
		return;
		return;
@@ -247,7 +251,8 @@ static void *free_cmdid(struct nvme_queue *nvmeq, int cmdid,
	void *ctx;
	void *ctx;
	struct nvme_cmd_info *info = nvme_cmd_info(nvmeq);
	struct nvme_cmd_info *info = nvme_cmd_info(nvmeq);


	if (cmdid >= nvmeq->q_depth) {
	if (cmdid >= nvmeq->q_depth || !info[cmdid].fn) {
		if (fn)
			*fn = special_completion;
			*fn = special_completion;
		return CMD_CTX_INVALID;
		return CMD_CTX_INVALID;
	}
	}
@@ -281,9 +286,17 @@ static struct nvme_queue *raw_nvmeq(struct nvme_dev *dev, int qid)


static struct nvme_queue *get_nvmeq(struct nvme_dev *dev) __acquires(RCU)
static struct nvme_queue *get_nvmeq(struct nvme_dev *dev) __acquires(RCU)
{
{
	struct nvme_queue *nvmeq;
	unsigned queue_id = get_cpu_var(*dev->io_queue);
	unsigned queue_id = get_cpu_var(*dev->io_queue);

	rcu_read_lock();
	rcu_read_lock();
	return rcu_dereference(dev->queues[queue_id]);
	nvmeq = rcu_dereference(dev->queues[queue_id]);
	if (nvmeq)
		return nvmeq;

	rcu_read_unlock();
	put_cpu_var(*dev->io_queue);
	return NULL;
}
}


static void put_nvmeq(struct nvme_queue *nvmeq) __releases(RCU)
static void put_nvmeq(struct nvme_queue *nvmeq) __releases(RCU)
@@ -295,8 +308,15 @@ static void put_nvmeq(struct nvme_queue *nvmeq) __releases(RCU)
static struct nvme_queue *lock_nvmeq(struct nvme_dev *dev, int q_idx)
static struct nvme_queue *lock_nvmeq(struct nvme_dev *dev, int q_idx)
							__acquires(RCU)
							__acquires(RCU)
{
{
	struct nvme_queue *nvmeq;

	rcu_read_lock();
	rcu_read_lock();
	return rcu_dereference(dev->queues[q_idx]);
	nvmeq = rcu_dereference(dev->queues[q_idx]);
	if (nvmeq)
		return nvmeq;

	rcu_read_unlock();
	return NULL;
}
}


static void unlock_nvmeq(struct nvme_queue *nvmeq) __releases(RCU)
static void unlock_nvmeq(struct nvme_queue *nvmeq) __releases(RCU)
@@ -387,18 +407,22 @@ void nvme_free_iod(struct nvme_dev *dev, struct nvme_iod *iod)
static void nvme_start_io_acct(struct bio *bio)
static void nvme_start_io_acct(struct bio *bio)
{
{
	struct gendisk *disk = bio->bi_bdev->bd_disk;
	struct gendisk *disk = bio->bi_bdev->bd_disk;
	if (blk_queue_io_stat(disk->queue)) {
		const int rw = bio_data_dir(bio);
		const int rw = bio_data_dir(bio);
		int cpu = part_stat_lock();
		int cpu = part_stat_lock();
		part_round_stats(cpu, &disk->part0);
		part_round_stats(cpu, &disk->part0);
		part_stat_inc(cpu, &disk->part0, ios[rw]);
		part_stat_inc(cpu, &disk->part0, ios[rw]);
	part_stat_add(cpu, &disk->part0, sectors[rw], bio_sectors(bio));
		part_stat_add(cpu, &disk->part0, sectors[rw],
							bio_sectors(bio));
		part_inc_in_flight(&disk->part0, rw);
		part_inc_in_flight(&disk->part0, rw);
		part_stat_unlock();
		part_stat_unlock();
	}
	}
}


static void nvme_end_io_acct(struct bio *bio, unsigned long start_time)
static void nvme_end_io_acct(struct bio *bio, unsigned long start_time)
{
{
	struct gendisk *disk = bio->bi_bdev->bd_disk;
	struct gendisk *disk = bio->bi_bdev->bd_disk;
	if (blk_queue_io_stat(disk->queue)) {
		const int rw = bio_data_dir(bio);
		const int rw = bio_data_dir(bio);
		unsigned long duration = jiffies - start_time;
		unsigned long duration = jiffies - start_time;
		int cpu = part_stat_lock();
		int cpu = part_stat_lock();
@@ -407,6 +431,7 @@ static void nvme_end_io_acct(struct bio *bio, unsigned long start_time)
		part_dec_in_flight(&disk->part0, rw);
		part_dec_in_flight(&disk->part0, rw);
		part_stat_unlock();
		part_stat_unlock();
	}
	}
}


static void bio_completion(struct nvme_queue *nvmeq, void *ctx,
static void bio_completion(struct nvme_queue *nvmeq, void *ctx,
						struct nvme_completion *cqe)
						struct nvme_completion *cqe)
@@ -414,6 +439,7 @@ static void bio_completion(struct nvme_queue *nvmeq, void *ctx,
	struct nvme_iod *iod = ctx;
	struct nvme_iod *iod = ctx;
	struct bio *bio = iod->private;
	struct bio *bio = iod->private;
	u16 status = le16_to_cpup(&cqe->status) >> 1;
	u16 status = le16_to_cpup(&cqe->status) >> 1;
	int error = 0;


	if (unlikely(status)) {
	if (unlikely(status)) {
		if (!(status & NVME_SC_DNR ||
		if (!(status & NVME_SC_DNR ||
@@ -426,6 +452,7 @@ static void bio_completion(struct nvme_queue *nvmeq, void *ctx,
			wake_up(&nvmeq->sq_full);
			wake_up(&nvmeq->sq_full);
			return;
			return;
		}
		}
		error = -EIO;
	}
	}
	if (iod->nents) {
	if (iod->nents) {
		dma_unmap_sg(nvmeq->q_dmadev, iod->sg, iod->nents,
		dma_unmap_sg(nvmeq->q_dmadev, iod->sg, iod->nents,
@@ -433,10 +460,9 @@ static void bio_completion(struct nvme_queue *nvmeq, void *ctx,
		nvme_end_io_acct(bio, iod->start_time);
		nvme_end_io_acct(bio, iod->start_time);
	}
	}
	nvme_free_iod(nvmeq->dev, iod);
	nvme_free_iod(nvmeq->dev, iod);
	if (status)

		bio_endio(bio, -EIO);
	trace_block_bio_complete(bdev_get_queue(bio->bi_bdev), bio, error);
	else
	bio_endio(bio, error);
		bio_endio(bio, 0);
}
}


/* length is in bytes.  gfp flags indicates whether we may sleep. */
/* length is in bytes.  gfp flags indicates whether we may sleep. */
@@ -525,6 +551,8 @@ static int nvme_split_and_submit(struct bio *bio, struct nvme_queue *nvmeq,
	if (!split)
	if (!split)
		return -ENOMEM;
		return -ENOMEM;


	trace_block_split(bdev_get_queue(bio->bi_bdev), bio,
					split->bi_iter.bi_sector);
	bio_chain(split, bio);
	bio_chain(split, bio);


	if (!waitqueue_active(&nvmeq->sq_full))
	if (!waitqueue_active(&nvmeq->sq_full))
@@ -627,16 +655,6 @@ static int nvme_submit_flush(struct nvme_queue *nvmeq, struct nvme_ns *ns,
	return 0;
	return 0;
}
}


/*
 * Submit a flush command on @nvmeq for namespace @ns.
 *
 * A command id is allocated with the CMD_CTX_FLUSH sentinel as its context
 * and special_completion() as its completion handler, using NVME_IO_TIMEOUT
 * as the timeout.  If no command id can be allocated, the negative value
 * returned by alloc_cmdid() is passed straight back to the caller; otherwise
 * the result of nvme_submit_flush() is returned.
 */
int nvme_submit_flush_data(struct nvme_queue *nvmeq, struct nvme_ns *ns)
{
	int cmdid = alloc_cmdid(nvmeq, (void *)CMD_CTX_FLUSH,
					special_completion, NVME_IO_TIMEOUT);
	if (unlikely(cmdid < 0))
		return cmdid;

	return nvme_submit_flush(nvmeq, ns, cmdid);
}

static int nvme_submit_iod(struct nvme_queue *nvmeq, struct nvme_iod *iod)
static int nvme_submit_iod(struct nvme_queue *nvmeq, struct nvme_iod *iod)
{
{
	struct bio *bio = iod->private;
	struct bio *bio = iod->private;
@@ -652,7 +670,7 @@ static int nvme_submit_iod(struct nvme_queue *nvmeq, struct nvme_iod *iod)


	if (bio->bi_rw & REQ_DISCARD)
	if (bio->bi_rw & REQ_DISCARD)
		return nvme_submit_discard(nvmeq, ns, bio, iod, cmdid);
		return nvme_submit_discard(nvmeq, ns, bio, iod, cmdid);
	if ((bio->bi_rw & REQ_FLUSH) && !iod->nents)
	if (bio->bi_rw & REQ_FLUSH)
		return nvme_submit_flush(nvmeq, ns, cmdid);
		return nvme_submit_flush(nvmeq, ns, cmdid);


	control = 0;
	control = 0;
@@ -686,6 +704,26 @@ static int nvme_submit_iod(struct nvme_queue *nvmeq, struct nvme_iod *iod)
	return 0;
	return 0;
}
}


/*
 * Split a flush-with-data bio into two bios: a payload-less clone and the
 * original with REQ_FLUSH cleared.
 *
 * nvme_submit_bio_queue() calls this when a bio has REQ_FLUSH set and a
 * non-zero physical segment count.  Both bios are placed on the queue's
 * sq_cong list (clone first, then the original) and nvme_thread is woken;
 * nothing is submitted to the hardware from here.
 *
 * Returns 0 on success, or -ENOMEM if the clone cannot be allocated.
 */
static int nvme_split_flush_data(struct nvme_queue *nvmeq, struct bio *bio)
{
	/* GFP_ATOMIC: the caller is documented as "May not sleep". */
	struct bio *split = bio_clone(bio, GFP_ATOMIC);
	if (!split)
		return -ENOMEM;

	/*
	 * Strip the data from the clone so it carries no payload.
	 * NOTE(review): presumably the clone keeps the REQ_FLUSH bit copied
	 * from the original, making it the pure flush - confirm against
	 * bio_clone().
	 */
	split->bi_iter.bi_size = 0;
	split->bi_phys_segments = 0;
	/* The original keeps its data but no longer requests a flush. */
	bio->bi_rw &= ~REQ_FLUSH;
	bio_chain(split, bio);

	/* Register the congestion waiter only if nobody is on sq_full yet. */
	if (!waitqueue_active(&nvmeq->sq_full))
		add_wait_queue(&nvmeq->sq_full, &nvmeq->sq_cong_wait);
	/* Order matters: the payload-less clone is queued ahead of the data. */
	bio_list_add(&nvmeq->sq_cong, split);
	bio_list_add(&nvmeq->sq_cong, bio);
	wake_up_process(nvme_thread);

	return 0;
}

/*
/*
 * Called with local interrupts disabled and the q_lock held.  May not sleep.
 * Called with local interrupts disabled and the q_lock held.  May not sleep.
 */
 */
@@ -696,11 +734,8 @@ static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns,
	int psegs = bio_phys_segments(ns->queue, bio);
	int psegs = bio_phys_segments(ns->queue, bio);
	int result;
	int result;


	if ((bio->bi_rw & REQ_FLUSH) && psegs) {
	if ((bio->bi_rw & REQ_FLUSH) && psegs)
		result = nvme_submit_flush_data(nvmeq, ns);
		return nvme_split_flush_data(nvmeq, bio);
		if (result)
			return result;
	}


	iod = nvme_alloc_iod(psegs, bio->bi_iter.bi_size, GFP_ATOMIC);
	iod = nvme_alloc_iod(psegs, bio->bi_iter.bi_size, GFP_ATOMIC);
	if (!iod)
	if (!iod)
@@ -795,7 +830,6 @@ static void nvme_make_request(struct request_queue *q, struct bio *bio)
	int result = -EBUSY;
	int result = -EBUSY;


	if (!nvmeq) {
	if (!nvmeq) {
		put_nvmeq(NULL);
		bio_endio(bio, -EIO);
		bio_endio(bio, -EIO);
		return;
		return;
	}
	}
@@ -870,10 +904,8 @@ static int nvme_submit_sync_cmd(struct nvme_dev *dev, int q_idx,
	struct nvme_queue *nvmeq;
	struct nvme_queue *nvmeq;


	nvmeq = lock_nvmeq(dev, q_idx);
	nvmeq = lock_nvmeq(dev, q_idx);
	if (!nvmeq) {
	if (!nvmeq)
		unlock_nvmeq(nvmeq);
		return -ENODEV;
		return -ENODEV;
	}


	cmdinfo.task = current;
	cmdinfo.task = current;
	cmdinfo.status = -EINTR;
	cmdinfo.status = -EINTR;
@@ -898,9 +930,10 @@ static int nvme_submit_sync_cmd(struct nvme_dev *dev, int q_idx,


	if (cmdinfo.status == -EINTR) {
	if (cmdinfo.status == -EINTR) {
		nvmeq = lock_nvmeq(dev, q_idx);
		nvmeq = lock_nvmeq(dev, q_idx);
		if (nvmeq)
		if (nvmeq) {
			nvme_abort_command(nvmeq, cmdid);
			nvme_abort_command(nvmeq, cmdid);
			unlock_nvmeq(nvmeq);
			unlock_nvmeq(nvmeq);
		}
		return -EINTR;
		return -EINTR;
	}
	}


@@ -1358,7 +1391,8 @@ static int nvme_wait_ready(struct nvme_dev *dev, u64 cap, bool enabled)
			return -EINTR;
			return -EINTR;
		if (time_after(jiffies, timeout)) {
		if (time_after(jiffies, timeout)) {
			dev_err(&dev->pci_dev->dev,
			dev_err(&dev->pci_dev->dev,
				"Device not ready; aborting initialisation\n");
				"Device not ready; aborting %s\n", enabled ?
						"initialisation" : "reset");
			return -ENODEV;
			return -ENODEV;
		}
		}
	}
	}
@@ -1481,7 +1515,11 @@ struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write,
		goto put_pages;
		goto put_pages;
	}
	}


	err = -ENOMEM;
	iod = nvme_alloc_iod(count, length, GFP_KERNEL);
	iod = nvme_alloc_iod(count, length, GFP_KERNEL);
	if (!iod)
		goto put_pages;

	sg = iod->sg;
	sg = iod->sg;
	sg_init_table(sg, count);
	sg_init_table(sg, count);
	for (i = 0; i < count; i++) {
	for (i = 0; i < count; i++) {
@@ -1494,7 +1532,6 @@ struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write,
	sg_mark_end(&sg[i - 1]);
	sg_mark_end(&sg[i - 1]);
	iod->nents = count;
	iod->nents = count;


	err = -ENOMEM;
	nents = dma_map_sg(&dev->pci_dev->dev, sg, count,
	nents = dma_map_sg(&dev->pci_dev->dev, sg, count,
				write ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
				write ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
	if (!nents)
	if (!nents)
@@ -1894,6 +1931,8 @@ static struct nvme_ns *nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid,
	blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
	blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
	if (dev->max_hw_sectors)
	if (dev->max_hw_sectors)
		blk_queue_max_hw_sectors(ns->queue, dev->max_hw_sectors);
		blk_queue_max_hw_sectors(ns->queue, dev->max_hw_sectors);
	if (dev->vwc & NVME_CTRL_VWC_PRESENT)
		blk_queue_flush(ns->queue, REQ_FLUSH | REQ_FUA);


	disk->major = nvme_major;
	disk->major = nvme_major;
	disk->first_minor = 0;
	disk->first_minor = 0;
@@ -2062,8 +2101,13 @@ static int set_queue_count(struct nvme_dev *dev, int count)


	status = nvme_set_features(dev, NVME_FEAT_NUM_QUEUES, q_count, 0,
	status = nvme_set_features(dev, NVME_FEAT_NUM_QUEUES, q_count, 0,
								&result);
								&result);
	if (status)
	if (status < 0)
		return status < 0 ? -EIO : -EBUSY;
		return status;
	if (status > 0) {
		dev_err(&dev->pci_dev->dev, "Could not set queue count (%d)\n",
									status);
		return -EBUSY;
	}
	return min(result & 0xffff, result >> 16) + 1;
	return min(result & 0xffff, result >> 16) + 1;
}
}


@@ -2072,14 +2116,25 @@ static size_t db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues)
	return 4096 + ((nr_io_queues + 1) * 8 * dev->db_stride);
	return 4096 + ((nr_io_queues + 1) * 8 * dev->db_stride);
}
}


/*
 * Work handler for nvme_dev.cpu_work.
 *
 * nvme_cpu_notify() schedules this work for every device on dev_list when a
 * CPU_ONLINE or CPU_DEAD event arrives.  The I/O queue assignment is redone
 * here, in work context, rather than directly inside the notifier callback.
 */
static void nvme_cpu_workfn(struct work_struct *work)
{
	struct nvme_dev *dev = container_of(work, struct nvme_dev, cpu_work);
	/* Skip devices not marked initialized (nvme_dev_shutdown() clears it). */
	if (dev->initialized)
		nvme_assign_io_queues(dev);
}

static int nvme_cpu_notify(struct notifier_block *self,
static int nvme_cpu_notify(struct notifier_block *self,
				unsigned long action, void *hcpu)
				unsigned long action, void *hcpu)
{
{
	struct nvme_dev *dev = container_of(self, struct nvme_dev, nb);
	struct nvme_dev *dev;

	switch (action) {
	switch (action) {
	case CPU_ONLINE:
	case CPU_ONLINE:
	case CPU_DEAD:
	case CPU_DEAD:
		nvme_assign_io_queues(dev);
		spin_lock(&dev_list_lock);
		list_for_each_entry(dev, &dev_list, node)
			schedule_work(&dev->cpu_work);
		spin_unlock(&dev_list_lock);
		break;
		break;
	}
	}
	return NOTIFY_OK;
	return NOTIFY_OK;
@@ -2148,11 +2203,6 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
	nvme_free_queues(dev, nr_io_queues + 1);
	nvme_free_queues(dev, nr_io_queues + 1);
	nvme_assign_io_queues(dev);
	nvme_assign_io_queues(dev);


	dev->nb.notifier_call = &nvme_cpu_notify;
	result = register_hotcpu_notifier(&dev->nb);
	if (result)
		goto free_queues;

	return 0;
	return 0;


 free_queues:
 free_queues:
@@ -2184,6 +2234,7 @@ static int nvme_dev_add(struct nvme_dev *dev)


	res = nvme_identify(dev, 0, 1, dma_addr);
	res = nvme_identify(dev, 0, 1, dma_addr);
	if (res) {
	if (res) {
		dev_err(&pdev->dev, "Identify Controller failed (%d)\n", res);
		res = -EIO;
		res = -EIO;
		goto out;
		goto out;
	}
	}
@@ -2192,6 +2243,7 @@ static int nvme_dev_add(struct nvme_dev *dev)
	nn = le32_to_cpup(&ctrl->nn);
	nn = le32_to_cpup(&ctrl->nn);
	dev->oncs = le16_to_cpup(&ctrl->oncs);
	dev->oncs = le16_to_cpup(&ctrl->oncs);
	dev->abort_limit = ctrl->acl + 1;
	dev->abort_limit = ctrl->acl + 1;
	dev->vwc = ctrl->vwc;
	memcpy(dev->serial, ctrl->sn, sizeof(ctrl->sn));
	memcpy(dev->serial, ctrl->sn, sizeof(ctrl->sn));
	memcpy(dev->model, ctrl->mn, sizeof(ctrl->mn));
	memcpy(dev->model, ctrl->mn, sizeof(ctrl->mn));
	memcpy(dev->firmware_rev, ctrl->fr, sizeof(ctrl->fr));
	memcpy(dev->firmware_rev, ctrl->fr, sizeof(ctrl->fr));
@@ -2450,8 +2502,6 @@ static void nvme_dev_shutdown(struct nvme_dev *dev)
	int i;
	int i;


	dev->initialized = 0;
	dev->initialized = 0;
	unregister_hotcpu_notifier(&dev->nb);

	nvme_dev_list_remove(dev);
	nvme_dev_list_remove(dev);


	if (!dev->bar || (dev->bar && readl(&dev->bar->csts) == -1)) {
	if (!dev->bar || (dev->bar && readl(&dev->bar->csts) == -1)) {
@@ -2722,6 +2772,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
	INIT_LIST_HEAD(&dev->namespaces);
	INIT_LIST_HEAD(&dev->namespaces);
	dev->reset_workfn = nvme_reset_failed_dev;
	dev->reset_workfn = nvme_reset_failed_dev;
	INIT_WORK(&dev->reset_work, nvme_reset_workfn);
	INIT_WORK(&dev->reset_work, nvme_reset_workfn);
	INIT_WORK(&dev->cpu_work, nvme_cpu_workfn);
	dev->pci_dev = pdev;
	dev->pci_dev = pdev;
	pci_set_drvdata(pdev, dev);
	pci_set_drvdata(pdev, dev);
	result = nvme_set_instance(dev);
	result = nvme_set_instance(dev);
@@ -2801,6 +2852,7 @@ static void nvme_remove(struct pci_dev *pdev)


	pci_set_drvdata(pdev, NULL);
	pci_set_drvdata(pdev, NULL);
	flush_work(&dev->reset_work);
	flush_work(&dev->reset_work);
	flush_work(&dev->cpu_work);
	misc_deregister(&dev->miscdev);
	misc_deregister(&dev->miscdev);
	nvme_dev_remove(dev);
	nvme_dev_remove(dev);
	nvme_dev_shutdown(dev);
	nvme_dev_shutdown(dev);
@@ -2889,11 +2941,18 @@ static int __init nvme_init(void)
	else if (result > 0)
	else if (result > 0)
		nvme_major = result;
		nvme_major = result;


	result = pci_register_driver(&nvme_driver);
	nvme_nb.notifier_call = &nvme_cpu_notify;
	result = register_hotcpu_notifier(&nvme_nb);
	if (result)
	if (result)
		goto unregister_blkdev;
		goto unregister_blkdev;

	result = pci_register_driver(&nvme_driver);
	if (result)
		goto unregister_hotcpu;
	return 0;
	return 0;


 unregister_hotcpu:
	unregister_hotcpu_notifier(&nvme_nb);
 unregister_blkdev:
 unregister_blkdev:
	unregister_blkdev(nvme_major, "nvme");
	unregister_blkdev(nvme_major, "nvme");
 kill_workq:
 kill_workq:
@@ -2904,9 +2963,11 @@ static int __init nvme_init(void)
static void __exit nvme_exit(void)
static void __exit nvme_exit(void)
{
{
	pci_unregister_driver(&nvme_driver);
	pci_unregister_driver(&nvme_driver);
	unregister_hotcpu_notifier(&nvme_nb);
	unregister_blkdev(nvme_major, "nvme");
	unregister_blkdev(nvme_major, "nvme");
	destroy_workqueue(nvme_workq);
	destroy_workqueue(nvme_workq);
	BUG_ON(nvme_thread && !IS_ERR(nvme_thread));
	BUG_ON(nvme_thread && !IS_ERR(nvme_thread));
	_nvme_check_size();
}
}


MODULE_AUTHOR("Matthew Wilcox <willy@linux.intel.com>");
MODULE_AUTHOR("Matthew Wilcox <willy@linux.intel.com>");
+17 −19
Original line number Original line Diff line number Diff line
/*
/*
 * NVM Express device driver
 * NVM Express device driver
 * Copyright (c) 2011, Intel Corporation.
 * Copyright (c) 2011-2014, Intel Corporation.
 *
 *
 * This program is free software; you can redistribute it and/or modify it
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * under the terms and conditions of the GNU General Public License,
@@ -10,10 +10,6 @@
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 */
 */


/*
/*
@@ -243,8 +239,6 @@ static int sg_version_num = 30534; /* 2 digits for each component */
#define READ_CAP_16_RESP_SIZE				32
#define READ_CAP_16_RESP_SIZE				32


/* NVMe Namespace and Command Defines */
/* NVMe Namespace and Command Defines */
#define NVME_GET_SMART_LOG_PAGE				0x02
#define NVME_GET_FEAT_TEMP_THRESH			0x04
#define BYTES_TO_DWORDS					4
#define BYTES_TO_DWORDS					4
#define NVME_MAX_FIRMWARE_SLOT				7
#define NVME_MAX_FIRMWARE_SLOT				7


@@ -686,6 +680,7 @@ static int nvme_trans_standard_inquiry_page(struct nvme_ns *ns,
	u8 resp_data_format = 0x02;
	u8 resp_data_format = 0x02;
	u8 protect;
	u8 protect;
	u8 cmdque = 0x01 << 1;
	u8 cmdque = 0x01 << 1;
	u8 fw_offset = sizeof(dev->firmware_rev);


	mem = dma_alloc_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns),
	mem = dma_alloc_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns),
				&dma_addr, GFP_KERNEL);
				&dma_addr, GFP_KERNEL);
@@ -721,7 +716,11 @@ static int nvme_trans_standard_inquiry_page(struct nvme_ns *ns,
	inq_response[7] = cmdque;	/* wbus16=0 | sync=0 | vs=0 */
	inq_response[7] = cmdque;	/* wbus16=0 | sync=0 | vs=0 */
	strncpy(&inq_response[8], "NVMe    ", 8);
	strncpy(&inq_response[8], "NVMe    ", 8);
	strncpy(&inq_response[16], dev->model, 16);
	strncpy(&inq_response[16], dev->model, 16);
	strncpy(&inq_response[32], dev->firmware_rev, 4);

	while (dev->firmware_rev[fw_offset - 1] == ' ' && fw_offset > 4)
		fw_offset--;
	fw_offset -= 4;
	strncpy(&inq_response[32], dev->firmware_rev + fw_offset, 4);


	xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH);
	xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH);
	res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
	res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
@@ -1018,8 +1017,8 @@ static int nvme_trans_log_info_exceptions(struct nvme_ns *ns,
	c.common.opcode = nvme_admin_get_log_page;
	c.common.opcode = nvme_admin_get_log_page;
	c.common.nsid = cpu_to_le32(0xFFFFFFFF);
	c.common.nsid = cpu_to_le32(0xFFFFFFFF);
	c.common.prp1 = cpu_to_le64(dma_addr);
	c.common.prp1 = cpu_to_le64(dma_addr);
	c.common.cdw10[0] = cpu_to_le32(((sizeof(struct nvme_smart_log) /
	c.common.cdw10[0] = cpu_to_le32((((sizeof(struct nvme_smart_log) /
			BYTES_TO_DWORDS) << 16) | NVME_GET_SMART_LOG_PAGE);
			BYTES_TO_DWORDS) - 1) << 16) | NVME_LOG_SMART);
	res = nvme_submit_admin_cmd(dev, &c, NULL);
	res = nvme_submit_admin_cmd(dev, &c, NULL);
	if (res != NVME_SC_SUCCESS) {
	if (res != NVME_SC_SUCCESS) {
		temp_c = LOG_TEMP_UNKNOWN;
		temp_c = LOG_TEMP_UNKNOWN;
@@ -1086,8 +1085,8 @@ static int nvme_trans_log_temperature(struct nvme_ns *ns, struct sg_io_hdr *hdr,
	c.common.opcode = nvme_admin_get_log_page;
	c.common.opcode = nvme_admin_get_log_page;
	c.common.nsid = cpu_to_le32(0xFFFFFFFF);
	c.common.nsid = cpu_to_le32(0xFFFFFFFF);
	c.common.prp1 = cpu_to_le64(dma_addr);
	c.common.prp1 = cpu_to_le64(dma_addr);
	c.common.cdw10[0] = cpu_to_le32(((sizeof(struct nvme_smart_log) /
	c.common.cdw10[0] = cpu_to_le32((((sizeof(struct nvme_smart_log) /
			BYTES_TO_DWORDS) << 16) | NVME_GET_SMART_LOG_PAGE);
			BYTES_TO_DWORDS) - 1) << 16) | NVME_LOG_SMART);
	res = nvme_submit_admin_cmd(dev, &c, NULL);
	res = nvme_submit_admin_cmd(dev, &c, NULL);
	if (res != NVME_SC_SUCCESS) {
	if (res != NVME_SC_SUCCESS) {
		temp_c_cur = LOG_TEMP_UNKNOWN;
		temp_c_cur = LOG_TEMP_UNKNOWN;
@@ -1477,7 +1476,7 @@ static int nvme_trans_power_state(struct nvme_ns *ns, struct sg_io_hdr *hdr,
		goto out_dma;
		goto out_dma;
	}
	}
	id_ctrl = mem;
	id_ctrl = mem;
	lowest_pow_st = id_ctrl->npss - 1;
	lowest_pow_st = max(POWER_STATE_0, (int)(id_ctrl->npss - 1));


	switch (pc) {
	switch (pc) {
	case NVME_POWER_STATE_START_VALID:
	case NVME_POWER_STATE_START_VALID:
@@ -1494,20 +1493,19 @@ static int nvme_trans_power_state(struct nvme_ns *ns, struct sg_io_hdr *hdr,
		break;
		break;
	case NVME_POWER_STATE_IDLE:
	case NVME_POWER_STATE_IDLE:
		/* Action unspecified if POWER CONDITION MODIFIER != [0,1,2] */
		/* Action unspecified if POWER CONDITION MODIFIER != [0,1,2] */
		/* min of desired state and (lps-1) because lps is STOP */
		if (pcmod == 0x0)
		if (pcmod == 0x0)
			ps_desired = min(POWER_STATE_1, (lowest_pow_st - 1));
			ps_desired = POWER_STATE_1;
		else if (pcmod == 0x1)
		else if (pcmod == 0x1)
			ps_desired = min(POWER_STATE_2, (lowest_pow_st - 1));
			ps_desired = POWER_STATE_2;
		else if (pcmod == 0x2)
		else if (pcmod == 0x2)
			ps_desired = min(POWER_STATE_3, (lowest_pow_st - 1));
			ps_desired = POWER_STATE_3;
		break;
		break;
	case NVME_POWER_STATE_STANDBY:
	case NVME_POWER_STATE_STANDBY:
		/* Action unspecified if POWER CONDITION MODIFIER != [0,1] */
		/* Action unspecified if POWER CONDITION MODIFIER != [0,1] */
		if (pcmod == 0x0)
		if (pcmod == 0x0)
			ps_desired = max(0, (lowest_pow_st - 2));
			ps_desired = max(POWER_STATE_0, (lowest_pow_st - 2));
		else if (pcmod == 0x1)
		else if (pcmod == 0x1)
			ps_desired = max(0, (lowest_pow_st - 1));
			ps_desired = max(POWER_STATE_0, (lowest_pow_st - 1));
		break;
		break;
	case NVME_POWER_STATE_LU_CONTROL:
	case NVME_POWER_STATE_LU_CONTROL:
	default:
	default:
+5 −9
Original line number Original line Diff line number Diff line
/*
/*
 * Definitions for the NVM Express interface
 * Definitions for the NVM Express interface
 * Copyright (c) 2011-2013, Intel Corporation.
 * Copyright (c) 2011-2014, Intel Corporation.
 *
 *
 * This program is free software; you can redistribute it and/or modify it
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * under the terms and conditions of the GNU General Public License,
@@ -10,10 +10,6 @@
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 
 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 */
 */


#ifndef _LINUX_NVME_H
#ifndef _LINUX_NVME_H
@@ -66,8 +62,8 @@ enum {


#define NVME_VS(major, minor)	(major << 16 | minor)
#define NVME_VS(major, minor)	(major << 16 | minor)


extern unsigned char io_timeout;
extern unsigned char nvme_io_timeout;
#define NVME_IO_TIMEOUT	(io_timeout * HZ)
#define NVME_IO_TIMEOUT	(nvme_io_timeout * HZ)


/*
/*
 * Represents an NVM Express device.  Each nvme_dev is a PCI function.
 * Represents an NVM Express device.  Each nvme_dev is a PCI function.
@@ -94,7 +90,7 @@ struct nvme_dev {
	struct miscdevice miscdev;
	struct miscdevice miscdev;
	work_func_t reset_workfn;
	work_func_t reset_workfn;
	struct work_struct reset_work;
	struct work_struct reset_work;
	struct notifier_block nb;
	struct work_struct cpu_work;
	char name[12];
	char name[12];
	char serial[20];
	char serial[20];
	char model[40];
	char model[40];
@@ -103,6 +99,7 @@ struct nvme_dev {
	u32 stripe_size;
	u32 stripe_size;
	u16 oncs;
	u16 oncs;
	u16 abort_limit;
	u16 abort_limit;
	u8 vwc;
	u8 initialized;
	u8 initialized;
};
};


@@ -159,7 +156,6 @@ struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write,
void nvme_unmap_user_pages(struct nvme_dev *dev, int write,
void nvme_unmap_user_pages(struct nvme_dev *dev, int write,
			struct nvme_iod *iod);
			struct nvme_iod *iod);
int nvme_submit_io_cmd(struct nvme_dev *, struct nvme_command *, u32 *);
int nvme_submit_io_cmd(struct nvme_dev *, struct nvme_command *, u32 *);
int nvme_submit_flush_data(struct nvme_queue *nvmeq, struct nvme_ns *ns);
int nvme_submit_admin_cmd(struct nvme_dev *, struct nvme_command *,
int nvme_submit_admin_cmd(struct nvme_dev *, struct nvme_command *,
							u32 *result);
							u32 *result);
int nvme_identify(struct nvme_dev *, unsigned nsid, unsigned cns,
int nvme_identify(struct nvme_dev *, unsigned nsid, unsigned cns,
+39 −11
Original line number Original line Diff line number Diff line
/*
/*
 * Definitions for the NVM Express interface
 * Definitions for the NVM Express interface
 * Copyright (c) 2011-2013, Intel Corporation.
 * Copyright (c) 2011-2014, Intel Corporation.
 *
 *
 * This program is free software; you can redistribute it and/or modify it
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * under the terms and conditions of the GNU General Public License,
@@ -10,10 +10,6 @@
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 
 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 */
 */


#ifndef _UAPI_LINUX_NVME_H
#ifndef _UAPI_LINUX_NVME_H
@@ -31,7 +27,12 @@ struct nvme_id_power_state {
	__u8			read_lat;
	__u8			read_lat;
	__u8			write_tput;
	__u8			write_tput;
	__u8			write_lat;
	__u8			write_lat;
	__u8			rsvd16[16];
	__le16			idle_power;
	__u8			idle_scale;
	__u8			rsvd19;
	__le16			active_power;
	__u8			active_work_scale;
	__u8			rsvd23[9];
};
};


enum {
enum {
@@ -49,7 +50,9 @@ struct nvme_id_ctrl {
	__u8			ieee[3];
	__u8			ieee[3];
	__u8			mic;
	__u8			mic;
	__u8			mdts;
	__u8			mdts;
	__u8			rsvd78[178];
	__u16			cntlid;
	__u32			ver;
	__u8			rsvd84[172];
	__le16			oacs;
	__le16			oacs;
	__u8			acl;
	__u8			acl;
	__u8			aerl;
	__u8			aerl;
@@ -57,7 +60,11 @@ struct nvme_id_ctrl {
	__u8			lpa;
	__u8			lpa;
	__u8			elpe;
	__u8			elpe;
	__u8			npss;
	__u8			npss;
	__u8			rsvd264[248];
	__u8			avscc;
	__u8			apsta;
	__le16			wctemp;
	__le16			cctemp;
	__u8			rsvd270[242];
	__u8			sqes;
	__u8			sqes;
	__u8			cqes;
	__u8			cqes;
	__u8			rsvd514[2];
	__u8			rsvd514[2];
@@ -68,7 +75,12 @@ struct nvme_id_ctrl {
	__u8			vwc;
	__u8			vwc;
	__le16			awun;
	__le16			awun;
	__le16			awupf;
	__le16			awupf;
	__u8			rsvd530[1518];
	__u8			nvscc;
	__u8			rsvd531;
	__le16			acwu;
	__u8			rsvd534[2];
	__le32			sgls;
	__u8			rsvd540[1508];
	struct nvme_id_power_state	psd[32];
	struct nvme_id_power_state	psd[32];
	__u8			vs[1024];
	__u8			vs[1024];
};
};
@@ -77,6 +89,7 @@ enum {
	NVME_CTRL_ONCS_COMPARE			= 1 << 0,
	NVME_CTRL_ONCS_COMPARE			= 1 << 0,
	NVME_CTRL_ONCS_WRITE_UNCORRECTABLE	= 1 << 1,
	NVME_CTRL_ONCS_WRITE_UNCORRECTABLE	= 1 << 1,
	NVME_CTRL_ONCS_DSM			= 1 << 2,
	NVME_CTRL_ONCS_DSM			= 1 << 2,
	NVME_CTRL_VWC_PRESENT			= 1 << 0,
};
};


struct nvme_lbaf {
struct nvme_lbaf {
@@ -95,7 +108,15 @@ struct nvme_id_ns {
	__u8			mc;
	__u8			mc;
	__u8			dpc;
	__u8			dpc;
	__u8			dps;
	__u8			dps;
	__u8			rsvd30[98];
	__u8			nmic;
	__u8			rescap;
	__u8			fpi;
	__u8			rsvd33;
	__le16			nawun;
	__le16			nawupf;
	__le16			nacwu;
	__u8			rsvd40[80];
	__u8			eui64[8];
	struct nvme_lbaf	lbaf[16];
	struct nvme_lbaf	lbaf[16];
	__u8			rsvd192[192];
	__u8			rsvd192[192];
	__u8			vs[3712];
	__u8			vs[3712];
@@ -126,7 +147,10 @@ struct nvme_smart_log {
	__u8			unsafe_shutdowns[16];
	__u8			unsafe_shutdowns[16];
	__u8			media_errors[16];
	__u8			media_errors[16];
	__u8			num_err_log_entries[16];
	__u8			num_err_log_entries[16];
	__u8			rsvd192[320];
	__le32			warning_temp_time;
	__le32			critical_comp_time;
	__le16			temp_sensor[8];
	__u8			rsvd216[296];
};
};


enum {
enum {
@@ -282,6 +306,10 @@ enum {
	NVME_FEAT_WRITE_ATOMIC	= 0x0a,
	NVME_FEAT_WRITE_ATOMIC	= 0x0a,
	NVME_FEAT_ASYNC_EVENT	= 0x0b,
	NVME_FEAT_ASYNC_EVENT	= 0x0b,
	NVME_FEAT_SW_PROGRESS	= 0x0c,
	NVME_FEAT_SW_PROGRESS	= 0x0c,
	NVME_LOG_ERROR		= 0x01,
	NVME_LOG_SMART		= 0x02,
	NVME_LOG_FW_SLOT	= 0x03,
	NVME_LOG_RESERVATION	= 0x80,
	NVME_FWACT_REPL		= (0 << 3),
	NVME_FWACT_REPL		= (0 << 3),
	NVME_FWACT_REPL_ACTV	= (1 << 3),
	NVME_FWACT_REPL_ACTV	= (1 << 3),
	NVME_FWACT_ACTV		= (2 << 3),
	NVME_FWACT_ACTV		= (2 << 3),