Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 047fc8a1 authored by Ross Zwisler, committed by Dan Williams
Browse files

libnvdimm, nfit, nd_blk: driver for BLK-mode access persistent memory

The libnvdimm implementation handles allocating dimm address space (DPA)
between PMEM and BLK mode interfaces.  After DPA has been allocated from
a BLK-region to a BLK-namespace the nd_blk driver attaches to handle I/O
as a struct bio based block device. Unlike PMEM, BLK is required to
handle platform specific details like mmio register formats and memory
controller interleave.  For this reason the libnvdimm generic nd_blk
driver calls back into the bus provider to carry out the I/O.

This initial implementation handles the BLK interface defined by the
ACPI 6 NFIT [1] and the NVDIMM DSM Interface Example [2] composed from
DCR (dimm control region), BDW (block data window), IDT (interleave
descriptor) NFIT structures and the hardware register format.
[1]: http://www.uefi.org/sites/default/files/resources/ACPI_6.0.pdf
[2]: http://pmem.io/documents/NVDIMM_DSM_Interface_Example.pdf



Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Boaz Harrosh <boaz@plexistor.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Jens Axboe <axboe@fb.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
parent 5212e11f
Loading
Loading
Loading
Loading
+435 −14
Original line number Original line Diff line number Diff line
@@ -13,12 +13,20 @@
#include <linux/list_sort.h>
#include <linux/list_sort.h>
#include <linux/libnvdimm.h>
#include <linux/libnvdimm.h>
#include <linux/module.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/ndctl.h>
#include <linux/ndctl.h>
#include <linux/list.h>
#include <linux/list.h>
#include <linux/acpi.h>
#include <linux/acpi.h>
#include <linux/sort.h>
#include <linux/sort.h>
#include <linux/io.h>
#include "nfit.h"
#include "nfit.h"


/*
 * For readq() and writeq() on 32-bit builds, the hi-lo, lo-hi order is
 * irrelevant.
 */
#include <asm-generic/io-64-nonatomic-hi-lo.h>

static bool force_enable_dimms;
static bool force_enable_dimms;
module_param(force_enable_dimms, bool, S_IRUGO|S_IWUSR);
module_param(force_enable_dimms, bool, S_IRUGO|S_IWUSR);
MODULE_PARM_DESC(force_enable_dimms, "Ignore _STA (ACPI DIMM device) status");
MODULE_PARM_DESC(force_enable_dimms, "Ignore _STA (ACPI DIMM device) status");
@@ -72,7 +80,7 @@ static int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc,


		if (!adev)
		if (!adev)
			return -ENOTTY;
			return -ENOTTY;
		dimm_name = dev_name(&adev->dev);
		dimm_name = nvdimm_name(nvdimm);
		cmd_name = nvdimm_cmd_name(cmd);
		cmd_name = nvdimm_cmd_name(cmd);
		dsm_mask = nfit_mem->dsm_mask;
		dsm_mask = nfit_mem->dsm_mask;
		desc = nd_cmd_dimm_desc(cmd);
		desc = nd_cmd_dimm_desc(cmd);
@@ -279,6 +287,23 @@ static bool add_bdw(struct acpi_nfit_desc *acpi_desc,
	return true;
	return true;
}
}


static bool add_idt(struct acpi_nfit_desc *acpi_desc,
		struct acpi_nfit_interleave *idt)
{
	struct device *dev = acpi_desc->dev;
	struct nfit_idt *nfit_idt = devm_kzalloc(dev, sizeof(*nfit_idt),
			GFP_KERNEL);

	if (!nfit_idt)
		return false;
	INIT_LIST_HEAD(&nfit_idt->list);
	nfit_idt->idt = idt;
	list_add_tail(&nfit_idt->list, &acpi_desc->idts);
	dev_dbg(dev, "%s: idt index: %d num_lines: %d\n", __func__,
			idt->interleave_index, idt->line_count);
	return true;
}

static void *add_table(struct acpi_nfit_desc *acpi_desc, void *table,
static void *add_table(struct acpi_nfit_desc *acpi_desc, void *table,
		const void *end)
		const void *end)
{
{
@@ -307,9 +332,9 @@ static void *add_table(struct acpi_nfit_desc *acpi_desc, void *table,
		if (!add_bdw(acpi_desc, table))
		if (!add_bdw(acpi_desc, table))
			return err;
			return err;
		break;
		break;
	/* TODO */
	case ACPI_NFIT_TYPE_INTERLEAVE:
	case ACPI_NFIT_TYPE_INTERLEAVE:
		dev_dbg(dev, "%s: idt\n", __func__);
		if (!add_idt(acpi_desc, table))
			return err;
		break;
		break;
	case ACPI_NFIT_TYPE_FLUSH_ADDRESS:
	case ACPI_NFIT_TYPE_FLUSH_ADDRESS:
		dev_dbg(dev, "%s: flush\n", __func__);
		dev_dbg(dev, "%s: flush\n", __func__);
@@ -362,8 +387,11 @@ static int nfit_mem_add(struct acpi_nfit_desc *acpi_desc,
		struct nfit_mem *nfit_mem, struct acpi_nfit_system_address *spa)
		struct nfit_mem *nfit_mem, struct acpi_nfit_system_address *spa)
{
{
	u16 dcr = __to_nfit_memdev(nfit_mem)->region_index;
	u16 dcr = __to_nfit_memdev(nfit_mem)->region_index;
	struct nfit_memdev *nfit_memdev;
	struct nfit_dcr *nfit_dcr;
	struct nfit_dcr *nfit_dcr;
	struct nfit_bdw *nfit_bdw;
	struct nfit_bdw *nfit_bdw;
	struct nfit_idt *nfit_idt;
	u16 idt_idx, range_index;


	list_for_each_entry(nfit_dcr, &acpi_desc->dcrs, list) {
	list_for_each_entry(nfit_dcr, &acpi_desc->dcrs, list) {
		if (nfit_dcr->dcr->region_index != dcr)
		if (nfit_dcr->dcr->region_index != dcr)
@@ -396,6 +424,26 @@ static int nfit_mem_add(struct acpi_nfit_desc *acpi_desc,
		return 0;
		return 0;


	nfit_mem_find_spa_bdw(acpi_desc, nfit_mem);
	nfit_mem_find_spa_bdw(acpi_desc, nfit_mem);

	if (!nfit_mem->spa_bdw)
		return 0;

	range_index = nfit_mem->spa_bdw->range_index;
	list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
		if (nfit_memdev->memdev->range_index != range_index ||
				nfit_memdev->memdev->region_index != dcr)
			continue;
		nfit_mem->memdev_bdw = nfit_memdev->memdev;
		idt_idx = nfit_memdev->memdev->interleave_index;
		list_for_each_entry(nfit_idt, &acpi_desc->idts, list) {
			if (nfit_idt->idt->interleave_index != idt_idx)
				continue;
			nfit_mem->idt_bdw = nfit_idt->idt;
			break;
		}
		break;
	}

	return 0;
	return 0;
}
}


@@ -439,9 +487,19 @@ static int nfit_mem_dcr_init(struct acpi_nfit_desc *acpi_desc,
		}
		}


		if (type == NFIT_SPA_DCR) {
		if (type == NFIT_SPA_DCR) {
			struct nfit_idt *nfit_idt;
			u16 idt_idx;

			/* multiple dimms may share a SPA when interleaved */
			/* multiple dimms may share a SPA when interleaved */
			nfit_mem->spa_dcr = spa;
			nfit_mem->spa_dcr = spa;
			nfit_mem->memdev_dcr = nfit_memdev->memdev;
			nfit_mem->memdev_dcr = nfit_memdev->memdev;
			idt_idx = nfit_memdev->memdev->interleave_index;
			list_for_each_entry(nfit_idt, &acpi_desc->idts, list) {
				if (nfit_idt->idt->interleave_index != idt_idx)
					continue;
				nfit_mem->idt_dcr = nfit_idt->idt;
				break;
			}
		} else {
		} else {
			/*
			/*
			 * A single dimm may belong to multiple SPA-PM
			 * A single dimm may belong to multiple SPA-PM
@@ -871,6 +929,359 @@ static int acpi_nfit_init_interleave_set(struct acpi_nfit_desc *acpi_desc,
	return 0;
	return 0;
}
}


/*
 * Translate a linear @offset into its position within an interleaved
 * mapping described by @mmio.
 *
 * The offset is split into a line number and a remainder within that
 * line; the line number is in turn split into a count of whole tables to
 * skip and an index into idt->line_offset[].  Callers in this file only
 * invoke this when mmio->num_lines is non-zero.
 */
static u64 to_interleave_offset(u64 offset, struct nfit_blk_mmio *mmio)
{
	struct acpi_nfit_interleave *idt = mmio->idt;
	u32 within_line, slot, slot_bytes;
	u64 line, tables_skipped;

	line = div_u64_rem(offset, mmio->line_size, &within_line);
	tables_skipped = div_u64_rem(line, mmio->num_lines, &slot);

	/* computed in 32 bits, as the table entry and line size are u32 */
	slot_bytes = idt->line_offset[slot] * mmio->line_size;

	return mmio->base_offset + slot_bytes
		+ tables_skipped * mmio->table_size + within_line;
}

/*
 * Read the 64-bit status register of block window @bw through the DCR
 * mapping, applying the interleave translation when one is configured.
 */
static u64 read_blk_stat(struct nfit_blk *nfit_blk, unsigned int bw)
{
	struct nfit_blk_mmio *ctl = &nfit_blk->mmio[DCR];
	u64 reg_offset;

	reg_offset = nfit_blk->stat_offset + ctl->size * bw;
	if (ctl->num_lines != 0)
		reg_offset = to_interleave_offset(reg_offset, ctl);

	return readq(ctl->base + reg_offset);
}

/*
 * Program the command register of block window @bw through the DCR
 * mapping.
 *
 * The 64-bit command word is assembled from:
 *   - @dpa shifted right by L1_CACHE_SHIFT, masked to the low 48 bits
 *   - @len shifted right by L1_CACHE_SHIFT, masked to 8 bits, at bit 48
 *   - @write, placed at bit 56
 */
static void write_blk_ctl(struct nfit_blk *nfit_blk, unsigned int bw,
		resource_size_t dpa, unsigned int len, unsigned int write)
{
	enum {
		BCW_OFFSET_MASK = (1ULL << 48)-1,
		BCW_LEN_SHIFT = 48,
		BCW_LEN_MASK = (1ULL << 8) - 1,
		BCW_CMD_SHIFT = 56,
	};
	struct nfit_blk_mmio *ctl = &nfit_blk->mmio[DCR];
	u64 reg_offset = nfit_blk->cmd_offset + ctl->size * bw;
	u64 lines = len >> L1_CACHE_SHIFT;
	u64 word;

	word = (dpa >> L1_CACHE_SHIFT) & BCW_OFFSET_MASK;
	word |= (lines & BCW_LEN_MASK) << BCW_LEN_SHIFT;
	word |= ((u64) write) << BCW_CMD_SHIFT;

	if (ctl->num_lines)
		reg_offset = to_interleave_offset(reg_offset, ctl);

	writeq(word, ctl->base + reg_offset);
	/* FIXME: conditionally perform read-back if mandated by firmware */
}

/*
 * Perform one block-window transfer on @lane.
 *
 * The command register is programmed with write_blk_ctl(), the payload is
 * moved through the BDW aperture with memcpy(), and the status register is
 * read back afterwards.
 *
 * @nfit_blk: per-region BLK state holding the BDW and DCR mappings
 * @dpa:      address passed to write_blk_ctl(); its remainder modulo
 *            L1_CACHE_BYTES also biases the aperture offset
 * @iobuf:    caller buffer to copy from (write) or into (read)
 * @len:      number of bytes to move; the caller in this file limits it
 *            to the BDW mmio->size
 * @rw:       non-zero copies @iobuf into the aperture, zero copies out
 * @lane:     block window index used for both control and aperture
 *
 * Returns 0 when read_blk_stat() reports zero, otherwise -EIO.
 */
static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk,
		resource_size_t dpa, void *iobuf, size_t len, int rw,
		unsigned int lane)
{
	struct nfit_blk_mmio *mmio = &nfit_blk->mmio[BDW];
	unsigned int copied = 0;
	u64 base_offset;
	int rc;

	/* start of this lane's window plus the sub-cacheline part of dpa */
	base_offset = nfit_blk->bdw_offset + dpa % L1_CACHE_BYTES
		+ lane * mmio->size;
	/* TODO: non-temporal access, flush hints, cache management etc... */
	write_blk_ctl(nfit_blk, lane, dpa, len, rw);
	while (len) {
		unsigned int c;
		u64 offset;

		if (mmio->num_lines) {
			u32 line_offset;

			/*
			 * Interleaved aperture: translate the current position
			 * and copy no further than the end of the current line.
			 */
			offset = to_interleave_offset(base_offset + copied,
					mmio);
			div_u64_rem(offset, mmio->line_size, &line_offset);
			c = min_t(size_t, len, mmio->line_size - line_offset);
		} else {
			/*
			 * NOTE(review): base_offset already includes
			 * nfit_blk->bdw_offset, so it is added twice on this
			 * path - confirm this is intended.
			 */
			offset = base_offset + nfit_blk->bdw_offset;
			c = len;
		}

		if (rw)
			memcpy(mmio->aperture + offset, iobuf + copied, c);
		else
			memcpy(iobuf + copied, mmio->aperture + offset, c);

		copied += c;
		len -= c;
	}
	/* any non-zero status value is treated as an I/O error */
	rc = read_blk_stat(nfit_blk, lane) ? -EIO : 0;
	return rc;
}

/*
 * do_io callback for BLK regions: move @len bytes between @iobuf and the
 * device address @dpa.
 *
 * A lane is acquired for the whole request, which is then split into
 * pieces no larger than the BDW mmio->size and handed one at a time to
 * acpi_nfit_blk_single_io().  The first failing piece aborts the request.
 *
 * Returns 0 on success or the error from acpi_nfit_blk_single_io().
 */
static int acpi_nfit_blk_region_do_io(struct nd_blk_region *ndbr,
		resource_size_t dpa, void *iobuf, u64 len, int rw)
{
	struct nfit_blk *nfit_blk = nd_blk_region_provider_data(ndbr);
	struct nd_region *nd_region = nfit_blk->nd_region;
	struct nfit_blk_mmio *window = &nfit_blk->mmio[BDW];
	unsigned int lane, done = 0;
	u64 chunk;
	int rc = 0;

	lane = nd_region_acquire_lane(nd_region);
	for (; len; done += chunk, len -= chunk) {
		chunk = min(len, window->size);

		rc = acpi_nfit_blk_single_io(nfit_blk, dpa + done,
				iobuf + done, chunk, rw, lane);
		if (rc)
			break;
	}
	nd_region_release_lane(nd_region, lane);

	return rc;
}

/*
 * kref release callback for a shared SPA mapping: unmap the range, give
 * back the memory region, unlink the tracking node and free it.
 *
 * Warns if acpi_desc->spa_map_mutex is not held on entry.
 */
static void nfit_spa_mapping_release(struct kref *kref)
{
	struct nfit_spa_mapping *map = to_spa_map(kref);
	struct acpi_nfit_desc *owner = map->acpi_desc;
	struct acpi_nfit_system_address *range = map->spa;

	WARN_ON(!mutex_is_locked(&owner->spa_map_mutex));
	dev_dbg(owner->dev, "%s: SPA%d\n", __func__, range->range_index);

	iounmap(map->iomem);
	release_mem_region(range->address, range->length);
	list_del(&map->list);
	kfree(map);
}

static struct nfit_spa_mapping *find_spa_mapping(
		struct acpi_nfit_desc *acpi_desc,
		struct acpi_nfit_system_address *spa)
{
	struct nfit_spa_mapping *spa_map;

	WARN_ON(!mutex_is_locked(&acpi_desc->spa_map_mutex));
	list_for_each_entry(spa_map, &acpi_desc->spa_maps, list)
		if (spa_map->spa == spa)
			return spa_map;

	return NULL;
}

static void nfit_spa_unmap(struct acpi_nfit_desc *acpi_desc,
		struct acpi_nfit_system_address *spa)
{
	struct nfit_spa_mapping *spa_map;

	mutex_lock(&acpi_desc->spa_map_mutex);
	spa_map = find_spa_mapping(acpi_desc, spa);

	if (spa_map)
		kref_put(&spa_map->kref, nfit_spa_mapping_release);
	mutex_unlock(&acpi_desc->spa_map_mutex);
}

/*
 * Map @spa, or take another reference on an existing mapping of it.
 *
 * Warns if acpi_desc->spa_map_mutex is not held.  A new mapping reserves
 * the physical range with request_mem_region(), maps it uncached and is
 * tracked on acpi_desc->spa_maps with an initial reference.
 *
 * Returns the mapped address, or NULL if allocation, the region request
 * or the ioremap fails.
 */
static void __iomem *__nfit_spa_map(struct acpi_nfit_desc *acpi_desc,
		struct acpi_nfit_system_address *spa)
{
	resource_size_t start = spa->address;
	resource_size_t n = spa->length;
	struct nfit_spa_mapping *map;

	WARN_ON(!mutex_is_locked(&acpi_desc->spa_map_mutex));

	/* an already-mapped range is shared rather than mapped again */
	map = find_spa_mapping(acpi_desc, spa);
	if (map) {
		kref_get(&map->kref);
		return map->iomem;
	}

	map = kzalloc(sizeof(*map), GFP_KERNEL);
	if (!map)
		return NULL;

	INIT_LIST_HEAD(&map->list);
	map->spa = spa;
	kref_init(&map->kref);
	map->acpi_desc = acpi_desc;

	if (!request_mem_region(start, n, dev_name(acpi_desc->dev)))
		goto err_free;

	/* TODO: cacheability based on the spa type */
	map->iomem = ioremap_nocache(start, n);
	if (!map->iomem)
		goto err_release;

	list_add_tail(&map->list, &acpi_desc->spa_maps);
	return map->iomem;

 err_release:
	release_mem_region(start, n);
 err_free:
	kfree(map);
	return NULL;
}

/**
 * nfit_spa_map - interleave-aware managed-mappings of acpi_nfit_system_address ranges
 * @acpi_desc: NFIT descriptor that owns the mapping list and its mutex
 * @spa: spa table entry to map
 *
 * In the case where block-data-window apertures and
 * dimm-control-regions are interleaved they will end up sharing a
 * single request_mem_region() + ioremap() for the address range.  In
 * the style of devm nfit_spa_map() mappings are automatically dropped
 * when all region devices referencing the same mapping are disabled /
 * unbound.
 *
 * Takes acpi_desc->spa_map_mutex around __nfit_spa_map().
 *
 * Return: the mapped address, or NULL if __nfit_spa_map() fails.
 */
static void __iomem *nfit_spa_map(struct acpi_nfit_desc *acpi_desc,
		struct acpi_nfit_system_address *spa)
{
	void __iomem *iomem;

	mutex_lock(&acpi_desc->spa_map_mutex);
	iomem = __nfit_spa_map(acpi_desc, spa);
	mutex_unlock(&acpi_desc->spa_map_mutex);

	return iomem;
}

/*
 * Copy interleave geometry from @idt into @mmio.
 *
 * With no interleave table (@idt == NULL) @mmio is left untouched.
 * Otherwise num_lines and line_size are taken from the table, and
 * table_size is num_lines * @interleave_ways * line_size.
 *
 * Returns 0 on success, or -ENXIO when a table is present but
 * @interleave_ways is zero (num_lines and line_size are already set in
 * that case).
 */
static int nfit_blk_init_interleave(struct nfit_blk_mmio *mmio,
		struct acpi_nfit_interleave *idt, u16 interleave_ways)
{
	if (!idt)
		return 0;

	mmio->num_lines = idt->line_count;
	mmio->line_size = idt->line_size;
	if (interleave_ways == 0)
		return -ENXIO;

	mmio->table_size = mmio->num_lines * interleave_ways
		* mmio->line_size;
	return 0;
}

static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus,
		struct device *dev)
{
	struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus);
	struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
	struct nd_blk_region *ndbr = to_nd_blk_region(dev);
	struct nfit_blk_mmio *mmio;
	struct nfit_blk *nfit_blk;
	struct nfit_mem *nfit_mem;
	struct nvdimm *nvdimm;
	int rc;

	nvdimm = nd_blk_region_to_dimm(ndbr);
	nfit_mem = nvdimm_provider_data(nvdimm);
	if (!nfit_mem || !nfit_mem->dcr || !nfit_mem->bdw) {
		dev_dbg(dev, "%s: missing%s%s%s\n", __func__,
				nfit_mem ? "" : " nfit_mem",
				nfit_mem->dcr ? "" : " dcr",
				nfit_mem->bdw ? "" : " bdw");
		return -ENXIO;
	}

	nfit_blk = devm_kzalloc(dev, sizeof(*nfit_blk), GFP_KERNEL);
	if (!nfit_blk)
		return -ENOMEM;
	nd_blk_region_set_provider_data(ndbr, nfit_blk);
	nfit_blk->nd_region = to_nd_region(dev);

	/* map block aperture memory */
	nfit_blk->bdw_offset = nfit_mem->bdw->offset;
	mmio = &nfit_blk->mmio[BDW];
	mmio->base = nfit_spa_map(acpi_desc, nfit_mem->spa_bdw);
	if (!mmio->base) {
		dev_dbg(dev, "%s: %s failed to map bdw\n", __func__,
				nvdimm_name(nvdimm));
		return -ENOMEM;
	}
	mmio->size = nfit_mem->bdw->size;
	mmio->base_offset = nfit_mem->memdev_bdw->region_offset;
	mmio->idt = nfit_mem->idt_bdw;
	mmio->spa = nfit_mem->spa_bdw;
	rc = nfit_blk_init_interleave(mmio, nfit_mem->idt_bdw,
			nfit_mem->memdev_bdw->interleave_ways);
	if (rc) {
		dev_dbg(dev, "%s: %s failed to init bdw interleave\n",
				__func__, nvdimm_name(nvdimm));
		return rc;
	}

	/* map block control memory */
	nfit_blk->cmd_offset = nfit_mem->dcr->command_offset;
	nfit_blk->stat_offset = nfit_mem->dcr->status_offset;
	mmio = &nfit_blk->mmio[DCR];
	mmio->base = nfit_spa_map(acpi_desc, nfit_mem->spa_dcr);
	if (!mmio->base) {
		dev_dbg(dev, "%s: %s failed to map dcr\n", __func__,
				nvdimm_name(nvdimm));
		return -ENOMEM;
	}
	mmio->size = nfit_mem->dcr->window_size;
	mmio->base_offset = nfit_mem->memdev_dcr->region_offset;
	mmio->idt = nfit_mem->idt_dcr;
	mmio->spa = nfit_mem->spa_dcr;
	rc = nfit_blk_init_interleave(mmio, nfit_mem->idt_dcr,
			nfit_mem->memdev_dcr->interleave_ways);
	if (rc) {
		dev_dbg(dev, "%s: %s failed to init dcr interleave\n",
				__func__, nvdimm_name(nvdimm));
		return rc;
	}

	if (mmio->line_size == 0)
		return 0;

	if ((u32) nfit_blk->cmd_offset % mmio->line_size
			+ 8 > mmio->line_size) {
		dev_dbg(dev, "cmd_offset crosses interleave boundary\n");
		return -ENXIO;
	} else if ((u32) nfit_blk->stat_offset % mmio->line_size
			+ 8 > mmio->line_size) {
		dev_dbg(dev, "stat_offset crosses interleave boundary\n");
		return -ENXIO;
	}

	return 0;
}

/*
 * Undo acpi_nfit_blk_region_enable(): drop the reference on every SPA
 * mapping that was established and clear the region's provider data.
 *
 * A region without provider data was never enabled and is left alone.
 * The nfit_blk allocation itself is device-managed and not freed here.
 */
static void acpi_nfit_blk_region_disable(struct nvdimm_bus *nvdimm_bus,
		struct device *dev)
{
	struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus);
	struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
	struct nd_blk_region *ndbr = to_nd_blk_region(dev);
	struct nfit_blk *nfit_blk = nd_blk_region_provider_data(ndbr);
	int idx;

	if (nfit_blk == NULL)
		return; /* never enabled */

	/* auto-free BLK spa mappings */
	for (idx = 0; idx < 2; idx++) {
		struct nfit_blk_mmio *mmio = &nfit_blk->mmio[idx];

		if (!mmio->base)
			continue;
		nfit_spa_unmap(acpi_desc, mmio->spa);
	}

	nd_blk_region_set_provider_data(ndbr, NULL);
	/* devm will free nfit_blk */
}

static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc,
static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc,
		struct nd_mapping *nd_mapping, struct nd_region_desc *ndr_desc,
		struct nd_mapping *nd_mapping, struct nd_region_desc *ndr_desc,
		struct acpi_nfit_memory_map *memdev,
		struct acpi_nfit_memory_map *memdev,
@@ -878,6 +1289,7 @@ static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc,
{
{
	struct nvdimm *nvdimm = acpi_nfit_dimm_by_handle(acpi_desc,
	struct nvdimm *nvdimm = acpi_nfit_dimm_by_handle(acpi_desc,
			memdev->device_handle);
			memdev->device_handle);
	struct nd_blk_region_desc *ndbr_desc;
	struct nfit_mem *nfit_mem;
	struct nfit_mem *nfit_mem;
	int blk_valid = 0;
	int blk_valid = 0;


@@ -908,6 +1320,10 @@ static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc,


		ndr_desc->nd_mapping = nd_mapping;
		ndr_desc->nd_mapping = nd_mapping;
		ndr_desc->num_mappings = blk_valid;
		ndr_desc->num_mappings = blk_valid;
		ndbr_desc = to_blk_region_desc(ndr_desc);
		ndbr_desc->enable = acpi_nfit_blk_region_enable;
		ndbr_desc->disable = acpi_nfit_blk_region_disable;
		ndbr_desc->do_io = acpi_nfit_blk_region_do_io;
		if (!nvdimm_blk_region_create(acpi_desc->nvdimm_bus, ndr_desc))
		if (!nvdimm_blk_region_create(acpi_desc->nvdimm_bus, ndr_desc))
			return -ENOMEM;
			return -ENOMEM;
		break;
		break;
@@ -921,8 +1337,9 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc,
{
{
	static struct nd_mapping nd_mappings[ND_MAX_MAPPINGS];
	static struct nd_mapping nd_mappings[ND_MAX_MAPPINGS];
	struct acpi_nfit_system_address *spa = nfit_spa->spa;
	struct acpi_nfit_system_address *spa = nfit_spa->spa;
	struct nd_blk_region_desc ndbr_desc;
	struct nd_region_desc *ndr_desc;
	struct nfit_memdev *nfit_memdev;
	struct nfit_memdev *nfit_memdev;
	struct nd_region_desc ndr_desc;
	struct nvdimm_bus *nvdimm_bus;
	struct nvdimm_bus *nvdimm_bus;
	struct resource res;
	struct resource res;
	int count = 0, rc;
	int count = 0, rc;
@@ -935,12 +1352,13 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc,


	memset(&res, 0, sizeof(res));
	memset(&res, 0, sizeof(res));
	memset(&nd_mappings, 0, sizeof(nd_mappings));
	memset(&nd_mappings, 0, sizeof(nd_mappings));
	memset(&ndr_desc, 0, sizeof(ndr_desc));
	memset(&ndbr_desc, 0, sizeof(ndbr_desc));
	res.start = spa->address;
	res.start = spa->address;
	res.end = res.start + spa->length - 1;
	res.end = res.start + spa->length - 1;
	ndr_desc.res = &res;
	ndr_desc = &ndbr_desc.ndr_desc;
	ndr_desc.provider_data = nfit_spa;
	ndr_desc->res = &res;
	ndr_desc.attr_groups = acpi_nfit_region_attribute_groups;
	ndr_desc->provider_data = nfit_spa;
	ndr_desc->attr_groups = acpi_nfit_region_attribute_groups;
	list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
	list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
		struct acpi_nfit_memory_map *memdev = nfit_memdev->memdev;
		struct acpi_nfit_memory_map *memdev = nfit_memdev->memdev;
		struct nd_mapping *nd_mapping;
		struct nd_mapping *nd_mapping;
@@ -953,24 +1371,24 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc,
			return -ENXIO;
			return -ENXIO;
		}
		}
		nd_mapping = &nd_mappings[count++];
		nd_mapping = &nd_mappings[count++];
		rc = acpi_nfit_init_mapping(acpi_desc, nd_mapping, &ndr_desc,
		rc = acpi_nfit_init_mapping(acpi_desc, nd_mapping, ndr_desc,
				memdev, spa);
				memdev, spa);
		if (rc)
		if (rc)
			return rc;
			return rc;
	}
	}


	ndr_desc.nd_mapping = nd_mappings;
	ndr_desc->nd_mapping = nd_mappings;
	ndr_desc.num_mappings = count;
	ndr_desc->num_mappings = count;
	rc = acpi_nfit_init_interleave_set(acpi_desc, &ndr_desc, spa);
	rc = acpi_nfit_init_interleave_set(acpi_desc, ndr_desc, spa);
	if (rc)
	if (rc)
		return rc;
		return rc;


	nvdimm_bus = acpi_desc->nvdimm_bus;
	nvdimm_bus = acpi_desc->nvdimm_bus;
	if (nfit_spa_type(spa) == NFIT_SPA_PM) {
	if (nfit_spa_type(spa) == NFIT_SPA_PM) {
		if (!nvdimm_pmem_region_create(nvdimm_bus, &ndr_desc))
		if (!nvdimm_pmem_region_create(nvdimm_bus, ndr_desc))
			return -ENOMEM;
			return -ENOMEM;
	} else if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE) {
	} else if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE) {
		if (!nvdimm_volatile_region_create(nvdimm_bus, &ndr_desc))
		if (!nvdimm_volatile_region_create(nvdimm_bus, ndr_desc))
			return -ENOMEM;
			return -ENOMEM;
	}
	}
	return 0;
	return 0;
@@ -996,11 +1414,14 @@ static int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, acpi_size sz)
	u8 *data;
	u8 *data;
	int rc;
	int rc;


	INIT_LIST_HEAD(&acpi_desc->spa_maps);
	INIT_LIST_HEAD(&acpi_desc->spas);
	INIT_LIST_HEAD(&acpi_desc->spas);
	INIT_LIST_HEAD(&acpi_desc->dcrs);
	INIT_LIST_HEAD(&acpi_desc->dcrs);
	INIT_LIST_HEAD(&acpi_desc->bdws);
	INIT_LIST_HEAD(&acpi_desc->bdws);
	INIT_LIST_HEAD(&acpi_desc->idts);
	INIT_LIST_HEAD(&acpi_desc->memdevs);
	INIT_LIST_HEAD(&acpi_desc->memdevs);
	INIT_LIST_HEAD(&acpi_desc->dimms);
	INIT_LIST_HEAD(&acpi_desc->dimms);
	mutex_init(&acpi_desc->spa_map_mutex);


	data = (u8 *) acpi_desc->nfit;
	data = (u8 *) acpi_desc->nfit;
	end = data + sz;
	end = data + sz;
+49 −0
Original line number Original line Diff line number Diff line
@@ -52,6 +52,11 @@ struct nfit_bdw {
	struct list_head list;
	struct list_head list;
};
};


/* List node tracking one NFIT interleave table on acpi_nfit_desc.idts */
struct nfit_idt {
	struct acpi_nfit_interleave *idt;	/* the firmware table entry */
	struct list_head list;			/* link in acpi_desc->idts */
};

struct nfit_memdev {
struct nfit_memdev {
	struct acpi_nfit_memory_map *memdev;
	struct acpi_nfit_memory_map *memdev;
	struct list_head list;
	struct list_head list;
@@ -62,10 +67,13 @@ struct nfit_mem {
	struct nvdimm *nvdimm;
	struct nvdimm *nvdimm;
	struct acpi_nfit_memory_map *memdev_dcr;
	struct acpi_nfit_memory_map *memdev_dcr;
	struct acpi_nfit_memory_map *memdev_pmem;
	struct acpi_nfit_memory_map *memdev_pmem;
	struct acpi_nfit_memory_map *memdev_bdw;
	struct acpi_nfit_control_region *dcr;
	struct acpi_nfit_control_region *dcr;
	struct acpi_nfit_data_region *bdw;
	struct acpi_nfit_data_region *bdw;
	struct acpi_nfit_system_address *spa_dcr;
	struct acpi_nfit_system_address *spa_dcr;
	struct acpi_nfit_system_address *spa_bdw;
	struct acpi_nfit_system_address *spa_bdw;
	struct acpi_nfit_interleave *idt_dcr;
	struct acpi_nfit_interleave *idt_bdw;
	struct list_head list;
	struct list_head list;
	struct acpi_device *adev;
	struct acpi_device *adev;
	unsigned long dsm_mask;
	unsigned long dsm_mask;
@@ -74,16 +82,57 @@ struct nfit_mem {
struct acpi_nfit_desc {
struct acpi_nfit_desc {
	struct nvdimm_bus_descriptor nd_desc;
	struct nvdimm_bus_descriptor nd_desc;
	struct acpi_table_nfit *nfit;
	struct acpi_table_nfit *nfit;
	struct mutex spa_map_mutex;
	struct list_head spa_maps;
	struct list_head memdevs;
	struct list_head memdevs;
	struct list_head dimms;
	struct list_head dimms;
	struct list_head spas;
	struct list_head spas;
	struct list_head dcrs;
	struct list_head dcrs;
	struct list_head bdws;
	struct list_head bdws;
	struct list_head idts;
	struct nvdimm_bus *nvdimm_bus;
	struct nvdimm_bus *nvdimm_bus;
	struct device *dev;
	struct device *dev;
	unsigned long dimm_dsm_force_en;
	unsigned long dimm_dsm_force_en;
};
};


/* Index into nfit_blk.mmio[] */
enum nd_blk_mmio_selector {
	BDW,	/* block data window (aperture) mapping */
	DCR,	/* dimm control region mapping */
};

/*
 * Per-region BLK-mode state, installed as the nd_blk_region provider
 * data by acpi_nfit_blk_region_enable().
 */
struct nfit_blk {
	/* one mapping each for the aperture and the control region */
	struct nfit_blk_mmio {
		union {
			void __iomem *base;	/* register access (readq/writeq) */
			void *aperture;		/* data access (memcpy) */
		};
		u64 size;		/* bdw->size or dcr->window_size */
		u64 base_offset;	/* memdev region_offset */
		u32 line_size;		/* interleave line size, from the idt */
		u32 num_lines;		/* interleave line count; 0 if none */
		u32 table_size;		/* num_lines * ways * line_size */
		struct acpi_nfit_interleave *idt;
		struct acpi_nfit_system_address *spa;	/* range that was mapped */
	} mmio[2];	/* indexed by enum nd_blk_mmio_selector */
	struct nd_region *nd_region;
	u64 bdw_offset; /* post interleave offset */
	u64 stat_offset;	/* dcr->status_offset */
	u64 cmd_offset;		/* dcr->command_offset */
};

/*
 * Reference-counted record of one mapped system-physical-address range,
 * shared by every user of the same spa table entry.
 */
struct nfit_spa_mapping {
	struct acpi_nfit_desc *acpi_desc;	/* owner of the list and mutex */
	struct acpi_nfit_system_address *spa;	/* lookup key */
	struct list_head list;			/* link in acpi_desc->spa_maps */
	struct kref kref;			/* released via nfit_spa_mapping_release() */
	void __iomem *iomem;			/* ioremap of the range */
};

/* Recover the nfit_spa_mapping that embeds @kref */
static inline struct nfit_spa_mapping *to_spa_map(struct kref *kref)
{
	return container_of(kref, struct nfit_spa_mapping, kref);
}

static inline struct acpi_nfit_memory_map *__to_nfit_memdev(
static inline struct acpi_nfit_memory_map *__to_nfit_memdev(
		struct nfit_mem *nfit_mem)
		struct nfit_mem *nfit_mem)
{
{
+13 −0
Original line number Original line Diff line number Diff line
@@ -34,6 +34,19 @@ config BLK_DEV_PMEM


	  Say Y if you want to use an NVDIMM
	  Say Y if you want to use an NVDIMM


config ND_BLK
	tristate "BLK: Block data window (aperture) device support"
	default LIBNVDIMM
	select ND_BTT if BTT
	help
	  Support NVDIMMs, or other devices, that implement a BLK-mode
	  access capability.  BLK-mode access uses memory-mapped-i/o
	  apertures to access persistent media.

	  Say Y if your platform firmware emits an ACPI.NFIT table
	  (CONFIG_ACPI_NFIT), or otherwise exposes BLK-mode
	  capabilities.

config ND_BTT
config ND_BTT
	tristate
	tristate


+3 −0
Original line number Original line Diff line number Diff line
obj-$(CONFIG_LIBNVDIMM) += libnvdimm.o
obj-$(CONFIG_LIBNVDIMM) += libnvdimm.o
obj-$(CONFIG_BLK_DEV_PMEM) += nd_pmem.o
obj-$(CONFIG_BLK_DEV_PMEM) += nd_pmem.o
obj-$(CONFIG_ND_BTT) += nd_btt.o
obj-$(CONFIG_ND_BTT) += nd_btt.o
obj-$(CONFIG_ND_BLK) += nd_blk.o


nd_pmem-y := pmem.o
nd_pmem-y := pmem.o


nd_btt-y := btt.o
nd_btt-y := btt.o


nd_blk-y := blk.o

libnvdimm-y := core.o
libnvdimm-y := core.o
libnvdimm-y += bus.o
libnvdimm-y += bus.o
libnvdimm-y += dimm_devs.o
libnvdimm-y += dimm_devs.o

drivers/nvdimm/blk.c

0 → 100644
+245 −0
Original line number Original line Diff line number Diff line
/*
 * NVDIMM Block Window Driver
 * Copyright (c) 2014, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 */

#include <linux/blkdev.h>
#include <linux/fs.h>
#include <linux/genhd.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/nd.h>
#include <linux/sizes.h>
#include "nd.h"

/* Driver state for one BLK namespace, stored as the device's drvdata */
struct nd_blk_device {
	struct request_queue *queue;	/* bio queue served by nd_blk_make_request() */
	struct gendisk *disk;		/* disk registered in nd_blk_attach_disk() */
	struct nd_namespace_blk *nsblk;	/* namespace providing the resource list */
	struct nd_blk_region *ndbr;	/* parent region that performs the I/O */
	size_t disk_size;		/* from nvdimm_namespace_capacity() */
};

static int nd_blk_major;

/*
 * Translate a namespace-relative offset into a device address.
 *
 * The namespace is a concatenation of nsblk->res[]; the resources are
 * walked in order until the one containing @ns_offset is found.  A
 * request that starts inside a resource but runs past its end is
 * rejected.
 *
 * Returns the device address, or SIZE_MAX (after a one-time warning) if
 * the request crosses a resource boundary or lies beyond the namespace.
 */
static resource_size_t to_dev_offset(struct nd_namespace_blk *nsblk,
				resource_size_t ns_offset, unsigned int len)
{
	int i;

	for (i = 0; i < nsblk->num_resources; i++) {
		struct resource *res = nsblk->res[i];
		resource_size_t size = resource_size(res);

		if (ns_offset >= size) {
			/* not in this resource: rebase onto the next one */
			ns_offset -= size;
			continue;
		}

		if (ns_offset + len > size) {
			dev_WARN_ONCE(&nsblk->common.dev, 1,
				"illegal request\n");
			return SIZE_MAX;
		}
		return res->start + ns_offset;
	}

	dev_WARN_ONCE(&nsblk->common.dev, 1, "request out of range\n");
	return SIZE_MAX;
}

/*
 * make_request handler: service @bio one segment at a time.
 *
 * Each segment is translated to a device offset, its page is mapped with
 * kmap_atomic() and the transfer is delegated to the region's do_io
 * callback.  Processing stops at the first failure and the bio is
 * completed with that error (0 if every segment succeeded).
 */
static void nd_blk_make_request(struct request_queue *q, struct bio *bio)
{
	struct nd_blk_device *blk_dev = bio->bi_bdev->bd_disk->private_data;
	struct nd_namespace_blk *nsblk = blk_dev->nsblk;
	struct nd_blk_region *ndbr = blk_dev->ndbr;
	int rw = bio_data_dir(bio);
	struct bvec_iter iter;
	struct bio_vec bvec;
	int err = 0;

	bio_for_each_segment(bvec, bio, iter) {
		unsigned int len = bvec.bv_len;
		resource_size_t	dev_offset;
		void *iobuf;

		BUG_ON(len > PAGE_SIZE);

		dev_offset = to_dev_offset(nsblk,
				iter.bi_sector << SECTOR_SHIFT, len);
		if (dev_offset == SIZE_MAX) {
			err = -EIO;
			break;
		}

		iobuf = kmap_atomic(bvec.bv_page);
		err = ndbr->do_io(ndbr, dev_offset, iobuf + bvec.bv_offset,
				len, rw);
		kunmap_atomic(iobuf);
		if (err)
			break;
	}

	bio_endio(bio, err);
}

/*
 * rw_bytes handler installed on the namespace by nd_blk_probe().
 *
 * Transfers @n bytes at namespace offset @offset via the region's do_io
 * callback.  The driver state is looked up through ndns->claim.
 *
 * Returns -EFAULT if the request extends past disk_size, -EIO if the
 * offset cannot be translated, otherwise the result of do_io.
 */
static int nd_blk_rw_bytes(struct nd_namespace_common *ndns,
		resource_size_t offset, void *iobuf, size_t n, int rw)
{
	struct nd_blk_device *blk_dev = dev_get_drvdata(ndns->claim);
	struct nd_blk_region *ndbr = blk_dev->ndbr;
	resource_size_t dev_offset;

	/* translation runs first, so its warning precedes the range check */
	dev_offset = to_dev_offset(blk_dev->nsblk, offset, n);

	if (unlikely(offset + n > blk_dev->disk_size)) {
		dev_WARN_ONCE(&ndns->dev, 1, "request out of range\n");
		return -EFAULT;
	}
	if (dev_offset == SIZE_MAX)
		return -EIO;

	return ndbr->do_io(ndbr, dev_offset, iobuf, n, rw);
}

/* Block device operations for nd_blk disks; only the owner is set */
static const struct block_device_operations nd_blk_fops = {
	.owner = THIS_MODULE,
};

static int nd_blk_attach_disk(struct nd_namespace_common *ndns,
		struct nd_blk_device *blk_dev)
{
	struct nd_namespace_blk *nsblk = to_nd_namespace_blk(&ndns->dev);
	struct gendisk *disk;

	blk_dev->queue = blk_alloc_queue(GFP_KERNEL);
	if (!blk_dev->queue)
		return -ENOMEM;

	blk_queue_make_request(blk_dev->queue, nd_blk_make_request);
	blk_queue_max_hw_sectors(blk_dev->queue, UINT_MAX);
	blk_queue_bounce_limit(blk_dev->queue, BLK_BOUNCE_ANY);
	blk_queue_logical_block_size(blk_dev->queue, nsblk->lbasize);
	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, blk_dev->queue);

	disk = blk_dev->disk = alloc_disk(0);
	if (!disk) {
		blk_cleanup_queue(blk_dev->queue);
		return -ENOMEM;
	}

	disk->driverfs_dev	= &ndns->dev;
	disk->major		= nd_blk_major;
	disk->first_minor	= 0;
	disk->fops		= &nd_blk_fops;
	disk->private_data	= blk_dev;
	disk->queue		= blk_dev->queue;
	disk->flags		= GENHD_FL_EXT_DEVT;
	nvdimm_namespace_disk_name(ndns, disk->disk_name);
	set_capacity(disk, blk_dev->disk_size >> SECTOR_SHIFT);
	add_disk(disk);

	return 0;
}

static int nd_blk_probe(struct device *dev)
{
	struct nd_namespace_common *ndns;
	struct nd_blk_device *blk_dev;
	int rc;

	ndns = nvdimm_namespace_common_probe(dev);
	if (IS_ERR(ndns))
		return PTR_ERR(ndns);

	blk_dev = kzalloc(sizeof(*blk_dev), GFP_KERNEL);
	if (!blk_dev)
		return -ENOMEM;

	blk_dev->disk_size = nvdimm_namespace_capacity(ndns);
	blk_dev->ndbr = to_nd_blk_region(dev->parent);
	blk_dev->nsblk = to_nd_namespace_blk(&ndns->dev);
	dev_set_drvdata(dev, blk_dev);

	ndns->rw_bytes = nd_blk_rw_bytes;
	if (is_nd_btt(dev))
		rc = nvdimm_namespace_attach_btt(ndns);
	else if (nd_btt_probe(ndns, blk_dev) == 0) {
		/* we'll come back as btt-blk */
		rc = -ENXIO;
	} else
		rc = nd_blk_attach_disk(ndns, blk_dev);
	if (rc)
		kfree(blk_dev);
	return rc;
}

/*
 * Tear down what nd_blk_attach_disk() created: unregister the gendisk,
 * drop the disk reference, then clean up the request queue.
 */
static void nd_blk_detach_disk(struct nd_blk_device *blk_dev)
{
	del_gendisk(blk_dev->disk);
	put_disk(blk_dev->disk);
	blk_cleanup_queue(blk_dev->queue);
}

/*
 * Unbind from @dev: detach the BTT when bound to a BTT device, otherwise
 * tear down the raw disk; then free the driver state.  Always returns 0.
 */
static int nd_blk_remove(struct device *dev)
{
	struct nd_blk_device *blk_dev = dev_get_drvdata(dev);

	if (!is_nd_btt(dev))
		nd_blk_detach_disk(blk_dev);
	else
		nvdimm_namespace_detach_btt(to_nd_btt(dev)->ndns);

	kfree(blk_dev);
	return 0;
}

/* nd bus driver definition: binds to devices of type ND_DRIVER_NAMESPACE_BLK */
static struct nd_device_driver nd_blk_driver = {
	.probe = nd_blk_probe,
	.remove = nd_blk_remove,
	.drv = {
		.name = "nd_blk",
	},
	.type = ND_DRIVER_NAMESPACE_BLK,
};

/*
 * Module init: obtain a dynamically assigned block major and register
 * the nd_blk driver.  The major is released again if driver registration
 * fails.
 */
static int __init nd_blk_init(void)
{
	int major, rc;

	/* major 0 requests dynamic assignment; the result is the major */
	major = register_blkdev(0, "nd_blk");
	if (major < 0)
		return major;
	nd_blk_major = major;

	rc = nd_driver_register(&nd_blk_driver);
	if (rc < 0)
		unregister_blkdev(nd_blk_major, "nd_blk");

	return rc;
}

/* Module exit: unregister the driver, then release the block major */
static void __exit nd_blk_exit(void)
{
	driver_unregister(&nd_blk_driver.drv);
	unregister_blkdev(nd_blk_major, "nd_blk");
}

MODULE_AUTHOR("Ross Zwisler <ross.zwisler@linux.intel.com>");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_BLK);
module_init(nd_blk_init);
module_exit(nd_blk_exit);
Loading