Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit bf9bccc1 authored by Dan Williams's avatar Dan Williams
Browse files

libnvdimm: pmem label sets and namespace instantiation.



A complete label set is a PMEM-label per-dimm per-interleave-set where
all the UUIDs match and the interleave set cookie matches the hosting
interleave set.

Present sysfs attributes for manipulation of a PMEM-namespace's
'alt_name', 'uuid', and 'size' attributes.  A later patch will make
these settings persistent by writing back the label.

Note that PMEM allocations grow forwards from the start of an interleave
set (lowest dimm-physical-address (DPA)).  BLK-namespaces that alias
with a PMEM interleave set will grow allocations backward from the
highest DPA.

Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Neil Brown <neilb@suse.de>
Acked-by: default avatarChristoph Hellwig <hch@lst.de>
Signed-off-by: default avatarDan Williams <dan.j.williams@intel.com>
parent 4a826c83
Loading
Loading
Loading
Loading
+6 −2
Original line number Diff line number Diff line
@@ -97,6 +97,8 @@ static int nvdimm_bus_probe(struct device *dev)
	rc = nd_drv->probe(dev);
	if (rc == 0)
		nd_region_probe_success(nvdimm_bus, dev);
	else
		nd_region_disable(nvdimm_bus, dev);
	nvdimm_bus_probe_end(nvdimm_bus);

	dev_dbg(&nvdimm_bus->dev, "%s.probe(%s) = %d\n", dev->driver->name,
@@ -381,8 +383,10 @@ u32 nd_cmd_out_size(struct nvdimm *nvdimm, int cmd,
}
EXPORT_SYMBOL_GPL(nd_cmd_out_size);

static void wait_nvdimm_bus_probe_idle(struct nvdimm_bus *nvdimm_bus)
void wait_nvdimm_bus_probe_idle(struct device *dev)
{
	struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);

	do {
		if (nvdimm_bus->probe_active == 0)
			break;
@@ -402,7 +406,7 @@ static int nd_cmd_clear_to_send(struct nvdimm *nvdimm, unsigned int cmd)
		return 0;

	nvdimm_bus = walk_to_nvdimm_bus(&nvdimm->dev);
	wait_nvdimm_bus_probe_idle(nvdimm_bus);
	wait_nvdimm_bus_probe_idle(&nvdimm_bus->dev);

	if (atomic_read(&nvdimm->busy))
		return -EBUSY;
+64 −0
Original line number Diff line number Diff line
@@ -14,6 +14,7 @@
#include <linux/export.h>
#include <linux/module.h>
#include <linux/device.h>
#include <linux/ctype.h>
#include <linux/ndctl.h>
#include <linux/mutex.h>
#include <linux/slab.h>
@@ -109,6 +110,69 @@ struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev)
	return NULL;
}

static bool is_uuid_sep(char sep)
{
	if (sep == '\n' || sep == '-' || sep == ':' || sep == '\0')
		return true;
	return false;
}

static int nd_uuid_parse(struct device *dev, u8 *uuid_out, const char *buf,
		size_t len)
{
	const char *str = buf;
	u8 uuid[16];
	int i;

	for (i = 0; i < 16; i++) {
		if (!isxdigit(str[0]) || !isxdigit(str[1])) {
			dev_dbg(dev, "%s: pos: %d buf[%zd]: %c buf[%zd]: %c\n",
					__func__, i, str - buf, str[0],
					str + 1 - buf, str[1]);
			return -EINVAL;
		}

		uuid[i] = (hex_to_bin(str[0]) << 4) | hex_to_bin(str[1]);
		str += 2;
		if (is_uuid_sep(*str))
			str++;
	}

	memcpy(uuid_out, uuid, sizeof(uuid));
	return 0;
}

/**
 * nd_uuid_store: common implementation for writing 'uuid' sysfs attributes
 * @dev: container device for the uuid property
 * @uuid_out: uuid buffer to replace
 * @buf: raw sysfs buffer to parse
 *
 * Enforce that uuids can only be changed while the device is disabled
 * (driver detached)
 * LOCKING: expects device_lock() is held on entry
 */
int nd_uuid_store(struct device *dev, u8 **uuid_out, const char *buf,
		size_t len)
{
	u8 uuid[16];
	int rc;

	if (dev->driver)
		return -EBUSY;

	rc = nd_uuid_parse(dev, uuid, buf, len);
	if (rc)
		return rc;

	kfree(*uuid_out);
	*uuid_out = kmemdup(uuid, sizeof(uuid), GFP_KERNEL);
	if (!(*uuid_out))
		return -ENOMEM;

	return 0;
}

static ssize_t commands_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
+4 −17
Original line number Diff line number Diff line
@@ -21,18 +21,6 @@
#include "label.h"
#include "nd.h"

static void free_data(struct nvdimm_drvdata *ndd)
{
	if (!ndd)
		return;

	if (ndd->data && is_vmalloc_addr(ndd->data))
		vfree(ndd->data);
	else
		kfree(ndd->data);
	kfree(ndd);
}

static int nvdimm_probe(struct device *dev)
{
	struct nvdimm_drvdata *ndd;
@@ -49,6 +37,8 @@ static int nvdimm_probe(struct device *dev)
	ndd->dpa.start = 0;
	ndd->dpa.end = -1;
	ndd->dev = dev;
	get_device(dev);
	kref_init(&ndd->kref);

	rc = nvdimm_init_nsarea(ndd);
	if (rc)
@@ -74,21 +64,18 @@ static int nvdimm_probe(struct device *dev)
	return 0;

 err:
	free_data(ndd);
	put_ndd(ndd);
	return rc;
}

static int nvdimm_remove(struct device *dev)
{
	struct nvdimm_drvdata *ndd = dev_get_drvdata(dev);
	struct resource *res, *_r;

	nvdimm_bus_lock(dev);
	dev_set_drvdata(dev, NULL);
	for_each_dpa_resource_safe(ndd, res, _r)
		nvdimm_free_dpa(ndd, res);
	nvdimm_bus_unlock(dev);
	free_data(ndd);
	put_ndd(ndd);

	return 0;
}
+137 −0
Original line number Diff line number Diff line
@@ -159,6 +159,48 @@ struct nvdimm *to_nvdimm(struct device *dev)
}
EXPORT_SYMBOL_GPL(to_nvdimm);

struct nvdimm_drvdata *to_ndd(struct nd_mapping *nd_mapping)
{
	struct nvdimm *nvdimm = nd_mapping->nvdimm;

	WARN_ON_ONCE(!is_nvdimm_bus_locked(&nvdimm->dev));

	return dev_get_drvdata(&nvdimm->dev);
}
EXPORT_SYMBOL(to_ndd);

void nvdimm_drvdata_release(struct kref *kref)
{
	struct nvdimm_drvdata *ndd = container_of(kref, typeof(*ndd), kref);
	struct device *dev = ndd->dev;
	struct resource *res, *_r;

	dev_dbg(dev, "%s\n", __func__);

	nvdimm_bus_lock(dev);
	for_each_dpa_resource_safe(ndd, res, _r)
		nvdimm_free_dpa(ndd, res);
	nvdimm_bus_unlock(dev);

	if (ndd->data && is_vmalloc_addr(ndd->data))
		vfree(ndd->data);
	else
		kfree(ndd->data);
	kfree(ndd);
	put_device(dev);
}

void get_ndd(struct nvdimm_drvdata *ndd)
{
	kref_get(&ndd->kref);
}

void put_ndd(struct nvdimm_drvdata *ndd)
{
	if (ndd)
		kref_put(&ndd->kref, nvdimm_drvdata_release);
}

const char *nvdimm_name(struct nvdimm *nvdimm)
{
	return dev_name(&nvdimm->dev);
@@ -247,6 +289,83 @@ struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data,
}
EXPORT_SYMBOL_GPL(nvdimm_create);

/**
 * nd_pmem_available_dpa - for the given dimm+region account unallocated dpa
 * @nd_mapping: container of dpa-resource-root + labels
 * @nd_region: constrain available space check to this reference region
 * @overlap: calculate available space assuming this level of overlap
 *
 * Validate that a PMEM label, if present, aligns with the start of an
 * interleave set and truncate the available size at the lowest BLK
 * overlap point.
 *
 * The expectation is that this routine is called multiple times as it
 * probes for the largest BLK encroachment for any single member DIMM of
 * the interleave set.  Once that value is determined the PMEM-limit for
 * the set can be established.
 */
resource_size_t nd_pmem_available_dpa(struct nd_region *nd_region,
		struct nd_mapping *nd_mapping, resource_size_t *overlap)
{
	resource_size_t map_start, map_end, busy = 0, available, blk_start;
	struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
	struct resource *res;
	const char *reason;

	if (!ndd)
		return 0;

	map_start = nd_mapping->start;
	map_end = map_start + nd_mapping->size - 1;
	blk_start = max(map_start, map_end + 1 - *overlap);
	for_each_dpa_resource(ndd, res)
		if (res->start >= map_start && res->start < map_end) {
			if (strncmp(res->name, "blk", 3) == 0)
				blk_start = min(blk_start, res->start);
			else if (res->start != map_start) {
				reason = "misaligned to iset";
				goto err;
			} else {
				if (busy) {
					reason = "duplicate overlapping PMEM reservations?";
					goto err;
				}
				busy += resource_size(res);
				continue;
			}
		} else if (res->end >= map_start && res->end <= map_end) {
			if (strncmp(res->name, "blk", 3) == 0) {
				/*
				 * If a BLK allocation overlaps the start of
				 * PMEM the entire interleave set may now only
				 * be used for BLK.
				 */
				blk_start = map_start;
			} else {
				reason = "misaligned to iset";
				goto err;
			}
		} else if (map_start > res->start && map_start < res->end) {
			/* total eclipse of the mapping */
			busy += nd_mapping->size;
			blk_start = map_start;
		}

	*overlap = map_end + 1 - blk_start;
	available = blk_start - map_start;
	if (busy < available)
		return available - busy;
	return 0;

 err:
	/*
	 * Something is wrong, PMEM must align with the start of the
	 * interleave set, and there can only be one allocation per set.
	 */
	nd_dbg_dpa(nd_region, ndd, res, "%s\n", reason);
	return 0;
}

void nvdimm_free_dpa(struct nvdimm_drvdata *ndd, struct resource *res)
{
	WARN_ON_ONCE(!is_nvdimm_bus_locked(ndd->dev));
@@ -271,6 +390,24 @@ struct resource *nvdimm_allocate_dpa(struct nvdimm_drvdata *ndd,
	return res;
}

/**
 * nvdimm_allocated_dpa - sum up the dpa currently allocated to this label_id
 * @nvdimm: container of dpa-resource-root + labels
 * @label_id: dpa resource name of the form {pmem|blk}-<human readable uuid>
 */
resource_size_t nvdimm_allocated_dpa(struct nvdimm_drvdata *ndd,
		struct nd_label_id *label_id)
{
	resource_size_t allocated = 0;
	struct resource *res;

	for_each_dpa_resource(ndd, res)
		if (strcmp(res->name, label_id->id) == 0)
			allocated += resource_size(res);

	return allocated;
}

static int count_dimms(struct device *dev, void *c)
{
	int *count = c;
+54 −1
Original line number Diff line number Diff line
@@ -230,7 +230,7 @@ static bool preamble_current(struct nvdimm_drvdata *ndd,
	return true;
}

static char *nd_label_gen_id(struct nd_label_id *label_id, u8 *uuid, u32 flags)
char *nd_label_gen_id(struct nd_label_id *label_id, u8 *uuid, u32 flags)
{
	if (!label_id || !uuid)
		return NULL;
@@ -288,3 +288,56 @@ int nd_label_reserve_dpa(struct nvdimm_drvdata *ndd)

	return 0;
}

int nd_label_active_count(struct nvdimm_drvdata *ndd)
{
	struct nd_namespace_index *nsindex;
	unsigned long *free;
	u32 nslot, slot;
	int count = 0;

	if (!preamble_current(ndd, &nsindex, &free, &nslot))
		return 0;

	for_each_clear_bit_le(slot, free, nslot) {
		struct nd_namespace_label *nd_label;

		nd_label = nd_label_base(ndd) + slot;

		if (!slot_valid(nd_label, slot)) {
			u32 label_slot = __le32_to_cpu(nd_label->slot);
			u64 size = __le64_to_cpu(nd_label->rawsize);
			u64 dpa = __le64_to_cpu(nd_label->dpa);

			dev_dbg(ndd->dev,
				"%s: slot%d invalid slot: %d dpa: %llx size: %llx\n",
					__func__, slot, label_slot, dpa, size);
			continue;
		}
		count++;
	}
	return count;
}

struct nd_namespace_label *nd_label_active(struct nvdimm_drvdata *ndd, int n)
{
	struct nd_namespace_index *nsindex;
	unsigned long *free;
	u32 nslot, slot;

	if (!preamble_current(ndd, &nsindex, &free, &nslot))
		return NULL;

	for_each_clear_bit_le(slot, free, nslot) {
		struct nd_namespace_label *nd_label;

		nd_label = nd_label_base(ndd) + slot;
		if (!slot_valid(nd_label, slot))
			continue;

		if (n-- == 0)
			return nd_label_base(ndd) + slot;
	}

	return NULL;
}
Loading