Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 5ea6718b authored by Linus Torvalds
Browse files
Pull libnvdimm updates from Dan Williams:
 "The bulk of this has been in -next since before the merge window
  opened, with no known collisions / issues reported.

  The only detail worth noting, outside the summary below, is that the
  "libnvdimm-start-pad" topic has been truncated to just cleanups and
  small fixes. The full topic branch would have doubled down on hacks
  around the "section alignment" limitation of the core-mm; instead,
  effort is now being spent to address that root issue in the memory
  hotplug implementation for v5.2.

   - Fix nfit-bus command submission regression

   - Support retrieval of short-ARS results if the ARS state is
     "requires continuation", and even if the "no_init_ars" module
     parameter is specified

   - Allow busy-polling of the kernel ARS state by allowing root to
     reset the exponential back-off timer

   - Filter potentially stale ARS results by tracking query-ARS relative
     to the previous start-ARS

   - Enhance dax_device alignment checks

   - Add support for the Hyper-V family of device-specific-methods
     (DSMs)

   - Add several fixes and workarounds for Hyper-V compatibility

   - Fix support to cache the dirty-shutdown-count at init"

* tag 'libnvdimm-for-5.1' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (25 commits)
  libnvdimm/namespace: Clean up holder_class_store()
  libnvdimm/of_pmem: Fix platform_no_drv_owner.cocci warnings
  acpi/nfit: Update NFIT flags error message
  libnvdimm/btt: Fix LBA masking during 'free list' population
  libnvdimm/btt: Remove unnecessary code in btt_freelist_init
  libnvdimm/pfn: Remove dax_label_reserve
  dax: Check the end of the block-device capacity with dax_direct_access()
  nfit/ars: Avoid stale ARS results
  nfit/ars: Allow root to busy-poll the ARS state machine
  nfit/ars: Introduce scrub_flags
  nfit/ars: Remove ars_start_flags
  nfit/ars: Attempt short-ARS even in the no_init_ars case
  nfit/ars: Attempt a short-ARS whenever the ARS state is idle at boot
  acpi/nfit: Require opt-in for read-only label configurations
  libnvdimm/pmem: Honor force_raw for legacy pmem regions
  libnvdimm/pfn: Account for PAGE_SIZE > info-block-size in nd_pfn_init()
  libnvdimm: Fix altmap reservation size calculation
  libnvdimm, pfn: Fix over-trim in trim_pfn_device()
  acpi/nfit: Fix bus command validation
  libnvdimm/dimm: Add a no-BLK quirk based on NVDIMM family
  ...
parents 3bb0f28d 4083014e
Loading
Loading
Loading
Loading
+8 −9
Original line number Diff line number Diff line
@@ -4643,10 +4643,11 @@ S: Maintained
F:	drivers/i2c/busses/i2c-diolan-u2c.c

FILESYSTEM DIRECT ACCESS (DAX)
M:	Matthew Wilcox <willy@infradead.org>
M:	Ross Zwisler <zwisler@kernel.org>
M:	Jan Kara <jack@suse.cz>
M:	Dan Williams <dan.j.williams@intel.com>
R:	Matthew Wilcox <willy@infradead.org>
R:	Jan Kara <jack@suse.cz>
L:	linux-fsdevel@vger.kernel.org
L:	linux-nvdimm@lists.01.org
S:	Supported
F:	fs/dax.c
F:	include/linux/dax.h
@@ -4654,9 +4655,9 @@ F: include/trace/events/fs_dax.h

DEVICE DIRECT ACCESS (DAX)
M:	Dan Williams <dan.j.williams@intel.com>
M:	Dave Jiang <dave.jiang@intel.com>
M:	Ross Zwisler <zwisler@kernel.org>
M:	Vishal Verma <vishal.l.verma@intel.com>
M:	Keith Busch <keith.busch@intel.com>
M:	Dave Jiang <dave.jiang@intel.com>
L:	linux-nvdimm@lists.01.org
S:	Supported
F:	drivers/dax/
@@ -8812,7 +8813,6 @@ S: Maintained
F:	tools/lib/lockdep/

LIBNVDIMM BLK: MMIO-APERTURE DRIVER
M:	Ross Zwisler <zwisler@kernel.org>
M:	Dan Williams <dan.j.williams@intel.com>
M:	Vishal Verma <vishal.l.verma@intel.com>
M:	Dave Jiang <dave.jiang@intel.com>
@@ -8825,7 +8825,6 @@ F: drivers/nvdimm/region_devs.c
LIBNVDIMM BTT: BLOCK TRANSLATION TABLE
M:	Vishal Verma <vishal.l.verma@intel.com>
M:	Dan Williams <dan.j.williams@intel.com>
M:	Ross Zwisler <zwisler@kernel.org>
M:	Dave Jiang <dave.jiang@intel.com>
L:	linux-nvdimm@lists.01.org
Q:	https://patchwork.kernel.org/project/linux-nvdimm/list/
@@ -8833,7 +8832,6 @@ S: Supported
F:	drivers/nvdimm/btt*

LIBNVDIMM PMEM: PERSISTENT MEMORY DRIVER
M:	Ross Zwisler <zwisler@kernel.org>
M:	Dan Williams <dan.j.williams@intel.com>
M:	Vishal Verma <vishal.l.verma@intel.com>
M:	Dave Jiang <dave.jiang@intel.com>
@@ -8852,9 +8850,10 @@ F: Documentation/devicetree/bindings/pmem/pmem-region.txt

LIBNVDIMM: NON-VOLATILE MEMORY DEVICE SUBSYSTEM
M:	Dan Williams <dan.j.williams@intel.com>
M:	Ross Zwisler <zwisler@kernel.org>
M:	Vishal Verma <vishal.l.verma@intel.com>
M:	Dave Jiang <dave.jiang@intel.com>
M:	Keith Busch <keith.busch@intel.com>
M:	Ira Weiny <ira.weiny@intel.com>
L:	linux-nvdimm@lists.01.org
Q:	https://patchwork.kernel.org/project/linux-nvdimm/list/
T:	git git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm.git
+127 −58
Original line number Diff line number Diff line
@@ -55,6 +55,10 @@ static bool no_init_ars;
module_param(no_init_ars, bool, 0644);
MODULE_PARM_DESC(no_init_ars, "Skip ARS run at nfit init time");

static bool force_labels;
module_param(force_labels, bool, 0444);
MODULE_PARM_DESC(force_labels, "Opt-in to labels despite missing methods");

LIST_HEAD(acpi_descs);
DEFINE_MUTEX(acpi_desc_lock);

@@ -415,7 +419,7 @@ static int cmd_to_func(struct nfit_mem *nfit_mem, unsigned int cmd,
	if (call_pkg) {
		int i;

		if (nfit_mem->family != call_pkg->nd_family)
		if (nfit_mem && nfit_mem->family != call_pkg->nd_family)
			return -ENOTTY;

		for (i = 0; i < ARRAY_SIZE(call_pkg->nd_reserved2); i++)
@@ -424,6 +428,10 @@ static int cmd_to_func(struct nfit_mem *nfit_mem, unsigned int cmd,
		return call_pkg->nd_command;
	}

	/* In the !call_pkg case, bus commands == bus functions */
	if (!nfit_mem)
		return cmd;

	/* Linux ND commands == NVDIMM_FAMILY_INTEL function numbers */
	if (nfit_mem->family == NVDIMM_FAMILY_INTEL)
		return cmd;
@@ -454,17 +462,18 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
	if (cmd_rc)
		*cmd_rc = -EINVAL;

	if (cmd == ND_CMD_CALL)
		call_pkg = buf;
	func = cmd_to_func(nfit_mem, cmd, call_pkg);
	if (func < 0)
		return func;

	if (nvdimm) {
		struct acpi_device *adev = nfit_mem->adev;

		if (!adev)
			return -ENOTTY;

		if (cmd == ND_CMD_CALL)
			call_pkg = buf;
		func = cmd_to_func(nfit_mem, cmd, call_pkg);
		if (func < 0)
			return func;
		dimm_name = nvdimm_name(nvdimm);
		cmd_name = nvdimm_cmd_name(cmd);
		cmd_mask = nvdimm_cmd_mask(nvdimm);
@@ -475,11 +484,8 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
	} else {
		struct acpi_device *adev = to_acpi_dev(acpi_desc);

		func = cmd;
		cmd_name = nvdimm_bus_cmd_name(cmd);
		cmd_mask = nd_desc->cmd_mask;
		dsm_mask = cmd_mask;
		if (cmd == ND_CMD_CALL)
		dsm_mask = nd_desc->bus_dsm_mask;
		desc = nd_cmd_bus_desc(cmd);
		guid = to_nfit_uuid(NFIT_DEV_BUS);
@@ -554,6 +560,13 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
		return -EINVAL;
	}

	if (out_obj->type != ACPI_TYPE_BUFFER) {
		dev_dbg(dev, "%s unexpected output object type cmd: %s type: %d\n",
				dimm_name, cmd_name, out_obj->type);
		rc = -EINVAL;
		goto out;
	}

	if (call_pkg) {
		call_pkg->nd_fw_size = out_obj->buffer.length;
		memcpy(call_pkg->nd_payload + call_pkg->nd_size_in,
@@ -572,13 +585,6 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
		return 0;
	}

	if (out_obj->package.type != ACPI_TYPE_BUFFER) {
		dev_dbg(dev, "%s unexpected output object type cmd: %s type: %d\n",
				dimm_name, cmd_name, out_obj->type);
		rc = -EINVAL;
		goto out;
	}

	dev_dbg(dev, "%s cmd: %s output length: %d\n", dimm_name,
			cmd_name, out_obj->buffer.length);
	print_hex_dump_debug(cmd_name, DUMP_PREFIX_OFFSET, 4, 4,
@@ -1317,19 +1323,30 @@ static ssize_t scrub_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct nvdimm_bus_descriptor *nd_desc;
	struct acpi_nfit_desc *acpi_desc;
	ssize_t rc = -ENXIO;
	bool busy;

	device_lock(dev);
	nd_desc = dev_get_drvdata(dev);
	if (nd_desc) {
		struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
	if (!nd_desc) {
		device_unlock(dev);
		return rc;
	}
	acpi_desc = to_acpi_desc(nd_desc);

	mutex_lock(&acpi_desc->init_mutex);
		rc = sprintf(buf, "%d%s", acpi_desc->scrub_count,
				acpi_desc->scrub_busy
				&& !acpi_desc->cancel ? "+\n" : "\n");
		mutex_unlock(&acpi_desc->init_mutex);
	busy = test_bit(ARS_BUSY, &acpi_desc->scrub_flags)
		&& !test_bit(ARS_CANCEL, &acpi_desc->scrub_flags);
	rc = sprintf(buf, "%d%s", acpi_desc->scrub_count, busy ? "+\n" : "\n");
	/* Allow an admin to poll the busy state at a higher rate */
	if (busy && capable(CAP_SYS_RAWIO) && !test_and_set_bit(ARS_POLL,
				&acpi_desc->scrub_flags)) {
		acpi_desc->scrub_tmo = 1;
		mod_delayed_work(nfit_wq, &acpi_desc->dwork, HZ);
	}

	mutex_unlock(&acpi_desc->init_mutex);
	device_unlock(dev);
	return rc;
}
@@ -1759,14 +1776,14 @@ static bool acpi_nvdimm_has_method(struct acpi_device *adev, char *method)

__weak void nfit_intel_shutdown_status(struct nfit_mem *nfit_mem)
{
	struct device *dev = &nfit_mem->adev->dev;
	struct nd_intel_smart smart = { 0 };
	union acpi_object in_buf = {
		.type = ACPI_TYPE_BUFFER,
		.buffer.pointer = (char *) &smart,
		.buffer.length = sizeof(smart),
		.buffer.type = ACPI_TYPE_BUFFER,
		.buffer.length = 0,
	};
	union acpi_object in_obj = {
		.type = ACPI_TYPE_PACKAGE,
		.package.type = ACPI_TYPE_PACKAGE,
		.package.count = 1,
		.package.elements = &in_buf,
	};
@@ -1781,8 +1798,15 @@ __weak void nfit_intel_shutdown_status(struct nfit_mem *nfit_mem)
		return;

	out_obj = acpi_evaluate_dsm(handle, guid, revid, func, &in_obj);
	if (!out_obj)
	if (!out_obj || out_obj->type != ACPI_TYPE_BUFFER
			|| out_obj->buffer.length < sizeof(smart)) {
		dev_dbg(dev->parent, "%s: failed to retrieve initial health\n",
				dev_name(dev));
		ACPI_FREE(out_obj);
		return;
	}
	memcpy(&smart, out_obj->buffer.pointer, sizeof(smart));
	ACPI_FREE(out_obj);

	if (smart.flags & ND_INTEL_SMART_SHUTDOWN_VALID) {
		if (smart.shutdown_state)
@@ -1793,7 +1817,6 @@ __weak void nfit_intel_shutdown_status(struct nfit_mem *nfit_mem)
		set_bit(NFIT_MEM_DIRTY_COUNT, &nfit_mem->flags);
		nfit_mem->dirty_shutdown = smart.shutdown_count;
	}
	ACPI_FREE(out_obj);
}

static void populate_shutdown_status(struct nfit_mem *nfit_mem)
@@ -1861,9 +1884,17 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
	dev_set_drvdata(&adev_dimm->dev, nfit_mem);

	/*
	 * Until standardization materializes we need to consider 4
	 * different command sets.  Note, that checking for function0 (bit0)
	 * tells us if any commands are reachable through this GUID.
	 * There are 4 "legacy" NVDIMM command sets
	 * (NVDIMM_FAMILY_{INTEL,MSFT,HPE1,HPE2}) that were created before
	 * an EFI working group was established to constrain this
	 * proliferation. The nfit driver probes for the supported command
	 * set by GUID. Note, if you're a platform developer looking to add
	 * a new command set to this probe, consider using an existing set,
	 * or otherwise seek approval to publish the command set at
	 * http://www.uefi.org/RFIC_LIST.
	 *
	 * Note, that checking for function0 (bit0) tells us if any commands
	 * are reachable through this GUID.
	 */
	for (i = 0; i <= NVDIMM_FAMILY_MAX; i++)
		if (acpi_check_dsm(adev_dimm->handle, to_nfit_uuid(i), 1, 1))
@@ -1886,6 +1917,8 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
			dsm_mask &= ~(1 << 8);
	} else if (nfit_mem->family == NVDIMM_FAMILY_MSFT) {
		dsm_mask = 0xffffffff;
	} else if (nfit_mem->family == NVDIMM_FAMILY_HYPERV) {
		dsm_mask = 0x1f;
	} else {
		dev_dbg(dev, "unknown dimm command family\n");
		nfit_mem->family = -1;
@@ -1915,8 +1948,8 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
		| 1 << ND_CMD_SET_CONFIG_DATA;
	if (family == NVDIMM_FAMILY_INTEL
			&& (dsm_mask & label_mask) == label_mask)
		return 0;

		/* skip _LS{I,R,W} enabling */;
	else {
		if (acpi_nvdimm_has_method(adev_dimm, "_LSI")
				&& acpi_nvdimm_has_method(adev_dimm, "_LSR")) {
			dev_dbg(dev, "%s: has _LSR\n", dev_name(&adev_dimm->dev));
@@ -1929,6 +1962,20 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
			set_bit(NFIT_MEM_LSW, &nfit_mem->flags);
		}

		/*
		 * Quirk read-only label configurations to preserve
		 * access to label-less namespaces by default.
		 */
		if (!test_bit(NFIT_MEM_LSW, &nfit_mem->flags)
				&& !force_labels) {
			dev_dbg(dev, "%s: No _LSW, disable labels\n",
					dev_name(&adev_dimm->dev));
			clear_bit(NFIT_MEM_LSR, &nfit_mem->flags);
		} else
			dev_dbg(dev, "%s: Force enable labels\n",
					dev_name(&adev_dimm->dev));
	}

	populate_shutdown_status(nfit_mem);

	return 0;
@@ -2027,6 +2074,10 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
			cmd_mask |= nfit_mem->dsm_mask & NVDIMM_STANDARD_CMDMASK;
		}

		/* Quirk to ignore LOCAL for labels on HYPERV DIMMs */
		if (nfit_mem->family == NVDIMM_FAMILY_HYPERV)
			set_bit(NDD_NOBLK, &flags);

		if (test_bit(NFIT_MEM_LSR, &nfit_mem->flags)) {
			set_bit(ND_CMD_GET_CONFIG_SIZE, &cmd_mask);
			set_bit(ND_CMD_GET_CONFIG_DATA, &cmd_mask);
@@ -2050,7 +2101,7 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
		if ((mem_flags & ACPI_NFIT_MEM_FAILED_MASK) == 0)
			continue;

		dev_info(acpi_desc->dev, "%s flags:%s%s%s%s%s\n",
		dev_err(acpi_desc->dev, "Error found in NVDIMM %s flags:%s%s%s%s%s\n",
				nvdimm_name(nvdimm),
		  mem_flags & ACPI_NFIT_MEM_SAVE_FAILED ? " save_fail" : "",
		  mem_flags & ACPI_NFIT_MEM_RESTORE_FAILED ? " restore_fail":"",
@@ -2641,7 +2692,10 @@ static int ars_start(struct acpi_nfit_desc *acpi_desc,

	if (rc < 0)
		return rc;
	if (cmd_rc < 0)
		return cmd_rc;
	set_bit(ARS_VALID, &acpi_desc->scrub_flags);
	return 0;
}

static int ars_continue(struct acpi_nfit_desc *acpi_desc)
@@ -2651,11 +2705,11 @@ static int ars_continue(struct acpi_nfit_desc *acpi_desc)
	struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
	struct nd_cmd_ars_status *ars_status = acpi_desc->ars_status;

	memset(&ars_start, 0, sizeof(ars_start));
	ars_start.address = ars_status->restart_address;
	ars_start.length = ars_status->restart_length;
	ars_start.type = ars_status->type;
	ars_start.flags = acpi_desc->ars_start_flags;
	ars_start = (struct nd_cmd_ars_start) {
		.address = ars_status->restart_address,
		.length = ars_status->restart_length,
		.type = ars_status->type,
	};
	rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_START, &ars_start,
			sizeof(ars_start), &cmd_rc);
	if (rc < 0)
@@ -2734,6 +2788,17 @@ static int ars_status_process_records(struct acpi_nfit_desc *acpi_desc)
	 */
	if (ars_status->out_length < 44)
		return 0;

	/*
	 * Ignore potentially stale results that are only refreshed
	 * after a start-ARS event.
	 */
	if (!test_and_clear_bit(ARS_VALID, &acpi_desc->scrub_flags)) {
		dev_dbg(acpi_desc->dev, "skip %d stale records\n",
				ars_status->num_records);
		return 0;
	}

	for (i = 0; i < ars_status->num_records; i++) {
		/* only process full records */
		if (ars_status->out_length
@@ -3004,14 +3069,16 @@ static int ars_register(struct acpi_nfit_desc *acpi_desc,
{
	int rc;

	if (no_init_ars || test_bit(ARS_FAILED, &nfit_spa->ars_state))
	if (test_bit(ARS_FAILED, &nfit_spa->ars_state))
		return acpi_nfit_register_region(acpi_desc, nfit_spa);

	set_bit(ARS_REQ_SHORT, &nfit_spa->ars_state);
	if (!no_init_ars)
		set_bit(ARS_REQ_LONG, &nfit_spa->ars_state);

	switch (acpi_nfit_query_poison(acpi_desc)) {
	case 0:
	case -ENOSPC:
	case -EAGAIN:
		rc = ars_start(acpi_desc, nfit_spa, ARS_REQ_SHORT);
		/* shouldn't happen, try again later */
@@ -3036,7 +3103,6 @@ static int ars_register(struct acpi_nfit_desc *acpi_desc,
		break;
	case -EBUSY:
	case -ENOMEM:
	case -ENOSPC:
		/*
		 * BIOS was using ARS, wait for it to complete (or
		 * resources to become available) and then perform our
@@ -3071,7 +3137,7 @@ static unsigned int __acpi_nfit_scrub(struct acpi_nfit_desc *acpi_desc,

	lockdep_assert_held(&acpi_desc->init_mutex);

	if (acpi_desc->cancel)
	if (test_bit(ARS_CANCEL, &acpi_desc->scrub_flags))
		return 0;

	if (query_rc == -EBUSY) {
@@ -3145,7 +3211,7 @@ static void __sched_ars(struct acpi_nfit_desc *acpi_desc, unsigned int tmo)
{
	lockdep_assert_held(&acpi_desc->init_mutex);

	acpi_desc->scrub_busy = 1;
	set_bit(ARS_BUSY, &acpi_desc->scrub_flags);
	/* note this should only be set from within the workqueue */
	if (tmo)
		acpi_desc->scrub_tmo = tmo;
@@ -3161,7 +3227,7 @@ static void notify_ars_done(struct acpi_nfit_desc *acpi_desc)
{
	lockdep_assert_held(&acpi_desc->init_mutex);

	acpi_desc->scrub_busy = 0;
	clear_bit(ARS_BUSY, &acpi_desc->scrub_flags);
	acpi_desc->scrub_count++;
	if (acpi_desc->scrub_count_state)
		sysfs_notify_dirent(acpi_desc->scrub_count_state);
@@ -3182,6 +3248,7 @@ static void acpi_nfit_scrub(struct work_struct *work)
	else
		notify_ars_done(acpi_desc);
	memset(acpi_desc->ars_status, 0, acpi_desc->max_ars);
	clear_bit(ARS_POLL, &acpi_desc->scrub_flags);
	mutex_unlock(&acpi_desc->init_mutex);
}

@@ -3216,6 +3283,7 @@ static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc)
	struct nfit_spa *nfit_spa;
	int rc;

	set_bit(ARS_VALID, &acpi_desc->scrub_flags);
	list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
		switch (nfit_spa_type(nfit_spa->spa)) {
		case NFIT_SPA_VOLATILE:
@@ -3450,7 +3518,7 @@ int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc,
	struct nfit_spa *nfit_spa;

	mutex_lock(&acpi_desc->init_mutex);
	if (acpi_desc->cancel) {
	if (test_bit(ARS_CANCEL, &acpi_desc->scrub_flags)) {
		mutex_unlock(&acpi_desc->init_mutex);
		return 0;
	}
@@ -3529,7 +3597,7 @@ void acpi_nfit_shutdown(void *data)
	mutex_unlock(&acpi_desc_lock);

	mutex_lock(&acpi_desc->init_mutex);
	acpi_desc->cancel = 1;
	set_bit(ARS_CANCEL, &acpi_desc->scrub_flags);
	cancel_delayed_work_sync(&acpi_desc->dwork);
	mutex_unlock(&acpi_desc->init_mutex);

@@ -3729,6 +3797,7 @@ static __init int nfit_init(void)
	guid_parse(UUID_NFIT_DIMM_N_HPE1, &nfit_uuid[NFIT_DEV_DIMM_N_HPE1]);
	guid_parse(UUID_NFIT_DIMM_N_HPE2, &nfit_uuid[NFIT_DEV_DIMM_N_HPE2]);
	guid_parse(UUID_NFIT_DIMM_N_MSFT, &nfit_uuid[NFIT_DEV_DIMM_N_MSFT]);
	guid_parse(UUID_NFIT_DIMM_N_HYPERV, &nfit_uuid[NFIT_DEV_DIMM_N_HYPERV]);

	nfit_wq = create_singlethread_workqueue("nfit");
	if (!nfit_wq)
+13 −4
Original line number Diff line number Diff line
@@ -34,11 +34,14 @@
/* https://msdn.microsoft.com/library/windows/hardware/mt604741 */
#define UUID_NFIT_DIMM_N_MSFT "1ee68b36-d4bd-4a1a-9a16-4f8e53d46e05"

/* http://www.uefi.org/RFIC_LIST (see "Virtual NVDIMM 0x1901") */
#define UUID_NFIT_DIMM_N_HYPERV "5746c5f2-a9a2-4264-ad0e-e4ddc9e09e80"

#define ACPI_NFIT_MEM_FAILED_MASK (ACPI_NFIT_MEM_SAVE_FAILED \
		| ACPI_NFIT_MEM_RESTORE_FAILED | ACPI_NFIT_MEM_FLUSH_FAILED \
		| ACPI_NFIT_MEM_NOT_ARMED | ACPI_NFIT_MEM_MAP_FAILED)

#define NVDIMM_FAMILY_MAX NVDIMM_FAMILY_MSFT
#define NVDIMM_FAMILY_MAX NVDIMM_FAMILY_HYPERV

#define NVDIMM_STANDARD_CMDMASK \
(1 << ND_CMD_SMART | 1 << ND_CMD_SMART_THRESHOLD | 1 << ND_CMD_DIMM_FLAGS \
@@ -94,6 +97,7 @@ enum nfit_uuids {
	NFIT_DEV_DIMM_N_HPE1 = NVDIMM_FAMILY_HPE1,
	NFIT_DEV_DIMM_N_HPE2 = NVDIMM_FAMILY_HPE2,
	NFIT_DEV_DIMM_N_MSFT = NVDIMM_FAMILY_MSFT,
	NFIT_DEV_DIMM_N_HYPERV = NVDIMM_FAMILY_HYPERV,
	NFIT_SPA_VOLATILE,
	NFIT_SPA_PM,
	NFIT_SPA_DCR,
@@ -210,6 +214,13 @@ struct nfit_mem {
	int family;
};

/*
 * Bit numbers for acpi_nfit_desc.scrub_flags; manipulated with
 * set_bit() / clear_bit() / test_bit() and friends.
 */
enum scrub_flags {
	/* set by __sched_ars(), cleared by notify_ars_done() */
	ARS_BUSY = 0,
	/* set by acpi_nfit_shutdown(); scrub and rescan paths bail out when set */
	ARS_CANCEL = 1,
	/*
	 * set after a successful ars_start() and at the top of
	 * acpi_nfit_register_regions(); consumed (test-and-clear) by
	 * ars_status_process_records() to skip potentially stale records
	 */
	ARS_VALID = 2,
	/*
	 * set by scrub_show() when a CAP_SYS_RAWIO reader polls a busy
	 * scrub; cleared at the end of acpi_nfit_scrub()
	 */
	ARS_POLL = 3,
};

struct acpi_nfit_desc {
	struct nvdimm_bus_descriptor nd_desc;
	struct acpi_table_header acpi_header;
@@ -223,7 +234,6 @@ struct acpi_nfit_desc {
	struct list_head idts;
	struct nvdimm_bus *nvdimm_bus;
	struct device *dev;
	u8 ars_start_flags;
	struct nd_cmd_ars_status *ars_status;
	struct nfit_spa *scrub_spa;
	struct delayed_work dwork;
@@ -232,8 +242,7 @@ struct acpi_nfit_desc {
	unsigned int max_ars;
	unsigned int scrub_count;
	unsigned int scrub_mode;
	unsigned int scrub_busy:1;
	unsigned int cancel:1;
	unsigned long scrub_flags;
	unsigned long dimm_cmd_force_en;
	unsigned long bus_cmd_force_en;
	unsigned long bus_nfit_cmd_force_en;
+28 −10
Original line number Diff line number Diff line
@@ -86,12 +86,14 @@ bool __bdev_dax_supported(struct block_device *bdev, int blocksize)
{
	struct dax_device *dax_dev;
	bool dax_enabled = false;
	pgoff_t pgoff, pgoff_end;
	struct request_queue *q;
	pgoff_t pgoff;
	int err, id;
	pfn_t pfn;
	long len;
	char buf[BDEVNAME_SIZE];
	void *kaddr, *end_kaddr;
	pfn_t pfn, end_pfn;
	sector_t last_page;
	long len, len2;
	int err, id;

	if (blocksize != PAGE_SIZE) {
		pr_debug("%s: error: unsupported blocksize for dax\n",
@@ -113,6 +115,14 @@ bool __bdev_dax_supported(struct block_device *bdev, int blocksize)
		return false;
	}

	last_page = PFN_DOWN(i_size_read(bdev->bd_inode) - 1) * 8;
	err = bdev_dax_pgoff(bdev, last_page, PAGE_SIZE, &pgoff_end);
	if (err) {
		pr_debug("%s: error: unaligned partition for dax\n",
				bdevname(bdev, buf));
		return false;
	}

	dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
	if (!dax_dev) {
		pr_debug("%s: error: device does not support dax\n",
@@ -121,14 +131,15 @@ bool __bdev_dax_supported(struct block_device *bdev, int blocksize)
	}

	id = dax_read_lock();
	len = dax_direct_access(dax_dev, pgoff, 1, NULL, &pfn);
	len = dax_direct_access(dax_dev, pgoff, 1, &kaddr, &pfn);
	len2 = dax_direct_access(dax_dev, pgoff_end, 1, &end_kaddr, &end_pfn);
	dax_read_unlock(id);

	put_dax(dax_dev);

	if (len < 1) {
	if (len < 1 || len2 < 1) {
		pr_debug("%s: error: dax access failed (%ld)\n",
				bdevname(bdev, buf), len);
				bdevname(bdev, buf), len < 1 ? len : len2);
		return false;
	}

@@ -143,13 +154,20 @@ bool __bdev_dax_supported(struct block_device *bdev, int blocksize)
		 */
		WARN_ON(IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API));
		dax_enabled = true;
	} else if (pfn_t_devmap(pfn)) {
		struct dev_pagemap *pgmap;
	} else if (pfn_t_devmap(pfn) && pfn_t_devmap(end_pfn)) {
		struct dev_pagemap *pgmap, *end_pgmap;

		pgmap = get_dev_pagemap(pfn_t_to_pfn(pfn), NULL);
		if (pgmap && pgmap->type == MEMORY_DEVICE_FS_DAX)
		end_pgmap = get_dev_pagemap(pfn_t_to_pfn(end_pfn), NULL);
		if (pgmap && pgmap == end_pgmap && pgmap->type == MEMORY_DEVICE_FS_DAX
				&& pfn_t_to_page(pfn)->pgmap == pgmap
				&& pfn_t_to_page(end_pfn)->pgmap == pgmap
				&& pfn_t_to_pfn(pfn) == PHYS_PFN(__pa(kaddr))
				&& pfn_t_to_pfn(end_pfn) == PHYS_PFN(__pa(end_kaddr)))
			dax_enabled = true;
		put_dev_pagemap(pgmap);
		put_dev_pagemap(end_pgmap);

	}

	if (!dax_enabled) {
+21 −12
Original line number Diff line number Diff line
@@ -541,9 +541,9 @@ static int arena_clear_freelist_error(struct arena_info *arena, u32 lane)

static int btt_freelist_init(struct arena_info *arena)
{
	int old, new, ret;
	u32 i, map_entry;
	struct log_entry log_new, log_old;
	int new, ret;
	struct log_entry log_new;
	u32 i, map_entry, log_oldmap, log_newmap;

	arena->freelist = kcalloc(arena->nfree, sizeof(struct free_entry),
					GFP_KERNEL);
@@ -551,24 +551,26 @@ static int btt_freelist_init(struct arena_info *arena)
		return -ENOMEM;

	for (i = 0; i < arena->nfree; i++) {
		old = btt_log_read(arena, i, &log_old, LOG_OLD_ENT);
		if (old < 0)
			return old;

		new = btt_log_read(arena, i, &log_new, LOG_NEW_ENT);
		if (new < 0)
			return new;

		/* old and new map entries with any flags stripped out */
		log_oldmap = ent_lba(le32_to_cpu(log_new.old_map));
		log_newmap = ent_lba(le32_to_cpu(log_new.new_map));

		/* sub points to the next one to be overwritten */
		arena->freelist[i].sub = 1 - new;
		arena->freelist[i].seq = nd_inc_seq(le32_to_cpu(log_new.seq));
		arena->freelist[i].block = le32_to_cpu(log_new.old_map);
		arena->freelist[i].block = log_oldmap;

		/*
		 * FIXME: if error clearing fails during init, we want to make
		 * the BTT read-only
		 */
		if (ent_e_flag(log_new.old_map)) {
		if (ent_e_flag(log_new.old_map) &&
				!ent_normal(log_new.old_map)) {
			arena->freelist[i].has_err = 1;
			ret = arena_clear_freelist_error(arena, i);
			if (ret)
				dev_err_ratelimited(to_dev(arena),
@@ -576,7 +578,7 @@ static int btt_freelist_init(struct arena_info *arena)
		}

		/* This implies a newly created or untouched flog entry */
		if (log_new.old_map == log_new.new_map)
		if (log_oldmap == log_newmap)
			continue;

		/* Check if map recovery is needed */
@@ -584,8 +586,15 @@ static int btt_freelist_init(struct arena_info *arena)
				NULL, NULL, 0);
		if (ret)
			return ret;
		if ((le32_to_cpu(log_new.new_map) != map_entry) &&
				(le32_to_cpu(log_new.old_map) == map_entry)) {

		/*
		 * The map_entry from btt_read_map is stripped of any flag bits,
		 * so use the stripped out versions from the log as well for
		 * testing whether recovery is needed. For restoration, use the
		 * 'raw' version of the log entries as that captured what we
		 * were going to write originally.
		 */
		if ((log_newmap != map_entry) && (log_oldmap == map_entry)) {
			/*
			 * Last transaction wrote the flog, but wasn't able
			 * to complete the map write. So fix up the map.
Loading