
Commit d5ff0814 authored by Linus Torvalds
Pull nvdimm fixes from Dan Williams:
 "A small crop of lockdep, sleeping while atomic, and other fixes /
  band-aids in advance of the full-blown reworks targeting the next
  merge window. The largest change here is "libnvdimm: fix blk free
  space accounting" which deletes a pile of buggy code that better
  testing would have caught before merging. The next change that is
  borderline too big for a late rc is switching the device-dax locking
  from rcu to srcu; I couldn't think of a smaller way to make that fix.

  The __copy_user_nocache fix will have a full replacement in 4.12 to
  move those pmem special case considerations into the pmem driver. The
  "libnvdimm: band aid btt vs clear poison locking" commit admits that
  our error clearing support for btt went in broken, so we just disable
  it in 4.11 and -stable. A replacement / full fix is in the pipeline
  for 4.12.

  Some of these would have been caught earlier had DEBUG_ATOMIC_SLEEP
  been enabled on my development station. I wonder if we should have:

      config DEBUG_ATOMIC_SLEEP
        default PROVE_LOCKING

  ...since I mistakenly thought I got both with PROVE_LOCKING=y.

  These have received a build success notification from the 0day robot,
  and some have appeared in a -next release with no reported issues"

* 'libnvdimm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm:
  x86, pmem: fix broken __copy_user_nocache cache-bypass assumptions
  device-dax: switch to srcu, fix rcu_read_lock() vs pte allocation
  libnvdimm: band aid btt vs clear poison locking
  libnvdimm: fix reconfig_mutex, mmap_sem, and jbd2_handle lockdep splat
  libnvdimm: fix blk free space accounting
  acpi, nfit, libnvdimm: fix interleave set cookie calculation (64-bit comparison)
parents 403a39f8 11e63f6d
arch/x86/include/asm/pmem.h +31 −11
@@ -55,7 +55,8 @@ static inline int arch_memcpy_from_pmem(void *dst, const void *src, size_t n)
  * @size:	number of bytes to write back
  *
  * Write back a cache range using the CLWB (cache line write back)
- * instruction.
+ * instruction. Note that @size is internally rounded up to be cache
+ * line size aligned.
  */
 static inline void arch_wb_cache_pmem(void *addr, size_t size)
 {
@@ -69,15 +70,6 @@ static inline void arch_wb_cache_pmem(void *addr, size_t size)
 		clwb(p);
 }
 
-/*
- * copy_from_iter_nocache() on x86 only uses non-temporal stores for iovec
- * iterators, so for other types (bvec & kvec) we must do a cache write-back.
- */
-static inline bool __iter_needs_pmem_wb(struct iov_iter *i)
-{
-	return iter_is_iovec(i) == false;
-}
-
 /**
  * arch_copy_from_iter_pmem - copy data from an iterator to PMEM
  * @addr:	PMEM destination address
@@ -94,7 +86,35 @@ static inline size_t arch_copy_from_iter_pmem(void *addr, size_t bytes,
 	/* TODO: skip the write-back by always using non-temporal stores */
 	len = copy_from_iter_nocache(addr, bytes, i);
 
-	if (__iter_needs_pmem_wb(i))
+	/*
+	 * In the iovec case on x86_64 copy_from_iter_nocache() uses
+	 * non-temporal stores for the bulk of the transfer, but we need
+	 * to manually flush if the transfer is unaligned. A cached
+	 * memory copy is used when destination or size is not naturally
+	 * aligned. That is:
+	 *   - Require 8-byte alignment when size is 8 bytes or larger.
+	 *   - Require 4-byte alignment when size is 4 bytes.
+	 *
+	 * In the non-iovec case the entire destination needs to be
+	 * flushed.
+	 */
+	if (iter_is_iovec(i)) {
+		unsigned long flushed, dest = (unsigned long) addr;
+
+		if (bytes < 8) {
+			if (!IS_ALIGNED(dest, 4) || (bytes != 4))
+				arch_wb_cache_pmem(addr, 1);
+		} else {
+			if (!IS_ALIGNED(dest, 8)) {
+				dest = ALIGN(dest, boot_cpu_data.x86_clflush_size);
+				arch_wb_cache_pmem(addr, 1);
+			}
+
+			flushed = dest - (unsigned long) addr;
+			if (bytes > flushed && !IS_ALIGNED(bytes - flushed, 8))
+				arch_wb_cache_pmem(addr + bytes - 1, 1);
+		}
+	} else
+		arch_wb_cache_pmem(addr, bytes);
 
 	return len;
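
The alignment rules in the new comment are easier to see in isolation. Below is a minimal userspace sketch (not from the kernel tree) of the same flush-decision logic; IS_ALIGNED()/ALIGN() are reimplemented here and CLFLUSH_SIZE stands in for boot_cpu_data.x86_clflush_size, assuming 64-byte cache lines:

#include <stdio.h>

/* Userspace stand-ins for the kernel's IS_ALIGNED()/ALIGN() macros */
#define CLFLUSH_SIZE	64UL
#define IS_ALIGNED(x, a)	(((x) & ((a) - 1)) == 0)
#define ALIGN(x, a)	(((x) + ((a) - 1)) & ~((a) - 1))

static void check(unsigned long addr, unsigned long bytes)
{
	unsigned long dest = addr, flushed;
	int head = 0, tail = 0;

	if (bytes < 8) {
		/* a small copy only uses non-temporal stores when it is
		 * exactly 4 bytes at a 4-byte aligned destination */
		if (!IS_ALIGNED(dest, 4) || bytes != 4)
			head = 1;
	} else {
		if (!IS_ALIGNED(dest, 8)) {
			/* the unaligned head was written with cached stores */
			dest = ALIGN(dest, CLFLUSH_SIZE);
			head = 1;
		}
		flushed = dest - addr;
		if (bytes > flushed && !IS_ALIGNED(bytes - flushed, 8))
			tail = 1;	/* the ragged tail was cached too */
	}
	printf("addr=%#lx bytes=%5lu -> head flush: %d, tail flush: %d\n",
	       addr, bytes, head, tail);
}

int main(void)
{
	check(0x1000, 4096);	/* fully aligned: no manual flush needed */
	check(0x1003, 4096);	/* unaligned start: head and tail flush */
	check(0x1000, 9);	/* aligned start, ragged size: tail flush */
	return 0;
}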
drivers/acpi/nfit/core.c +5 −1
@@ -1617,7 +1617,11 @@ static int cmp_map(const void *m0, const void *m1)
 	const struct nfit_set_info_map *map0 = m0;
 	const struct nfit_set_info_map *map1 = m1;
 
-	return map0->region_offset - map1->region_offset;
+	if (map0->region_offset < map1->region_offset)
+		return -1;
+	else if (map0->region_offset > map1->region_offset)
+		return 1;
+	return 0;
 }
 
 /* Retrieve the nth entry referencing this spa */
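
The one-line comparator was subtly broken: region_offset is a 64-bit value, but the subtraction result is truncated to cmp_map()'s int return type, so two offsets that differ by a multiple of 2^32 compare as equal (or with the wrong sign), corrupting the sort order and hence the interleave set cookie. A minimal userspace sketch with hypothetical offsets:

#include <stdio.h>
#include <stdint.h>

static int cmp_old(uint64_t a, uint64_t b)
{
	return a - b;	/* 64-bit difference truncated to int */
}

static int cmp_new(uint64_t a, uint64_t b)
{
	if (a < b)
		return -1;
	else if (a > b)
		return 1;
	return 0;
}

int main(void)
{
	uint64_t a = 1ULL << 32, b = 0;	/* offsets exactly 4 GiB apart */

	/* old comparator: low 32 bits of the difference are 0 -> "equal" */
	printf("old: %d  new: %d\n", cmp_old(a, b), cmp_new(a, b));
	return 0;
}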
drivers/dax/Kconfig +1 −0
@@ -2,6 +2,7 @@ menuconfig DEV_DAX
 	tristate "DAX: direct access to differentiated memory"
 	default m if NVDIMM_DAX
 	depends on TRANSPARENT_HUGEPAGE
+	select SRCU
 	help
 	  Support raw access to differentiated (persistence, bandwidth,
 	  latency...) memory via an mmap(2) capable character
drivers/dax/dax.c +7 −6
@@ -25,6 +25,7 @@
 #include "dax.h"
 
 static dev_t dax_devt;
+DEFINE_STATIC_SRCU(dax_srcu);
 static struct class *dax_class;
 static DEFINE_IDA(dax_minor_ida);
 static int nr_dax = CONFIG_NR_DEV_DAX;
@@ -60,7 +61,7 @@ struct dax_region {
  * @region - parent region
  * @dev - device backing the character device
  * @cdev - core chardev data
- * @alive - !alive + rcu grace period == no new mappings can be established
+ * @alive - !alive + srcu grace period == no new mappings can be established
  * @id - child id in the region
  * @num_resources - number of physical address extents in this device
  * @res - array of physical address ranges
@@ -569,7 +570,7 @@ static int __dax_dev_pud_fault(struct dax_dev *dax_dev, struct vm_fault *vmf)
 static int dax_dev_huge_fault(struct vm_fault *vmf,
 		enum page_entry_size pe_size)
 {
-	int rc;
+	int rc, id;
 	struct file *filp = vmf->vma->vm_file;
 	struct dax_dev *dax_dev = filp->private_data;
 
@@ -578,7 +579,7 @@ static int dax_dev_huge_fault(struct vm_fault *vmf,
 			? "write" : "read",
 			vmf->vma->vm_start, vmf->vma->vm_end);
 
-	rcu_read_lock();
+	id = srcu_read_lock(&dax_srcu);
 	switch (pe_size) {
 	case PE_SIZE_PTE:
 		rc = __dax_dev_pte_fault(dax_dev, vmf);
@@ -592,7 +593,7 @@ static int dax_dev_huge_fault(struct vm_fault *vmf,
 	default:
 		return VM_FAULT_FALLBACK;
 	}
-	rcu_read_unlock();
+	srcu_read_unlock(&dax_srcu, id);
 
 	return rc;
 }
@@ -713,11 +714,11 @@ static void unregister_dax_dev(void *dev)
 	 * Note, rcu is not protecting the liveness of dax_dev, rcu is
 	 * ensuring that any fault handlers that might have seen
 	 * dax_dev->alive == true, have completed.  Any fault handlers
-	 * that start after synchronize_rcu() has started will abort
+	 * that start after synchronize_srcu() has started will abort
	 * upon seeing dax_dev->alive == false.
 	 */
 	dax_dev->alive = false;
-	synchronize_rcu();
+	synchronize_srcu(&dax_srcu);
 	unmap_mapping_range(dax_dev->inode->i_mapping, 0, 0, 1);
 	cdev_del(cdev);
 	device_unregister(dev);
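
The reason for the switch: the fault path may sleep, for example while allocating page tables, which is illegal inside rcu_read_lock() but permitted inside an SRCU read-side critical section. A condensed sketch of the resulting pattern, with handle_fault() standing in for the __dax_dev_{pte,pmd,pud}_fault() handlers above (illustrative, not the complete driver):

DEFINE_STATIC_SRCU(dax_srcu);

static int dax_fault_sketch(struct dax_dev *dax_dev, struct vm_fault *vmf)
{
	int rc, id;

	id = srcu_read_lock(&dax_srcu);	/* readers may sleep under SRCU */
	if (!dax_dev->alive)
		rc = VM_FAULT_SIGBUS;
	else
		rc = handle_fault(dax_dev, vmf);	/* may allocate ptes, may sleep */
	srcu_read_unlock(&dax_srcu, id);

	return rc;
}

static void dax_teardown_sketch(struct dax_dev *dax_dev)
{
	dax_dev->alive = false;
	/* wait for every reader that could have observed alive == true */
	synchronize_srcu(&dax_srcu);
}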
drivers/nvdimm/bus.c +6 −0
@@ -934,8 +934,14 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
 	rc = nd_desc->ndctl(nd_desc, nvdimm, cmd, buf, buf_len, NULL);
 	if (rc < 0)
 		goto out_unlock;
+	nvdimm_bus_unlock(&nvdimm_bus->dev);
+
 	if (copy_to_user(p, buf, buf_len))
 		rc = -EFAULT;
+
+	vfree(buf);
+	return rc;
+
  out_unlock:
 	nvdimm_bus_unlock(&nvdimm_bus->dev);
  out:
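
The shape of this fix is a general rule: copy_to_user() can fault and take mmap_sem, so calling it while holding a bus lock sets up the reconfig_mutex vs mmap_sem vs jbd2_handle inversion named in the commit title. A condensed sketch of the pattern; "lock" and do_work() are illustrative stand-ins, not the driver's actual names:

static DEFINE_MUTEX(lock);

static int ioctl_sketch(void __user *p, size_t len)
{
	void *buf = vmalloc(len);
	int rc;

	if (!buf)
		return -ENOMEM;

	mutex_lock(&lock);
	rc = do_work(buf, len);	/* fill buf while the lock is held */
	mutex_unlock(&lock);	/* drop it before copy_to_user() can fault */

	if (rc >= 0 && copy_to_user(p, buf, len))
		rc = -EFAULT;

	vfree(buf);
	return rc;
}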