Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 61031952 authored by Ross Zwisler, committed by Dan Williams
Browse files

arch, x86: pmem api for ensuring durability of persistent memory updates

Based on an original patch by Ross Zwisler [1].

Writes to persistent memory have the potential to be posted to cpu
cache, cpu write buffers, and platform write buffers (memory controller)
before being committed to persistent media.  Provide apis,
memcpy_to_pmem(), wmb_pmem(), and memremap_pmem(), to write data to
pmem and assert that it is durable in PMEM (a persistent linear address
range).  A '__pmem' attribute is added so sparse can track proper usage
of pointers to pmem.

This continues the status quo of pmem being x86-only for 4.2, but the
rework of ioremap, together with a wider implementation of memremap(),
will enable other architectures in 4.3.

[1]: https://lists.01.org/pipermail/linux-nvdimm/2015-May/000932.html



Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
[djbw: various reworks]
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
parent 74ae66c3
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -27,6 +27,7 @@ config X86
	select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS
	select ARCH_HAS_FAST_MULTIPLIER
	select ARCH_HAS_GCOV_PROFILE_ALL
	select ARCH_HAS_PMEM_API
	select ARCH_MIGHT_HAVE_PC_PARPORT
	select ARCH_MIGHT_HAVE_PC_SERIO
	select HAVE_AOUT if X86_32
+72 −0
Original line number Diff line number Diff line
@@ -4,6 +4,7 @@
/* Caches aren't brain-dead on the intel. */
#include <asm-generic/cacheflush.h>
#include <asm/special_insns.h>
#include <asm/uaccess.h>

/*
 * The set_memory_* API can be used to change various attributes of a virtual
@@ -104,4 +105,75 @@ static inline int rodata_test(void)
}
#endif

#ifdef ARCH_HAS_NOCACHE_UACCESS

/**
 * arch_memcpy_to_pmem - copy data to persistent memory
 * @dst: destination buffer for the copy
 * @src: source buffer for the copy
 * @n: length of the copy in bytes
 *
 * Copy data to persistent memory media via non-temporal stores so that
 * a subsequent arch_wmb_pmem() can flush cpu and memory controller
 * write buffers to guarantee durability.
 */
static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src,
		size_t n)
{
	int unwritten;

	/*
	 * We are copying between two kernel buffers, if
	 * __copy_from_user_inatomic_nocache() returns an error (page
	 * fault) we would have already reported a general protection fault
	 * before the WARN+BUG.
	 */
	/*
	 * The __force casts strip the sparse __pmem / __user address-space
	 * annotations: we deliberately reuse the nocache uaccess primitive
	 * on two kernel pointers to get non-temporal stores.
	 */
	unwritten = __copy_from_user_inatomic_nocache((void __force *) dst,
			(void __user *) src, n);
	/* Any short copy here is a kernel bug: the buffers are fully mapped. */
	if (WARN(unwritten, "%s: fault copying %p <- %p unwritten: %d\n",
				__func__, dst, src, unwritten))
		BUG();
}

/**
 * arch_wmb_pmem - synchronize writes to persistent memory
 *
 * After a series of arch_memcpy_to_pmem() operations this drains data
 * from cpu write buffers and any platform (memory controller) buffers
 * to ensure that written data is durable on persistent memory media.
 */
static inline void arch_wmb_pmem(void)
{
	/*
	 * wmb() to 'sfence' all previous writes such that they are
	 * architecturally visible to 'pcommit'.  Note, that we've
	 * already arranged for pmem writes to avoid the cache via
	 * arch_memcpy_to_pmem().
	 */
	wmb();
	/* Drain platform (memory controller) write buffers to media. */
	pcommit_sfence();
}

/*
 * Report whether this build can actually provide arch_wmb_pmem()
 * durability semantics on the running cpu.
 */
static inline bool __arch_has_wmb_pmem(void)
{
#ifndef CONFIG_X86_64
	/*
	 * 32-bit builds do not guarantee that wmb() is an 'sfence', so
	 * durable pmem writes cannot be promised there.
	 */
	return false;
#else
	/* 64-bit: durable only when the cpu implements 'pcommit'. */
	return static_cpu_has(X86_FEATURE_PCOMMIT);
#endif
}
#else /* ARCH_HAS_NOCACHE_UACCESS i.e. ARCH=um */
/*
 * Without nocache uaccess primitives the pmem copy/flush helpers cannot
 * be provided inline; out-of-line stubs are expected to satisfy the
 * linker (these paths should not be exercised at runtime).
 */
extern void arch_memcpy_to_pmem(void __pmem *dst, const void *src, size_t n);
extern void arch_wmb_pmem(void);

/* No durability guarantee is possible on this configuration. */
static inline bool __arch_has_wmb_pmem(void)
{
	return false;
}
#endif

#endif /* _ASM_X86_CACHEFLUSH_H */
+6 −0
Original line number Diff line number Diff line
@@ -247,6 +247,12 @@ static inline void flush_write_buffers(void)
#endif
}

/**
 * arch_memremap_pmem - map a persistent memory range into the kernel
 * @offset: physical start address of the range
 * @size: length of the range in bytes
 *
 * Maps the range write-back cacheable via ioremap_cache(); the __force
 * cast retags the returned pointer into the sparse __pmem address space
 * so misuse as a plain kernel pointer can be flagged.
 */
static inline void __pmem *arch_memremap_pmem(resource_size_t offset,
	unsigned long size)
{
	return (void __force __pmem *) ioremap_cache(offset, size);
}

#endif /* __KERNEL__ */

extern void native_io_delay(void);
+20 −13
Original line number Diff line number Diff line
@@ -23,6 +23,7 @@
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/slab.h>
#include <linux/pmem.h>
#include <linux/nd.h>
#include "nd.h"

@@ -32,7 +33,7 @@ struct pmem_device {

	/* One contiguous memory region per device */
	phys_addr_t		phys_addr;
	void			*virt_addr;
	void __pmem		*virt_addr;
	size_t			size;
};

@@ -44,13 +45,14 @@ static void pmem_do_bvec(struct pmem_device *pmem, struct page *page,
{
	void *mem = kmap_atomic(page);
	size_t pmem_off = sector << 9;
	void __pmem *pmem_addr = pmem->virt_addr + pmem_off;

	if (rw == READ) {
		memcpy(mem + off, pmem->virt_addr + pmem_off, len);
		memcpy_from_pmem(mem + off, pmem_addr, len);
		flush_dcache_page(page);
	} else {
		flush_dcache_page(page);
		memcpy(pmem->virt_addr + pmem_off, mem + off, len);
		memcpy_to_pmem(pmem_addr, mem + off, len);
	}

	kunmap_atomic(mem);
@@ -71,6 +73,10 @@ static void pmem_make_request(struct request_queue *q, struct bio *bio)
				bio_data_dir(bio), iter.bi_sector);
	if (do_acct)
		nd_iostat_end(bio, start);

	if (bio_data_dir(bio))
		wmb_pmem();

	bio_endio(bio, 0);
}

@@ -94,7 +100,8 @@ static long pmem_direct_access(struct block_device *bdev, sector_t sector,
	if (!pmem)
		return -ENODEV;

	*kaddr = pmem->virt_addr + offset;
	/* FIXME convert DAX to comprehend that this mapping has a lifetime */
	*kaddr = (void __force *) pmem->virt_addr + offset;
	*pfn = (pmem->phys_addr + offset) >> PAGE_SHIFT;

	return pmem->size - offset;
@@ -118,6 +125,8 @@ static struct pmem_device *pmem_alloc(struct device *dev,

	pmem->phys_addr = res->start;
	pmem->size = resource_size(res);
	if (!arch_has_pmem_api())
		dev_warn(dev, "unable to guarantee persistence of writes\n");

	if (!request_mem_region(pmem->phys_addr, pmem->size, dev_name(dev))) {
		dev_warn(dev, "could not reserve region [0x%pa:0x%zx]\n",
@@ -126,11 +135,7 @@ static struct pmem_device *pmem_alloc(struct device *dev,
		return ERR_PTR(-EBUSY);
	}

	/*
	 * Map the memory as non-cachable, as we can't write back the contents
	 * of the CPU caches in case of a crash.
	 */
	pmem->virt_addr = ioremap_nocache(pmem->phys_addr, pmem->size);
	pmem->virt_addr = memremap_pmem(pmem->phys_addr, pmem->size);
	if (!pmem->virt_addr) {
		release_mem_region(pmem->phys_addr, pmem->size);
		kfree(pmem);
@@ -195,16 +200,18 @@ static int pmem_rw_bytes(struct nd_namespace_common *ndns,
	}

	if (rw == READ)
		memcpy(buf, pmem->virt_addr + offset, size);
	else
		memcpy(pmem->virt_addr + offset, buf, size);
		memcpy_from_pmem(buf, pmem->virt_addr + offset, size);
	else {
		memcpy_to_pmem(pmem->virt_addr + offset, buf, size);
		wmb_pmem();
	}

	return 0;
}

static void pmem_free(struct pmem_device *pmem)
{
	iounmap(pmem->virt_addr);
	memunmap_pmem(pmem->virt_addr);
	release_mem_region(pmem->phys_addr, pmem->size);
	kfree(pmem);
}
+2 −0
Original line number Diff line number Diff line
@@ -21,6 +21,7 @@
# define __rcu		__attribute__((noderef, address_space(4)))
#else
# define __rcu
# define __pmem		__attribute__((noderef, address_space(5)))
#endif
extern void __chk_user_ptr(const volatile void __user *);
extern void __chk_io_ptr(const volatile void __iomem *);
@@ -42,6 +43,7 @@ extern void __chk_io_ptr(const volatile void __iomem *);
# define __cond_lock(x,c) (c)
# define __percpu
# define __rcu
# define __pmem
#endif

/* Indirect macros required for expanded argument pasting, eg. __LINE__. */
Loading