
Commit 0606263f authored by Dan Williams

Merge branch 'for-4.8/libnvdimm' into libnvdimm-for-next

parents a7225598 d4c5725d
Documentation/filesystems/Locking  +1 −1
@@ -395,7 +395,7 @@ prototypes:
 	int (*release) (struct gendisk *, fmode_t);
 	int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
 	int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
-	int (*direct_access) (struct block_device *, sector_t, void __pmem **,
+	int (*direct_access) (struct block_device *, sector_t, void **,
 				unsigned long *);
 	int (*media_changed) (struct gendisk *);
 	void (*unlock_native_capacity) (struct gendisk *);
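
For context, a caller-side sketch of the de-annotated interface. This is a
hypothetical illustration, not part of the diff; it assumes the 4.8-era
bdev_direct_access()/struct blk_dax_ctl API from <linux/blkdev.h>, and
map_one_sector is a made-up helper name:

	/*
	 * Hypothetical caller sketch: with __pmem gone, ->direct_access()
	 * hands back a plain kernel pointer, no __force casts required.
	 */
	static long map_one_sector(struct block_device *bdev, sector_t sector,
				   void **kaddr, pfn_t *pfn)
	{
		struct blk_dax_ctl dax = {
			.sector = sector,
			.size = PAGE_SIZE,	/* request at least one page */
		};
		long avail;

		avail = bdev_direct_access(bdev, &dax);
		if (avail < 0)
			return avail;	/* e.g. -EOPNOTSUPP or -ERANGE */

		*kaddr = dax.addr;	/* plain void * */
		*pfn = dax.pfn;
		return avail;		/* bytes usable at *kaddr, at most .size */
	}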
Documentation/nvdimm/btt.txt  +9 −19
@@ -256,28 +256,18 @@ If any of these error conditions are encountered, the arena is put into a read
 only state using a flag in the info block.
 
 
-5. In-kernel usage
-==================
+5. Usage
+========
 
-Any block driver that supports byte granularity IO to the storage may register
-with the BTT. It will have to provide the rw_bytes interface in its
-block_device_operations struct:
+The BTT can be set up on any disk (namespace) exposed by the libnvdimm subsystem
+(pmem, or blk mode). The easiest way to set up such a namespace is using the
+'ndctl' utility [1]:
 
-	int (*rw_bytes)(struct gendisk *, void *, size_t, off_t, int rw);
+For example, the ndctl command line to setup a btt with a 4k sector size is:
 
-It may register with the BTT after it adds its own gendisk, using btt_init:
+    ndctl create-namespace -f -e namespace0.0 -m sector -l 4k
 
-	struct btt *btt_init(struct gendisk *disk, unsigned long long rawsize,
-			u32 lbasize, u8 uuid[], int maxlane);
+See ndctl create-namespace --help for more options.
 
-note that maxlane is the maximum amount of concurrency the driver wishes to
-allow the BTT to use.
-
-The BTT 'disk' appears as a stacked block device that grabs the underlying block
-device in the O_EXCL mode.
-
-When the driver wishes to remove the backing disk, it should similarly call
-btt_fini using the same struct btt* handle that was provided to it by btt_init.
-
-	void btt_fini(struct btt *btt);
+[1]: https://github.com/pmem/ndctl
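
A usage illustration beyond the diff: the sector-mode namespace created above
surfaces as a regular block device, typically /dev/pmem0s for namespace0.0
(the device name is an assumption; check ndctl list on your system). Each 4k
sector written through it is atomic against power failure. A minimal
user-space read through the BTT:

	#define _GNU_SOURCE		/* for O_DIRECT */
	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		/* O_DIRECT buffers must be sector aligned; one 4k BTT sector */
		static char buf[4096] __attribute__((aligned(4096)));
		int fd = open("/dev/pmem0s", O_RDONLY | O_DIRECT);

		if (fd < 0) {
			perror("open");
			return 1;
		}
		if (read(fd, buf, sizeof(buf)) != sizeof(buf))
			perror("read");
		close(fd);
		return 0;
	}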
arch/powerpc/sysdev/axonram.c  +2 −2
@@ -143,12 +143,12 @@ axon_ram_make_request(struct request_queue *queue, struct bio *bio)
  */
 static long
 axon_ram_direct_access(struct block_device *device, sector_t sector,
-		       void __pmem **kaddr, pfn_t *pfn, long size)
+		       void **kaddr, pfn_t *pfn, long size)
 {
 	struct axon_ram_bank *bank = device->bd_disk->private_data;
 	loff_t offset = (loff_t)sector << AXON_RAM_SECTOR_SHIFT;
 
-	*kaddr = (void __pmem __force *) bank->io_addr + offset;
+	*kaddr = (void *) bank->io_addr + offset;
 	*pfn = phys_to_pfn_t(bank->ph_addr + offset, PFN_DEV);
 	return bank->size - offset;
 }
arch/x86/include/asm/cpufeatures.h  +0 −1
@@ -225,7 +225,6 @@
 #define X86_FEATURE_RDSEED	( 9*32+18) /* The RDSEED instruction */
 #define X86_FEATURE_ADX		( 9*32+19) /* The ADCX and ADOX instructions */
 #define X86_FEATURE_SMAP	( 9*32+20) /* Supervisor Mode Access Prevention */
-#define X86_FEATURE_PCOMMIT	( 9*32+22) /* PCOMMIT instruction */
 #define X86_FEATURE_CLFLUSHOPT	( 9*32+23) /* CLFLUSHOPT instruction */
 #define X86_FEATURE_CLWB	( 9*32+24) /* CLWB instruction */
 #define X86_FEATURE_AVX512PF	( 9*32+26) /* AVX-512 Prefetch */
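
For orientation (illustrative, not from this commit): word 9 of this feature
table mirrors CPUID.(EAX=7,ECX=0):EBX, so "9*32+22" names bit 22 of that
register, consistent with CLFLUSHOPT at bit 23 and CLWB at bit 24. A raw
user-space probe for the now-dropped bit, assuming GCC/Clang's <cpuid.h>:

	#include <cpuid.h>
	#include <stdio.h>

	int main(void)
	{
		unsigned int eax, ebx, ecx, edx;

		/* leaf 7, subleaf 0: structured extended feature flags */
		if (__get_cpuid(0, &eax, &ebx, &ecx, &edx) && eax >= 7) {
			__cpuid_count(7, 0, eax, ebx, ecx, edx);
			printf("PCOMMIT: %s\n", (ebx >> 22) & 1 ? "yes" : "no");
		}
		return 0;
	}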
arch/x86/include/asm/pmem.h  +19 −58
@@ -26,13 +26,11 @@
  * @n: length of the copy in bytes
  *
  * Copy data to persistent memory media via non-temporal stores so that
- * a subsequent arch_wmb_pmem() can flush cpu and memory controller
- * write buffers to guarantee durability.
+ * a subsequent pmem driver flush operation will drain posted write queues.
  */
-static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src,
-		size_t n)
+static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n)
 {
-	int unwritten;
+	int rem;
 
 	/*
 	 * We are copying between two kernel buffers, if
@@ -40,59 +38,36 @@ static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src,
 	 * fault) we would have already reported a general protection fault
 	 * before the WARN+BUG.
 	 */
-	unwritten = __copy_from_user_inatomic_nocache((void __force *) dst,
-			(void __user *) src, n);
-	if (WARN(unwritten, "%s: fault copying %p <- %p unwritten: %d\n",
-				__func__, dst, src, unwritten))
+	rem = __copy_from_user_inatomic_nocache(dst, (void __user *) src, n);
+	if (WARN(rem, "%s: fault copying %p <- %p unwritten: %d\n",
+				__func__, dst, src, rem))
 		BUG();
 }
 
-static inline int arch_memcpy_from_pmem(void *dst, const void __pmem *src,
-		size_t n)
+static inline int arch_memcpy_from_pmem(void *dst, const void *src, size_t n)
 {
 	if (static_cpu_has(X86_FEATURE_MCE_RECOVERY))
-		return memcpy_mcsafe(dst, (void __force *) src, n);
-	memcpy(dst, (void __force *) src, n);
+		return memcpy_mcsafe(dst, src, n);
+	memcpy(dst, src, n);
 	return 0;
 }
 
-/**
- * arch_wmb_pmem - synchronize writes to persistent memory
- *
- * After a series of arch_memcpy_to_pmem() operations this drains data
- * from cpu write buffers and any platform (memory controller) buffers
- * to ensure that written data is durable on persistent memory media.
- */
-static inline void arch_wmb_pmem(void)
-{
-	/*
-	 * wmb() to 'sfence' all previous writes such that they are
-	 * architecturally visible to 'pcommit'.  Note, that we've
-	 * already arranged for pmem writes to avoid the cache via
-	 * arch_memcpy_to_pmem().
-	 */
-	wmb();
-	pcommit_sfence();
-}
-
 /**
  * arch_wb_cache_pmem - write back a cache range with CLWB
  * @vaddr:	virtual start address
  * @size:	number of bytes to write back
  *
  * Write back a cache range using the CLWB (cache line write back)
- * instruction.  This function requires explicit ordering with an
- * arch_wmb_pmem() call.
+ * instruction.
  */
-static inline void arch_wb_cache_pmem(void __pmem *addr, size_t size)
+static inline void arch_wb_cache_pmem(void *addr, size_t size)
 {
 	u16 x86_clflush_size = boot_cpu_data.x86_clflush_size;
 	unsigned long clflush_mask = x86_clflush_size - 1;
-	void *vaddr = (void __force *)addr;
-	void *vend = vaddr + size;
+	void *vend = addr + size;
 	void *p;
 
-	for (p = (void *)((unsigned long)vaddr & ~clflush_mask);
+	for (p = (void *)((unsigned long)addr & ~clflush_mask);
 	     p < vend; p += x86_clflush_size)
 		clwb(p);
 }
@@ -113,16 +88,14 @@ static inline bool __iter_needs_pmem_wb(struct iov_iter *i)
  * @i:		iterator with source data
  *
  * Copy data from the iterator 'i' to the PMEM buffer starting at 'addr'.
- * This function requires explicit ordering with an arch_wmb_pmem() call.
  */
-static inline size_t arch_copy_from_iter_pmem(void __pmem *addr, size_t bytes,
+static inline size_t arch_copy_from_iter_pmem(void *addr, size_t bytes,
 		struct iov_iter *i)
 {
-	void *vaddr = (void __force *)addr;
 	size_t len;
 
 	/* TODO: skip the write-back by always using non-temporal stores */
-	len = copy_from_iter_nocache(vaddr, bytes, i);
+	len = copy_from_iter_nocache(addr, bytes, i);
 
 	if (__iter_needs_pmem_wb(i))
 		arch_wb_cache_pmem(addr, bytes);
@@ -136,28 +109,16 @@ static inline size_t arch_copy_from_iter_pmem(void __pmem *addr, size_t bytes,
  * @size:	number of bytes to zero
  *
  * Write zeros into the memory range starting at 'addr' for 'size' bytes.
- * This function requires explicit ordering with an arch_wmb_pmem() call.
  */
-static inline void arch_clear_pmem(void __pmem *addr, size_t size)
+static inline void arch_clear_pmem(void *addr, size_t size)
 {
-	void *vaddr = (void __force *)addr;
-
-	memset(vaddr, 0, size);
+	memset(addr, 0, size);
 	arch_wb_cache_pmem(addr, size);
 }
 
-static inline void arch_invalidate_pmem(void __pmem *addr, size_t size)
+static inline void arch_invalidate_pmem(void *addr, size_t size)
 {
-	clflush_cache_range((void __force *) addr, size);
-}
-
-static inline bool __arch_has_wmb_pmem(void)
-{
-	/*
-	 * We require that wmb() be an 'sfence', that is only guaranteed on
-	 * 64-bit builds
-	 */
-	return static_cpu_has(X86_FEATURE_PCOMMIT);
+	clflush_cache_range(addr, size);
 }
 #endif /* CONFIG_ARCH_HAS_PMEM_API */
 #endif /* __ASM_X86_PMEM_H__ */
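
Taken together, these hunks replace the pcommit step with a two-part model:
durability now means (a) getting data out of the CPU caches, via non-temporal
stores or a clwb loop as in arch_wb_cache_pmem() above, and (b) having the
pmem driver drain platform posted-write queues on flush. A hedged sketch of
the resulting write sequence, illustrative only; pmem_durable_write is a
made-up name, and blkdev_issue_flush() reaching the driver's flush path
(nvdimm_flush() in 4.8) is the assumed mechanism:

	/* sketch assumes kernel context, <linux/blkdev.h> and <linux/pmem.h> */
	static void pmem_durable_write(struct block_device *pmem_bdev,
				       void *pmem_dst, const void *src, size_t n)
	{
		/* non-temporal copy: data bypasses the CPU cache on its way out */
		memcpy_to_pmem(pmem_dst, src, n);

		/* order the stores; on x86-64, wmb() is an sfence */
		wmb();

		/* drain platform posted-write queues via the driver's flush path */
		blkdev_issue_flush(pmem_bdev, GFP_KERNEL, NULL);
	}

On the read side the contract is unchanged by the __pmem removal: when
X86_FEATURE_MCE_RECOVERY is set, arch_memcpy_from_pmem() goes through
memcpy_mcsafe(), so a media error surfaces as -EFAULT for the caller to map
to -EIO instead of escalating into a machine-check panic.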