Loading Documentation/filesystems/Locking +1 −1 Original line number Diff line number Diff line Loading @@ -395,7 +395,7 @@ prototypes: int (*release) (struct gendisk *, fmode_t); int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); int (*direct_access) (struct block_device *, sector_t, void __pmem **, int (*direct_access) (struct block_device *, sector_t, void **, unsigned long *); int (*media_changed) (struct gendisk *); void (*unlock_native_capacity) (struct gendisk *); Loading Documentation/nvdimm/btt.txt +9 −19 Original line number Diff line number Diff line Loading @@ -256,28 +256,18 @@ If any of these error conditions are encountered, the arena is put into a read only state using a flag in the info block. 5. In-kernel usage ================== 5. Usage ======== Any block driver that supports byte granularity IO to the storage may register with the BTT. It will have to provide the rw_bytes interface in its block_device_operations struct: The BTT can be set up on any disk (namespace) exposed by the libnvdimm subsystem (pmem, or blk mode). The easiest way to set up such a namespace is using the 'ndctl' utility [1]: int (*rw_bytes)(struct gendisk *, void *, size_t, off_t, int rw); For example, the ndctl command line to setup a btt with a 4k sector size is: It may register with the BTT after it adds its own gendisk, using btt_init: ndctl create-namespace -f -e namespace0.0 -m sector -l 4k struct btt *btt_init(struct gendisk *disk, unsigned long long rawsize, u32 lbasize, u8 uuid[], int maxlane); See ndctl create-namespace --help for more options. note that maxlane is the maximum amount of concurrency the driver wishes to allow the BTT to use. The BTT 'disk' appears as a stacked block device that grabs the underlying block device in the O_EXCL mode. When the driver wishes to remove the backing disk, it should similarly call btt_fini using the same struct btt* handle that was provided to it by btt_init. void btt_fini(struct btt *btt); [1]: https://github.com/pmem/ndctl arch/powerpc/sysdev/axonram.c +2 −2 Original line number Diff line number Diff line Loading @@ -143,12 +143,12 @@ axon_ram_make_request(struct request_queue *queue, struct bio *bio) */ static long axon_ram_direct_access(struct block_device *device, sector_t sector, void __pmem **kaddr, pfn_t *pfn, long size) void **kaddr, pfn_t *pfn, long size) { struct axon_ram_bank *bank = device->bd_disk->private_data; loff_t offset = (loff_t)sector << AXON_RAM_SECTOR_SHIFT; *kaddr = (void __pmem __force *) bank->io_addr + offset; *kaddr = (void *) bank->io_addr + offset; *pfn = phys_to_pfn_t(bank->ph_addr + offset, PFN_DEV); return bank->size - offset; } Loading arch/x86/include/asm/cpufeatures.h +0 −1 Original line number Diff line number Diff line Loading @@ -225,7 +225,6 @@ #define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */ #define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */ #define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */ #define X86_FEATURE_PCOMMIT ( 9*32+22) /* PCOMMIT instruction */ #define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */ #define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */ #define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */ Loading arch/x86/include/asm/pmem.h +19 −58 Original line number Diff line number Diff line Loading @@ -26,13 +26,11 @@ * @n: length of the copy in bytes * * Copy data to persistent memory media via non-temporal stores so that * a subsequent arch_wmb_pmem() can flush cpu and memory controller * write buffers to guarantee durability. * a subsequent pmem driver flush operation will drain posted write queues. */ static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src, size_t n) static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n) { int unwritten; int rem; /* * We are copying between two kernel buffers, if Loading @@ -40,59 +38,36 @@ static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src, * fault) we would have already reported a general protection fault * before the WARN+BUG. */ unwritten = __copy_from_user_inatomic_nocache((void __force *) dst, (void __user *) src, n); if (WARN(unwritten, "%s: fault copying %p <- %p unwritten: %d\n", __func__, dst, src, unwritten)) rem = __copy_from_user_inatomic_nocache(dst, (void __user *) src, n); if (WARN(rem, "%s: fault copying %p <- %p unwritten: %d\n", __func__, dst, src, rem)) BUG(); } static inline int arch_memcpy_from_pmem(void *dst, const void __pmem *src, size_t n) static inline int arch_memcpy_from_pmem(void *dst, const void *src, size_t n) { if (static_cpu_has(X86_FEATURE_MCE_RECOVERY)) return memcpy_mcsafe(dst, (void __force *) src, n); memcpy(dst, (void __force *) src, n); return memcpy_mcsafe(dst, src, n); memcpy(dst, src, n); return 0; } /** * arch_wmb_pmem - synchronize writes to persistent memory * * After a series of arch_memcpy_to_pmem() operations this drains data * from cpu write buffers and any platform (memory controller) buffers * to ensure that written data is durable on persistent memory media. */ static inline void arch_wmb_pmem(void) { /* * wmb() to 'sfence' all previous writes such that they are * architecturally visible to 'pcommit'. Note, that we've * already arranged for pmem writes to avoid the cache via * arch_memcpy_to_pmem(). */ wmb(); pcommit_sfence(); } /** * arch_wb_cache_pmem - write back a cache range with CLWB * @vaddr: virtual start address * @size: number of bytes to write back * * Write back a cache range using the CLWB (cache line write back) * instruction. This function requires explicit ordering with an * arch_wmb_pmem() call. * instruction. */ static inline void arch_wb_cache_pmem(void __pmem *addr, size_t size) static inline void arch_wb_cache_pmem(void *addr, size_t size) { u16 x86_clflush_size = boot_cpu_data.x86_clflush_size; unsigned long clflush_mask = x86_clflush_size - 1; void *vaddr = (void __force *)addr; void *vend = vaddr + size; void *vend = addr + size; void *p; for (p = (void *)((unsigned long)vaddr & ~clflush_mask); for (p = (void *)((unsigned long)addr & ~clflush_mask); p < vend; p += x86_clflush_size) clwb(p); } Loading @@ -113,16 +88,14 @@ static inline bool __iter_needs_pmem_wb(struct iov_iter *i) * @i: iterator with source data * * Copy data from the iterator 'i' to the PMEM buffer starting at 'addr'. * This function requires explicit ordering with an arch_wmb_pmem() call. */ static inline size_t arch_copy_from_iter_pmem(void __pmem *addr, size_t bytes, static inline size_t arch_copy_from_iter_pmem(void *addr, size_t bytes, struct iov_iter *i) { void *vaddr = (void __force *)addr; size_t len; /* TODO: skip the write-back by always using non-temporal stores */ len = copy_from_iter_nocache(vaddr, bytes, i); len = copy_from_iter_nocache(addr, bytes, i); if (__iter_needs_pmem_wb(i)) arch_wb_cache_pmem(addr, bytes); Loading @@ -136,28 +109,16 @@ static inline size_t arch_copy_from_iter_pmem(void __pmem *addr, size_t bytes, * @size: number of bytes to zero * * Write zeros into the memory range starting at 'addr' for 'size' bytes. * This function requires explicit ordering with an arch_wmb_pmem() call. */ static inline void arch_clear_pmem(void __pmem *addr, size_t size) static inline void arch_clear_pmem(void *addr, size_t size) { void *vaddr = (void __force *)addr; memset(vaddr, 0, size); memset(addr, 0, size); arch_wb_cache_pmem(addr, size); } static inline void arch_invalidate_pmem(void __pmem *addr, size_t size) static inline void arch_invalidate_pmem(void *addr, size_t size) { clflush_cache_range((void __force *) addr, size); } static inline bool __arch_has_wmb_pmem(void) { /* * We require that wmb() be an 'sfence', that is only guaranteed on * 64-bit builds */ return static_cpu_has(X86_FEATURE_PCOMMIT); clflush_cache_range(addr, size); } #endif /* CONFIG_ARCH_HAS_PMEM_API */ #endif /* __ASM_X86_PMEM_H__ */ Loading
Documentation/filesystems/Locking +1 −1 Original line number Diff line number Diff line Loading @@ -395,7 +395,7 @@ prototypes: int (*release) (struct gendisk *, fmode_t); int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); int (*direct_access) (struct block_device *, sector_t, void __pmem **, int (*direct_access) (struct block_device *, sector_t, void **, unsigned long *); int (*media_changed) (struct gendisk *); void (*unlock_native_capacity) (struct gendisk *); Loading
Documentation/nvdimm/btt.txt +9 −19 Original line number Diff line number Diff line Loading @@ -256,28 +256,18 @@ If any of these error conditions are encountered, the arena is put into a read only state using a flag in the info block. 5. In-kernel usage ================== 5. Usage ======== Any block driver that supports byte granularity IO to the storage may register with the BTT. It will have to provide the rw_bytes interface in its block_device_operations struct: The BTT can be set up on any disk (namespace) exposed by the libnvdimm subsystem (pmem, or blk mode). The easiest way to set up such a namespace is using the 'ndctl' utility [1]: int (*rw_bytes)(struct gendisk *, void *, size_t, off_t, int rw); For example, the ndctl command line to setup a btt with a 4k sector size is: It may register with the BTT after it adds its own gendisk, using btt_init: ndctl create-namespace -f -e namespace0.0 -m sector -l 4k struct btt *btt_init(struct gendisk *disk, unsigned long long rawsize, u32 lbasize, u8 uuid[], int maxlane); See ndctl create-namespace --help for more options. note that maxlane is the maximum amount of concurrency the driver wishes to allow the BTT to use. The BTT 'disk' appears as a stacked block device that grabs the underlying block device in the O_EXCL mode. When the driver wishes to remove the backing disk, it should similarly call btt_fini using the same struct btt* handle that was provided to it by btt_init. void btt_fini(struct btt *btt); [1]: https://github.com/pmem/ndctl
arch/powerpc/sysdev/axonram.c +2 −2 Original line number Diff line number Diff line Loading @@ -143,12 +143,12 @@ axon_ram_make_request(struct request_queue *queue, struct bio *bio) */ static long axon_ram_direct_access(struct block_device *device, sector_t sector, void __pmem **kaddr, pfn_t *pfn, long size) void **kaddr, pfn_t *pfn, long size) { struct axon_ram_bank *bank = device->bd_disk->private_data; loff_t offset = (loff_t)sector << AXON_RAM_SECTOR_SHIFT; *kaddr = (void __pmem __force *) bank->io_addr + offset; *kaddr = (void *) bank->io_addr + offset; *pfn = phys_to_pfn_t(bank->ph_addr + offset, PFN_DEV); return bank->size - offset; } Loading
arch/x86/include/asm/cpufeatures.h +0 −1 Original line number Diff line number Diff line Loading @@ -225,7 +225,6 @@ #define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */ #define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */ #define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */ #define X86_FEATURE_PCOMMIT ( 9*32+22) /* PCOMMIT instruction */ #define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */ #define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */ #define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */ Loading
arch/x86/include/asm/pmem.h +19 −58 Original line number Diff line number Diff line Loading @@ -26,13 +26,11 @@ * @n: length of the copy in bytes * * Copy data to persistent memory media via non-temporal stores so that * a subsequent arch_wmb_pmem() can flush cpu and memory controller * write buffers to guarantee durability. * a subsequent pmem driver flush operation will drain posted write queues. */ static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src, size_t n) static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n) { int unwritten; int rem; /* * We are copying between two kernel buffers, if Loading @@ -40,59 +38,36 @@ static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src, * fault) we would have already reported a general protection fault * before the WARN+BUG. */ unwritten = __copy_from_user_inatomic_nocache((void __force *) dst, (void __user *) src, n); if (WARN(unwritten, "%s: fault copying %p <- %p unwritten: %d\n", __func__, dst, src, unwritten)) rem = __copy_from_user_inatomic_nocache(dst, (void __user *) src, n); if (WARN(rem, "%s: fault copying %p <- %p unwritten: %d\n", __func__, dst, src, rem)) BUG(); } static inline int arch_memcpy_from_pmem(void *dst, const void __pmem *src, size_t n) static inline int arch_memcpy_from_pmem(void *dst, const void *src, size_t n) { if (static_cpu_has(X86_FEATURE_MCE_RECOVERY)) return memcpy_mcsafe(dst, (void __force *) src, n); memcpy(dst, (void __force *) src, n); return memcpy_mcsafe(dst, src, n); memcpy(dst, src, n); return 0; } /** * arch_wmb_pmem - synchronize writes to persistent memory * * After a series of arch_memcpy_to_pmem() operations this drains data * from cpu write buffers and any platform (memory controller) buffers * to ensure that written data is durable on persistent memory media. */ static inline void arch_wmb_pmem(void) { /* * wmb() to 'sfence' all previous writes such that they are * architecturally visible to 'pcommit'. Note, that we've * already arranged for pmem writes to avoid the cache via * arch_memcpy_to_pmem(). */ wmb(); pcommit_sfence(); } /** * arch_wb_cache_pmem - write back a cache range with CLWB * @vaddr: virtual start address * @size: number of bytes to write back * * Write back a cache range using the CLWB (cache line write back) * instruction. This function requires explicit ordering with an * arch_wmb_pmem() call. * instruction. */ static inline void arch_wb_cache_pmem(void __pmem *addr, size_t size) static inline void arch_wb_cache_pmem(void *addr, size_t size) { u16 x86_clflush_size = boot_cpu_data.x86_clflush_size; unsigned long clflush_mask = x86_clflush_size - 1; void *vaddr = (void __force *)addr; void *vend = vaddr + size; void *vend = addr + size; void *p; for (p = (void *)((unsigned long)vaddr & ~clflush_mask); for (p = (void *)((unsigned long)addr & ~clflush_mask); p < vend; p += x86_clflush_size) clwb(p); } Loading @@ -113,16 +88,14 @@ static inline bool __iter_needs_pmem_wb(struct iov_iter *i) * @i: iterator with source data * * Copy data from the iterator 'i' to the PMEM buffer starting at 'addr'. * This function requires explicit ordering with an arch_wmb_pmem() call. */ static inline size_t arch_copy_from_iter_pmem(void __pmem *addr, size_t bytes, static inline size_t arch_copy_from_iter_pmem(void *addr, size_t bytes, struct iov_iter *i) { void *vaddr = (void __force *)addr; size_t len; /* TODO: skip the write-back by always using non-temporal stores */ len = copy_from_iter_nocache(vaddr, bytes, i); len = copy_from_iter_nocache(addr, bytes, i); if (__iter_needs_pmem_wb(i)) arch_wb_cache_pmem(addr, bytes); Loading @@ -136,28 +109,16 @@ static inline size_t arch_copy_from_iter_pmem(void __pmem *addr, size_t bytes, * @size: number of bytes to zero * * Write zeros into the memory range starting at 'addr' for 'size' bytes. * This function requires explicit ordering with an arch_wmb_pmem() call. */ static inline void arch_clear_pmem(void __pmem *addr, size_t size) static inline void arch_clear_pmem(void *addr, size_t size) { void *vaddr = (void __force *)addr; memset(vaddr, 0, size); memset(addr, 0, size); arch_wb_cache_pmem(addr, size); } static inline void arch_invalidate_pmem(void __pmem *addr, size_t size) static inline void arch_invalidate_pmem(void *addr, size_t size) { clflush_cache_range((void __force *) addr, size); } static inline bool __arch_has_wmb_pmem(void) { /* * We require that wmb() be an 'sfence', that is only guaranteed on * 64-bit builds */ return static_cpu_has(X86_FEATURE_PCOMMIT); clflush_cache_range(addr, size); } #endif /* CONFIG_ARCH_HAS_PMEM_API */ #endif /* __ASM_X86_PMEM_H__ */