Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit e7638488 authored by Dan Williams
Browse files

mm: introduce MEMORY_DEVICE_FS_DAX and CONFIG_DEV_PAGEMAP_OPS



In preparation for fixing dax-dma-vs-unmap issues, filesystems need to
be able to rely on the fact that they will get wakeups on dev_pagemap
page-idle events. Introduce MEMORY_DEVICE_FS_DAX and
generic_dax_page_free() as common indicator / infrastructure for dax
filesystems to require. With this change there are no users of the
MEMORY_DEVICE_HOST designation, so remove it.

The HMM sub-system extended dev_pagemap to arrange a callback when a
dev_pagemap managed page is freed. Since a dev_pagemap page is free /
idle when its reference count is 1 it requires an additional branch to
check the page-type at put_page() time. Given put_page() is a hot-path
we do not want to incur that check if HMM is not in use, so a static
branch is used to avoid that overhead when not necessary.

Now, the FS_DAX implementation wants to reuse this mechanism for
receiving dev_pagemap ->page_free() callbacks. Rework the HMM-specific
static-key into a generic mechanism that either HMM or FS_DAX code paths
can enable.

For ARCH=um builds, and any other arch that lacks ZONE_DEVICE support,
care must be taken to compile out the DEV_PAGEMAP_OPS infrastructure.
However, we still need to support FS_DAX in the FS_DAX_LIMITED case
implemented by the s390/dcssblk driver.

Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Michal Hocko <mhocko@suse.com>
Reported-by: kbuild test robot <lkp@intel.com>
Reported-by: Thomas Meyer <thomas@m3y3r.de>
Reported-by: Dave Jiang <dave.jiang@intel.com>
Cc: "Jérôme Glisse" <jglisse@redhat.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
parent 5981690d
Loading
Loading
Loading
Loading
+11 −3
Original line number Diff line number Diff line
@@ -86,6 +86,7 @@ int __bdev_dax_supported(struct super_block *sb, int blocksize)
{
	struct block_device *bdev = sb->s_bdev;
	struct dax_device *dax_dev;
	bool dax_enabled = false;
	pgoff_t pgoff;
	int err, id;
	void *kaddr;
@@ -134,14 +135,21 @@ int __bdev_dax_supported(struct super_block *sb, int blocksize)
		 * on being able to do (page_address(pfn_to_page())).
		 */
		WARN_ON(IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API));
		dax_enabled = true;
	} else if (pfn_t_devmap(pfn)) {
		/* pass */;
	} else {
		struct dev_pagemap *pgmap;

		pgmap = get_dev_pagemap(pfn_t_to_pfn(pfn), NULL);
		if (pgmap && pgmap->type == MEMORY_DEVICE_FS_DAX)
			dax_enabled = true;
		put_dev_pagemap(pgmap);
	}

	if (!dax_enabled) {
		pr_debug("VFS (%s): error: dax support not enabled\n",
				sb->s_id);
		return -EOPNOTSUPP;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(__bdev_dax_supported);
+0 −2
Original line number Diff line number Diff line
@@ -561,8 +561,6 @@ static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap)
	res->start += start_pad;
	res->end -= end_trunc;

	pgmap->type = MEMORY_DEVICE_HOST;

	if (nd_pfn->mode == PFN_MODE_RAM) {
		if (offset < SZ_8K)
			return -EINVAL;
+25 −0
Original line number Diff line number Diff line
@@ -294,6 +294,27 @@ static void pmem_release_disk(void *__pmem)
	put_disk(pmem->disk);
}

/*
 * Release action paired with setup_pagemap_fsdax(): drops the reference
 * that setup_pagemap_fsdax() took with dev_pagemap_get_ops().
 *
 * @__pgmap: cookie supplied when the action was registered; not used here.
 */
static void pmem_release_pgmap_ops(void *__pgmap)
{
	dev_pagemap_put_ops();
}

/*
 * ->page_free() callback installed by setup_pagemap_fsdax().
 *
 * Issues a wake_up_var() keyed on the address of @page's reference count,
 * so anything waiting on that variable is notified.
 *
 * @page: the dev_pagemap page the callback is invoked for.
 * @data: callback cookie; not used here.
 */
static void fsdax_pagefree(struct page *page, void *data)
{
	wake_up_var(&page->_refcount);
}

/*
 * Prepare @pgmap for use as MEMORY_DEVICE_FS_DAX memory.
 *
 * Takes a dev_pagemap ops reference and registers pmem_release_pgmap_ops()
 * as a devm action on @dev so the reference is dropped again later. Only
 * once that registration has succeeded is @pgmap marked as FS_DAX and given
 * fsdax_pagefree() as its ->page_free() callback.
 *
 * Returns 0 on success, or -ENOMEM if the devm action could not be added
 * (in which case @pgmap is left unmodified).
 */
static int setup_pagemap_fsdax(struct device *dev, struct dev_pagemap *pgmap)
{
	int rc;

	dev_pagemap_get_ops();

	/* On failure the "_or_reset" variant has already run the action. */
	rc = devm_add_action_or_reset(dev, pmem_release_pgmap_ops, pgmap);
	if (rc != 0)
		return -ENOMEM;

	pgmap->type = MEMORY_DEVICE_FS_DAX;
	pgmap->page_free = fsdax_pagefree;
	return 0;
}

static int pmem_attach_disk(struct device *dev,
		struct nd_namespace_common *ndns)
{
@@ -353,6 +374,8 @@ static int pmem_attach_disk(struct device *dev,
	pmem->pfn_flags = PFN_DEV;
	pmem->pgmap.ref = &q->q_usage_counter;
	if (is_nd_pfn(dev)) {
		if (setup_pagemap_fsdax(dev, &pmem->pgmap))
			return -ENOMEM;
		addr = devm_memremap_pages(dev, &pmem->pgmap);
		pfn_sb = nd_pfn->pfn_sb;
		pmem->data_offset = le64_to_cpu(pfn_sb->dataoff);
@@ -364,6 +387,8 @@ static int pmem_attach_disk(struct device *dev,
	} else if (pmem_should_map_pages(dev)) {
		memcpy(&pmem->pgmap.res, &nsio->res, sizeof(pmem->pgmap.res));
		pmem->pgmap.altmap_valid = false;
		if (setup_pagemap_fsdax(dev, &pmem->pgmap))
			return -ENOMEM;
		addr = devm_memremap_pages(dev, &pmem->pgmap);
		pmem->pfn_flags |= PFN_MAP;
		memcpy(&bb_res, &pmem->pgmap.res, sizeof(bb_res));
+1 −0
Original line number Diff line number Diff line
@@ -38,6 +38,7 @@ config FS_DAX
	bool "Direct Access (DAX) support"
	depends on MMU
	depends on !(ARM || MIPS || SPARC)
	select DEV_PAGEMAP_OPS if (ZONE_DEVICE && !FS_DAX_LIMITED)
	select FS_IOMAP
	select DAX
	help
+10 −26
Original line number Diff line number Diff line
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_MEMREMAP_H_
#define _LINUX_MEMREMAP_H_
#include <linux/mm.h>
#include <linux/ioport.h>
#include <linux/percpu-refcount.h>

@@ -30,13 +29,6 @@ struct vmem_altmap {
 * Specialize ZONE_DEVICE memory into multiple types, each having a different
 * usage.
 *
 * MEMORY_DEVICE_HOST:
 * Persistent device memory (pmem): struct page might be allocated in different
 * memory and architecture might want to perform special actions. It is similar
 * to regular memory, in that the CPU can access it transparently. However,
 * it is likely to have different bandwidth and latency than regular memory.
 * See Documentation/nvdimm/nvdimm.txt for more information.
 *
 * MEMORY_DEVICE_PRIVATE:
 * Device memory that is not directly addressable by the CPU: CPU can neither
 * read nor write private memory. In this case, we do still have struct pages
@@ -53,11 +45,19 @@ struct vmem_altmap {
 * driver can hotplug the device memory using ZONE_DEVICE and with that memory
 * type. Any page of a process can be migrated to such memory. However no one
 * should be allowed to pin such memory so that it can always be evicted.
 *
 * MEMORY_DEVICE_FS_DAX:
 * Host memory that has similar access semantics as System RAM i.e. DMA
 * coherent and supports page pinning. In support of coordinating page
 * pinning vs other operations MEMORY_DEVICE_FS_DAX arranges for a
 * wakeup event whenever a page is unpinned and becomes idle. This
 * wakeup is used to coordinate physical address space management (ex:
 * fs truncate/hole punch) vs pinned pages (ex: device dma).
 */
enum memory_type {
	MEMORY_DEVICE_HOST = 0,
	MEMORY_DEVICE_PRIVATE,
	MEMORY_DEVICE_PRIVATE = 1,
	MEMORY_DEVICE_PUBLIC,
	MEMORY_DEVICE_FS_DAX,
};

/*
@@ -129,8 +129,6 @@ struct dev_pagemap *get_dev_pagemap(unsigned long pfn,

unsigned long vmem_altmap_offset(struct vmem_altmap *altmap);
void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns);

static inline bool is_zone_device_page(const struct page *page);
#else
static inline void *devm_memremap_pages(struct device *dev,
		struct dev_pagemap *pgmap)
@@ -161,20 +159,6 @@ static inline void vmem_altmap_free(struct vmem_altmap *altmap,
}
#endif /* CONFIG_ZONE_DEVICE */

#if defined(CONFIG_DEVICE_PRIVATE) || defined(CONFIG_DEVICE_PUBLIC)
static inline bool is_device_private_page(const struct page *page)
{
	return is_zone_device_page(page) &&
		page->pgmap->type == MEMORY_DEVICE_PRIVATE;
}

static inline bool is_device_public_page(const struct page *page)
{
	return is_zone_device_page(page) &&
		page->pgmap->type == MEMORY_DEVICE_PUBLIC;
}
#endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */

static inline void put_dev_pagemap(struct dev_pagemap *pgmap)
{
	if (pgmap)
Loading