Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 4b94ffdc authored by Dan Williams's avatar Dan Williams Committed by Linus Torvalds
Browse files

x86, mm: introduce vmem_altmap to augment vmemmap_populate()



In support of providing struct page for large persistent memory
capacities, use struct vmem_altmap to change the default policy for
allocating memory for the memmap array.  The default vmemmap_populate()
allocates page table storage area from the page allocator.  Given
persistent memory capacities relative to DRAM it may not be feasible to
store the memmap in 'System Memory'.  Instead vmem_altmap represents
pre-allocated "device pages" to satisfy vmemmap_alloc_block_buf()
requests.

Signed-off-by: default avatarDan Williams <dan.j.williams@intel.com>
Reported-by: default avatarkbuild test robot <lkp@intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent 9476df7d
Loading
Loading
Loading
Loading
+26 −7
Original line number Diff line number Diff line
@@ -30,6 +30,7 @@
#include <linux/module.h>
#include <linux/memory.h>
#include <linux/memory_hotplug.h>
#include <linux/memremap.h>
#include <linux/nmi.h>
#include <linux/gfp.h>
#include <linux/kcore.h>
@@ -714,6 +715,12 @@ static void __meminit free_pagetable(struct page *page, int order)
{
	unsigned long magic;
	unsigned int nr_pages = 1 << order;
	struct vmem_altmap *altmap = to_vmem_altmap((unsigned long) page);

	if (altmap) {
		vmem_altmap_free(altmap, nr_pages);
		return;
	}

	/* bootmem page has reserved flag */
	if (PageReserved(page)) {
@@ -1017,13 +1024,19 @@ int __ref arch_remove_memory(u64 start, u64 size)
{
	unsigned long start_pfn = start >> PAGE_SHIFT;
	unsigned long nr_pages = size >> PAGE_SHIFT;
	struct page *page = pfn_to_page(start_pfn);
	struct vmem_altmap *altmap;
	struct zone *zone;
	int ret;

	zone = page_zone(pfn_to_page(start_pfn));
	kernel_physical_mapping_remove(start, start + size);
	/* With altmap the first mapped page is offset from @start */
	altmap = to_vmem_altmap((unsigned long) page);
	if (altmap)
		page += vmem_altmap_offset(altmap);
	zone = page_zone(page);
	ret = __remove_pages(zone, start_pfn, nr_pages);
	WARN_ON_ONCE(ret);
	kernel_physical_mapping_remove(start, start + size);

	return ret;
}
@@ -1235,7 +1248,7 @@ static void __meminitdata *p_start, *p_end;
static int __meminitdata node_start;

static int __meminit vmemmap_populate_hugepages(unsigned long start,
						unsigned long end, int node)
		unsigned long end, int node, struct vmem_altmap *altmap)
{
	unsigned long addr;
	unsigned long next;
@@ -1258,7 +1271,7 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start,
		if (pmd_none(*pmd)) {
			void *p;

			p = vmemmap_alloc_block_buf(PMD_SIZE, node);
			p = __vmemmap_alloc_block_buf(PMD_SIZE, node, altmap);
			if (p) {
				pte_t entry;

@@ -1279,7 +1292,8 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start,
				addr_end = addr + PMD_SIZE;
				p_end = p + PMD_SIZE;
				continue;
			}
			} else if (altmap)
				return -ENOMEM; /* no fallback */
		} else if (pmd_large(*pmd)) {
			vmemmap_verify((pte_t *)pmd, node, addr, next);
			continue;
@@ -1293,11 +1307,16 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start,

int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
{
	struct vmem_altmap *altmap = to_vmem_altmap(start);
	int err;

	if (cpu_has_pse)
		err = vmemmap_populate_hugepages(start, end, node);
	else
		err = vmemmap_populate_hugepages(start, end, node, altmap);
	else if (altmap) {
		pr_err_once("%s: no cpu support for altmap allocations\n",
				__func__);
		err = -ENOMEM;
	} else
		err = vmemmap_populate_basepages(start, end, node);
	if (!err)
		sync_global_pgds(start, end - 1, 0);
+4 −2
Original line number Diff line number Diff line
@@ -178,7 +178,8 @@ static struct pmem_device *pmem_alloc(struct device *dev,

	pmem->pfn_flags = PFN_DEV;
	if (pmem_should_map_pages(dev)) {
		pmem->virt_addr = (void __pmem *) devm_memremap_pages(dev, res);
		pmem->virt_addr = (void __pmem *) devm_memremap_pages(dev, res,
				NULL);
		pmem->pfn_flags |= PFN_MAP;
	} else
		pmem->virt_addr = (void __pmem *) devm_memremap(dev,
@@ -387,7 +388,8 @@ static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns)
	/* establish pfn range for lookup, and switch to direct map */
	pmem = dev_get_drvdata(dev);
	devm_memunmap(dev, (void __force *) pmem->virt_addr);
	pmem->virt_addr = (void __pmem *) devm_memremap_pages(dev, &nsio->res);
	pmem->virt_addr = (void __pmem *) devm_memremap_pages(dev, &nsio->res,
			NULL);
	pmem->pfn_flags |= PFN_MAP;
	if (IS_ERR(pmem->virt_addr)) {
		rc = PTR_ERR(pmem->virt_addr);
+2 −1
Original line number Diff line number Diff line
@@ -275,7 +275,8 @@ extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages);
extern bool is_memblock_offlined(struct memory_block *mem);
extern void remove_memory(int nid, u64 start, u64 size);
extern int sparse_add_one_section(struct zone *zone, unsigned long start_pfn);
extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms);
extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms,
		unsigned long map_offset);
extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map,
					  unsigned long pnum);

+35 −4
Original line number Diff line number Diff line
@@ -4,21 +4,53 @@

struct resource;
struct device;

/**
 * struct vmem_altmap - pre-allocated storage for vmemmap_populate
 * @base_pfn: base of the entire dev_pagemap mapping
 * @reserve: pages mapped, but reserved for driver use (relative to @base)
 * @free: free pages set aside in the mapping for memmap storage
 * @align: pages reserved to meet allocation alignments
 * @alloc: track pages consumed, private to vmemmap_populate()
 */
struct vmem_altmap {
	const unsigned long base_pfn;
	const unsigned long reserve;
	unsigned long free;
	unsigned long align;
	unsigned long alloc;
};

unsigned long vmem_altmap_offset(struct vmem_altmap *altmap);
void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns);

#if defined(CONFIG_SPARSEMEM_VMEMMAP) && defined(CONFIG_ZONE_DEVICE)
struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start);
#else
static inline struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start)
{
	return NULL;
}
#endif

/**
 * struct dev_pagemap - metadata for ZONE_DEVICE mappings
 * @altmap: pre-allocated/reserved memory for vmemmap allocations
 * @dev: host device of the mapping for debug
 */
struct dev_pagemap {
	/* TODO: vmem_altmap and percpu_ref count */
	struct vmem_altmap *altmap;
	const struct resource *res;
	struct device *dev;
};

#ifdef CONFIG_ZONE_DEVICE
void *devm_memremap_pages(struct device *dev, struct resource *res);
void *devm_memremap_pages(struct device *dev, struct resource *res,
		struct vmem_altmap *altmap);
struct dev_pagemap *find_dev_pagemap(resource_size_t phys);
#else
static inline void *devm_memremap_pages(struct device *dev,
		struct resource *res)
		struct resource *res, struct vmem_altmap *altmap)
{
	/*
	 * Fail attempts to call devm_memremap_pages() without
@@ -34,5 +66,4 @@ static inline struct dev_pagemap *find_dev_pagemap(resource_size_t phys)
	return NULL;
}
#endif

#endif /* _LINUX_MEMREMAP_H_ */
+8 −1
Original line number Diff line number Diff line
@@ -2217,7 +2217,14 @@ pud_t *vmemmap_pud_populate(pgd_t *pgd, unsigned long addr, int node);
pmd_t *vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node);
pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node);
void *vmemmap_alloc_block(unsigned long size, int node);
void *vmemmap_alloc_block_buf(unsigned long size, int node);
struct vmem_altmap;
void *__vmemmap_alloc_block_buf(unsigned long size, int node,
		struct vmem_altmap *altmap);
static inline void *vmemmap_alloc_block_buf(unsigned long size, int node)
{
	return __vmemmap_alloc_block_buf(size, node, NULL);
}

void vmemmap_verify(pte_t *, int, unsigned long, unsigned long);
int vmemmap_populate_basepages(unsigned long start, unsigned long end,
			       int node);
Loading