Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit ae1dba94 authored by Andrea Reale, committed by Isaac J. Manjarres
Browse files

arm64: Hot-remove implementation for arm64



- arch_remove_memory interface
- kernel page tables cleanup
- vmemmap_free implementation for arm64

Change-Id: I8945b6b91ed7012abc1478de266302427ebeb639
Signed-off-by: Andrea Reale <ar@linux.vnet.ibm.com>
Signed-off-by: Maciej Bielski <m.bielski@virtualopensystems.com>
Patch-mainline: linux-kernel @ 11 Apr 2017, 18:25
Signed-off-by: Srivatsa Vaddagiri <vatsa@codeaurora.org>
Signed-off-by: Arun KS <arunks@codeaurora.org>
Signed-off-by: Sudarshan Rajagopalan <sudaraja@codeaurora.org>
[swatsrid@codeaurora.org: Fix merge conflicts]
Signed-off-by: Swathi Sridhar <swatsrid@codeaurora.org>
[isaacm@codeaurora.org: Resolved merge conflicts due to MHP code in mmu.c]
Signed-off-by: Isaac J. Manjarres <isaacm@codeaurora.org>
parent 63ab21d4
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -276,6 +276,9 @@ config ARCH_ENABLE_MEMORY_HOTPLUG
    depends on !NUMA
	def_bool y

config ARCH_ENABLE_MEMORY_HOTREMOVE
	def_bool y

config SMP
	def_bool y

+15 −0
Original line number Diff line number Diff line
@@ -499,6 +499,11 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd)

/* No-op on arm64: the body is empty, there is nothing to undo for pte_offset_map(). */
static inline void pte_unmap(pte_t *pte) { }

/* Kernel virtual address of the PTE table referenced by this PMD entry. */
static inline unsigned long pmd_page_vaddr(pmd_t pmd)
{
	phys_addr_t table_pa = pmd_page_paddr(pmd);

	return (unsigned long)__va(table_pa);
}

/* Find an entry in the third-level page table. */
#define pte_index(addr)		(((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))

@@ -558,6 +563,11 @@ static inline phys_addr_t pud_page_paddr(pud_t pud)
	return __pud_to_phys(pud);
}

/* Kernel virtual address of the PMD table referenced by this PUD entry. */
static inline unsigned long pud_page_vaddr(pud_t pud)
{
	phys_addr_t table_pa = pud_page_paddr(pud);

	return (unsigned long)__va(table_pa);
}

/* Find an entry in the second-level page table. */
#define pmd_index(addr)		(((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))

@@ -616,6 +626,11 @@ static inline phys_addr_t pgd_page_paddr(pgd_t pgd)
	return __pgd_to_phys(pgd);
}

/* Kernel virtual address of the PUD table referenced by this PGD entry. */
static inline unsigned long pgd_page_vaddr(pgd_t pgd)
{
	phys_addr_t table_pa = pgd_page_paddr(pgd);

	return (unsigned long)__va(table_pa);
}

/* Find an entry in the first-level page table. */
#define pud_index(addr)		(((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))

+390 −2
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0-only
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
/*
 * Based on arch/arm/mm/mmu.c
 *
@@ -686,6 +685,381 @@ void __init paging_init(void)
	memblock_allow_resize();
}

#define PAGE_INUSE 0xFD

/*
 * Release a (possibly multi-page) page-table or vmemmap backing page.
 *
 * @page:   first struct page of the allocation to free
 * @order:  log2 of the number of contiguous pages to free
 * @direct: true when tearing down the direct (linear) mapping; such
 *          page tables were allocated via hotplug and went through
 *          pgtable_pte_page_ctor, so the matching dtor must run.
 */
static void  free_pagetable(struct page *page, int order, bool direct)
{
	unsigned long magic;
	unsigned int nr_pages = 1 << order;

	/* bootmem page has reserved flag */
	if (PageReserved(page)) {
		__ClearPageReserved(page);

		/* Bootmem pages stash a type marker in page->lru.next. */
		magic = (unsigned long)page->lru.next;
		if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) {
			while (nr_pages--)
				put_page_bootmem(page++);
		} else {
			while (nr_pages--)
				free_reserved_page(page++);
		}
	} else {
		/*
		 * Only direct pagetable allocation (those allocated via
		 * hotplug) call the pgtable_page_ctor; vmemmap pgtable
		 * allocations don't.
		 */
		if (direct)
			pgtable_pte_page_dtor(page);

		free_pages((unsigned long)page_address(page), order);
	}
}

/*
 * Free the PTE table referenced by @pmd, but only once every one of
 * its entries is empty, then clear the PMD entry itself.
 */
static void free_pte_table(pmd_t *pmd, bool direct)
{
	pte_t *table = (pte_t *)pmd_page_vaddr(*pmd);
	int i;

	/* Bail out while any slot of the PTE table is still in use. */
	for (i = 0; i < PTRS_PER_PTE; i++)
		if (!pte_none(table[i]))
			return;

	free_pagetable(pmd_page(*pmd), 0, direct);

	/*
	 * This spin lock could only be taken in __pte_alloc_kernel in
	 * mm/memory.c and nowhere else (for arm64). It is not clear
	 * whether the function above can be called concurrently; in
	 * doubt it is left here for now, but it can probably be removed.
	 */
	spin_lock(&init_mm.page_table_lock);
	pmd_clear(pmd);
	spin_unlock(&init_mm.page_table_lock);
}

/*
 * Free the PMD table referenced by @pud, but only once every one of
 * its entries is empty, then clear the PUD entry itself.
 */
static void free_pmd_table(pud_t *pud, bool direct)
{
	pmd_t *table = (pmd_t *)pud_page_vaddr(*pud);
	int i;

	/* Bail out while any slot of the PMD table is still in use. */
	for (i = 0; i < PTRS_PER_PMD; i++)
		if (!pmd_none(table[i]))
			return;

	free_pagetable(pud_page(*pud), 0, direct);

	/*
	 * This spin lock could only be taken in __pte_alloc_kernel in
	 * mm/memory.c and nowhere else (for arm64). It is not clear
	 * whether the function above can be called concurrently; in
	 * doubt it is left here for now, but it can probably be removed.
	 */
	spin_lock(&init_mm.page_table_lock);
	pud_clear(pud);
	spin_unlock(&init_mm.page_table_lock);
}

/*
 * When the PUD is folded on the PGD (three levels of paging),
 * there's no need to free PUDs
 */
#if CONFIG_PGTABLE_LEVELS > 3
/*
 * Free the PUD table referenced by @pgd, but only once every one of
 * its entries is empty, then clear the PGD entry itself.
 */
static void free_pud_table(pgd_t *pgd, bool direct)
{
	pud_t *table = (pud_t *)pgd_page_vaddr(*pgd);
	int i;

	/* Bail out while any slot of the PUD table is still in use. */
	for (i = 0; i < PTRS_PER_PUD; i++)
		if (!pud_none(table[i]))
			return;

	free_pagetable(pgd_page(*pgd), 0, direct);

	/*
	 * This spin lock could only be taken in __pte_alloc_kernel in
	 * mm/memory.c and nowhere else (for arm64). It is not clear
	 * whether the function above can be called concurrently; in
	 * doubt it is left here for now, but it can probably be removed.
	 */
	spin_lock(&init_mm.page_table_lock);
	pgd_clear(pgd);
	spin_unlock(&init_mm.page_table_lock);
}
#endif

/*
 * Remove the PTE-level translations for [addr, end).
 *
 * @direct: true when tearing down the linear mapping — the mapped
 * pages themselves are not freed here (they were released at offline
 * time), only the translations are cleared.  When false we are
 * freeing vmemmap pages; a page shared by several page structs is
 * poisoned with PAGE_INUSE and only freed once wholly poisoned.
 */
static void remove_pte_table(pte_t *pte, unsigned long addr,
	unsigned long end, bool direct)
{
	unsigned long next;
	void *page_addr;

	for (; addr < end; addr = next, pte++) {
		/* Step one page at a time, clamped to end. */
		next = (addr + PAGE_SIZE) & PAGE_MASK;
		if (next > end)
			next = end;

		if (!pte_present(*pte))
			continue;

		if (PAGE_ALIGNED(addr) && PAGE_ALIGNED(next)) {
			/*
			 * Do not free direct mapping pages since they were
			 * freed when offlining, or simply not in use.
			 */
			if (!direct)
				free_pagetable(pte_page(*pte), 0, direct);

			/*
			 * This spin lock could only be taken in
			 * __pte_alloc_kernel in mm/memory.c and
			 * nowhere else (for arm64).  It is unclear
			 * whether the function above can be called
			 * concurrently; in doubt it is left here
			 * for now, but it can probably be removed.
			 */
			spin_lock(&init_mm.page_table_lock);
			pte_clear(&init_mm, addr, pte);
			spin_unlock(&init_mm.page_table_lock);
		} else {
			/*
			 * If we are here, we are freeing vmemmap pages since
			 * direct mapped memory ranges to be freed are aligned.
			 *
			 * If we are not removing the whole page, it means
			 * other page structs in this page are being used and
			 * we cannot remove them.  So fill the unused
			 * page_structs with PAGE_INUSE (0xFD), and remove
			 * the page once it is wholly filled with that poison.
			 */
			memset((void *)addr, PAGE_INUSE, next - addr);

			page_addr = page_address(pte_page(*pte));
			if (!memchr_inv(page_addr, PAGE_INUSE, PAGE_SIZE)) {
				free_pagetable(pte_page(*pte), 0, direct);

				/*
				 * See the locking comment earlier in this
				 * function; same caveat applies here.
				 */
				spin_lock(&init_mm.page_table_lock);
				pte_clear(&init_mm, addr, pte);
				spin_unlock(&init_mm.page_table_lock);
			}
		}
	}

	/*
	 * This flush is added in symmetry to the x86 code; it is not
	 * obvious why it is needed here and not in remove_p[mu]d_table.
	 */
	flush_tlb_all();
}

/*
 * Remove the PMD-level translations for [addr, end).  2MB section
 * mappings are cleared (and, for vmemmap, freed) directly; table
 * entries recurse into remove_pte_table and then attempt to free the
 * now-empty PTE table.
 */
static void remove_pmd_table(pmd_t *pmd, unsigned long addr,
	unsigned long end, bool direct)
{
	unsigned long next;
	void *page_addr;
	pte_t *pte;

	for (; addr < end; addr = next, pmd++) {
		next = pmd_addr_end(addr, end);

		if (!pmd_present(*pmd))
			continue;

		/* Check if we are using 2MB section mappings. */
		if (pmd_sect(*pmd)) {
			/*
			 * NOTE(review): the x86 counterpart requires
			 * IS_ALIGNED(addr, PMD_SIZE) before freeing a
			 * whole section; PAGE_ALIGNED looks too weak for
			 * a 2MB block — verify against x86 init_64.c.
			 */
			if (PAGE_ALIGNED(addr) && PAGE_ALIGNED(next)) {
				if (!direct) {
					free_pagetable(pmd_page(*pmd),
						get_order(PMD_SIZE), direct);
				}
				/*
				 * This spin lock could only be taken in
				 * __pte_alloc_kernel in mm/memory.c and
				 * nowhere else (for arm64).  It is unclear
				 * whether the function above can be called
				 * concurrently; in doubt it is left here
				 * for now, but it can probably be removed.
				 */
				spin_lock(&init_mm.page_table_lock);
				pmd_clear(pmd);
				spin_unlock(&init_mm.page_table_lock);
			} else {
				/* If here, we are freeing vmemmap pages. */
				memset((void *)addr, PAGE_INUSE, next - addr);

				page_addr = page_address(pmd_page(*pmd));
				if (!memchr_inv(page_addr, PAGE_INUSE,
						PMD_SIZE)) {
					free_pagetable(pmd_page(*pmd),
						get_order(PMD_SIZE), direct);

					/*
					 * See the locking comment earlier
					 * in this function; same caveat
					 * applies here.
					 */
					spin_lock(&init_mm.page_table_lock);
					pmd_clear(pmd);
					spin_unlock(&init_mm.page_table_lock);
				}
			}
			continue;
		}

		/* Not a section: must be a table entry pointing to PTEs. */
		BUG_ON(!pmd_table(*pmd));

		pte = pte_offset_map(pmd, addr);
		remove_pte_table(pte, addr, next, direct);
		/* Free the PTE table itself if it is now entirely empty. */
		free_pte_table(pmd, direct);
	}
}

/*
 * Remove the PUD-level translations for [addr, end).  1GB section
 * mappings (4K granule) are cleared (and, for vmemmap, freed)
 * directly; table entries recurse into remove_pmd_table and then
 * attempt to free the now-empty PMD table.
 */
static void remove_pud_table(pud_t *pud, unsigned long addr,
	unsigned long end, bool direct)
{
	unsigned long next;
	pmd_t *pmd;
	void *page_addr;

	for (; addr < end; addr = next, pud++) {
		next = pud_addr_end(addr, end);
		if (!pud_present(*pud))
			continue;
		/*
		 * If we are using 4K granules, check if we are using
		 * 1GB section mapping.
		 */
		if (pud_sect(*pud)) {
			/*
			 * NOTE(review): the x86 counterpart requires
			 * IS_ALIGNED(addr, PUD_SIZE) before freeing a
			 * whole section; PAGE_ALIGNED looks too weak for
			 * a 1GB block — verify against x86 init_64.c.
			 */
			if (PAGE_ALIGNED(addr) && PAGE_ALIGNED(next)) {
				if (!direct) {
					free_pagetable(pud_page(*pud),
						get_order(PUD_SIZE), direct);
				}

				/*
				 * This spin lock could only be taken in
				 * __pte_alloc_kernel in mm/memory.c and
				 * nowhere else (for arm64).  It is unclear
				 * whether the function above can be called
				 * concurrently; in doubt it is left here
				 * for now, but it can probably be removed.
				 */
				spin_lock(&init_mm.page_table_lock);
				pud_clear(pud);
				spin_unlock(&init_mm.page_table_lock);
			} else {
				/* If here, we are freeing vmemmap pages. */
				memset((void *)addr, PAGE_INUSE, next - addr);

				page_addr = page_address(pud_page(*pud));
				if (!memchr_inv(page_addr, PAGE_INUSE,
						PUD_SIZE)) {

					free_pagetable(pud_page(*pud),
						get_order(PUD_SIZE), direct);

					/*
					 * See the locking comment earlier
					 * in this function; same caveat
					 * applies here.
					 */
					spin_lock(&init_mm.page_table_lock);
					pud_clear(pud);
					spin_unlock(&init_mm.page_table_lock);
				}
			}
			continue;
		}

		/* Not a section: must be a table entry pointing to PMDs. */
		BUG_ON(!pud_table(*pud));

		pmd = pmd_offset(pud, addr);
		remove_pmd_table(pmd, addr, next, direct);
		/* Free the PMD table itself if it is now entirely empty. */
		free_pmd_table(pud, direct);
	}
}

/*
 * Walk and tear down the kernel page tables covering [start, end).
 *
 * @direct: true when removing the linear mapping of hot-removed
 * memory (mapped pages are not freed here), false when removing
 * vmemmap backing (pages are freed as they become fully unused).
 */
static void remove_pagetable(unsigned long start, unsigned long end,
			     bool direct)
{
	unsigned long next;
	unsigned long addr;
	pgd_t *pgd;
	pud_t *pud;

	for (addr = start; addr < end; addr = next) {
		next = pgd_addr_end(addr, end);

		pgd = pgd_offset_k(addr);
		if (pgd_none(*pgd))
			continue;

		pud = pud_offset(pgd, addr);
		remove_pud_table(pud, addr, next, direct);
		/*
		 * When the PUD is folded on the PGD (three levels of paging),
		 * the PMD page was already cleared in free_pmd_table and the
		 * corresponding PGD==PUD entry reset, so there is nothing
		 * left to free at this level.
		 */
#if CONFIG_PGTABLE_LEVELS > 3
		free_pud_table(pgd, direct);
#endif
	}

	/* Drop any stale TLB entries for the range just torn down. */
	flush_tlb_all();
}


/*
 * Check whether a kernel address is valid (derived from arch/x86/).
 */
@@ -773,6 +1147,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
/*
 * Free the vmemmap range [start, end) along with its page tables
 * (direct == false selects the page-freeing teardown path).
 *
 * NOTE(review): @altmap is ignored here — altmap-backed ranges do not
 * appear to be handled by this teardown path; confirm with callers.
 */
void vmemmap_free(unsigned long start, unsigned long end,
		struct vmem_altmap *altmap)
{
	remove_pagetable(start, end, false);
}
#endif	/* CONFIG_SPARSEMEM_VMEMMAP */

@@ -1079,6 +1454,17 @@ int arch_add_memory(int nid, u64 start, u64 size,

	return ret;
}

/*
 * Tear down the linear-map translations for the physical range
 * [start, end); the underlying pages are not freed (direct == true).
 */
static void kernel_physical_mapping_remove(unsigned long start,
	unsigned long end)
{
	unsigned long vstart = (unsigned long)__va(start);
	unsigned long vend = (unsigned long)__va(end);

	remove_pagetable(vstart, vend, true);
}

void arch_remove_memory(int nid, u64 start, u64 size,
			struct vmem_altmap *altmap)
{
@@ -1095,5 +1481,7 @@ void arch_remove_memory(int nid, u64 start, u64 size,
	 */
	zone = page_zone(pfn_to_page(start_pfn));
	__remove_pages(zone, start_pfn, nr_pages, altmap);

	kernel_physical_mapping_remove(start, start + size);
}
#endif
#endif /* CONFIG_MEMORY_HOTPLUG */