Loading arch/arm64/Kconfig +1 −0 Original line number Diff line number Diff line Loading @@ -653,6 +653,7 @@ config HOTPLUG_CPU can be controlled through /sys/devices/system/cpu. config ARCH_ENABLE_MEMORY_HOTPLUG depends on !NUMA def_bool y config ARCH_ENABLE_MEMORY_HOTREMOVE Loading arch/arm64/include/asm/mmu.h +7 −0 Original line number Diff line number Diff line Loading @@ -35,5 +35,12 @@ extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot); extern void *fixmap_remap_fdt(phys_addr_t dt_phys); #ifdef CONFIG_MEMORY_HOTPLUG extern void hotplug_paging(phys_addr_t start, phys_addr_t size); #ifdef CONFIG_MEMORY_HOTREMOVE extern void remove_pagetable(unsigned long start, unsigned long end, bool direct); #endif #endif #endif arch/arm64/include/asm/pgtable.h +15 −0 Original line number Diff line number Diff line Loading @@ -461,6 +461,11 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd) return pmd_val(pmd) & PHYS_MASK & (s32)PAGE_MASK; } static inline unsigned long pmd_page_vaddr(pmd_t pmd) { return (unsigned long) __va(pmd_page_paddr(pmd)); } /* Find an entry in the third-level page table. */ #define pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) Loading Loading @@ -512,6 +517,11 @@ static inline phys_addr_t pud_page_paddr(pud_t pud) return pud_val(pud) & PHYS_MASK & (s32)PAGE_MASK; } static inline unsigned long pud_page_vaddr(pud_t pud) { return (unsigned long) __va(pud_page_paddr(pud)); } /* Find an entry in the second-level page table. */ #define pmd_index(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)) Loading Loading @@ -564,6 +574,11 @@ static inline phys_addr_t pgd_page_paddr(pgd_t pgd) return pgd_val(pgd) & PHYS_MASK & (s32)PAGE_MASK; } static inline unsigned long pgd_page_vaddr(pgd_t pgd) { return (unsigned long) __va(pgd_page_paddr(pgd)); } /* Find an entry in the frst-level page table. 
*/ #define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) Loading arch/arm64/mm/init.c +78 −7 Original line number Diff line number Diff line Loading @@ -503,14 +503,74 @@ int arch_add_memory(int nid, u64 start, u64 size, bool for_device) struct zone *zone; unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; unsigned long end_pfn = start_pfn + nr_pages; unsigned long max_sparsemem_pfn = 1UL << (MAX_PHYSMEM_BITS-PAGE_SHIFT); int ret; if (end_pfn > max_sparsemem_pfn) { pr_err("end_pfn too big"); return -1; } hotplug_paging(start, size); /* * Mark the first page in the range as unusable. This is needed * because __add_section (within __add_pages) wants pfn_valid * of it to be false, and in arm64 pfn falid is implemented by * just checking at the nomap flag for existing blocks. * * A small trick here is that __add_section() requires only * phys_start_pfn (that is the first pfn of a section) to be * invalid. Regardless of whether it was assumed (by the function * author) that all pfns within a section are either all valid * or all invalid, it allows to avoid looping twice (once here, * second when memblock_clear_nomap() is called) through all * pfns of the section and modify only one pfn. Thanks to that, * further, in __add_zone() only this very first pfn is skipped * and corresponding page is not flagged reserved. Therefore it * is enough to correct this setup only for it. * * When arch_add_memory() returns the walk_memory_range() function * is called and passed with online_memory_block() callback, * which execution finally reaches the memory_block_action() * function, where also only the first pfn of a memory block is * checked to be reserved. 
Above, it was first pfn of a section, * here it is a block but * (drivers/base/memory.c): * sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE; * (include/linux/memory.h): * #define MIN_MEMORY_BLOCK_SIZE (1UL << SECTION_SIZE_BITS) * so we can consider block and section equivalently */ memblock_mark_nomap(start, 1<<PAGE_SHIFT); pgdat = NODE_DATA(nid); zone = pgdat->node_zones + zone_for_memory(nid, start, size, ZONE_NORMAL, for_device); ret = __add_pages(nid, zone, start_pfn, nr_pages); /* * Make the pages usable after they have been added. * This will make pfn_valid return true */ memblock_clear_nomap(start, 1<<PAGE_SHIFT); /* * This is a hack to avoid having to mix arch specific code * into arch independent code. SetPageReserved is supposed * to be called by __add_zone (within __add_section, within * __add_pages). However, when it is called there, it assumes that * pfn_valid returns true. For the way pfn_valid is implemented * in arm64 (a check on the nomap flag), the only way to make * this evaluate true inside __add_zone is to clear the nomap * flags of blocks in architecture independent code. * * To avoid this, we set the Reserved flag here after we cleared * the nomap flag in the line above. 
*/ SetPageReserved(pfn_to_page(start_pfn)); if (ret) pr_warn("%s: Problem encountered in __add_pages() ret=%d\n", __func__, ret); Loading @@ -519,21 +579,32 @@ int arch_add_memory(int nid, u64 start, u64 size, bool for_device) } #ifdef CONFIG_MEMORY_HOTREMOVE static void kernel_physical_mapping_remove(unsigned long start, unsigned long end) { start = (unsigned long)__va(start); end = (unsigned long)__va(end); remove_pagetable(start, end, true); } int arch_remove_memory(u64 start, u64 size) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; struct page *page = pfn_to_page(start_pfn); struct zone *zone; int ret; int ret = 0; zone = page_zone(pfn_to_page(start_pfn)); zone = page_zone(page); ret = __remove_pages(zone, start_pfn, nr_pages); if (ret) pr_warn("%s: Problem encountered in __remove_pages() ret=%d\n", __func__, ret); WARN_ON_ONCE(ret); kernel_physical_mapping_remove(start, start + size); return ret; } #endif #endif #endif /* CONFIG_MEMORY_HOTREMOVE */ #endif /* CONFIG_MEMORY_HOTPLUG */ arch/arm64/mm/mmu.c +420 −0 Original line number Diff line number Diff line Loading @@ -605,6 +605,423 @@ void __init paging_init(void) bootmem_init(); } #ifdef CONFIG_MEMORY_HOTPLUG static phys_addr_t pgd_pgtable_alloc(void) { void *ptr = (void *)__get_free_page(PGALLOC_GFP); if (!ptr || !pgtable_page_ctor(virt_to_page(ptr))) BUG(); /* Ensure the zeroed page is visible to the page table walker */ dsb(ishst); return __pa(ptr); } /* * hotplug_paging() is used by memory hotplug to build new page tables * for hot added memory. 
*/ void hotplug_paging(phys_addr_t start, phys_addr_t size) { struct page *pg; phys_addr_t pgd_phys = pgd_pgtable_alloc(); pgd_t *pgd = pgd_set_fixmap(pgd_phys); memcpy(pgd, swapper_pg_dir, PAGE_SIZE); __create_pgd_mapping(pgd, start, __phys_to_virt(start), size, PAGE_KERNEL, pgd_pgtable_alloc); cpu_replace_ttbr1(__va(pgd_phys)); memcpy(swapper_pg_dir, pgd, PAGE_SIZE); cpu_replace_ttbr1(swapper_pg_dir); pgd_clear_fixmap(); pg = phys_to_page(pgd_phys); pgtable_page_dtor(pg); __free_pages(pg, 0); } #ifdef CONFIG_MEMORY_HOTREMOVE #define PAGE_INUSE 0xFD static void free_pagetable(struct page *page, int order, bool direct) { unsigned long magic; unsigned int nr_pages = 1 << order; /* bootmem page has reserved flag */ if (PageReserved(page)) { __ClearPageReserved(page); magic = (unsigned long)page->lru.next; if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) { while (nr_pages--) put_page_bootmem(page++); } else { while (nr_pages--) free_reserved_page(page++); } } else { /* * Only direct pagetable allocation (those allocated via * hotplug) call the pgtable_page_ctor; vmemmap pgtable * allocations don't. */ if (direct) pgtable_page_dtor(page); free_pages((unsigned long)page_address(page), order); } } static void free_pte_table(pmd_t *pmd, bool direct) { pte_t *pte_start, *pte; struct page *page; int i; pte_start = (pte_t *) pmd_page_vaddr(*pmd); /* Check if there is no valid entry in the PMD */ for (i = 0; i < PTRS_PER_PTE; i++) { pte = pte_start + i; if (!pte_none(*pte)) return; } page = pmd_page(*pmd); free_pagetable(page, 0, direct); /* * This spin lock could be only taken in _pte_aloc_kernel * in mm/memory.c and nowhere else (for arm64). Not sure if * the function above can be called concurrently. 
In doubt, * I am living it here for now, but it probably can be removed */ spin_lock(&init_mm.page_table_lock); pmd_clear(pmd); spin_unlock(&init_mm.page_table_lock); } static void free_pmd_table(pud_t *pud, bool direct) { pmd_t *pmd_start, *pmd; struct page *page; int i; pmd_start = (pmd_t *) pud_page_vaddr(*pud); /* Check if there is no valid entry in the PMD */ for (i = 0; i < PTRS_PER_PMD; i++) { pmd = pmd_start + i; if (!pmd_none(*pmd)) return; } page = pud_page(*pud); free_pagetable(page, 0, direct); /* * This spin lock could be only taken in _pte_aloc_kernel * in mm/memory.c and nowhere else (for arm64). Not sure if * the function above can be called concurrently. In doubt, * I am living it here for now, but it probably can be removed */ spin_lock(&init_mm.page_table_lock); pud_clear(pud); spin_unlock(&init_mm.page_table_lock); } /* * When the PUD is folded on the PGD (three levels of paging), * there's no need to free PUDs */ #if CONFIG_PGTABLE_LEVELS > 3 static void free_pud_table(pgd_t *pgd, bool direct) { pud_t *pud_start, *pud; struct page *page; int i; pud_start = (pud_t *) pgd_page_vaddr(*pgd); /* Check if there is no valid entry in the PUD */ for (i = 0; i < PTRS_PER_PUD; i++) { pud = pud_start + i; if (!pud_none(*pud)) return; } page = pgd_page(*pgd); free_pagetable(page, 0, direct); /* * This spin lock could be only * taken in _pte_aloc_kernel in * mm/memory.c and nowhere else * (for arm64). Not sure if the * function above can be called * concurrently. In doubt, * I am living it here for now, * but it probably can be removed. 
*/ spin_lock(&init_mm.page_table_lock); pgd_clear(pgd); spin_unlock(&init_mm.page_table_lock); } #endif static void remove_pte_table(pte_t *pte, unsigned long addr, unsigned long end, bool direct) { unsigned long next; void *page_addr; for (; addr < end; addr = next, pte++) { next = (addr + PAGE_SIZE) & PAGE_MASK; if (next > end) next = end; if (!pte_present(*pte)) continue; if (PAGE_ALIGNED(addr) && PAGE_ALIGNED(next)) { /* * Do not free direct mapping pages since they were * freed when offlining, or simplely not in use. */ if (!direct) free_pagetable(pte_page(*pte), 0, direct); /* * This spin lock could be only * taken in _pte_aloc_kernel in * mm/memory.c and nowhere else * (for arm64). Not sure if the * function above can be called * concurrently. In doubt, * I am living it here for now, * but it probably can be removed. */ spin_lock(&init_mm.page_table_lock); pte_clear(&init_mm, addr, pte); spin_unlock(&init_mm.page_table_lock); } else { /* * If we are here, we are freeing vmemmap pages since * direct mapped memory ranges to be freed are aligned. * * If we are not removing the whole page, it means * other page structs in this page are being used and * we canot remove them. So fill the unused page_structs * with 0xFD, and remove the page when it is wholly * filled with 0xFD. */ memset((void *)addr, PAGE_INUSE, next - addr); page_addr = page_address(pte_page(*pte)); if (!memchr_inv(page_addr, PAGE_INUSE, PAGE_SIZE)) { free_pagetable(pte_page(*pte), 0, direct); /* * This spin lock could be only * taken in _pte_aloc_kernel in * mm/memory.c and nowhere else * (for arm64). Not sure if the * function above can be called * concurrently. In doubt, * I am living it here for now, * but it probably can be removed. */ spin_lock(&init_mm.page_table_lock); pte_clear(&init_mm, addr, pte); spin_unlock(&init_mm.page_table_lock); } } } // I am adding this flush here in simmetry to the x86 code. 
// Why do I need to call it here and not in remove_p[mu]d flush_tlb_all(); } static void remove_pmd_table(pmd_t *pmd, unsigned long addr, unsigned long end, bool direct) { unsigned long next; void *page_addr; pte_t *pte; for (; addr < end; addr = next, pmd++) { next = pmd_addr_end(addr, end); if (!pmd_present(*pmd)) continue; // check if we are using 2MB section mappings if (pmd_sect(*pmd)) { if (PAGE_ALIGNED(addr) && PAGE_ALIGNED(next)) { if (!direct) { free_pagetable(pmd_page(*pmd), get_order(PMD_SIZE), direct); } /* * This spin lock could be only * taken in _pte_aloc_kernel in * mm/memory.c and nowhere else * (for arm64). Not sure if the * function above can be called * concurrently. In doubt, * I am living it here for now, * but it probably can be removed. */ spin_lock(&init_mm.page_table_lock); pmd_clear(pmd); spin_unlock(&init_mm.page_table_lock); } else { /* If here, we are freeing vmemmap pages. */ memset((void *)addr, PAGE_INUSE, next - addr); page_addr = page_address(pmd_page(*pmd)); if (!memchr_inv(page_addr, PAGE_INUSE, PMD_SIZE)) { free_pagetable(pmd_page(*pmd), get_order(PMD_SIZE), direct); /* * This spin lock could be only * taken in _pte_aloc_kernel in * mm/memory.c and nowhere else * (for arm64). Not sure if the * function above can be called * concurrently. In doubt, * I am living it here for now, * but it probably can be removed. */ spin_lock(&init_mm.page_table_lock); pmd_clear(pmd); spin_unlock(&init_mm.page_table_lock); } } continue; } BUG_ON(!pmd_table(*pmd)); pte = pte_offset_map(pmd, addr); remove_pte_table(pte, addr, next, direct); free_pte_table(pmd, direct); } } static void remove_pud_table(pud_t *pud, unsigned long addr, unsigned long end, bool direct) { unsigned long next; pmd_t *pmd; void *page_addr; for (; addr < end; addr = next, pud++) { next = pud_addr_end(addr, end); if (!pud_present(*pud)) continue; /* * If we are using 4K granules, check if we are using * 1GB section mapping. 
*/ if (pud_sect(*pud)) { if (PAGE_ALIGNED(addr) && PAGE_ALIGNED(next)) { if (!direct) { free_pagetable(pud_page(*pud), get_order(PUD_SIZE), direct); } /* * This spin lock could be only * taken in _pte_aloc_kernel in * mm/memory.c and nowhere else * (for arm64). Not sure if the * function above can be called * concurrently. In doubt, * I am living it here for now, * but it probably can be removed. */ spin_lock(&init_mm.page_table_lock); pud_clear(pud); spin_unlock(&init_mm.page_table_lock); } else { /* If here, we are freeing vmemmap pages. */ memset((void *)addr, PAGE_INUSE, next - addr); page_addr = page_address(pud_page(*pud)); if (!memchr_inv(page_addr, PAGE_INUSE, PUD_SIZE)) { free_pagetable(pud_page(*pud), get_order(PUD_SIZE), direct); /* * This spin lock could be only * taken in _pte_aloc_kernel in * mm/memory.c and nowhere else * (for arm64). Not sure if the * function above can be called * concurrently. In doubt, * I am living it here for now, * but it probably can be removed. */ spin_lock(&init_mm.page_table_lock); pud_clear(pud); spin_unlock(&init_mm.page_table_lock); } } continue; } BUG_ON(!pud_table(*pud)); pmd = pmd_offset(pud, addr); remove_pmd_table(pmd, addr, next, direct); free_pmd_table(pud, direct); } } void remove_pagetable(unsigned long start, unsigned long end, bool direct) { unsigned long next; unsigned long addr; pgd_t *pgd; pud_t *pud; for (addr = start; addr < end; addr = next) { next = pgd_addr_end(addr, end); pgd = pgd_offset_k(addr); if (pgd_none(*pgd)) continue; pud = pud_offset(pgd, addr); remove_pud_table(pud, addr, next, direct); /* * When the PUD is folded on the PGD (three levels of paging), * I did already clear the PMD page in free_pmd_table, * and reset the corresponding PGD==PUD entry. */ #if CONFIG_PGTABLE_LEVELS > 3 free_pud_table(pgd, direct); #endif } flush_tlb_all(); } #endif /* CONFIG_MEMORY_HOTREMOVE */ #endif /* CONFIG_MEMORY_HOTPLUG */ /* * Check whether a kernel address is valid (derived from arch/x86/). 
*/ Loading Loading @@ -686,6 +1103,9 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) #endif /* CONFIG_ARM64_64K_PAGES */ void vmemmap_free(unsigned long start, unsigned long end) { #ifdef CONFIG_MEMORY_HOTREMOVE remove_pagetable(start, end, false); #endif } #endif /* CONFIG_SPARSEMEM_VMEMMAP */ Loading Loading
arch/arm64/Kconfig +1 −0 Original line number Diff line number Diff line Loading @@ -653,6 +653,7 @@ config HOTPLUG_CPU can be controlled through /sys/devices/system/cpu. config ARCH_ENABLE_MEMORY_HOTPLUG depends on !NUMA def_bool y config ARCH_ENABLE_MEMORY_HOTREMOVE Loading
arch/arm64/include/asm/mmu.h +7 −0 Original line number Diff line number Diff line Loading @@ -35,5 +35,12 @@ extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
			       unsigned long virt, phys_addr_t size, pgprot_t prot);
extern void *fixmap_remap_fdt(phys_addr_t dt_phys);
#ifdef CONFIG_MEMORY_HOTPLUG
/*
 * Build kernel page tables for a hot-added physical range of @size bytes
 * starting at @start.
 */
extern void hotplug_paging(phys_addr_t start, phys_addr_t size);
#ifdef CONFIG_MEMORY_HOTREMOVE
/*
 * Tear down the kernel page tables covering the virtual range
 * [@start, @end). @direct is true when the range belongs to the direct
 * (linear) mapping and false when it belongs to the vmemmap.
 */
extern void remove_pagetable(unsigned long start, unsigned long end, bool direct);
#endif /* CONFIG_MEMORY_HOTREMOVE */
#endif /* CONFIG_MEMORY_HOTPLUG */
#endif
arch/arm64/include/asm/pgtable.h +15 −0 Original line number Diff line number Diff line Loading @@ -461,6 +461,11 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd)
	return pmd_val(pmd) & PHYS_MASK & (s32)PAGE_MASK;
}

/*
 * Kernel virtual address (via __va()) of the physical page that @pmd
 * refers to, as extracted by pmd_page_paddr().
 */
static inline unsigned long pmd_page_vaddr(pmd_t pmd)
{
	return (unsigned long) __va(pmd_page_paddr(pmd));
}

/* Find an entry in the third-level page table. */
#define pte_index(addr)		(((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
Loading Loading @@ -512,6 +517,11 @@ static inline phys_addr_t pud_page_paddr(pud_t pud)
	return pud_val(pud) & PHYS_MASK & (s32)PAGE_MASK;
}

/*
 * Kernel virtual address (via __va()) of the physical page that @pud
 * refers to, as extracted by pud_page_paddr().
 */
static inline unsigned long pud_page_vaddr(pud_t pud)
{
	return (unsigned long) __va(pud_page_paddr(pud));
}

/* Find an entry in the second-level page table. */
#define pmd_index(addr)		(((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
Loading Loading @@ -564,6 +574,11 @@ static inline phys_addr_t pgd_page_paddr(pgd_t pgd)
	return pgd_val(pgd) & PHYS_MASK & (s32)PAGE_MASK;
}

/*
 * Kernel virtual address (via __va()) of the physical page that @pgd
 * refers to, as extracted by pgd_page_paddr().
 */
static inline unsigned long pgd_page_vaddr(pgd_t pgd)
{
	return (unsigned long) __va(pgd_page_paddr(pgd));
}

/* Find an entry in the first-level page table. */
#define pud_index(addr)		(((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
Loading
arch/arm64/mm/init.c +78 −7 Original line number Diff line number Diff line Loading @@ -503,14 +503,74 @@ int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
	/*
	 * NOTE(review): this is a diff hunk; the opening of arch_add_memory()
	 * (including the declaration of pgdat) lies before the hunk and is
	 * not visible here.
	 */
	struct zone *zone;
	unsigned long start_pfn = start >> PAGE_SHIFT;
	unsigned long nr_pages = size >> PAGE_SHIFT;
	unsigned long end_pfn = start_pfn + nr_pages;
	unsigned long max_sparsemem_pfn = 1UL << (MAX_PHYSMEM_BITS-PAGE_SHIFT);
	int ret;

	/*
	 * Reject a range whose end pfn exceeds 1 << (MAX_PHYSMEM_BITS -
	 * PAGE_SHIFT).
	 * NOTE(review): the message has no trailing newline and the error
	 * is a bare -1 rather than an errno value.
	 */
	if (end_pfn > max_sparsemem_pfn) {
		pr_err("end_pfn too big");
		return -1;
	}
	/* Create the page tables for the new range before adding its pages. */
	hotplug_paging(start, size);

	/*
	 * Mark the first page in the range as unusable. This is needed
	 * because __add_section (within __add_pages) wants pfn_valid
	 * of it to be false, and in arm64 pfn_valid is implemented by
	 * just checking the nomap flag for existing blocks.
	 *
	 * A small trick here is that __add_section() requires only
	 * phys_start_pfn (that is the first pfn of a section) to be
	 * invalid. Regardless of whether it was assumed (by the function
	 * author) that all pfns within a section are either all valid
	 * or all invalid, it allows us to avoid looping twice (once here,
	 * and again when memblock_clear_nomap() is called) through all
	 * pfns of the section and to modify only one pfn. Thanks to that,
	 * further, in __add_zone() only this very first pfn is skipped
	 * and the corresponding page is not flagged reserved. Therefore it
	 * is enough to correct this setup only for it.
	 *
	 * When arch_add_memory() returns, the walk_memory_range() function
	 * is called and passed the online_memory_block() callback,
	 * whose execution finally reaches the memory_block_action()
	 * function, where also only the first pfn of a memory block is
	 * checked to be reserved. Above, it was the first pfn of a section,
	 * here it is a block but
	 * (drivers/base/memory.c):
	 *     sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE;
	 * (include/linux/memory.h):
	 *     #define MIN_MEMORY_BLOCK_SIZE     (1UL << SECTION_SIZE_BITS)
	 * so we can consider block and section equivalently
	 */
	memblock_mark_nomap(start, 1<<PAGE_SHIFT);

	pgdat = NODE_DATA(nid);

	zone = pgdat->node_zones +
		zone_for_memory(nid, start, size, ZONE_NORMAL, for_device);
	ret = __add_pages(nid, zone, start_pfn, nr_pages);

	/*
	 * Make the pages usable after they have been added.
	 * This will make pfn_valid return true
	 */
	memblock_clear_nomap(start, 1<<PAGE_SHIFT);

	/*
	 * This is a hack to avoid having to mix arch specific code
	 * into arch independent code. SetPageReserved is supposed
	 * to be called by __add_zone (within __add_section, within
	 * __add_pages). However, when it is called there, it assumes that
	 * pfn_valid returns true.  For the way pfn_valid is implemented
	 * in arm64 (a check on the nomap flag), the only way to make
	 * this evaluate true inside __add_zone is to clear the nomap
	 * flags of blocks in architecture independent code.
	 *
	 * To avoid this, we set the Reserved flag here after we cleared
	 * the nomap flag in the line above.
	 *
	 * NOTE(review): this runs even when __add_pages() returned an
	 * error; the failure is only reported by the pr_warn() below.
	 */
	SetPageReserved(pfn_to_page(start_pfn));

	if (ret)
		pr_warn("%s: Problem encountered in __add_pages() ret=%d\n",
			__func__, ret);
Loading @@ -519,21 +579,32 @@ int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
}

#ifdef CONFIG_MEMORY_HOTREMOVE
/*
 * Remove the direct mapping of a physical range: convert the physical
 * bounds [start, end) to kernel virtual addresses with __va() and pass
 * them to remove_pagetable() with direct == true.
 */
static void kernel_physical_mapping_remove(unsigned long start,
	unsigned long end)
{
	start = (unsigned long)__va(start);
	end = (unsigned long)__va(end);

	remove_pagetable(start, end, true);
}

/*
 * NOTE(review): the body below comes from a "+78 -7" diff hunk rendered
 * without +/- markers, so removed and added lines appear side by side
 * (two declarations of ret, two assignments to zone, pr_warn() next to
 * WARN_ON_ONCE(), duplicated #endif lines). It is not compilable as
 * shown; consult the real diff to tell old lines from new ones.
 */
int arch_remove_memory(u64 start, u64 size)
{
	unsigned long start_pfn = start >> PAGE_SHIFT;
	unsigned long nr_pages = size >> PAGE_SHIFT;
	struct page *page = pfn_to_page(start_pfn);
	struct zone *zone;
	int ret;
	int ret = 0;

	zone = page_zone(pfn_to_page(start_pfn));
	zone = page_zone(page);
	ret = __remove_pages(zone, start_pfn, nr_pages);
	if (ret)
		pr_warn("%s: Problem encountered in __remove_pages() ret=%d\n",
			__func__, ret);
	WARN_ON_ONCE(ret);

	/* Drop the direct mapping of the removed physical range. */
	kernel_physical_mapping_remove(start, start + size);

	return ret;
}
#endif
#endif
#endif /* CONFIG_MEMORY_HOTREMOVE */
#endif /* CONFIG_MEMORY_HOTPLUG */
arch/arm64/mm/mmu.c +420 −0 Original line number Diff line number Diff line Loading @@ -605,6 +605,423 @@ void __init paging_init(void) bootmem_init(); } #ifdef CONFIG_MEMORY_HOTPLUG static phys_addr_t pgd_pgtable_alloc(void) { void *ptr = (void *)__get_free_page(PGALLOC_GFP); if (!ptr || !pgtable_page_ctor(virt_to_page(ptr))) BUG(); /* Ensure the zeroed page is visible to the page table walker */ dsb(ishst); return __pa(ptr); } /* * hotplug_paging() is used by memory hotplug to build new page tables * for hot added memory. */ void hotplug_paging(phys_addr_t start, phys_addr_t size) { struct page *pg; phys_addr_t pgd_phys = pgd_pgtable_alloc(); pgd_t *pgd = pgd_set_fixmap(pgd_phys); memcpy(pgd, swapper_pg_dir, PAGE_SIZE); __create_pgd_mapping(pgd, start, __phys_to_virt(start), size, PAGE_KERNEL, pgd_pgtable_alloc); cpu_replace_ttbr1(__va(pgd_phys)); memcpy(swapper_pg_dir, pgd, PAGE_SIZE); cpu_replace_ttbr1(swapper_pg_dir); pgd_clear_fixmap(); pg = phys_to_page(pgd_phys); pgtable_page_dtor(pg); __free_pages(pg, 0); } #ifdef CONFIG_MEMORY_HOTREMOVE #define PAGE_INUSE 0xFD static void free_pagetable(struct page *page, int order, bool direct) { unsigned long magic; unsigned int nr_pages = 1 << order; /* bootmem page has reserved flag */ if (PageReserved(page)) { __ClearPageReserved(page); magic = (unsigned long)page->lru.next; if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) { while (nr_pages--) put_page_bootmem(page++); } else { while (nr_pages--) free_reserved_page(page++); } } else { /* * Only direct pagetable allocation (those allocated via * hotplug) call the pgtable_page_ctor; vmemmap pgtable * allocations don't. 
*/ if (direct) pgtable_page_dtor(page); free_pages((unsigned long)page_address(page), order); } } static void free_pte_table(pmd_t *pmd, bool direct) { pte_t *pte_start, *pte; struct page *page; int i; pte_start = (pte_t *) pmd_page_vaddr(*pmd); /* Check if there is no valid entry in the PMD */ for (i = 0; i < PTRS_PER_PTE; i++) { pte = pte_start + i; if (!pte_none(*pte)) return; } page = pmd_page(*pmd); free_pagetable(page, 0, direct); /* * This spin lock could be only taken in _pte_aloc_kernel * in mm/memory.c and nowhere else (for arm64). Not sure if * the function above can be called concurrently. In doubt, * I am living it here for now, but it probably can be removed */ spin_lock(&init_mm.page_table_lock); pmd_clear(pmd); spin_unlock(&init_mm.page_table_lock); } static void free_pmd_table(pud_t *pud, bool direct) { pmd_t *pmd_start, *pmd; struct page *page; int i; pmd_start = (pmd_t *) pud_page_vaddr(*pud); /* Check if there is no valid entry in the PMD */ for (i = 0; i < PTRS_PER_PMD; i++) { pmd = pmd_start + i; if (!pmd_none(*pmd)) return; } page = pud_page(*pud); free_pagetable(page, 0, direct); /* * This spin lock could be only taken in _pte_aloc_kernel * in mm/memory.c and nowhere else (for arm64). Not sure if * the function above can be called concurrently. 
In doubt, * I am living it here for now, but it probably can be removed */ spin_lock(&init_mm.page_table_lock); pud_clear(pud); spin_unlock(&init_mm.page_table_lock); } /* * When the PUD is folded on the PGD (three levels of paging), * there's no need to free PUDs */ #if CONFIG_PGTABLE_LEVELS > 3 static void free_pud_table(pgd_t *pgd, bool direct) { pud_t *pud_start, *pud; struct page *page; int i; pud_start = (pud_t *) pgd_page_vaddr(*pgd); /* Check if there is no valid entry in the PUD */ for (i = 0; i < PTRS_PER_PUD; i++) { pud = pud_start + i; if (!pud_none(*pud)) return; } page = pgd_page(*pgd); free_pagetable(page, 0, direct); /* * This spin lock could be only * taken in _pte_aloc_kernel in * mm/memory.c and nowhere else * (for arm64). Not sure if the * function above can be called * concurrently. In doubt, * I am living it here for now, * but it probably can be removed. */ spin_lock(&init_mm.page_table_lock); pgd_clear(pgd); spin_unlock(&init_mm.page_table_lock); } #endif static void remove_pte_table(pte_t *pte, unsigned long addr, unsigned long end, bool direct) { unsigned long next; void *page_addr; for (; addr < end; addr = next, pte++) { next = (addr + PAGE_SIZE) & PAGE_MASK; if (next > end) next = end; if (!pte_present(*pte)) continue; if (PAGE_ALIGNED(addr) && PAGE_ALIGNED(next)) { /* * Do not free direct mapping pages since they were * freed when offlining, or simplely not in use. */ if (!direct) free_pagetable(pte_page(*pte), 0, direct); /* * This spin lock could be only * taken in _pte_aloc_kernel in * mm/memory.c and nowhere else * (for arm64). Not sure if the * function above can be called * concurrently. In doubt, * I am living it here for now, * but it probably can be removed. */ spin_lock(&init_mm.page_table_lock); pte_clear(&init_mm, addr, pte); spin_unlock(&init_mm.page_table_lock); } else { /* * If we are here, we are freeing vmemmap pages since * direct mapped memory ranges to be freed are aligned. 
* * If we are not removing the whole page, it means * other page structs in this page are being used and * we canot remove them. So fill the unused page_structs * with 0xFD, and remove the page when it is wholly * filled with 0xFD. */ memset((void *)addr, PAGE_INUSE, next - addr); page_addr = page_address(pte_page(*pte)); if (!memchr_inv(page_addr, PAGE_INUSE, PAGE_SIZE)) { free_pagetable(pte_page(*pte), 0, direct); /* * This spin lock could be only * taken in _pte_aloc_kernel in * mm/memory.c and nowhere else * (for arm64). Not sure if the * function above can be called * concurrently. In doubt, * I am living it here for now, * but it probably can be removed. */ spin_lock(&init_mm.page_table_lock); pte_clear(&init_mm, addr, pte); spin_unlock(&init_mm.page_table_lock); } } } // I am adding this flush here in simmetry to the x86 code. // Why do I need to call it here and not in remove_p[mu]d flush_tlb_all(); } static void remove_pmd_table(pmd_t *pmd, unsigned long addr, unsigned long end, bool direct) { unsigned long next; void *page_addr; pte_t *pte; for (; addr < end; addr = next, pmd++) { next = pmd_addr_end(addr, end); if (!pmd_present(*pmd)) continue; // check if we are using 2MB section mappings if (pmd_sect(*pmd)) { if (PAGE_ALIGNED(addr) && PAGE_ALIGNED(next)) { if (!direct) { free_pagetable(pmd_page(*pmd), get_order(PMD_SIZE), direct); } /* * This spin lock could be only * taken in _pte_aloc_kernel in * mm/memory.c and nowhere else * (for arm64). Not sure if the * function above can be called * concurrently. In doubt, * I am living it here for now, * but it probably can be removed. */ spin_lock(&init_mm.page_table_lock); pmd_clear(pmd); spin_unlock(&init_mm.page_table_lock); } else { /* If here, we are freeing vmemmap pages. 
*/ memset((void *)addr, PAGE_INUSE, next - addr); page_addr = page_address(pmd_page(*pmd)); if (!memchr_inv(page_addr, PAGE_INUSE, PMD_SIZE)) { free_pagetable(pmd_page(*pmd), get_order(PMD_SIZE), direct); /* * This spin lock could be only * taken in _pte_aloc_kernel in * mm/memory.c and nowhere else * (for arm64). Not sure if the * function above can be called * concurrently. In doubt, * I am living it here for now, * but it probably can be removed. */ spin_lock(&init_mm.page_table_lock); pmd_clear(pmd); spin_unlock(&init_mm.page_table_lock); } } continue; } BUG_ON(!pmd_table(*pmd)); pte = pte_offset_map(pmd, addr); remove_pte_table(pte, addr, next, direct); free_pte_table(pmd, direct); } } static void remove_pud_table(pud_t *pud, unsigned long addr, unsigned long end, bool direct) { unsigned long next; pmd_t *pmd; void *page_addr; for (; addr < end; addr = next, pud++) { next = pud_addr_end(addr, end); if (!pud_present(*pud)) continue; /* * If we are using 4K granules, check if we are using * 1GB section mapping. */ if (pud_sect(*pud)) { if (PAGE_ALIGNED(addr) && PAGE_ALIGNED(next)) { if (!direct) { free_pagetable(pud_page(*pud), get_order(PUD_SIZE), direct); } /* * This spin lock could be only * taken in _pte_aloc_kernel in * mm/memory.c and nowhere else * (for arm64). Not sure if the * function above can be called * concurrently. In doubt, * I am living it here for now, * but it probably can be removed. */ spin_lock(&init_mm.page_table_lock); pud_clear(pud); spin_unlock(&init_mm.page_table_lock); } else { /* If here, we are freeing vmemmap pages. */ memset((void *)addr, PAGE_INUSE, next - addr); page_addr = page_address(pud_page(*pud)); if (!memchr_inv(page_addr, PAGE_INUSE, PUD_SIZE)) { free_pagetable(pud_page(*pud), get_order(PUD_SIZE), direct); /* * This spin lock could be only * taken in _pte_aloc_kernel in * mm/memory.c and nowhere else * (for arm64). Not sure if the * function above can be called * concurrently. 
In doubt, * I am living it here for now, * but it probably can be removed. */ spin_lock(&init_mm.page_table_lock); pud_clear(pud); spin_unlock(&init_mm.page_table_lock); } } continue; } BUG_ON(!pud_table(*pud)); pmd = pmd_offset(pud, addr); remove_pmd_table(pmd, addr, next, direct); free_pmd_table(pud, direct); } } void remove_pagetable(unsigned long start, unsigned long end, bool direct) { unsigned long next; unsigned long addr; pgd_t *pgd; pud_t *pud; for (addr = start; addr < end; addr = next) { next = pgd_addr_end(addr, end); pgd = pgd_offset_k(addr); if (pgd_none(*pgd)) continue; pud = pud_offset(pgd, addr); remove_pud_table(pud, addr, next, direct); /* * When the PUD is folded on the PGD (three levels of paging), * I did already clear the PMD page in free_pmd_table, * and reset the corresponding PGD==PUD entry. */ #if CONFIG_PGTABLE_LEVELS > 3 free_pud_table(pgd, direct); #endif } flush_tlb_all(); } #endif /* CONFIG_MEMORY_HOTREMOVE */ #endif /* CONFIG_MEMORY_HOTPLUG */ /* * Check whether a kernel address is valid (derived from arch/x86/). */ Loading Loading @@ -686,6 +1103,9 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) #endif /* CONFIG_ARM64_64K_PAGES */ void vmemmap_free(unsigned long start, unsigned long end) { #ifdef CONFIG_MEMORY_HOTREMOVE remove_pagetable(start, end, false); #endif } #endif /* CONFIG_SPARSEMEM_VMEMMAP */ Loading