Loading arch/x86/mm/init_64.c +175 −126 Original line number Original line Diff line number Diff line Loading @@ -67,22 +67,26 @@ void show_mem(void) { { long i, total = 0, reserved = 0; long i, total = 0, reserved = 0; long shared = 0, cached = 0; long shared = 0, cached = 0; pg_data_t *pgdat; struct page *page; struct page *page; pg_data_t *pgdat; printk(KERN_INFO "Mem-info:\n"); printk(KERN_INFO "Mem-info:\n"); show_free_areas(); show_free_areas(); printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages << (PAGE_SHIFT-10)); for_each_online_pgdat(pgdat) { for_each_online_pgdat(pgdat) { for (i = 0; i < pgdat->node_spanned_pages; ++i) { for (i = 0; i < pgdat->node_spanned_pages; ++i) { /* this loop can take a while with 256 GB and 4k pages /* so update the NMI watchdog */ * This loop can take a while with 256 GB and if (unlikely(i % MAX_ORDER_NR_PAGES == 0)) { * 4k pages so defer the NMI watchdog: */ if (unlikely(i % MAX_ORDER_NR_PAGES == 0)) touch_nmi_watchdog(); touch_nmi_watchdog(); } if (!pfn_valid(pgdat->node_start_pfn + i)) if (!pfn_valid(pgdat->node_start_pfn + i)) continue; continue; page = pfn_to_page(pgdat->node_start_pfn + i); page = pfn_to_page(pgdat->node_start_pfn + i); total++; total++; if (PageReserved(page)) if (PageReserved(page)) Loading @@ -104,19 +108,24 @@ int after_bootmem; static __init void *spp_getpage(void) static __init void *spp_getpage(void) { { void *ptr; void *ptr; if (after_bootmem) if (after_bootmem) ptr = (void *) get_zeroed_page(GFP_ATOMIC); ptr = (void *) get_zeroed_page(GFP_ATOMIC); else else ptr = alloc_bootmem_pages(PAGE_SIZE); ptr = alloc_bootmem_pages(PAGE_SIZE); if (!ptr || ((unsigned long)ptr & ~PAGE_MASK)) panic("set_pte_phys: cannot allocate page data %s\n", after_bootmem?"after bootmem":""); if (!ptr || ((unsigned long)ptr & ~PAGE_MASK)) { panic("set_pte_phys: cannot allocate page data %s\n", after_bootmem ? "after bootmem" : ""); } Dprintk("spp_getpage %p\n", ptr); Dprintk("spp_getpage %p\n", ptr); return ptr; return ptr; } } static __init void set_pte_phys(unsigned long vaddr, static __init void unsigned long phys, pgprot_t prot) set_pte_phys(unsigned long vaddr, unsigned long phys, pgprot_t prot) { { pgd_t *pgd; pgd_t *pgd; pud_t *pud; pud_t *pud; Loading @@ -135,7 +144,8 @@ static __init void set_pte_phys(unsigned long vaddr, pmd = (pmd_t *) spp_getpage(); pmd = (pmd_t *) spp_getpage(); set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER)); set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER)); if (pmd != pmd_offset(pud, 0)) { if (pmd != pmd_offset(pud, 0)) { printk("PAGETABLE BUG #01! %p <-> %p\n", pmd, pmd_offset(pud,0)); printk("PAGETABLE BUG #01! %p <-> %p\n", pmd, pmd_offset(pud, 0)); return; return; } } } } Loading Loading @@ -187,6 +197,7 @@ static __meminit void *alloc_low_page(unsigned long *phys) if (after_bootmem) { if (after_bootmem) { adr = (void *)get_zeroed_page(GFP_ATOMIC); adr = (void *)get_zeroed_page(GFP_ATOMIC); *phys = __pa(adr); *phys = __pa(adr); return adr; return adr; } } Loading @@ -201,7 +212,6 @@ static __meminit void *alloc_low_page(unsigned long *phys) static __meminit void unmap_low_page(void *adr) static __meminit void unmap_low_page(void *adr) { { if (after_bootmem) if (after_bootmem) return; return; Loading @@ -211,33 +221,39 @@ static __meminit void unmap_low_page(void *adr) /* Must run before zap_low_mappings */ /* Must run before zap_low_mappings */ __meminit void *early_ioremap(unsigned long addr, unsigned long size) __meminit void *early_ioremap(unsigned long addr, unsigned long size) { { unsigned long vaddr; pmd_t *pmd, *last_pmd; pmd_t *pmd, *last_pmd; unsigned long vaddr; int i, pmds; int i, pmds; pmds = ((addr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE; pmds = ((addr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE; vaddr = __START_KERNEL_map; vaddr = __START_KERNEL_map; pmd = level2_kernel_pgt; pmd = level2_kernel_pgt; last_pmd = level2_kernel_pgt + PTRS_PER_PMD - 1; last_pmd = level2_kernel_pgt + PTRS_PER_PMD - 1; for (; pmd <= last_pmd; pmd++, vaddr += PMD_SIZE) { for (; pmd <= last_pmd; pmd++, vaddr += PMD_SIZE) { for (i = 0; i < pmds; i++) { for (i = 0; i < pmds; i++) { if (pmd_present(pmd[i])) if (pmd_present(pmd[i])) goto next; goto continue_outer_loop; } } vaddr += addr & ~PMD_MASK; vaddr += addr & ~PMD_MASK; addr &= PMD_MASK; addr &= PMD_MASK; for (i = 0; i < pmds; i++, addr += PMD_SIZE) for (i = 0; i < pmds; i++, addr += PMD_SIZE) set_pmd(pmd+i, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC)); set_pmd(pmd+i, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC)); __flush_tlb_all(); __flush_tlb_all(); return (void *)vaddr; return (void *)vaddr; next: continue_outer_loop: ; ; } } printk("early_ioremap(0x%lx, %lu) failed\n", addr, size); printk("early_ioremap(0x%lx, %lu) failed\n", addr, size); return NULL; return NULL; } } /* To avoid virtual aliases later */ /* * To avoid virtual aliases later: */ __meminit void early_iounmap(void *addr, unsigned long size) __meminit void early_iounmap(void *addr, unsigned long size) { { unsigned long vaddr; unsigned long vaddr; Loading @@ -247,8 +263,10 @@ __meminit void early_iounmap(void *addr, unsigned long size) vaddr = (unsigned long)addr; vaddr = (unsigned long)addr; pmds = ((vaddr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE; pmds = ((vaddr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE; pmd = level2_kernel_pgt + pmd_index(vaddr); pmd = level2_kernel_pgt + pmd_index(vaddr); for (i = 0; i < pmds; i++) for (i = 0; i < pmds; i++) pmd_clear(pmd + i); pmd_clear(pmd + i); __flush_tlb_all(); __flush_tlb_all(); } } Loading @@ -262,9 +280,10 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end) pmd_t *pmd = pmd_page + pmd_index(address); pmd_t *pmd = pmd_page + pmd_index(address); if (address >= end) { if (address >= end) { if (!after_bootmem) if (!after_bootmem) { for (; i < PTRS_PER_PMD; i++, pmd++) for (; i < PTRS_PER_PMD; i++, pmd++) set_pmd(pmd, __pmd(0)); set_pmd(pmd, __pmd(0)); } break; break; } } Loading @@ -287,11 +306,11 @@ phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end) __flush_tlb_all(); __flush_tlb_all(); } } static void __meminit phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end) static void __meminit phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end) { { int i = pud_index(addr); int i = pud_index(addr); for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE) { for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE) { unsigned long pmd_phys; unsigned long pmd_phys; pud_t *pud = pud_page + pud_index(addr); pud_t *pud = pud_page + pud_index(addr); Loading @@ -300,7 +319,8 @@ static void __meminit phys_pud_init(pud_t *pud_page, unsigned long addr, unsigne if (addr >= end) if (addr >= end) break; break; if (!after_bootmem && !e820_any_mapped(addr,addr+PUD_SIZE,0)) { if (!after_bootmem && !e820_any_mapped(addr, addr+PUD_SIZE, 0)) { set_pud(pud, __pud(0)); set_pud(pud, __pud(0)); continue; continue; } } Loading @@ -311,10 +331,12 @@ static void __meminit phys_pud_init(pud_t *pud_page, unsigned long addr, unsigne } } pmd = alloc_low_page(&pmd_phys); pmd = alloc_low_page(&pmd_phys); spin_lock(&init_mm.page_table_lock); spin_lock(&init_mm.page_table_lock); set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE)); set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE)); phys_pmd_init(pmd, addr, end); phys_pmd_init(pmd, addr, end); spin_unlock(&init_mm.page_table_lock); spin_unlock(&init_mm.page_table_lock); unmap_low_page(pmd); unmap_low_page(pmd); } } __flush_tlb_all(); __flush_tlb_all(); Loading @@ -329,9 +351,11 @@ static void __init find_early_table_space(unsigned long end) tables = round_up(puds * sizeof(pud_t), PAGE_SIZE) + tables = round_up(puds * sizeof(pud_t), PAGE_SIZE) + round_up(pmds * sizeof(pmd_t), PAGE_SIZE); round_up(pmds * sizeof(pmd_t), PAGE_SIZE); /* RED-PEN putting page tables only on node 0 could /* cause a hotspot and fill up ZONE_DMA. The page tables * RED-PEN putting page tables only on node 0 could need roughly 0.5KB per GB. */ * cause a hotspot and fill up ZONE_DMA. The page tables * need roughly 0.5KB per GB. */ start = 0x8000; start = 0x8000; table_start = find_e820_area(start, end, tables); table_start = find_e820_area(start, end, tables); if (table_start == -1UL) if (table_start == -1UL) Loading @@ -345,9 +369,11 @@ static void __init find_early_table_space(unsigned long end) (table_start << PAGE_SHIFT) + tables); (table_start << PAGE_SHIFT) + tables); } } /* Setup the direct mapping of the physical memory at PAGE_OFFSET. /* This runs before bootmem is initialized and gets pages directly from the * Setup the direct mapping of the physical memory at PAGE_OFFSET. physical memory. To access them they are temporarily mapped. */ * This runs before bootmem is initialized and gets pages directly from * the physical memory. To access them they are temporarily mapped. */ void __init_refok init_memory_mapping(unsigned long start, unsigned long end) void __init_refok init_memory_mapping(unsigned long start, unsigned long end) { { unsigned long next; unsigned long next; Loading @@ -356,9 +382,10 @@ void __init_refok init_memory_mapping(unsigned long start, unsigned long end) /* /* * Find space for the kernel direct mapping tables. * Find space for the kernel direct mapping tables. * Later we should allocate these tables in the local node of the memory * * mapped. Unfortunately this is done currently before the nodes are * Later we should allocate these tables in the local node of the * discovered. * memory mapped. Unfortunately this is done currently before the * nodes are discovered. */ */ if (!after_bootmem) if (!after_bootmem) find_early_table_space(end); find_early_table_space(end); Loading @@ -367,8 +394,8 @@ void __init_refok init_memory_mapping(unsigned long start, unsigned long end) end = (unsigned long)__va(end); end = (unsigned long)__va(end); for (; start < end; start = next) { for (; start < end; start = next) { unsigned long pud_phys; pgd_t *pgd = pgd_offset_k(start); pgd_t *pgd = pgd_offset_k(start); unsigned long pud_phys; pud_t *pud; pud_t *pud; if (after_bootmem) if (after_bootmem) Loading Loading @@ -396,6 +423,7 @@ void __init_refok init_memory_mapping(unsigned long start, unsigned long end) void __init paging_init(void) void __init paging_init(void) { { unsigned long max_zone_pfns[MAX_NR_ZONES]; unsigned long max_zone_pfns[MAX_NR_ZONES]; memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN; max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN; max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN; max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN; Loading @@ -407,10 +435,12 @@ void __init paging_init(void) } } #endif #endif /* Unmap a kernel mapping if it exists. This is useful to avoid prefetches /* from the CPU leading to inconsistent cache lines. address and size * Unmap a kernel mapping if it exists. This is useful to avoid must be aligned to 2MB boundaries. * prefetches from the CPU leading to inconsistent cache lines. Does nothing when the mapping doesn't exist. */ * address and size must be aligned to 2MB boundaries. * Does nothing when the mapping doesn't exist. */ void __init clear_kernel_mapping(unsigned long address, unsigned long size) void __init clear_kernel_mapping(unsigned long address, unsigned long size) { { unsigned long end = address + size; unsigned long end = address + size; Loading @@ -422,18 +452,25 @@ void __init clear_kernel_mapping(unsigned long address, unsigned long size) pgd_t *pgd = pgd_offset_k(address); pgd_t *pgd = pgd_offset_k(address); pud_t *pud; pud_t *pud; pmd_t *pmd; pmd_t *pmd; if (pgd_none(*pgd)) if (pgd_none(*pgd)) continue; continue; pud = pud_offset(pgd, address); pud = pud_offset(pgd, address); if (pud_none(*pud)) if (pud_none(*pud)) continue; continue; pmd = pmd_offset(pud, address); pmd = pmd_offset(pud, address); if (!pmd || pmd_none(*pmd)) if (!pmd || pmd_none(*pmd)) continue; continue; if (0 == (pmd_val(*pmd) & _PAGE_PSE)) { /* Could handle this, but it should not happen currently. */ if (!(pmd_val(*pmd) & _PAGE_PSE)) { printk(KERN_ERR /* "clear_kernel_mapping: mapping has been split. will leak memory\n"); * Could handle this, but it should not happen * currently: */ printk(KERN_ERR "clear_kernel_mapping: " "mapping has been split. will leak memory\n"); pmd_ERROR(*pmd); pmd_ERROR(*pmd); } } set_pmd(pmd, __pmd(0)); set_pmd(pmd, __pmd(0)); Loading Loading @@ -466,15 +503,12 @@ int arch_add_memory(int nid, u64 start, u64 size) unsigned long nr_pages = size >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; int ret; int ret; init_memory_mapping(start, (start + size -1)); init_memory_mapping(start, start + size-1); ret = __add_pages(zone, start_pfn, nr_pages); ret = __add_pages(zone, start_pfn, nr_pages); if (ret) if (ret) goto error; return ret; error: printk("%s: Problem encountered in __add_pages!\n", __func__); printk("%s: Problem encountered in __add_pages!\n", __func__); return ret; return ret; } } EXPORT_SYMBOL_GPL(arch_add_memory); EXPORT_SYMBOL_GPL(arch_add_memory); Loading @@ -489,8 +523,8 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); #endif /* CONFIG_MEMORY_HOTPLUG */ #endif /* CONFIG_MEMORY_HOTPLUG */ static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules, static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_vsyscall; kcore_modules, kcore_vsyscall; void __init mem_init(void) void __init mem_init(void) { { Loading Loading @@ -518,7 +552,6 @@ void __init mem_init(void) #endif #endif reservedpages = end_pfn - totalram_pages - reservedpages = end_pfn - totalram_pages - absent_pages_in_range(0, end_pfn); absent_pages_in_range(0, end_pfn); after_bootmem = 1; after_bootmem = 1; codesize = (unsigned long) &_etext - (unsigned long) &_text; codesize = (unsigned long) &_etext - (unsigned long) &_text; Loading @@ -534,7 +567,8 @@ void __init mem_init(void) kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START, kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START, VSYSCALL_END - VSYSCALL_START); VSYSCALL_END - VSYSCALL_START); printk("Memory: %luk/%luk available (%ldk kernel code, %ldk reserved, %ldk data, %ldk init)\n", printk("Memory: %luk/%luk available (%ldk kernel code, " "%ldk reserved, %ldk data, %ldk init)\n", (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), end_pfn << (PAGE_SHIFT-10), end_pfn << (PAGE_SHIFT-10), codesize >> 10, codesize >> 10, Loading @@ -561,6 +595,7 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end) set_memory_np(begin, (end - begin) >> PAGE_SHIFT); set_memory_np(begin, (end - begin) >> PAGE_SHIFT); #else #else printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10); printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10); for (addr = begin; addr < end; addr += PAGE_SIZE) { for (addr = begin; addr < end; addr += PAGE_SIZE) { ClearPageReserved(virt_to_page(addr)); ClearPageReserved(virt_to_page(addr)); init_page_count(virt_to_page(addr)); init_page_count(virt_to_page(addr)); Loading Loading @@ -633,11 +668,15 @@ void __init reserve_bootmem_generic(unsigned long phys, unsigned len) int nid = phys_to_nid(phys); int nid = phys_to_nid(phys); #endif #endif unsigned long pfn = phys >> PAGE_SHIFT; unsigned long pfn = phys >> PAGE_SHIFT; if (pfn >= end_pfn) { if (pfn >= end_pfn) { /* This can happen with kdump kernels when accessing firmware /* tables. */ * This can happen with kdump kernels when accessing * firmware tables: */ if (pfn < end_pfn_map) if (pfn < end_pfn_map) return; return; printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %u\n", printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %u\n", phys, len); phys, len); return; return; Loading Loading @@ -677,22 +716,25 @@ int kern_addr_valid(unsigned long addr) pmd = pmd_offset(pud, addr); pmd = pmd_offset(pud, addr); if (pmd_none(*pmd)) if (pmd_none(*pmd)) return 0; return 0; if (pmd_large(*pmd)) if (pmd_large(*pmd)) return pfn_valid(pmd_pfn(*pmd)); return pfn_valid(pmd_pfn(*pmd)); pte = pte_offset_kernel(pmd, addr); pte = pte_offset_kernel(pmd, addr); if (pte_none(*pte)) if (pte_none(*pte)) return 0; return 0; return pfn_valid(pte_pfn(*pte)); return pfn_valid(pte_pfn(*pte)); } } /* A pseudo VMA to allow ptrace access for the vsyscall page. This only /* covers the 64bit vsyscall page now. 32bit has a real VMA now and does * A pseudo VMA to allow ptrace access for the vsyscall page. This only not need special handling anymore. */ * covers the 64bit vsyscall page now. 32bit has a real VMA now and does * not need special handling anymore: */ static struct vm_area_struct gate_vma = { static struct vm_area_struct gate_vma = { .vm_start = VSYSCALL_START, .vm_start = VSYSCALL_START, .vm_end = VSYSCALL_START + (VSYSCALL_MAPPED_PAGES << PAGE_SHIFT), .vm_end = VSYSCALL_START + (VSYSCALL_MAPPED_PAGES * PAGE_SIZE), .vm_page_prot = PAGE_READONLY_EXEC, .vm_page_prot = PAGE_READONLY_EXEC, .vm_flags = VM_READ | VM_EXEC .vm_flags = VM_READ | VM_EXEC }; }; Loading @@ -709,14 +751,17 @@ struct vm_area_struct *get_gate_vma(struct task_struct *tsk) int in_gate_area(struct task_struct *task, unsigned long addr) int in_gate_area(struct task_struct *task, unsigned long addr) { { struct vm_area_struct *vma = get_gate_vma(task); struct vm_area_struct *vma = get_gate_vma(task); if (!vma) if (!vma) return 0; return 0; return (addr >= vma->vm_start) && (addr < vma->vm_end); return (addr >= vma->vm_start) && (addr < vma->vm_end); } } /* Use this when you have no reliable task/vma, typically from interrupt /* * Use this when you have no reliable task/vma, typically from interrupt * context. It is less reliable than using the task's vma and may give * context. It is less reliable than using the task's vma and may give * false positives. * false positives: */ */ int in_gate_area_no_task(unsigned long addr) int in_gate_area_no_task(unsigned long addr) { { Loading @@ -736,8 +781,8 @@ const char *arch_vma_name(struct vm_area_struct *vma) /* /* * Initialise the sparsemem vmemmap using huge-pages at the PMD level. * Initialise the sparsemem vmemmap using huge-pages at the PMD level. */ */ int __meminit vmemmap_populate(struct page *start_page, int __meminit unsigned long size, int node) vmemmap_populate(struct page *start_page, unsigned long size, int node) { { unsigned long addr = (unsigned long)start_page; unsigned long addr = (unsigned long)start_page; unsigned long end = (unsigned long)(start_page + size); unsigned long end = (unsigned long)(start_page + size); Loading @@ -752,6 +797,7 @@ int __meminit vmemmap_populate(struct page *start_page, pgd = vmemmap_pgd_populate(addr, node); pgd = vmemmap_pgd_populate(addr, node); if (!pgd) if (!pgd) return -ENOMEM; return -ENOMEM; pud = vmemmap_pud_populate(pgd, addr, node); pud = vmemmap_pud_populate(pgd, addr, node); if (!pud) if (!pud) return -ENOMEM; return -ENOMEM; Loading @@ -759,19 +805,22 @@ int __meminit vmemmap_populate(struct page *start_page, pmd = pmd_offset(pud, addr); pmd = pmd_offset(pud, addr); if (pmd_none(*pmd)) { if (pmd_none(*pmd)) { pte_t entry; pte_t entry; void *p = vmemmap_alloc_block(PMD_SIZE, node); void *p; p = vmemmap_alloc_block(PMD_SIZE, node); if (!p) if (!p) return -ENOMEM; return -ENOMEM; entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL_LARGE); entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL_LARGE); set_pmd(pmd, __pmd(pte_val(entry))); set_pmd(pmd, __pmd(pte_val(entry))); printk(KERN_DEBUG " [%lx-%lx] PMD ->%p on node %d\n", printk(KERN_DEBUG " [%lx-%lx] PMD ->%p on node %d\n", addr, addr + PMD_SIZE - 1, p, node); addr, addr + PMD_SIZE - 1, p, node); } else } else { vmemmap_verify((pte_t *)pmd, node, addr, next); vmemmap_verify((pte_t *)pmd, node, addr, next); } } } return 0; return 0; } } #endif #endif Loading
arch/x86/mm/init_64.c +175 −126 Original line number Original line Diff line number Diff line Loading @@ -67,22 +67,26 @@ void show_mem(void) { { long i, total = 0, reserved = 0; long i, total = 0, reserved = 0; long shared = 0, cached = 0; long shared = 0, cached = 0; pg_data_t *pgdat; struct page *page; struct page *page; pg_data_t *pgdat; printk(KERN_INFO "Mem-info:\n"); printk(KERN_INFO "Mem-info:\n"); show_free_areas(); show_free_areas(); printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages << (PAGE_SHIFT-10)); for_each_online_pgdat(pgdat) { for_each_online_pgdat(pgdat) { for (i = 0; i < pgdat->node_spanned_pages; ++i) { for (i = 0; i < pgdat->node_spanned_pages; ++i) { /* this loop can take a while with 256 GB and 4k pages /* so update the NMI watchdog */ * This loop can take a while with 256 GB and if (unlikely(i % MAX_ORDER_NR_PAGES == 0)) { * 4k pages so defer the NMI watchdog: */ if (unlikely(i % MAX_ORDER_NR_PAGES == 0)) touch_nmi_watchdog(); touch_nmi_watchdog(); } if (!pfn_valid(pgdat->node_start_pfn + i)) if (!pfn_valid(pgdat->node_start_pfn + i)) continue; continue; page = pfn_to_page(pgdat->node_start_pfn + i); page = pfn_to_page(pgdat->node_start_pfn + i); total++; total++; if (PageReserved(page)) if (PageReserved(page)) Loading @@ -104,19 +108,24 @@ int after_bootmem; static __init void *spp_getpage(void) static __init void *spp_getpage(void) { { void *ptr; void *ptr; if (after_bootmem) if (after_bootmem) ptr = (void *) get_zeroed_page(GFP_ATOMIC); ptr = (void *) get_zeroed_page(GFP_ATOMIC); else else ptr = alloc_bootmem_pages(PAGE_SIZE); ptr = alloc_bootmem_pages(PAGE_SIZE); if (!ptr || ((unsigned long)ptr & ~PAGE_MASK)) panic("set_pte_phys: cannot allocate page data %s\n", after_bootmem?"after bootmem":""); if (!ptr || ((unsigned long)ptr & ~PAGE_MASK)) { panic("set_pte_phys: cannot allocate page data %s\n", after_bootmem ? "after bootmem" : ""); } Dprintk("spp_getpage %p\n", ptr); Dprintk("spp_getpage %p\n", ptr); return ptr; return ptr; } } static __init void set_pte_phys(unsigned long vaddr, static __init void unsigned long phys, pgprot_t prot) set_pte_phys(unsigned long vaddr, unsigned long phys, pgprot_t prot) { { pgd_t *pgd; pgd_t *pgd; pud_t *pud; pud_t *pud; Loading @@ -135,7 +144,8 @@ static __init void set_pte_phys(unsigned long vaddr, pmd = (pmd_t *) spp_getpage(); pmd = (pmd_t *) spp_getpage(); set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER)); set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER)); if (pmd != pmd_offset(pud, 0)) { if (pmd != pmd_offset(pud, 0)) { printk("PAGETABLE BUG #01! %p <-> %p\n", pmd, pmd_offset(pud,0)); printk("PAGETABLE BUG #01! %p <-> %p\n", pmd, pmd_offset(pud, 0)); return; return; } } } } Loading Loading @@ -187,6 +197,7 @@ static __meminit void *alloc_low_page(unsigned long *phys) if (after_bootmem) { if (after_bootmem) { adr = (void *)get_zeroed_page(GFP_ATOMIC); adr = (void *)get_zeroed_page(GFP_ATOMIC); *phys = __pa(adr); *phys = __pa(adr); return adr; return adr; } } Loading @@ -201,7 +212,6 @@ static __meminit void *alloc_low_page(unsigned long *phys) static __meminit void unmap_low_page(void *adr) static __meminit void unmap_low_page(void *adr) { { if (after_bootmem) if (after_bootmem) return; return; Loading @@ -211,33 +221,39 @@ static __meminit void unmap_low_page(void *adr) /* Must run before zap_low_mappings */ /* Must run before zap_low_mappings */ __meminit void *early_ioremap(unsigned long addr, unsigned long size) __meminit void *early_ioremap(unsigned long addr, unsigned long size) { { unsigned long vaddr; pmd_t *pmd, *last_pmd; pmd_t *pmd, *last_pmd; unsigned long vaddr; int i, pmds; int i, pmds; pmds = ((addr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE; pmds = ((addr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE; vaddr = __START_KERNEL_map; vaddr = __START_KERNEL_map; pmd = level2_kernel_pgt; pmd = level2_kernel_pgt; last_pmd = level2_kernel_pgt + PTRS_PER_PMD - 1; last_pmd = level2_kernel_pgt + PTRS_PER_PMD - 1; for (; pmd <= last_pmd; pmd++, vaddr += PMD_SIZE) { for (; pmd <= last_pmd; pmd++, vaddr += PMD_SIZE) { for (i = 0; i < pmds; i++) { for (i = 0; i < pmds; i++) { if (pmd_present(pmd[i])) if (pmd_present(pmd[i])) goto next; goto continue_outer_loop; } } vaddr += addr & ~PMD_MASK; vaddr += addr & ~PMD_MASK; addr &= PMD_MASK; addr &= PMD_MASK; for (i = 0; i < pmds; i++, addr += PMD_SIZE) for (i = 0; i < pmds; i++, addr += PMD_SIZE) set_pmd(pmd+i, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC)); set_pmd(pmd+i, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC)); __flush_tlb_all(); __flush_tlb_all(); return (void *)vaddr; return (void *)vaddr; next: continue_outer_loop: ; ; } } printk("early_ioremap(0x%lx, %lu) failed\n", addr, size); printk("early_ioremap(0x%lx, %lu) failed\n", addr, size); return NULL; return NULL; } } /* To avoid virtual aliases later */ /* * To avoid virtual aliases later: */ __meminit void early_iounmap(void *addr, unsigned long size) __meminit void early_iounmap(void *addr, unsigned long size) { { unsigned long vaddr; unsigned long vaddr; Loading @@ -247,8 +263,10 @@ __meminit void early_iounmap(void *addr, unsigned long size) vaddr = (unsigned long)addr; vaddr = (unsigned long)addr; pmds = ((vaddr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE; pmds = ((vaddr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE; pmd = level2_kernel_pgt + pmd_index(vaddr); pmd = level2_kernel_pgt + pmd_index(vaddr); for (i = 0; i < pmds; i++) for (i = 0; i < pmds; i++) pmd_clear(pmd + i); pmd_clear(pmd + i); __flush_tlb_all(); __flush_tlb_all(); } } Loading @@ -262,9 +280,10 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end) pmd_t *pmd = pmd_page + pmd_index(address); pmd_t *pmd = pmd_page + pmd_index(address); if (address >= end) { if (address >= end) { if (!after_bootmem) if (!after_bootmem) { for (; i < PTRS_PER_PMD; i++, pmd++) for (; i < PTRS_PER_PMD; i++, pmd++) set_pmd(pmd, __pmd(0)); set_pmd(pmd, __pmd(0)); } break; break; } } Loading @@ -287,11 +306,11 @@ phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end) __flush_tlb_all(); __flush_tlb_all(); } } static void __meminit phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end) static void __meminit phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end) { { int i = pud_index(addr); int i = pud_index(addr); for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE) { for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE) { unsigned long pmd_phys; unsigned long pmd_phys; pud_t *pud = pud_page + pud_index(addr); pud_t *pud = pud_page + pud_index(addr); Loading @@ -300,7 +319,8 @@ static void __meminit phys_pud_init(pud_t *pud_page, unsigned long addr, unsigne if (addr >= end) if (addr >= end) break; break; if (!after_bootmem && !e820_any_mapped(addr,addr+PUD_SIZE,0)) { if (!after_bootmem && !e820_any_mapped(addr, addr+PUD_SIZE, 0)) { set_pud(pud, __pud(0)); set_pud(pud, __pud(0)); continue; continue; } } Loading @@ -311,10 +331,12 @@ static void __meminit phys_pud_init(pud_t *pud_page, unsigned long addr, unsigne } } pmd = alloc_low_page(&pmd_phys); pmd = alloc_low_page(&pmd_phys); spin_lock(&init_mm.page_table_lock); spin_lock(&init_mm.page_table_lock); set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE)); set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE)); phys_pmd_init(pmd, addr, end); phys_pmd_init(pmd, addr, end); spin_unlock(&init_mm.page_table_lock); spin_unlock(&init_mm.page_table_lock); unmap_low_page(pmd); unmap_low_page(pmd); } } __flush_tlb_all(); __flush_tlb_all(); Loading @@ -329,9 +351,11 @@ static void __init find_early_table_space(unsigned long end) tables = round_up(puds * sizeof(pud_t), PAGE_SIZE) + tables = round_up(puds * sizeof(pud_t), PAGE_SIZE) + round_up(pmds * sizeof(pmd_t), PAGE_SIZE); round_up(pmds * sizeof(pmd_t), PAGE_SIZE); /* RED-PEN putting page tables only on node 0 could /* cause a hotspot and fill up ZONE_DMA. The page tables * RED-PEN putting page tables only on node 0 could need roughly 0.5KB per GB. */ * cause a hotspot and fill up ZONE_DMA. The page tables * need roughly 0.5KB per GB. */ start = 0x8000; start = 0x8000; table_start = find_e820_area(start, end, tables); table_start = find_e820_area(start, end, tables); if (table_start == -1UL) if (table_start == -1UL) Loading @@ -345,9 +369,11 @@ static void __init find_early_table_space(unsigned long end) (table_start << PAGE_SHIFT) + tables); (table_start << PAGE_SHIFT) + tables); } } /* Setup the direct mapping of the physical memory at PAGE_OFFSET. /* This runs before bootmem is initialized and gets pages directly from the * Setup the direct mapping of the physical memory at PAGE_OFFSET. physical memory. To access them they are temporarily mapped. */ * This runs before bootmem is initialized and gets pages directly from * the physical memory. To access them they are temporarily mapped. */ void __init_refok init_memory_mapping(unsigned long start, unsigned long end) void __init_refok init_memory_mapping(unsigned long start, unsigned long end) { { unsigned long next; unsigned long next; Loading @@ -356,9 +382,10 @@ void __init_refok init_memory_mapping(unsigned long start, unsigned long end) /* /* * Find space for the kernel direct mapping tables. * Find space for the kernel direct mapping tables. * Later we should allocate these tables in the local node of the memory * * mapped. Unfortunately this is done currently before the nodes are * Later we should allocate these tables in the local node of the * discovered. * memory mapped. Unfortunately this is done currently before the * nodes are discovered. */ */ if (!after_bootmem) if (!after_bootmem) find_early_table_space(end); find_early_table_space(end); Loading @@ -367,8 +394,8 @@ void __init_refok init_memory_mapping(unsigned long start, unsigned long end) end = (unsigned long)__va(end); end = (unsigned long)__va(end); for (; start < end; start = next) { for (; start < end; start = next) { unsigned long pud_phys; pgd_t *pgd = pgd_offset_k(start); pgd_t *pgd = pgd_offset_k(start); unsigned long pud_phys; pud_t *pud; pud_t *pud; if (after_bootmem) if (after_bootmem) Loading Loading @@ -396,6 +423,7 @@ void __init_refok init_memory_mapping(unsigned long start, unsigned long end) void __init paging_init(void) void __init paging_init(void) { { unsigned long max_zone_pfns[MAX_NR_ZONES]; unsigned long max_zone_pfns[MAX_NR_ZONES]; memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN; max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN; max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN; max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN; Loading @@ -407,10 +435,12 @@ void __init paging_init(void) } } #endif #endif /* Unmap a kernel mapping if it exists. This is useful to avoid prefetches /* from the CPU leading to inconsistent cache lines. address and size * Unmap a kernel mapping if it exists. This is useful to avoid must be aligned to 2MB boundaries. * prefetches from the CPU leading to inconsistent cache lines. Does nothing when the mapping doesn't exist. */ * address and size must be aligned to 2MB boundaries. * Does nothing when the mapping doesn't exist. */ void __init clear_kernel_mapping(unsigned long address, unsigned long size) void __init clear_kernel_mapping(unsigned long address, unsigned long size) { { unsigned long end = address + size; unsigned long end = address + size; Loading @@ -422,18 +452,25 @@ void __init clear_kernel_mapping(unsigned long address, unsigned long size) pgd_t *pgd = pgd_offset_k(address); pgd_t *pgd = pgd_offset_k(address); pud_t *pud; pud_t *pud; pmd_t *pmd; pmd_t *pmd; if (pgd_none(*pgd)) if (pgd_none(*pgd)) continue; continue; pud = pud_offset(pgd, address); pud = pud_offset(pgd, address); if (pud_none(*pud)) if (pud_none(*pud)) continue; continue; pmd = pmd_offset(pud, address); pmd = pmd_offset(pud, address); if (!pmd || pmd_none(*pmd)) if (!pmd || pmd_none(*pmd)) continue; continue; if (0 == (pmd_val(*pmd) & _PAGE_PSE)) { /* Could handle this, but it should not happen currently. */ if (!(pmd_val(*pmd) & _PAGE_PSE)) { printk(KERN_ERR /* "clear_kernel_mapping: mapping has been split. will leak memory\n"); * Could handle this, but it should not happen * currently: */ printk(KERN_ERR "clear_kernel_mapping: " "mapping has been split. will leak memory\n"); pmd_ERROR(*pmd); pmd_ERROR(*pmd); } } set_pmd(pmd, __pmd(0)); set_pmd(pmd, __pmd(0)); Loading Loading @@ -466,15 +503,12 @@ int arch_add_memory(int nid, u64 start, u64 size) unsigned long nr_pages = size >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; int ret; int ret; init_memory_mapping(start, (start + size -1)); init_memory_mapping(start, start + size-1); ret = __add_pages(zone, start_pfn, nr_pages); ret = __add_pages(zone, start_pfn, nr_pages); if (ret) if (ret) goto error; return ret; error: printk("%s: Problem encountered in __add_pages!\n", __func__); printk("%s: Problem encountered in __add_pages!\n", __func__); return ret; return ret; } } EXPORT_SYMBOL_GPL(arch_add_memory); EXPORT_SYMBOL_GPL(arch_add_memory); Loading @@ -489,8 +523,8 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); #endif /* CONFIG_MEMORY_HOTPLUG */ #endif /* CONFIG_MEMORY_HOTPLUG */ static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules, static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_vsyscall; kcore_modules, kcore_vsyscall; void __init mem_init(void) void __init mem_init(void) { { Loading Loading @@ -518,7 +552,6 @@ void __init mem_init(void) #endif #endif reservedpages = end_pfn - totalram_pages - reservedpages = end_pfn - totalram_pages - absent_pages_in_range(0, end_pfn); absent_pages_in_range(0, end_pfn); after_bootmem = 1; after_bootmem = 1; codesize = (unsigned long) &_etext - (unsigned long) &_text; codesize = (unsigned long) &_etext - (unsigned long) &_text; Loading @@ -534,7 +567,8 @@ void __init mem_init(void) kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START, kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START, VSYSCALL_END - VSYSCALL_START); VSYSCALL_END - VSYSCALL_START); printk("Memory: %luk/%luk available (%ldk kernel code, %ldk reserved, %ldk data, %ldk init)\n", printk("Memory: %luk/%luk available (%ldk kernel code, " "%ldk reserved, %ldk data, %ldk init)\n", (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), end_pfn << (PAGE_SHIFT-10), end_pfn << (PAGE_SHIFT-10), codesize >> 10, codesize >> 10, Loading @@ -561,6 +595,7 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end) set_memory_np(begin, (end - begin) >> PAGE_SHIFT); set_memory_np(begin, (end - begin) >> PAGE_SHIFT); #else #else printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10); printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10); for (addr = begin; addr < end; addr += PAGE_SIZE) { for (addr = begin; addr < end; addr += PAGE_SIZE) { ClearPageReserved(virt_to_page(addr)); ClearPageReserved(virt_to_page(addr)); init_page_count(virt_to_page(addr)); init_page_count(virt_to_page(addr)); Loading Loading @@ -633,11 +668,15 @@ void __init reserve_bootmem_generic(unsigned long phys, unsigned len) int nid = phys_to_nid(phys); int nid = phys_to_nid(phys); #endif #endif unsigned long pfn = phys >> PAGE_SHIFT; unsigned long pfn = phys >> PAGE_SHIFT; if (pfn >= end_pfn) { if (pfn >= end_pfn) { /* This can happen with kdump kernels when accessing firmware /* tables. */ * This can happen with kdump kernels when accessing * firmware tables: */ if (pfn < end_pfn_map) if (pfn < end_pfn_map) return; return; printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %u\n", printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %u\n", phys, len); phys, len); return; return; Loading Loading @@ -677,22 +716,25 @@ int kern_addr_valid(unsigned long addr) pmd = pmd_offset(pud, addr); pmd = pmd_offset(pud, addr); if (pmd_none(*pmd)) if (pmd_none(*pmd)) return 0; return 0; if (pmd_large(*pmd)) if (pmd_large(*pmd)) return pfn_valid(pmd_pfn(*pmd)); return pfn_valid(pmd_pfn(*pmd)); pte = pte_offset_kernel(pmd, addr); pte = pte_offset_kernel(pmd, addr); if (pte_none(*pte)) if (pte_none(*pte)) return 0; return 0; return pfn_valid(pte_pfn(*pte)); return pfn_valid(pte_pfn(*pte)); } } /* A pseudo VMA to allow ptrace access for the vsyscall page. This only /* covers the 64bit vsyscall page now. 32bit has a real VMA now and does * A pseudo VMA to allow ptrace access for the vsyscall page. This only not need special handling anymore. */ * covers the 64bit vsyscall page now. 32bit has a real VMA now and does * not need special handling anymore: */ static struct vm_area_struct gate_vma = { static struct vm_area_struct gate_vma = { .vm_start = VSYSCALL_START, .vm_start = VSYSCALL_START, .vm_end = VSYSCALL_START + (VSYSCALL_MAPPED_PAGES << PAGE_SHIFT), .vm_end = VSYSCALL_START + (VSYSCALL_MAPPED_PAGES * PAGE_SIZE), .vm_page_prot = PAGE_READONLY_EXEC, .vm_page_prot = PAGE_READONLY_EXEC, .vm_flags = VM_READ | VM_EXEC .vm_flags = VM_READ | VM_EXEC }; }; Loading @@ -709,14 +751,17 @@ struct vm_area_struct *get_gate_vma(struct task_struct *tsk) int in_gate_area(struct task_struct *task, unsigned long addr) int in_gate_area(struct task_struct *task, unsigned long addr) { { struct vm_area_struct *vma = get_gate_vma(task); struct vm_area_struct *vma = get_gate_vma(task); if (!vma) if (!vma) return 0; return 0; return (addr >= vma->vm_start) && (addr < vma->vm_end); return (addr >= vma->vm_start) && (addr < vma->vm_end); } } /* Use this when you have no reliable task/vma, typically from interrupt /* * Use this when you have no reliable task/vma, typically from interrupt * context. It is less reliable than using the task's vma and may give * context. It is less reliable than using the task's vma and may give * false positives. * false positives: */ */ int in_gate_area_no_task(unsigned long addr) int in_gate_area_no_task(unsigned long addr) { { Loading @@ -736,8 +781,8 @@ const char *arch_vma_name(struct vm_area_struct *vma) /* /* * Initialise the sparsemem vmemmap using huge-pages at the PMD level. * Initialise the sparsemem vmemmap using huge-pages at the PMD level. */ */ int __meminit vmemmap_populate(struct page *start_page, int __meminit unsigned long size, int node) vmemmap_populate(struct page *start_page, unsigned long size, int node) { { unsigned long addr = (unsigned long)start_page; unsigned long addr = (unsigned long)start_page; unsigned long end = (unsigned long)(start_page + size); unsigned long end = (unsigned long)(start_page + size); Loading @@ -752,6 +797,7 @@ int __meminit vmemmap_populate(struct page *start_page, pgd = vmemmap_pgd_populate(addr, node); pgd = vmemmap_pgd_populate(addr, node); if (!pgd) if (!pgd) return -ENOMEM; return -ENOMEM; pud = vmemmap_pud_populate(pgd, addr, node); pud = vmemmap_pud_populate(pgd, addr, node); if (!pud) if (!pud) return -ENOMEM; return -ENOMEM; Loading @@ -759,19 +805,22 @@ int __meminit vmemmap_populate(struct page *start_page, pmd = pmd_offset(pud, addr); pmd = pmd_offset(pud, addr); if (pmd_none(*pmd)) { if (pmd_none(*pmd)) { pte_t entry; pte_t entry; void *p = vmemmap_alloc_block(PMD_SIZE, node); void *p; p = vmemmap_alloc_block(PMD_SIZE, node); if (!p) if (!p) return -ENOMEM; return -ENOMEM; entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL_LARGE); entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL_LARGE); set_pmd(pmd, __pmd(pte_val(entry))); set_pmd(pmd, __pmd(pte_val(entry))); printk(KERN_DEBUG " [%lx-%lx] PMD ->%p on node %d\n", printk(KERN_DEBUG " [%lx-%lx] PMD ->%p on node %d\n", addr, addr + PMD_SIZE - 1, p, node); addr, addr + PMD_SIZE - 1, p, node); } else } else { vmemmap_verify((pte_t *)pmd, node, addr, next); vmemmap_verify((pte_t *)pmd, node, addr, next); } } } return 0; return 0; } } #endif #endif