Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit b4e98d9a authored by Kirill A. Shutemov, committed by Linus Torvalds
Browse files

mm: account pud page tables

On a machine with 5-level paging support, a process can allocate a
significant amount of memory and stay unnoticed by the oom-killer and
memory cgroup.  The trick is to allocate a lot of PUD page tables.  We
don't account PUD page tables, only PMD and PTE.

We already addressed the same issue for PMD page tables, see commit
dc6c9a35 ("mm: account pmd page tables to the process").
Introduction of 5-level paging brings the same issue for PUD page
tables.

The patch expands accounting to PUD level.

[kirill.shutemov@linux.intel.com: s/pmd_t/pud_t/]
  Link: http://lkml.kernel.org/r/20171004074305.x35eh5u7ybbt5kar@black.fi.intel.com
[heiko.carstens@de.ibm.com: s390/mm: fix pud table accounting]
  Link: http://lkml.kernel.org/r/20171103090551.18231-1-heiko.carstens@de.ibm.com
Link: http://lkml.kernel.org/r/20171002080427.3320-1-kirill.shutemov@linux.intel.com


Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Acked-by: Rik van Riel <riel@redhat.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 7d6c4dfa
Loading
Loading
Loading
Loading
+4 −4
Original line number Diff line number Diff line
@@ -629,10 +629,10 @@ oom_dump_tasks

Enables a system-wide task dump (excluding kernel threads) to be produced
when the kernel performs an OOM-killing and includes such information as
pid, uid, tgid, vm size, rss, nr_ptes, nr_pmds, swapents, oom_score_adj
score, and name.  This is helpful to determine why the OOM killer was
invoked, to identify the rogue task that caused it, and to determine why
the OOM killer chose the task it did to kill.
pid, uid, tgid, vm size, rss, nr_ptes, nr_pmds, nr_puds, swapents,
oom_score_adj score, and name.  This is helpful to determine why the OOM
killer was invoked, to identify the rogue task that caused it, and to
determine why the OOM killer chose the task it did to kill.

If this is set to zero, this information is suppressed.  On very
large systems with thousands of tasks it may not be feasible to dump
+1 −0
Original line number Diff line number Diff line
@@ -433,6 +433,7 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
	pud = pud_offset(pgd, start);
	pgd_clear(pgd);
	pud_free_tlb(tlb, pud, start);
	mm_dec_nr_puds(tlb->mm);
}

/*
+3 −1
Original line number Diff line number Diff line
@@ -44,6 +44,8 @@ static inline int init_new_context(struct task_struct *tsk,
		mm->context.asce_limit = STACK_TOP_MAX;
		mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
				   _ASCE_USER_BITS | _ASCE_TYPE_REGION3;
		/* pgd_alloc() did not account this pud */
		mm_inc_nr_puds(mm);
		break;
	case -PAGE_SIZE:
		/* forked 5-level task, set new asce with new_mm->pgd */
@@ -59,7 +61,7 @@ static inline int init_new_context(struct task_struct *tsk,
		/* forked 2-level compat task, set new asce with new mm->pgd */
		mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
				   _ASCE_USER_BITS | _ASCE_TYPE_SEGMENT;
		/* pgd_alloc() did not increase mm->nr_pmds */
		/* pgd_alloc() did not account this pmd */
		mm_inc_nr_pmds(mm);
	}
	crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm));
+1 −0
Original line number Diff line number Diff line
@@ -472,6 +472,7 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
	pud = pud_offset(pgd, start);
	pgd_clear(pgd);
	pud_free_tlb(tlb, pud, start);
	mm_dec_nr_puds(tlb->mm);
}

void hugetlb_free_pgd_range(struct mmu_gather *tlb,
+4 −1
Original line number Diff line number Diff line
@@ -26,7 +26,7 @@

void task_mem(struct seq_file *m, struct mm_struct *mm)
{
	unsigned long text, lib, swap, ptes, pmds, anon, file, shmem;
	unsigned long text, lib, swap, ptes, pmds, puds, anon, file, shmem;
	unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss;

	anon = get_mm_counter(mm, MM_ANONPAGES);
@@ -52,6 +52,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
	swap = get_mm_counter(mm, MM_SWAPENTS);
	ptes = PTRS_PER_PTE * sizeof(pte_t) * atomic_long_read(&mm->nr_ptes);
	pmds = PTRS_PER_PMD * sizeof(pmd_t) * mm_nr_pmds(mm);
	puds = PTRS_PER_PUD * sizeof(pud_t) * mm_nr_puds(mm);
	seq_printf(m,
		"VmPeak:\t%8lu kB\n"
		"VmSize:\t%8lu kB\n"
@@ -68,6 +69,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
		"VmLib:\t%8lu kB\n"
		"VmPTE:\t%8lu kB\n"
		"VmPMD:\t%8lu kB\n"
		"VmPUD:\t%8lu kB\n"
		"VmSwap:\t%8lu kB\n",
		hiwater_vm << (PAGE_SHIFT-10),
		total_vm << (PAGE_SHIFT-10),
@@ -82,6 +84,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
		mm->stack_vm << (PAGE_SHIFT-10), text, lib,
		ptes >> 10,
		pmds >> 10,
		puds >> 10,
		swap << (PAGE_SHIFT-10));
	hugetlb_report_usage(m, mm);
}
Loading