Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 365e9c87 authored by Hugh Dickins, committed by Linus Torvalds
Browse files

[PATCH] mm: update_hiwaters just in time



update_mem_hiwater has attracted various criticisms, in particular from those
concerned with mm scalability.  Originally it was called whenever rss or
total_vm got raised.  Then many of those callsites were replaced by a timer
tick call from account_system_time.  Now Frank van Maarseveen reports that to
be found inadequate.  How about this?  Works for Frank.

Replace update_mem_hiwater, a poor combination of two unrelated ops, by macros
update_hiwater_rss and update_hiwater_vm.  Don't attempt to keep
mm->hiwater_rss up to date at timer tick, nor every time we raise rss (usually
by 1): those are hot paths.  Do the opposite, update only when about to lower
rss (usually by many), or just before final accounting in do_exit.  Handle
mm->hiwater_vm in the same way, though it's much less of an issue.  Demand
that whoever collects these hiwater statistics do the work of taking the
maximum with rss or total_vm.

And there has been no collector of these hiwater statistics in the tree.  The
new convention needs an example, so match Frank's usage by adding a VmPeak
line above VmSize to /proc/<pid>/status, and also a VmHWM line above VmRSS
(High-Water-Mark or High-Water-Memory).

There was a particular anomaly during mremap move, that hiwater_vm might be
captured too high.  A fleeting such anomaly remains, but it's quickly
corrected now, whereas before it would stick.

What locking?  None: if the app is racy then these statistics will be racy,
it's not worth any overhead to make them exact.  But whenever it suits,
hiwater_vm is updated under exclusive mmap_sem, and hiwater_rss under
page_table_lock (for now) or with preemption disabled (later on): without
going to any trouble, minimize the time between reading current values and
updating, to minimize those occasions when a racing thread bumps a count up
and back down in between.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 861f2fb8
Loading
Loading
Loading
Loading
+0 −1
Original line number Original line Diff line number Diff line
@@ -1490,7 +1490,6 @@ int compat_do_execve(char * filename,
		/* execve success */
		/* execve success */
		security_bprm_free(bprm);
		security_bprm_free(bprm);
		acct_update_integrals(current);
		acct_update_integrals(current);
		update_mem_hiwater(current);
		kfree(bprm);
		kfree(bprm);
		return retval;
		return retval;
	}
	}
+0 −1
Original line number Original line Diff line number Diff line
@@ -1207,7 +1207,6 @@ int do_execve(char * filename,
		/* execve success */
		/* execve success */
		security_bprm_free(bprm);
		security_bprm_free(bprm);
		acct_update_integrals(current);
		acct_update_integrals(current);
		update_mem_hiwater(current);
		kfree(bprm);
		kfree(bprm);
		return retval;
		return retval;
	}
	}
+21 −2
Original line number Original line Diff line number Diff line
@@ -14,22 +14,41 @@
char *task_mem(struct mm_struct *mm, char *buffer)
char *task_mem(struct mm_struct *mm, char *buffer)
{
{
	unsigned long data, text, lib;
	unsigned long data, text, lib;
	unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss;

	/*
	 * Note: to minimize their overhead, mm maintains hiwater_vm and
	 * hiwater_rss only when about to *lower* total_vm or rss.  Any
	 * collector of these hiwater stats must therefore get total_vm
	 * and rss too, which will usually be the higher.  Barriers? not
	 * worth the effort, such snapshots can always be inconsistent.
	 */
	hiwater_vm = total_vm = mm->total_vm;
	if (hiwater_vm < mm->hiwater_vm)
		hiwater_vm = mm->hiwater_vm;
	hiwater_rss = total_rss = get_mm_rss(mm);
	if (hiwater_rss < mm->hiwater_rss)
		hiwater_rss = mm->hiwater_rss;


	data = mm->total_vm - mm->shared_vm - mm->stack_vm;
	data = mm->total_vm - mm->shared_vm - mm->stack_vm;
	text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> 10;
	text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> 10;
	lib = (mm->exec_vm << (PAGE_SHIFT-10)) - text;
	lib = (mm->exec_vm << (PAGE_SHIFT-10)) - text;
	buffer += sprintf(buffer,
	buffer += sprintf(buffer,
		"VmPeak:\t%8lu kB\n"
		"VmSize:\t%8lu kB\n"
		"VmSize:\t%8lu kB\n"
		"VmLck:\t%8lu kB\n"
		"VmLck:\t%8lu kB\n"
		"VmHWM:\t%8lu kB\n"
		"VmRSS:\t%8lu kB\n"
		"VmRSS:\t%8lu kB\n"
		"VmData:\t%8lu kB\n"
		"VmData:\t%8lu kB\n"
		"VmStk:\t%8lu kB\n"
		"VmStk:\t%8lu kB\n"
		"VmExe:\t%8lu kB\n"
		"VmExe:\t%8lu kB\n"
		"VmLib:\t%8lu kB\n"
		"VmLib:\t%8lu kB\n"
		"VmPTE:\t%8lu kB\n",
		"VmPTE:\t%8lu kB\n",
		(mm->total_vm - mm->reserved_vm) << (PAGE_SHIFT-10),
		hiwater_vm << (PAGE_SHIFT-10),
		(total_vm - mm->reserved_vm) << (PAGE_SHIFT-10),
		mm->locked_vm << (PAGE_SHIFT-10),
		mm->locked_vm << (PAGE_SHIFT-10),
		get_mm_rss(mm) << (PAGE_SHIFT-10),
		hiwater_rss << (PAGE_SHIFT-10),
		total_rss << (PAGE_SHIFT-10),
		data << (PAGE_SHIFT-10),
		data << (PAGE_SHIFT-10),
		mm->stack_vm << (PAGE_SHIFT-10), text, lib,
		mm->stack_vm << (PAGE_SHIFT-10), text, lib,
		(PTRS_PER_PTE*sizeof(pte_t)*mm->nr_ptes) >> 10);
		(PTRS_PER_PTE*sizeof(pte_t)*mm->nr_ptes) >> 10);
+0 −3
Original line number Original line Diff line number Diff line
@@ -938,9 +938,6 @@ static inline void vm_stat_account(struct mm_struct *mm,
}
}
#endif /* CONFIG_PROC_FS */
#endif /* CONFIG_PROC_FS */


/* update per process rss and vm hiwater data */
extern void update_mem_hiwater(struct task_struct *tsk);

#ifndef CONFIG_DEBUG_PAGEALLOC
#ifndef CONFIG_DEBUG_PAGEALLOC
static inline void
static inline void
kernel_map_pages(struct page *page, int numpages, int enable)
kernel_map_pages(struct page *page, int numpages, int enable)
+10 −0
Original line number Original line Diff line number Diff line
@@ -256,6 +256,16 @@ extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long);
#define dec_mm_counter(mm, member) (mm)->_##member--
#define dec_mm_counter(mm, member) (mm)->_##member--
#define get_mm_rss(mm) ((mm)->_file_rss + (mm)->_anon_rss)
#define get_mm_rss(mm) ((mm)->_file_rss + (mm)->_anon_rss)


/*
 * update_hiwater_rss - record the current rss as the high-water mark
 * if it exceeds the value stored in mm->hiwater_rss.
 *
 * Intended to be called just before rss is lowered (or before final
 * accounting), not every time rss is raised; readers of hiwater_rss
 * must therefore still take the maximum with the current rss.
 *
 * @mm is evaluated exactly once, so an argument with side effects is
 * safe.  The rss is sampled once into a local so that the value
 * compared is the value stored.  No locking is performed here.
 */
#define update_hiwater_rss(mm)	do {			\
	struct mm_struct *_hw_mm = (mm);		\
	unsigned long _rss = get_mm_rss(_hw_mm);	\
	if (_hw_mm->hiwater_rss < _rss)			\
		_hw_mm->hiwater_rss = _rss;		\
} while (0)
/*
 * update_hiwater_vm - record the current total_vm as the high-water
 * mark if it exceeds the value stored in mm->hiwater_vm.
 *
 * Intended to be called just before total_vm is lowered (or before
 * final accounting); readers of hiwater_vm must therefore still take
 * the maximum with the current total_vm.
 *
 * @mm is evaluated exactly once, and total_vm is sampled once into a
 * local so that the value compared is the value stored.  No locking
 * is performed here.
 */
#define update_hiwater_vm(mm)	do {			\
	struct mm_struct *_hw_mm = (mm);		\
	unsigned long _vm = _hw_mm->total_vm;		\
	if (_hw_mm->hiwater_vm < _vm)			\
		_hw_mm->hiwater_vm = _vm;		\
} while (0)

typedef unsigned long mm_counter_t;
typedef unsigned long mm_counter_t;


struct mm_struct {
struct mm_struct {
Loading