Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit c7ba5c9e, authored by Pavel Emelianov, committed by Linus Torvalds
Browse files

Memory controller: OOM handling



Out-of-memory handling for cgroups that exceed their limit. A task from
the over-limit cgroup is chosen using the existing OOM logic and killed.

TODO:
1. As discussed in the OLS BOF session, consider implementing a
user-space policy for OOM handling.

[akpm@linux-foundation.org: fix build due to oom-killer changes]
Signed-off-by: Pavel Emelianov <xemul@openvz.org>
Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Paul Menage <menage@google.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Kirill Korotaev <dev@sw.ru>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Cc: David Rientjes <rientjes@google.com>
Cc: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 0eea1030
Loading
Loading
Loading
Loading
+1 −0
Original line number Original line Diff line number Diff line
@@ -39,6 +39,7 @@ extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
					int mode, struct zone *z,
					int mode, struct zone *z,
					struct mem_cgroup *mem_cont,
					struct mem_cgroup *mem_cont,
					int active);
					int active);
extern void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask);


static inline void mem_cgroup_uncharge_page(struct page *page)
static inline void mem_cgroup_uncharge_page(struct page *page)
{
{
+1 −0
Original line number Original line Diff line number Diff line
@@ -329,6 +329,7 @@ int mem_cgroup_charge(struct page *page, struct mm_struct *mm)
		}
		}


		css_put(&mem->css);
		css_put(&mem->css);
		mem_cgroup_out_of_memory(mem, GFP_KERNEL);
		goto free_pc;
		goto free_pc;
	}
	}


+39 −4
Original line number Original line Diff line number Diff line
@@ -25,6 +25,7 @@
#include <linux/cpuset.h>
#include <linux/cpuset.h>
#include <linux/module.h>
#include <linux/module.h>
#include <linux/notifier.h>
#include <linux/notifier.h>
#include <linux/memcontrol.h>


int sysctl_panic_on_oom;
int sysctl_panic_on_oom;
int sysctl_oom_kill_allocating_task;
int sysctl_oom_kill_allocating_task;
@@ -50,7 +51,8 @@ static DEFINE_SPINLOCK(zone_scan_mutex);
 *    of least surprise ... (be careful when you change it)
 *    of least surprise ... (be careful when you change it)
 */
 */


unsigned long badness(struct task_struct *p, unsigned long uptime)
unsigned long badness(struct task_struct *p, unsigned long uptime,
			struct mem_cgroup *mem)
{
{
	unsigned long points, cpu_time, run_time, s;
	unsigned long points, cpu_time, run_time, s;
	struct mm_struct *mm;
	struct mm_struct *mm;
@@ -63,6 +65,13 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
		return 0;
		return 0;
	}
	}


#ifdef CONFIG_CGROUP_MEM_CONT
	if (mem != NULL && mm->mem_cgroup != mem) {
		task_unlock(p);
		return 0;
	}
#endif

	/*
	/*
	 * The memory size of the process is the basis for the badness.
	 * The memory size of the process is the basis for the badness.
	 */
	 */
@@ -193,7 +202,8 @@ static inline enum oom_constraint constrained_alloc(struct zonelist *zonelist,
 *
 *
 * (not docbooked, we don't want this one cluttering up the manual)
 * (not docbooked, we don't want this one cluttering up the manual)
 */
 */
static struct task_struct *select_bad_process(unsigned long *ppoints)
static struct task_struct *select_bad_process(unsigned long *ppoints,
						struct mem_cgroup *mem)
{
{
	struct task_struct *g, *p;
	struct task_struct *g, *p;
	struct task_struct *chosen = NULL;
	struct task_struct *chosen = NULL;
@@ -247,7 +257,7 @@ static struct task_struct *select_bad_process(unsigned long *ppoints)
		if (p->oomkilladj == OOM_DISABLE)
		if (p->oomkilladj == OOM_DISABLE)
			continue;
			continue;


		points = badness(p, uptime.tv_sec);
		points = badness(p, uptime.tv_sec, mem);
		if (points > *ppoints || !chosen) {
		if (points > *ppoints || !chosen) {
			chosen = p;
			chosen = p;
			*ppoints = points;
			*ppoints = points;
@@ -368,6 +378,31 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
	return oom_kill_task(p);
	return oom_kill_task(p);
}
}


#ifdef CONFIG_CGROUP_MEM_CONT
/*
 * mem_cgroup_out_of_memory - OOM-kill on behalf of a memory cgroup.
 * @mem:      cgroup passed to select_bad_process() to restrict the search
 * @gfp_mask: allocation mask forwarded to oom_kill_process()
 *
 * Runs the regular victim selection with @mem as a filter and keeps
 * trying until oom_kill_process() reports success (returns 0) or the
 * selection asks us to stop.  Both cgroup_lock() and rcu_read_lock() are
 * held across the whole select/kill loop and dropped before returning.
 */
void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask)
{
	unsigned long points = 0;
	struct task_struct *victim;

	cgroup_lock();
	rcu_read_lock();

	for (;;) {
		victim = select_bad_process(&points, mem);

		/*
		 * A -1 encoded pointer means "do not kill anything now".
		 * NOTE(review): presumably a kill is already in flight;
		 * confirm against select_bad_process().
		 */
		if (PTR_ERR(victim) == -1UL)
			break;

		/* Nothing eligible was found: fall back to the caller. */
		if (!victim)
			victim = current;

		/* Order is passed as 0; a non-zero result requests a retry. */
		if (!oom_kill_process(victim, gfp_mask, 0, points,
				"Memory cgroup out of memory"))
			break;
	}

	rcu_read_unlock();
	cgroup_unlock();
}
#endif

static BLOCKING_NOTIFIER_HEAD(oom_notify_list);
static BLOCKING_NOTIFIER_HEAD(oom_notify_list);


int register_oom_notifier(struct notifier_block *nb)
int register_oom_notifier(struct notifier_block *nb)
@@ -484,7 +519,7 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order)
		 * Rambo mode: Shoot down a process and hope it solves whatever
		 * Rambo mode: Shoot down a process and hope it solves whatever
		 * issues we may have.
		 * issues we may have.
		 */
		 */
		p = select_bad_process(&points);
		p = select_bad_process(&points, NULL);


		if (PTR_ERR(p) == -1UL)
		if (PTR_ERR(p) == -1UL)
			goto out;
			goto out;