Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 90c41510 authored by qctecmdr's avatar qctecmdr Committed by Gerrit - the friendly Code Review server
Browse files

Merge "mm/oom-kill: Add debug policy"

parents 862373cb d1b6cf4f
Loading
Loading
Loading
Loading
+6 −2
Original line number Diff line number Diff line
@@ -123,14 +123,18 @@ extern void dump_tasks(struct mem_cgroup *memcg,
		       const nodemask_t *nodemask);

#ifdef CONFIG_HAVE_USERSPACE_LOW_MEMORY_KILLER
extern bool should_ulmk_retry(void);
extern bool should_ulmk_retry(gfp_t gfp);
extern void ulmk_update_last_kill(void);
extern void ulmk_watchdog_fn(struct timer_list *t);
extern void ulmk_watchdog_pet(struct timer_list *t);
#else
static inline bool should_ulmk_retry(void)
static inline bool should_ulmk_retry(gfp_t gfp)
{
	return false;
}
static inline void ulmk_update_last_kill(void) {}
static inline void ulmk_watchdog_fn(struct timer_list *t) {}
static inline void ulmk_watchdog_pet(struct timer_list *t) {}
#endif

/* sysctls */
+5 −0
Original line number Diff line number Diff line
@@ -24,6 +24,7 @@ void psi_memstall_leave(unsigned long *flags);
int psi_show(struct seq_file *s, struct psi_group *group, enum psi_res res);

void psi_emergency_trigger(void);
bool psi_is_trigger_active(void);

#ifdef CONFIG_CGROUPS
int psi_cgroup_alloc(struct cgroup *cgrp);
@@ -46,6 +47,10 @@ static inline void psi_memstall_enter(unsigned long *flags) {}
static inline void psi_memstall_leave(unsigned long *flags) {}

static inline void psi_emergency_trigger(void){}
/*
 * Inline fallback counterpart of the psi_is_trigger_active() prototype:
 * it never reports an active trigger.
 * NOTE(review): presumably selected when PSI is not built in -- the
 * guarding preprocessor condition is outside this view; confirm.
 */
static inline bool psi_is_trigger_active(void)
{
	return false;
}

#ifdef CONFIG_CGROUPS
static inline int psi_cgroup_alloc(struct cgroup *cgrp)
+2 −0
Original line number Diff line number Diff line
@@ -6,6 +6,7 @@
#include <linux/types.h>
#include <linux/kref.h>
#include <linux/wait.h>
#include <linux/timer.h>

#ifdef CONFIG_PSI

@@ -126,6 +127,7 @@ struct psi_trigger {

	/* Task that created the trigger */
	char comm[TASK_COMM_LEN];
	struct timer_list wdog_timer;
};

struct psi_group {
+52 −3
Original line number Diff line number Diff line
@@ -575,8 +575,12 @@ static u64 update_triggers(struct psi_group *group, u64 now)
		trace_psi_event(t->state, t->threshold);

		/* Generate an event */
		if (cmpxchg(&t->event, 0, 1) == 0)
		if (cmpxchg(&t->event, 0, 1) == 0) {
			if (!strcmp(t->comm, ULMK_MAGIC))
				mod_timer(&t->wdog_timer, jiffies +
					  nsecs_to_jiffies(2 * t->win.size));
			wake_up_interruptible(&t->event_wait);
		}
		t->last_event_time = now;
	}

@@ -588,10 +592,14 @@ static u64 update_triggers(struct psi_group *group, u64 now)
	return now + group->poll_min_period;
}

/*
 * Allows sending more than one event per window.
 */
void psi_emergency_trigger(void)
{
	struct psi_group *group = &psi_system;
	struct psi_trigger *t;
	u64 now;

	if (static_branch_likely(&psi_disabled))
		return;
@@ -603,18 +611,54 @@ void psi_emergency_trigger(void)
	if (!mutex_trylock(&group->trigger_lock))
		return;

	now = sched_clock();
	list_for_each_entry(t, &group->triggers, node) {
		if (strcmp(t->comm, ULMK_MAGIC))
			continue;
		trace_psi_event(t->state, t->threshold);

		/* Generate an event */
		if (cmpxchg(&t->event, 0, 1) == 0)
		if (cmpxchg(&t->event, 0, 1) == 0) {
			mod_timer(&t->wdog_timer, (unsigned long)t->win.size);
			wake_up_interruptible(&t->event_wait);
		}
		t->last_event_time = now;
	}
	mutex_unlock(&group->trigger_lock);
}

/*
 * Report whether a trigger on the system-wide PSI group whose creator
 * comm equals ULMK_MAGIC generated an event within its last window.
 *
 * Returns false when PSI is disabled, and true when the group's
 * trigger_lock cannot be taken without blocking.
 */
bool psi_is_trigger_active(void)
{
	struct psi_group *sys_group = &psi_system;
	struct psi_trigger *trig;
	bool active = false;
	u64 ts;

	if (static_branch_likely(&psi_disabled))
		return false;

	/*
	 * The lock should only be contended in the unlikely case that OOM
	 * was triggered while triggers are being added or removed; report
	 * "active" rather than wait.
	 */
	if (!mutex_trylock(&sys_group->trigger_lock))
		return true;

	ts = sched_clock();
	list_for_each_entry(trig, &sys_group->triggers, node) {
		/* Only triggers created by a task named ULMK_MAGIC count. */
		if (!strcmp(trig->comm, ULMK_MAGIC) &&
		    ts <= trig->last_event_time + trig->win.size)
			active = true;
	}
	mutex_unlock(&sys_group->trigger_lock);

	return active;
}

/*
 * Schedule polling if it's not already scheduled. It's safe to call even from
 * hotpath because even though kthread_queue_delayed_work takes worker->lock
@@ -1116,6 +1160,7 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group,
	init_waitqueue_head(&t->event_wait);
	kref_init(&t->refcount);
	get_task_comm(t->comm, current);
	timer_setup(&t->wdog_timer, ulmk_watchdog_fn, TIMER_DEFERRABLE);

	mutex_lock(&group->trigger_lock);

@@ -1188,6 +1233,7 @@ static void psi_trigger_destroy(struct kref *ref)
		}
	}

	del_timer_sync(&t->wdog_timer);
	mutex_unlock(&group->trigger_lock);

	/*
@@ -1241,8 +1287,11 @@ __poll_t psi_trigger_poll(void **trigger_ptr,

	poll_wait(file, &t->event_wait, wait);

	if (cmpxchg(&t->event, 1, 0) == 1)
	if (cmpxchg(&t->event, 1, 0) == 1) {
		ret |= EPOLLPRI;
		if (!strcmp(t->comm, ULMK_MAGIC))
			ulmk_watchdog_pet(&t->wdog_timer);
	}

	kref_put(&t->refcount, psi_trigger_destroy);

+75 −30
Original line number Diff line number Diff line
@@ -77,8 +77,21 @@ DEFINE_MUTEX(oom_lock);
 */

#ifdef CONFIG_HAVE_USERSPACE_LOW_MEMORY_KILLER

/* The maximum amount of time to loop in should_ulmk_retry() */
#define ULMK_TIMEOUT (20 * HZ)

/*
 * Bits of ulmk_dbg_policy. A set bit turns the matching condition into a
 * BUG_ON():
 *  TRIGGER      - should_ulmk_retry() found no active PSI trigger.
 *  WDOG         - the ULMK watchdog timer fired (ulmk_watchdog_fn()).
 *  POSITIVE_ADJ - should_ulmk_retry() could not make progress through
 *                 out_of_memory() or ulmk_kill_possible().
 *  ALL          - mask covering the three bits above.
 */
#define ULMK_DBG_POLICY_TRIGGER (BIT(0))
#define ULMK_DBG_POLICY_WDOG (BIT(1))
#define ULMK_DBG_POLICY_POSITIVE_ADJ (BIT(2))
#define ULMK_DBG_POLICY_ALL (BIT(3) - 1)
/* Active debug policy mask; exposed as a module parameter (mode 0644). */
static unsigned int ulmk_dbg_policy;
module_param(ulmk_dbg_policy, uint, 0644);

/* Set to 1 by ulmk_watchdog_fn(), cleared by ulmk_watchdog_pet(). */
static atomic64_t ulmk_wdog_expired = ATOMIC64_INIT(0);
/* jiffies value stored by ulmk_update_last_kill(). */
static atomic64_t ulmk_kill_jiffies = ATOMIC64_INIT(INITIAL_JIFFIES);
/* jiffies value of the last emergency event, see should_ulmk_retry(). */
static unsigned long psi_emergency_jiffies = INITIAL_JIFFIES;
/* Prevents contention on the mutex_trylock in psi_emergency_trigger() */
static DEFINE_MUTEX(ulmk_retry_lock);

static bool ulmk_kill_possible(void)
@@ -105,50 +118,81 @@ static bool ulmk_kill_possible(void)
}

/*
 * psi_emergency_jiffies represents the last ULMK emergency event.
 * Give ULMK a 2 second window to handle this event.
 * If ULMK has made some progress since then, send another.
 * Repeat as necessary.
 * If CONFIG_DEBUG_PANIC_ON_OOM is enabled, attempt to determine *why*
 * we are in this state.
 * 1) No events were sent by PSI to userspace
 * 2) PSI sent an event to userspace, but userspace was not able to
 * receive the event. Possible causes of this include waiting for a
 * mutex which is held by a process in direct reclaim. Or the userspace
 * component has crashed.
 * 3) Userspace received the event, but decided not to kill anything.
 */
bool should_ulmk_retry(void)
bool should_ulmk_retry(gfp_t gfp_mask)
{
	unsigned long now, last_kill;
	bool ret = false;
	bool ret = true;
	bool wdog_expired, trigger_active;

	struct oom_control oc = {
		.zonelist = node_zonelist(first_memory_node, gfp_mask),
		.nodemask = NULL,
		.memcg = NULL,
		.gfp_mask = gfp_mask,
		.order = 0,
		/* Also causes check_panic_on_oom not to panic */
		.only_positive_adj = true,
	};

	if (!sysctl_panic_on_oom)
		return false;

	if (gfp_mask & __GFP_RETRY_MAYFAIL)
		return false;

	/* Someone else is already checking. */
	if (!mutex_trylock(&ulmk_retry_lock))
		return true;

	mutex_lock(&ulmk_retry_lock);
	now = jiffies;
	last_kill = atomic64_read(&ulmk_kill_jiffies);
	if (time_before(now, psi_emergency_jiffies + 2 * HZ)) {
		ret = true;
		goto out;
	}
	wdog_expired = atomic64_read(&ulmk_wdog_expired);
	trigger_active = psi_is_trigger_active();

	if (time_after_eq(last_kill, psi_emergency_jiffies)) {
	if (time_after(last_kill, psi_emergency_jiffies)) {
		psi_emergency_jiffies = now;
		psi_emergency_trigger();
		ret = true;
		goto out;
	}

	/*
	 * Reaching here means no kill has happened since the last
	 * emergency trigger within the 2*HZ window. We can't derive the
	 * status of the low memory killer here. So, before falling back
	 * to OOM, repeatedly check for any +ve adj tasks left in the
	 * system for the next 20*HZ. Indirectly, the logic below also
	 * gives a 20*HZ window for the first emergency trigger.
	 */
	if (time_after(psi_emergency_jiffies + 20 * HZ, now) &&
	    ulmk_kill_possible()) {
	} else if (time_after(now, psi_emergency_jiffies + ULMK_TIMEOUT)) {
		ret = false;
	} else if (!trigger_active) {
		BUG_ON(ulmk_dbg_policy & ULMK_DBG_POLICY_TRIGGER);
		psi_emergency_trigger();
		ret = true;
		goto out;
	} else if (wdog_expired) {
		mutex_lock(&oom_lock);
		ret = out_of_memory(&oc);
		mutex_unlock(&oom_lock);
		BUG_ON(!ret && ulmk_dbg_policy & ULMK_DBG_POLICY_POSITIVE_ADJ);
	} else if (!ulmk_kill_possible()) {
		BUG_ON(ulmk_dbg_policy & ULMK_DBG_POLICY_POSITIVE_ADJ);
		ret = false;
	}

out:
	mutex_unlock(&ulmk_retry_lock);
	return ret;
}

/*
 * Timer callback installed on a trigger's wdog_timer by
 * psi_trigger_create(). Records that the watchdog expired.
 *
 * @t: the expired timer (unused here).
 */
void ulmk_watchdog_fn(struct timer_list *t)
{
	/* Read back by should_ulmk_retry() as its wdog_expired state. */
	atomic64_set(&ulmk_wdog_expired, 1);
	/* Debug policy: crash immediately if the WDOG bit is set. */
	BUG_ON(ulmk_dbg_policy & ULMK_DBG_POLICY_WDOG);
}

/*
 * Stop the given watchdog timer and clear the expired flag. Called from
 * psi_trigger_poll() when a ULMK_MAGIC trigger's pending event is consumed.
 *
 * @t: the trigger's watchdog timer.
 */
void ulmk_watchdog_pet(struct timer_list *t)
{
	/*
	 * Stop the timer before clearing the flag so that a handler firing
	 * in between does not set it again afterwards.
	 */
	del_timer_sync(t);
	atomic64_set(&ulmk_wdog_expired, 0);
}

void ulmk_update_last_kill(void)
{
	atomic64_set(&ulmk_kill_jiffies, jiffies);
@@ -1143,7 +1187,7 @@ static void check_panic_on_oom(struct oom_control *oc,
			return;
	}
	/* Do not panic for oom kills triggered by sysrq */
	if (is_sysrq_oom(oc))
	if (is_sysrq_oom(oc) || oc->only_positive_adj)
		return;
	dump_header(oc, NULL);
	panic("Out of memory: %s panic_on_oom is enabled\n",
@@ -1244,7 +1288,8 @@ bool out_of_memory(struct oom_control *oc)
		 * system level, we cannot survive this and will enter
		 * an endless loop in the allocator. Bail out now.
		 */
		if (!is_sysrq_oom(oc) && !is_memcg_oom(oc))
		if (!is_sysrq_oom(oc) && !is_memcg_oom(oc) &&
		    !oc->only_positive_adj)
			panic("System is deadlocked on memory\n");
	}
	if (oc->chosen && oc->chosen != (void *)-1UL)
Loading