Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit d29a8d3a authored by Linux Build Service Account's avatar Linux Build Service Account Committed by Gerrit - the friendly Code Review server
Browse files

Merge "lowmemorykiller: adapt to vmpressure"

parents 4438ea36 b99475dc
Loading
Loading
Loading
Loading
+104 −1
Original line number Diff line number Diff line
@@ -45,6 +45,10 @@
#include <linux/swap.h>
#include <linux/fs.h>
#include <linux/cpuset.h>
#include <linux/vmpressure.h>

#define CREATE_TRACE_POINTS
#include <trace/events/almk.h>

#ifdef CONFIG_HIGHMEM
#define _ZONE ZONE_HIGHMEM
@@ -86,6 +90,96 @@ static unsigned long lowmem_count(struct shrinker *s,
		global_page_state(NR_INACTIVE_FILE);
}

static atomic_t shift_adj = ATOMIC_INIT(0);
static short adj_max_shift = 353;

/* User knob to enable/disable adaptive lmk feature */
static int enable_adaptive_lmk;
module_param_named(enable_adaptive_lmk, enable_adaptive_lmk, int,
	S_IRUGO | S_IWUSR);

/*
 * This parameter controls the behaviour of LMK when vmpressure is in
 * the range of 90-94. Adaptive lmk triggers based on number of file
 * pages wrt vmpressure_file_min, when vmpressure is in the range of
 * 90-94. Usually this is a pseudo minfree value, higher than the
 * highest configured value in minfree array.
 */
static int vmpressure_file_min;
module_param_named(vmpressure_file_min, vmpressure_file_min, int,
	S_IRUGO | S_IWUSR);

enum {
	VMPRESSURE_NO_ADJUST = 0,
	VMPRESSURE_ADJUST_ENCROACH,
	VMPRESSURE_ADJUST_NORMAL,
};

int adjust_minadj(short *min_score_adj)
{
	int ret = VMPRESSURE_NO_ADJUST;

	if (!enable_adaptive_lmk)
		return 0;

	if (atomic_read(&shift_adj) &&
		(*min_score_adj > adj_max_shift)) {
		if (*min_score_adj == OOM_SCORE_ADJ_MAX + 1)
			ret = VMPRESSURE_ADJUST_ENCROACH;
		else
			ret = VMPRESSURE_ADJUST_NORMAL;
		*min_score_adj = adj_max_shift;
	}
	atomic_set(&shift_adj, 0);

	return ret;
}

static int lmk_vmpressure_notifier(struct notifier_block *nb,
			unsigned long action, void *data)
{
	int other_free, other_file;
	unsigned long pressure = action;
	int array_size = ARRAY_SIZE(lowmem_adj);

	if (!enable_adaptive_lmk)
		return 0;

	if (pressure >= 95) {
		other_file = global_page_state(NR_FILE_PAGES) -
			global_page_state(NR_SHMEM) -
			total_swapcache_pages();
		other_free = global_page_state(NR_FREE_PAGES);

		atomic_set(&shift_adj, 1);
		trace_almk_vmpressure(pressure, other_free, other_file);
	} else if (pressure >= 90) {
		if (lowmem_adj_size < array_size)
			array_size = lowmem_adj_size;
		if (lowmem_minfree_size < array_size)
			array_size = lowmem_minfree_size;

		other_file = global_page_state(NR_FILE_PAGES) -
			global_page_state(NR_SHMEM) -
			total_swapcache_pages();

		other_free = global_page_state(NR_FREE_PAGES);

		if ((other_free < lowmem_minfree[array_size - 1]) &&
			(other_file < vmpressure_file_min)) {
				atomic_set(&shift_adj, 1);
				trace_almk_vmpressure(pressure, other_free,
					other_file);
		}
	}

	return 0;
}

static struct notifier_block lmk_vmpr_nb = {
	.notifier_call = lmk_vmpressure_notifier,
};

static int test_task_flag(struct task_struct *p, int flag)
{
	struct task_struct *t = p;
@@ -279,6 +373,7 @@ static unsigned long lowmem_scan(struct shrinker *s, struct shrink_control *sc)
	unsigned long rem = 0;
	int tasksize;
	int i;
	int ret = 0;
	short min_score_adj = OOM_SCORE_ADJ_MAX + 1;
	int minfree = 0;
	int selected_tasksize = 0;
@@ -314,11 +409,14 @@ static unsigned long lowmem_scan(struct shrinker *s, struct shrink_control *sc)
		}
	}

	ret = adjust_minadj(&min_score_adj);

	lowmem_print(3, "lowmem_scan %lu, %x, ofree %d %d, ma %hd\n",
			sc->nr_to_scan, sc->gfp_mask, other_free,
			other_file, min_score_adj);

	if (min_score_adj == OOM_SCORE_ADJ_MAX + 1) {
		trace_almk_shrink(0, ret, other_free, other_file, 0);
		lowmem_print(5, "lowmem_scan %lu, %x, return 0\n",
			     sc->nr_to_scan, sc->gfp_mask);
		mutex_unlock(&scan_mutex);
@@ -414,8 +512,12 @@ static unsigned long lowmem_scan(struct shrinker *s, struct shrink_control *sc)
		rcu_read_unlock();
		/* give the system time to free up the memory */
		msleep_interruptible(20);
	} else
		trace_almk_shrink(selected_tasksize, ret,
			other_free, other_file, selected_oom_score_adj);
	} else {
		trace_almk_shrink(1, ret, other_free, other_file, 0);
		rcu_read_unlock();
	}

	lowmem_print(4, "lowmem_scan %lu, %x, return %lu\n",
		     sc->nr_to_scan, sc->gfp_mask, rem);
@@ -432,6 +534,7 @@ static struct shrinker lowmem_shrinker = {
static int __init lowmem_init(void)
{
	register_shrinker(&lowmem_shrinker);
	vmpressure_notifier_register(&lmk_vmpr_nb);
	return 0;
}

+7 −5
Original line number Diff line number Diff line
@@ -25,11 +25,13 @@ struct vmpressure {

struct mem_cgroup;

#ifdef CONFIG_MEMCG
extern int vmpressure_notifier_register(struct notifier_block *nb);
extern int vmpressure_notifier_unregister(struct notifier_block *nb);
extern void vmpressure(gfp_t gfp, struct mem_cgroup *memcg,
		       unsigned long scanned, unsigned long reclaimed);
extern void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, int prio);

#ifdef CONFIG_MEMCG
extern void vmpressure_init(struct vmpressure *vmpr);
extern void vmpressure_cleanup(struct vmpressure *vmpr);
extern struct vmpressure *memcg_to_vmpressure(struct mem_cgroup *memcg);
@@ -40,9 +42,9 @@ extern int vmpressure_register_event(struct mem_cgroup *memcg,
extern void vmpressure_unregister_event(struct mem_cgroup *memcg,
					struct eventfd_ctx *eventfd);
#else
static inline void vmpressure(gfp_t gfp, struct mem_cgroup *memcg,
			      unsigned long scanned, unsigned long reclaimed) {}
static inline void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg,
				   int prio) {}
static inline struct vmpressure *memcg_to_vmpressure(struct mem_cgroup *memcg)
{
	return NULL;
}
#endif /* CONFIG_MEMCG */
#endif /* __LINUX_VMPRESSURE_H */
+84 −0
Original line number Diff line number Diff line
/* Copyright (c) 2015, The Linux Foundation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
 * only version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */

#undef TRACE_SYSTEM
#define TRACE_SYSTEM almk

#if !defined(_TRACE_EVENT_ALMK_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_EVENT_ALMK_H

#include <linux/tracepoint.h>
#include <linux/types.h>

TRACE_EVENT(almk_vmpressure,

	TP_PROTO(unsigned long pressure,
		int other_free,
		int other_file),

	TP_ARGS(pressure, other_free, other_file),

	TP_STRUCT__entry(
		__field(unsigned long, pressure)
		__field(int, other_free)
		__field(int, other_file)
	),

	TP_fast_assign(
		__entry->pressure	= pressure;
		__entry->other_free	= other_free;
		__entry->other_file	= other_file;
	),

	TP_printk("%lu, %d, %d",
			__entry->pressure, __entry->other_free,
			__entry->other_file)
);

TRACE_EVENT(almk_shrink,

	TP_PROTO(int tsize,
		 int vmp,
		 int other_free,
		 int other_file,
		 short adj),

	TP_ARGS(tsize, vmp, other_free, other_file, adj),

	TP_STRUCT__entry(
		__field(int, tsize)
		__field(int, vmp)
		__field(int, other_free)
		__field(int, other_file)
		__field(short, adj)
	),

	TP_fast_assign(
		__entry->tsize		= tsize;
		__entry->vmp		= vmp;
		__entry->other_free     = other_free;
		__entry->other_file     = other_file;
		__entry->adj		= adj;
	),

	TP_printk("%d, %d, %d, %d, %d",
		__entry->tsize,
		__entry->vmp,
		__entry->other_free,
		__entry->other_file,
		__entry->adj)
);

#endif

#include <trace/define_trace.h>
+3 −2
Original line number Diff line number Diff line
@@ -21,7 +21,8 @@ obj-y := filemap.o mempool.o oom_kill.o \
			   mm_init.o mmu_context.o percpu.o slab_common.o \
			   compaction.o vmacache.o \
			   interval_tree.o list_lru.o workingset.o \
			   iov_iter.o debug.o $(mmu-y) showmem.o
			   iov_iter.o debug.o $(mmu-y) showmem.o \
			   vmpressure.o

obj-y += init-mm.o

@@ -59,7 +60,7 @@ obj-$(CONFIG_FS_XIP) += filemap_xip.o
obj-$(CONFIG_MIGRATION) += migrate.o
obj-$(CONFIG_QUICKLIST) += quicklist.o
obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += huge_memory.o
obj-$(CONFIG_MEMCG) += memcontrol.o page_cgroup.o vmpressure.o
obj-$(CONFIG_MEMCG) += memcontrol.o page_cgroup.o
obj-$(CONFIG_CGROUP_HUGETLB) += hugetlb_cgroup.o
obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o
obj-$(CONFIG_HWPOISON_INJECT) += hwpoison-inject.o
+100 −17
Original line number Diff line number Diff line
@@ -22,6 +22,8 @@
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/printk.h>
#include <linux/notifier.h>
#include <linux/init.h>
#include <linux/vmpressure.h>

/*
@@ -49,6 +51,24 @@ static const unsigned long vmpressure_win = SWAP_CLUSTER_MAX * 16;
static const unsigned int vmpressure_level_med = 60;
static const unsigned int vmpressure_level_critical = 95;

static struct vmpressure global_vmpressure;
BLOCKING_NOTIFIER_HEAD(vmpressure_notifier);

int vmpressure_notifier_register(struct notifier_block *nb)
{
	return blocking_notifier_chain_register(&vmpressure_notifier, nb);
}

int vmpressure_notifier_unregister(struct notifier_block *nb)
{
	return blocking_notifier_chain_unregister(&vmpressure_notifier, nb);
}

void vmpressure_notify(unsigned long pressure)
{
	blocking_notifier_call_chain(&vmpressure_notifier, pressure, NULL);
}

/*
 * When there are too little pages left to scan, vmpressure() may miss the
 * critical pressure as number of pages will be less than "window size".
@@ -75,6 +95,7 @@ static struct vmpressure *work_to_vmpressure(struct work_struct *work)
	return container_of(work, struct vmpressure, work);
}

#ifdef CONFIG_MEMCG
static struct vmpressure *vmpressure_parent(struct vmpressure *vmpr)
{
	struct cgroup_subsys_state *css = vmpressure_to_css(vmpr);
@@ -85,6 +106,12 @@ static struct vmpressure *vmpressure_parent(struct vmpressure *vmpr)
		return NULL;
	return memcg_to_vmpressure(memcg);
}
#else
static struct vmpressure *vmpressure_parent(struct vmpressure *vmpr)
{
	return NULL;
}
#endif

enum vmpressure_levels {
	VMPRESSURE_LOW = 0,
@@ -108,7 +135,7 @@ static enum vmpressure_levels vmpressure_level(unsigned long pressure)
	return VMPRESSURE_LOW;
}

static enum vmpressure_levels vmpressure_calc_level(unsigned long scanned,
static unsigned long vmpressure_calc_pressure(unsigned long scanned,
						    unsigned long reclaimed)
{
	unsigned long scale = scanned + reclaimed;
@@ -127,7 +154,7 @@ static enum vmpressure_levels vmpressure_calc_level(unsigned long scanned,
	pr_debug("%s: %3lu  (s: %lu  r: %lu)\n", __func__, pressure,
		 scanned, reclaimed);

	return vmpressure_level(pressure);
	return pressure;
}

struct vmpressure_event {
@@ -141,9 +168,11 @@ static bool vmpressure_event(struct vmpressure *vmpr,
{
	struct vmpressure_event *ev;
	enum vmpressure_levels level;
	unsigned long pressure;
	bool signalled = false;

	level = vmpressure_calc_level(scanned, reclaimed);
	pressure = vmpressure_calc_pressure(scanned, reclaimed);
	level = vmpressure_level(pressure);

	mutex_lock(&vmpr->events_lock);

@@ -195,24 +224,13 @@ static void vmpressure_work_fn(struct work_struct *work)
	} while ((vmpr = vmpressure_parent(vmpr)));
}

/**
 * vmpressure() - Account memory pressure through scanned/reclaimed ratio
 * @gfp:	reclaimer's gfp mask
 * @memcg:	cgroup memory controller handle
 * @scanned:	number of pages scanned
 * @reclaimed:	number of pages reclaimed
 *
 * This function should be called from the vmscan reclaim path to account
 * "instantaneous" memory pressure (scanned/reclaimed ratio). The raw
 * pressure index is then further refined and averaged over time.
 *
 * This function does not return any value.
 */
void vmpressure(gfp_t gfp, struct mem_cgroup *memcg,
void vmpressure_memcg(gfp_t gfp, struct mem_cgroup *memcg,
		unsigned long scanned, unsigned long reclaimed)
{
	struct vmpressure *vmpr = memcg_to_vmpressure(memcg);

	BUG_ON(!vmpr);

	/*
	 * Here we only want to account pressure that userland is able to
	 * help us with. For example, suppose that DMA zone is under
@@ -249,6 +267,60 @@ void vmpressure(gfp_t gfp, struct mem_cgroup *memcg,
	schedule_work(&vmpr->work);
}

void vmpressure_global(gfp_t gfp, unsigned long scanned,
		unsigned long reclaimed)
{
	struct vmpressure *vmpr = &global_vmpressure;
	unsigned long pressure;

	if (!(gfp & (__GFP_HIGHMEM | __GFP_MOVABLE | __GFP_IO | __GFP_FS)))
		return;

	if (!scanned)
		return;

	spin_lock(&vmpr->sr_lock);
	vmpr->scanned += scanned;
	vmpr->reclaimed += reclaimed;
	scanned = vmpr->scanned;
	reclaimed = vmpr->reclaimed;
	spin_unlock(&vmpr->sr_lock);

	if (scanned < vmpressure_win)
		return;

	spin_lock(&vmpr->sr_lock);
	vmpr->scanned = 0;
	vmpr->reclaimed = 0;
	spin_unlock(&vmpr->sr_lock);

	pressure = vmpressure_calc_pressure(scanned, reclaimed);
	vmpressure_notify(pressure);
}

/**
 * vmpressure() - Account memory pressure through scanned/reclaimed ratio
 * @gfp:	reclaimer's gfp mask
 * @memcg:	cgroup memory controller handle
 * @scanned:	number of pages scanned
 * @reclaimed:	number of pages reclaimed
 *
 * This function should be called from the vmscan reclaim path to account
 * "instantaneous" memory pressure (scanned/reclaimed ratio). The raw
 * pressure index is then further refined and averaged over time.
 *
 * This function does not return any value.
 */
void vmpressure(gfp_t gfp, struct mem_cgroup *memcg,
		unsigned long scanned, unsigned long reclaimed)
{
	if (!memcg)
		vmpressure_global(gfp, scanned, reclaimed);

	if (IS_ENABLED(CONFIG_MEMCG))
		vmpressure_memcg(gfp, memcg, scanned, reclaimed);
}

/**
 * vmpressure_prio() - Account memory pressure through reclaimer priority level
 * @gfp:	reclaimer's gfp mask
@@ -300,6 +372,8 @@ int vmpressure_register_event(struct mem_cgroup *memcg,
	struct vmpressure_event *ev;
	int level;

	BUG_ON(!vmpr);

	for (level = 0; level < VMPRESSURE_NUM_LEVELS; level++) {
		if (!strcmp(vmpressure_str_levels[level], args))
			break;
@@ -339,6 +413,8 @@ void vmpressure_unregister_event(struct mem_cgroup *memcg,
	struct vmpressure *vmpr = memcg_to_vmpressure(memcg);
	struct vmpressure_event *ev;

	BUG_ON(!vmpr);

	mutex_lock(&vmpr->events_lock);
	list_for_each_entry(ev, &vmpr->events, node) {
		if (ev->efd != eventfd)
@@ -380,3 +456,10 @@ void vmpressure_cleanup(struct vmpressure *vmpr)
	 */
	flush_work(&vmpr->work);
}

int vmpressure_global_init(void)
{
	vmpressure_init(&global_vmpressure);
	return 0;
}
late_initcall(vmpressure_global_init);