Loading Documentation/filesystems/proc.txt +20 −0 Original line number Diff line number Diff line Loading @@ -138,6 +138,7 @@ Table 1-1: Process specific entries in /proc maps Memory maps to executables and library files (2.4) mem Memory held by this process root Link to the root directory of this process reclaim Reclaim pages in this process stat Process status statm Process memory status information status Process status in human readable form Loading Loading @@ -528,6 +529,25 @@ current value: Any other value written to /proc/PID/clear_refs will have no effect. The file /proc/PID/reclaim is used to reclaim pages in this process. To reclaim file-backed pages, > echo file > /proc/PID/reclaim To reclaim anonymous pages, > echo anon > /proc/PID/reclaim To reclaim all pages, > echo all > /proc/PID/reclaim You can also specify an address range within the process so that only that part of the address space is reclaimed. The format is as follows: > echo addr size-byte > /proc/PID/reclaim NOTE: addr should be page-aligned. Below is an example which tries to reclaim 2M starting at address 0x100000. > echo 0x100000 2M > /proc/PID/reclaim The /proc/pid/pagemap gives the PFN, which can be used to find the pageflags using /proc/kpageflags and number of times a page is mapped using /proc/kpagecount. For detailed explanation, see Documentation/vm/pagemap.txt. 
Loading arch/arm/include/asm/thread_info.h +1 −0 Original line number Diff line number Diff line Loading @@ -148,6 +148,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *, #define TIF_USING_IWMMXT 17 #define TIF_MEMDIE 18 /* is terminating due to OOM killer */ #define TIF_RESTORE_SIGMASK 20 #define TIF_MM_RELEASED 21 /* task MM has been released */ #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) Loading arch/um/include/asm/thread_info.h +1 −0 Original line number Diff line number Diff line Loading @@ -63,6 +63,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_RESTORE_SIGMASK 7 #define TIF_NOTIFY_RESUME 8 #define TIF_SECCOMP 9 /* secure computing */ #define TIF_MM_RELEASED 10 /* task MM has been released */ #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) Loading drivers/block/zram/zram_drv.c +1 −0 Original line number Diff line number Diff line Loading @@ -1271,6 +1271,7 @@ static int zram_add(void) zram->disk->private_data = zram; snprintf(zram->disk->disk_name, 16, "zram%d", device_id); __set_bit(QUEUE_FLAG_FAST, &zram->disk->queue->queue_flags); /* Actual capacity set using syfs (/sys/block/zram<id>/disksize */ set_capacity(zram->disk, 0); /* zram devices sort of resembles non-rotational disks */ Loading drivers/staging/android/lowmemorykiller.c +406 −25 Original line number Diff line number Diff line Loading @@ -42,6 +42,21 @@ #include <linux/rcupdate.h> #include <linux/profile.h> #include <linux/notifier.h> #include <linux/mutex.h> #include <linux/delay.h> #include <linux/swap.h> #include <linux/fs.h> #include <linux/cpuset.h> #include <linux/vmpressure.h> #define CREATE_TRACE_POINTS #include <trace/events/almk.h> #ifdef CONFIG_HIGHMEM #define _ZONE ZONE_HIGHMEM #else #define _ZONE ZONE_NORMAL #endif #define CREATE_TRACE_POINTS #include "trace/lowmemorykiller.h" Loading @@ -63,6 +78,7 @@ static int lowmem_minfree[6] = { }; 
static int lowmem_minfree_size = 4; static int lmk_fast_run = 1; static unsigned long lowmem_deathpending_timeout; Loading @@ -81,6 +97,314 @@ static unsigned long lowmem_count(struct shrinker *s, global_node_page_state(NR_INACTIVE_FILE); } static atomic_t shift_adj = ATOMIC_INIT(0); static short adj_max_shift = 353; module_param_named(adj_max_shift, adj_max_shift, short, 0644); /* User knob to enable/disable adaptive lmk feature */ static int enable_adaptive_lmk; module_param_named(enable_adaptive_lmk, enable_adaptive_lmk, int, 0644); /* * This parameter controls the behaviour of LMK when vmpressure is in * the range of 90-94. Adaptive lmk triggers based on number of file * pages wrt vmpressure_file_min, when vmpressure is in the range of * 90-94. Usually this is a pseudo minfree value, higher than the * highest configured value in minfree array. */ static int vmpressure_file_min; module_param_named(vmpressure_file_min, vmpressure_file_min, int, 0644); enum { VMPRESSURE_NO_ADJUST = 0, VMPRESSURE_ADJUST_ENCROACH, VMPRESSURE_ADJUST_NORMAL, }; static int adjust_minadj(short *min_score_adj) { int ret = VMPRESSURE_NO_ADJUST; if (!enable_adaptive_lmk) return 0; if (atomic_read(&shift_adj) && (*min_score_adj > adj_max_shift)) { if (*min_score_adj == OOM_SCORE_ADJ_MAX + 1) ret = VMPRESSURE_ADJUST_ENCROACH; else ret = VMPRESSURE_ADJUST_NORMAL; *min_score_adj = adj_max_shift; } atomic_set(&shift_adj, 0); return ret; } static int lmk_vmpressure_notifier(struct notifier_block *nb, unsigned long action, void *data) { int other_free, other_file; unsigned long pressure = action; int array_size = ARRAY_SIZE(lowmem_adj); if (!enable_adaptive_lmk) return 0; if (pressure >= 95) { other_file = global_node_page_state(NR_FILE_PAGES) - global_node_page_state(NR_SHMEM) - total_swapcache_pages(); other_free = global_page_state(NR_FREE_PAGES); atomic_set(&shift_adj, 1); trace_almk_vmpressure(pressure, other_free, other_file); } else if (pressure >= 90) { if (lowmem_adj_size < array_size) 
array_size = lowmem_adj_size; if (lowmem_minfree_size < array_size) array_size = lowmem_minfree_size; other_file = global_node_page_state(NR_FILE_PAGES) - global_node_page_state(NR_SHMEM) - total_swapcache_pages(); other_free = global_page_state(NR_FREE_PAGES); if ((other_free < lowmem_minfree[array_size - 1]) && (other_file < vmpressure_file_min)) { atomic_set(&shift_adj, 1); trace_almk_vmpressure(pressure, other_free, other_file); } } else if (atomic_read(&shift_adj)) { other_file = global_node_page_state(NR_FILE_PAGES) - global_node_page_state(NR_SHMEM) - total_swapcache_pages(); other_free = global_page_state(NR_FREE_PAGES); /* * shift_adj would have been set by a previous invocation * of notifier, which is not followed by a lowmem_shrink yet. * Since vmpressure has improved, reset shift_adj to avoid * false adaptive LMK trigger. */ trace_almk_vmpressure(pressure, other_free, other_file); atomic_set(&shift_adj, 0); } return 0; } static struct notifier_block lmk_vmpr_nb = { .notifier_call = lmk_vmpressure_notifier, }; static int test_task_flag(struct task_struct *p, int flag) { struct task_struct *t; for_each_thread(p, t) { task_lock(t); if (test_tsk_thread_flag(t, flag)) { task_unlock(t); return 1; } task_unlock(t); } return 0; } static int test_task_lmk_waiting(struct task_struct *p) { struct task_struct *t; for_each_thread(p, t) { task_lock(t); if (task_lmk_waiting(t)) { task_unlock(t); return 1; } task_unlock(t); } return 0; } static DEFINE_MUTEX(scan_mutex); static int can_use_cma_pages(gfp_t gfp_mask) { int can_use = 0; int mtype = gfpflags_to_migratetype(gfp_mask); int i = 0; int *mtype_fallbacks = get_migratetype_fallbacks(mtype); if (is_migrate_cma(mtype)) { can_use = 1; } else { for (i = 0;; i++) { int fallbacktype = mtype_fallbacks[i]; if (is_migrate_cma(fallbacktype)) { can_use = 1; break; } if (fallbacktype == MIGRATE_TYPES) break; } } return can_use; } void tune_lmk_zone_param(struct zonelist *zonelist, int classzone_idx, int *other_free, int 
*other_file, int use_cma_pages) { struct zone *zone; struct zoneref *zoneref; int zone_idx; for_each_zone_zonelist(zone, zoneref, zonelist, MAX_NR_ZONES) { zone_idx = zonelist_zone_idx(zoneref); if (zone_idx == ZONE_MOVABLE) { if (!use_cma_pages && other_free) *other_free -= zone_page_state(zone, NR_FREE_CMA_PAGES); continue; } if (zone_idx > classzone_idx) { if (other_free != NULL) *other_free -= zone_page_state(zone, NR_FREE_PAGES); if (other_file != NULL) *other_file -= zone_page_state(zone, NR_ZONE_INACTIVE_FILE) + zone_page_state(zone, NR_ZONE_ACTIVE_FILE); } else if (zone_idx < classzone_idx) { if (zone_watermark_ok(zone, 0, 0, classzone_idx, 0) && other_free) { if (!use_cma_pages) { *other_free -= min( zone->lowmem_reserve[classzone_idx] + zone_page_state( zone, NR_FREE_CMA_PAGES), zone_page_state( zone, NR_FREE_PAGES)); } else { *other_free -= zone->lowmem_reserve[classzone_idx]; } } else { if (other_free) *other_free -= zone_page_state(zone, NR_FREE_PAGES); } } } } #ifdef CONFIG_HIGHMEM static void adjust_gfp_mask(gfp_t *gfp_mask) { struct zone *preferred_zone; struct zoneref *zref; struct zonelist *zonelist; enum zone_type high_zoneidx; if (current_is_kswapd()) { zonelist = node_zonelist(0, *gfp_mask); high_zoneidx = gfp_zone(*gfp_mask); zref = first_zones_zonelist(zonelist, high_zoneidx, NULL); preferred_zone = zref->zone; if (high_zoneidx == ZONE_NORMAL) { if (zone_watermark_ok_safe( preferred_zone, 0, high_wmark_pages(preferred_zone), 0)) *gfp_mask |= __GFP_HIGHMEM; } else if (high_zoneidx == ZONE_HIGHMEM) { *gfp_mask |= __GFP_HIGHMEM; } } } #else static void adjust_gfp_mask(gfp_t *unused) { } #endif void tune_lmk_param(int *other_free, int *other_file, struct shrink_control *sc) { gfp_t gfp_mask; struct zone *preferred_zone; struct zoneref *zref; struct zonelist *zonelist; enum zone_type high_zoneidx, classzone_idx; unsigned long balance_gap; int use_cma_pages; gfp_mask = sc->gfp_mask; adjust_gfp_mask(&gfp_mask); zonelist = node_zonelist(0, gfp_mask); 
high_zoneidx = gfp_zone(gfp_mask); zref = first_zones_zonelist(zonelist, high_zoneidx, NULL); preferred_zone = zref->zone; classzone_idx = zone_idx(preferred_zone); use_cma_pages = can_use_cma_pages(gfp_mask); balance_gap = min(low_wmark_pages(preferred_zone), (preferred_zone->present_pages + 100-1) / 100); if (likely(current_is_kswapd() && zone_watermark_ok(preferred_zone, 0, high_wmark_pages(preferred_zone) + SWAP_CLUSTER_MAX + balance_gap, 0, 0))) { if (lmk_fast_run) tune_lmk_zone_param(zonelist, classzone_idx, other_free, other_file, use_cma_pages); else tune_lmk_zone_param(zonelist, classzone_idx, other_free, NULL, use_cma_pages); if (zone_watermark_ok(preferred_zone, 0, 0, _ZONE, 0)) { if (!use_cma_pages) { *other_free -= min( preferred_zone->lowmem_reserve[_ZONE] + zone_page_state( preferred_zone, NR_FREE_CMA_PAGES), zone_page_state( preferred_zone, NR_FREE_PAGES)); } else { *other_free -= preferred_zone->lowmem_reserve[_ZONE]; } } else { *other_free -= zone_page_state(preferred_zone, NR_FREE_PAGES); } lowmem_print(4, "lowmem_shrink of kswapd tunning for highmem " "ofree %d, %d\n", *other_free, *other_file); } else { tune_lmk_zone_param(zonelist, classzone_idx, other_free, other_file, use_cma_pages); if (!use_cma_pages) { *other_free -= zone_page_state(preferred_zone, NR_FREE_CMA_PAGES); } lowmem_print(4, "lowmem_shrink tunning for others ofree %d, " "%d\n", *other_free, *other_file); } } static unsigned long lowmem_scan(struct shrinker *s, struct shrink_control *sc) { struct task_struct *tsk; Loading @@ -88,16 +412,31 @@ static unsigned long lowmem_scan(struct shrinker *s, struct shrink_control *sc) unsigned long rem = 0; int tasksize; int i; int ret = 0; short min_score_adj = OOM_SCORE_ADJ_MAX + 1; int minfree = 0; int selected_tasksize = 0; short selected_oom_score_adj; int array_size = ARRAY_SIZE(lowmem_adj); int other_free = global_page_state(NR_FREE_PAGES) - totalreserve_pages; int other_file = global_node_page_state(NR_FILE_PAGES) - int other_free; 
int other_file; if (mutex_lock_interruptible(&scan_mutex) < 0) return 0; other_free = global_page_state(NR_FREE_PAGES) - totalreserve_pages; if (global_node_page_state(NR_SHMEM) + total_swapcache_pages() + global_node_page_state(NR_UNEVICTABLE) < global_node_page_state(NR_FILE_PAGES)) other_file = global_node_page_state(NR_FILE_PAGES) - global_node_page_state(NR_SHMEM) - global_node_page_state(NR_UNEVICTABLE) - total_swapcache_pages(); else other_file = 0; tune_lmk_param(&other_free, &other_file, sc); if (lowmem_adj_size < array_size) array_size = lowmem_adj_size; Loading @@ -111,13 +450,17 @@ static unsigned long lowmem_scan(struct shrinker *s, struct shrink_control *sc) } } ret = adjust_minadj(&min_score_adj); lowmem_print(3, "lowmem_scan %lu, %x, ofree %d %d, ma %hd\n", sc->nr_to_scan, sc->gfp_mask, other_free, other_file, min_score_adj); if (min_score_adj == OOM_SCORE_ADJ_MAX + 1) { trace_almk_shrink(0, ret, other_free, other_file, 0); lowmem_print(5, "lowmem_scan %lu, %x, return 0\n", sc->nr_to_scan, sc->gfp_mask); mutex_unlock(&scan_mutex); return 0; } Loading @@ -131,16 +474,24 @@ static unsigned long lowmem_scan(struct shrinker *s, struct shrink_control *sc) if (tsk->flags & PF_KTHREAD) continue; p = find_lock_task_mm(tsk); if (!p) /* if task no longer has any memory ignore it */ if (test_task_flag(tsk, TIF_MM_RELEASED)) continue; if (task_lmk_waiting(p) && time_before_eq(jiffies, lowmem_deathpending_timeout)) { task_unlock(p); if (time_before_eq(jiffies, lowmem_deathpending_timeout)) { if (test_task_lmk_waiting(tsk)) { rcu_read_unlock(); /* give the system time to free up the memory */ msleep_interruptible(20); mutex_unlock(&scan_mutex); return 0; } } p = find_lock_task_mm(tsk); if (!p) continue; oom_score_adj = p->signal->oom_score_adj; if (oom_score_adj < min_score_adj) { task_unlock(p); Loading @@ -160,7 +511,7 @@ static unsigned long lowmem_scan(struct shrinker *s, struct shrink_control *sc) selected = p; selected_tasksize = tasksize; 
selected_oom_score_adj = oom_score_adj; lowmem_print(2, "select '%s' (%d), adj %hd, size %d, to kill\n", lowmem_print(3, "select '%s' (%d), adj %hd, size %d, to kill\n", p->comm, p->pid, oom_score_adj, tasksize); } if (selected) { Loading @@ -176,22 +527,50 @@ static unsigned long lowmem_scan(struct shrinker *s, struct shrink_control *sc) trace_lowmemory_kill(selected, cache_size, cache_limit, free); lowmem_print(1, "Killing '%s' (%d), adj %hd,\n" "to free %ldkB on behalf of '%s' (%d) because\n" " cache %ldkB is below limit %ldkB for oom_score_adj %hd\n" " Free memory is %ldkB above reserved\n", "cache %ldkB is below limit %ldkB for oom score %hd\n" "Free memory is %ldkB above reserved.\n" "Free CMA is %ldkB\n" "Total reserve is %ldkB\n" "Total free pages is %ldkB\n" "Total file cache is %ldkB\n" "GFP mask is 0x%x\n", selected->comm, selected->pid, selected_oom_score_adj, selected_tasksize * (long)(PAGE_SIZE / 1024), current->comm, current->pid, cache_size, cache_limit, min_score_adj, free); free, global_page_state(NR_FREE_CMA_PAGES) * (long)(PAGE_SIZE / 1024), totalreserve_pages * (long)(PAGE_SIZE / 1024), global_page_state(NR_FREE_PAGES) * (long)(PAGE_SIZE / 1024), global_node_page_state(NR_FILE_PAGES) * (long)(PAGE_SIZE / 1024), sc->gfp_mask); if (lowmem_debug_level >= 2 && selected_oom_score_adj == 0) { show_mem(SHOW_MEM_FILTER_NODES); dump_tasks(NULL, NULL); } lowmem_deathpending_timeout = jiffies + HZ; rem += selected_tasksize; rcu_read_unlock(); /* give the system time to free up the memory */ msleep_interruptible(20); trace_almk_shrink(selected_tasksize, ret, other_free, other_file, selected_oom_score_adj); } else { trace_almk_shrink(1, ret, other_free, other_file, 0); rcu_read_unlock(); } lowmem_print(4, "lowmem_scan %lu, %x, return %lu\n", sc->nr_to_scan, sc->gfp_mask, rem); rcu_read_unlock(); mutex_unlock(&scan_mutex); return rem; } Loading @@ -204,6 +583,7 @@ static struct shrinker lowmem_shrinker = { static int __init lowmem_init(void) { 
register_shrinker(&lowmem_shrinker); vmpressure_notifier_register(&lmk_vmpr_nb); return 0; } device_initcall(lowmem_init); Loading Loading @@ -299,6 +679,7 @@ __MODULE_PARM_TYPE(adj, "array of short"); module_param_array_named(adj, lowmem_adj, short, &lowmem_adj_size, 0644); #endif module_param_array_named(minfree, lowmem_minfree, uint, &lowmem_minfree_size, 0644); module_param_named(debug_level, lowmem_debug_level, uint, 0644); S_IRUGO | S_IWUSR); module_param_named(debug_level, lowmem_debug_level, uint, S_IRUGO | S_IWUSR); module_param_named(lmk_fast_run, lmk_fast_run, int, S_IRUGO | S_IWUSR); Loading
Documentation/filesystems/proc.txt +20 −0 Original line number Diff line number Diff line Loading @@ -138,6 +138,7 @@ Table 1-1: Process specific entries in /proc maps Memory maps to executables and library files (2.4) mem Memory held by this process root Link to the root directory of this process reclaim Reclaim pages in this process stat Process status statm Process memory status information status Process status in human readable form Loading Loading @@ -528,6 +529,25 @@ current value: Any other value written to /proc/PID/clear_refs will have no effect. The file /proc/PID/reclaim is used to reclaim pages in this process. To reclaim file-backed pages, > echo file > /proc/PID/reclaim To reclaim anonymous pages, > echo anon > /proc/PID/reclaim To reclaim all pages, > echo all > /proc/PID/reclaim You can also specify an address range within the process so that only that part of the address space is reclaimed. The format is as follows: > echo addr size-byte > /proc/PID/reclaim NOTE: addr should be page-aligned. Below is an example which tries to reclaim 2M starting at address 0x100000. > echo 0x100000 2M > /proc/PID/reclaim The /proc/pid/pagemap gives the PFN, which can be used to find the pageflags using /proc/kpageflags and number of times a page is mapped using /proc/kpagecount. For detailed explanation, see Documentation/vm/pagemap.txt. Loading
arch/arm/include/asm/thread_info.h +1 −0 Original line number Diff line number Diff line Loading @@ -148,6 +148,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *, #define TIF_USING_IWMMXT 17 #define TIF_MEMDIE 18 /* is terminating due to OOM killer */ #define TIF_RESTORE_SIGMASK 20 #define TIF_MM_RELEASED 21 /* task MM has been released */ #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) Loading
arch/um/include/asm/thread_info.h +1 −0 Original line number Diff line number Diff line Loading @@ -63,6 +63,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_RESTORE_SIGMASK 7 #define TIF_NOTIFY_RESUME 8 #define TIF_SECCOMP 9 /* secure computing */ #define TIF_MM_RELEASED 10 /* task MM has been released */ #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) Loading
drivers/block/zram/zram_drv.c +1 −0 Original line number Diff line number Diff line Loading @@ -1271,6 +1271,7 @@ static int zram_add(void) zram->disk->private_data = zram; snprintf(zram->disk->disk_name, 16, "zram%d", device_id); __set_bit(QUEUE_FLAG_FAST, &zram->disk->queue->queue_flags); /* Actual capacity set using sysfs (/sys/block/zram<id>/disksize) */ set_capacity(zram->disk, 0); /* zram devices sort of resemble non-rotational disks */ Loading
drivers/staging/android/lowmemorykiller.c +406 −25 Original line number Diff line number Diff line Loading @@ -42,6 +42,21 @@ #include <linux/rcupdate.h> #include <linux/profile.h> #include <linux/notifier.h> #include <linux/mutex.h> #include <linux/delay.h> #include <linux/swap.h> #include <linux/fs.h> #include <linux/cpuset.h> #include <linux/vmpressure.h> #define CREATE_TRACE_POINTS #include <trace/events/almk.h> #ifdef CONFIG_HIGHMEM #define _ZONE ZONE_HIGHMEM #else #define _ZONE ZONE_NORMAL #endif #define CREATE_TRACE_POINTS #include "trace/lowmemorykiller.h" Loading @@ -63,6 +78,7 @@ static int lowmem_minfree[6] = { }; static int lowmem_minfree_size = 4; static int lmk_fast_run = 1; static unsigned long lowmem_deathpending_timeout; Loading @@ -81,6 +97,314 @@ static unsigned long lowmem_count(struct shrinker *s, global_node_page_state(NR_INACTIVE_FILE); } static atomic_t shift_adj = ATOMIC_INIT(0); static short adj_max_shift = 353; module_param_named(adj_max_shift, adj_max_shift, short, 0644); /* User knob to enable/disable adaptive lmk feature */ static int enable_adaptive_lmk; module_param_named(enable_adaptive_lmk, enable_adaptive_lmk, int, 0644); /* * This parameter controls the behaviour of LMK when vmpressure is in * the range of 90-94. Adaptive lmk triggers based on number of file * pages wrt vmpressure_file_min, when vmpressure is in the range of * 90-94. Usually this is a pseudo minfree value, higher than the * highest configured value in minfree array. 
*/ static int vmpressure_file_min; module_param_named(vmpressure_file_min, vmpressure_file_min, int, 0644); enum { VMPRESSURE_NO_ADJUST = 0, VMPRESSURE_ADJUST_ENCROACH, VMPRESSURE_ADJUST_NORMAL, }; static int adjust_minadj(short *min_score_adj) { int ret = VMPRESSURE_NO_ADJUST; if (!enable_adaptive_lmk) return 0; if (atomic_read(&shift_adj) && (*min_score_adj > adj_max_shift)) { if (*min_score_adj == OOM_SCORE_ADJ_MAX + 1) ret = VMPRESSURE_ADJUST_ENCROACH; else ret = VMPRESSURE_ADJUST_NORMAL; *min_score_adj = adj_max_shift; } atomic_set(&shift_adj, 0); return ret; } static int lmk_vmpressure_notifier(struct notifier_block *nb, unsigned long action, void *data) { int other_free, other_file; unsigned long pressure = action; int array_size = ARRAY_SIZE(lowmem_adj); if (!enable_adaptive_lmk) return 0; if (pressure >= 95) { other_file = global_node_page_state(NR_FILE_PAGES) - global_node_page_state(NR_SHMEM) - total_swapcache_pages(); other_free = global_page_state(NR_FREE_PAGES); atomic_set(&shift_adj, 1); trace_almk_vmpressure(pressure, other_free, other_file); } else if (pressure >= 90) { if (lowmem_adj_size < array_size) array_size = lowmem_adj_size; if (lowmem_minfree_size < array_size) array_size = lowmem_minfree_size; other_file = global_node_page_state(NR_FILE_PAGES) - global_node_page_state(NR_SHMEM) - total_swapcache_pages(); other_free = global_page_state(NR_FREE_PAGES); if ((other_free < lowmem_minfree[array_size - 1]) && (other_file < vmpressure_file_min)) { atomic_set(&shift_adj, 1); trace_almk_vmpressure(pressure, other_free, other_file); } } else if (atomic_read(&shift_adj)) { other_file = global_node_page_state(NR_FILE_PAGES) - global_node_page_state(NR_SHMEM) - total_swapcache_pages(); other_free = global_page_state(NR_FREE_PAGES); /* * shift_adj would have been set by a previous invocation * of notifier, which is not followed by a lowmem_shrink yet. * Since vmpressure has improved, reset shift_adj to avoid * false adaptive LMK trigger. 
*/ trace_almk_vmpressure(pressure, other_free, other_file); atomic_set(&shift_adj, 0); } return 0; } static struct notifier_block lmk_vmpr_nb = { .notifier_call = lmk_vmpressure_notifier, }; static int test_task_flag(struct task_struct *p, int flag) { struct task_struct *t; for_each_thread(p, t) { task_lock(t); if (test_tsk_thread_flag(t, flag)) { task_unlock(t); return 1; } task_unlock(t); } return 0; } static int test_task_lmk_waiting(struct task_struct *p) { struct task_struct *t; for_each_thread(p, t) { task_lock(t); if (task_lmk_waiting(t)) { task_unlock(t); return 1; } task_unlock(t); } return 0; } static DEFINE_MUTEX(scan_mutex); static int can_use_cma_pages(gfp_t gfp_mask) { int can_use = 0; int mtype = gfpflags_to_migratetype(gfp_mask); int i = 0; int *mtype_fallbacks = get_migratetype_fallbacks(mtype); if (is_migrate_cma(mtype)) { can_use = 1; } else { for (i = 0;; i++) { int fallbacktype = mtype_fallbacks[i]; if (is_migrate_cma(fallbacktype)) { can_use = 1; break; } if (fallbacktype == MIGRATE_TYPES) break; } } return can_use; } void tune_lmk_zone_param(struct zonelist *zonelist, int classzone_idx, int *other_free, int *other_file, int use_cma_pages) { struct zone *zone; struct zoneref *zoneref; int zone_idx; for_each_zone_zonelist(zone, zoneref, zonelist, MAX_NR_ZONES) { zone_idx = zonelist_zone_idx(zoneref); if (zone_idx == ZONE_MOVABLE) { if (!use_cma_pages && other_free) *other_free -= zone_page_state(zone, NR_FREE_CMA_PAGES); continue; } if (zone_idx > classzone_idx) { if (other_free != NULL) *other_free -= zone_page_state(zone, NR_FREE_PAGES); if (other_file != NULL) *other_file -= zone_page_state(zone, NR_ZONE_INACTIVE_FILE) + zone_page_state(zone, NR_ZONE_ACTIVE_FILE); } else if (zone_idx < classzone_idx) { if (zone_watermark_ok(zone, 0, 0, classzone_idx, 0) && other_free) { if (!use_cma_pages) { *other_free -= min( zone->lowmem_reserve[classzone_idx] + zone_page_state( zone, NR_FREE_CMA_PAGES), zone_page_state( zone, NR_FREE_PAGES)); } else { 
*other_free -= zone->lowmem_reserve[classzone_idx]; } } else { if (other_free) *other_free -= zone_page_state(zone, NR_FREE_PAGES); } } } } #ifdef CONFIG_HIGHMEM static void adjust_gfp_mask(gfp_t *gfp_mask) { struct zone *preferred_zone; struct zoneref *zref; struct zonelist *zonelist; enum zone_type high_zoneidx; if (current_is_kswapd()) { zonelist = node_zonelist(0, *gfp_mask); high_zoneidx = gfp_zone(*gfp_mask); zref = first_zones_zonelist(zonelist, high_zoneidx, NULL); preferred_zone = zref->zone; if (high_zoneidx == ZONE_NORMAL) { if (zone_watermark_ok_safe( preferred_zone, 0, high_wmark_pages(preferred_zone), 0)) *gfp_mask |= __GFP_HIGHMEM; } else if (high_zoneidx == ZONE_HIGHMEM) { *gfp_mask |= __GFP_HIGHMEM; } } } #else static void adjust_gfp_mask(gfp_t *unused) { } #endif void tune_lmk_param(int *other_free, int *other_file, struct shrink_control *sc) { gfp_t gfp_mask; struct zone *preferred_zone; struct zoneref *zref; struct zonelist *zonelist; enum zone_type high_zoneidx, classzone_idx; unsigned long balance_gap; int use_cma_pages; gfp_mask = sc->gfp_mask; adjust_gfp_mask(&gfp_mask); zonelist = node_zonelist(0, gfp_mask); high_zoneidx = gfp_zone(gfp_mask); zref = first_zones_zonelist(zonelist, high_zoneidx, NULL); preferred_zone = zref->zone; classzone_idx = zone_idx(preferred_zone); use_cma_pages = can_use_cma_pages(gfp_mask); balance_gap = min(low_wmark_pages(preferred_zone), (preferred_zone->present_pages + 100-1) / 100); if (likely(current_is_kswapd() && zone_watermark_ok(preferred_zone, 0, high_wmark_pages(preferred_zone) + SWAP_CLUSTER_MAX + balance_gap, 0, 0))) { if (lmk_fast_run) tune_lmk_zone_param(zonelist, classzone_idx, other_free, other_file, use_cma_pages); else tune_lmk_zone_param(zonelist, classzone_idx, other_free, NULL, use_cma_pages); if (zone_watermark_ok(preferred_zone, 0, 0, _ZONE, 0)) { if (!use_cma_pages) { *other_free -= min( preferred_zone->lowmem_reserve[_ZONE] + zone_page_state( preferred_zone, NR_FREE_CMA_PAGES), 
zone_page_state( preferred_zone, NR_FREE_PAGES)); } else { *other_free -= preferred_zone->lowmem_reserve[_ZONE]; } } else { *other_free -= zone_page_state(preferred_zone, NR_FREE_PAGES); } lowmem_print(4, "lowmem_shrink of kswapd tunning for highmem " "ofree %d, %d\n", *other_free, *other_file); } else { tune_lmk_zone_param(zonelist, classzone_idx, other_free, other_file, use_cma_pages); if (!use_cma_pages) { *other_free -= zone_page_state(preferred_zone, NR_FREE_CMA_PAGES); } lowmem_print(4, "lowmem_shrink tunning for others ofree %d, " "%d\n", *other_free, *other_file); } } static unsigned long lowmem_scan(struct shrinker *s, struct shrink_control *sc) { struct task_struct *tsk; Loading @@ -88,16 +412,31 @@ static unsigned long lowmem_scan(struct shrinker *s, struct shrink_control *sc) unsigned long rem = 0; int tasksize; int i; int ret = 0; short min_score_adj = OOM_SCORE_ADJ_MAX + 1; int minfree = 0; int selected_tasksize = 0; short selected_oom_score_adj; int array_size = ARRAY_SIZE(lowmem_adj); int other_free = global_page_state(NR_FREE_PAGES) - totalreserve_pages; int other_file = global_node_page_state(NR_FILE_PAGES) - int other_free; int other_file; if (mutex_lock_interruptible(&scan_mutex) < 0) return 0; other_free = global_page_state(NR_FREE_PAGES) - totalreserve_pages; if (global_node_page_state(NR_SHMEM) + total_swapcache_pages() + global_node_page_state(NR_UNEVICTABLE) < global_node_page_state(NR_FILE_PAGES)) other_file = global_node_page_state(NR_FILE_PAGES) - global_node_page_state(NR_SHMEM) - global_node_page_state(NR_UNEVICTABLE) - total_swapcache_pages(); else other_file = 0; tune_lmk_param(&other_free, &other_file, sc); if (lowmem_adj_size < array_size) array_size = lowmem_adj_size; Loading @@ -111,13 +450,17 @@ static unsigned long lowmem_scan(struct shrinker *s, struct shrink_control *sc) } } ret = adjust_minadj(&min_score_adj); lowmem_print(3, "lowmem_scan %lu, %x, ofree %d %d, ma %hd\n", sc->nr_to_scan, sc->gfp_mask, other_free, other_file, 
min_score_adj); if (min_score_adj == OOM_SCORE_ADJ_MAX + 1) { trace_almk_shrink(0, ret, other_free, other_file, 0); lowmem_print(5, "lowmem_scan %lu, %x, return 0\n", sc->nr_to_scan, sc->gfp_mask); mutex_unlock(&scan_mutex); return 0; } Loading @@ -131,16 +474,24 @@ static unsigned long lowmem_scan(struct shrinker *s, struct shrink_control *sc) if (tsk->flags & PF_KTHREAD) continue; p = find_lock_task_mm(tsk); if (!p) /* if task no longer has any memory ignore it */ if (test_task_flag(tsk, TIF_MM_RELEASED)) continue; if (task_lmk_waiting(p) && time_before_eq(jiffies, lowmem_deathpending_timeout)) { task_unlock(p); if (time_before_eq(jiffies, lowmem_deathpending_timeout)) { if (test_task_lmk_waiting(tsk)) { rcu_read_unlock(); /* give the system time to free up the memory */ msleep_interruptible(20); mutex_unlock(&scan_mutex); return 0; } } p = find_lock_task_mm(tsk); if (!p) continue; oom_score_adj = p->signal->oom_score_adj; if (oom_score_adj < min_score_adj) { task_unlock(p); Loading @@ -160,7 +511,7 @@ static unsigned long lowmem_scan(struct shrinker *s, struct shrink_control *sc) selected = p; selected_tasksize = tasksize; selected_oom_score_adj = oom_score_adj; lowmem_print(2, "select '%s' (%d), adj %hd, size %d, to kill\n", lowmem_print(3, "select '%s' (%d), adj %hd, size %d, to kill\n", p->comm, p->pid, oom_score_adj, tasksize); } if (selected) { Loading @@ -176,22 +527,50 @@ static unsigned long lowmem_scan(struct shrinker *s, struct shrink_control *sc) trace_lowmemory_kill(selected, cache_size, cache_limit, free); lowmem_print(1, "Killing '%s' (%d), adj %hd,\n" "to free %ldkB on behalf of '%s' (%d) because\n" " cache %ldkB is below limit %ldkB for oom_score_adj %hd\n" " Free memory is %ldkB above reserved\n", "cache %ldkB is below limit %ldkB for oom score %hd\n" "Free memory is %ldkB above reserved.\n" "Free CMA is %ldkB\n" "Total reserve is %ldkB\n" "Total free pages is %ldkB\n" "Total file cache is %ldkB\n" "GFP mask is 0x%x\n", selected->comm, 
selected->pid, selected_oom_score_adj, selected_tasksize * (long)(PAGE_SIZE / 1024), current->comm, current->pid, cache_size, cache_limit, min_score_adj, free); free, global_page_state(NR_FREE_CMA_PAGES) * (long)(PAGE_SIZE / 1024), totalreserve_pages * (long)(PAGE_SIZE / 1024), global_page_state(NR_FREE_PAGES) * (long)(PAGE_SIZE / 1024), global_node_page_state(NR_FILE_PAGES) * (long)(PAGE_SIZE / 1024), sc->gfp_mask); if (lowmem_debug_level >= 2 && selected_oom_score_adj == 0) { show_mem(SHOW_MEM_FILTER_NODES); dump_tasks(NULL, NULL); } lowmem_deathpending_timeout = jiffies + HZ; rem += selected_tasksize; rcu_read_unlock(); /* give the system time to free up the memory */ msleep_interruptible(20); trace_almk_shrink(selected_tasksize, ret, other_free, other_file, selected_oom_score_adj); } else { trace_almk_shrink(1, ret, other_free, other_file, 0); rcu_read_unlock(); } lowmem_print(4, "lowmem_scan %lu, %x, return %lu\n", sc->nr_to_scan, sc->gfp_mask, rem); rcu_read_unlock(); mutex_unlock(&scan_mutex); return rem; } Loading @@ -204,6 +583,7 @@ static struct shrinker lowmem_shrinker = { static int __init lowmem_init(void) { register_shrinker(&lowmem_shrinker); vmpressure_notifier_register(&lmk_vmpr_nb); return 0; } device_initcall(lowmem_init); Loading Loading @@ -299,6 +679,7 @@ __MODULE_PARM_TYPE(adj, "array of short"); module_param_array_named(adj, lowmem_adj, short, &lowmem_adj_size, 0644); #endif module_param_array_named(minfree, lowmem_minfree, uint, &lowmem_minfree_size, 0644); module_param_named(debug_level, lowmem_debug_level, uint, 0644); S_IRUGO | S_IWUSR); module_param_named(debug_level, lowmem_debug_level, uint, S_IRUGO | S_IWUSR); module_param_named(lmk_fast_run, lmk_fast_run, int, S_IRUGO | S_IWUSR);