Loading include/linux/psi.h +4 −0 Original line number Diff line number Diff line Loading @@ -23,6 +23,8 @@ void psi_memstall_leave(unsigned long *flags); int psi_show(struct seq_file *s, struct psi_group *group, enum psi_res res); void psi_emergency_trigger(void); #ifdef CONFIG_CGROUPS int psi_cgroup_alloc(struct cgroup *cgrp); void psi_cgroup_free(struct cgroup *cgrp); Loading @@ -43,6 +45,8 @@ static inline void psi_init(void) {} static inline void psi_memstall_enter(unsigned long *flags) {} static inline void psi_memstall_leave(unsigned long *flags) {} static inline void psi_emergency_trigger(void){} #ifdef CONFIG_CGROUPS static inline int psi_cgroup_alloc(struct cgroup *cgrp) { Loading include/trace/events/psi.h +9 −7 Original line number Diff line number Diff line Loading @@ -14,13 +14,14 @@ TRACE_EVENT(psi_window_vmstat, TP_PROTO(u64 memstall, const char *zone_name, u64 high, TP_PROTO(u64 mem_some, u64 mem_full, const char *zone_name, u64 high, u64 free, u64 cma, u64 file), TP_ARGS(memstall, zone_name, high, free, cma, file), TP_ARGS(mem_some, mem_full, zone_name, high, free, cma, file), TP_STRUCT__entry( __field(u64, memstall) __field(u64, mem_some) __field(u64, mem_full) __string(name, zone_name) __field(u64, high) __field(u64, free) Loading @@ -29,7 +30,8 @@ TRACE_EVENT(psi_window_vmstat, ), TP_fast_assign( __entry->memstall = memstall; __entry->mem_some = mem_some; __entry->mem_full = mem_full; __assign_str(name, zone_name); __entry->high = high; __entry->free = free; Loading @@ -37,9 +39,9 @@ TRACE_EVENT(psi_window_vmstat, __entry->file = file; ), TP_printk("%16s: Memstall: %#16llx High: %#8llx Free: %#8llx CMA: %#8llx File: %#8llx", __get_str(name), __entry->memstall, __entry->high, __entry->free, __entry->cma, __entry->file TP_printk("%16s: MEMSOME: %9lluns MEMFULL: %9lluns High: %9llukB Free: %9llukB CMA: %8llukB File: %9llukB", __get_str(name), __entry->mem_some, __entry->mem_full, __entry->high, __entry->free, __entry->cma, __entry->file ) ); Loading kernel/sched/psi.c +41 −8 Original line number Diff line number Diff line Loading @@ -445,6 +445,9 @@ static void psi_avgs_work(struct work_struct *work) } #ifdef CONFIG_PSI_FTRACE #define TOKB(x) ((x) * (PAGE_SIZE / 1024)) static void trace_event_helper(struct psi_group *group) { struct zone *zone; Loading @@ -452,17 +455,22 @@ static void trace_event_helper(struct psi_group *group) unsigned long free; unsigned long cma; unsigned long file; u64 memstall = group->total[PSI_POLL][PSI_MEM_SOME]; u64 mem_some_delta = group->total[PSI_POLL][PSI_MEM_SOME] - group->polling_total[PSI_MEM_SOME]; u64 mem_full_delta = group->total[PSI_POLL][PSI_MEM_FULL] - group->polling_total[PSI_MEM_FULL]; for_each_populated_zone(zone) { wmark = high_wmark_pages(zone); free = zone_page_state(zone, NR_FREE_PAGES); cma = zone_page_state(zone, NR_FREE_CMA_PAGES); file = zone_page_state(zone, NR_ZONE_ACTIVE_FILE) + zone_page_state(zone, NR_ZONE_INACTIVE_FILE); wmark = TOKB(high_wmark_pages(zone)); free = TOKB(zone_page_state(zone, NR_FREE_PAGES)); cma = TOKB(zone_page_state(zone, NR_FREE_CMA_PAGES)); file = TOKB(zone_page_state(zone, NR_ZONE_ACTIVE_FILE) + zone_page_state(zone, NR_ZONE_INACTIVE_FILE)); trace_psi_window_vmstat( memstall, zone->name, wmark, free, cma, file); mem_some_delta, mem_full_delta, zone->name, wmark, free, cma, file); } } #else Loading Loading @@ -571,6 +579,7 @@ static u64 update_triggers(struct psi_group *group, u64 now) t->last_event_time = now; } trace_event_helper(group); if (new_stall) memcpy(group->polling_total, total, sizeof(group->polling_total)); Loading @@ -578,6 +587,31 @@ static u64 update_triggers(struct psi_group *group, u64 now) return now + group->poll_min_period; } void psi_emergency_trigger(void) { struct psi_group *group = &psi_system; struct psi_trigger *t; if (static_branch_likely(&psi_disabled)) return; /* * In unlikely case that OOM was triggered while adding/ * removing triggers. */ if (!mutex_trylock(&group->trigger_lock)) return; list_for_each_entry(t, &group->triggers, node) { trace_psi_event(t->state, t->threshold); /* Generate an event */ if (cmpxchg(&t->event, 0, 1) == 0) wake_up_interruptible(&t->event_wait); } mutex_unlock(&group->trigger_lock); } /* * Schedule polling if it's not already scheduled. It's safe to call even from * hotpath because even though kthread_queue_delayed_work takes worker->lock Loading Loading @@ -637,7 +671,6 @@ static void psi_poll_work(struct kthread_work *work) */ group->polling_until = now + group->poll_min_period * UPDATES_PER_WINDOW; trace_event_helper(group); } if (now > group->polling_until) { Loading mm/oom_kill.c +29 −2 Original line number Diff line number Diff line Loading @@ -43,6 +43,7 @@ #include <linux/mmu_notifier.h> #include <linux/memory_hotplug.h> #include <linux/show_mem_notifier.h> #include <linux/psi.h> #include <asm/tlb.h> #include "internal.h" Loading Loading @@ -74,13 +75,39 @@ DEFINE_MUTEX(oom_lock); #ifdef CONFIG_HAVE_USERSPACE_LOW_MEMORY_KILLER static atomic64_t ulmk_kill_jiffies = ATOMIC64_INIT(INITIAL_JIFFIES); static unsigned long psi_emergency_jiffies = INITIAL_JIFFIES; static DEFINE_MUTEX(ulmk_retry_lock); /* * psi_emergency_jiffies represents the last ULMK emergency event. * Give ULMK a 2 second window to handle this event. * If ULMK has made some progress since then, send another. * Repeat as necessary. */ bool should_ulmk_retry(void) { unsigned long j = atomic64_read(&ulmk_kill_jiffies); unsigned long now, last_kill; bool ret = false; mutex_lock(&ulmk_retry_lock); now = jiffies; last_kill = atomic64_read(&ulmk_kill_jiffies); if (time_before(now, psi_emergency_jiffies + 2 * HZ)) { ret = true; goto out; } if (time_after_eq(last_kill, psi_emergency_jiffies)) { psi_emergency_jiffies = now; psi_emergency_trigger(); ret = true; goto out; } return time_before(jiffies, j + 2 * HZ); out: mutex_unlock(&ulmk_retry_lock); return ret; } void ulmk_update_last_kill(void) Loading mm/page_alloc.c +2 −3 Original line number Diff line number Diff line Loading @@ -4565,9 +4565,6 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, if (costly_order && !(gfp_mask & __GFP_RETRY_MAYFAIL)) goto nopage; if (order <= PAGE_ALLOC_COSTLY_ORDER && should_ulmk_retry()) goto retry; if (should_reclaim_retry(gfp_mask, order, ac, alloc_flags, did_some_progress > 0, &no_progress_loops)) goto retry; Loading @@ -4585,6 +4582,8 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, &compaction_retries)) goto retry; if (order <= PAGE_ALLOC_COSTLY_ORDER && should_ulmk_retry()) goto retry; /* Deal with possible cpuset update races before we start OOM killing */ if (check_retry_cpuset(cpuset_mems_cookie, ac)) Loading Loading
include/linux/psi.h +4 −0 Original line number Diff line number Diff line Loading @@ -23,6 +23,8 @@ void psi_memstall_leave(unsigned long *flags); int psi_show(struct seq_file *s, struct psi_group *group, enum psi_res res); void psi_emergency_trigger(void); #ifdef CONFIG_CGROUPS int psi_cgroup_alloc(struct cgroup *cgrp); void psi_cgroup_free(struct cgroup *cgrp); Loading @@ -43,6 +45,8 @@ static inline void psi_init(void) {} static inline void psi_memstall_enter(unsigned long *flags) {} static inline void psi_memstall_leave(unsigned long *flags) {} static inline void psi_emergency_trigger(void){} #ifdef CONFIG_CGROUPS static inline int psi_cgroup_alloc(struct cgroup *cgrp) { Loading
include/trace/events/psi.h +9 −7 Original line number Diff line number Diff line Loading @@ -14,13 +14,14 @@ TRACE_EVENT(psi_window_vmstat, TP_PROTO(u64 memstall, const char *zone_name, u64 high, TP_PROTO(u64 mem_some, u64 mem_full, const char *zone_name, u64 high, u64 free, u64 cma, u64 file), TP_ARGS(memstall, zone_name, high, free, cma, file), TP_ARGS(mem_some, mem_full, zone_name, high, free, cma, file), TP_STRUCT__entry( __field(u64, memstall) __field(u64, mem_some) __field(u64, mem_full) __string(name, zone_name) __field(u64, high) __field(u64, free) Loading @@ -29,7 +30,8 @@ TRACE_EVENT(psi_window_vmstat, ), TP_fast_assign( __entry->memstall = memstall; __entry->mem_some = mem_some; __entry->mem_full = mem_full; __assign_str(name, zone_name); __entry->high = high; __entry->free = free; Loading @@ -37,9 +39,9 @@ TRACE_EVENT(psi_window_vmstat, __entry->file = file; ), TP_printk("%16s: Memstall: %#16llx High: %#8llx Free: %#8llx CMA: %#8llx File: %#8llx", __get_str(name), __entry->memstall, __entry->high, __entry->free, __entry->cma, __entry->file TP_printk("%16s: MEMSOME: %9lluns MEMFULL: %9lluns High: %9llukB Free: %9llukB CMA: %8llukB File: %9llukB", __get_str(name), __entry->mem_some, __entry->mem_full, __entry->high, __entry->free, __entry->cma, __entry->file ) ); Loading
kernel/sched/psi.c +41 −8 Original line number Diff line number Diff line Loading @@ -445,6 +445,9 @@ static void psi_avgs_work(struct work_struct *work) } #ifdef CONFIG_PSI_FTRACE #define TOKB(x) ((x) * (PAGE_SIZE / 1024)) static void trace_event_helper(struct psi_group *group) { struct zone *zone; Loading @@ -452,17 +455,22 @@ static void trace_event_helper(struct psi_group *group) unsigned long free; unsigned long cma; unsigned long file; u64 memstall = group->total[PSI_POLL][PSI_MEM_SOME]; u64 mem_some_delta = group->total[PSI_POLL][PSI_MEM_SOME] - group->polling_total[PSI_MEM_SOME]; u64 mem_full_delta = group->total[PSI_POLL][PSI_MEM_FULL] - group->polling_total[PSI_MEM_FULL]; for_each_populated_zone(zone) { wmark = high_wmark_pages(zone); free = zone_page_state(zone, NR_FREE_PAGES); cma = zone_page_state(zone, NR_FREE_CMA_PAGES); file = zone_page_state(zone, NR_ZONE_ACTIVE_FILE) + zone_page_state(zone, NR_ZONE_INACTIVE_FILE); wmark = TOKB(high_wmark_pages(zone)); free = TOKB(zone_page_state(zone, NR_FREE_PAGES)); cma = TOKB(zone_page_state(zone, NR_FREE_CMA_PAGES)); file = TOKB(zone_page_state(zone, NR_ZONE_ACTIVE_FILE) + zone_page_state(zone, NR_ZONE_INACTIVE_FILE)); trace_psi_window_vmstat( memstall, zone->name, wmark, free, cma, file); mem_some_delta, mem_full_delta, zone->name, wmark, free, cma, file); } } #else Loading Loading @@ -571,6 +579,7 @@ static u64 update_triggers(struct psi_group *group, u64 now) t->last_event_time = now; } trace_event_helper(group); if (new_stall) memcpy(group->polling_total, total, sizeof(group->polling_total)); Loading @@ -578,6 +587,31 @@ static u64 update_triggers(struct psi_group *group, u64 now) return now + group->poll_min_period; } void psi_emergency_trigger(void) { struct psi_group *group = &psi_system; struct psi_trigger *t; if (static_branch_likely(&psi_disabled)) return; /* * In unlikely case that OOM was triggered while adding/ * removing triggers. */ if (!mutex_trylock(&group->trigger_lock)) return; list_for_each_entry(t, &group->triggers, node) { trace_psi_event(t->state, t->threshold); /* Generate an event */ if (cmpxchg(&t->event, 0, 1) == 0) wake_up_interruptible(&t->event_wait); } mutex_unlock(&group->trigger_lock); } /* * Schedule polling if it's not already scheduled. It's safe to call even from * hotpath because even though kthread_queue_delayed_work takes worker->lock Loading Loading @@ -637,7 +671,6 @@ static void psi_poll_work(struct kthread_work *work) */ group->polling_until = now + group->poll_min_period * UPDATES_PER_WINDOW; trace_event_helper(group); } if (now > group->polling_until) { Loading
mm/oom_kill.c +29 −2 Original line number Diff line number Diff line Loading @@ -43,6 +43,7 @@ #include <linux/mmu_notifier.h> #include <linux/memory_hotplug.h> #include <linux/show_mem_notifier.h> #include <linux/psi.h> #include <asm/tlb.h> #include "internal.h" Loading Loading @@ -74,13 +75,39 @@ DEFINE_MUTEX(oom_lock); #ifdef CONFIG_HAVE_USERSPACE_LOW_MEMORY_KILLER static atomic64_t ulmk_kill_jiffies = ATOMIC64_INIT(INITIAL_JIFFIES); static unsigned long psi_emergency_jiffies = INITIAL_JIFFIES; static DEFINE_MUTEX(ulmk_retry_lock); /* * psi_emergency_jiffies represents the last ULMK emergency event. * Give ULMK a 2 second window to handle this event. * If ULMK has made some progress since then, send another. * Repeat as necessary. */ bool should_ulmk_retry(void) { unsigned long j = atomic64_read(&ulmk_kill_jiffies); unsigned long now, last_kill; bool ret = false; mutex_lock(&ulmk_retry_lock); now = jiffies; last_kill = atomic64_read(&ulmk_kill_jiffies); if (time_before(now, psi_emergency_jiffies + 2 * HZ)) { ret = true; goto out; } if (time_after_eq(last_kill, psi_emergency_jiffies)) { psi_emergency_jiffies = now; psi_emergency_trigger(); ret = true; goto out; } return time_before(jiffies, j + 2 * HZ); out: mutex_unlock(&ulmk_retry_lock); return ret; } void ulmk_update_last_kill(void) Loading
mm/page_alloc.c +2 −3 Original line number Diff line number Diff line Loading @@ -4565,9 +4565,6 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, if (costly_order && !(gfp_mask & __GFP_RETRY_MAYFAIL)) goto nopage; if (order <= PAGE_ALLOC_COSTLY_ORDER && should_ulmk_retry()) goto retry; if (should_reclaim_retry(gfp_mask, order, ac, alloc_flags, did_some_progress > 0, &no_progress_loops)) goto retry; Loading @@ -4585,6 +4582,8 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, &compaction_retries)) goto retry; if (order <= PAGE_ALLOC_COSTLY_ORDER && should_ulmk_retry()) goto retry; /* Deal with possible cpuset update races before we start OOM killing */ if (check_retry_cpuset(cpuset_mems_cookie, ac)) Loading