Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 530098ce authored by Suren Baghdasaryan, committed by Gerrit Code Review
Browse files

Merge "lmkd: Add support for process death notifications"

parents b18dc5d2 f2081a96
Loading
Loading
Loading
Loading
+245 −95
Original line number Diff line number Diff line
@@ -31,6 +31,7 @@
#include <sys/mman.h>
#include <sys/resource.h>
#include <sys/socket.h>
#include <sys/syscall.h>
#include <sys/sysinfo.h>
#include <sys/time.h>
#include <sys/types.h>
@@ -139,6 +140,10 @@
/* ro.lmk.psi_complete_stall_ms property defaults */
#define DEF_COMPLETE_STALL 700

/*
 * Thin wrapper for the pidfd_open(2) syscall; libc may not provide one.
 * Returns a pidfd on success or -1 with errno set (ENOSYS when the kernel
 * lacks the syscall).
 */
static inline int sys_pidfd_open(pid_t pid, unsigned int flags) {
    long ret = syscall(__NR_pidfd_open, pid, flags);
    return (int)ret;
}

/* default to old in-kernel interface if no memory pressure events */
static bool use_inkernel_interface = true;
static bool has_inkernel_module;
@@ -169,6 +174,11 @@ struct psi_threshold {

static int level_oomadj[VMPRESS_LEVEL_COUNT];
static int mpevfd[VMPRESS_LEVEL_COUNT] = { -1, -1, -1 };
static bool pidfd_supported;
static int last_kill_pid_or_fd = -1;
static struct timespec last_kill_tm;

/* lmkd configurable parameters */
static bool debug_process_killing;
static bool enable_pressure_upgrade;
static int64_t upgrade_pressure;
@@ -197,6 +207,8 @@ enum polling_update {
    POLLING_DO_NOT_CHANGE,
    POLLING_START,
    POLLING_STOP,
    POLLING_PAUSE,
    POLLING_RESUME,
};

/*
@@ -207,6 +219,7 @@ enum polling_update {
 */
struct polling_params {
    struct event_handler_info* poll_handler;
    struct event_handler_info* paused_handler;
    struct timespec poll_start_tm;
    struct timespec last_poll_tm;
    int polling_interval_ms;
@@ -235,8 +248,11 @@ static struct sock_event_handler_info data_sock[MAX_DATA_CONN];
/* vmpressure event handler data */
static struct event_handler_info vmpressure_hinfo[VMPRESS_LEVEL_COUNT];

/* 3 memory pressure levels, 1 ctrl listen socket, 2 ctrl data socket, 1 lmk events */
#define MAX_EPOLL_EVENTS (2 + MAX_DATA_CONN + VMPRESS_LEVEL_COUNT)
/*
 * 1 ctrl listen socket, 2 ctrl data socket, 3 memory pressure levels,
 * 1 lmk events + 1 fd to wait for process death
 */
#define MAX_EPOLL_EVENTS (1 + MAX_DATA_CONN + VMPRESS_LEVEL_COUNT + 1 + 1)
static int epollfd;
static int maxevents;

@@ -1647,11 +1663,112 @@ static void set_process_group_and_prio(int pid, SchedPolicy sp, int prio) {
    closedir(d);
}

static int last_killed_pid = -1;
/*
 * Check whether the most recently killed process is still dying.
 * With pidfd support a registered fd alone means the death notification has
 * not arrived yet; without it, fall back to probing /proc/<pid>.
 */
static bool is_kill_pending(void) {
    char path[24];

    if (last_kill_pid_or_fd < 0) {
        /* Nothing was killed or the wait has already been completed. */
        return false;
    }

    if (pidfd_supported) {
        /* The pidfd is cleared on death notification, so it being set means "pending". */
        return true;
    }

    /* when pidfd is not supported base the decision on /proc/<pid> existence */
    snprintf(path, sizeof(path), "/proc/%d/", last_kill_pid_or_fd);
    return access(path, F_OK) == 0;
}

/*
 * True only when pidfd-based death notifications are in use and a pidfd for
 * the last killed process is currently registered with epoll.
 */
static bool is_waiting_for_kill(void) {
    if (!pidfd_supported) {
        return false;
    }
    return last_kill_pid_or_fd >= 0;
}

/*
 * Stop waiting for the last killed process to die.
 *
 * finished: true when the process is known to be dead (logs the kill
 * duration), false when the wait is being abandoned (logs how long we
 * waited). When pidfd polling is in use, the pidfd is unregistered from
 * epoll and closed; on epoll_ctl failure the state is left intact so the
 * fd is not leaked out of tracking.
 */
static void stop_wait_for_proc_kill(bool finished) {
    struct epoll_event epev;

    if (last_kill_pid_or_fd < 0) {
        /* No kill is being waited for. */
        return;
    }

    if (debug_process_killing) {
        struct timespec curr_tm;

        if (clock_gettime(CLOCK_MONOTONIC_COARSE, &curr_tm) != 0) {
            /*
             * curr_tm is used here merely to report kill duration, so this failure is not fatal.
             * Log an error and skip the duration report instead of reading an uninitialized
             * timespec.
             */
            ALOGE("Failed to get current time");
        } else if (finished) {
            ALOGI("Process got killed in %ldms",
                get_time_diff_ms(&last_kill_tm, &curr_tm));
        } else {
            ALOGI("Stop waiting for process kill after %ldms",
                get_time_diff_ms(&last_kill_tm, &curr_tm));
        }
    }

    if (pidfd_supported) {
        /* unregister fd */
        if (epoll_ctl(epollfd, EPOLL_CTL_DEL, last_kill_pid_or_fd, &epev) != 0) {
            ALOGE("epoll_ctl for last killed process failed; errno=%d", errno);
            return;
        }
        maxevents--;
        close(last_kill_pid_or_fd);
    }

    last_kill_pid_or_fd = -1;
}

/*
 * Epoll callback invoked when the pidfd of the last killed process becomes
 * readable, i.e. the process has died. Finishes the wait and asks the main
 * loop to resume the paused polling handler.
 */
static void kill_done_handler(int data __unused, uint32_t events __unused,
                              struct polling_params *poll_params) {
    poll_params->update = POLLING_RESUME;
    stop_wait_for_proc_kill(true);
}

/*
 * Begin waiting for the process identified by pid to die. With pidfd support
 * a pidfd is opened and registered with epoll so death is delivered as an
 * event; otherwise the PID is stored for later /proc/<pid> polling.
 * Must be called before SIGKILL is sent so the pid is still valid.
 */
static void start_wait_for_proc_kill(int pid) {
    static struct event_handler_info kill_done_hinfo = { 0, kill_done_handler };
    struct epoll_event epev;

    if (last_kill_pid_or_fd >= 0) {
        /* Should not happen but if it does we should stop previous wait */
        ALOGE("Attempt to wait for a kill while another wait is in progress");
        stop_wait_for_proc_kill(false);
    }

    if (!pidfd_supported) {
        /* If pidfd is not supported store PID of the process being killed */
        last_kill_pid_or_fd = pid;
        return;
    }

    last_kill_pid_or_fd = TEMP_FAILURE_RETRY(sys_pidfd_open(pid, 0));
    if (last_kill_pid_or_fd < 0) {
        ALOGE("pidfd_open for process pid %d failed; errno=%d", pid, errno);
        return;
    }

    epev.events = EPOLLIN;
    epev.data.ptr = (void *)&kill_done_hinfo;
    if (epoll_ctl(epollfd, EPOLL_CTL_ADD, last_kill_pid_or_fd, &epev) == 0) {
        /* Registered: the extra fd raises the epoll_wait event budget. */
        maxevents++;
        return;
    }

    ALOGE("epoll_ctl for last kill failed; errno=%d", errno);
    close(last_kill_pid_or_fd);
    last_kill_pid_or_fd = -1;
}

/* Kill one process specified by procp.  Returns the size of the process killed */
static int kill_one_process(struct proc* procp, int min_oom_score, int kill_reason,
                            const char *kill_desc, union meminfo *mi) {
                            const char *kill_desc, union meminfo *mi, struct timespec *tm) {
    int pid = procp->pid;
    uid_t uid = procp->uid;
    int tgid;
@@ -1682,12 +1799,16 @@ static int kill_one_process(struct proc* procp, int min_oom_score, int kill_reas

    TRACE_KILL_START(pid);

    /* Have to start waiting before sending SIGKILL to make sure pid is valid */
    start_wait_for_proc_kill(pid);

    /* CAP_KILL required */
    r = kill(pid, SIGKILL);

    TRACE_KILL_END();

    if (r) {
        stop_wait_for_proc_kill(false);
        ALOGE("kill(%d): errno=%d", pid, errno);
        /* Delete process record even when we fail to kill so that we don't get stuck on it */
        goto out;
@@ -1695,6 +1816,8 @@ static int kill_one_process(struct proc* procp, int min_oom_score, int kill_reas

    set_process_group_and_prio(pid, SP_FOREGROUND, ANDROID_PRIORITY_HIGHEST);

    last_kill_tm = *tm;

    inc_killcnt(procp->oomadj);

    killinfo_log(procp, min_oom_score, tasksize, kill_reason, mi);
@@ -1707,8 +1830,6 @@ static int kill_one_process(struct proc* procp, int min_oom_score, int kill_reas
              uid, procp->oomadj, tasksize * page_k);
    }

    last_killed_pid = pid;

    stats_write_lmk_kill_occurred(LMK_KILL_OCCURRED, uid, taskname,
            procp->oomadj, min_oom_score, tasksize, mem_st);

@@ -1728,7 +1849,7 @@ out:
 * Returns size of the killed process.
 */
static int find_and_kill_process(int min_score_adj, int kill_reason, const char *kill_desc,
                                 union meminfo *mi) {
                                 union meminfo *mi, struct timespec *tm) {
    int i;
    int killed_size = 0;
    bool lmk_state_change_start = false;
@@ -1743,7 +1864,7 @@ static int find_and_kill_process(int min_score_adj, int kill_reason, const char
            if (!procp)
                break;

            killed_size = kill_one_process(procp, min_score_adj, kill_reason, kill_desc, mi);
            killed_size = kill_one_process(procp, min_score_adj, kill_reason, kill_desc, mi, tm);
            if (killed_size >= 0) {
                if (!lmk_state_change_start) {
                    lmk_state_change_start = true;
@@ -1822,23 +1943,6 @@ enum vmpressure_level downgrade_level(enum vmpressure_level level) {
        level - 1 : level);
}

/*
 * Legacy (pre-pidfd) check, removed by this change: the last kill is
 * considered pending while /proc/<pid> for the killed process still exists.
 * Clears last_killed_pid once the process is gone so later calls are cheap.
 */
static bool is_kill_pending(void) {
    char buf[24];

    if (last_killed_pid < 0) {
        /* No kill was issued or the previous one already completed. */
        return false;
    }

    snprintf(buf, sizeof(buf), "/proc/%d/", last_killed_pid);
    if (access(buf, F_OK) == 0) {
        return true;
    }

    // reset last killed PID because there's nothing pending
    last_killed_pid = -1;
    return false;
}

enum zone_watermark {
    WMARK_MIN = 0,
    WMARK_LOW,
@@ -1934,9 +2038,13 @@ static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_

    /* Skip while still killing a process */
    if (is_kill_pending()) {
        /* TODO: replace this quick polling with pidfd polling if kernel supports */
        goto no_kill;
    }
    /*
     * Process is dead, stop waiting. This has no effect if pidfds are supported and
     * death notification already caused waiting to stop.
     */
    stop_wait_for_proc_kill(true);

    if (clock_gettime(CLOCK_MONOTONIC_COARSE, &curr_tm) != 0) {
        ALOGE("Failed to get current time");
@@ -2067,7 +2175,8 @@ static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_

    /* Kill a process if necessary */
    if (kill_reason != NONE) {
        int pages_freed = find_and_kill_process(min_score_adj, kill_reason, kill_desc, &mi);
        int pages_freed = find_and_kill_process(min_score_adj, kill_reason, kill_desc, &mi,
                                                &curr_tm);
        if (pages_freed > 0) {
            killing = true;
            if (cut_thrashing_limit) {
@@ -2081,6 +2190,13 @@ static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_
    }

no_kill:
    /* Do not poll if kernel supports pidfd waiting */
    if (is_waiting_for_kill()) {
        /* Pause polling if we are waiting for process death notification */
        poll_params->update = POLLING_PAUSE;
        return;
    }

    /*
     * Start polling after initial PSI event;
     * extend polling while device is in direct reclaim or process is being killed;
@@ -2110,7 +2226,6 @@ static void mp_event_common(int data, uint32_t events, struct polling_params *po
    union meminfo mi;
    struct zoneinfo zi;
    struct timespec curr_tm;
    static struct timespec last_kill_tm;
    static unsigned long kill_skip_count = 0;
    enum vmpressure_level level = (enum vmpressure_level)data;
    long other_free = 0, other_file = 0;
@@ -2159,15 +2274,26 @@ static void mp_event_common(int data, uint32_t events, struct polling_params *po
        return;
    }

    if (kill_timeout_ms) {
        // If we're within the timeout, see if there's pending reclaim work
        // from the last killed process. If there is (as evidenced by
        // /proc/<pid> continuing to exist), skip killing for now.
        if ((get_time_diff_ms(&last_kill_tm, &curr_tm) < kill_timeout_ms) &&
            (low_ram_device || is_kill_pending())) {
    if (kill_timeout_ms && get_time_diff_ms(&last_kill_tm, &curr_tm) < kill_timeout_ms) {
        /*
         * If we're within the no-kill timeout, see if there's pending reclaim work
         * from the last killed process. If so, skip killing for now.
         */
        if (is_kill_pending()) {
            kill_skip_count++;
            return;
        }
        /*
         * Process is dead, stop waiting. This has no effect if pidfds are supported and
         * death notification already caused waiting to stop.
         */
        stop_wait_for_proc_kill(true);
    } else {
        /*
         * Killing took longer than no-kill timeout. Stop waiting for the last process
         * to die because we are ready to kill again.
         */
        stop_wait_for_proc_kill(false);
    }

    if (kill_skip_count > 0) {
@@ -2266,7 +2392,7 @@ static void mp_event_common(int data, uint32_t events, struct polling_params *po
do_kill:
    if (low_ram_device) {
        /* For Go devices kill only one task */
        if (find_and_kill_process(level_oomadj[level], -1, NULL, &mi) == 0) {
        if (find_and_kill_process(level_oomadj[level], -1, NULL, &mi, &curr_tm) == 0) {
            if (debug_process_killing) {
                ALOGI("Nothing to kill");
            }
@@ -2289,7 +2415,7 @@ do_kill:
            min_score_adj = level_oomadj[level];
        }

        pages_freed = find_and_kill_process(min_score_adj, -1, NULL, &mi);
        pages_freed = find_and_kill_process(min_score_adj, -1, NULL, &mi, &curr_tm);

        if (pages_freed == 0) {
            /* Rate limit kill reports when nothing was reclaimed */
@@ -2297,9 +2423,6 @@ do_kill:
                report_skip_count++;
                return;
            }
        } else {
            /* If we killed anything, update the last killed timestamp. */
            last_kill_tm = curr_tm;
        }

        /* Log whenever we kill or when report rate limit allows */
@@ -2322,6 +2445,10 @@ do_kill:

        last_report_tm = curr_tm;
    }
    if (is_waiting_for_kill()) {
        /* pause polling if we are waiting for process death notification */
        poll_params->update = POLLING_PAUSE;
    }
}

static bool init_mp_psi(enum vmpressure_level level, bool use_new_strategy) {
@@ -2473,6 +2600,7 @@ static int init(void) {
        .fd = -1,
    };
    struct epoll_event epev;
    int pidfd;
    int i;
    int ret;

@@ -2563,9 +2691,61 @@ static int init(void) {
        ALOGE("Failed to read %s: %s", file_data.filename, strerror(errno));
    }

    /* check if kernel supports pidfd_open syscall */
    pidfd = TEMP_FAILURE_RETRY(sys_pidfd_open(getpid(), 0));
    if (pidfd < 0) {
        pidfd_supported = (errno != ENOSYS);
    } else {
        pidfd_supported = true;
        close(pidfd);
    }
    ALOGI("Process polling is %s", pidfd_supported ? "supported" : "not supported" );

    return 0;
}

/*
 * Invoke an event handler and apply the polling-state transition the handler
 * requested via poll_params->update, then reset the request.
 */
static void call_handler(struct event_handler_info* handler_info,
                         struct polling_params *poll_params, uint32_t events) {
    struct timespec curr_tm;

    handler_info->handler(handler_info->data, events, poll_params);
    clock_gettime(CLOCK_MONOTONIC_COARSE, &curr_tm);
    poll_params->last_poll_tm = curr_tm;

    switch (poll_params->update) {
    case POLLING_START:
        /*
         * Poll for the duration of PSI_WINDOW_SIZE_MS after the
         * initial PSI event because psi events are rate-limited
         * at one per sec.
         */
        poll_params->poll_start_tm = curr_tm;
        poll_params->poll_handler = handler_info;
        break;
    case POLLING_STOP:
        poll_params->poll_handler = NULL;
        break;
    case POLLING_PAUSE:
        /* Remember which handler to resume when the pause ends. */
        poll_params->paused_handler = handler_info;
        poll_params->poll_handler = NULL;
        break;
    case POLLING_RESUME:
        /* Restart the polling window with the previously paused handler. */
        poll_params->poll_start_tm = curr_tm;
        poll_params->poll_handler = poll_params->paused_handler;
        break;
    case POLLING_DO_NOT_CHANGE:
        if (get_time_diff_ms(&poll_params->poll_start_tm, &curr_tm) > PSI_WINDOW_SIZE_MS) {
            /* Polled for the duration of PSI window, time to stop */
            poll_params->poll_handler = NULL;
        }
        break;
    }
    /* No-op in the POLLING_DO_NOT_CHANGE case; clears all other requests. */
    poll_params->update = POLLING_DO_NOT_CHANGE;
}

static void mainloop(void) {
    struct event_handler_info* handler_info;
    struct polling_params poll_params;
@@ -2582,8 +2762,19 @@ static void mainloop(void) {
        int i;

        if (poll_params.poll_handler) {
            /* Calculate next timeout */
            bool poll_now;

            clock_gettime(CLOCK_MONOTONIC_COARSE, &curr_tm);
            if (poll_params.poll_handler == poll_params.paused_handler) {
                /*
                 * Just transitioned into POLLING_RESUME. Reset paused_handler
                 * and poll immediately
                 */
                poll_params.paused_handler = NULL;
                poll_now = true;
                nevents = 0;
            } else {
                /* Calculate next timeout */
                delay = get_time_diff_ms(&poll_params.last_poll_tm, &curr_tm);
                delay = (delay < poll_params.polling_interval_ms) ?
                    poll_params.polling_interval_ms - delay : poll_params.polling_interval_ms;
@@ -2591,32 +2782,13 @@ static void mainloop(void) {
                /* Wait for events until the next polling timeout */
                nevents = epoll_wait(epollfd, events, maxevents, delay);

                /* Update current time after wait */
                clock_gettime(CLOCK_MONOTONIC_COARSE, &curr_tm);
            if (get_time_diff_ms(&poll_params.last_poll_tm, &curr_tm) >=
                poll_params.polling_interval_ms) {
                /* Set input params for the call */
                poll_params.poll_handler->handler(poll_params.poll_handler->data, 0, &poll_params);
                poll_params.last_poll_tm = curr_tm;

                if (poll_params.update != POLLING_DO_NOT_CHANGE) {
                    switch (poll_params.update) {
                    case POLLING_START:
                        poll_params.poll_start_tm = curr_tm;
                        break;
                    case POLLING_STOP:
                        poll_params.poll_handler = NULL;
                        break;
                    default:
                        break;
                    }
                    poll_params.update = POLLING_DO_NOT_CHANGE;
                } else {
                    if (get_time_diff_ms(&poll_params.poll_start_tm, &curr_tm) >
                        PSI_WINDOW_SIZE_MS) {
                        /* Polled for the duration of PSI window, time to stop */
                        poll_params.poll_handler = NULL;
                    }
                poll_now = (get_time_diff_ms(&poll_params.last_poll_tm, &curr_tm) >=
                    poll_params.polling_interval_ms);
            }
            if (poll_now) {
                call_handler(poll_params.poll_handler, &poll_params, 0);
            }
        } else {
            /* Wait for events with no timeout */
@@ -2656,29 +2828,7 @@ static void mainloop(void) {
            }
            if (evt->data.ptr) {
                handler_info = (struct event_handler_info*)evt->data.ptr;
                /* Set input params for the call */
                handler_info->handler(handler_info->data, evt->events, &poll_params);

                if (poll_params.update != POLLING_DO_NOT_CHANGE) {
                    switch (poll_params.update) {
                    case POLLING_START:
                        /*
                         * Poll for the duration of PSI_WINDOW_SIZE_MS after the
                         * initial PSI event because psi events are rate-limited
                         * at one per sec.
                         */
                        clock_gettime(CLOCK_MONOTONIC_COARSE, &curr_tm);
                        poll_params.poll_start_tm = poll_params.last_poll_tm = curr_tm;
                        poll_params.poll_handler = handler_info;
                        break;
                    case POLLING_STOP:
                        poll_params.poll_handler = NULL;
                        break;
                    default:
                        break;
                    }
                    poll_params.update = POLLING_DO_NOT_CHANGE;
                }
                call_handler(handler_info, &poll_params, evt->events);
            }
        }
    }