Merge "lmkd: Add support for process death notifications"

Suren Baghdasaryan 2019-10-09 16:18:40 +00:00 committed by Gerrit Code Review
commit 530098cebc


@@ -31,6 +31,7 @@
#include <sys/mman.h>
#include <sys/resource.h>
#include <sys/socket.h>
#include <sys/syscall.h>
#include <sys/sysinfo.h>
#include <sys/time.h>
#include <sys/types.h>
@@ -139,6 +140,10 @@
/* ro.lmk.psi_complete_stall_ms property defaults */
#define DEF_COMPLETE_STALL 700
static inline int sys_pidfd_open(pid_t pid, unsigned int flags) {
return syscall(__NR_pidfd_open, pid, flags);
}
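
Editor's note: the wrapper above assumes the build headers already define __NR_pidfd_open (the syscall landed in Linux 5.3). When compiling against older headers, a guard along these lines is one option; pinning the number is an assumption of this sketch, not part of the change (434 is the asm-generic value):

#include <sys/syscall.h>

/* Hypothetical fallback, not in this commit: supply the syscall number
 * when old kernel headers do not provide it. */
#ifndef __NR_pidfd_open
#define __NR_pidfd_open 434    /* asm-generic table, Linux 5.3+ */
#endif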
/* default to old in-kernel interface if no memory pressure events */
static bool use_inkernel_interface = true;
static bool has_inkernel_module;
@@ -169,6 +174,11 @@ struct psi_threshold {
static int level_oomadj[VMPRESS_LEVEL_COUNT];
static int mpevfd[VMPRESS_LEVEL_COUNT] = { -1, -1, -1 };
static bool pidfd_supported;
static int last_kill_pid_or_fd = -1;
static struct timespec last_kill_tm;
/* lmkd configurable parameters */
static bool debug_process_killing;
static bool enable_pressure_upgrade;
static int64_t upgrade_pressure;
@@ -197,6 +207,8 @@ enum polling_update {
POLLING_DO_NOT_CHANGE,
POLLING_START,
POLLING_STOP,
POLLING_PAUSE,
POLLING_RESUME,
};
/*
@@ -207,6 +219,7 @@ enum polling_update {
*/
struct polling_params {
struct event_handler_info* poll_handler;
struct event_handler_info* paused_handler;
struct timespec poll_start_tm;
struct timespec last_poll_tm;
int polling_interval_ms;
@@ -235,8 +248,11 @@ static struct sock_event_handler_info data_sock[MAX_DATA_CONN];
/* vmpressure event handler data */
static struct event_handler_info vmpressure_hinfo[VMPRESS_LEVEL_COUNT];
/* 3 memory pressure levels, 1 ctrl listen socket, 2 ctrl data sockets, 1 lmk events fd */
#define MAX_EPOLL_EVENTS (2 + MAX_DATA_CONN + VMPRESS_LEVEL_COUNT)
/*
* 1 ctrl listen socket, 2 ctrl data sockets, 3 memory pressure levels,
* 1 lmk events fd + 1 fd to wait for process death
*/
#define MAX_EPOLL_EVENTS (1 + MAX_DATA_CONN + VMPRESS_LEVEL_COUNT + 1 + 1)
static int epollfd;
static int maxevents;
@@ -1647,11 +1663,112 @@ static void set_process_group_and_prio(int pid, SchedPolicy sp, int prio) {
closedir(d);
}
static int last_killed_pid = -1;
static bool is_kill_pending(void) {
char buf[24];
if (last_kill_pid_or_fd < 0) {
return false;
}
if (pidfd_supported) {
return true;
}
/* When pidfd is not supported, base the decision on /proc/<pid> existence */
snprintf(buf, sizeof(buf), "/proc/%d/", last_kill_pid_or_fd);
if (access(buf, F_OK) == 0) {
return true;
}
return false;
}
static bool is_waiting_for_kill(void) {
return pidfd_supported && last_kill_pid_or_fd >= 0;
}
static void stop_wait_for_proc_kill(bool finished) {
struct epoll_event epev;
if (last_kill_pid_or_fd < 0) {
return;
}
if (debug_process_killing) {
struct timespec curr_tm;
if (clock_gettime(CLOCK_MONOTONIC_COARSE, &curr_tm) != 0) {
/*
* curr_tm is used here merely to report kill duration, so this failure is not fatal.
* Log an error and continue.
*/
ALOGE("Failed to get current time");
}
if (finished) {
ALOGI("Process got killed in %ldms",
get_time_diff_ms(&last_kill_tm, &curr_tm));
} else {
ALOGI("Stop waiting for process kill after %ldms",
get_time_diff_ms(&last_kill_tm, &curr_tm));
}
}
if (pidfd_supported) {
/* unregister fd */
if (epoll_ctl(epollfd, EPOLL_CTL_DEL, last_kill_pid_or_fd, &epev) != 0) {
ALOGE("epoll_ctl for last killed process failed; errno=%d", errno);
return;
}
maxevents--;
close(last_kill_pid_or_fd);
}
last_kill_pid_or_fd = -1;
}
static void kill_done_handler(int data __unused, uint32_t events __unused,
struct polling_params *poll_params) {
stop_wait_for_proc_kill(true);
poll_params->update = POLLING_RESUME;
}
static void start_wait_for_proc_kill(int pid) {
static struct event_handler_info kill_done_hinfo = { 0, kill_done_handler };
struct epoll_event epev;
if (last_kill_pid_or_fd >= 0) {
/* Should not happen, but if it does, stop the previous wait */
ALOGE("Attempt to wait for a kill while another wait is in progress");
stop_wait_for_proc_kill(false);
}
if (!pidfd_supported) {
/* If pidfd is not supported, store the PID of the process being killed */
last_kill_pid_or_fd = pid;
return;
}
last_kill_pid_or_fd = TEMP_FAILURE_RETRY(sys_pidfd_open(pid, 0));
if (last_kill_pid_or_fd < 0) {
ALOGE("pidfd_open for process pid %d failed; errno=%d", pid, errno);
return;
}
epev.events = EPOLLIN;
epev.data.ptr = (void *)&kill_done_hinfo;
if (epoll_ctl(epollfd, EPOLL_CTL_ADD, last_kill_pid_or_fd, &epev) != 0) {
ALOGE("epoll_ctl for last kill failed; errno=%d", errno);
close(last_kill_pid_or_fd);
last_kill_pid_or_fd = -1;
return;
}
maxevents++;
}
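
The mechanism this function relies on, in isolation: a pidfd becomes readable (EPOLLIN) once its process exits, so a single epoll_wait() replaces repeated /proc/<pid> checks. A minimal standalone sketch (editor's illustration, error handling elided, assumes Linux 5.3+):

#include <stdio.h>
#include <unistd.h>
#include <sys/epoll.h>
#include <sys/syscall.h>
#include <sys/types.h>

int main(void) {
    pid_t pid = fork();
    if (pid == 0) {               /* child: linger briefly, then exit */
        sleep(1);
        _exit(0);
    }

    /* Open a pidfd for the child and register it with epoll */
    int pidfd = (int)syscall(__NR_pidfd_open, pid, 0);
    int epfd = epoll_create1(0);
    struct epoll_event ev = { .events = EPOLLIN };
    epoll_ctl(epfd, EPOLL_CTL_ADD, pidfd, &ev);

    /* Blocks until the child dies; no /proc polling needed */
    struct epoll_event out;
    if (epoll_wait(epfd, &out, 1, -1) == 1)
        printf("child %d exited\n", (int)pid);

    close(pidfd);
    close(epfd);
    return 0;
}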
/* Kill one process specified by procp. Returns the size of the process killed */
static int kill_one_process(struct proc* procp, int min_oom_score, int kill_reason,
const char *kill_desc, union meminfo *mi) {
const char *kill_desc, union meminfo *mi, struct timespec *tm) {
int pid = procp->pid;
uid_t uid = procp->uid;
int tgid;
@@ -1682,12 +1799,16 @@ static int kill_one_process(struct proc* procp, int min_oom_score, int kill_reas
TRACE_KILL_START(pid);
/* Have to start waiting before sending SIGKILL to make sure pid is valid */
start_wait_for_proc_kill(pid);
/* CAP_KILL required */
r = kill(pid, SIGKILL);
TRACE_KILL_END();
if (r) {
stop_wait_for_proc_kill(false);
ALOGE("kill(%d): errno=%d", pid, errno);
/* Delete process record even when we fail to kill so that we don't get stuck on it */
goto out;
@@ -1695,6 +1816,8 @@ static int kill_one_process(struct proc* procp, int min_oom_score, int kill_reas
set_process_group_and_prio(pid, SP_FOREGROUND, ANDROID_PRIORITY_HIGHEST);
last_kill_tm = *tm;
inc_killcnt(procp->oomadj);
killinfo_log(procp, min_oom_score, tasksize, kill_reason, mi);
@@ -1707,8 +1830,6 @@ static int kill_one_process(struct proc* procp, int min_oom_score, int kill_reas
uid, procp->oomadj, tasksize * page_k);
}
last_killed_pid = pid;
stats_write_lmk_kill_occurred(LMK_KILL_OCCURRED, uid, taskname,
procp->oomadj, min_oom_score, tasksize, mem_st);
@@ -1728,7 +1849,7 @@ out:
* Returns size of the killed process.
*/
static int find_and_kill_process(int min_score_adj, int kill_reason, const char *kill_desc,
union meminfo *mi) {
union meminfo *mi, struct timespec *tm) {
int i;
int killed_size = 0;
bool lmk_state_change_start = false;
@@ -1743,7 +1864,7 @@ static int find_and_kill_process(int min_score_adj, int kill_reason, const char
if (!procp)
break;
killed_size = kill_one_process(procp, min_score_adj, kill_reason, kill_desc, mi);
killed_size = kill_one_process(procp, min_score_adj, kill_reason, kill_desc, mi, tm);
if (killed_size >= 0) {
if (!lmk_state_change_start) {
lmk_state_change_start = true;
@@ -1822,23 +1943,6 @@ enum vmpressure_level downgrade_level(enum vmpressure_level level) {
level - 1 : level);
}
static bool is_kill_pending(void) {
char buf[24];
if (last_killed_pid < 0) {
return false;
}
snprintf(buf, sizeof(buf), "/proc/%d/", last_killed_pid);
if (access(buf, F_OK) == 0) {
return true;
}
// reset last killed PID because there's nothing pending
last_killed_pid = -1;
return false;
}
enum zone_watermark {
WMARK_MIN = 0,
WMARK_LOW,
@@ -1934,9 +2038,13 @@ static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_
/* Skip while still killing a process */
if (is_kill_pending()) {
/* TODO: replace this quick polling with pidfd polling if kernel supports */
goto no_kill;
}
/*
* Process is dead, stop waiting. This has no effect if pidfds are supported and
* death notification already caused waiting to stop.
*/
stop_wait_for_proc_kill(true);
if (clock_gettime(CLOCK_MONOTONIC_COARSE, &curr_tm) != 0) {
ALOGE("Failed to get current time");
@@ -2067,7 +2175,8 @@ static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_
/* Kill a process if necessary */
if (kill_reason != NONE) {
int pages_freed = find_and_kill_process(min_score_adj, kill_reason, kill_desc, &mi);
int pages_freed = find_and_kill_process(min_score_adj, kill_reason, kill_desc, &mi,
&curr_tm);
if (pages_freed > 0) {
killing = true;
if (cut_thrashing_limit) {
@@ -2081,6 +2190,13 @@ static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_
}
no_kill:
/* Do not poll if kernel supports pidfd waiting */
if (is_waiting_for_kill()) {
/* Pause polling if we are waiting for process death notification */
poll_params->update = POLLING_PAUSE;
return;
}
/*
* Start polling after initial PSI event;
* extend polling while device is in direct reclaim or process is being killed;
@@ -2110,7 +2226,6 @@ static void mp_event_common(int data, uint32_t events, struct polling_params *po
union meminfo mi;
struct zoneinfo zi;
struct timespec curr_tm;
static struct timespec last_kill_tm;
static unsigned long kill_skip_count = 0;
enum vmpressure_level level = (enum vmpressure_level)data;
long other_free = 0, other_file = 0;
@@ -2159,15 +2274,26 @@ static void mp_event_common(int data, uint32_t events, struct polling_params *po
return;
}
if (kill_timeout_ms) {
// If we're within the timeout, see if there's pending reclaim work
// from the last killed process. If there is (as evidenced by
// /proc/<pid> continuing to exist), skip killing for now.
if ((get_time_diff_ms(&last_kill_tm, &curr_tm) < kill_timeout_ms) &&
(low_ram_device || is_kill_pending())) {
if (kill_timeout_ms && get_time_diff_ms(&last_kill_tm, &curr_tm) < kill_timeout_ms) {
/*
* If we're within the no-kill timeout, see if there's pending reclaim work
* from the last killed process. If so, skip killing for now.
*/
if (is_kill_pending()) {
kill_skip_count++;
return;
}
/*
* Process is dead, stop waiting. This has no effect if pidfds are supported and
* death notification already caused waiting to stop.
*/
stop_wait_for_proc_kill(true);
} else {
/*
* Killing took longer than no-kill timeout. Stop waiting for the last process
* to die because we are ready to kill again.
*/
stop_wait_for_proc_kill(false);
}
if (kill_skip_count > 0) {
@@ -2266,7 +2392,7 @@ static void mp_event_common(int data, uint32_t events, struct polling_params *po
do_kill:
if (low_ram_device) {
/* For Go devices kill only one task */
if (find_and_kill_process(level_oomadj[level], -1, NULL, &mi) == 0) {
if (find_and_kill_process(level_oomadj[level], -1, NULL, &mi, &curr_tm) == 0) {
if (debug_process_killing) {
ALOGI("Nothing to kill");
}
@@ -2289,7 +2415,7 @@ do_kill:
min_score_adj = level_oomadj[level];
}
pages_freed = find_and_kill_process(min_score_adj, -1, NULL, &mi);
pages_freed = find_and_kill_process(min_score_adj, -1, NULL, &mi, &curr_tm);
if (pages_freed == 0) {
/* Rate limit kill reports when nothing was reclaimed */
@@ -2297,9 +2423,6 @@ do_kill:
report_skip_count++;
return;
}
} else {
/* If we killed anything, update the last killed timestamp. */
last_kill_tm = curr_tm;
}
/* Log whenever we kill or when report rate limit allows */
@@ -2322,6 +2445,10 @@ do_kill:
last_report_tm = curr_tm;
}
if (is_waiting_for_kill()) {
/* Pause polling if we are waiting for process death notification */
poll_params->update = POLLING_PAUSE;
}
}
static bool init_mp_psi(enum vmpressure_level level, bool use_new_strategy) {
@@ -2473,6 +2600,7 @@ static int init(void) {
.fd = -1,
};
struct epoll_event epev;
int pidfd;
int i;
int ret;
@@ -2563,9 +2691,61 @@ static int init(void) {
ALOGE("Failed to read %s: %s", file_data.filename, strerror(errno));
}
/* check if kernel supports pidfd_open syscall */
pidfd = TEMP_FAILURE_RETRY(sys_pidfd_open(getpid(), 0));
if (pidfd < 0) {
pidfd_supported = (errno != ENOSYS);
} else {
pidfd_supported = true;
close(pidfd);
}
ALOGI("Process polling is %s", pidfd_supported ? "supported" : "not supported" );
return 0;
}
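
Note on the probe above: any errno other than ENOSYS still proves the syscall is wired into the kernel, which is why the check is errno != ENOSYS rather than a plain success test. Factored into a helper, the same pattern looks roughly like this (editor's sketch, not part of the change):

#include <errno.h>
#include <stdbool.h>
#include <unistd.h>
#include <sys/syscall.h>

/* Sketch: run-time detection of pidfd_open support. Only ENOSYS
 * means the kernel lacks the syscall entirely. */
static bool kernel_supports_pidfd_open(void) {
    int fd = (int)syscall(__NR_pidfd_open, getpid(), 0);
    if (fd >= 0) {
        close(fd);
        return true;
    }
    return errno != ENOSYS;
}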
static void call_handler(struct event_handler_info* handler_info,
struct polling_params *poll_params, uint32_t events) {
struct timespec curr_tm;
handler_info->handler(handler_info->data, events, poll_params);
clock_gettime(CLOCK_MONOTONIC_COARSE, &curr_tm);
poll_params->last_poll_tm = curr_tm;
switch (poll_params->update) {
case POLLING_START:
/*
* Poll for the duration of PSI_WINDOW_SIZE_MS after the
* initial PSI event because psi events are rate-limited
* at one per sec.
*/
poll_params->poll_start_tm = curr_tm;
if (poll_params->poll_handler != handler_info) {
poll_params->poll_handler = handler_info;
}
break;
case POLLING_STOP:
poll_params->poll_handler = NULL;
break;
case POLLING_PAUSE:
poll_params->paused_handler = handler_info;
poll_params->poll_handler = NULL;
break;
case POLLING_RESUME:
poll_params->poll_start_tm = curr_tm;
poll_params->poll_handler = poll_params->paused_handler;
break;
case POLLING_DO_NOT_CHANGE:
if (get_time_diff_ms(&poll_params->poll_start_tm, &curr_tm) > PSI_WINDOW_SIZE_MS) {
/* Polled for the duration of PSI window, time to stop */
poll_params->poll_handler = NULL;
}
/* WARNING: skipping the rest of the function */
return;
}
poll_params->update = POLLING_DO_NOT_CHANGE;
}
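
Taken together, the update field drives a small state machine over poll_handler and paused_handler. A toy model of the PAUSE/RESUME bookkeeping (editor's illustration with invented names; clearing paused_handler actually happens in mainloop(), shown in the next hunk):

#include <assert.h>
#include <stddef.h>

struct handler { int id; };

struct poll_state {
    struct handler *active;   /* stands in for poll_handler   */
    struct handler *paused;   /* stands in for paused_handler */
};

static void on_pause(struct poll_state *s, struct handler *h) {
    s->paused = h;            /* POLLING_PAUSE parks the handler...   */
    s->active = NULL;         /* ...and turns polling off             */
}

static void on_resume(struct poll_state *s) {
    s->active = s->paused;    /* POLLING_RESUME restores it; mainloop
                               * sees active == paused, clears paused
                               * and polls again immediately          */
}

int main(void) {
    struct handler h = { 1 };
    struct poll_state s = { NULL, NULL };
    on_pause(&s, &h);
    assert(s.active == NULL && s.paused == &h);
    on_resume(&s);
    assert(s.active == &h);   /* mainloop then resets s.paused */
    return 0;
}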
static void mainloop(void) {
struct event_handler_info* handler_info;
struct polling_params poll_params;
@@ -2582,41 +2762,33 @@ static void mainloop(void) {
int i;
if (poll_params.poll_handler) {
/* Calculate next timeout */
clock_gettime(CLOCK_MONOTONIC_COARSE, &curr_tm);
delay = get_time_diff_ms(&poll_params.last_poll_tm, &curr_tm);
delay = (delay < poll_params.polling_interval_ms) ?
poll_params.polling_interval_ms - delay : poll_params.polling_interval_ms;
/* Wait for events until the next polling timeout */
nevents = epoll_wait(epollfd, events, maxevents, delay);
bool poll_now;
clock_gettime(CLOCK_MONOTONIC_COARSE, &curr_tm);
if (get_time_diff_ms(&poll_params.last_poll_tm, &curr_tm) >=
poll_params.polling_interval_ms) {
/* Set input params for the call */
poll_params.poll_handler->handler(poll_params.poll_handler->data, 0, &poll_params);
poll_params.last_poll_tm = curr_tm;
if (poll_params.poll_handler == poll_params.paused_handler) {
/*
* Just transitioned into POLLING_RESUME. Reset paused_handler
* and poll immediately
*/
poll_params.paused_handler = NULL;
poll_now = true;
nevents = 0;
} else {
/* Calculate next timeout */
delay = get_time_diff_ms(&poll_params.last_poll_tm, &curr_tm);
delay = (delay < poll_params.polling_interval_ms) ?
poll_params.polling_interval_ms - delay : poll_params.polling_interval_ms;
if (poll_params.update != POLLING_DO_NOT_CHANGE) {
switch (poll_params.update) {
case POLLING_START:
poll_params.poll_start_tm = curr_tm;
break;
case POLLING_STOP:
poll_params.poll_handler = NULL;
break;
default:
break;
}
poll_params.update = POLLING_DO_NOT_CHANGE;
} else {
if (get_time_diff_ms(&poll_params.poll_start_tm, &curr_tm) >
PSI_WINDOW_SIZE_MS) {
/* Polled for the duration of PSI window, time to stop */
poll_params.poll_handler = NULL;
}
}
/* Wait for events until the next polling timeout */
nevents = epoll_wait(epollfd, events, maxevents, delay);
/* Update current time after wait */
clock_gettime(CLOCK_MONOTONIC_COARSE, &curr_tm);
poll_now = (get_time_diff_ms(&poll_params.last_poll_tm, &curr_tm) >=
poll_params.polling_interval_ms);
}
if (poll_now) {
call_handler(poll_params.poll_handler, &poll_params, 0);
}
} else {
/* Wait for events with no timeout */
@@ -2656,29 +2828,7 @@ static void mainloop(void) {
}
if (evt->data.ptr) {
handler_info = (struct event_handler_info*)evt->data.ptr;
/* Set input params for the call */
handler_info->handler(handler_info->data, evt->events, &poll_params);
if (poll_params.update != POLLING_DO_NOT_CHANGE) {
switch (poll_params.update) {
case POLLING_START:
/*
* Poll for the duration of PSI_WINDOW_SIZE_MS after the
* initial PSI event because psi events are rate-limited
* at one per sec.
*/
clock_gettime(CLOCK_MONOTONIC_COARSE, &curr_tm);
poll_params.poll_start_tm = poll_params.last_poll_tm = curr_tm;
poll_params.poll_handler = handler_info;
break;
case POLLING_STOP:
poll_params.poll_handler = NULL;
break;
default:
break;
}
poll_params.update = POLLING_DO_NOT_CHANGE;
}
call_handler(handler_info, &poll_params, evt->events);
}
}
}