From d2da814d1fd146c49117d8e940d0331f751ca086 Mon Sep 17 00:00:00 2001
From: Jim Blackler
Date: Tue, 10 Sep 2019 15:30:05 +0100
Subject: [PATCH] lmkd: Maintain pid to taskname mapping to amend kill reports.

Required because the kernel cannot always get the taskname safely at the
time the process is killed (due to competition for mm->mmap_sem).

Test: manually
Bug: 130017100
Signed-off-by: Jim Blackler
Change-Id: I27a2c3340da321570f0832d58fe9e79ca031620b
---
 lmkd/lmkd.c     |  26 +++++++++--
 lmkd/statslog.c | 115 ++++++++++++++++++++++++++++++++++++++++++++++++
 lmkd/statslog.h |  29 ++++++++++++
 3 files changed, 166 insertions(+), 4 deletions(-)

diff --git a/lmkd/lmkd.c b/lmkd/lmkd.c
index d17da128b..f06717da0 100644
--- a/lmkd/lmkd.c
+++ b/lmkd/lmkd.c
@@ -362,6 +362,9 @@ static uint32_t killcnt_total = 0;
 /* PAGE_SIZE / 1024 */
 static long page_k;
 
+static char* proc_get_name(int pid);
+static void poll_kernel();
+
 static bool parse_int64(const char* str, int64_t* ret) {
     char* endptr;
     long long val = strtoll(str, &endptr, 10);
@@ -634,6 +637,9 @@ static void cmd_procprio(LMKD_CTRL_PACKET packet) {
     }
 
     if (use_inkernel_interface) {
+#ifdef LMKD_LOG_STATS
+        stats_store_taskname(params.pid, proc_get_name(params.pid));
+#endif
         return;
     }
 
@@ -704,6 +710,16 @@ static void cmd_procremove(LMKD_CTRL_PACKET packet) {
     struct lmk_procremove params;
 
     if (use_inkernel_interface) {
+#ifdef LMKD_LOG_STATS
+        /* Perform an extra check before the pid is removed, after which it
+         * will be impossible for poll_kernel to get the taskname. poll_kernel()
+         * is potentially a long-running blocking function; however this method
+         * handles AMS requests but does not block AMS.*/
+        if (enable_stats_log) {
+            poll_kernel();
+        }
+        stats_remove_taskname(params.pid);
+#endif
         return;
     }
 
@@ -721,6 +737,9 @@ static void cmd_procpurge() {
     struct proc *next;
 
     if (use_inkernel_interface) {
+#ifdef LMKD_LOG_STATS
+        stats_purge_tasknames();
+#endif
         return;
     }
 
@@ -1925,7 +1944,6 @@ err_open_mpfd:
 
 #ifdef LMKD_LOG_STATS
 static int kernel_poll_fd = -1;
-
 static void poll_kernel() {
     if (kernel_poll_fd == -1) {
         // not waiting
@@ -1958,9 +1976,9 @@ static void poll_kernel() {
         /* only the death of the group leader process is logged */
         if (fields_read == 10 && group_leader_pid == pid) {
             int64_t process_start_time_ns = starttime * (NS_PER_SEC / sysconf(_SC_CLK_TCK));
-            stats_write_lmk_kill_occurred(log_ctx, LMK_KILL_OCCURRED, uid, taskname, oom_score_adj,
-                                          min_flt, maj_flt, rss_in_pages * PAGE_SIZE, 0, 0,
-                                          process_start_time_ns, min_score_adj);
+            stats_write_lmk_kill_occurred_pid(log_ctx, LMK_KILL_OCCURRED, uid, pid, oom_score_adj,
+                                              min_flt, maj_flt, rss_in_pages * PAGE_SIZE, 0, 0,
+                                              process_start_time_ns, min_score_adj);
         }
 
         free(taskname);
diff --git a/lmkd/statslog.c b/lmkd/statslog.c
index 0c230aeb9..f3a6e55d8 100644
--- a/lmkd/statslog.c
+++ b/lmkd/statslog.c
@@ -18,8 +18,22 @@
 #include <errno.h>
 #include <log/log_id.h>
 #include <stats_event_list.h>
+#include <stdlib.h>
+#include <string.h>
 #include <time.h>
 
+#define LINE_MAX 128
+
+struct proc {
+    int pid;
+    char taskname[LINE_MAX];
+    struct proc* pidhash_next;
+};
+
+#define PIDHASH_SZ 1024
+static struct proc** pidhash = NULL;
+#define pid_hashfn(x) ((((x) >> 8) ^ (x)) & (PIDHASH_SZ - 1))
+
 static int64_t getElapsedRealTimeNs() {
     struct timespec t;
     t.tv_sec = t.tv_nsec = 0;
@@ -57,6 +71,18 @@ stats_write_lmk_state_changed(android_log_context ctx, int32_t code, int32_t sta
     return write_to_logger(ctx, LOG_ID_STATS);
 }
 
+/* Returns the entry stored for pid, or NULL when there is none. */
+static struct proc* pid_lookup(int pid) {
+    struct proc* procp;
+
+    if (!pidhash) return NULL;
+
+    for (procp = pidhash[pid_hashfn(pid)]; procp && procp->pid != pid; procp = procp->pidhash_next)
+        ;
+
+    return procp;
+}
+
 /**
  * Logs the event when LMKD kills a process to reduce memory pressure.
  * Code: LMK_KILL_OCCURRED = 51
@@ -124,3 +150,92 @@ stats_write_lmk_kill_occurred(android_log_context ctx, int32_t code, int32_t uid
 
     return write_to_logger(ctx, LOG_ID_STATS);
 }
+
+int stats_write_lmk_kill_occurred_pid(android_log_context ctx, int32_t code, int32_t uid, int pid,
+                                      int32_t oom_score, int64_t pgfault, int64_t pgmajfault,
+                                      int64_t rss_in_bytes, int64_t cache_in_bytes,
+                                      int64_t swap_in_bytes, int64_t process_start_time_ns,
+                                      int32_t min_oom_score) {
+    struct proc* proc = pid_lookup(pid);
+    if (!proc) return -EINVAL;
+
+    return stats_write_lmk_kill_occurred(ctx, code, uid, proc->taskname, oom_score, pgfault,
+                                         pgmajfault, rss_in_bytes, cache_in_bytes, swap_in_bytes,
+                                         process_start_time_ns, min_oom_score);
+}
+
+/*
+ * Links procp at the head of its pid bucket, allocating the table on first
+ * use. Takes ownership of procp: it is freed if the table cannot be allocated.
+ */
+static void proc_insert(struct proc* procp) {
+    if (!pidhash) {
+        /* pidhash is an array of bucket head pointers, not of struct proc. */
+        pidhash = calloc(PIDHASH_SZ, sizeof(*pidhash));
+        if (!pidhash) {
+            free(procp);
+            return;
+        }
+    }
+
+    int hval = pid_hashfn(procp->pid);
+    procp->pidhash_next = pidhash[hval];
+    pidhash[hval] = procp;
+}
+
+void stats_remove_taskname(int pid) {
+    if (!pidhash) return;
+
+    int hval = pid_hashfn(pid);
+    struct proc* procp;
+    struct proc* prevp;
+
+    for (procp = pidhash[hval], prevp = NULL; procp && procp->pid != pid;
+         procp = procp->pidhash_next)
+        prevp = procp;
+
+    if (!procp)
+        return;
+
+    if (!prevp)
+        pidhash[hval] = procp->pidhash_next;
+    else
+        prevp->pidhash_next = procp->pidhash_next;
+
+    free(procp);
+}
+
+void stats_store_taskname(int pid, const char* taskname) {
+    /* taskname is forwarded from proc_get_name(); treat NULL as "nothing to store". */
+    if (!taskname) return;
+
+    struct proc* procp = pid_lookup(pid);
+    if (procp != NULL && strcmp(procp->taskname, taskname) == 0)
+        return;
+
+    procp = malloc(sizeof(*procp));
+    if (!procp) return; /* out of memory: keep whatever entry already exists */
+
+    stats_remove_taskname(pid);
+    procp->pid = pid;
+    strncpy(procp->taskname, taskname, LINE_MAX - 1);
+    procp->taskname[LINE_MAX - 1] = '\0';
+    proc_insert(procp);
+}
+
+void stats_purge_tasknames() {
+    if (!pidhash) return;
+    struct proc* procp;
+    struct proc* next;
+    int i;
+    for (i = 0; i < PIDHASH_SZ; i++) {
+        procp = pidhash[i];
+        while (procp) {
+            next = procp->pidhash_next;
+            free(procp);
+            procp = next;
+        }
+    }
+    /* Clear exactly the pointer table that proc_insert() allocated. */
+    memset(pidhash, 0, PIDHASH_SZ * sizeof(*pidhash));
+}
diff --git a/lmkd/statslog.h b/lmkd/statslog.h
index 2edba7a72..50d69f739 100644
--- a/lmkd/statslog.h
+++ b/lmkd/statslog.h
@@ -80,6 +80,17 @@ struct memory_stat {
 int
 stats_write_lmk_state_changed(android_log_context ctx, int32_t code, int32_t state);
 
+/**
+ * Logs the event when LMKD kills a process to reduce memory pressure.
+ * Code: LMK_KILL_OCCURRED = 51
+ */
+int
+stats_write_lmk_kill_occurred_pid(android_log_context ctx, int32_t code, int32_t uid, int pid,
+                                  int32_t oom_score, int64_t pgfault, int64_t pgmajfault,
+                                  int64_t rss_in_bytes, int64_t cache_in_bytes,
+                                  int64_t swap_in_bytes, int64_t process_start_time_ns,
+                                  int32_t min_oom_score);
+
 /**
  * Logs the event when LMKD kills a process to reduce memory pressure.
  * Code: LMK_KILL_OCCURRED = 51
@@ -91,6 +102,24 @@ stats_write_lmk_kill_occurred(android_log_context ctx, int32_t code, int32_t uid
                               int64_t swap_in_bytes, int64_t process_start_time_ns,
                               int32_t min_oom_score);
 
+/**
+ * Registers a process taskname by pid, while it is still alive.
+ */
+void
+stats_store_taskname(int pid, const char* taskname);
+
+/**
+ * Unregister all process tasknames.
+ */
+void
+stats_purge_tasknames();
+
+/**
+ * Unregister a process taskname, e.g. after it has been killed.
+ */
+void
+stats_remove_taskname(int pid);
+
 __END_DECLS
 
 #endif /* _STATSLOG_H_ */