platform_system_core/lmkd/lmkd.c
Suren Baghdasaryan 45c9e0b30c lmkd: Switch to using /proc/meminfo to have access to file cache size
The current mechanism of getting system memory state via sysinfo()
is insufficient because it does not provide the file cache size,
which is needed to correctly assess memory state. Switch to using
data from /proc/meminfo, which includes the information provided by
sysinfo() plus the file cache size.
Bug: 77299493
Bug: 75322373
Merged-In: I16106fc4f9254f17f776803e60502b7b6474e1b7
Change-Id: I16106fc4f9254f17f776803e60502b7b6474e1b7
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
2018-04-15 10:03:05 +00:00

/*
* Copyright (C) 2013 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#define LOG_TAG "lowmemorykiller"
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
#include <pwd.h>
#include <sched.h>
#include <signal.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/cdefs.h>
#include <sys/epoll.h>
#include <sys/eventfd.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <sys/sysinfo.h>
#include <sys/time.h>
#include <sys/types.h>
#include <unistd.h>
#include <cutils/properties.h>
#include <cutils/sockets.h>
#include <lmkd.h>
#include <log/log.h>
/*
* Define LMKD_TRACE_KILLS to record lmkd kills in kernel traces
* to profile and correlate with OOM kills
*/
#ifdef LMKD_TRACE_KILLS
#define ATRACE_TAG ATRACE_TAG_ALWAYS
#include <cutils/trace.h>
#define TRACE_KILL_START(pid) ATRACE_INT(__FUNCTION__, pid);
#define TRACE_KILL_END() ATRACE_INT(__FUNCTION__, 0);
#else /* LMKD_TRACE_KILLS */
#define TRACE_KILL_START(pid)
#define TRACE_KILL_END()
#endif /* LMKD_TRACE_KILLS */
#ifndef __unused
#define __unused __attribute__((__unused__))
#endif
#define MEMCG_SYSFS_PATH "/dev/memcg/"
#define MEMCG_MEMORY_USAGE "/dev/memcg/memory.usage_in_bytes"
#define MEMCG_MEMORYSW_USAGE "/dev/memcg/memory.memsw.usage_in_bytes"
#define ZONEINFO_PATH "/proc/zoneinfo"
#define MEMINFO_PATH "/proc/meminfo"
#define LINE_MAX 128
#define INKERNEL_MINFREE_PATH "/sys/module/lowmemorykiller/parameters/minfree"
#define INKERNEL_ADJ_PATH "/sys/module/lowmemorykiller/parameters/adj"
#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
#define EIGHT_MEGA (1 << 23)
/* Defined as ProcessList.SYSTEM_ADJ in ProcessList.java */
#define SYSTEM_ADJ (-900)
/* default to old in-kernel interface if no memory pressure events */
static bool use_inkernel_interface = true;
static bool has_inkernel_module;
/* memory pressure levels */
enum vmpressure_level {
VMPRESS_LEVEL_LOW = 0,
VMPRESS_LEVEL_MEDIUM,
VMPRESS_LEVEL_CRITICAL,
VMPRESS_LEVEL_COUNT
};
static const char *level_name[] = {
"low",
"medium",
"critical"
};
struct {
int64_t min_nr_free_pages; /* recorded but not used yet */
int64_t max_nr_free_pages;
} low_pressure_mem = { -1, -1 };
static int level_oomadj[VMPRESS_LEVEL_COUNT];
static int mpevfd[VMPRESS_LEVEL_COUNT] = { -1, -1, -1 };
static bool debug_process_killing;
static bool enable_pressure_upgrade;
static int64_t upgrade_pressure;
static int64_t downgrade_pressure;
static bool low_ram_device;
static bool kill_heaviest_task;
static unsigned long kill_timeout_ms;
/* data required to handle events */
struct event_handler_info {
int data;
void (*handler)(int data, uint32_t events);
};
/* data required to handle socket events */
struct sock_event_handler_info {
int sock;
struct event_handler_info handler_info;
};
/* max supported number of data connections */
#define MAX_DATA_CONN 2
/* socket event handler data */
static struct sock_event_handler_info ctrl_sock;
static struct sock_event_handler_info data_sock[MAX_DATA_CONN];
/* vmpressure event handler data */
static struct event_handler_info vmpressure_hinfo[VMPRESS_LEVEL_COUNT];
/* 3 memory pressure levels, 1 ctrl listen socket, 2 ctrl data sockets */
#define MAX_EPOLL_EVENTS (1 + MAX_DATA_CONN + VMPRESS_LEVEL_COUNT)
static int epollfd;
static int maxevents;
/* OOM score values used by both kernel and framework */
#define OOM_SCORE_ADJ_MIN (-1000)
#define OOM_SCORE_ADJ_MAX 1000
static int lowmem_adj[MAX_TARGETS];
static int lowmem_minfree[MAX_TARGETS];
static int lowmem_targets_size;
/* Fields to parse in /proc/zoneinfo */
enum zoneinfo_field {
ZI_NR_FREE_PAGES = 0,
ZI_NR_FILE_PAGES,
ZI_NR_SHMEM,
ZI_NR_UNEVICTABLE,
ZI_WORKINGSET_REFAULT,
ZI_HIGH,
ZI_FIELD_COUNT
};
static const char* const zoneinfo_field_names[ZI_FIELD_COUNT] = {
"nr_free_pages",
"nr_file_pages",
"nr_shmem",
"nr_unevictable",
"workingset_refault",
"high",
};
union zoneinfo {
struct {
int64_t nr_free_pages;
int64_t nr_file_pages;
int64_t nr_shmem;
int64_t nr_unevictable;
int64_t workingset_refault;
int64_t high;
/* fields below are calculated rather than read from the file */
int64_t totalreserve_pages;
} field;
int64_t arr[ZI_FIELD_COUNT];
};
/* Fields to parse in /proc/meminfo */
enum meminfo_field {
MI_NR_FREE_PAGES = 0,
MI_CACHED,
MI_SWAP_CACHED,
MI_BUFFERS,
MI_SHMEM,
MI_UNEVICTABLE,
MI_FREE_SWAP,
MI_DIRTY,
MI_FIELD_COUNT
};
static const char* const meminfo_field_names[MI_FIELD_COUNT] = {
"MemFree:",
"Cached:",
"SwapCached:",
"Buffers:",
"Shmem:",
"Unevictable:",
"SwapFree:",
"Dirty:",
};
union meminfo {
struct {
int64_t nr_free_pages;
int64_t cached;
int64_t swap_cached;
int64_t buffers;
int64_t shmem;
int64_t unevictable;
int64_t free_swap;
int64_t dirty;
/* fields below are calculated rather than read from the file */
int64_t nr_file_pages;
} field;
int64_t arr[MI_FIELD_COUNT];
};
enum field_match_result {
NO_MATCH,
PARSE_FAIL,
PARSE_SUCCESS
};
struct adjslot_list {
struct adjslot_list *next;
struct adjslot_list *prev;
};
struct proc {
struct adjslot_list asl;
int pid;
uid_t uid;
int oomadj;
struct proc *pidhash_next;
};
struct reread_data {
const char* const filename;
int fd;
};
#define PIDHASH_SZ 1024
static struct proc *pidhash[PIDHASH_SZ];
#define pid_hashfn(x) ((((x) >> 8) ^ (x)) & (PIDHASH_SZ - 1))
#define ADJTOSLOT(adj) ((adj) + -OOM_SCORE_ADJ_MIN)
static struct adjslot_list procadjslot_list[ADJTOSLOT(OOM_SCORE_ADJ_MAX) + 1];
/* PAGE_SIZE / 1024 */
static long page_k;
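/*
 * Parse a string as a decimal int64_t.
 * Returns false if no digits were consumed or the value does not fit.
 */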
static bool parse_int64(const char* str, int64_t* ret) {
char* endptr;
long long val = strtoll(str, &endptr, 10);
if (str == endptr || val > INT64_MAX) {
return false;
}
*ret = (int64_t)val;
return true;
}
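/*
 * Match a "name value" token pair against a list of known field names.
 * On a match, parse the value into *field and report the index of the
 * matched name in *field_idx.
 */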
static enum field_match_result match_field(const char* cp, const char* ap,
const char* const field_names[],
int field_count, int64_t* field,
int *field_idx) {
int64_t val;
int i;
for (i = 0; i < field_count; i++) {
if (!strcmp(cp, field_names[i])) {
*field_idx = i;
return parse_int64(ap, field) ? PARSE_SUCCESS : PARSE_FAIL;
}
}
return NO_MATCH;
}
/*
* Read file content from the beginning, up to max_len bytes or EOF,
* whichever happens first.
*/
static ssize_t read_all(int fd, char *buf, size_t max_len)
{
ssize_t ret = 0;
off_t offset = 0;
while (max_len > 0) {
ssize_t r = TEMP_FAILURE_RETRY(pread(fd, buf, max_len, offset));
if (r == 0) {
break;
}
if (r == -1) {
return -1;
}
ret += r;
buf += r;
offset += r;
max_len -= r;
}
return ret;
}
/*
* Read a new or already opened file from the beginning.
* If the file has not been opened yet data->fd should be set to -1.
* Intended for files that are read often, possibly under high memory
* pressure, to minimize repeated opens: opening a file itself requires
* kernel memory allocation and might stall on a memory-stressed system.
*/
static int reread_file(struct reread_data *data, char *buf, size_t buf_size) {
ssize_t size;
if (data->fd == -1) {
data->fd = open(data->filename, O_RDONLY | O_CLOEXEC);
if (data->fd == -1) {
ALOGE("%s open: %s", data->filename, strerror(errno));
return -1;
}
}
size = read_all(data->fd, buf, buf_size - 1);
if (size < 0) {
ALOGE("%s read: %s", data->filename, strerror(errno));
close(data->fd);
data->fd = -1;
return -1;
}
ALOG_ASSERT((size_t)size < buf_size - 1, "%s too large", data->filename);
buf[size] = 0;
return 0;
}
static struct proc *pid_lookup(int pid) {
struct proc *procp;
for (procp = pidhash[pid_hashfn(pid)]; procp && procp->pid != pid;
procp = procp->pidhash_next)
;
return procp;
}
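/*
 * Processes are tracked per oom_score_adj level in circular doubly-linked
 * lists (procadjslot_list). New entries are inserted at the head, so the
 * tail is the least-recently registered process at that level.
 */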
static void adjslot_insert(struct adjslot_list *head, struct adjslot_list *new)
{
struct adjslot_list *next = head->next;
new->prev = head;
new->next = next;
next->prev = new;
head->next = new;
}
static void adjslot_remove(struct adjslot_list *old)
{
struct adjslot_list *prev = old->prev;
struct adjslot_list *next = old->next;
next->prev = prev;
prev->next = next;
}
static struct adjslot_list *adjslot_tail(struct adjslot_list *head) {
struct adjslot_list *asl = head->prev;
return asl == head ? NULL : asl;
}
static void proc_slot(struct proc *procp) {
int adjslot = ADJTOSLOT(procp->oomadj);
adjslot_insert(&procadjslot_list[adjslot], &procp->asl);
}
static void proc_unslot(struct proc *procp) {
adjslot_remove(&procp->asl);
}
static void proc_insert(struct proc *procp) {
int hval = pid_hashfn(procp->pid);
procp->pidhash_next = pidhash[hval];
pidhash[hval] = procp;
proc_slot(procp);
}
static int pid_remove(int pid) {
int hval = pid_hashfn(pid);
struct proc *procp;
struct proc *prevp;
for (procp = pidhash[hval], prevp = NULL; procp && procp->pid != pid;
procp = procp->pidhash_next)
prevp = procp;
if (!procp)
return -1;
if (!prevp)
pidhash[hval] = procp->pidhash_next;
else
prevp->pidhash_next = procp->pidhash_next;
proc_unslot(procp);
free(procp);
return 0;
}
/*
* Write a string to a file.
* Returns false if the file could not be opened.
*/
static bool writefilestring(const char *path, const char *s,
bool err_if_missing) {
int fd = open(path, O_WRONLY | O_CLOEXEC);
ssize_t len = strlen(s);
ssize_t ret;
if (fd < 0) {
if (err_if_missing) {
ALOGE("Error opening %s; errno=%d", path, errno);
}
return false;
}
ret = TEMP_FAILURE_RETRY(write(fd, s, len));
if (ret < 0) {
ALOGE("Error writing %s; errno=%d", path, errno);
} else if (ret < len) {
ALOGE("Short write on %s; length=%zd", path, ret);
}
close(fd);
return true;
}
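/*
 * Handle LMK_PROCPRIO: write the new value to /proc/<pid>/oom_score_adj,
 * scale the per-app memcg soft limit by process importance and record
 * the process in lmkd's bookkeeping structures.
 */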
static void cmd_procprio(LMKD_CTRL_PACKET packet) {
struct proc *procp;
char path[80];
char val[20];
int soft_limit_mult;
struct lmk_procprio params;
bool is_system_server;
struct passwd *pwdrec;
lmkd_pack_get_procprio(packet, &params);
if (params.oomadj < OOM_SCORE_ADJ_MIN ||
params.oomadj > OOM_SCORE_ADJ_MAX) {
ALOGE("Invalid PROCPRIO oomadj argument %d", params.oomadj);
return;
}
snprintf(path, sizeof(path), "/proc/%d/oom_score_adj", params.pid);
snprintf(val, sizeof(val), "%d", params.oomadj);
if (!writefilestring(path, val, false)) {
ALOGW("Failed to open %s; errno=%d: process %d might have been killed",
path, errno, params.pid);
/* If this file does not exist the process is dead. */
return;
}
if (use_inkernel_interface) {
return;
}
if (params.oomadj >= 900) {
soft_limit_mult = 0;
} else if (params.oomadj >= 800) {
soft_limit_mult = 0;
} else if (params.oomadj >= 700) {
soft_limit_mult = 0;
} else if (params.oomadj >= 600) {
// Launcher should be perceptible, don't kill it.
params.oomadj = 200;
soft_limit_mult = 1;
} else if (params.oomadj >= 500) {
soft_limit_mult = 0;
} else if (params.oomadj >= 400) {
soft_limit_mult = 0;
} else if (params.oomadj >= 300) {
soft_limit_mult = 1;
} else if (params.oomadj >= 200) {
soft_limit_mult = 2;
} else if (params.oomadj >= 100) {
soft_limit_mult = 10;
} else if (params.oomadj >= 0) {
soft_limit_mult = 20;
} else {
// Persistent processes will have a large
// soft limit of 512MB.
soft_limit_mult = 64;
}
snprintf(path, sizeof(path),
"/dev/memcg/apps/uid_%d/pid_%d/memory.soft_limit_in_bytes",
params.uid, params.pid);
snprintf(val, sizeof(val), "%d", soft_limit_mult * EIGHT_MEGA);
/*
* system_server process has no memcg under /dev/memcg/apps but should be
* registered with lmkd. This is the best way so far to identify it.
*/
is_system_server = (params.oomadj == SYSTEM_ADJ &&
(pwdrec = getpwnam("system")) != NULL &&
params.uid == pwdrec->pw_uid);
writefilestring(path, val, !is_system_server);
procp = pid_lookup(params.pid);
if (!procp) {
procp = malloc(sizeof(struct proc));
if (!procp) {
// Oh, the irony. May need to rebuild our state.
return;
}
procp->pid = params.pid;
procp->uid = params.uid;
procp->oomadj = params.oomadj;
proc_insert(procp);
} else {
proc_unslot(procp);
procp->oomadj = params.oomadj;
proc_slot(procp);
}
}
static void cmd_procremove(LMKD_CTRL_PACKET packet) {
struct lmk_procremove params;
if (use_inkernel_interface) {
return;
}
lmkd_pack_get_procremove(packet, &params);
pid_remove(params.pid);
}
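/*
 * Handle LMK_TARGET: store the minfree/oomadj target table and, when the
 * in-kernel lowmemorykiller module is present, mirror it into the module
 * parameters.
 */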
static void cmd_target(int ntargets, LMKD_CTRL_PACKET packet) {
int i;
struct lmk_target target;
if (ntargets > (int)ARRAY_SIZE(lowmem_adj))
return;
for (i = 0; i < ntargets; i++) {
lmkd_pack_get_target(packet, i, &target);
lowmem_minfree[i] = target.minfree;
lowmem_adj[i] = target.oom_adj_score;
}
lowmem_targets_size = ntargets;
if (has_inkernel_module) {
char minfreestr[128];
char killpriostr[128];
minfreestr[0] = '\0';
killpriostr[0] = '\0';
for (i = 0; i < lowmem_targets_size; i++) {
char val[40];
if (i) {
strlcat(minfreestr, ",", sizeof(minfreestr));
strlcat(killpriostr, ",", sizeof(killpriostr));
}
snprintf(val, sizeof(val), "%d", use_inkernel_interface ? lowmem_minfree[i] : 0);
strlcat(minfreestr, val, sizeof(minfreestr));
snprintf(val, sizeof(val), "%d", use_inkernel_interface ? lowmem_adj[i] : 0);
strlcat(killpriostr, val, sizeof(killpriostr));
}
writefilestring(INKERNEL_MINFREE_PATH, minfreestr, true);
writefilestring(INKERNEL_ADJ_PATH, killpriostr, true);
}
}
static void ctrl_data_close(int dsock_idx) {
struct epoll_event epev;
ALOGI("closing lmkd data connection");
if (epoll_ctl(epollfd, EPOLL_CTL_DEL, data_sock[dsock_idx].sock, &epev) == -1) {
// Log a warning and keep going
ALOGW("epoll_ctl for data connection socket failed; errno=%d", errno);
}
maxevents--;
close(data_sock[dsock_idx].sock);
data_sock[dsock_idx].sock = -1;
}
static int ctrl_data_read(int dsock_idx, char *buf, size_t bufsz) {
int ret = 0;
ret = TEMP_FAILURE_RETRY(read(data_sock[dsock_idx].sock, buf, bufsz));
if (ret == -1) {
ALOGE("control data socket read failed; errno=%d", errno);
} else if (ret == 0) {
ALOGE("Got EOF on control data socket");
ret = -1;
}
return ret;
}
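/*
 * Read one control packet, validate its argument count and dispatch it
 * to the matching cmd_* handler.
 */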
static void ctrl_command_handler(int dsock_idx) {
LMKD_CTRL_PACKET packet;
int len;
enum lmk_cmd cmd;
int nargs;
int targets;
len = ctrl_data_read(dsock_idx, (char *)packet, CTRL_PACKET_MAX_SIZE);
if (len <= 0)
return;
if (len < (int)sizeof(int)) {
ALOGE("Wrong control socket read length len=%d", len);
return;
}
cmd = lmkd_pack_get_cmd(packet);
nargs = len / sizeof(int) - 1;
if (nargs < 0)
goto wronglen;
switch(cmd) {
case LMK_TARGET:
targets = nargs / 2;
if (nargs & 0x1 || targets > (int)ARRAY_SIZE(lowmem_adj))
goto wronglen;
cmd_target(targets, packet);
break;
case LMK_PROCPRIO:
if (nargs != 3)
goto wronglen;
cmd_procprio(packet);
break;
case LMK_PROCREMOVE:
if (nargs != 1)
goto wronglen;
cmd_procremove(packet);
break;
default:
ALOGE("Received unknown command code %d", cmd);
return;
}
return;
wronglen:
ALOGE("Wrong control socket read length cmd=%d len=%d", cmd, len);
}
static void ctrl_data_handler(int data, uint32_t events) {
if (events & EPOLLIN) {
ctrl_command_handler(data);
}
}
static int get_free_dsock() {
for (int i = 0; i < MAX_DATA_CONN; i++) {
if (data_sock[i].sock < 0) {
return i;
}
}
return -1;
}
static void ctrl_connect_handler(int data __unused, uint32_t events __unused) {
struct epoll_event epev;
int free_dscock_idx = get_free_dsock();
if (free_dscock_idx < 0) {
/*
* The number of data connections exceeded the supported maximum. This
* should not happen, but if it does we drop all existing connections
* and accept the new one. This prevents inactive connections from
* monopolizing the data sockets; if we drop the ActivityManager
* connection it will immediately reconnect.
*/
for (int i = 0; i < MAX_DATA_CONN; i++) {
ctrl_data_close(i);
}
free_dscock_idx = 0;
}
data_sock[free_dscock_idx].sock = accept(ctrl_sock.sock, NULL, NULL);
if (data_sock[free_dscock_idx].sock < 0) {
ALOGE("lmkd control socket accept failed; errno=%d", errno);
return;
}
ALOGI("lmkd data connection established");
/* use data to store data connection idx */
data_sock[free_dscock_idx].handler_info.data = free_dscock_idx;
data_sock[free_dscock_idx].handler_info.handler = ctrl_data_handler;
epev.events = EPOLLIN;
epev.data.ptr = (void *)&(data_sock[free_dscock_idx].handler_info);
if (epoll_ctl(epollfd, EPOLL_CTL_ADD, data_sock[free_dscock_idx].sock, &epev) == -1) {
ALOGE("epoll_ctl for data connection socket failed; errno=%d", errno);
ctrl_data_close(free_dscock_idx);
return;
}
maxevents++;
}
/* /proc/zoneinfo parsing routines */
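/*
 * A zone "protection:" entry looks like "protection: (0, 0, 1308, 1308)";
 * the largest of the listed watermarks is treated as the zone's reserve.
 */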
static int64_t zoneinfo_parse_protection(char *cp) {
int64_t max = 0;
long long zoneval;
char *save_ptr;
for (cp = strtok_r(cp, "(), ", &save_ptr); cp;
cp = strtok_r(NULL, "), ", &save_ptr)) {
zoneval = strtoll(cp, &cp, 0);
if (zoneval > max) {
max = (zoneval > INT64_MAX) ? INT64_MAX : zoneval;
}
}
return max;
}
static bool zoneinfo_parse_line(char *line, union zoneinfo *zi) {
char *cp = line;
char *ap;
char *save_ptr;
int64_t val;
int field_idx;
cp = strtok_r(line, " ", &save_ptr);
if (!cp) {
return true;
}
if (!strcmp(cp, "protection:")) {
ap = strtok_r(NULL, ")", &save_ptr);
} else {
ap = strtok_r(NULL, " ", &save_ptr);
}
if (!ap) {
return true;
}
switch (match_field(cp, ap, zoneinfo_field_names,
ZI_FIELD_COUNT, &val, &field_idx)) {
case (PARSE_SUCCESS):
zi->arr[field_idx] += val;
break;
case (NO_MATCH):
if (!strcmp(cp, "protection:")) {
zi->field.totalreserve_pages +=
zoneinfo_parse_protection(ap);
}
break;
case (PARSE_FAIL):
default:
return false;
}
return true;
}
static int zoneinfo_parse(union zoneinfo *zi) {
static struct reread_data file_data = {
.filename = ZONEINFO_PATH,
.fd = -1,
};
char buf[PAGE_SIZE];
char *save_ptr;
char *line;
memset(zi, 0, sizeof(union zoneinfo));
if (reread_file(&file_data, buf, sizeof(buf)) < 0) {
return -1;
}
for (line = strtok_r(buf, "\n", &save_ptr); line;
line = strtok_r(NULL, "\n", &save_ptr)) {
if (!zoneinfo_parse_line(line, zi)) {
ALOGE("%s parse error", file_data.filename);
return -1;
}
}
zi->field.totalreserve_pages += zi->field.high;
return 0;
}
/* /proc/meminfo parsing routines */
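/*
 * Each meminfo line has the form "MemFree: 123456 kB"; matched values
 * are converted from kilobytes to pages before being stored.
 */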
static bool meminfo_parse_line(char *line, union meminfo *mi) {
char *cp = line;
char *ap;
char *save_ptr;
int64_t val;
int field_idx;
enum field_match_result match_res;
cp = strtok_r(line, " ", &save_ptr);
if (!cp) {
return false;
}
ap = strtok_r(NULL, " ", &save_ptr);
if (!ap) {
return false;
}
match_res = match_field(cp, ap, meminfo_field_names, MI_FIELD_COUNT,
&val, &field_idx);
if (match_res == PARSE_SUCCESS) {
mi->arr[field_idx] = val / page_k;
}
return (match_res != PARSE_FAIL);
}
static int meminfo_parse(union meminfo *mi) {
static struct reread_data file_data = {
.filename = MEMINFO_PATH,
.fd = -1,
};
char buf[PAGE_SIZE];
char *save_ptr;
char *line;
memset(mi, 0, sizeof(union meminfo));
if (reread_file(&file_data, buf, sizeof(buf)) < 0) {
return -1;
}
for (line = strtok_r(buf, "\n", &save_ptr); line;
line = strtok_r(NULL, "\n", &save_ptr)) {
if (!meminfo_parse_line(line, mi)) {
ALOGE("%s parse error", file_data.filename);
return -1;
}
}
mi->field.nr_file_pages = mi->field.cached + mi->field.swap_cached +
mi->field.buffers;
return 0;
}
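/*
 * Return the RSS of a process in pages, read from the second field of
 * /proc/<pid>/statm, or -1 if the file cannot be read.
 */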
static int proc_get_size(int pid) {
char path[PATH_MAX];
char line[LINE_MAX];
int fd;
int rss = 0;
int total;
ssize_t ret;
snprintf(path, PATH_MAX, "/proc/%d/statm", pid);
fd = open(path, O_RDONLY | O_CLOEXEC);
if (fd == -1)
return -1;
ret = read_all(fd, line, sizeof(line) - 1);
if (ret < 0) {
close(fd);
return -1;
}
sscanf(line, "%d %d ", &total, &rss);
close(fd);
return rss;
}
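/*
 * Return the process name from /proc/<pid>/cmdline in a static buffer,
 * truncated at the first space, or NULL if the file cannot be read.
 */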
static char *proc_get_name(int pid) {
char path[PATH_MAX];
static char line[LINE_MAX];
int fd;
char *cp;
ssize_t ret;
snprintf(path, PATH_MAX, "/proc/%d/cmdline", pid);
fd = open(path, O_RDONLY | O_CLOEXEC);
if (fd == -1)
return NULL;
ret = read_all(fd, line, sizeof(line) - 1);
close(fd);
if (ret < 0) {
return NULL;
}
cp = strchr(line, ' ');
if (cp)
*cp = '\0';
return line;
}
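/* Return the least-recently registered process at the given oomadj level. */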
static struct proc *proc_adj_lru(int oomadj) {
return (struct proc *)adjslot_tail(&procadjslot_list[ADJTOSLOT(oomadj)]);
}
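/*
 * Return the process with the largest RSS at the given oomadj level,
 * pruning entries whose /proc/<pid>/statm can no longer be read.
 */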
static struct proc *proc_get_heaviest(int oomadj) {
struct adjslot_list *head = &procadjslot_list[ADJTOSLOT(oomadj)];
struct adjslot_list *curr = head->next;
struct proc *maxprocp = NULL;
int maxsize = 0;
while (curr != head) {
int pid = ((struct proc *)curr)->pid;
int tasksize = proc_get_size(pid);
if (tasksize <= 0) {
struct adjslot_list *next = curr->next;
pid_remove(pid);
curr = next;
} else {
if (tasksize > maxsize) {
maxsize = tasksize;
maxprocp = (struct proc *)curr;
}
curr = curr->next;
}
}
return maxprocp;
}
/* Kill the process specified by procp. Returns its RSS in pages, or -1 on failure. */
static int kill_one_process(struct proc* procp, int min_score_adj,
enum vmpressure_level level) {
int pid = procp->pid;
uid_t uid = procp->uid;
char *taskname;
int tasksize;
int r;
taskname = proc_get_name(pid);
if (!taskname) {
pid_remove(pid);
return -1;
}
tasksize = proc_get_size(pid);
if (tasksize <= 0) {
pid_remove(pid);
return -1;
}
TRACE_KILL_START(pid);
r = kill(pid, SIGKILL);
ALOGI(
"Killing '%s' (%d), uid %d, adj %d\n"
" to free %ldkB because system is under %s memory pressure (min_oom_adj=%d)\n",
taskname, pid, uid, procp->oomadj, tasksize * page_k,
level_name[level], min_score_adj);
pid_remove(pid);
TRACE_KILL_END();
if (r) {
ALOGE("kill(%d): errno=%d", pid, errno);
return -1;
}
return tasksize;
}
/*
* Find and kill processes until the required number of pages is freed.
* If pages_to_free is 0, at most one process is killed.
* Returns the number of pages freed.
*/
static int find_and_kill_processes(enum vmpressure_level level,
int pages_to_free) {
int i;
int killed_size;
int pages_freed = 0;
int min_score_adj = level_oomadj[level];
for (i = OOM_SCORE_ADJ_MAX; i >= min_score_adj; i--) {
struct proc *procp;
while (true) {
procp = kill_heaviest_task ?
proc_get_heaviest(i) : proc_adj_lru(i);
if (!procp)
break;
killed_size = kill_one_process(procp, min_score_adj, level);
if (killed_size >= 0) {
pages_freed += killed_size;
if (pages_freed >= pages_to_free) {
return pages_freed;
}
}
}
}
return pages_freed;
}
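/*
 * Read a single int64 counter (memcg memory or memory+swap usage) using
 * a persistent file descriptor. Returns -1 on read or parse failure.
 */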
static int64_t get_memory_usage(struct reread_data *file_data) {
int ret;
int64_t mem_usage;
char buf[32];
if (reread_file(file_data, buf, sizeof(buf)) < 0) {
return -1;
}
if (!parse_int64(buf, &mem_usage)) {
ALOGE("%s parse error", file_data->filename);
return -1;
}
if (mem_usage == 0) {
ALOGE("No memory!");
return -1;
}
return mem_usage;
}
void record_low_pressure_levels(union meminfo *mi) {
if (low_pressure_mem.min_nr_free_pages == -1 ||
low_pressure_mem.min_nr_free_pages > mi->field.nr_free_pages) {
if (debug_process_killing) {
ALOGI("Low pressure min memory update from %" PRId64 " to %" PRId64,
low_pressure_mem.min_nr_free_pages, mi->field.nr_free_pages);
}
low_pressure_mem.min_nr_free_pages = mi->field.nr_free_pages;
}
/*
* Free memory at low vmpressure events occasionally spikes, possibly
* because of a stale low vmpressure event reported after memory was
* already freed up (no memory pressure should have been reported).
* Ignore large jumps in max_nr_free_pages that would mess up our stats.
*/
if (low_pressure_mem.max_nr_free_pages == -1 ||
(low_pressure_mem.max_nr_free_pages < mi->field.nr_free_pages &&
mi->field.nr_free_pages - low_pressure_mem.max_nr_free_pages <
low_pressure_mem.max_nr_free_pages * 0.1)) {
if (debug_process_killing) {
ALOGI("Low pressure max memory update from %" PRId64 " to %" PRId64,
low_pressure_mem.max_nr_free_pages, mi->field.nr_free_pages);
}
low_pressure_mem.max_nr_free_pages = mi->field.nr_free_pages;
}
}
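/* Move one pressure level up or down, clamped to the valid range. */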
enum vmpressure_level upgrade_level(enum vmpressure_level level) {
return (enum vmpressure_level)((level < VMPRESS_LEVEL_CRITICAL) ?
level + 1 : level);
}
enum vmpressure_level downgrade_level(enum vmpressure_level level) {
return (enum vmpressure_level)((level > VMPRESS_LEVEL_LOW) ?
level - 1 : level);
}
static inline unsigned long get_time_diff_ms(struct timeval *from,
struct timeval *to) {
return (to->tv_sec - from->tv_sec) * 1000 +
(to->tv_usec - from->tv_usec) / 1000;
}
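/*
 * Common handler for all vmpressure events: coalesces pending events into
 * the highest signaled level, enforces the post-kill timeout, upgrades or
 * downgrades the pressure level based on the memory/swap usage ratio and
 * kills processes until enough memory is freed.
 */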
static void mp_event_common(int data, uint32_t events __unused) {
int ret;
unsigned long long evcount;
int64_t mem_usage, memsw_usage;
int64_t mem_pressure;
enum vmpressure_level lvl;
union meminfo mi;
static struct timeval last_report_tm;
static unsigned long skip_count = 0;
enum vmpressure_level level = (enum vmpressure_level)data;
static struct reread_data mem_usage_file_data = {
.filename = MEMCG_MEMORY_USAGE,
.fd = -1,
};
static struct reread_data memsw_usage_file_data = {
.filename = MEMCG_MEMORYSW_USAGE,
.fd = -1,
};
/*
* Check all event counters from low to critical
* and upgrade to the highest priority one. By reading
* eventfd we also reset the event counters.
*/
for (lvl = VMPRESS_LEVEL_LOW; lvl < VMPRESS_LEVEL_COUNT; lvl++) {
if (mpevfd[lvl] != -1 &&
TEMP_FAILURE_RETRY(read(mpevfd[lvl],
&evcount, sizeof(evcount))) > 0 &&
evcount > 0 && lvl > level) {
level = lvl;
}
}
if (kill_timeout_ms) {
struct timeval curr_tm;
gettimeofday(&curr_tm, NULL);
if (get_time_diff_ms(&last_report_tm, &curr_tm) < kill_timeout_ms) {
skip_count++;
return;
}
}
if (skip_count > 0) {
if (debug_process_killing) {
ALOGI("%lu memory pressure events were skipped after a kill!",
skip_count);
}
skip_count = 0;
}
if (meminfo_parse(&mi) < 0) {
ALOGE("Failed to get free memory!");
return;
}
if (level == VMPRESS_LEVEL_LOW) {
record_low_pressure_levels(&mi);
}
if (level_oomadj[level] > OOM_SCORE_ADJ_MAX) {
/* Do not monitor this pressure level */
return;
}
if ((mem_usage = get_memory_usage(&mem_usage_file_data)) < 0) {
goto do_kill;
}
if ((memsw_usage = get_memory_usage(&memsw_usage_file_data)) < 0) {
goto do_kill;
}
// Calculate what percentage of combined memory+swap usage is resident
// in memory; a low value means heavy swapping.
mem_pressure = (mem_usage * 100) / memsw_usage;
if (enable_pressure_upgrade && level != VMPRESS_LEVEL_CRITICAL) {
// We are swapping too much.
if (mem_pressure < upgrade_pressure) {
level = upgrade_level(level);
if (debug_process_killing) {
ALOGI("Event upgraded to %s", level_name[level]);
}
}
}
// If the pressure is larger than downgrade_pressure, lmk will not
// kill any process, since enough memory is available.
if (mem_pressure > downgrade_pressure) {
if (debug_process_killing) {
ALOGI("Ignore %s memory pressure", level_name[level]);
}
return;
} else if (level == VMPRESS_LEVEL_CRITICAL &&
mem_pressure > upgrade_pressure) {
if (debug_process_killing) {
ALOGI("Downgrade critical memory pressure");
}
// Downgrade event, since enough memory available.
level = downgrade_level(level);
}
do_kill:
if (low_ram_device) {
/* For Go devices kill only one task */
if (find_and_kill_processes(level, 0) == 0) {
if (debug_process_killing) {
ALOGI("Nothing to kill");
}
}
} else {
/* If the pressure level is below critical and enough free swap is available, ignore the event */
if (level < VMPRESS_LEVEL_CRITICAL &&
mi.field.free_swap > low_pressure_mem.max_nr_free_pages) {
if (debug_process_killing) {
ALOGI("Ignoring pressure since %" PRId64
" swap pages are available ",
mi.field.free_swap);
}
return;
}
/* Free up enough memory to downgrade the memory pressure to the low level */
if (mi.field.nr_free_pages < low_pressure_mem.max_nr_free_pages) {
int pages_to_free = low_pressure_mem.max_nr_free_pages -
mi.field.nr_free_pages;
if (debug_process_killing) {
ALOGI("Trying to free %d pages", pages_to_free);
}
int pages_freed = find_and_kill_processes(level, pages_to_free);
if (pages_freed < pages_to_free) {
if (debug_process_killing) {
ALOGI("Unable to free enough memory (pages freed=%d)",
pages_freed);
}
} else {
gettimeofday(&last_report_tm, NULL);
}
}
}
}
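/*
 * Register for memcg vmpressure notifications at the given level: wire an
 * eventfd to memory.pressure_level through cgroup.event_control and add
 * the eventfd to the epoll set.
 */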
static bool init_mp_common(enum vmpressure_level level) {
int mpfd;
int evfd;
int evctlfd;
char buf[256];
struct epoll_event epev;
int ret;
int level_idx = (int)level;
const char *levelstr = level_name[level_idx];
mpfd = open(MEMCG_SYSFS_PATH "memory.pressure_level", O_RDONLY | O_CLOEXEC);
if (mpfd < 0) {
ALOGI("No kernel memory.pressure_level support (errno=%d)", errno);
goto err_open_mpfd;
}
evctlfd = open(MEMCG_SYSFS_PATH "cgroup.event_control", O_WRONLY | O_CLOEXEC);
if (evctlfd < 0) {
ALOGI("No kernel memory cgroup event control (errno=%d)", errno);
goto err_open_evctlfd;
}
evfd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
if (evfd < 0) {
ALOGE("eventfd failed for level %s; errno=%d", levelstr, errno);
goto err_eventfd;
}
ret = snprintf(buf, sizeof(buf), "%d %d %s", evfd, mpfd, levelstr);
if (ret >= (ssize_t)sizeof(buf)) {
ALOGE("cgroup.event_control line overflow for level %s", levelstr);
goto err;
}
ret = TEMP_FAILURE_RETRY(write(evctlfd, buf, strlen(buf) + 1));
if (ret == -1) {
ALOGE("cgroup.event_control write failed for level %s; errno=%d",
levelstr, errno);
goto err;
}
epev.events = EPOLLIN;
/* use data to store event level */
vmpressure_hinfo[level_idx].data = level_idx;
vmpressure_hinfo[level_idx].handler = mp_event_common;
epev.data.ptr = (void *)&vmpressure_hinfo[level_idx];
ret = epoll_ctl(epollfd, EPOLL_CTL_ADD, evfd, &epev);
if (ret == -1) {
ALOGE("epoll_ctl for level %s failed; errno=%d", levelstr, errno);
goto err;
}
maxevents++;
mpevfd[level] = evfd;
close(evctlfd);
return true;
err:
close(evfd);
err_eventfd:
close(evctlfd);
err_open_evctlfd:
close(mpfd);
err_open_mpfd:
return false;
}
static int init(void) {
struct epoll_event epev;
int i;
int ret;
page_k = sysconf(_SC_PAGESIZE);
if (page_k == -1)
page_k = PAGE_SIZE;
page_k /= 1024;
epollfd = epoll_create(MAX_EPOLL_EVENTS);
if (epollfd == -1) {
ALOGE("epoll_create failed (errno=%d)", errno);
return -1;
}
// mark data connections as not connected
for (int i = 0; i < MAX_DATA_CONN; i++) {
data_sock[i].sock = -1;
}
ctrl_sock.sock = android_get_control_socket("lmkd");
if (ctrl_sock.sock < 0) {
ALOGE("get lmkd control socket failed");
return -1;
}
ret = listen(ctrl_sock.sock, MAX_DATA_CONN);
if (ret < 0) {
ALOGE("lmkd control socket listen failed (errno=%d)", errno);
return -1;
}
epev.events = EPOLLIN;
ctrl_sock.handler_info.handler = ctrl_connect_handler;
epev.data.ptr = (void *)&(ctrl_sock.handler_info);
if (epoll_ctl(epollfd, EPOLL_CTL_ADD, ctrl_sock.sock, &epev) == -1) {
ALOGE("epoll_ctl for lmkd control socket failed (errno=%d)", errno);
return -1;
}
maxevents++;
has_inkernel_module = !access(INKERNEL_MINFREE_PATH, W_OK);
use_inkernel_interface = has_inkernel_module;
if (use_inkernel_interface) {
ALOGI("Using in-kernel low memory killer interface");
} else {
if (!init_mp_common(VMPRESS_LEVEL_LOW) ||
!init_mp_common(VMPRESS_LEVEL_MEDIUM) ||
!init_mp_common(VMPRESS_LEVEL_CRITICAL)) {
ALOGE("Kernel does not support memory pressure events or in-kernel low memory killer");
return -1;
}
}
for (i = 0; i <= ADJTOSLOT(OOM_SCORE_ADJ_MAX); i++) {
procadjslot_list[i].next = &procadjslot_list[i];
procadjslot_list[i].prev = &procadjslot_list[i];
}
return 0;
}
static void mainloop(void) {
struct event_handler_info* handler_info;
struct epoll_event *evt;
while (1) {
struct epoll_event events[maxevents];
int nevents;
int i;
nevents = epoll_wait(epollfd, events, maxevents, -1);
if (nevents == -1) {
if (errno == EINTR)
continue;
ALOGE("epoll_wait failed (errno=%d)", errno);
continue;
}
/*
* First pass to detect dropped data socket connections. Dropped
* connections must be handled before any other events so that the
* data connection is deallocated first; this matters when a connection
* is dropped and reestablished within the same epoll cycle.
*/
for (i = 0, evt = &events[0]; i < nevents; ++i, evt++) {
if ((evt->events & EPOLLHUP) && evt->data.ptr) {
ALOGI("lmkd data connection dropped");
handler_info = (struct event_handler_info*)evt->data.ptr;
ctrl_data_close(handler_info->data);
}
}
/* Second pass to handle all other events */
for (i = 0, evt = &events[0]; i < nevents; ++i, evt++) {
if (evt->events & EPOLLERR)
ALOGD("EPOLLERR on event #%d", i);
if (evt->events & EPOLLHUP) {
/* This case was handled in the first pass */
continue;
}
if (evt->data.ptr) {
handler_info = (struct event_handler_info*)evt->data.ptr;
handler_info->handler(handler_info->data, evt->events);
}
}
}
}
int main(int argc __unused, char **argv __unused) {
struct sched_param param = {
.sched_priority = 1,
};
/* By default disable low level vmpressure events */
level_oomadj[VMPRESS_LEVEL_LOW] =
property_get_int32("ro.lmk.low", OOM_SCORE_ADJ_MAX + 1);
level_oomadj[VMPRESS_LEVEL_MEDIUM] =
property_get_int32("ro.lmk.medium", 800);
level_oomadj[VMPRESS_LEVEL_CRITICAL] =
property_get_int32("ro.lmk.critical", 0);
debug_process_killing = property_get_bool("ro.lmk.debug", false);
/* By default disable upgrade/downgrade logic */
enable_pressure_upgrade =
property_get_bool("ro.lmk.critical_upgrade", false);
upgrade_pressure =
(int64_t)property_get_int32("ro.lmk.upgrade_pressure", 100);
downgrade_pressure =
(int64_t)property_get_int32("ro.lmk.downgrade_pressure", 100);
kill_heaviest_task =
property_get_bool("ro.lmk.kill_heaviest_task", false);
low_ram_device = property_get_bool("ro.config.low_ram", false);
kill_timeout_ms =
(unsigned long)property_get_int32("ro.lmk.kill_timeout_ms", 0);
if (!init()) {
if (!use_inkernel_interface) {
/*
* MCL_ONFAULT pins pages as they fault instead of loading
* everything immediately all at once. (Which would be bad,
* because as of this writing, we have a lot of mapped pages we
* never use.) Old kernels will see MCL_ONFAULT and fail with
* EINVAL; we ignore this failure.
*
* N.B. read the man page for mlockall. MCL_CURRENT | MCL_ONFAULT
* pins ⊆ MCL_CURRENT, converging to just MCL_CURRENT as we fault
* in pages.
*/
if (mlockall(MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT) && (errno != EINVAL)) {
ALOGW("mlockall failed %s", strerror(errno));
}
sched_setscheduler(0, SCHED_FIFO, &param);
}
mainloop();
}
ALOGI("exiting");
return 0;
}