Merge changes I172acf0f,I97b6e17a am: 4963b42d7b
am: 09b0da7b10
Change-Id: I8edeebf3761089176b64cee64c1b74b29756d7e2
This commit is contained in:
commit
7c0f21074e
5 changed files with 134 additions and 96 deletions
|
@ -209,21 +209,33 @@ void Service::NotifyStateChange(const std::string& new_state) const {
|
|||
}
|
||||
|
||||
void Service::KillProcessGroup(int signal) {
|
||||
LOG(INFO) << "Sending signal " << signal
|
||||
<< " to service '" << name_
|
||||
<< "' (pid " << pid_ << ") process group...";
|
||||
int r;
|
||||
if (signal == SIGTERM) {
|
||||
r = killProcessGroupOnce(uid_, pid_, signal);
|
||||
} else {
|
||||
r = killProcessGroup(uid_, pid_, signal);
|
||||
}
|
||||
if (r == -1) {
|
||||
LOG(ERROR) << "killProcessGroup(" << uid_ << ", " << pid_ << ", " << signal << ") failed";
|
||||
}
|
||||
if (kill(-pid_, signal) == -1) {
|
||||
// We ignore reporting errors of ESRCH as this commonly happens in the below case,
|
||||
// 1) Terminate() is called, which sends SIGTERM to the process
|
||||
// 2) The process successfully exits
|
||||
// 3) ReapOneProcess() is called, which calls waitpid(-1, ...) which removes the pid entry.
|
||||
// 4) Reap() is called, which sends SIGKILL, but the pid no longer exists.
|
||||
// TODO: sigaction for SIGCHLD reports the pid of the exiting process,
|
||||
// we should do this kill with that pid first before calling waitpid().
|
||||
if (kill(-pid_, signal) == -1 && errno != ESRCH) {
|
||||
PLOG(ERROR) << "kill(" << pid_ << ", " << signal << ") failed";
|
||||
}
|
||||
|
||||
// If we've already seen a successful result from killProcessGroup*(), then we have removed
|
||||
// the cgroup already and calling these functions a second time will simply result in an error.
|
||||
// This is true regardless of which signal was sent.
|
||||
// These functions handle their own logging, so no additional logging is needed.
|
||||
if (!process_cgroup_empty_) {
|
||||
LOG(INFO) << "Sending signal " << signal << " to service '" << name_ << "' (pid " << pid_
|
||||
<< ") process group...";
|
||||
int r;
|
||||
if (signal == SIGTERM) {
|
||||
r = killProcessGroupOnce(uid_, pid_, signal);
|
||||
} else {
|
||||
r = killProcessGroup(uid_, pid_, signal);
|
||||
}
|
||||
|
||||
if (r == 0) process_cgroup_empty_ = true;
|
||||
}
|
||||
}
|
||||
|
||||
void Service::SetProcessAttributes() {
|
||||
|
@ -766,6 +778,7 @@ bool Service::Start() {
|
|||
time_started_ = boot_clock::now();
|
||||
pid_ = pid;
|
||||
flags_ |= SVC_RUNNING;
|
||||
process_cgroup_empty_ = false;
|
||||
|
||||
errno = -createProcessGroup(uid_, pid_);
|
||||
if (errno != 0) {
|
||||
|
|
|
@ -107,6 +107,7 @@ class Service {
|
|||
int ioprio_pri() const { return ioprio_pri_; }
|
||||
int priority() const { return priority_; }
|
||||
int oom_score_adjust() const { return oom_score_adjust_; }
|
||||
bool process_cgroup_empty() const { return process_cgroup_empty_; }
|
||||
const std::vector<std::string>& args() const { return args_; }
|
||||
|
||||
private:
|
||||
|
@ -179,6 +180,8 @@ class Service {
|
|||
|
||||
int oom_score_adjust_;
|
||||
|
||||
bool process_cgroup_empty_ = false;
|
||||
|
||||
std::vector<std::string> args_;
|
||||
};
|
||||
|
||||
|
|
|
@ -45,6 +45,7 @@ TEST(service, pod_initialized) {
|
|||
EXPECT_EQ(0, service_in_old_memory->ioprio_pri());
|
||||
EXPECT_EQ(0, service_in_old_memory->priority());
|
||||
EXPECT_EQ(-1000, service_in_old_memory->oom_score_adjust());
|
||||
EXPECT_FALSE(service_in_old_memory->process_cgroup_empty());
|
||||
|
||||
for (std::size_t i = 0; i < memory_size; ++i) {
|
||||
old_memory[i] = 0xFF;
|
||||
|
@ -64,4 +65,5 @@ TEST(service, pod_initialized) {
|
|||
EXPECT_EQ(0, service_in_old_memory2->ioprio_pri());
|
||||
EXPECT_EQ(0, service_in_old_memory2->priority());
|
||||
EXPECT_EQ(-1000, service_in_old_memory2->oom_score_adjust());
|
||||
EXPECT_FALSE(service_in_old_memory->process_cgroup_empty());
|
||||
}
|
||||
|
|
|
@ -22,8 +22,13 @@
|
|||
|
||||
__BEGIN_DECLS
|
||||
|
||||
// Return 0 and removes the cgroup if there are no longer any processes in it.
|
||||
// Returns -1 in the case of an error occurring or if there are processes still running
|
||||
// even after retrying for up to 200ms.
|
||||
int killProcessGroup(uid_t uid, int initialPid, int signal);
|
||||
|
||||
// Returns the same as killProcessGroup(), however it does not retry, which means
|
||||
// that it only returns 0 in the case that the cgroup exists and it contains no processes.
|
||||
int killProcessGroupOnce(uid_t uid, int initialPid, int signal);
|
||||
|
||||
int createProcessGroup(uid_t uid, int initialPid);
|
||||
|
|
|
@ -34,6 +34,7 @@
|
|||
#include <thread>
|
||||
|
||||
#include <android-base/logging.h>
|
||||
#include <android-base/unique_fd.h>
|
||||
#include <private/android_filesystem_config.h>
|
||||
|
||||
#include <processgroup/processgroup.h>
|
||||
|
@ -64,12 +65,27 @@ using namespace std::chrono_literals;
|
|||
|
||||
std::once_flag init_path_flag;
|
||||
|
||||
struct ctx {
|
||||
bool initialized;
|
||||
int fd;
|
||||
char buf[128];
|
||||
char *buf_ptr;
|
||||
size_t buf_len;
|
||||
class ProcessGroup {
|
||||
public:
|
||||
ProcessGroup() : buf_ptr_(buf_), buf_len_(0) {}
|
||||
|
||||
bool Open(uid_t uid, int pid);
|
||||
|
||||
// Return positive number and sets *pid = next pid in process cgroup on success
|
||||
// Returns 0 if there are no pids left in the process cgroup
|
||||
// Returns -errno if an error was encountered
|
||||
int GetOneAppProcess(pid_t* pid);
|
||||
|
||||
private:
|
||||
// Returns positive number of bytes filled on success
|
||||
// Returns 0 if there was nothing to read
|
||||
// Returns -errno if an error was encountered
|
||||
int RefillBuffer();
|
||||
|
||||
android::base::unique_fd fd_;
|
||||
char buf_[128];
|
||||
char* buf_ptr_;
|
||||
size_t buf_len_;
|
||||
};
|
||||
|
||||
static const char* getCgroupRootPath() {
|
||||
|
@ -105,87 +121,67 @@ static int convertUidPidToPath(char *path, size_t size, uid_t uid, int pid)
|
|||
pid);
|
||||
}
|
||||
|
||||
static int initCtx(uid_t uid, int pid, struct ctx *ctx)
|
||||
{
|
||||
int ret;
|
||||
bool ProcessGroup::Open(uid_t uid, int pid) {
|
||||
char path[PROCESSGROUP_MAX_PATH_LEN] = {0};
|
||||
convertUidPidToPath(path, sizeof(path), uid, pid);
|
||||
strlcat(path, PROCESSGROUP_CGROUP_PROCS_FILE, sizeof(path));
|
||||
|
||||
int fd = open(path, O_RDONLY);
|
||||
if (fd < 0) {
|
||||
ret = -errno;
|
||||
PLOG(WARNING) << "failed to open " << path;
|
||||
return ret;
|
||||
}
|
||||
if (fd < 0) return false;
|
||||
|
||||
ctx->fd = fd;
|
||||
ctx->buf_ptr = ctx->buf;
|
||||
ctx->buf_len = 0;
|
||||
ctx->initialized = true;
|
||||
fd_.reset(fd);
|
||||
|
||||
LOG(VERBOSE) << "Initialized context for " << path;
|
||||
|
||||
return 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
static int refillBuffer(struct ctx *ctx)
|
||||
{
|
||||
memmove(ctx->buf, ctx->buf_ptr, ctx->buf_len);
|
||||
ctx->buf_ptr = ctx->buf;
|
||||
int ProcessGroup::RefillBuffer() {
|
||||
memmove(buf_, buf_ptr_, buf_len_);
|
||||
buf_ptr_ = buf_;
|
||||
|
||||
ssize_t ret = read(ctx->fd, ctx->buf_ptr + ctx->buf_len,
|
||||
sizeof(ctx->buf) - ctx->buf_len - 1);
|
||||
ssize_t ret = read(fd_, buf_ptr_ + buf_len_, sizeof(buf_) - buf_len_ - 1);
|
||||
if (ret < 0) {
|
||||
return -errno;
|
||||
} else if (ret == 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
ctx->buf_len += ret;
|
||||
ctx->buf[ctx->buf_len] = 0;
|
||||
LOG(VERBOSE) << "Read " << ret << " to buffer: " << ctx->buf;
|
||||
buf_len_ += ret;
|
||||
buf_[buf_len_] = 0;
|
||||
LOG(VERBOSE) << "Read " << ret << " to buffer: " << buf_;
|
||||
|
||||
assert(ctx->buf_len <= sizeof(ctx->buf));
|
||||
assert(buf_len_ <= sizeof(buf_));
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static pid_t getOneAppProcess(uid_t uid, int appProcessPid, struct ctx *ctx)
|
||||
{
|
||||
if (!ctx->initialized) {
|
||||
int ret = initCtx(uid, appProcessPid, ctx);
|
||||
if (ret < 0) {
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
int ProcessGroup::GetOneAppProcess(pid_t* out_pid) {
|
||||
*out_pid = 0;
|
||||
|
||||
char *eptr;
|
||||
while ((eptr = (char *)memchr(ctx->buf_ptr, '\n', ctx->buf_len)) == NULL) {
|
||||
int ret = refillBuffer(ctx);
|
||||
if (ret == 0) {
|
||||
return -ERANGE;
|
||||
}
|
||||
if (ret < 0) {
|
||||
return ret;
|
||||
}
|
||||
char* eptr;
|
||||
while ((eptr = static_cast<char*>(memchr(buf_ptr_, '\n', buf_len_))) == nullptr) {
|
||||
int ret = RefillBuffer();
|
||||
if (ret <= 0) return ret;
|
||||
}
|
||||
|
||||
*eptr = '\0';
|
||||
char *pid_eptr = NULL;
|
||||
char* pid_eptr = nullptr;
|
||||
errno = 0;
|
||||
long pid = strtol(ctx->buf_ptr, &pid_eptr, 10);
|
||||
long pid = strtol(buf_ptr_, &pid_eptr, 10);
|
||||
if (errno != 0) {
|
||||
return -errno;
|
||||
}
|
||||
if (pid_eptr != eptr) {
|
||||
return -EINVAL;
|
||||
errno = EINVAL;
|
||||
return -errno;
|
||||
}
|
||||
|
||||
ctx->buf_len -= (eptr - ctx->buf_ptr) + 1;
|
||||
ctx->buf_ptr = eptr + 1;
|
||||
buf_len_ -= (eptr - buf_ptr_) + 1;
|
||||
buf_ptr_ = eptr + 1;
|
||||
|
||||
return (pid_t)pid;
|
||||
*out_pid = static_cast<pid_t>(pid);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int removeProcessGroup(uid_t uid, int pid)
|
||||
|
@ -219,8 +215,8 @@ static void removeUidProcessGroups(const char *uid_path)
|
|||
}
|
||||
|
||||
snprintf(path, sizeof(path), "%s/%s", uid_path, dir->d_name);
|
||||
LOG(VERBOSE) << "removing " << path;
|
||||
if (rmdir(path) == -1) PLOG(WARNING) << "failed to remove " << path;
|
||||
LOG(VERBOSE) << "Removing " << path;
|
||||
if (rmdir(path) == -1) PLOG(WARNING) << "Failed to remove " << path;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -231,7 +227,7 @@ void removeAllProcessGroups()
|
|||
const char* cgroup_root_path = getCgroupRootPath();
|
||||
std::unique_ptr<DIR, decltype(&closedir)> root(opendir(cgroup_root_path), closedir);
|
||||
if (root == NULL) {
|
||||
PLOG(ERROR) << "failed to open " << cgroup_root_path;
|
||||
PLOG(ERROR) << "Failed to open " << cgroup_root_path;
|
||||
} else {
|
||||
dirent* dir;
|
||||
while ((dir = readdir(root.get())) != nullptr) {
|
||||
|
@ -246,20 +242,26 @@ void removeAllProcessGroups()
|
|||
|
||||
snprintf(path, sizeof(path), "%s/%s", cgroup_root_path, dir->d_name);
|
||||
removeUidProcessGroups(path);
|
||||
LOG(VERBOSE) << "removing " << path;
|
||||
if (rmdir(path) == -1) PLOG(WARNING) << "failed to remove " << path;
|
||||
LOG(VERBOSE) << "Removing " << path;
|
||||
if (rmdir(path) == -1) PLOG(WARNING) << "Failed to remove " << path;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Returns number of processes killed on success
|
||||
// Returns 0 if there are no processes in the process cgroup left to kill
|
||||
// Returns -errno on error
|
||||
static int doKillProcessGroupOnce(uid_t uid, int initialPid, int signal) {
|
||||
int processes = 0;
|
||||
struct ctx ctx;
|
||||
ProcessGroup process_group;
|
||||
if (!process_group.Open(uid, initialPid)) {
|
||||
PLOG(WARNING) << "Failed to open process cgroup uid " << uid << " pid " << initialPid;
|
||||
return -errno;
|
||||
}
|
||||
|
||||
int ret;
|
||||
pid_t pid;
|
||||
|
||||
ctx.initialized = false;
|
||||
|
||||
while ((pid = getOneAppProcess(uid, initialPid, &ctx)) >= 0) {
|
||||
int processes = 0;
|
||||
while ((ret = process_group.GetOneAppProcess(&pid)) > 0 && pid >= 0) {
|
||||
processes++;
|
||||
if (pid == 0) {
|
||||
// Should never happen... but if it does, trying to kill this
|
||||
|
@ -267,55 +269,68 @@ static int doKillProcessGroupOnce(uid_t uid, int initialPid, int signal) {
|
|||
LOG(WARNING) << "Yikes, we've been told to kill pid 0! How about we don't do that?";
|
||||
continue;
|
||||
}
|
||||
LOG(VERBOSE) << "Killing pid " << pid << " in uid " << uid
|
||||
<< " as part of process group " << initialPid;
|
||||
LOG(VERBOSE) << "Killing pid " << pid << " in uid " << uid << " as part of process cgroup "
|
||||
<< initialPid;
|
||||
if (kill(pid, signal) == -1) {
|
||||
PLOG(WARNING) << "kill(" << pid << ", " << signal << ") failed";
|
||||
}
|
||||
}
|
||||
|
||||
if (ctx.initialized) {
|
||||
close(ctx.fd);
|
||||
}
|
||||
|
||||
return processes;
|
||||
return ret >= 0 ? processes : ret;
|
||||
}
|
||||
|
||||
static int killProcessGroup(uid_t uid, int initialPid, int signal, int retry) {
|
||||
static int killProcessGroup(uid_t uid, int initialPid, int signal, int retries) {
|
||||
std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now();
|
||||
|
||||
int retry = retries;
|
||||
int processes;
|
||||
while ((processes = doKillProcessGroupOnce(uid, initialPid, signal)) > 0) {
|
||||
LOG(VERBOSE) << "killed " << processes << " processes for processgroup " << initialPid;
|
||||
LOG(VERBOSE) << "Killed " << processes << " processes for processgroup " << initialPid;
|
||||
if (retry > 0) {
|
||||
std::this_thread::sleep_for(5ms);
|
||||
--retry;
|
||||
} else {
|
||||
LOG(ERROR) << "failed to kill " << processes << " processes for processgroup "
|
||||
<< initialPid;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now();
|
||||
if (processes < 0) {
|
||||
PLOG(ERROR) << "Error encountered killing process cgroup uid " << uid << " pid "
|
||||
<< initialPid;
|
||||
return -1;
|
||||
}
|
||||
|
||||
std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now();
|
||||
auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
|
||||
LOG(VERBOSE) << "Killed process group uid " << uid << " pid " << initialPid << " in "
|
||||
<< static_cast<int>(ms) << "ms, " << processes << " procs remain";
|
||||
|
||||
// We only calculate the number of 'processes' when killing the processes.
|
||||
// In the retries == 0 case, we only kill the processes once and therefore
|
||||
// will not have waited then recalculated how many processes are remaining
|
||||
// after the first signals have been sent.
|
||||
// Logging anything regarding the number of 'processes' here does not make sense.
|
||||
|
||||
if (processes == 0) {
|
||||
if (retries > 0) {
|
||||
LOG(INFO) << "Successfully killed process cgroup uid " << uid << " pid " << initialPid
|
||||
<< " in " << static_cast<int>(ms) << "ms";
|
||||
}
|
||||
return removeProcessGroup(uid, initialPid);
|
||||
} else {
|
||||
if (retries > 0) {
|
||||
LOG(ERROR) << "Failed to kill process cgroup uid " << uid << " pid " << initialPid
|
||||
<< " in " << static_cast<int>(ms) << "ms, " << processes
|
||||
<< " processes remain";
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
int killProcessGroup(uid_t uid, int initialPid, int signal) {
|
||||
return killProcessGroup(uid, initialPid, signal, 40 /*maxRetry*/);
|
||||
return killProcessGroup(uid, initialPid, signal, 40 /*retries*/);
|
||||
}
|
||||
|
||||
int killProcessGroupOnce(uid_t uid, int initialPid, int signal) {
|
||||
return killProcessGroup(uid, initialPid, signal, 0 /*maxRetry*/);
|
||||
return killProcessGroup(uid, initialPid, signal, 0 /*retries*/);
|
||||
}
|
||||
|
||||
static bool mkdirAndChown(const char *path, mode_t mode, uid_t uid, gid_t gid)
|
||||
|
@ -341,14 +356,14 @@ int createProcessGroup(uid_t uid, int initialPid)
|
|||
convertUidToPath(path, sizeof(path), uid);
|
||||
|
||||
if (!mkdirAndChown(path, 0750, AID_SYSTEM, AID_SYSTEM)) {
|
||||
PLOG(ERROR) << "failed to make and chown " << path;
|
||||
PLOG(ERROR) << "Failed to make and chown " << path;
|
||||
return -errno;
|
||||
}
|
||||
|
||||
convertUidPidToPath(path, sizeof(path), uid, initialPid);
|
||||
|
||||
if (!mkdirAndChown(path, 0750, AID_SYSTEM, AID_SYSTEM)) {
|
||||
PLOG(ERROR) << "failed to make and chown " << path;
|
||||
PLOG(ERROR) << "Failed to make and chown " << path;
|
||||
return -errno;
|
||||
}
|
||||
|
||||
|
@ -357,7 +372,7 @@ int createProcessGroup(uid_t uid, int initialPid)
|
|||
int fd = open(path, O_WRONLY);
|
||||
if (fd == -1) {
|
||||
int ret = -errno;
|
||||
PLOG(ERROR) << "failed to open " << path;
|
||||
PLOG(ERROR) << "Failed to open " << path;
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -367,7 +382,7 @@ int createProcessGroup(uid_t uid, int initialPid)
|
|||
int ret = 0;
|
||||
if (write(fd, pid, len) < 0) {
|
||||
ret = -errno;
|
||||
PLOG(ERROR) << "failed to write '" << pid << "' to " << path;
|
||||
PLOG(ERROR) << "Failed to write '" << pid << "' to " << path;
|
||||
}
|
||||
|
||||
close(fd);
|
||||
|
|
Loading…
Reference in a new issue