init: change kill order and fix error reporting in KillProcessGroup()

First kill the process group before killing the cgroup to catch
the hopeful case that killing the cgroup becomes a no-op as all of its
processes have already been killed.

Do not report an error if kill fails due to ESRCH, as this happens
often when reaping processes due to the order in which we call
waitpid() and kill().

Do not call killProcessGroup in libprocessgroup if we have already
successfully killed and removed a process group.

Bug: 36661364
Bug: 36701253
Bug: 37540956

Test: Reboot bullhead
Test: Start and stop services
Test: Init unit tests
Change-Id: I172acf0f8e00189f910f865f4635a7b1782fc7e3
This commit is contained in:
Tom Cherry 2017-05-04 11:32:36 -07:00
parent 20514c4411
commit 33838b1156
3 changed files with 31 additions and 13 deletions

View file

@ -209,21 +209,33 @@ void Service::NotifyStateChange(const std::string& new_state) const {
}
void Service::KillProcessGroup(int signal) {
LOG(INFO) << "Sending signal " << signal
<< " to service '" << name_
<< "' (pid " << pid_ << ") process group...";
int r;
if (signal == SIGTERM) {
r = killProcessGroupOnce(uid_, pid_, signal);
} else {
r = killProcessGroup(uid_, pid_, signal);
}
if (r == -1) {
LOG(ERROR) << "killProcessGroup(" << uid_ << ", " << pid_ << ", " << signal << ") failed";
}
if (kill(-pid_, signal) == -1) {
// We ignore reporting errors of ESRCH as this commonly happens in the below case,
// 1) Terminate() is called, which sends SIGTERM to the process
// 2) The process successfully exits
// 3) ReapOneProcess() is called, which calls waitpid(-1, ...) which removes the pid entry.
// 4) Reap() is called, which sends SIGKILL, but the pid no longer exists.
// TODO: sigaction for SIGCHLD reports the pid of the exiting process,
// we should do this kill with that pid first before calling waitpid().
if (kill(-pid_, signal) == -1 && errno != ESRCH) {
PLOG(ERROR) << "kill(" << pid_ << ", " << signal << ") failed";
}
// If we've already seen a successful result from killProcessGroup*(), then we have removed
// the cgroup already and calling these functions a second time will simply result in an error.
// This is true regardless of which signal was sent.
// These functions handle their own logging, so no additional logging is needed.
if (!process_cgroup_empty_) {
LOG(INFO) << "Sending signal " << signal << " to service '" << name_ << "' (pid " << pid_
<< ") process group...";
int r;
if (signal == SIGTERM) {
r = killProcessGroupOnce(uid_, pid_, signal);
} else {
r = killProcessGroup(uid_, pid_, signal);
}
if (r == 0) process_cgroup_empty_ = true;
}
}
void Service::SetProcessAttributes() {
@ -736,6 +748,7 @@ bool Service::Start() {
time_started_ = boot_clock::now();
pid_ = pid;
flags_ |= SVC_RUNNING;
process_cgroup_empty_ = false;
errno = -createProcessGroup(uid_, pid_);
if (errno != 0) {

View file

@ -107,6 +107,7 @@ class Service {
int ioprio_pri() const { return ioprio_pri_; }
int priority() const { return priority_; }
int oom_score_adjust() const { return oom_score_adjust_; }
bool process_cgroup_empty() const { return process_cgroup_empty_; }
const std::vector<std::string>& args() const { return args_; }
private:
@ -179,6 +180,8 @@ class Service {
int oom_score_adjust_;
bool process_cgroup_empty_ = false;
std::vector<std::string> args_;
};

View file

@ -45,6 +45,7 @@ TEST(service, pod_initialized) {
EXPECT_EQ(0, service_in_old_memory->ioprio_pri());
EXPECT_EQ(0, service_in_old_memory->priority());
EXPECT_EQ(-1000, service_in_old_memory->oom_score_adjust());
EXPECT_FALSE(service_in_old_memory->process_cgroup_empty());
for (std::size_t i = 0; i < memory_size; ++i) {
old_memory[i] = 0xFF;
@ -64,4 +65,5 @@ TEST(service, pod_initialized) {
EXPECT_EQ(0, service_in_old_memory2->ioprio_pri());
EXPECT_EQ(0, service_in_old_memory2->priority());
EXPECT_EQ(-1000, service_in_old_memory2->oom_score_adjust());
EXPECT_FALSE(service_in_old_memory->process_cgroup_empty());
}