init: change kill order and fix error reporting in KillProcessGroup()

First kill the process group before killing the cgroup to catch the hopeful case that killing the cgroup becomes a no-op as all of its processes have already been killed. Do not report an error if kill fails due to ESRCH, as this happens often when reaping processes due to the order in which we call waitpid() and kill(). Do not call killProcessGroup in libprocessgroup if we have already successfully killed and removed a process group. Bug: 36661364 Bug: 36701253 Bug: 37540956 Test: Reboot bullhead Test: Start and stop services Test: Init unit tests Change-Id: I172acf0f8e00189f910f865f4635a7b1782fc7e3
2017-05-04 11:32:36 -07:00 · 2017-05-04 11:32:36 -07:00 · 33838b1156
commit 33838b1156
parent 20514c4411
3 changed files with 31 additions and 13 deletions
--- a/init/service.cpp
+++ b/init/service.cpp
@ -209,21 +209,33 @@ void Service::NotifyStateChange(const std::string& new_state) const {
 }

 void Service::KillProcessGroup(int signal) {
-    LOG(INFO) << "Sending signal " << signal
-              << " to service '" << name_
-              << "' (pid " << pid_ << ") process group...";
-    int r;
-    if (signal == SIGTERM) {
-        r = killProcessGroupOnce(uid_, pid_, signal);
-    } else {
-        r = killProcessGroup(uid_, pid_, signal);
-    }
-    if (r == -1) {
-        LOG(ERROR) << "killProcessGroup(" << uid_ << ", " << pid_ << ", " << signal << ") failed";
-    }
-    if (kill(-pid_, signal) == -1) {
+    // We ignore reporting errors of ESRCH as this commonly happens in the below case,
+    // 1) Terminate() is called, which sends SIGTERM to the process
+    // 2) The process successfully exits
+    // 3) ReapOneProcess() is called, which calls waitpid(-1, ...) which removes the pid entry.
+    // 4) Reap() is called, which sends SIGKILL, but the pid no longer exists.
+    // TODO: sigaction for SIGCHLD reports the pid of the exiting process,
+    // we should do this kill with that pid first before calling waitpid().
+    if (kill(-pid_, signal) == -1 && errno != ESRCH) {
        PLOG(ERROR) << "kill(" << pid_ << ", " << signal << ") failed";
    }
+
+    // If we've already seen a successful result from killProcessGroup*(), then we have removed
+    // the cgroup already and calling these functions a second time will simply result in an error.
+    // This is true regardless of which signal was sent.
+    // These functions handle their own logging, so no additional logging is needed.
+    if (!process_cgroup_empty_) {
+        LOG(INFO) << "Sending signal " << signal << " to service '" << name_ << "' (pid " << pid_
+                  << ") process group...";
+        int r;
+        if (signal == SIGTERM) {
+            r = killProcessGroupOnce(uid_, pid_, signal);
+        } else {
+            r = killProcessGroup(uid_, pid_, signal);
+        }
+
+        if (r == 0) process_cgroup_empty_ = true;
+    }
 }

 void Service::SetProcessAttributes() {
@ -736,6 +748,7 @@ bool Service::Start() {
    time_started_ = boot_clock::now();
    pid_ = pid;
    flags_ |= SVC_RUNNING;
+    process_cgroup_empty_ = false;

    errno = -createProcessGroup(uid_, pid_);
    if (errno != 0) {
--- a/init/service.h
+++ b/init/service.h
@ -107,6 +107,7 @@ class Service {
    int ioprio_pri() const { return ioprio_pri_; }
    int priority() const { return priority_; }
    int oom_score_adjust() const { return oom_score_adjust_; }
+    bool process_cgroup_empty() const { return process_cgroup_empty_; }
    const std::vector<std::string>& args() const { return args_; }

  private:
@ -179,6 +180,8 @@ class Service {

    int oom_score_adjust_;

+    bool process_cgroup_empty_ = false;
+
    std::vector<std::string> args_;
 };

--- a/init/service_test.cpp
+++ b/init/service_test.cpp
@ -45,6 +45,7 @@ TEST(service, pod_initialized) {
    EXPECT_EQ(0, service_in_old_memory->ioprio_pri());
    EXPECT_EQ(0, service_in_old_memory->priority());
    EXPECT_EQ(-1000, service_in_old_memory->oom_score_adjust());
+    EXPECT_FALSE(service_in_old_memory->process_cgroup_empty());

    for (std::size_t i = 0; i < memory_size; ++i) {
        old_memory[i] = 0xFF;
@ -64,4 +65,5 @@ TEST(service, pod_initialized) {
    EXPECT_EQ(0, service_in_old_memory2->ioprio_pri());
    EXPECT_EQ(0, service_in_old_memory2->priority());
    EXPECT_EQ(-1000, service_in_old_memory2->oom_score_adjust());
+    EXPECT_FALSE(service_in_old_memory->process_cgroup_empty());
 }