platform_system_core/init/sigchld_handler.cpp
Bart Van Assche a75f210398 init: Make WaitToBeReaped() wait less long
Reduce the time spent in WaitToBeReaped() by waiting for SIGCHLD instead
of waiting for 50 ms.

Bug: 308687042
Change-Id: I5e259fdd22dec68e45d27205def2fc6463c06ca3
Signed-off-by: Bart Van Assche <bvanassche@google.com>
2023-11-07 10:52:26 -08:00

171 lines
5.8 KiB
C++

/*
* Copyright (C) 2010 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "sigchld_handler.h"
#include <signal.h>
#include <string.h>
#include <sys/signalfd.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#include <android-base/chrono_utils.h>
#include <android-base/file.h>
#include <android-base/logging.h>
#include <android-base/scopeguard.h>
#include <android-base/stringprintf.h>
#include <thread>
#include "epoll.h"
#include "init.h"
#include "service.h"
#include "service_list.h"
using android::base::boot_clock;
using android::base::make_scope_guard;
using android::base::ReadFileToString;
using android::base::StringPrintf;
using android::base::Timer;
namespace android {
namespace init {
static pid_t ReapOneProcess() {
siginfo_t siginfo = {};
// This returns a zombie pid or informs us that there are no zombies left to be reaped.
// It does NOT reap the pid; that is done below.
if (TEMP_FAILURE_RETRY(waitid(P_ALL, 0, &siginfo, WEXITED | WNOHANG | WNOWAIT)) != 0) {
PLOG(ERROR) << "waitid failed";
return 0;
}
const pid_t pid = siginfo.si_pid;
if (pid == 0) {
DCHECK_EQ(siginfo.si_signo, 0);
return 0;
}
DCHECK_EQ(siginfo.si_signo, SIGCHLD);
// At this point we know we have a zombie pid, so we use this scopeguard to reap the pid
// whenever the function returns from this point forward.
// We do NOT want to reap the zombie earlier as in Service::Reap(), we kill(-pid, ...) and we
// want the pid to remain valid throughout that (and potentially future) usages.
auto reaper = make_scope_guard([pid] { TEMP_FAILURE_RETRY(waitpid(pid, nullptr, WNOHANG)); });
std::string name;
std::string wait_string;
Service* service = nullptr;
if (SubcontextChildReap(pid)) {
name = "Subcontext";
} else {
service = ServiceList::GetInstance().FindService(pid, &Service::pid);
if (service) {
name = StringPrintf("Service '%s' (pid %d)", service->name().c_str(), pid);
if (service->flags() & SVC_EXEC) {
auto exec_duration = boot_clock::now() - service->time_started();
auto exec_duration_ms =
std::chrono::duration_cast<std::chrono::milliseconds>(exec_duration).count();
wait_string = StringPrintf(" waiting took %f seconds", exec_duration_ms / 1000.0f);
} else if (service->flags() & SVC_ONESHOT) {
auto exec_duration = boot_clock::now() - service->time_started();
auto exec_duration_ms =
std::chrono::duration_cast<std::chrono::milliseconds>(exec_duration)
.count();
wait_string = StringPrintf(" oneshot service took %f seconds in background",
exec_duration_ms / 1000.0f);
}
} else {
name = StringPrintf("Untracked pid %d", pid);
}
}
if (siginfo.si_code == CLD_EXITED) {
LOG(INFO) << name << " exited with status " << siginfo.si_status << wait_string;
} else {
LOG(INFO) << name << " received signal " << siginfo.si_status << wait_string;
}
if (!service) {
LOG(INFO) << name << " did not have an associated service entry and will not be reaped";
return pid;
}
service->Reap(siginfo);
if (service->flags() & SVC_TEMPORARY) {
ServiceList::GetInstance().RemoveService(*service);
}
return pid;
}
void ReapAnyOutstandingChildren() {
while (ReapOneProcess() != 0) {
}
}
static void DiscardSiginfo(int signal_fd) {
signalfd_siginfo siginfo;
ssize_t bytes_read = TEMP_FAILURE_RETRY(read(signal_fd, &siginfo, sizeof(siginfo)));
if (bytes_read != sizeof(siginfo)) {
LOG(WARNING) << "Unexpected: " << __func__ << " read " << bytes_read << " bytes instead of "
<< sizeof(siginfo);
}
}
void WaitToBeReaped(int sigchld_fd, const std::vector<pid_t>& pids,
std::chrono::milliseconds timeout) {
Timer t;
Epoll epoll;
// The init process passes a valid sigchld_fd argument but unit tests do not.
if (sigchld_fd >= 0) {
epoll.RegisterHandler(sigchld_fd, [sigchld_fd]() { DiscardSiginfo(sigchld_fd); });
}
std::vector<pid_t> alive_pids(pids.begin(), pids.end());
while (!alive_pids.empty() && t.duration() < timeout) {
pid_t pid;
while ((pid = ReapOneProcess()) != 0) {
auto it = std::find(alive_pids.begin(), alive_pids.end(), pid);
if (it != alive_pids.end()) {
alive_pids.erase(it);
}
}
if (alive_pids.empty()) {
break;
}
if (sigchld_fd >= 0) {
epoll.Wait(std::max(timeout - t.duration(), 0ms));
} else {
std::this_thread::sleep_for(50ms);
}
}
LOG(INFO) << "Waiting for " << pids.size() << " pids to be reaped took " << t << " with "
<< alive_pids.size() << " of them still running";
for (pid_t pid : alive_pids) {
std::string status = "(no-such-pid)";
ReadFileToString(StringPrintf("/proc/%d/status", pid), &status);
LOG(INFO) << "Still running: " << pid << '\n' << status;
}
}
} // namespace init
} // namespace android