Merge "init: Add more diagnostics for signalfd hangs."
This commit is contained in:
commit
fe62ca7165
3 changed files with 37 additions and 6 deletions
|
@ -578,12 +578,29 @@ static void HandleSigtermSignal(const signalfd_siginfo& siginfo) {
|
|||
HandlePowerctlMessage("shutdown,container");
|
||||
}
|
||||
|
||||
static constexpr std::chrono::milliseconds kDiagnosticTimeout = 10s;
|
||||
|
||||
static void HandleSignalFd() {
|
||||
signalfd_siginfo siginfo;
|
||||
ssize_t bytes_read = TEMP_FAILURE_RETRY(read(signal_fd, &siginfo, sizeof(siginfo)));
|
||||
if (bytes_read != sizeof(siginfo)) {
|
||||
PLOG(ERROR) << "Failed to read siginfo from signal_fd";
|
||||
return;
|
||||
auto started = std::chrono::steady_clock::now();
|
||||
for (;;) {
|
||||
ssize_t bytes_read = TEMP_FAILURE_RETRY(read(signal_fd, &siginfo, sizeof(siginfo)));
|
||||
if (bytes_read < 0 && errno == EAGAIN) {
|
||||
auto now = std::chrono::steady_clock::now();
|
||||
std::chrono::duration<double> waited = now - started;
|
||||
if (waited >= kDiagnosticTimeout) {
|
||||
LOG(ERROR) << "epoll() woke us up, but we waited with no SIGCHLD!";
|
||||
started = now;
|
||||
}
|
||||
|
||||
std::this_thread::sleep_for(100ms);
|
||||
continue;
|
||||
}
|
||||
if (bytes_read != sizeof(siginfo)) {
|
||||
PLOG(ERROR) << "Failed to read siginfo from signal_fd";
|
||||
return;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
switch (siginfo.ssi_signo) {
|
||||
|
@ -639,7 +656,7 @@ static void InstallSignalFdHandler(Epoll* epoll) {
|
|||
LOG(FATAL) << "Failed to register a fork handler: " << strerror(result);
|
||||
}
|
||||
|
||||
signal_fd = signalfd(-1, &mask, SFD_CLOEXEC);
|
||||
signal_fd = signalfd(-1, &mask, SFD_CLOEXEC | SFD_NONBLOCK);
|
||||
if (signal_fd == -1) {
|
||||
PLOG(FATAL) << "failed to create signalfd";
|
||||
}
|
||||
|
@ -938,7 +955,7 @@ int SecondStageMain(int argc, char** argv) {
|
|||
setpriority(PRIO_PROCESS, 0, 0);
|
||||
while (true) {
|
||||
// By default, sleep until something happens.
|
||||
auto epoll_timeout = std::optional<std::chrono::milliseconds>{};
|
||||
auto epoll_timeout = std::optional<std::chrono::milliseconds>{kDiagnosticTimeout};
|
||||
|
||||
auto shutdown_command = shutdown_state.CheckShutdown();
|
||||
if (shutdown_command) {
|
||||
|
@ -978,6 +995,13 @@ int SecondStageMain(int argc, char** argv) {
|
|||
for (const auto& function : *pending_functions) {
|
||||
(*function)();
|
||||
}
|
||||
} else if (Service::is_exec_service_running()) {
|
||||
std::chrono::duration<double> waited =
|
||||
std::chrono::steady_clock::now() - Service::exec_service_started();
|
||||
if (waited >= kDiagnosticTimeout) {
|
||||
LOG(ERROR) << "Exec service is hung? Waited " << waited.count()
|
||||
<< " without SIGCHLD";
|
||||
}
|
||||
}
|
||||
if (!IsShuttingDown()) {
|
||||
HandleControlMessages();
|
||||
|
|
|
@ -127,6 +127,7 @@ static bool ExpandArgsAndExecv(const std::vector<std::string>& args, bool sigsto
|
|||
|
||||
unsigned long Service::next_start_order_ = 1;
|
||||
bool Service::is_exec_service_running_ = false;
|
||||
std::chrono::time_point<std::chrono::steady_clock> Service::exec_service_started_;
|
||||
|
||||
Service::Service(const std::string& name, Subcontext* subcontext_for_restart_commands,
|
||||
const std::vector<std::string>& args, bool from_apex)
|
||||
|
@ -388,6 +389,7 @@ Result<void> Service::ExecStart() {
|
|||
|
||||
flags_ |= SVC_EXEC;
|
||||
is_exec_service_running_ = true;
|
||||
exec_service_started_ = std::chrono::steady_clock::now();
|
||||
|
||||
LOG(INFO) << "SVC_EXEC service '" << name_ << "' pid " << pid_ << " (uid " << proc_attr_.uid
|
||||
<< " gid " << proc_attr_.gid << "+" << proc_attr_.supp_gids.size() << " context "
|
||||
|
|
|
@ -102,6 +102,9 @@ class Service {
|
|||
size_t CheckAllCommands() const { return onrestart_.CheckAllCommands(); }
|
||||
|
||||
static bool is_exec_service_running() { return is_exec_service_running_; }
|
||||
static std::chrono::time_point<std::chrono::steady_clock> exec_service_started() {
|
||||
return exec_service_started_;
|
||||
}
|
||||
|
||||
const std::string& name() const { return name_; }
|
||||
const std::set<std::string>& classnames() const { return classnames_; }
|
||||
|
@ -154,6 +157,8 @@ class Service {
|
|||
|
||||
static unsigned long next_start_order_;
|
||||
static bool is_exec_service_running_;
|
||||
static std::chrono::time_point<std::chrono::steady_clock> exec_service_started_;
|
||||
static pid_t exec_service_pid_;
|
||||
|
||||
std::string name_;
|
||||
std::set<std::string> classnames_;
|
||||
|
|
Loading…
Reference in a new issue