init: Always reap processes before handling properties

There is a race that manifests like this:

1) A service dies (not processed by init yet).
2) service_manager processes death notification.
3) service_manager gets checkService and calls init to start service.
4) init gets the ctl.start / ctl.interface_start for the service
   but the service already appears started, so it does nothing.
5) init gets sigchld, but doesn't do anything else to restart the
   service.

We can avoid all of this if we always reap pending processes before
handling properties in the main loop of init.  Since reaping the
services calls waitid(), there's no race even if the signalfd for
sigchld hasn't triggered yet.  It also won't cost us much efficiency,
since it's only a single system call.

Test: CF boots, init unit tests pass
Change-Id: Ie24ef406055b283797b41b1821c8ebcccead4db4
This commit is contained in:
Tom Cherry 2019-08-30 14:12:56 -07:00
parent 2b73b60c66
commit 905a5df83d
4 changed files with 32 additions and 15 deletions

View file

@ -69,19 +69,24 @@ Result<void> Epoll::UnregisterHandler(int fd) {
return {};
}
// Waits on epoll_fd_ for ready events, for at most `timeout`.
//
// NOTE(review): this is a rendered diff with the +/- markers stripped, so lines from two
// versions of Epoll::Wait() are interleaved below. Presumably the `Result<void>` signature and
// the single-event `nr` path are the removed lines, and the vector-returning signature and the
// `num_events` path are the added ones -- confirm against the actual commit.
Result<void> Epoll::Wait(std::optional<std::chrono::milliseconds> timeout) {
Result<std::vector<std::function<void()>*>> Epoll::Wait(
std::optional<std::chrono::milliseconds> timeout) {
// timeout_ms stays at -1 (epoll_wait() blocks indefinitely) unless a timeout was supplied and
// its millisecond count is below INT_MAX.
int timeout_ms = -1;
if (timeout && timeout->count() < INT_MAX) {
timeout_ms = timeout->count();
}
// Single-event path: asks epoll_wait() for at most one event.
epoll_event ev;
auto nr = TEMP_FAILURE_RETRY(epoll_wait(epoll_fd_, &ev, 1, timeout_ms));
if (nr == -1) {
// Multi-event path: the buffer has one slot per entry in epoll_handlers_.
// NOTE(review): `ev[max_events]` is a variable-length array (a compiler extension in C++), and
// epoll_wait() fails with EINVAL when maxevents is <= 0, so this presumably relies on at least
// one handler being registered -- confirm.
const auto max_events = epoll_handlers_.size();
epoll_event ev[max_events];
auto num_events = TEMP_FAILURE_RETRY(epoll_wait(epoll_fd_, ev, max_events, timeout_ms));
if (num_events == -1) {
return ErrnoError() << "epoll_wait failed";
} else if (nr == 1) {
// Single-event path: the handler stored in the event's data.ptr is invoked right here.
std::invoke(*reinterpret_cast<std::function<void()>*>(ev.data.ptr));
}
return {};
// Multi-event path: the handler pointer stored in each ready event's data.ptr is collected
// without being invoked; running the handlers is left to the caller.
std::vector<std::function<void()>*> pending_functions;
for (int i = 0; i < num_events; ++i) {
pending_functions.emplace_back(reinterpret_cast<std::function<void()>*>(ev[i].data.ptr));
}
return pending_functions;
}
} // namespace init

View file

@ -14,8 +14,7 @@
* limitations under the License.
*/
#ifndef _INIT_EPOLL_H
#define _INIT_EPOLL_H
#pragma once
#include <stdint.h>
#include <sys/epoll.h>
@ -24,6 +23,7 @@
#include <functional>
#include <map>
#include <optional>
#include <vector>
#include <android-base/unique_fd.h>
@ -39,7 +39,8 @@ class Epoll {
Result<void> Open();
Result<void> RegisterHandler(int fd, std::function<void()> handler, uint32_t events = EPOLLIN);
Result<void> UnregisterHandler(int fd);
Result<void> Wait(std::optional<std::chrono::milliseconds> timeout);
Result<std::vector<std::function<void()>*>> Wait(
std::optional<std::chrono::milliseconds> timeout);
private:
android::base::unique_fd epoll_fd_;
@ -48,5 +49,3 @@ class Epoll {
} // namespace init
} // namespace android
#endif

View file

@ -787,8 +787,17 @@ int SecondStageMain(int argc, char** argv) {
if (am.HasMoreCommands()) epoll_timeout = 0ms;
}
if (auto result = epoll.Wait(epoll_timeout); !result) {
LOG(ERROR) << result.error();
auto pending_functions = epoll.Wait(epoll_timeout);
if (!pending_functions) {
LOG(ERROR) << pending_functions.error();
} else if (!pending_functions->empty()) {
// We always reap children before responding to the other pending functions. This is to
// prevent a race where other daemons see that a service has exited and ask init to
// start it again via ctl.start before init has reaped it.
ReapAnyOutstandingChildren();
for (const auto& function : *pending_functions) {
(*function)();
}
}
}

View file

@ -212,7 +212,11 @@ TestFrame::TestFrame(const std::vector<const std::vector<int>>& chords, EventHan
}
// Waits on epoll_ for up to `wait` and then runs every handler that Wait() reported as pending.
//
// NOTE(review): this is a rendered diff with the +/- markers stripped. The bare
// `epoll_.Wait(wait);` call whose result is discarded is presumably the removed line, and the
// lines after it its replacement -- confirm against the actual commit.
void TestFrame::RelaxForMs(std::chrono::milliseconds wait) {
epoll_.Wait(wait);
auto pending_functions = epoll_.Wait(wait);
// Fail the test with the reported error if the wait itself failed.
ASSERT_TRUE(pending_functions) << pending_functions.error();
// Wait() only returns the handlers, so they are invoked here, in the order returned.
for (const auto& function : *pending_functions) {
(*function)();
}
}
void TestFrame::SetChord(int key, bool value) {