platform_system_core/init/init.cpp
Luis Hector Chavez 9f97f47940 init: Allow clean system shutdown upon SIGTERM
This allows Android to cleanly shutdown when running in a PID namespace
in a way that does not rely on adbd running. This is useful to allow
Android to be running in a container and its lifetime managed by an
OCI-compliant tool.

Bug: 65415372
Test: `kill -TERM 1` as root is correctly dropped.
Test: `kill -TERM 1` from the init PID namespace causes init to cleanly shutdown.
Change-Id: Ia66ebdb436221919081bc4723337c0c7f1e53b09
2017-09-07 10:47:04 -07:00

662 lines
22 KiB
C++

/*
* Copyright (C) 2008 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "init.h"
#include <dirent.h>
#include <fcntl.h>
#include <paths.h>
#include <seccomp_policy.h>
#include <signal.h>
#include <stdlib.h>
#include <string.h>
#include <sys/epoll.h>
#include <sys/mount.h>
#include <sys/signalfd.h>
#include <sys/sysmacros.h>
#include <sys/types.h>
#include <unistd.h>
#include <android-base/chrono_utils.h>
#include <android-base/file.h>
#include <android-base/logging.h>
#include <android-base/properties.h>
#include <android-base/strings.h>
#include <cutils/android_reboot.h>
#include <keyutils.h>
#include <libavb/libavb.h>
#include <private/android_filesystem_config.h>
#include <selinux/android.h>
#include <memory>
#include <optional>
#include "import_parser.h"
#include "init_first_stage.h"
#include "keychords.h"
#include "log.h"
#include "property_service.h"
#include "reboot.h"
#include "security.h"
#include "selinux.h"
#include "sigchld_handler.h"
#include "ueventd.h"
#include "util.h"
#include "watchdogd.h"
using namespace std::string_literals;
using android::base::boot_clock;
using android::base::GetProperty;
using android::base::Timer;
namespace android {
namespace init {
// Non-zero once property_enable_triggers_action() has run; property_changed() only queues
// property-change events for the ActionManager while this is set.
static int property_triggers_enabled = 0;
// Value of the "qemu" kernel command-line option, filled in by import_kernel_nv(). An empty
// string means the option was not seen.
static char qemu[32];
// Console device path; console_init_action() replaces it when ro.boot.console is set.
std::string default_console = "/dev/console";
// epoll instance waited on by the main loop; fds are added through register_epoll_handler().
static int epoll_fd = -1;
// signalfd created by InstallSigtermHandler() and read by HandleSigtermSignal().
static int sigterm_signal_fd = -1;
// Non-null while init waits for property wait_prop_name to take the value wait_prop_value.
// The Timer is logged when the wait completes (see property_changed()).
static std::unique_ptr<Timer> waiting_for_prop(nullptr);
static std::string wait_prop_name;
static std::string wait_prop_value;
// Set when HandlePowerctlMessage() reports success; while set, the main loop no longer calls
// RestartProcesses().
static bool shutting_down;
// Config directories for which Parser::ParseConfig() returned false in LoadBootScripts().
std::vector<std::string> late_import_paths;
void DumpState() {
ServiceList::GetInstance().DumpState();
ActionManager::GetInstance().DumpState();
}
// Builds a Parser with section parsers registered for the "service", "on" and "import"
// keywords, in that order.
//
// action_manager: receives the parsed "on" sections.
// service_list:   receives the parsed "service" sections.
Parser CreateParser(ActionManager& action_manager, ServiceList& service_list) {
    Parser config_parser;

    config_parser.AddSectionParser("service", std::make_unique<ServiceParser>(&service_list));
    config_parser.AddSectionParser("on", std::make_unique<ActionParser>(&action_manager));
    // NOTE(review): the import parser is handed the address of this local object, which is
    // then returned by value. Presumably this relies on the returned Parser being the same
    // object (copy elision) -- confirm.
    config_parser.AddSectionParser("import", std::make_unique<ImportParser>(&config_parser));

    return config_parser;
}
// Parses the boot scripts into the given action manager and service list.
//
// If ro.boot.init_rc names a script, only that script is parsed. Otherwise /init.rc is parsed,
// followed by the init directories of /system, /vendor and /odm. Any directory for which
// ParseConfig() returns false is appended to late_import_paths.
static void LoadBootScripts(ActionManager& action_manager, ServiceList& service_list) {
    Parser parser = CreateParser(action_manager, service_list);

    const std::string custom_script = GetProperty("ro.boot.init_rc", "");
    if (!custom_script.empty()) {
        parser.ParseConfig(custom_script);
        return;
    }

    parser.ParseConfig("/init.rc");

    const char* const init_dirs[] = {
        "/system/etc/init",
        "/vendor/etc/init",
        "/odm/etc/init",
    };
    for (const char* dir : init_dirs) {
        if (!parser.ParseConfig(dir)) {
            late_import_paths.emplace_back(dir);
        }
    }
}
// Adds fd to the global epoll instance, watching for readability (EPOLLIN).
//
// fn is stored in the event's user data so the main loop can call it when fd becomes ready.
// A failing epoll_ctl() is logged and otherwise ignored.
void register_epoll_handler(int fd, void (*fn)()) {
    epoll_event event;
    event.data.ptr = reinterpret_cast<void*>(fn);
    event.events = EPOLLIN;

    const int rc = epoll_ctl(epoll_fd, EPOLL_CTL_ADD, fd, &event);
    if (rc == -1) {
        PLOG(ERROR) << "epoll_ctl failed";
    }
}
bool start_waiting_for_property(const char *name, const char *value)
{
if (waiting_for_prop) {
return false;
}
if (GetProperty(name, "") != value) {
// Current property value is not equal to expected value
wait_prop_name = name;
wait_prop_value = value;
waiting_for_prop.reset(new Timer());
} else {
LOG(INFO) << "start_waiting_for_property(\""
<< name << "\", \"" << value << "\"): already set";
}
return true;
}
// Cancels any property wait: drops the timer and forgets the awaited name and value.
void ResetWaitForProp() {
    waiting_for_prop.reset();
    wait_prop_value.clear();
    wait_prop_name.clear();
}
// Reacts to property `name` having been set to `value`.
void property_changed(const std::string& name, const std::string& value) {
    // sys.powerctl bypasses the event queue and is handled right here, so that init always
    // shuts down or reboots immediately, no matter what other events are pending and whether
    // init is waiting on an exec service or on a property.
    // In the non-thermal-shutdown case the 'shutdown' trigger is fired so that device specific
    // commands get a chance to run.
    if (name == "sys.powerctl" && HandlePowerctlMessage(value)) {
        shutting_down = true;
    }

    if (property_triggers_enabled) {
        ActionManager::GetInstance().QueuePropertyChange(name, value);
    }

    // Finish a pending wait if this is exactly the name/value pair being waited for.
    const bool wait_satisfied =
        waiting_for_prop && wait_prop_name == name && wait_prop_value == value;
    if (wait_satisfied) {
        LOG(INFO) << "Wait for property took " << *waiting_for_prop;
        ResetWaitForProp();
    }
}
static std::optional<boot_clock::time_point> RestartProcesses() {
std::optional<boot_clock::time_point> next_process_restart_time;
for (const auto& s : ServiceList::GetInstance()) {
if (!(s->flags() & SVC_RESTARTING)) continue;
auto restart_time = s->time_started() + 5s;
if (boot_clock::now() > restart_time) {
if (auto result = s->Start(); !result) {
LOG(ERROR) << "Could not restart process '" << s->name() << "': " << result.error();
}
} else {
if (!next_process_restart_time || restart_time < *next_process_restart_time) {
next_process_restart_time = restart_time;
}
}
}
return next_process_restart_time;
}
void handle_control_message(const std::string& msg, const std::string& name) {
Service* svc = ServiceList::GetInstance().FindService(name);
if (svc == nullptr) {
LOG(ERROR) << "no such service '" << name << "'";
return;
}
if (msg == "start") {
if (auto result = svc->Start(); !result) {
LOG(ERROR) << "Could not ctl.start service '" << name << "': " << result.error();
}
} else if (msg == "stop") {
svc->Stop();
} else if (msg == "restart") {
svc->Restart();
} else {
LOG(ERROR) << "unknown control msg '" << msg << "'";
}
}
// Builtin action: waits for the COLDBOOT_DONE file and publishes how long the wait took in
// ro.boottime.init.cold_boot_wait. `args` is unused.
static Result<Success> wait_for_coldboot_done_action(const std::vector<std::string>& args) {
    Timer wait_timer;
    LOG(VERBOSE) << "Waiting for " COLDBOOT_DONE "...";

    // There used to be a 1s timeout here, because boot time was not tracked in any other way
    // and many OEMs had sepolicy regular expressions that were too expensive
    // (http://b/19899875). Boot time is tracked now, so the time taken is simply recorded in a
    // system property. Taking more than a minute is still fatal: a build that slow is unlikely
    // to boot at all, and test lab devices are better off failing back to the bootloader.
    const bool timed_out = wait_for_file(COLDBOOT_DONE, 60s) < 0;
    if (timed_out) {
        LOG(FATAL) << "Timed out waiting for " COLDBOOT_DONE;
    }

    const auto elapsed = wait_timer.duration().count();
    property_set("ro.boottime.init.cold_boot_wait", std::to_string(elapsed));
    return Success();
}
// Builtin action wrapper around keychord_init(). `args` is unused; always reports success.
static Result<Success> keychord_init_action(const std::vector<std::string>& args) {
    keychord_init();

    return Success();
}
// Builtin action: when ro.boot.console is non-empty, points default_console at the
// corresponding /dev node. `args` is unused; always reports success.
static Result<Success> console_init_action(const std::vector<std::string>& args) {
    const std::string boot_console = GetProperty("ro.boot.console", "");
    if (boot_console.empty()) {
        return Success();
    }

    default_console = "/dev/" + boot_console;
    return Success();
}
// Callback for import_kernel_cmdline(): handles one key/value pair of the kernel command line.
//
// for_emulator == true:  every option is exported as ro.kernel.<key>.
// for_emulator == false: "qemu" is remembered in the qemu buffer, and "androidboot.<x>" is
//                        exported as ro.boot.<x>; any other key is ignored.
static void import_kernel_nv(const std::string& key, const std::string& value, bool for_emulator) {
    if (key.empty()) return;

    if (for_emulator) {
        // In the emulator, export any kernel option with the "ro.kernel." prefix.
        property_set("ro.kernel." + key, value);
        return;
    }

    if (key == "qemu") {
        strlcpy(qemu, value.c_str(), sizeof(qemu));
        return;
    }

    static constexpr char kBootPrefix[] = "androidboot.";
    if (android::base::StartsWith(key, kBootPrefix)) {
        // Strip the prefix (sizeof counts the terminating NUL, hence the -1).
        property_set("ro.boot." + key.substr(sizeof(kBootPrefix) - 1), value);
    }
}
// Derives ro.boot.flash.locked from ro.boot.verifiedbootstate: "0" when the state is
// "orange", "1" for any other non-empty state. Does nothing when ro.oem_unlock_supported is
// not true or the verified boot state is empty.
static void export_oem_lock_status() {
    const bool unlock_supported = android::base::GetBoolProperty("ro.oem_unlock_supported", false);
    if (!unlock_supported) return;

    const std::string vb_state = GetProperty("ro.boot.verifiedbootstate", "");
    if (vb_state.empty()) return;

    const char* locked = (vb_state == "orange") ? "0" : "1";
    property_set("ro.boot.flash.locked", locked);
}
// Copies a fixed set of ro.boot.* properties to their legacy names, substituting a default
// value whenever the source property is empty.
static void export_kernel_boot_props() {
    struct PropMapping {
        const char* src_prop;
        const char* dst_prop;
        const char* default_value;
    };
    const PropMapping mappings[] = {
        { "ro.boot.serialno",   "ro.serialno",   "",        },
        { "ro.boot.mode",       "ro.bootmode",   "unknown", },
        { "ro.boot.baseband",   "ro.baseband",   "unknown", },
        { "ro.boot.bootloader", "ro.bootloader", "unknown", },
        { "ro.boot.hardware",   "ro.hardware",   "unknown", },
        { "ro.boot.revision",   "ro.revision",   "0",       },
    };

    for (const PropMapping& mapping : mappings) {
        const std::string boot_value = GetProperty(mapping.src_prop, "");
        property_set(mapping.dst_prop, boot_value.empty() ? mapping.default_value : boot_value);
    }
}
// Exports the regular files found in the Android device-tree directory as ro.boot.<file name>
// properties, with every ',' in the file content replaced by '.'. The "compatible" and "name"
// entries are skipped. Does nothing unless the "compatible" value is "android,firmware".
static void process_kernel_dt() {
    if (!is_android_dt_value_expected("compatible", "android,firmware")) return;

    std::unique_ptr<DIR, decltype(&closedir)> dt_dir(opendir(get_android_dt_dir().c_str()),
                                                     closedir);
    if (dt_dir == nullptr) return;

    std::string contents;
    for (struct dirent* entry = readdir(dt_dir.get()); entry != nullptr;
         entry = readdir(dt_dir.get())) {
        const bool is_regular_file = entry->d_type == DT_REG;
        if (!is_regular_file || strcmp(entry->d_name, "compatible") == 0 ||
            strcmp(entry->d_name, "name") == 0) {
            continue;
        }

        const std::string path = get_android_dt_dir() + entry->d_name;
        // NOTE(review): the result of ReadFileToString() is not checked, so the property is
        // set from whatever `contents` holds after a failed read -- confirm this is intended.
        android::base::ReadFileToString(path, &contents);
        std::replace(contents.begin(), contents.end(), ',', '.');
        property_set("ro.boot."s + entry->d_name, contents);
    }
}
// Imports the kernel command line through import_kernel_nv().
static void process_kernel_cmdline() {
    // Pass one handles the common options and detects whether we are running under qemu.
    import_kernel_cmdline(false, import_kernel_nv);

    // Pass two is only needed under qemu, where all kernel parameters are exported as
    // properties.
    const bool running_in_qemu = qemu[0] != '\0';
    if (running_in_qemu) {
        import_kernel_cmdline(true, import_kernel_nv);
    }
}
// Builtin action: switches property triggers on, so that property_changed() starts queueing
// property changes. `args` is unused; always reports success.
static Result<Success> property_enable_triggers_action(const std::vector<std::string>& args) {
    property_triggers_enabled = 1;

    return Success();
}
// Builtin action: queues the action that enables property triggers, then queues all property
// actions. `args` is unused; always reports success.
static Result<Success> queue_property_triggers_action(const std::vector<std::string>& args) {
    ActionManager& action_manager = ActionManager::GetInstance();

    action_manager.QueueBuiltinAction(property_enable_triggers_action, "enable_property_trigger");
    action_manager.QueueAllPropertyActions();

    return Success();
}
// Scans the kernel command line for androidboot.seccomp=global and, when present, installs
// the global seccomp filter. Failing to install it is fatal.
static void global_seccomp() {
    auto enable_if_requested = [](const std::string& key, const std::string& value, bool in_qemu) {
        if (key != "androidboot.seccomp" || value != "global") return;

        if (!set_global_seccomp_filter()) {
            LOG(FATAL) << "Failed to globally enable seccomp!";
        }
    };

    import_kernel_cmdline(false, enable_if_requested);
}
// Publishes the UDC controller used by the ConfigFS USB gadgets in sys.usb.controller.
// The controller name is taken from the entries of "/sys/class/udc"; when several controllers
// exist, the first entry returned by readdir() wins. Entries starting with '.' are skipped.
static void set_usb_controller() {
    std::unique_ptr<DIR, decltype(&closedir)> udc_dir(opendir("/sys/class/udc"), closedir);
    if (udc_dir == nullptr) return;

    for (dirent* entry = readdir(udc_dir.get()); entry != nullptr;
         entry = readdir(udc_dir.get())) {
        if (entry->d_name[0] == '.') continue;

        property_set("sys.usb.controller", entry->d_name);
        return;
    }
}
// Installs a handler for the fatal signals below that reboots to the bootloader when raised in
// init itself (pid 1), and simply _exit()s in any other process.
static void InstallRebootSignalHandlers() {
    // By default a crashing init panics the kernel. On development builds we would rather
    // reboot to the bootloader: that avoids boot looping on a bad configuration and lets
    // developers and test farms recover easily.
    struct sigaction action;
    memset(&action, 0, sizeof(action));
    sigfillset(&action.sa_mask);
    action.sa_flags = SA_RESTART;
    action.sa_handler = [](int signo) {
        // Processes forked from init inherit these handlers, but they must not trigger a
        // reboot, so children just terminate via _exit().
        if (getpid() != 1) {
            _exit(signo);
        }

        // This is a signal handler, so DoReboot() and LOG(FATAL) are not good options.
        // RebootSystem uses syscall(), which is not actually async-signal-safe either, but it
        // is the only choice and probably good enough: this is already an error path and only
        // enabled on development builds.
        RebootSystem(ANDROID_RB_RESTART2, "bootloader");
    };

    const int fatal_signals[] = {
        SIGABRT,
        SIGBUS,
        SIGFPE,
        SIGILL,
        SIGSEGV,
#if defined(SIGSTKFLT)
        SIGSTKFLT,
#endif
        SIGSYS,
        SIGTRAP,
    };
    for (const int signo : fatal_signals) {
        sigaction(signo, &action, nullptr);
    }
}
static void HandleSigtermSignal() {
signalfd_siginfo siginfo;
ssize_t bytes_read = TEMP_FAILURE_RETRY(read(sigterm_signal_fd, &siginfo, sizeof(siginfo)));
if (bytes_read != sizeof(siginfo)) {
PLOG(ERROR) << "Failed to read siginfo from sigterm_signal_fd";
return;
}
if (siginfo.ssi_pid != 0) {
// Drop any userspace SIGTERM requests.
LOG(DEBUG) << "Ignoring SIGTERM from pid " << siginfo.ssi_pid;
return;
}
LOG(INFO) << "Handling SIGTERM, shutting system down";
HandlePowerctlMessage("shutdown");
}
// Routes SIGTERM into the epoll loop: blocks normal delivery of the signal, creates a signalfd
// for it and registers HandleSigtermSignal() as the handler for that fd. Failing to block the
// signal or to create the signalfd is fatal.
static void InstallSigtermHandler() {
    sigset_t sigterm_only;
    sigemptyset(&sigterm_only);
    sigaddset(&sigterm_only, SIGTERM);

    const int block_rc = sigprocmask(SIG_BLOCK, &sigterm_only, nullptr);
    if (block_rc == -1) {
        PLOG(FATAL) << "failed to block SIGTERM";
    }

    sigterm_signal_fd = signalfd(-1, &sigterm_only, SFD_CLOEXEC);
    const bool created = sigterm_signal_fd != -1;
    if (!created) {
        PLOG(FATAL) << "failed to create signalfd for SIGTERM";
    }

    register_epoll_handler(sigterm_signal_fd, HandleSigtermSignal);
}
// Entry point of init (android::init::main).
//
// When invoked under the name "ueventd" or "watchdogd", dispatches to ueventd_main() or
// watchdogd_main(). Otherwise it runs one of two stages, selected by the INIT_SECOND_STAGE
// environment variable:
//  - first stage (variable unset): mounts the basic filesystems, creates a few device nodes,
//    mounts the required partitions, sets up SELinux and re-executes argv[0] with
//    INIT_SECOND_STAGE set;
//  - second stage: initializes properties, epoll and signal handling, parses the boot scripts,
//    queues the boot triggers and then loops forever executing commands, restarting services
//    and dispatching epoll events.
int main(int argc, char** argv) {
if (!strcmp(basename(argv[0]), "ueventd")) {
return ueventd_main(argc, argv);
}
if (!strcmp(basename(argv[0]), "watchdogd")) {
return watchdogd_main(argc, argv);
}
if (REBOOT_BOOTLOADER_ON_PANIC) {
InstallRebootSignalHandlers();
}
// The first stage sets INIT_SECOND_STAGE right before re-executing itself (see below), so an
// unset variable means this is the first stage.
bool is_first_stage = (getenv("INIT_SECOND_STAGE") == nullptr);
if (is_first_stage) {
boot_clock::time_point start_time = boot_clock::now();
// Clear the umask.
umask(0);
clearenv();
setenv("PATH", _PATH_DEFPATH, 1);
// Get the basic filesystem setup we need put together in the initramdisk
// on / and then we'll let the rc file figure out the rest.
mount("tmpfs", "/dev", "tmpfs", MS_NOSUID, "mode=0755");
mkdir("/dev/pts", 0755);
mkdir("/dev/socket", 0755);
mount("devpts", "/dev/pts", "devpts", 0, NULL);
#define MAKE_STR(x) __STRING(x)
mount("proc", "/proc", "proc", 0, "hidepid=2,gid=" MAKE_STR(AID_READPROC));
// Don't expose the raw commandline to unprivileged processes.
chmod("/proc/cmdline", 0440);
gid_t groups[] = { AID_READPROC };
setgroups(arraysize(groups), groups);
mount("sysfs", "/sys", "sysfs", 0, NULL);
mount("selinuxfs", "/sys/fs/selinux", "selinuxfs", 0, NULL);
mknod("/dev/kmsg", S_IFCHR | 0600, makedev(1, 11));
if constexpr (WORLD_WRITABLE_KMSG) {
mknod("/dev/kmsg_debug", S_IFCHR | 0622, makedev(1, 11));
}
mknod("/dev/random", S_IFCHR | 0666, makedev(1, 8));
mknod("/dev/urandom", S_IFCHR | 0666, makedev(1, 9));
// Now that tmpfs is mounted on /dev and we have /dev/kmsg, we can actually
// talk to the outside world...
InitKernelLogging(argv);
LOG(INFO) << "init first stage started!";
if (!DoFirstStageMount()) {
LOG(FATAL) << "Failed to mount required partitions early ...";
}
SetInitAvbVersionInRecovery();
// Enable seccomp if global boot option was passed (otherwise it is enabled in zygote).
global_seccomp();
// Set up SELinux, loading the SELinux policy.
SelinuxSetupKernelLogging();
SelinuxInitialize();
// We're in the kernel domain, so re-exec init to transition to the init domain now
// that the SELinux policy has been loaded.
if (selinux_android_restorecon("/init", 0) == -1) {
PLOG(FATAL) << "restorecon failed of /init failed";
}
setenv("INIT_SECOND_STAGE", "true", 1);
// Hand the first-stage start time to the second stage through the environment.
// NOTE(review): the division assumes boot_clock counts nanoseconds, as the constant's name
// implies -- confirm.
static constexpr uint32_t kNanosecondsPerMillisecond = 1e6;
uint64_t start_ms = start_time.time_since_epoch().count() / kNanosecondsPerMillisecond;
setenv("INIT_STARTED_AT", std::to_string(start_ms).c_str(), 1);
char* path = argv[0];
char* args[] = { path, nullptr };
execv(path, args);
// execv() only returns if an error happened, in which case we
// panic and never fall through this conditional.
PLOG(FATAL) << "execv(\"" << path << "\") failed";
}
// At this point we're in the second stage of init.
InitKernelLogging(argv);
LOG(INFO) << "init second stage started!";
// Set up a session keyring that all processes will have access to. It
// will hold things like FBE encryption keys. No process should override
// its session keyring.
keyctl_get_keyring_ID(KEY_SPEC_SESSION_KEYRING, 1);
// Indicate that booting is in progress to background fw loaders, etc.
close(open("/dev/.booting", O_WRONLY | O_CREAT | O_CLOEXEC, 0000));
property_init();
// If arguments are passed both on the command line and in DT,
// properties set in DT always have priority over the command-line ones.
process_kernel_dt();
process_kernel_cmdline();
// Propagate the kernel variables to internal variables
// used by init as well as the current required properties.
export_kernel_boot_props();
// Make the time that init started available for bootstat to log.
// NOTE(review): unlike INIT_AVB_VERSION below, these getenv() results are passed on without
// a null check; INIT_SELINUX_TOOK is not set anywhere in this function -- confirm both are
// always present when the second stage runs.
property_set("ro.boottime.init", getenv("INIT_STARTED_AT"));
property_set("ro.boottime.init.selinux", getenv("INIT_SELINUX_TOOK"));
// Set libavb version for Framework-only OTA match in Treble build.
const char* avb_version = getenv("INIT_AVB_VERSION");
if (avb_version) property_set("ro.boot.avb_version", avb_version);
// Clean up our environment.
unsetenv("INIT_SECOND_STAGE");
unsetenv("INIT_STARTED_AT");
unsetenv("INIT_SELINUX_TOOK");
unsetenv("INIT_AVB_VERSION");
// Now set up SELinux for second stage.
SelinuxSetupKernelLogging();
SelabelInitialize();
SelinuxRestoreContext();
// Create the epoll instance that the main loop below waits on; it must exist before any
// register_epoll_handler() call.
epoll_fd = epoll_create1(EPOLL_CLOEXEC);
if (epoll_fd == -1) {
PLOG(ERROR) << "epoll_create1 failed";
exit(1);
}
sigchld_handler_init();
if (!IsRebootCapable()) {
// If init does not have the CAP_SYS_BOOT capability, it is running in a container.
// In that case, receiving SIGTERM will cause the system to shut down.
InstallSigtermHandler();
}
property_load_boot_defaults();
export_oem_lock_status();
start_property_service();
set_usb_controller();
const BuiltinFunctionMap function_map;
Action::set_function_map(&function_map);
ActionManager& am = ActionManager::GetInstance();
ServiceList& sm = ServiceList::GetInstance();
LoadBootScripts(am, sm);
// Turning this on and letting the INFO logging be discarded adds 0.2s to
// Nexus 9 boot time, so it's disabled by default.
if (false) DumpState();
am.QueueEventTrigger("early-init");
// Queue an action that waits for coldboot done so we know ueventd has set up all of /dev...
am.QueueBuiltinAction(wait_for_coldboot_done_action, "wait_for_coldboot_done");
// ... so that we can start queuing up actions that require stuff from /dev.
am.QueueBuiltinAction(MixHwrngIntoLinuxRngAction, "MixHwrngIntoLinuxRng");
am.QueueBuiltinAction(SetMmapRndBitsAction, "SetMmapRndBits");
am.QueueBuiltinAction(SetKptrRestrictAction, "SetKptrRestrict");
am.QueueBuiltinAction(keychord_init_action, "keychord_init");
am.QueueBuiltinAction(console_init_action, "console_init");
// Trigger all the boot actions to get us started.
am.QueueEventTrigger("init");
// Repeat mix_hwrng_into_linux_rng in case /dev/hw_random or /dev/random
// wasn't ready immediately after wait_for_coldboot_done
am.QueueBuiltinAction(MixHwrngIntoLinuxRngAction, "MixHwrngIntoLinuxRng");
// Don't mount filesystems or start core system services in charger mode.
std::string bootmode = GetProperty("ro.bootmode", "");
if (bootmode == "charger") {
am.QueueEventTrigger("charger");
} else {
am.QueueEventTrigger("late-init");
}
// Run all property triggers based on current state of the properties.
am.QueueBuiltinAction(queue_property_triggers_action, "queue_property_triggers");
// Main loop: never exits.
while (true) {
// By default, sleep until something happens.
int epoll_timeout_ms = -1;
// No command is executed while a property wait or an exec service is outstanding.
if (!(waiting_for_prop || Service::is_exec_service_running())) {
am.ExecuteOneCommand();
}
// Re-check the condition: the command that just ran may have changed it.
if (!(waiting_for_prop || Service::is_exec_service_running())) {
if (!shutting_down) {
auto next_process_restart_time = RestartProcesses();
// If there's a process that needs restarting, wake up in time for that.
if (next_process_restart_time) {
epoll_timeout_ms = std::chrono::ceil<std::chrono::milliseconds>(
*next_process_restart_time - boot_clock::now())
.count();
if (epoll_timeout_ms < 0) epoll_timeout_ms = 0;
}
}
// If there's more work to do, wake up again immediately.
if (am.HasMoreCommands()) epoll_timeout_ms = 0;
}
// Wait for at most one event and run its handler; register_epoll_handler() stored the
// handler's function pointer in ev.data.ptr.
epoll_event ev;
int nr = TEMP_FAILURE_RETRY(epoll_wait(epoll_fd, &ev, 1, epoll_timeout_ms));
if (nr == -1) {
PLOG(ERROR) << "epoll_wait failed";
} else if (nr == 1) {
((void (*)()) ev.data.ptr)();
}
}
return 0;
}
} // namespace init
} // namespace android
// Process entry point: forwards to android::init::main() and propagates its result as the
// exit status, so that the return values of ueventd_main() and watchdogd_main() are not
// silently replaced by main()'s implicit 0.
int main(int argc, char** argv) {
    return android::init::main(argc, argv);
}