init/service_parser: Add arguments `window' and `target' for `critical'

Critical services can now use the interface `critical
[window=<fatal crash window mins>] [target=<fatal reboot target>]` to
set up a timing window: when there are more than 4 crashes within it,
init regards this as a fatal system error and reboots the system.

Configure `window=${zygote.critical_window.minute:-off}' and
`target=zygote-fatal' for all system-server services, so that a platform
that configures ro.boot.zygote_critical_window can escape a system-server
crash loop via the init fatal handler.

Bug: 146818493
Change-Id: Ib2dc253616be6935ab9ab52184a1b6394665e813
This commit is contained in:
Woody Lin 2019-12-26 22:22:28 +08:00
parent 45662c8941
commit 45215ae6e5
10 changed files with 65 additions and 11 deletions

View file

@ -172,9 +172,12 @@ runs the service.
This option connects stdin, stdout, and stderr to the console. It is mutually exclusive with the
stdio_to_kmsg option, which only connects stdout and stderr to kmsg.
`critical`
`critical [window=<fatal crash window mins>] [target=<fatal reboot target>]`
> This is a device-critical service. If it exits more than four times in
four minutes or before boot completes, the device will reboot into bootloader.
_fatal crash window mins_ minutes or before boot completes, the device
will reboot into _fatal reboot target_.
The default value of _fatal crash window mins_ is 4, and default value
of _fatal reboot target_ is 'bootloader'.
`disabled`
> This service will not automatically start with its class.

View file

@ -20,6 +20,7 @@
#include <sys/socket.h>
#include <sys/types.h>
#include <optional>
#include <string>
#include <android-base/properties.h>
@ -41,7 +42,7 @@ inline bool CanReadProperty(const std::string&, const std::string&) {
}
// reboot_utils.h
inline void SetFatalRebootTarget() {}
inline void SetFatalRebootTarget(const std::optional<std::string>& = std::nullopt) {}
// Stub for the reboot_utils.h function of the same name: it ignores
// signal_number and never returns, terminating the process via abort().
[[noreturn]] inline void InitFatalReboot(int signal_number) {
    abort();
}

View file

@ -19,6 +19,7 @@
#include <sys/syscall.h>
#include <unistd.h>
#include <optional>
#include <string>
#include <android-base/file.h>
@ -37,7 +38,7 @@ namespace init {
static std::string init_fatal_reboot_target = "bootloader";
static bool init_fatal_panic = false;
void SetFatalRebootTarget() {
void SetFatalRebootTarget(const std::optional<std::string>& reboot_target) {
std::string cmdline;
android::base::ReadFileToString("/proc/cmdline", &cmdline);
cmdline = android::base::Trim(cmdline);
@ -45,6 +46,11 @@ void SetFatalRebootTarget() {
const char kInitFatalPanicString[] = "androidboot.init_fatal_panic=true";
init_fatal_panic = cmdline.find(kInitFatalPanicString) != std::string::npos;
if (reboot_target) {
init_fatal_reboot_target = *reboot_target;
return;
}
const char kRebootTargetString[] = "androidboot.init_fatal_reboot_target=";
auto start_pos = cmdline.find(kRebootTargetString);
if (start_pos == std::string::npos) {

View file

@ -16,6 +16,7 @@
#pragma once
#include <optional>
#include <string>
#define PROC_SYSRQ "/proc/sysrq-trigger"
@ -23,7 +24,7 @@
namespace android {
namespace init {
void SetFatalRebootTarget();
void SetFatalRebootTarget(const std::optional<std::string>& reboot_target = std::nullopt);
// Determines whether the system is capable of rebooting. This is conservative,
// so if any of the attempts to determine this fail, it will still return true.
bool IsRebootCapable();

View file

@ -45,6 +45,7 @@
#include <android/api-level.h>
#include "mount_namespace.h"
#include "reboot_utils.h"
#include "selinux.h"
#else
#include "host_init_stubs.h"
@ -312,20 +313,24 @@ void Service::Reap(const siginfo_t& siginfo) {
#endif
const bool is_process_updatable = !pre_apexd_ && is_apex_updatable;
// If we crash > 4 times in 4 minutes or before boot_completed,
// If we crash > 4 times in 'fatal_crash_window_' minutes or before boot_completed,
// reboot into bootloader or set crashing property
boot_clock::time_point now = boot_clock::now();
if (((flags_ & SVC_CRITICAL) || is_process_updatable) && !(flags_ & SVC_RESTART)) {
bool boot_completed = android::base::GetBoolProperty("sys.boot_completed", false);
if (now < time_crashed_ + 4min || !boot_completed) {
if (now < time_crashed_ + fatal_crash_window_ || !boot_completed) {
if (++crash_count_ > 4) {
auto exit_reason = boot_completed ?
"in " + std::to_string(fatal_crash_window_.count()) + " minutes" :
"before boot completed";
if (flags_ & SVC_CRITICAL) {
// Aborts into bootloader
// Aborts into `fatal_reboot_target_'.
SetFatalRebootTarget(fatal_reboot_target_);
LOG(FATAL) << "critical process '" << name_ << "' exited 4 times "
<< (boot_completed ? "in 4 minutes" : "before boot completed");
<< exit_reason;
} else {
LOG(ERROR) << "updatable process '" << name_ << "' exited 4 times "
<< (boot_completed ? "in 4 minutes" : "before boot completed");
<< exit_reason;
// Notifies update_verifier and apexd
SetProperty("sys.init.updatable_crashing_process_name", name_);
SetProperty("sys.init.updatable_crashing", "1");

View file

@ -155,6 +155,8 @@ class Service {
android::base::boot_clock::time_point time_started_; // time of last start
android::base::boot_clock::time_point time_crashed_; // first crash within inspection window
int crash_count_; // number of times crashed within window
std::chrono::minutes fatal_crash_window_ = 4min; // fatal() when more than 4 crashes in it
std::optional<std::string> fatal_reboot_target_; // reboot target of fatal handler
std::optional<CapSet> capabilities_;
ProcessAttributes proc_attr_;

View file

@ -93,6 +93,39 @@ Result<void> ServiceParser::ParseConsole(std::vector<std::string>&& args) {
}
// Parses the service option
//   critical [window=<fatal crash window mins>] [target=<fatal reboot target>]
//
// args[0] is the keyword itself; every following argument must have the form
// `key=value` with exactly one '='. Recognised keys:
//   target - stored verbatim as the service's fatal reboot target.
//   window - property-expanded first; the literal result "off" makes this
//            function return success immediately WITHOUT marking the service
//            critical (and without applying any `target` parsed so far).
//            Otherwise it must be an integer >= 1, interpreted as minutes.
//
// On success the parsed values (if any) are written to the service and
// SVC_CRITICAL is set. Any malformed or unknown argument yields an Error and
// leaves the service untouched.
Result<void> ServiceParser::ParseCritical(std::vector<std::string>&& args) {
    std::optional<std::string> fatal_reboot_target;
    std::optional<std::chrono::minutes> fatal_crash_window;

    for (auto it = args.begin() + 1; it != args.end(); ++it) {
        auto arg = android::base::Split(*it, "=");
        if (arg.size() != 2) {
            return Error() << "critical: Argument '" << *it << "' is not supported";
        }

        if (arg[0] == "target") {
            fatal_reboot_target = arg[1];
        } else if (arg[0] == "window") {
            auto window = ExpandProps(arg[1]);
            if (!window.ok()) {
                return Error() << "critical: Could not expand argument '" << arg[1] << "'";
            }
            if (*window == "off") {
                // Explicitly disabled: the service is not treated as critical.
                return {};
            }
            int minutes = 0;
            // The lower bound is 1 so that the check agrees with the "> 0"
            // requirement stated in the error message below.
            if (!ParseInt(*window, &minutes, 1)) {
                return Error() << "critical: 'fatal_crash_window' must be an integer > 0";
            }
            fatal_crash_window = std::chrono::minutes(minutes);
        } else {
            return Error() << "critical: Argument '" << *it << "' is not supported";
        }
    }

    // Commit only after every argument has been validated.
    if (fatal_reboot_target) {
        service_->fatal_reboot_target_ = *fatal_reboot_target;
    }
    if (fatal_crash_window) {
        service_->fatal_crash_window_ = *fatal_crash_window;
    }
    service_->flags_ |= SVC_CRITICAL;
    return {};
}
@ -506,7 +539,7 @@ const KeywordMap<ServiceParser::OptionParser>& ServiceParser::GetParserMap() con
{"capabilities", {0, kMax, &ServiceParser::ParseCapabilities}},
{"class", {1, kMax, &ServiceParser::ParseClass}},
{"console", {0, 1, &ServiceParser::ParseConsole}},
{"critical", {0, 0, &ServiceParser::ParseCritical}},
{"critical", {0, 2, &ServiceParser::ParseCritical}},
{"disabled", {0, 0, &ServiceParser::ParseDisabled}},
{"enter_namespace", {2, 2, &ServiceParser::ParseEnterNamespace}},
{"file", {2, 2, &ServiceParser::ParseFile}},

View file

@ -13,3 +13,4 @@ service zygote /system/bin/app_process -Xzygote /system/bin --zygote --start-sys
onrestart restart netd
onrestart restart wificond
writepid /dev/cpuset/foreground/tasks
critical window=${zygote.critical_window.minute:-off} target=zygote-fatal

View file

@ -13,3 +13,4 @@ service zygote /system/bin/app_process64 -Xzygote /system/bin --zygote --start-s
onrestart restart netd
onrestart restart wificond
writepid /dev/cpuset/foreground/tasks
critical window=${zygote.critical_window.minute:-off} target=zygote-fatal

View file

@ -13,6 +13,7 @@ service zygote /system/bin/app_process64 -Xzygote /system/bin --zygote --start-s
onrestart restart netd
onrestart restart wificond
task_profiles ProcessCapacityHigh MaxPerformance
critical window=${zygote.critical_window.minute:-off} target=zygote-fatal
service zygote_secondary /system/bin/app_process32 -Xzygote /system/bin --zygote --socket-name=zygote_secondary --enable-lazy-preload
class main