Merge "Throttle reboot from native watchdog" into main

This commit is contained in:
Shrinidhi Hegde 2024-04-12 14:30:23 +00:00 committed by Gerrit Code Review
commit 4922633d3f

View file

@ -355,20 +355,35 @@ void Service::Reap(const siginfo_t& siginfo) {
// If we crash > 4 times in 'fatal_crash_window_' minutes or before boot_completed,
// reboot into bootloader or set crashing property
boot_clock::time_point now = boot_clock::now();
constexpr const char native_watchdog_reboot_time[] = "persist.init.svc.last_fatal_reboot_epoch";
uint64_t throttle_window =
std::chrono::duration_cast<std::chrono::seconds>(std::chrono::hours(24)).count();
if (((flags_ & SVC_CRITICAL) || is_process_updatable) && !(flags_ & SVC_RESTART) &&
!was_last_exit_ok_) {
bool boot_completed = GetBoolProperty("sys.boot_completed", false);
if (now < time_crashed_ + fatal_crash_window_ || !boot_completed) {
if (++crash_count_ > 4) {
auto exit_reason = boot_completed ?
"in " + std::to_string(fatal_crash_window_.count()) + " minutes" :
"before boot completed";
auto exit_reason =
boot_completed
? "in " + std::to_string(fatal_crash_window_.count()) + " minutes"
: "before boot completed";
if (flags_ & SVC_CRITICAL) {
if (!GetBoolProperty("init.svc_debug.no_fatal." + name_, false)) {
// Aborts into `fatal_reboot_target_'.
SetFatalRebootTarget(fatal_reboot_target_);
LOG(FATAL) << "critical process '" << name_ << "' exited 4 times "
<< exit_reason;
uint64_t epoch_time =
std::chrono::duration_cast<std::chrono::seconds>(
std::chrono::system_clock::now().time_since_epoch())
.count();
// Do not reboot again if a reboot was already initiated in the last 24 hours.
if (epoch_time - GetIntProperty(native_watchdog_reboot_time, 0) >
throttle_window) {
SetProperty(native_watchdog_reboot_time, std::to_string(epoch_time));
// Aborts into `fatal_reboot_target_'.
SetFatalRebootTarget(fatal_reboot_target_);
LOG(FATAL) << "critical process '" << name_ << "' exited 4 times "
<< exit_reason;
} else {
LOG(INFO) << "Reboot already performed in last 24hrs because of crash.";
}
}
} else {
LOG(ERROR) << "process with updatable components '" << name_