platform_system_core/init/reboot.cpp
Keun-young Park 0e90dee9e0 poll umount completion from /proc/mounts
- umount operation is asynchronous except for root partition.
  Returning from umount does not guarantee completion of
  umount. Poll /proc/mounts to confirm completion of umount.
- Treat all devices mounting to /data as emulated devices. This is
  future proof when fs other than sdcardfs is used.
- Drop quota sync from sync step. There is no differences in
  frequencies of quota error.
- Run umount in reverse order from mounting order so that any
  hidden dependency can be auto-resolved.
- Add dump of lsof and /proc/mounts when umount fails. lsof only runs
  when selinux is toggled into permissive mode. The dump is enabled
  only for non-user build.
- Keep logcat until vold shutdown in case vold has any error to report.

bug: 36551218
Test: python packages/services/Car/tools/bootanalyze/bootanalyze.py -r -c packages/services/Car/tools/bootanalyze/config.yaml -n 1000 -f -e 20 -w 30

Change-Id: I87b17b966d7004c205452d81460b02c6acf50d45
(cherry picked from commit 2ba5c8103d)
2017-04-10 17:48:19 -07:00

425 lines
15 KiB
C++

/*
* Copyright (C) 2017 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <dirent.h>
#include <fcntl.h>
#include <linux/fs.h>
#include <mntent.h>
#include <selinux/selinux.h>
#include <sys/cdefs.h>
#include <sys/ioctl.h>
#include <sys/mount.h>
#include <sys/reboot.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <memory>
#include <set>
#include <string>
#include <thread>
#include <vector>
#include <android-base/file.h>
#include <android-base/macros.h>
#include <android-base/properties.h>
#include <android-base/stringprintf.h>
#include <android-base/strings.h>
#include <android-base/unique_fd.h>
#include <bootloader_message/bootloader_message.h>
#include <cutils/android_reboot.h>
#include <fs_mgr.h>
#include <logwrap/logwrap.h>
#include "log.h"
#include "property_service.h"
#include "reboot.h"
#include "service.h"
#include "util.h"
using android::base::StringPrintf;
// represents umount status during reboot / shutdown.
enum UmountStat {
/* umount succeeded. */
UMOUNT_STAT_SUCCESS = 0,
/* umount was not run. */
UMOUNT_STAT_SKIPPED = 1,
/* umount failed with timeout. */
UMOUNT_STAT_TIMEOUT = 2,
/* could not run due to error */
UMOUNT_STAT_ERROR = 3,
/* not used by init but reserved for other part to use this to represent the
the state where umount status before reboot is not found / available. */
UMOUNT_STAT_NOT_AVAILABLE = 4,
};
// Utility for struct mntent
class MountEntry {
public:
explicit MountEntry(const mntent& entry)
: mnt_fsname_(entry.mnt_fsname),
mnt_dir_(entry.mnt_dir),
mnt_type_(entry.mnt_type),
mnt_opts_(entry.mnt_opts) {}
bool Umount() {
int r = umount2(mnt_dir_.c_str(), 0);
if (r == 0) {
LOG(INFO) << "umounted " << mnt_fsname_ << ":" << mnt_dir_ << " opts " << mnt_opts_;
return true;
} else {
PLOG(WARNING) << "cannot umount " << mnt_fsname_ << ":" << mnt_dir_ << " opts "
<< mnt_opts_;
return false;
}
}
void DoFsck() {
int st;
if (IsF2Fs()) {
const char* f2fs_argv[] = {
"/system/bin/fsck.f2fs", "-f", mnt_fsname_.c_str(),
};
android_fork_execvp_ext(arraysize(f2fs_argv), (char**)f2fs_argv, &st, true, LOG_KLOG,
true, nullptr, nullptr, 0);
} else if (IsExt4()) {
const char* ext4_argv[] = {
"/system/bin/e2fsck", "-f", "-y", mnt_fsname_.c_str(),
};
android_fork_execvp_ext(arraysize(ext4_argv), (char**)ext4_argv, &st, true, LOG_KLOG,
true, nullptr, nullptr, 0);
}
}
static bool IsBlockDevice(const struct mntent& mntent) {
return android::base::StartsWith(mntent.mnt_fsname, "/dev/block");
}
static bool IsEmulatedDevice(const struct mntent& mntent) {
return android::base::StartsWith(mntent.mnt_fsname, "/data/");
}
private:
bool IsF2Fs() const { return mnt_type_ == "f2fs"; }
bool IsExt4() const { return mnt_type_ == "ext4"; }
std::string mnt_fsname_;
std::string mnt_dir_;
std::string mnt_type_;
std::string mnt_opts_;
};
// Turn off backlight while we are performing power down cleanup activities.
static void TurnOffBacklight() {
static constexpr char OFF[] = "0";
android::base::WriteStringToFile(OFF, "/sys/class/leds/lcd-backlight/brightness");
static const char backlightDir[] = "/sys/class/backlight";
std::unique_ptr<DIR, int (*)(DIR*)> dir(opendir(backlightDir), closedir);
if (!dir) {
return;
}
struct dirent* dp;
while ((dp = readdir(dir.get())) != nullptr) {
if (((dp->d_type != DT_DIR) && (dp->d_type != DT_LNK)) || (dp->d_name[0] == '.')) {
continue;
}
std::string fileName = StringPrintf("%s/%s/brightness", backlightDir, dp->d_name);
android::base::WriteStringToFile(OFF, fileName);
}
}
static void ShutdownVold() {
const char* vdc_argv[] = {"/system/bin/vdc", "volume", "shutdown"};
int status;
android_fork_execvp_ext(arraysize(vdc_argv), (char**)vdc_argv, &status, true, LOG_KLOG, true,
nullptr, nullptr, 0);
}
static void LogShutdownTime(UmountStat stat, Timer* t) {
LOG(WARNING) << "powerctl_shutdown_time_ms:" << std::to_string(t->duration_ms()) << ":" << stat;
}
static void __attribute__((noreturn))
RebootSystem(unsigned int cmd, const std::string& rebootTarget) {
LOG(INFO) << "Reboot ending, jumping to kernel";
switch (cmd) {
case ANDROID_RB_POWEROFF:
reboot(RB_POWER_OFF);
break;
case ANDROID_RB_RESTART2:
syscall(__NR_reboot, LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2,
LINUX_REBOOT_CMD_RESTART2, rebootTarget.c_str());
break;
case ANDROID_RB_THERMOFF:
reboot(RB_POWER_OFF);
break;
}
// In normal case, reboot should not return.
PLOG(FATAL) << "reboot call returned";
abort();
}
/* Find all read+write block devices and emulated devices in /proc/mounts
* and add them to correpsponding list.
*/
static bool FindPartitionsToUmount(std::vector<MountEntry>* blockDevPartitions,
std::vector<MountEntry>* emulatedPartitions, bool dump) {
std::unique_ptr<std::FILE, int (*)(std::FILE*)> fp(setmntent("/proc/mounts", "r"), endmntent);
if (fp == nullptr) {
PLOG(ERROR) << "Failed to open /proc/mounts";
return false;
}
mntent* mentry;
while ((mentry = getmntent(fp.get())) != nullptr) {
if (dump) {
LOG(INFO) << "mount entry " << mentry->mnt_fsname << ":" << mentry->mnt_dir << " opts "
<< mentry->mnt_opts << " type " << mentry->mnt_type;
} else if (MountEntry::IsBlockDevice(*mentry) && hasmntopt(mentry, "rw")) {
blockDevPartitions->emplace(blockDevPartitions->begin(), *mentry);
} else if (MountEntry::IsEmulatedDevice(*mentry)) {
emulatedPartitions->emplace(emulatedPartitions->begin(), *mentry);
}
}
return true;
}
static void DumpUmountDebuggingInfo() {
int status;
if (!security_getenforce()) {
LOG(INFO) << "Run lsof";
const char* lsof_argv[] = {"/system/bin/lsof"};
android_fork_execvp_ext(arraysize(lsof_argv), (char**)lsof_argv, &status, true, LOG_KLOG,
true, nullptr, nullptr, 0);
}
FindPartitionsToUmount(nullptr, nullptr, true);
}
static UmountStat UmountPartitions(int timeoutMs) {
Timer t;
UmountStat stat = UMOUNT_STAT_TIMEOUT;
int retry = 0;
/* data partition needs all pending writes to be completed and all emulated partitions
* umounted.If the current waiting is not good enough, give
* up and leave it to e2fsck after reboot to fix it.
*/
while (true) {
std::vector<MountEntry> block_devices;
std::vector<MountEntry> emulated_devices;
if (!FindPartitionsToUmount(&block_devices, &emulated_devices, false)) {
return UMOUNT_STAT_ERROR;
}
if (block_devices.size() == 0) {
stat = UMOUNT_STAT_SUCCESS;
break;
}
if ((timeoutMs < t.duration_ms()) && retry > 0) { // try umount at least once
stat = UMOUNT_STAT_TIMEOUT;
break;
}
if (emulated_devices.size() > 0 &&
std::all_of(emulated_devices.begin(), emulated_devices.end(),
[](auto& entry) { return entry.Umount(); })) {
sync();
}
for (auto& entry : block_devices) {
entry.Umount();
}
retry++;
std::this_thread::sleep_for(100ms);
}
return stat;
}
static void KillAllProcesses() { android::base::WriteStringToFile("i", "/proc/sysrq-trigger"); }
/* Try umounting all emulated file systems R/W block device cfile systems.
* This will just try umount and give it up if it fails.
* For fs like ext4, this is ok as file system will be marked as unclean shutdown
* and necessary check can be done at the next reboot.
* For safer shutdown, caller needs to make sure that
* all processes / emulated partition for the target fs are all cleaned-up.
*
* return true when umount was successful. false when timed out.
*/
static UmountStat TryUmountAndFsck(bool runFsck, int timeoutMs) {
Timer t;
std::vector<MountEntry> block_devices;
std::vector<MountEntry> emulated_devices;
TurnOffBacklight(); // this part can take time. save power.
if (runFsck && !FindPartitionsToUmount(&block_devices, &emulated_devices, false)) {
return UMOUNT_STAT_ERROR;
}
UmountStat stat = UmountPartitions(timeoutMs - t.duration_ms());
if (stat != UMOUNT_STAT_SUCCESS) {
LOG(INFO) << "umount timeout, last resort, kill all and try";
if (DUMP_ON_UMOUNT_FAILURE) DumpUmountDebuggingInfo();
KillAllProcesses();
// even if it succeeds, still it is timeout and do not run fsck with all processes killed
UmountPartitions(0);
if (DUMP_ON_UMOUNT_FAILURE) DumpUmountDebuggingInfo();
}
if (stat == UMOUNT_STAT_SUCCESS && runFsck) {
// fsck part is excluded from timeout check. It only runs for user initiated shutdown
// and should not affect reboot time.
for (auto& entry : block_devices) {
entry.DoFsck();
}
}
return stat;
}
static void __attribute__((noreturn)) DoThermalOff() {
LOG(WARNING) << "Thermal system shutdown";
sync();
RebootSystem(ANDROID_RB_THERMOFF, "");
abort();
}
void DoReboot(unsigned int cmd, const std::string& reason, const std::string& rebootTarget,
bool runFsck) {
Timer t;
LOG(INFO) << "Reboot start, reason: " << reason << ", rebootTarget: " << rebootTarget;
android::base::WriteStringToFile(StringPrintf("%s\n", reason.c_str()), LAST_REBOOT_REASON_FILE);
if (cmd == ANDROID_RB_THERMOFF) { // do not wait if it is thermal
DoThermalOff();
abort();
}
/* TODO update default waiting time based on usage data */
constexpr unsigned int shutdownTimeoutDefault = 10;
unsigned int shutdownTimeout = shutdownTimeoutDefault;
if (SHUTDOWN_ZERO_TIMEOUT) { // eng build
shutdownTimeout = 0;
} else {
shutdownTimeout =
android::base::GetUintProperty("ro.build.shutdown_timeout", shutdownTimeoutDefault);
}
LOG(INFO) << "Shutdown timeout: " << shutdownTimeout;
// keep debugging tools until non critical ones are all gone.
const std::set<std::string> kill_after_apps{"tombstoned", "logd", "adbd"};
// watchdogd is a vendor specific component but should be alive to complete shutdown safely.
const std::set<std::string> to_starts{"watchdogd", "vold"};
ServiceManager::GetInstance().ForEachService([&kill_after_apps, &to_starts](Service* s) {
if (kill_after_apps.count(s->name())) {
s->SetShutdownCritical();
} else if (to_starts.count(s->name())) {
s->Start();
s->SetShutdownCritical();
}
});
Service* bootAnim = ServiceManager::GetInstance().FindServiceByName("bootanim");
Service* surfaceFlinger = ServiceManager::GetInstance().FindServiceByName("surfaceflinger");
if (bootAnim != nullptr && surfaceFlinger != nullptr && surfaceFlinger->IsRunning()) {
property_set("service.bootanim.exit", "0");
// Could be in the middle of animation. Stop and start so that it can pick
// up the right mode.
bootAnim->Stop();
// start all animation classes if stopped.
ServiceManager::GetInstance().ForEachServiceInClass("animation", [](Service* s) {
s->Start();
s->SetShutdownCritical(); // will not check animation class separately
});
bootAnim->Start();
surfaceFlinger->SetShutdownCritical();
bootAnim->SetShutdownCritical();
}
// optional shutdown step
// 1. terminate all services except shutdown critical ones. wait for delay to finish
if (shutdownTimeout > 0) {
LOG(INFO) << "terminating init services";
// Ask all services to terminate except shutdown critical ones.
ServiceManager::GetInstance().ForEachService([](Service* s) {
if (!s->IsShutdownCritical()) s->Terminate();
});
int service_count = 0;
// Up to half as long as shutdownTimeout or 3 seconds, whichever is lower.
unsigned int terminationWaitTimeout = std::min<unsigned int>((shutdownTimeout + 1) / 2, 3);
while (t.duration_s() < terminationWaitTimeout) {
ServiceManager::GetInstance().ReapAnyOutstandingChildren();
service_count = 0;
ServiceManager::GetInstance().ForEachService([&service_count](Service* s) {
// Count the number of services running except shutdown critical.
// Exclude the console as it will ignore the SIGTERM signal
// and not exit.
// Note: SVC_CONSOLE actually means "requires console" but
// it is only used by the shell.
if (!s->IsShutdownCritical() && s->pid() != 0 && (s->flags() & SVC_CONSOLE) == 0) {
service_count++;
}
});
if (service_count == 0) {
// All terminable services terminated. We can exit early.
break;
}
// Wait a bit before recounting the number or running services.
std::this_thread::sleep_for(50ms);
}
LOG(INFO) << "Terminating running services took " << t
<< " with remaining services:" << service_count;
}
// minimum safety steps before restarting
// 2. kill all services except ones that are necessary for the shutdown sequence.
ServiceManager::GetInstance().ForEachService([](Service* s) {
if (!s->IsShutdownCritical()) s->Stop();
});
ServiceManager::GetInstance().ReapAnyOutstandingChildren();
// 3. send volume shutdown to vold
Service* voldService = ServiceManager::GetInstance().FindServiceByName("vold");
if (voldService != nullptr && voldService->IsRunning()) {
ShutdownVold();
voldService->Stop();
} else {
LOG(INFO) << "vold not running, skipping vold shutdown";
}
// logcat stopped here
ServiceManager::GetInstance().ForEachService([&kill_after_apps](Service* s) {
if (kill_after_apps.count(s->name())) s->Stop();
});
// 4. sync, try umount, and optionally run fsck for user shutdown
sync();
UmountStat stat = TryUmountAndFsck(runFsck, shutdownTimeout * 1000 - t.duration_ms());
// Follow what linux shutdown is doing: one more sync with little bit delay
sync();
std::this_thread::sleep_for(100ms);
LogShutdownTime(stat, &t);
// Reboot regardless of umount status. If umount fails, fsck after reboot will fix it.
RebootSystem(cmd, rebootTarget);
abort();
}