Merge "init start time tracking."

This commit is contained in:
Elliott Hughes 2016-11-14 21:20:59 +00:00 committed by Gerrit Code Review
commit 601bf9e7c9
8 changed files with 121 additions and 97 deletions

View file

@ -68,7 +68,7 @@
#define UNMOUNT_CHECK_MS 5000
#define UNMOUNT_CHECK_TIMES 10
static const int kTerminateServiceDelayMicroSeconds = 50000;
static constexpr std::chrono::nanoseconds kCommandRetryTimeout = 5s;
static int insmod(const char *filename, const char *options, int flags) {
int fd = open(filename, O_RDONLY | O_NOFOLLOW | O_CLOEXEC);
@ -444,7 +444,7 @@ static int do_mount(const std::vector<std::string>& args) {
return -1;
} else {
if (wait)
wait_for_file(source, COMMAND_RETRY_TIMEOUT);
wait_for_file(source, kCommandRetryTimeout);
if (mount(source, target, system, flags, options) < 0) {
return -1;
}
@ -749,7 +749,7 @@ static int do_powerctl(const std::vector<std::string>& args) {
}
// Wait a bit before recounting the number or running services.
usleep(kTerminateServiceDelayMicroSeconds);
usleep(50000 /*us*/);
}
LOG(VERBOSE) << "Terminating running services took " << t.duration() << " seconds";
}
@ -956,11 +956,11 @@ static int do_load_system_props(const std::vector<std::string>& args) {
static int do_wait(const std::vector<std::string>& args) {
if (args.size() == 2) {
return wait_for_file(args[1].c_str(), COMMAND_RETRY_TIMEOUT);
return wait_for_file(args[1].c_str(), kCommandRetryTimeout);
} else if (args.size() == 3) {
int timeout;
if (android::base::ParseInt(args[2], &timeout)) {
return wait_for_file(args[1].c_str(), timeout);
return wait_for_file(args[1].c_str(), std::chrono::seconds(timeout));
}
}
return -1;

View file

@ -18,6 +18,7 @@
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
#include <libgen.h>
#include <paths.h>
#include <signal.h>
@ -67,6 +68,8 @@
#include "util.h"
#include "watchdogd.h"
using android::base::StringPrintf;
struct selabel_handle *sehandle;
struct selabel_handle *sehandle_prop;
@ -75,7 +78,7 @@ static int property_triggers_enabled = 0;
static char qemu[32];
std::string default_console = "/dev/console";
static time_t process_needs_restart;
static time_t process_needs_restart_at;
const char *ENV[32];
@ -132,11 +135,10 @@ void property_changed(const char *name, const char *value)
static void restart_processes()
{
process_needs_restart = 0;
ServiceManager::GetInstance().
ForEachServiceWithFlags(SVC_RESTARTING, [] (Service* s) {
s->RestartIfNeeded(process_needs_restart);
});
process_needs_restart_at = 0;
ServiceManager::GetInstance().ForEachServiceWithFlags(SVC_RESTARTING, [](Service* s) {
s->RestartIfNeeded(&process_needs_restart_at);
});
}
void handle_control_message(const std::string& msg, const std::string& name) {
@ -164,7 +166,7 @@ static int wait_for_coldboot_done_action(const std::vector<std::string>& args) {
// Any longer than 1s is an unreasonable length of time to delay booting.
// If you're hitting this timeout, check that you didn't make your
// sepolicy regular expressions too expensive (http://b/19899875).
if (wait_for_file(COLDBOOT_DONE, 1)) {
if (wait_for_file(COLDBOOT_DONE, 1s)) {
LOG(ERROR) << "Timed out waiting for " COLDBOOT_DONE;
}
@ -268,15 +270,14 @@ static void import_kernel_nv(const std::string& key, const std::string& value, b
if (for_emulator) {
// In the emulator, export any kernel option with the "ro.kernel." prefix.
property_set(android::base::StringPrintf("ro.kernel.%s", key.c_str()).c_str(), value.c_str());
property_set(StringPrintf("ro.kernel.%s", key.c_str()).c_str(), value.c_str());
return;
}
if (key == "qemu") {
strlcpy(qemu, value.c_str(), sizeof(qemu));
} else if (android::base::StartsWith(key, "androidboot.")) {
property_set(android::base::StringPrintf("ro.boot.%s", key.c_str() + 12).c_str(),
value.c_str());
property_set(StringPrintf("ro.boot.%s", key.c_str() + 12).c_str(), value.c_str());
}
}
@ -314,7 +315,7 @@ static void export_kernel_boot_props() {
static void process_kernel_dt() {
static const char android_dir[] = "/proc/device-tree/firmware/android";
std::string file_name = android::base::StringPrintf("%s/compatible", android_dir);
std::string file_name = StringPrintf("%s/compatible", android_dir);
std::string dt_file;
android::base::ReadFileToString(file_name, &dt_file);
@ -332,12 +333,12 @@ static void process_kernel_dt() {
continue;
}
file_name = android::base::StringPrintf("%s/%s", android_dir, dp->d_name);
file_name = StringPrintf("%s/%s", android_dir, dp->d_name);
android::base::ReadFileToString(file_name, &dt_file);
std::replace(dt_file.begin(), dt_file.end(), ',', '.');
std::string property_name = android::base::StringPrintf("ro.boot.%s", dp->d_name);
std::string property_name = StringPrintf("ro.boot.%s", dp->d_name);
property_set(property_name.c_str(), dt_file.c_str());
}
}
@ -566,12 +567,14 @@ int main(int argc, char** argv) {
return watchdogd_main(argc, argv);
}
boot_clock::time_point start_time = boot_clock::now();
// Clear the umask.
umask(0);
add_environment("PATH", _PATH_DEFPATH);
bool is_first_stage = (argc == 1) || (strcmp(argv[1], "--second-stage") != 0);
bool is_first_stage = (getenv("INIT_SECOND_STAGE") == nullptr);
// Don't expose the raw commandline to unprivileged processes.
chmod("/proc/cmdline", 0440);
@ -596,32 +599,34 @@ int main(int argc, char** argv) {
// talk to the outside world...
InitKernelLogging(argv);
if (is_first_stage) {
LOG(INFO) << "init first stage started!";
LOG(INFO) << "init " << (is_first_stage ? "first" : "second") << " stage started!";
if (is_first_stage) {
// Mount devices defined in android.early.* kernel commandline
early_mount();
// Set up SELinux, including loading the SELinux policy if we're in the kernel domain.
// Set up SELinux, loading the SELinux policy.
selinux_initialize(true);
// If we're in the kernel domain, re-exec init to transition to the init domain now
// We're in the kernel domain, so re-exec init to transition to the init domain now
// that the SELinux policy has been loaded.
if (restorecon("/init") == -1) {
PLOG(ERROR) << "restorecon failed";
security_failure();
}
setenv("INIT_SECOND_STAGE", "true", 1);
uint64_t start_ns = start_time.time_since_epoch().count();
setenv("INIT_STARTED_AT", StringPrintf("%" PRIu64, start_ns).c_str(), 1);
char* path = argv[0];
char* args[] = { path, const_cast<char*>("--second-stage"), nullptr };
char* args[] = { path, nullptr };
if (execv(path, args) == -1) {
PLOG(ERROR) << "execv(\"" << path << "\") failed";
security_failure();
}
} else {
LOG(INFO) << "init second stage started!";
// Indicate that booting is in progress to background fw loaders, etc.
close(open("/dev/.booting", O_WRONLY | O_CREAT | O_CLOEXEC, 0000));
@ -636,7 +641,10 @@ int main(int argc, char** argv) {
// used by init as well as the current required properties.
export_kernel_boot_props();
// Now set up SELinux for second stage
// Make the time that init started available for bootstat to log.
property_set("init.start", getenv("INIT_STARTED_AT"));
// Now set up SELinux for second stage.
selinux_initialize(false);
}
@ -710,21 +718,22 @@ int main(int argc, char** argv) {
restart_processes();
}
int timeout = -1;
if (process_needs_restart) {
timeout = (process_needs_restart - gettime()) * 1000;
if (timeout < 0)
timeout = 0;
// By default, sleep until something happens.
int epoll_timeout_ms = -1;
// If there's more work to do, wake up again immediately.
if (am.HasMoreCommands()) epoll_timeout_ms = 0;
// If there's a process that needs restarting, wake up in time for that.
if (process_needs_restart_at != 0) {
epoll_timeout_ms = (process_needs_restart_at - time(nullptr)) * 1000;
if (epoll_timeout_ms < 0) epoll_timeout_ms = 0;
}
if (am.HasMoreCommands()) {
timeout = 0;
}
bootchart_sample(&timeout);
bootchart_sample(&epoll_timeout_ms);
epoll_event ev;
int nr = TEMP_FAILURE_RETRY(epoll_wait(epoll_fd, &ev, 1, timeout));
int nr = TEMP_FAILURE_RETRY(epoll_wait(epoll_fd, &ev, 1, epoll_timeout_ms));
if (nr == -1) {
PLOG(ERROR) << "epoll_wait failed";
} else if (nr == 1) {

View file

@ -22,8 +22,6 @@
class Action;
class Service;
#define COMMAND_RETRY_TIMEOUT 5
extern const char *ENV[32];
extern bool waiting_for_exec;
extern std::string default_console;

View file

@ -440,8 +440,16 @@ Properties
Init provides information about the services that it is responsible
for via the below properties.
init.start
Time after boot in ns (via the CLOCK_BOOTTIME clock) at which the first
stage of init started.
init.svc.<name>
State of a named service ("stopped", "stopping", "running", "restarting")
State of a named service ("stopped", "stopping", "running", "restarting")
init.svc.<name>.start
Time after boot in ns (via the CLOCK_BOOTTIME clock) that the service was
most recently started.
Bootcharting
@ -537,10 +545,10 @@ service akmd /system/bin/logwrapper /sbin/akmd
For quicker turnaround when working on init itself, use:
mm -j
m ramdisk-nodeps
m bootimage-nodeps
adb reboot bootloader
mm -j &&
m ramdisk-nodeps &&
m bootimage-nodeps &&
adb reboot bootloader &&
fastboot boot $ANDROID_PRODUCT_OUT/boot.img
Alternatively, use the emulator:

View file

@ -17,6 +17,7 @@
#include "service.h"
#include <fcntl.h>
#include <inttypes.h>
#include <linux/securebits.h>
#include <sched.h>
#include <sys/mount.h>
@ -51,9 +52,6 @@ using android::base::ParseInt;
using android::base::StringPrintf;
using android::base::WriteStringToFile;
#define CRITICAL_CRASH_THRESHOLD 4 // if we crash >4 times ...
#define CRITICAL_CRASH_WINDOW (4*60) // ... in 4 minutes, goto recovery
static std::string ComputeContextFromExecutable(std::string& service_name,
const std::string& service_path) {
std::string computed_context;
@ -154,8 +152,8 @@ ServiceEnvironmentInfo::ServiceEnvironmentInfo(const std::string& name,
Service::Service(const std::string& name, const std::string& classname,
const std::vector<std::string>& args)
: name_(name), classname_(classname), flags_(0), pid_(0), time_started_(0),
time_crashed_(0), nr_crashed_(0), uid_(0), gid_(0), namespace_flags_(0),
: name_(name), classname_(classname), flags_(0), pid_(0),
crash_count_(0), uid_(0), gid_(0), namespace_flags_(0),
seclabel_(""), ioprio_class_(IoSchedClass_NONE), ioprio_pri_(0),
priority_(0), oom_score_adjust_(-1000), args_(args) {
onrestart_.InitSingleTrigger("onrestart");
@ -168,7 +166,7 @@ Service::Service(const std::string& name, const std::string& classname,
const std::string& seclabel,
const std::vector<std::string>& args)
: name_(name), classname_(classname), flags_(flags), pid_(0),
time_started_(0), time_crashed_(0), nr_crashed_(0), uid_(uid), gid_(gid),
crash_count_(0), uid_(uid), gid_(gid),
supp_gids_(supp_gids), capabilities_(capabilities),
namespace_flags_(namespace_flags), seclabel_(seclabel),
ioprio_class_(IoSchedClass_NONE), ioprio_pri_(0), priority_(0),
@ -190,6 +188,12 @@ void Service::NotifyStateChange(const std::string& new_state) const {
}
property_set(prop_name.c_str(), new_state.c_str());
if (new_state == "running") {
prop_name += ".start";
uint64_t start_ns = time_started_.time_since_epoch().count();
property_set(prop_name.c_str(), StringPrintf("%" PRIu64, start_ns).c_str());
}
}
void Service::KillProcessGroup(int signal) {
@ -274,20 +278,19 @@ bool Service::Reap() {
return false;
}
time_t now = gettime();
// If we crash > 4 times in 4 minutes, reboot into recovery.
boot_clock::time_point now = boot_clock::now();
if ((flags_ & SVC_CRITICAL) && !(flags_ & SVC_RESTART)) {
if (time_crashed_ + CRITICAL_CRASH_WINDOW >= now) {
if (++nr_crashed_ > CRITICAL_CRASH_THRESHOLD) {
LOG(ERROR) << "critical process '" << name_ << "' exited "
<< CRITICAL_CRASH_THRESHOLD << " times in "
<< (CRITICAL_CRASH_WINDOW / 60) << " minutes; "
if (now < time_crashed_ + 4min) {
if (++crash_count_ > 4) {
LOG(ERROR) << "critical process '" << name_ << "' exited 4 times in 4 minutes; "
<< "rebooting into recovery mode";
android_reboot(ANDROID_RB_RESTART2, 0, "recovery");
return false;
}
} else {
time_crashed_ = now;
nr_crashed_ = 1;
crash_count_ = 1;
}
}
@ -553,7 +556,6 @@ bool Service::Start() {
// Starting a service removes it from the disabled or reset state and
// immediately takes it out of the restarting state if it was in there.
flags_ &= (~(SVC_DISABLED|SVC_RESTARTING|SVC_RESET|SVC_RESTART|SVC_DISABLED_START));
time_started_ = 0;
// Running processes require no additional work --- if they're in the
// process of exiting, we've ensured that they will immediately restart
@ -667,7 +669,7 @@ bool Service::Start() {
}
}
time_started_ = gettime();
time_started_ = boot_clock::now();
pid_ = pid;
flags_ |= SVC_RUNNING;
@ -731,18 +733,19 @@ void Service::Restart() {
} /* else: Service is restarting anyways. */
}
void Service::RestartIfNeeded(time_t& process_needs_restart) {
time_t next_start_time = time_started_ + 5;
if (next_start_time <= gettime()) {
void Service::RestartIfNeeded(time_t* process_needs_restart_at) {
boot_clock::time_point now = boot_clock::now();
boot_clock::time_point next_start = time_started_ + 5s;
if (now > next_start) {
flags_ &= (~SVC_RESTARTING);
Start();
return;
}
if ((next_start_time < process_needs_restart) ||
(process_needs_restart == 0)) {
process_needs_restart = next_start_time;
time_t next_start_time_t = time(nullptr) +
time_t(std::chrono::duration_cast<std::chrono::seconds>(next_start - now).count());
if (next_start_time_t < *process_needs_restart_at || *process_needs_restart_at == 0) {
*process_needs_restart_at = next_start_time_t;
}
}

View file

@ -30,6 +30,7 @@
#include "descriptors.h"
#include "init_parser.h"
#include "keyword_map.h"
#include "util.h"
#define SVC_DISABLED 0x001 // do not autostart with class
#define SVC_ONESHOT 0x002 // do not restart on exit
@ -75,7 +76,7 @@ public:
void Stop();
void Terminate();
void Restart();
void RestartIfNeeded(time_t& process_needs_restart);
void RestartIfNeeded(time_t* process_needs_restart_at);
bool Reap();
void DumpState() const;
@ -134,9 +135,9 @@ private:
unsigned flags_;
pid_t pid_;
time_t time_started_; // time of last start
time_t time_crashed_; // first crash within inspection window
int nr_crashed_; // number of times crashed within window
boot_clock::time_point time_started_; // time of last start
boot_clock::time_point time_crashed_; // first crash within inspection window
int crash_count_; // number of times crashed within window
uid_t uid_;
gid_t gid_;

View file

@ -258,16 +258,11 @@ int write_file(const char* path, const char* content) {
return result;
}
time_t gettime() {
timespec now;
clock_gettime(CLOCK_MONOTONIC, &now);
return now.tv_sec;
}
uint64_t gettime_ns() {
timespec now;
clock_gettime(CLOCK_MONOTONIC, &now);
return static_cast<uint64_t>(now.tv_sec) * UINT64_C(1000000000) + now.tv_nsec;
boot_clock::time_point boot_clock::now() {
timespec ts;
clock_gettime(CLOCK_BOOTTIME, &ts);
return boot_clock::time_point(std::chrono::seconds(ts.tv_sec) +
std::chrono::nanoseconds(ts.tv_nsec));
}
int mkdir_recursive(const char *pathname, mode_t mode)
@ -325,16 +320,15 @@ void sanitize(char *s)
}
}
int wait_for_file(const char *filename, int timeout)
{
struct stat info;
uint64_t timeout_time_ns = gettime_ns() + timeout * UINT64_C(1000000000);
int ret = -1;
int wait_for_file(const char* filename, std::chrono::nanoseconds timeout) {
boot_clock::time_point timeout_time = boot_clock::now() + timeout;
while (boot_clock::now() < timeout_time) {
struct stat sb;
if (stat(filename, &sb) != -1) return 0;
while (gettime_ns() < timeout_time_ns && ((ret = stat(filename, &info)) < 0))
usleep(10000);
return ret;
}
return -1;
}
void import_kernel_cmdline(bool in_qemu,

View file

@ -20,11 +20,14 @@
#include <sys/stat.h>
#include <sys/types.h>
#include <chrono>
#include <string>
#include <functional>
#define COLDBOOT_DONE "/dev/.coldboot_done"
using namespace std::chrono_literals;
int create_socket(const char *name, int type, mode_t perm,
uid_t uid, gid_t gid, const char *socketcon);
int create_file(const char *path, int mode, mode_t perm,
@ -33,27 +36,35 @@ int create_file(const char *path, int mode, mode_t perm,
bool read_file(const char* path, std::string* content);
int write_file(const char* path, const char* content);
time_t gettime();
uint64_t gettime_ns();
// A std::chrono clock based on CLOCK_BOOTTIME.
class boot_clock {
public:
typedef std::chrono::nanoseconds duration;
typedef std::chrono::time_point<boot_clock, duration> time_point;
static constexpr bool is_steady = true;
static time_point now();
};
class Timer {
public:
Timer() : t0(gettime_ns()) {
Timer() : start_(boot_clock::now()) {
}
double duration() {
return static_cast<double>(gettime_ns() - t0) / 1000000000.0;
typedef std::chrono::duration<double> double_duration;
return std::chrono::duration_cast<double_duration>(boot_clock::now() - start_).count();
}
private:
uint64_t t0;
boot_clock::time_point start_;
};
unsigned int decode_uid(const char *s);
int mkdir_recursive(const char *pathname, mode_t mode);
void sanitize(char *p);
int wait_for_file(const char *filename, int timeout);
int wait_for_file(const char *filename, std::chrono::nanoseconds timeout);
void import_kernel_cmdline(bool in_qemu,
const std::function<void(const std::string&, const std::string&, bool)>&);
int make_dir(const char *path, mode_t mode);