platform_system_core/metrics/metrics_daemon.cc
Luigi Semenzato 942cbabb1c Modify swap metrics to be more useful.
We need to record the amount of swap used, and correlate it
with the total amount of swap.

The metrics added are Platform.MeminfoSwapUsed and
Platform.MeminfoSwapUsedPercent.

BUG=chromium-os:38583
TEST=not really
BRANCH=none

Change-Id: Iaf26c917e3c4d23f3f58f436047f8dd165177960
Reviewed-on: https://gerrit.chromium.org/gerrit/43123
Tested-by: Luigi Semenzato <semenzato@chromium.org>
Reviewed-by: Sonny Rao <sonnyrao@chromium.org>
Commit-Queue: Luigi Semenzato <semenzato@chromium.org>
2013-02-13 13:00:49 -08:00

1008 lines
34 KiB
C++

// Copyright (c) 2011 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "metrics_daemon.h"
#include <fcntl.h>
#include <math.h>
#include <string.h>
#include <time.h>
#include <base/file_util.h>
#include <base/logging.h>
#include <base/string_util.h>
#include <base/stringprintf.h>
#include <dbus/dbus-glib-lowlevel.h>
#include "counter.h"
using base::Time;
using base::TimeDelta;
using base::TimeTicks;
using std::map;
using std::string;
using std::vector;
// Yields the message of a DBusError-like object |e|, or "unknown error" when
// the message pointer is NULL, so it can always be streamed to a log.
#define SAFE_MESSAGE(e) (e.message ? e.message : "unknown error")
// D-Bus interface names of the signals handled by MessageFilter().
#define DBUS_IFACE_CRASH_REPORTER "org.chromium.CrashReporter"
#define DBUS_IFACE_POWER_MANAGER "org.chromium.PowerManager"
#define DBUS_IFACE_SESSION_MANAGER "org.chromium.SessionManagerInterface"
// Time unit conversion factors used to derive the intervals below.
static const int kSecondsPerMinute = 60;
static const int kMinutesPerHour = 60;
static const int kHoursPerDay = 24;
static const int kMinutesPerDay = kHoursPerDay * kMinutesPerHour;
static const int kSecondsPerDay = kSecondsPerMinute * kMinutesPerDay;
static const int kDaysPerWeek = 7;
static const int kSecondsPerWeek = kSecondsPerDay * kDaysPerWeek;
// The daily use monitor is scheduled to a 1-minute interval after
// initial user activity and then it's exponentially backed off to
// 10-minute intervals. Although not required, the back off is
// implemented because the histogram buckets are spaced exponentially
// anyway and to avoid too frequent metrics daemon process wake-ups
// and file I/O.
static const int kUseMonitorIntervalInit = 1 * kSecondsPerMinute;
static const int kUseMonitorIntervalMax = 10 * kSecondsPerMinute;
// Marker files checked (and deleted) by Run() via CheckSystemCrash(); their
// presence triggers ProcessKernelCrash() / ProcessUncleanShutdown().
const char kKernelCrashDetectedFile[] = "/tmp/kernel-crash-detected";
static const char kUncleanShutdownDetectedFile[] =
"/tmp/unclean-shutdown-detected";
// static metrics parameters
// Daily use time histogram; samples are sent in minutes by ReportDailyUse().
const char MetricsDaemon::kMetricDailyUseTimeName[] =
"Logging.DailyUseTime";
const int MetricsDaemon::kMetricDailyUseTimeMin = 1;
const int MetricsDaemon::kMetricDailyUseTimeMax = kMinutesPerDay;
const int MetricsDaemon::kMetricDailyUseTimeBuckets = 50;
// crash interval metrics
// The three interval histograms share the min/max/bucket parameters below
// (see ConfigureCrashIntervalReporter()).
const char MetricsDaemon::kMetricKernelCrashIntervalName[] =
"Logging.KernelCrashInterval";
const char MetricsDaemon::kMetricUncleanShutdownIntervalName[] =
"Logging.UncleanShutdownInterval";
const char MetricsDaemon::kMetricUserCrashIntervalName[] =
"Logging.UserCrashInterval";
const int MetricsDaemon::kMetricCrashIntervalMin = 1;
const int MetricsDaemon::kMetricCrashIntervalMax =
4 * kSecondsPerWeek;
const int MetricsDaemon::kMetricCrashIntervalBuckets = 50;
// crash frequency metrics
// A name containing "Weekly" selects a one-week counting cycle in
// ConfigureCrashFrequencyReporter(); every other name uses a one-day cycle.
const char MetricsDaemon::kMetricAnyCrashesDailyName[] =
"Logging.AnyCrashesDaily";
const char MetricsDaemon::kMetricAnyCrashesWeeklyName[] =
"Logging.AnyCrashesWeekly";
const char MetricsDaemon::kMetricKernelCrashesDailyName[] =
"Logging.KernelCrashesDaily";
const char MetricsDaemon::kMetricKernelCrashesWeeklyName[] =
"Logging.KernelCrashesWeekly";
const char MetricsDaemon::kMetricUncleanShutdownsDailyName[] =
"Logging.UncleanShutdownsDaily";
const char MetricsDaemon::kMetricUncleanShutdownsWeeklyName[] =
"Logging.UncleanShutdownsWeekly";
const char MetricsDaemon::kMetricUserCrashesDailyName[] =
"Logging.UserCrashesDaily";
const char MetricsDaemon::kMetricUserCrashesWeeklyName[] =
"Logging.UserCrashesWeekly";
// NOTE(review): these three are defined as char rather than int, unlike the
// other min/max/bucket constants; presumably this mirrors the declaration in
// the header. The values fit in a char, but confirm the type is intended.
const char MetricsDaemon::kMetricCrashFrequencyMin = 1;
const char MetricsDaemon::kMetricCrashFrequencyMax = 100;
const char MetricsDaemon::kMetricCrashFrequencyBuckets = 50;
// disk stats metrics
// The {Read,Write}Sectors numbers are in sectors/second.
// A sector is usually 512 bytes.
const char MetricsDaemon::kMetricReadSectorsLongName[] =
"Platform.ReadSectorsLong";
const char MetricsDaemon::kMetricWriteSectorsLongName[] =
"Platform.WriteSectorsLong";
const char MetricsDaemon::kMetricReadSectorsShortName[] =
"Platform.ReadSectorsShort";
const char MetricsDaemon::kMetricWriteSectorsShortName[] =
"Platform.WriteSectorsShort";
const int MetricsDaemon::kMetricStatsShortInterval = 1; // seconds
const int MetricsDaemon::kMetricStatsLongInterval = 30; // seconds
const int MetricsDaemon::kMetricMeminfoInterval = 30; // seconds
// Assume a max rate of roughly 250MB/s for reads (worse for writes) and
// 512-byte sectors: 500000 sectors/s * 512 bytes = 256MB/s.
const int MetricsDaemon::kMetricSectorsIOMax = 500000; // sectors/second
const int MetricsDaemon::kMetricSectorsBuckets = 50; // buckets
// Page size is 4k, sector size is 0.5k. We're not interested in page fault
// rates that the disk cannot sustain, hence the division by 8 sectors/page.
const int MetricsDaemon::kMetricPageFaultsMax = kMetricSectorsIOMax / 8;
const int MetricsDaemon::kMetricPageFaultsBuckets = 50;
// Major page faults, i.e. the ones that require data to be read from disk.
const char MetricsDaemon::kMetricPageFaultsLongName[] =
"Platform.PageFaultsLong";
const char MetricsDaemon::kMetricPageFaultsShortName[] =
"Platform.PageFaultsShort";
// persistent metrics path
// Directory under which GetHistogramPath() places per-histogram files.
const char MetricsDaemon::kMetricsPath[] = "/var/log/metrics";
// static
// D-Bus match rules registered by Init(). Adjacent string literals are
// concatenated, so the array holds exactly three rules.
const char* MetricsDaemon::kDBusMatches_[] = {
// Rule 1: UserCrash signals on the crash reporter interface.
"type='signal',"
"interface='" DBUS_IFACE_CRASH_REPORTER "',"
"path='/',"
"member='UserCrash'",
// Rule 2: every signal on the power manager interface (no member filter).
"type='signal',"
"interface='" DBUS_IFACE_POWER_MANAGER "',"
"path='/'",
// Rule 3: SessionStateChanged signals sent by the session manager.
"type='signal',"
"sender='org.chromium.SessionManager',"
"interface='" DBUS_IFACE_SESSION_MANAGER "',"
"path='/org/chromium/SessionManager',"
"member='SessionStateChanged'",
};
// static
// Power state names, indexed by PowerState value (see LookupPowerState()).
// The STATE macro stringifies its first argument; presumably power_states.h
// consists of STATE(name, capname) entries, one per state.
const char* MetricsDaemon::kPowerStates_[] = {
#define STATE(name, capname) #name,
#include "power_states.h"
};
// static
// Session state names, indexed by SessionState value (see
// LookupSessionState()). The STATE macro stringifies its first argument;
// presumably session_states.h consists of STATE(name, capname) entries.
const char* MetricsDaemon::kSessionStates_[] = {
#define STATE(name, capname) #name,
#include "session_states.h"
};
// Memory use stats collection intervals. We collect memory use stats at
// these intervals after boot, and we stop collecting after the last one,
// with the assumption that in most cases the memory use won't change much
// after that. Each entry is the wait (in seconds) since the previous
// collection; the trailing comments give the resulting cumulative mark.
static const int kMemuseIntervals[] = {
1 * kSecondsPerMinute, // 1 minute mark
4 * kSecondsPerMinute, // 5 minute mark
25 * kSecondsPerMinute, // 0.5 hour mark
120 * kSecondsPerMinute, // 2.5 hour mark
600 * kSecondsPerMinute, // 12.5 hour mark
};
// Constructs a daemon with unknown power and session states, the user marked
// inactive, no use monitor scheduled, and all stats counters zeroed.
// testing_ and metrics_lib_ are not set here; Init() assigns them.
MetricsDaemon::MetricsDaemon()
: power_state_(kUnknownPowerState),
session_state_(kUnknownSessionState),
user_active_(false),
usemon_interval_(0),
usemon_source_(NULL),
memuse_initial_time_(0),
memuse_interval_index_(0),
read_sectors_(0),
write_sectors_(0),
page_faults_(0),
stats_state_(kStatsShort),
stats_initial_time_(0) {}
// Frees the frequency counters held (as raw pointers) in frequency_counters_.
MetricsDaemon::~MetricsDaemon() {
DeleteFrequencyCounters();
}
// Returns the CLOCK_MONOTONIC reading in seconds, including the fractional
// part derived from the nanosecond field. Logs a warning and returns 0 when
// the clock cannot be read.
double MetricsDaemon::GetActiveTime() {
  struct timespec now;
  if (clock_gettime(CLOCK_MONOTONIC, &now) < 0) {
    PLOG(WARNING) << "clock_gettime(CLOCK_MONOTONIC) failed";
    return 0;
  }
  const double nanos_per_second = 1000 * 1000 * 1000;
  const double fraction = static_cast<double>(now.tv_nsec) / nanos_per_second;
  return now.tv_sec + fraction;
}
void MetricsDaemon::DeleteFrequencyCounters() {
for (FrequencyCounters::iterator i = frequency_counters_.begin();
i != frequency_counters_.end(); ++i) {
delete i->second;
i->second = NULL;
}
}
// Optionally detaches into the background, reports any kernel crash or
// unclean shutdown flagged by the marker files, then enters the main loop.
// Returns immediately if daemonizing was requested and failed.
void MetricsDaemon::Run(bool run_as_daemon) {
  if (run_as_daemon) {
    const int daemonized = daemon(0, 0);
    if (daemonized != 0)
      return;
  }

  // Kernel crash marker is handled before the unclean shutdown marker.
  const bool kernel_crashed = CheckSystemCrash(kKernelCrashDetectedFile);
  if (kernel_crashed)
    ProcessKernelCrash();

  const bool unclean_shutdown = CheckSystemCrash(kUncleanShutdownDetectedFile);
  if (unclean_shutdown)
    ProcessUncleanShutdown();

  Loop();
}
// Returns the path of the file named |histogram_name| inside kMetricsPath.
FilePath MetricsDaemon::GetHistogramPath(const char* histogram_name) {
  const FilePath metrics_dir(kMetricsPath);
  return metrics_dir.Append(histogram_name);
}
void MetricsDaemon::ConfigureCrashIntervalReporter(
const char* histogram_name,
scoped_ptr<chromeos_metrics::TaggedCounterReporter>* reporter) {
reporter->reset(new chromeos_metrics::TaggedCounterReporter());
FilePath file_path = GetHistogramPath(histogram_name);
(*reporter)->Init(file_path.value().c_str(),
histogram_name,
kMetricCrashIntervalMin,
kMetricCrashIntervalMax,
kMetricCrashIntervalBuckets);
}
void MetricsDaemon::ConfigureCrashFrequencyReporter(
const char* histogram_name) {
scoped_ptr<chromeos_metrics::TaggedCounterReporter> reporter(
new chromeos_metrics::TaggedCounterReporter());
FilePath file_path = GetHistogramPath(histogram_name);
reporter->Init(file_path.value().c_str(),
histogram_name,
kMetricCrashFrequencyMin,
kMetricCrashFrequencyMax,
kMetricCrashFrequencyBuckets);
scoped_ptr<chromeos_metrics::FrequencyCounter> new_counter(
new chromeos_metrics::FrequencyCounter());
time_t cycle_duration = strstr(histogram_name, "Weekly") != NULL ?
chromeos_metrics::kSecondsPerWeek :
chromeos_metrics::kSecondsPerDay;
new_counter->Init(
static_cast<chromeos_metrics::TaggedCounterInterface*>(
reporter.release()),
cycle_duration);
frequency_counters_[histogram_name] = new_counter.release();
}
void MetricsDaemon::Init(bool testing, MetricsLibraryInterface* metrics_lib,
const string& diskstats_path,
const string& vmstats_path) {
testing_ = testing;
DCHECK(metrics_lib != NULL);
metrics_lib_ = metrics_lib;
chromeos_metrics::TaggedCounterReporter::
SetMetricsLibraryInterface(metrics_lib);
static const char kDailyUseRecordFile[] = "/var/log/metrics/daily-usage";
daily_use_.reset(new chromeos_metrics::TaggedCounter());
daily_use_->Init(kDailyUseRecordFile, &ReportDailyUse, this);
ConfigureCrashIntervalReporter(kMetricKernelCrashIntervalName,
&kernel_crash_interval_);
ConfigureCrashIntervalReporter(kMetricUncleanShutdownIntervalName,
&unclean_shutdown_interval_);
ConfigureCrashIntervalReporter(kMetricUserCrashIntervalName,
&user_crash_interval_);
DeleteFrequencyCounters();
ConfigureCrashFrequencyReporter(kMetricAnyCrashesDailyName);
ConfigureCrashFrequencyReporter(kMetricAnyCrashesWeeklyName);
ConfigureCrashFrequencyReporter(kMetricKernelCrashesDailyName);
ConfigureCrashFrequencyReporter(kMetricKernelCrashesWeeklyName);
ConfigureCrashFrequencyReporter(kMetricUncleanShutdownsDailyName);
ConfigureCrashFrequencyReporter(kMetricUncleanShutdownsWeeklyName);
ConfigureCrashFrequencyReporter(kMetricUserCrashesDailyName);
ConfigureCrashFrequencyReporter(kMetricUserCrashesWeeklyName);
diskstats_path_ = diskstats_path;
vmstats_path_ = vmstats_path;
StatsReporterInit();
// Start collecting meminfo stats.
ScheduleMeminfoCallback(kMetricMeminfoInterval);
ScheduleMemuseCallback(true, 0);
// Don't setup D-Bus and GLib in test mode.
if (testing)
return;
g_thread_init(NULL);
g_type_init();
dbus_g_thread_init();
DBusError error;
dbus_error_init(&error);
DBusConnection* connection = dbus_bus_get(DBUS_BUS_SYSTEM, &error);
LOG_IF(FATAL, dbus_error_is_set(&error)) <<
"No D-Bus connection: " << SAFE_MESSAGE(error);
dbus_connection_setup_with_g_main(connection, NULL);
// Registers D-Bus matches for the signals we would like to catch.
for (unsigned int m = 0; m < arraysize(kDBusMatches_); m++) {
const char* match = kDBusMatches_[m];
DLOG(INFO) << "adding dbus match: " << match;
dbus_bus_add_match(connection, match, &error);
LOG_IF(FATAL, dbus_error_is_set(&error)) <<
"unable to add a match: " << SAFE_MESSAGE(error);
}
// Adds the D-Bus filter routine to be called back whenever one of
// the registered D-Bus matches is successful. The daemon is not
// activated for D-Bus messages that don't match.
CHECK(dbus_connection_add_filter(connection, MessageFilter, this, NULL));
}
void MetricsDaemon::Loop() {
GMainLoop* loop = g_main_loop_new(NULL, false);
g_main_loop_run(loop);
}
// static
DBusHandlerResult MetricsDaemon::MessageFilter(DBusConnection* connection,
DBusMessage* message,
void* user_data) {
Time now = Time::Now();
DLOG(INFO) << "message intercepted @ " << now.ToInternalValue();
int message_type = dbus_message_get_type(message);
if (message_type != DBUS_MESSAGE_TYPE_SIGNAL) {
DLOG(WARNING) << "unexpected message type " << message_type;
return DBUS_HANDLER_RESULT_NOT_YET_HANDLED;
}
// Signal messages always have interfaces.
const char* interface = dbus_message_get_interface(message);
CHECK(interface != NULL);
MetricsDaemon* daemon = static_cast<MetricsDaemon*>(user_data);
DBusMessageIter iter;
dbus_message_iter_init(message, &iter);
if (strcmp(interface, DBUS_IFACE_CRASH_REPORTER) == 0) {
CHECK(strcmp(dbus_message_get_member(message),
"UserCrash") == 0);
daemon->ProcessUserCrash();
} else if (strcmp(interface, DBUS_IFACE_POWER_MANAGER) == 0) {
const char* member = dbus_message_get_member(message);
if (strcmp(member, "ScreenIsLocked") == 0) {
daemon->SetUserActiveState(false, now);
} else if (strcmp(member, "ScreenIsUnlocked") == 0) {
daemon->SetUserActiveState(true, now);
} else if (strcmp(member, "PowerStateChanged") == 0) {
char* state_name;
dbus_message_iter_get_basic(&iter, &state_name);
daemon->PowerStateChanged(state_name, now);
}
} else if (strcmp(interface, DBUS_IFACE_SESSION_MANAGER) == 0) {
CHECK(strcmp(dbus_message_get_member(message),
"SessionStateChanged") == 0);
char* state_name;
dbus_message_iter_get_basic(&iter, &state_name);
daemon->SessionStateChanged(state_name, now);
} else {
DLOG(WARNING) << "unexpected interface: " << interface;
return DBUS_HANDLER_RESULT_NOT_YET_HANDLED;
}
return DBUS_HANDLER_RESULT_HANDLED;
}
// Records the power state named |state_name|. Any state other than
// kPowerStateOn also marks the user inactive as of |now|.
void MetricsDaemon::PowerStateChanged(const char* state_name, Time now) {
  DLOG(INFO) << "power state: " << state_name;
  power_state_ = LookupPowerState(state_name);
  if (power_state_ == kPowerStateOn)
    return;
  SetUserActiveState(false, now);
}
// Maps |state_name| to the PowerState whose kPowerStates_ entry equals it
// exactly. Returns kUnknownPowerState when no entry matches.
MetricsDaemon::PowerState
MetricsDaemon::LookupPowerState(const char* state_name) {
  int index = 0;
  while (index < kNumberPowerStates &&
         strcmp(state_name, kPowerStates_[index]) != 0) {
    ++index;
  }
  if (index < kNumberPowerStates)
    return static_cast<PowerState>(index);

  DLOG(WARNING) << "unknown power state: " << state_name;
  return kUnknownPowerState;
}
// Records the session state named |state_name| and marks the user active
// only when that state is kSessionStateStarted.
void MetricsDaemon::SessionStateChanged(const char* state_name, Time now) {
  DLOG(INFO) << "user session state: " << state_name;
  session_state_ = LookupSessionState(state_name);
  const bool session_started = (session_state_ == kSessionStateStarted);
  SetUserActiveState(session_started, now);
}
// Maps |state_name| to the SessionState whose kSessionStates_ entry equals it
// exactly. Returns kUnknownSessionState when no entry matches.
MetricsDaemon::SessionState
MetricsDaemon::LookupSessionState(const char* state_name) {
  int index = 0;
  while (index < kNumberSessionStates &&
         strcmp(state_name, kSessionStates_[index]) != 0) {
    ++index;
  }
  if (index < kNumberSessionStates)
    return static_cast<SessionState>(index);

  DLOG(WARNING) << "unknown user session state: " << state_name;
  return kUnknownSessionState;
}
void MetricsDaemon::SetUserActiveState(bool active, Time now) {
DLOG(INFO) << "user: " << (active ? "active" : "inactive");
// Calculates the seconds of active use since the last update and
// the day since Epoch, and logs the usage data. Guards against the
// time jumping back and forth due to the user changing it by
// discarding the new use time.
int seconds = 0;
if (user_active_ && now > user_active_last_) {
TimeDelta since_active = now - user_active_last_;
if (since_active < TimeDelta::FromSeconds(
kUseMonitorIntervalMax + kSecondsPerMinute)) {
seconds = static_cast<int>(since_active.InSeconds());
}
}
TimeDelta since_epoch = now - Time();
int day = since_epoch.InDays();
daily_use_->Update(day, seconds);
user_crash_interval_->Update(0, seconds);
kernel_crash_interval_->Update(0, seconds);
// Flush finished cycles of all frequency counters.
for (FrequencyCounters::iterator i = frequency_counters_.begin();
i != frequency_counters_.end(); ++i) {
i->second->FlushFinishedCycles();
}
// Schedules a use monitor on inactive->active transitions and
// unschedules it on active->inactive transitions.
if (!user_active_ && active)
ScheduleUseMonitor(kUseMonitorIntervalInit, /* backoff */ false);
else if (user_active_ && !active)
UnscheduleUseMonitor();
// Remembers the current active state and the time of the last
// activity update.
user_active_ = active;
user_active_last_ = now;
}
void MetricsDaemon::ProcessUserCrash() {
// Counts the active use time up to now.
SetUserActiveState(user_active_, Time::Now());
// Reports the active use time since the last crash and resets it.
user_crash_interval_->Flush();
frequency_counters_[kMetricUserCrashesDailyName]->Update(1);
frequency_counters_[kMetricUserCrashesWeeklyName]->Update(1);
frequency_counters_[kMetricAnyCrashesDailyName]->Update(1);
frequency_counters_[kMetricAnyCrashesWeeklyName]->Update(1);
}
void MetricsDaemon::ProcessKernelCrash() {
// Counts the active use time up to now.
SetUserActiveState(user_active_, Time::Now());
// Reports the active use time since the last crash and resets it.
kernel_crash_interval_->Flush();
frequency_counters_[kMetricKernelCrashesDailyName]->Update(1);
frequency_counters_[kMetricKernelCrashesWeeklyName]->Update(1);
frequency_counters_[kMetricAnyCrashesDailyName]->Update(1);
frequency_counters_[kMetricAnyCrashesWeeklyName]->Update(1);
}
void MetricsDaemon::ProcessUncleanShutdown() {
// Counts the active use time up to now.
SetUserActiveState(user_active_, Time::Now());
// Reports the active use time since the last crash and resets it.
unclean_shutdown_interval_->Flush();
frequency_counters_[kMetricUncleanShutdownsDailyName]->Update(1);
frequency_counters_[kMetricUncleanShutdownsWeeklyName]->Update(1);
frequency_counters_[kMetricAnyCrashesDailyName]->Update(1);
frequency_counters_[kMetricAnyCrashesWeeklyName]->Update(1);
}
bool MetricsDaemon::CheckSystemCrash(const string& crash_file) {
FilePath crash_detected(crash_file);
if (!file_util::PathExists(crash_detected))
return false;
// Deletes the crash-detected file so that the daemon doesn't report
// another kernel crash in case it's restarted.
file_util::Delete(crash_detected,
false); // recursive
return true;
}
// static
// GLib timeout trampoline: |data| is the MetricsDaemon whose UseMonitor()
// result is translated into TRUE/FALSE.
gboolean MetricsDaemon::UseMonitorStatic(gpointer data) {
  MetricsDaemon* self = static_cast<MetricsDaemon*>(data);
  if (self->UseMonitor())
    return TRUE;
  return FALSE;
}
// Periodic use monitor: accounts for the use time so far, then tries to
// reschedule itself at twice the current interval (back-off mode).
// Returns false when a new monitor source was scheduled, which tells GLib to
// destroy this one; returns true to keep this monitor being called back.
bool MetricsDaemon::UseMonitor() {
  SetUserActiveState(user_active_, Time::Now());
  const bool replaced = ScheduleUseMonitor(usemon_interval_ * 2, true);
  return !replaced;
}
bool MetricsDaemon::ScheduleUseMonitor(int interval, bool backoff)
{
if (testing_)
return false;
// Caps the interval -- the bigger the interval, the more active use
// time will be potentially dropped on system shutdown.
if (interval > kUseMonitorIntervalMax)
interval = kUseMonitorIntervalMax;
if (backoff) {
// Back-off mode is used by the use monitor to reschedule itself
// with exponential back-off in time. This mode doesn't create a
// new timeout source if the new interval is the same as the old
// one. Also, if a new timeout source is created, the old one is
// not destroyed explicitly here -- it will be destroyed by GLib
// when the monitor returns FALSE (see UseMonitor and
// UseMonitorStatic).
if (interval == usemon_interval_)
return false;
} else {
UnscheduleUseMonitor();
}
// Schedules a new use monitor for |interval| seconds from now.
DLOG(INFO) << "scheduling use monitor in " << interval << " seconds";
usemon_source_ = g_timeout_source_new_seconds(interval);
g_source_set_callback(usemon_source_, UseMonitorStatic, this,
NULL); // No destroy notification.
g_source_attach(usemon_source_,
NULL); // Default context.
usemon_interval_ = interval;
return true;
}
// Destroys the currently scheduled use monitor, if any, and resets the
// recorded source and interval.
void MetricsDaemon::UnscheduleUseMonitor() {
  if (usemon_source_ != NULL) {
    DLOG(INFO) << "destroying use monitor";
    g_source_destroy(usemon_source_);
    usemon_source_ = NULL;
    usemon_interval_ = 0;
  }
}
// Takes the baseline disk and vm stats readings and schedules the first
// stats callback. Nothing is scheduled when the clock cannot be read.
void MetricsDaemon::StatsReporterInit() {
  DiskStatsReadStats(&read_sectors_, &write_sectors_);
  VmStatsReadStats(&page_faults_);
  // The first time around just run the long stat, so we don't delay boot.
  stats_state_ = kStatsLong;
  stats_initial_time_ = GetActiveTime();
  // GetActiveTime() signals failure by returning 0 (never a negative value),
  // so the failure test must include 0 or it can never fire.
  if (stats_initial_time_ <= 0) {
    LOG(WARNING) << "not collecting disk stats";
  } else {
    ScheduleStatsCallback(kMetricStatsLongInterval);
  }
}
// Arranges for StatsCallbackStatic() to run |wait| seconds from now.
// Does nothing in test mode.
void MetricsDaemon::ScheduleStatsCallback(int wait) {
  if (!testing_)
    g_timeout_add_seconds(wait, StatsCallbackStatic, this);
}
// Reads the sector counters from the file at diskstats_path_.
//
// On success stores the third and seventh whitespace-separated integers of
// the file in |*read_sectors| and |*write_sectors| and returns true. On any
// failure (empty path, open/read error, fewer than two values parsed) logs a
// warning where applicable, leaves both outputs untouched and returns false.
bool MetricsDaemon::DiskStatsReadStats(long int* read_sectors,
                                       long int* write_sectors) {
  if (diskstats_path_.empty()) {
    return false;
  }

  int file = HANDLE_EINTR(open(diskstats_path_.c_str(), O_RDONLY));
  if (file < 0) {
    PLOG(WARNING) << "cannot open " << diskstats_path_;
    return false;
  }

  bool success = false;
  char line[200];
  // Read at most sizeof(line) - 1 bytes so the terminating NUL below always
  // lands inside the buffer.
  const int max_chars = static_cast<int>(sizeof(line)) - 1;
  int nchars = HANDLE_EINTR(read(file, line, max_chars));
  if (nchars < 0) {
    // Fall through to the close() below instead of returning early, so the
    // descriptor is not leaked.
    PLOG(WARNING) << "cannot read from " << diskstats_path_;
  } else {
    LOG_IF(WARNING, nchars == max_chars)
        << "line too long in " << diskstats_path_;
    line[nchars] = '\0';
    // Parse into locals so a partial match cannot clobber only one output.
    long int parsed_read = 0;
    long int parsed_write = 0;
    int nitems = sscanf(line, "%*d %*d %ld %*d %*d %*d %ld",
                        &parsed_read, &parsed_write);
    if (nitems == 2) {
      *read_sectors = parsed_read;
      *write_sectors = parsed_write;
      success = true;
    } else {
      LOG(WARNING) << "found " << nitems << " items in "
                   << diskstats_path_ << ", expected 2";
    }
  }

  HANDLE_EINTR(close(file));
  return success;
}
// Extracts the pgmajfault count from |stats|, the NUL-terminated contents of
// a vmstat file, into |*page_faults|.
//
// Each line in the file has the form
//   <ID> <VALUE>
// for instance:
//   nr_free_pages 213427
//
// Returns true only when the entry is found and its number is immediately
// followed by a newline. Note that |*page_faults| is written as soon as the
// entry is found, even if the trailing-newline check then fails.
bool MetricsDaemon::VmStatsParseStats(char* stats, long int* page_faults) {
  static const char kPageFaultSearchString[] = "\npgmajfault ";

  char* entry = strstr(stats, kPageFaultSearchString);
  if (entry == NULL) {
    LOG(WARNING) << "cannot find page fault entry in vmstats";
    return false;
  }

  // Skip <ID> and space. Don't count the terminating null.
  char* value_text = entry + (sizeof(kPageFaultSearchString) - 1);
  char* value_end;
  *page_faults = strtol(value_text, &value_end, 10);
  if (*value_end != '\n') {
    LOG(WARNING) << "error parsing vmstats";
    return false;
  }
  return true;
}
// Reads up to sizeof(buffer) - 1 bytes of the file at vmstats_path_ and
// hands them to VmStatsParseStats() to fill |*page_faults|. Returns the
// parser's result, or false in test mode, when the file cannot be opened or
// read, or when it is empty.
bool MetricsDaemon::VmStatsReadStats(long int* page_faults) {
  if (testing_) {
    return false;
  }

  int file = HANDLE_EINTR(open(vmstats_path_.c_str(), O_RDONLY));
  if (file < 0) {
    PLOG(WARNING) << "cannot open " << vmstats_path_;
    return false;
  }

  char buffer[4000];
  bool parsed = false;
  // One byte is kept back for the terminating NUL.
  int nchars = HANDLE_EINTR(read(file, buffer, sizeof(buffer) - 1));
  LOG_IF(WARNING, nchars == sizeof(buffer) - 1)
      << "file too large in " << vmstats_path_;
  if (nchars > 0) {
    buffer[nchars] = '\0';
    parsed = VmStatsParseStats(buffer, page_faults);
  } else if (nchars == 0) {
    LOG(WARNING) << vmstats_path_ << " is empty";
  } else {
    PLOG(WARNING) << "cannot read from " << vmstats_path_;
  }

  HANDLE_EINTR(close(file));
  return parsed;
}
// static
// GLib timeout trampoline: |handle| is the MetricsDaemon to run
// StatsCallback() on. Always returns false so the timeout fires only once;
// StatsCallback() schedules the next one itself.
gboolean MetricsDaemon::StatsCallbackStatic(void* handle) {
  MetricsDaemon* self = static_cast<MetricsDaemon*>(handle);
  self->StatsCallback();
  return false;  // one-time callback
}
// Collects disk and vm stats alternating over a short and a long interval.
void MetricsDaemon::StatsCallback() {
long int read_sectors_now, write_sectors_now, page_faults_now;
double time_now = GetActiveTime();
double delta_time = time_now - stats_initial_time_;
if (testing_) {
// Fake the time when testing.
delta_time = stats_state_ == kStatsShort ?
kMetricStatsShortInterval : kMetricStatsLongInterval;
}
bool diskstats_success = DiskStatsReadStats(&read_sectors_now,
&write_sectors_now);
int delta_read = read_sectors_now - read_sectors_;
int delta_write = write_sectors_now - write_sectors_;
int read_sectors_per_second = delta_read / delta_time;
int write_sectors_per_second = delta_write / delta_time;
bool vmstats_success = VmStatsReadStats(&page_faults_now);
int delta_faults = page_faults_now - page_faults_;
int page_faults_per_second = delta_faults / delta_time;
switch (stats_state_) {
case kStatsShort:
if (diskstats_success) {
SendMetric(kMetricReadSectorsShortName,
read_sectors_per_second,
1,
kMetricSectorsIOMax,
kMetricSectorsBuckets);
SendMetric(kMetricWriteSectorsShortName,
write_sectors_per_second,
1,
kMetricSectorsIOMax,
kMetricSectorsBuckets);
}
if (vmstats_success) {
SendMetric(kMetricPageFaultsShortName,
page_faults_per_second,
1,
kMetricPageFaultsMax,
kMetricPageFaultsBuckets);
}
// Schedule long callback.
stats_state_ = kStatsLong;
ScheduleStatsCallback(kMetricStatsLongInterval -
kMetricStatsShortInterval);
break;
case kStatsLong:
if (diskstats_success) {
SendMetric(kMetricReadSectorsLongName,
read_sectors_per_second,
1,
kMetricSectorsIOMax,
kMetricSectorsBuckets);
SendMetric(kMetricWriteSectorsLongName,
write_sectors_per_second,
1,
kMetricSectorsIOMax,
kMetricSectorsBuckets);
// Reset sector counters.
read_sectors_ = read_sectors_now;
write_sectors_ = write_sectors_now;
}
if (vmstats_success) {
SendMetric(kMetricPageFaultsLongName,
page_faults_per_second,
1,
kMetricPageFaultsMax,
kMetricPageFaultsBuckets);
page_faults_ = page_faults_now;
}
// Set start time for new cycle.
stats_initial_time_ = time_now;
// Schedule short callback.
stats_state_ = kStatsShort;
ScheduleStatsCallback(kMetricStatsShortInterval);
break;
default:
LOG(FATAL) << "Invalid stats state";
}
}
// Arranges for MeminfoCallbackStatic() to be called with |wait| seconds
// between calls. Does nothing in test mode.
void MetricsDaemon::ScheduleMeminfoCallback(int wait) {
  if (!testing_)
    g_timeout_add_seconds(wait, MeminfoCallbackStatic, this);
}
// static
// GLib timeout trampoline: |handle| is the MetricsDaemon to run
// MeminfoCallback() on. The timeout keeps firing for as long as
// MeminfoCallback() returns true.
gboolean MetricsDaemon::MeminfoCallbackStatic(void* handle) {
  MetricsDaemon* self = static_cast<MetricsDaemon*>(handle);
  return self->MeminfoCallback();
}
bool MetricsDaemon::MeminfoCallback() {
string meminfo_raw;
const FilePath meminfo_path("/proc/meminfo");
if (!file_util::ReadFileToString(meminfo_path, &meminfo_raw)) {
LOG(WARNING) << "cannot read " << meminfo_path.value().c_str();
return false;
}
return ProcessMeminfo(meminfo_raw);
}
// Parses |meminfo_raw| (the contents of /proc/meminfo) and sends one UMA
// sample per field listed below, named "Platform.Meminfo<name>".
//
// Fields without an explicit op use the zero-initialized op value and are
// handled by the kMeminfoOp_HistPercent case (percent of MemTotal);
// kMeminfoOp_HistLog fields are reported in kbytes on a log scale. SwapTotal
// and SwapFree are not sent directly; when swap is configured they produce
// Platform.MeminfoSwapUsed and Platform.MeminfoSwapUsedPercent instead.
//
// Returns false when a field cannot be found or parsed, or when MemTotal is
// reported as 0; returns true otherwise.
bool MetricsDaemon::ProcessMeminfo(const string& meminfo_raw) {
  // Entries must appear in the same order as in /proc/meminfo, since
  // FillMeminfo() scans the file once, front to back.
  static const MeminfoRecord fields_array[] = {
    { "MemTotal", "MemTotal" },  // SPECIAL CASE: total system memory
    { "MemFree", "MemFree" },
    { "Buffers", "Buffers" },
    { "Cached", "Cached" },
    // { "SwapCached", "SwapCached" },
    { "Active", "Active" },
    { "Inactive", "Inactive" },
    { "ActiveAnon", "Active(anon)" },
    { "InactiveAnon", "Inactive(anon)" },
    { "ActiveFile" , "Active(file)" },
    { "InactiveFile", "Inactive(file)" },
    { "Unevictable", "Unevictable", kMeminfoOp_HistLog },
    // { "Mlocked", "Mlocked" },
    { "SwapTotal", "SwapTotal", kMeminfoOp_SwapTotal },
    { "SwapFree", "SwapFree", kMeminfoOp_SwapFree },
    // { "Dirty", "Dirty" },
    // { "Writeback", "Writeback" },
    { "AnonPages", "AnonPages" },
    { "Mapped", "Mapped" },
    { "Shmem", "Shmem", kMeminfoOp_HistLog },
    { "Slab", "Slab", kMeminfoOp_HistLog },
    // { "SReclaimable", "SReclaimable" },
    // { "SUnreclaim", "SUnreclaim" },
  };
  vector<MeminfoRecord> fields(fields_array,
                               fields_array + arraysize(fields_array));
  if (!FillMeminfo(meminfo_raw, &fields)) {
    return false;
  }
  int total_memory = fields[0].value;
  if (total_memory == 0) {
    // this "cannot happen"
    LOG(WARNING) << "borked meminfo parser";
    return false;
  }
  int swap_total = 0;
  int swap_free = 0;
  // Send all fields retrieved, except total memory.
  for (unsigned int i = 1; i < fields.size(); i++) {
    string metrics_name = StringPrintf("Platform.Meminfo%s", fields[i].name);
    int percent;
    switch (fields[i].op) {
      case kMeminfoOp_HistPercent:
        // Report value as percent of total memory. The product is computed
        // in 64 bits: value * 100 overflows a 32-bit int once the value
        // exceeds roughly 21GB expressed in kbytes.
        percent = static_cast<int>(
            static_cast<long long>(fields[i].value) * 100 / total_memory);
        SendLinearMetric(metrics_name, percent, 100, 101);
        break;
      case kMeminfoOp_HistLog:
        // report value in kbytes, log scale, 4GB max
        SendMetric(metrics_name, fields[i].value, 1, 4 * 1000 * 1000, 100);
        break;
      case kMeminfoOp_SwapTotal:
        swap_total = fields[i].value;
        // Without this break the case fell through and stored the swap
        // total in swap_free as well.
        break;
      case kMeminfoOp_SwapFree:
        swap_free = fields[i].value;
        break;
    }
  }
  if (swap_total > 0) {
    int swap_used = swap_total - swap_free;
    // 64-bit product for the same overflow reason as above.
    int swap_used_percent = static_cast<int>(
        static_cast<long long>(swap_used) * 100 / swap_total);
    SendMetric("Platform.MeminfoSwapUsed", swap_used, 1, 8 * 1000 * 1000, 100);
    SendLinearMetric("Platform.MeminfoSwapUsedPercent", swap_used_percent,
                     100, 101);
  }
  return true;
}
// Fills in the |value| of every record in |fields| from |meminfo_raw|.
//
// The input is scanned once, line by line, and the records are matched in
// order: a line is consumed for the current record when its first token
// (split on ':' and ' ') is exactly equal to the record's |match| string.
// The records must therefore be listed in the order in which they appear in
// the input.
//
// Returns false (after logging a warning) when a matched line has no value,
// when the value is not a plain decimal number, or when the input ends
// before every record has been matched.
bool MetricsDaemon::FillMeminfo(const string& meminfo_raw,
                                vector<MeminfoRecord>* fields) {
  vector<string> lines;
  unsigned int nlines = Tokenize(meminfo_raw, "\n", &lines);

  unsigned int ifield = 0;
  for (unsigned int iline = 0;
       iline < nlines && ifield < fields->size();
       iline++) {
    vector<string> tokens;
    Tokenize(lines[iline], ": ", &tokens);
    // A line made only of separators yields no tokens; skip it rather than
    // indexing an empty vector.
    if (tokens.empty())
      continue;
    if (strcmp((*fields)[ifield].match, tokens[0].c_str()) != 0)
      continue;

    // Name matches. Make sure there is a value token before reading it.
    if (tokens.size() < 2) {
      LOG(WARNING) << "missing meminfo value";
      return false;
    }
    const char* value_text = tokens[1].c_str();
    char* rest;
    const long parsed = strtol(value_text, &rest, 10);
    if (rest == value_text || *rest != '\0') {
      LOG(WARNING) << "missing meminfo value";
      return false;
    }
    (*fields)[ifield].value = static_cast<int>(parsed);
    ifield++;
  }
  if (ifield < fields->size()) {
    // End of input reached while scanning.
    LOG(WARNING) << "cannot find field " << (*fields)[ifield].match
                 << " and following";
    return false;
  }
  return true;
}
// Schedules MemuseCallbackStatic() for the current memuse interval.
// With |new_callback| set, the interval starts now and the full interval is
// waited. Otherwise the callback is being rescheduled and only the remainder
// of the interval (interval minus |time_elapsed|, rounded up) is waited.
// Does nothing in test mode.
void MetricsDaemon::ScheduleMemuseCallback(bool new_callback,
                                           double time_elapsed) {
  if (testing_) {
    return;
  }
  const int interval = kMemuseIntervals[memuse_interval_index_];
  int wait = interval;
  if (new_callback) {
    memuse_initial_time_ = GetActiveTime();
  } else {
    wait = ceil(interval - time_elapsed);  // round up
  }
  g_timeout_add_seconds(wait, MemuseCallbackStatic, this);
}
// static
// GLib timeout trampoline: |handle| is the MetricsDaemon to run
// MemuseCallback() on. Always returns false so the timeout fires only once;
// MemuseCallback() reschedules as needed.
gboolean MetricsDaemon::MemuseCallbackStatic(void* handle) {
  MetricsDaemon* self = static_cast<MetricsDaemon*>(handle);
  self->MemuseCallback();
  return false;
}
// Timer callback for the memuse stats. Reports memory use once enough active
// time has passed for the current interval, then moves on to the next
// interval until the last entry of kMemuseIntervals has been reported.
void MetricsDaemon::MemuseCallback() {
  // Since we only care about active time (i.e. uptime minus sleep time) but
  // the callbacks are driven by real time (uptime), we check if we should
  // reschedule this callback due to intervening sleep periods.
  double now = GetActiveTime();
  double active_time = now - memuse_initial_time_;
  if (active_time < kMemuseIntervals[memuse_interval_index_]) {
    // Not enough active time has passed. Reschedule the callback.
    ScheduleMemuseCallback(false, active_time);
    return;
  }
  // Enough active time has passed. Do the work, and (if we succeed) see if
  // we need to do more.
  if (!MemuseCallbackWork()) {
    return;
  }
  // Only advance while a next interval exists. Checking the index before
  // incrementing it would let it reach arraysize(kMemuseIntervals), and
  // ScheduleMemuseCallback() would then read past the end of the array.
  if (memuse_interval_index_ + 1 < arraysize(kMemuseIntervals)) {
    memuse_interval_index_++;
    ScheduleMemuseCallback(true, 0);
  }
}
bool MetricsDaemon::MemuseCallbackWork() {
string meminfo_raw;
const FilePath meminfo_path("/proc/meminfo");
if (!file_util::ReadFileToString(meminfo_path, &meminfo_raw)) {
LOG(WARNING) << "cannot read " << meminfo_path.value().c_str();
return false;
}
return ProcessMemuse(meminfo_raw);
}
// Parses |meminfo_raw| (the contents of /proc/meminfo) and sends the
// anonymous memory in use (active plus inactive) as a percentage of total
// memory. The metric is named "Platform.MemuseAnon<N>", where N is the
// current memuse interval index.
//
// Returns false when a field cannot be found or parsed, or when MemTotal is
// reported as 0; returns true otherwise.
bool MetricsDaemon::ProcessMemuse(const string& meminfo_raw) {
  static const MeminfoRecord fields_array[] = {
    { "MemTotal", "MemTotal" },  // SPECIAL CASE: total system memory
    { "ActiveAnon", "Active(anon)" },
    { "InactiveAnon", "Inactive(anon)" },
  };
  vector<MeminfoRecord> fields(fields_array,
                               fields_array + arraysize(fields_array));
  if (!FillMeminfo(meminfo_raw, &fields)) {
    return false;
  }
  int total = fields[0].value;
  int active_anon = fields[1].value;
  int inactive_anon = fields[2].value;
  if (total == 0) {
    // this "cannot happen"
    LOG(WARNING) << "borked meminfo parser";
    return false;
  }
  string metrics_name = StringPrintf("Platform.MemuseAnon%d",
                                     memuse_interval_index_);
  // Sum and scale in 64 bits: (active + inactive) * 100 overflows a 32-bit
  // int once the anonymous total exceeds roughly 21GB expressed in kbytes.
  const long long anon_total =
      static_cast<long long>(active_anon) + inactive_anon;
  const int anon_percent = static_cast<int>(anon_total * 100 / total);
  SendLinearMetric(metrics_name, anon_percent, 100, 101);
  return true;
}
// static
void MetricsDaemon::ReportDailyUse(void* handle, int tag, int count) {
if (count <= 0)
return;
MetricsDaemon* daemon = static_cast<MetricsDaemon*>(handle);
int minutes = (count + kSecondsPerMinute / 2) / kSecondsPerMinute;
daemon->SendMetric(kMetricDailyUseTimeName, minutes,
kMetricDailyUseTimeMin,
kMetricDailyUseTimeMax,
kMetricDailyUseTimeBuckets);
}
// Logs (debug builds) and forwards one histogram sample to the metrics
// library via SendToUMA(). |min|, |max| and |nbuckets| are passed through
// unchanged as the histogram parameters.
void MetricsDaemon::SendMetric(const string& name, int sample,
int min, int max, int nbuckets) {
DLOG(INFO) << "received metric: " << name << " " << sample << " "
<< min << " " << max << " " << nbuckets;
metrics_lib_->SendToUMA(name, sample, min, max, nbuckets);
}
// Logs (debug builds) and sends |sample| as a linear-scale sample, currently
// implemented on top of the enum histogram API. Only histograms with exactly
// |max| + 1 buckets are supported; any other |nbuckets| is a fatal error.
void MetricsDaemon::SendLinearMetric(const string& name, int sample,
                                     int max, int nbuckets) {
  DLOG(INFO) << "received linear metric: " << name << " " << sample << " "
             << max << " " << nbuckets;
  // TODO(semenzato): add a proper linear histogram to the Chrome external
  // metrics API.
  const bool scale_supported = (nbuckets == max + 1);
  LOG_IF(FATAL, !scale_supported) << "unsupported histogram scale";
  metrics_lib_->SendEnumToUMA(name, sample, max);
}