metricsd: Persist the metrics to disk periodically.
Every now and then (5 minutes by default), the uploader will persist the current metrics to disk to avoid losing them in case we exit unexpectedly (reboot or crash). When starting up, metricsd will load the previously saved log and resume the metrics collection from there. Bug: 25670584 Test: Unit tests. Test: manual: restart metricsd. The saved log is detected and parsed correctly. Test: manual: Send a sample to metricsd, send SIGTERM to metricsd, the log is saved to disk, metricsd restarts and picks up the log where it left off. Change-Id: I4cefc62c7ea1fa51333d84d8a7ba0a2e9c7fd58f
This commit is contained in:
parent
b6c77af499
commit
0586504e01
7 changed files with 170 additions and 40 deletions
|
@ -26,6 +26,7 @@ static const char kMetricsGUIDFileName[] = "Sysinfo.GUID";
|
|||
static const char kMetricsServer[] = "https://clients4.google.com/uma/v2";
|
||||
static const char kConsentFileName[] = "enabled";
|
||||
static const char kStagedLogName[] = "staged_log";
|
||||
static const char kSavedLogName[] = "saved_log";
|
||||
static const char kFailedUploadCountName[] = "failed_upload_count";
|
||||
static const char kDefaultVersion[] = "0.0.0.0";
|
||||
|
||||
|
|
|
@ -33,10 +33,13 @@ int main(int argc, char** argv) {
|
|||
|
||||
// Upload Service flags.
|
||||
DEFINE_int32(upload_interval_secs, 1800,
|
||||
"Interval at which metrics_daemon sends the metrics. (needs "
|
||||
"-uploader)");
|
||||
"Interval at which metricsd uploads the metrics.");
|
||||
DEFINE_int32(disk_persistence_interval_secs, 300,
|
||||
"Interval at which metricsd saves the aggregated metrics to "
|
||||
"disk to avoid losing them if metricsd stops in between "
|
||||
"two uploads.");
|
||||
DEFINE_string(server, metrics::kMetricsServer,
|
||||
"Server to upload the metrics to. (needs -uploader)");
|
||||
"Server to upload the metrics to.");
|
||||
DEFINE_string(private_directory, metrics::kMetricsdDirectory,
|
||||
"Path to the private directory used by metricsd "
|
||||
"(testing only)");
|
||||
|
@ -72,6 +75,7 @@ int main(int argc, char** argv) {
|
|||
|
||||
UploadService upload_service(
|
||||
FLAGS_server, base::TimeDelta::FromSeconds(FLAGS_upload_interval_secs),
|
||||
base::TimeDelta::FromSeconds(FLAGS_disk_persistence_interval_secs),
|
||||
base::FilePath(FLAGS_private_directory),
|
||||
base::FilePath(FLAGS_shared_directory));
|
||||
|
||||
|
|
|
@ -18,6 +18,8 @@
|
|||
|
||||
#include <string>
|
||||
|
||||
#include <base/files/file_util.h>
|
||||
|
||||
#include "uploader/proto/system_profile.pb.h"
|
||||
#include "uploader/system_profile_setter.h"
|
||||
|
||||
|
@ -27,6 +29,40 @@ MetricsLog::MetricsLog()
|
|||
: MetricsLogBase("", 0, metrics::MetricsLogBase::ONGOING_LOG, "") {
|
||||
}
|
||||
|
||||
// Replaces the content of this log with the serialized proto read from
// |saved_log|.
//
// Returns true if the file was read and parsed. Returns false if the file
// cannot be read (the file is left untouched) or if its content cannot be
// parsed (the file is deleted and the in-memory proto is cleared).
bool MetricsLog::LoadFromFile(const base::FilePath& saved_log) {
  std::string serialized;
  const bool read_ok = base::ReadFileToString(saved_log, &serialized);
  if (!read_ok) {
    LOG(ERROR) << "Failed to read the metrics log backup from "
               << saved_log.value();
    return false;
  }

  if (uma_proto()->ParseFromString(serialized)) {
    VLOG(1) << uma_proto()->histogram_event_size() << " histograms loaded from "
            << saved_log.value();
    return true;
  }

  // The backup is unusable: drop it from disk and discard whatever the failed
  // parse left in the proto.
  LOG(ERROR) << "Failed to parse log from " << saved_log.value()
             << ", deleting the log";
  base::DeleteFile(saved_log, false);
  uma_proto()->Clear();
  return false;
}
|
||||
|
||||
// Serializes this log and writes it to |path|, overwriting any existing file.
//
// Returns true only if every encoded byte was written. On failure the error
// is logged and the file at |path| is removed, so that a truncated log cannot
// be read back later by LoadFromFile().
bool MetricsLog::SaveToFile(const base::FilePath& path) {
  std::string encoded_log;
  GetEncodedLog(&encoded_log);

  // Compare in size_t rather than narrowing size() to int.
  const int bytes_written =
      base::WriteFile(path, encoded_log.data(), encoded_log.size());
  if (bytes_written < 0 ||
      static_cast<size_t>(bytes_written) != encoded_log.size()) {
    LOG(ERROR) << "Failed to persist the current log to " << path.value();
    // A short write leaves a partial file behind; remove it (best effort)
    // instead of keeping a log that holds only part of the metrics.
    base::DeleteFile(path, false);
    return false;
  }
  return true;
}
|
||||
|
||||
void MetricsLog::IncrementUserCrashCount(unsigned int count) {
|
||||
metrics::SystemProfileProto::Stability* stability(
|
||||
uma_proto()->mutable_system_profile()->mutable_stability());
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
|
||||
#include <string>
|
||||
|
||||
#include <base/files/file_path.h>
|
||||
#include <base/macros.h>
|
||||
|
||||
#include "uploader/metrics_log_base.h"
|
||||
|
@ -44,8 +45,15 @@ class MetricsLog : public metrics::MetricsLogBase {
|
|||
// Populate the system profile with system information using setter.
|
||||
bool PopulateSystemProfile(SystemProfileSetter* setter);
|
||||
|
||||
// Load the log from |path|.
|
||||
bool LoadFromFile(const base::FilePath& path);
|
||||
|
||||
// Save this log to |path|.
|
||||
bool SaveToFile(const base::FilePath& path);
|
||||
|
||||
private:
|
||||
friend class UploadServiceTest;
|
||||
FRIEND_TEST(UploadServiceTest, CurrentLogSavedAndResumed);
|
||||
FRIEND_TEST(UploadServiceTest, LogContainsAggregatedValues);
|
||||
FRIEND_TEST(UploadServiceTest, LogContainsCrashCounts);
|
||||
FRIEND_TEST(UploadServiceTest, LogKernelCrash);
|
||||
|
|
|
@ -42,6 +42,7 @@ const int UploadService::kMaxFailedUpload = 10;
|
|||
|
||||
UploadService::UploadService(const std::string& server,
|
||||
const base::TimeDelta& upload_interval,
|
||||
const base::TimeDelta& disk_persistence_interval,
|
||||
const base::FilePath& private_metrics_directory,
|
||||
const base::FilePath& shared_metrics_directory)
|
||||
: brillo::Daemon(),
|
||||
|
@ -51,11 +52,19 @@ UploadService::UploadService(const std::string& server,
|
|||
private_metrics_directory),
|
||||
counters_(new CrashCounters),
|
||||
upload_interval_(upload_interval),
|
||||
disk_persistence_interval_(disk_persistence_interval),
|
||||
metricsd_service_runner_(counters_) {
|
||||
staged_log_path_ = private_metrics_directory.Append(metrics::kStagedLogName);
|
||||
saved_log_path_ = private_metrics_directory.Append(metrics::kSavedLogName);
|
||||
consent_file_ = shared_metrics_directory.Append(metrics::kConsentFileName);
|
||||
}
|
||||
|
||||
void UploadService::LoadSavedLog() {
|
||||
if (base::PathExists(saved_log_path_)) {
|
||||
GetOrCreateCurrentLog()->LoadFromFile(saved_log_path_);
|
||||
}
|
||||
}
|
||||
|
||||
int UploadService::OnInit() {
|
||||
brillo::Daemon::OnInit();
|
||||
|
||||
|
@ -64,12 +73,18 @@ int UploadService::OnInit() {
|
|||
|
||||
system_profile_setter_.reset(new SystemProfileCache());
|
||||
|
||||
base::MessageLoop::current()->PostDelayedTask(FROM_HERE,
|
||||
base::Bind(&UploadService::UploadEventCallback,
|
||||
base::Unretained(this),
|
||||
upload_interval_),
|
||||
base::MessageLoop::current()->PostDelayedTask(
|
||||
FROM_HERE,
|
||||
base::Bind(&UploadService::UploadEventCallback, base::Unretained(this)),
|
||||
upload_interval_);
|
||||
|
||||
base::MessageLoop::current()->PostDelayedTask(
|
||||
FROM_HERE,
|
||||
base::Bind(&UploadService::PersistEventCallback, base::Unretained(this)),
|
||||
disk_persistence_interval_);
|
||||
|
||||
LoadSavedLog();
|
||||
|
||||
return EX_OK;
|
||||
}
|
||||
|
||||
|
@ -78,24 +93,37 @@ void UploadService::OnShutdown(int* exit_code) {
|
|||
}
|
||||
|
||||
// Test-only initialization: loads any saved log from disk first, then
// installs |setter| as the system profile setter.
void UploadService::InitForTest(SystemProfileSetter* setter) {
  LoadSavedLog();

  system_profile_setter_.reset(setter);
}
|
||||
|
||||
void UploadService::StartNewLog() {
|
||||
CHECK(!HasStagedLog()) << "the staged log should be discarded before "
|
||||
<< "starting a new metrics log";
|
||||
MetricsLog* log = new MetricsLog();
|
||||
current_log_.reset(log);
|
||||
current_log_.reset(new MetricsLog());
|
||||
}
|
||||
|
||||
void UploadService::UploadEventCallback(const base::TimeDelta& interval) {
|
||||
void UploadService::UploadEventCallback() {
|
||||
UploadEvent();
|
||||
|
||||
base::MessageLoop::current()->PostDelayedTask(FROM_HERE,
|
||||
base::Bind(&UploadService::UploadEventCallback,
|
||||
base::Unretained(this),
|
||||
interval),
|
||||
interval);
|
||||
base::MessageLoop::current()->PostDelayedTask(
|
||||
FROM_HERE,
|
||||
base::Bind(&UploadService::UploadEventCallback, base::Unretained(this)),
|
||||
upload_interval_);
|
||||
}
|
||||
|
||||
// Saves the current metrics to disk, then re-posts itself on the current
// message loop so that it runs again after |disk_persistence_interval_|.
void UploadService::PersistEventCallback() {
  PersistToDisk();

  auto next_run =
      base::Bind(&UploadService::PersistEventCallback, base::Unretained(this));
  base::MessageLoop::current()->PostDelayedTask(FROM_HERE, next_run,
                                                disk_persistence_interval_);
}
|
||||
|
||||
void UploadService::PersistToDisk() {
|
||||
GatherHistograms();
|
||||
if (current_log_) {
|
||||
current_log_->SaveToFile(saved_log_path_);
|
||||
}
|
||||
}
|
||||
|
||||
void UploadService::UploadEvent() {
|
||||
|
@ -189,14 +217,16 @@ void UploadService::StageCurrentLog() {
|
|||
<< "log.";
|
||||
return;
|
||||
}
|
||||
std::string encoded_log;
|
||||
staged_log->GetEncodedLog(&encoded_log);
|
||||
|
||||
if (!base::DeleteFile(saved_log_path_, false)) {
|
||||
// There is a chance that we will upload the same metrics twice but, if we
|
||||
// are lucky, the backup should be overridden before that. In doubt, try not
|
||||
// to lose any metrics.
|
||||
LOG(ERROR) << "failed to delete the last backup of the current log.";
|
||||
}
|
||||
|
||||
failed_upload_count_.Set(0);
|
||||
if (static_cast<int>(encoded_log.size()) != base::WriteFile(
|
||||
staged_log_path_, encoded_log.data(), encoded_log.size())) {
|
||||
LOG(ERROR) << "failed to persist to " << staged_log_path_.value();
|
||||
}
|
||||
staged_log->SaveToFile(staged_log_path_);
|
||||
}
|
||||
|
||||
MetricsLog* UploadService::GetOrCreateCurrentLog() {
|
||||
|
|
|
@ -66,6 +66,7 @@ class UploadService : public base::HistogramFlattener, public brillo::Daemon {
|
|||
public:
|
||||
UploadService(const std::string& server,
|
||||
const base::TimeDelta& upload_interval,
|
||||
const base::TimeDelta& disk_persistence_interval,
|
||||
const base::FilePath& private_metrics_directory,
|
||||
const base::FilePath& shared_metrics_directory);
|
||||
|
||||
|
@ -79,8 +80,8 @@ class UploadService : public base::HistogramFlattener, public brillo::Daemon {
|
|||
// launch as it is destroyed when staging the log.
|
||||
void StartNewLog();
|
||||
|
||||
// Event callback for handling MessageLoop events.
|
||||
void UploadEventCallback(const base::TimeDelta& interval);
|
||||
// Saves the current metrics to a file.
|
||||
void PersistToDisk();
|
||||
|
||||
// Triggers an upload event.
|
||||
void UploadEvent();
|
||||
|
@ -100,6 +101,8 @@ class UploadService : public base::HistogramFlattener, public brillo::Daemon {
|
|||
friend class UploadServiceTest;
|
||||
|
||||
FRIEND_TEST(UploadServiceTest, CanSendMultipleTimes);
|
||||
FRIEND_TEST(UploadServiceTest, CorruptedSavedLog);
|
||||
FRIEND_TEST(UploadServiceTest, CurrentLogSavedAndResumed);
|
||||
FRIEND_TEST(UploadServiceTest, DiscardLogsAfterTooManyFailedUpload);
|
||||
FRIEND_TEST(UploadServiceTest, EmptyLogsAreNotSent);
|
||||
FRIEND_TEST(UploadServiceTest, FailedSendAreRetried);
|
||||
|
@ -111,6 +114,7 @@ class UploadService : public base::HistogramFlattener, public brillo::Daemon {
|
|||
FRIEND_TEST(UploadServiceTest, LogKernelCrash);
|
||||
FRIEND_TEST(UploadServiceTest, LogUncleanShutdown);
|
||||
FRIEND_TEST(UploadServiceTest, LogUserCrash);
|
||||
FRIEND_TEST(UploadServiceTest, PersistEmptyLog);
|
||||
FRIEND_TEST(UploadServiceTest, UnknownCrashIgnored);
|
||||
FRIEND_TEST(UploadServiceTest, ValuesInConfigFileAreSent);
|
||||
|
||||
|
@ -121,12 +125,21 @@ class UploadService : public base::HistogramFlattener, public brillo::Daemon {
|
|||
// will be discarded.
|
||||
static const int kMaxFailedUpload;
|
||||
|
||||
// Loads the log saved to disk if it exists.
|
||||
void LoadSavedLog();
|
||||
|
||||
// Resets the internal state.
|
||||
void Reset();
|
||||
|
||||
// Returns true iff metrics reporting is enabled.
|
||||
bool AreMetricsEnabled();
|
||||
|
||||
// Event callback for handling Upload events.
|
||||
void UploadEventCallback();
|
||||
|
||||
// Event callback for handling Persist events.
|
||||
void PersistEventCallback();
|
||||
|
||||
// Aggregates all histograms available in memory and stores them in the current
|
||||
// log.
|
||||
void GatherHistograms();
|
||||
|
@ -156,11 +169,13 @@ class UploadService : public base::HistogramFlattener, public brillo::Daemon {
|
|||
std::shared_ptr<CrashCounters> counters_;
|
||||
|
||||
base::TimeDelta upload_interval_;
|
||||
base::TimeDelta disk_persistence_interval_;
|
||||
|
||||
MetricsdServiceRunner metricsd_service_runner_;
|
||||
|
||||
base::FilePath consent_file_;
|
||||
base::FilePath staged_log_path_;
|
||||
base::FilePath saved_log_path_;
|
||||
|
||||
bool testing_;
|
||||
};
|
||||
|
|
|
@ -45,17 +45,17 @@ class UploadServiceTest : public testing::Test {
|
|||
ASSERT_FALSE(base::StatisticsRecorder::IsActive());
|
||||
base::StatisticsRecorder::Initialize();
|
||||
|
||||
base::FilePath private_dir = dir_.path().Append("private");
|
||||
base::FilePath shared_dir = dir_.path().Append("shared");
|
||||
private_dir_ = dir_.path().Append("private");
|
||||
shared_dir_ = dir_.path().Append("shared");
|
||||
|
||||
EXPECT_TRUE(base::CreateDirectory(private_dir));
|
||||
EXPECT_TRUE(base::CreateDirectory(shared_dir));
|
||||
EXPECT_TRUE(base::CreateDirectory(private_dir_));
|
||||
EXPECT_TRUE(base::CreateDirectory(shared_dir_));
|
||||
|
||||
ASSERT_EQ(0, base::WriteFile(shared_dir.Append(metrics::kConsentFileName),
|
||||
ASSERT_EQ(0, base::WriteFile(shared_dir_.Append(metrics::kConsentFileName),
|
||||
"", 0));
|
||||
|
||||
upload_service_.reset(
|
||||
new UploadService("", base::TimeDelta(), private_dir, shared_dir));
|
||||
upload_service_.reset(new UploadService(
|
||||
"", base::TimeDelta(), base::TimeDelta(), private_dir_, shared_dir_));
|
||||
counters_ = upload_service_->counters_;
|
||||
|
||||
upload_service_->sender_.reset(new SenderMock);
|
||||
|
@ -81,15 +81,16 @@ class UploadServiceTest : public testing::Test {
|
|||
base::FilePath filepath =
|
||||
dir_.path().Append("etc/os-release.d").Append(name);
|
||||
ASSERT_TRUE(base::CreateDirectory(filepath.DirName()));
|
||||
ASSERT_EQ(
|
||||
value.size(),
|
||||
base::WriteFile(filepath, value.data(), value.size()));
|
||||
ASSERT_EQ(value.size(),
|
||||
base::WriteFile(filepath, value.data(), value.size()));
|
||||
}
|
||||
|
||||
const metrics::SystemProfileProto_Stability GetCurrentStability() {
|
||||
EXPECT_TRUE(upload_service_->current_log_.get());
|
||||
|
||||
return upload_service_->current_log_->uma_proto()->system_profile().stability();
|
||||
return upload_service_->current_log_->uma_proto()
|
||||
->system_profile()
|
||||
.stability();
|
||||
}
|
||||
|
||||
base::ScopedTempDir dir_;
|
||||
|
@ -97,6 +98,8 @@ class UploadServiceTest : public testing::Test {
|
|||
|
||||
std::unique_ptr<base::AtExitManager> exit_manager_;
|
||||
std::shared_ptr<CrashCounters> counters_;
|
||||
base::FilePath private_dir_;
|
||||
base::FilePath shared_dir_;
|
||||
};
|
||||
|
||||
TEST_F(UploadServiceTest, FailedSendAreRetried) {
|
||||
|
@ -219,10 +222,8 @@ TEST_F(UploadServiceTest, LogContainsCrashCounts) {
|
|||
}
|
||||
|
||||
TEST_F(UploadServiceTest, ExtractChannelFromString) {
|
||||
EXPECT_EQ(
|
||||
SystemProfileCache::ProtoChannelFromString(
|
||||
"developer-build"),
|
||||
metrics::SystemProfileProto::CHANNEL_UNKNOWN);
|
||||
EXPECT_EQ(SystemProfileCache::ProtoChannelFromString("developer-build"),
|
||||
metrics::SystemProfileProto::CHANNEL_UNKNOWN);
|
||||
|
||||
EXPECT_EQ(metrics::SystemProfileProto::CHANNEL_DEV,
|
||||
SystemProfileCache::ProtoChannelFromString("dev-channel"));
|
||||
|
@ -297,3 +298,38 @@ TEST_F(UploadServiceTest, ProductIdMandatory) {
|
|||
SetTestingProperty(metrics::kProductId, "hello");
|
||||
ASSERT_TRUE(cache.Initialize());
|
||||
}
|
||||
|
||||
// A log persisted by one UploadService instance is picked up by the next
// instance, and new samples are added on top of the restored ones.
TEST_F(UploadServiceTest, CurrentLogSavedAndResumed) {
  // Record one sample and persist it.
  SendHistogram("hello", 10, 0, 100, 10);
  upload_service_->PersistToDisk();
  MetricsLog* persisted_log = upload_service_->current_log_.get();
  EXPECT_EQ(1, persisted_log->uma_proto()->histogram_event_size());

  // Recreate the service on the same directories and reload the saved log.
  const base::TimeDelta no_delay;
  upload_service_.reset(
      new UploadService("", no_delay, no_delay, private_dir_, shared_dir_));
  upload_service_->InitForTest(nullptr);

  // A second sample must be added to the restored log.
  SendHistogram("hello", 10, 0, 100, 10);
  upload_service_->GatherHistograms();
  MetricsLog* resumed_log = upload_service_->GetOrCreateCurrentLog();
  EXPECT_EQ(2, resumed_log->uma_proto()->histogram_event_size());
}
|
||||
|
||||
// Persisting when no metrics were recorded must not create a saved log file.
TEST_F(UploadServiceTest, PersistEmptyLog) {
  upload_service_->PersistToDisk();

  const base::FilePath& saved_log = upload_service_->saved_log_path_;
  EXPECT_FALSE(base::PathExists(saved_log));
}
|
||||
|
||||
// A saved log that cannot be parsed is deleted and the service keeps working.
TEST_F(UploadServiceTest, CorruptedSavedLog) {
  // Write a bogus saved log.
  const int kBogusSize = 5;
  EXPECT_EQ(kBogusSize, base::WriteFile(upload_service_->saved_log_path_,
                                        "hello", kBogusSize));

  const base::TimeDelta no_delay;
  upload_service_.reset(
      new UploadService("", no_delay, no_delay, private_dir_, shared_dir_));
  upload_service_->InitForTest(nullptr);

  // If the log is unreadable, we drop it and continue execution.
  ASSERT_TRUE(upload_service_->GetOrCreateCurrentLog() != nullptr);
  ASSERT_FALSE(base::PathExists(upload_service_->saved_log_path_));
}
|
||||
|
|
Loading…
Reference in a new issue