Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 0586504e authored by Bertrand SIMONNET
Browse files

metricsd: Persist the metrics to disk periodically.

Every now and then (5 minutes by default), the uploader will persist the
current metrics to disk to avoid losing them in case we exit
unexpectedly (reboot or crash).
When starting up, metricsd will load the previously saved log and resume
the metrics collection from there.

Bug: 25670584
Test: Unit tests.
Test: manual: restart metricsd. The saved log is detected and parsed
correctly.
Test: manual: Send a sample to metricsd, send SIGTERM to metricsd, the
log is saved to disk, metricsd restarts and picks up the log where it
left off.

Change-Id: I4cefc62c7ea1fa51333d84d8a7ba0a2e9c7fd58f
parent b6c77af4
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -26,6 +26,7 @@ static const char kMetricsGUIDFileName[] = "Sysinfo.GUID";
// Default value of the flag naming the server the metrics are uploaded to.
static const char kMetricsServer[] = "https://clients4.google.com/uma/v2";
// Name of the consent file; it is looked up in the shared metrics directory.
static const char kConsentFileName[] = "enabled";
// Name of the staged log file, stored in the private metrics directory.
static const char kStagedLogName[] = "staged_log";
// Name of the file holding the periodic backup of the current log, stored in
// the private metrics directory.
static const char kSavedLogName[] = "saved_log";
// NOTE(review): presumably the file name under which the number of failed
// uploads is persisted -- confirm against its users.
static const char kFailedUploadCountName[] = "failed_upload_count";
// NOTE(review): presumably the fallback version string used when no real
// version is available -- confirm against its users.
static const char kDefaultVersion[] = "0.0.0.0";

+7 −3
Original line number Diff line number Diff line
@@ -33,10 +33,13 @@ int main(int argc, char** argv) {

  // Upload Service flags.
  DEFINE_int32(upload_interval_secs, 1800,
               "Interval at which metrics_daemon sends the metrics. (needs "
               "-uploader)");
               "Interval at which metricsd uploads the metrics.");
  DEFINE_int32(disk_persistence_interval_secs, 300,
               "Interval at which metricsd saves the aggregated metrics to "
               "disk to avoid losing them if metricsd stops in between "
               "two uploads.");
  DEFINE_string(server, metrics::kMetricsServer,
                "Server to upload the metrics to. (needs -uploader)");
                "Server to upload the metrics to.");
  DEFINE_string(private_directory, metrics::kMetricsdDirectory,
                "Path to the private directory used by metricsd "
                "(testing only)");
@@ -72,6 +75,7 @@ int main(int argc, char** argv) {

  UploadService upload_service(
      FLAGS_server, base::TimeDelta::FromSeconds(FLAGS_upload_interval_secs),
      base::TimeDelta::FromSeconds(FLAGS_disk_persistence_interval_secs),
      base::FilePath(FLAGS_private_directory),
      base::FilePath(FLAGS_shared_directory));

+36 −0
Original line number Diff line number Diff line
@@ -18,6 +18,8 @@

#include <string>

#include <base/files/file_util.h>

#include "uploader/proto/system_profile.pb.h"
#include "uploader/system_profile_setter.h"

@@ -27,6 +29,40 @@ MetricsLog::MetricsLog()
    : MetricsLogBase("", 0, metrics::MetricsLogBase::ONGOING_LOG, "") {
}

// Replaces the content of this log with the serialized log stored in
// |saved_log|.
// Returns true when the file was read and parsed. Returns false when the file
// cannot be read, or when it cannot be parsed; in the latter case the file is
// deleted and the in-memory proto is cleared.
bool MetricsLog::LoadFromFile(const base::FilePath& saved_log) {
  std::string serialized;
  const bool read_ok = base::ReadFileToString(saved_log, &serialized);
  if (!read_ok) {
    LOG(ERROR) << "Failed to read the metrics log backup from "
               << saved_log.value();
    return false;
  }

  const bool parsed = uma_proto()->ParseFromString(serialized);
  if (parsed) {
    VLOG(1) << uma_proto()->histogram_event_size()
            << " histograms loaded from " << saved_log.value();
    return true;
  }

  // The backup is unusable: drop it from disk and discard whatever the failed
  // parse left behind in the proto.
  LOG(ERROR) << "Failed to parse log from " << saved_log.value()
             << ", deleting the log";
  base::DeleteFile(saved_log, false);
  uma_proto()->Clear();
  return false;
}

// Encodes this log and writes the result to |path|.
// Returns true only when base::WriteFile reports having written exactly as
// many bytes as the encoded log contains; logs an error and returns false
// otherwise.
bool MetricsLog::SaveToFile(const base::FilePath& path) {
  std::string serialized;
  GetEncodedLog(&serialized);

  const int expected_bytes = static_cast<int>(serialized.size());
  const int written_bytes =
      base::WriteFile(path, serialized.data(), serialized.size());
  if (written_bytes == expected_bytes) {
    return true;
  }

  LOG(ERROR) << "Failed to persist the current log to " << path.value();
  return false;
}

void MetricsLog::IncrementUserCrashCount(unsigned int count) {
  metrics::SystemProfileProto::Stability* stability(
      uma_proto()->mutable_system_profile()->mutable_stability());
+8 −0
Original line number Diff line number Diff line
@@ -19,6 +19,7 @@

#include <string>

#include <base/files/file_path.h>
#include <base/macros.h>

#include "uploader/metrics_log_base.h"
@@ -44,8 +45,15 @@ class MetricsLog : public metrics::MetricsLogBase {
  // Populate the system profile with system information using setter.
  bool PopulateSystemProfile(SystemProfileSetter* setter);

  // Load the log from |path|.
  bool LoadFromFile(const base::FilePath& path);

  // Save this log to |path|.
  bool SaveToFile(const base::FilePath& path);

 private:
  friend class UploadServiceTest;
  FRIEND_TEST(UploadServiceTest, CurrentLogSavedAndResumed);
  FRIEND_TEST(UploadServiceTest, LogContainsAggregatedValues);
  FRIEND_TEST(UploadServiceTest, LogContainsCrashCounts);
  FRIEND_TEST(UploadServiceTest, LogKernelCrash);
+50 −20
Original line number Diff line number Diff line
@@ -42,6 +42,7 @@ const int UploadService::kMaxFailedUpload = 10;

UploadService::UploadService(const std::string& server,
                             const base::TimeDelta& upload_interval,
                             const base::TimeDelta& disk_persistence_interval,
                             const base::FilePath& private_metrics_directory,
                             const base::FilePath& shared_metrics_directory)
    : brillo::Daemon(),
@@ -51,11 +52,19 @@ UploadService::UploadService(const std::string& server,
                           private_metrics_directory),
      counters_(new CrashCounters),
      upload_interval_(upload_interval),
      disk_persistence_interval_(disk_persistence_interval),
      metricsd_service_runner_(counters_) {
  staged_log_path_ = private_metrics_directory.Append(metrics::kStagedLogName);
  saved_log_path_ = private_metrics_directory.Append(metrics::kSavedLogName);
  consent_file_ = shared_metrics_directory.Append(metrics::kConsentFileName);
}

// Loads the backup at |saved_log_path_| into the current log, if such a file
// exists. Does nothing otherwise.
void UploadService::LoadSavedLog() {
  if (!base::PathExists(saved_log_path_)) {
    return;
  }
  GetOrCreateCurrentLog()->LoadFromFile(saved_log_path_);
}

int UploadService::OnInit() {
  brillo::Daemon::OnInit();

@@ -64,12 +73,18 @@ int UploadService::OnInit() {

  system_profile_setter_.reset(new SystemProfileCache());

  base::MessageLoop::current()->PostDelayedTask(FROM_HERE,
      base::Bind(&UploadService::UploadEventCallback,
                 base::Unretained(this),
                 upload_interval_),
  base::MessageLoop::current()->PostDelayedTask(
      FROM_HERE,
      base::Bind(&UploadService::UploadEventCallback, base::Unretained(this)),
      upload_interval_);

  base::MessageLoop::current()->PostDelayedTask(
      FROM_HERE,
      base::Bind(&UploadService::PersistEventCallback, base::Unretained(this)),
      disk_persistence_interval_);

  LoadSavedLog();

  return EX_OK;
}

@@ -78,24 +93,37 @@ void UploadService::OnShutdown(int* exit_code) {
}

// Test-only initialization: loads any previously saved log, then installs
// |setter| as the system profile setter.
void UploadService::InitForTest(SystemProfileSetter* setter) {
  LoadSavedLog();
  // NOTE(review): reset() presumably takes ownership of |setter| -- confirm
  // against the declaration of system_profile_setter_.
  system_profile_setter_.reset(setter);
}

// Replaces the current log with a brand-new, empty MetricsLog.
// The staged log must have been discarded beforehand; this is enforced with a
// CHECK.
void UploadService::StartNewLog() {
  CHECK(!HasStagedLog()) << "the staged log should be discarded before "
                         << "starting a new metrics log";
  // Allocate exactly one log. Creating a log, installing it and then
  // immediately resetting to a second fresh log only wastes an allocation.
  current_log_.reset(new MetricsLog());
}

void UploadService::UploadEventCallback(const base::TimeDelta& interval) {
void UploadService::UploadEventCallback() {
  UploadEvent();

  base::MessageLoop::current()->PostDelayedTask(FROM_HERE,
      base::Bind(&UploadService::UploadEventCallback,
                 base::Unretained(this),
                 interval),
      interval);
  base::MessageLoop::current()->PostDelayedTask(
      FROM_HERE,
      base::Bind(&UploadService::UploadEventCallback, base::Unretained(this)),
      upload_interval_);
}

// Periodic persistence task: saves the current metrics to disk, then re-posts
// itself on the current message loop so that it runs again after
// |disk_persistence_interval_|.
void UploadService::PersistEventCallback() {
  PersistToDisk();

  // Bind the next run first, then hand it to the message loop.
  auto next_run =
      base::Bind(&UploadService::PersistEventCallback, base::Unretained(this));
  base::MessageLoop::current()->PostDelayedTask(
      FROM_HERE, next_run, disk_persistence_interval_);
}

// Gathers the histograms, then writes the current log (when there is one) to
// |saved_log_path_|.
void UploadService::PersistToDisk() {
  GatherHistograms();
  if (!current_log_) {
    return;
  }
  current_log_->SaveToFile(saved_log_path_);
}

void UploadService::UploadEvent() {
@@ -189,14 +217,16 @@ void UploadService::StageCurrentLog() {
                 << "log.";
    return;
  }
  std::string encoded_log;
  staged_log->GetEncodedLog(&encoded_log);

  failed_upload_count_.Set(0);
  if (static_cast<int>(encoded_log.size()) != base::WriteFile(
      staged_log_path_, encoded_log.data(), encoded_log.size())) {
    LOG(ERROR) << "failed to persist to " << staged_log_path_.value();
  if (!base::DeleteFile(saved_log_path_, false)) {
    // There is a chance that we will upload the same metrics twice but, if we
    // are lucky, the backup should be overridden before that. In doubt, try not
    // to lose any metrics.
    LOG(ERROR) << "failed to delete the last backup of the current log.";
  }

  failed_upload_count_.Set(0);
  staged_log->SaveToFile(staged_log_path_);
}

MetricsLog* UploadService::GetOrCreateCurrentLog() {
Loading