Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 1bb904ed authored by Darin Petkov's avatar Darin Petkov
Browse files

Measure and report time between user-space process crashes.

BUG=none
TEST=unit tests, gmerged on the device and inspected
logs, about:histograms,etc.

Review URL: http://codereview.chromium.org/2736008
parent f1e85e49
Loading
Loading
Loading
Loading
+21 −12
Original line number Diff line number Diff line
@@ -58,11 +58,16 @@ void TaggedCounter::Flush() {
}

void TaggedCounter::UpdateInternal(int tag, int count, bool flush) {
  if (flush) {
    // Flushing but record is null, so nothing to do.
    if (record_state_ == kRecordNull)
      return;
  } else {
    // If there's no new data and the last record in the aggregation
    // file is with the same tag, there's nothing to do.
  if (!flush && count <= 0 &&
      record_state_ == kRecordValid && record_.tag() == tag)
    if (count <= 0 && record_state_ == kRecordValid && record_.tag() == tag)
      return;
  }

  DLOG(INFO) << "tag: " << tag << " count: " << count << " flush: " << flush;
  DCHECK(filename_);
@@ -78,7 +83,7 @@ void TaggedCounter::UpdateInternal(int tag, int count, bool flush) {

  ReadRecord(fd);
  ReportRecord(tag, flush);
  UpdateRecord(tag, count);
  UpdateRecord(tag, count, flush);
  WriteRecord(fd);

  HANDLE_EINTR(close(fd));
@@ -89,7 +94,7 @@ void TaggedCounter::ReadRecord(int fd) {
    return;

  if (HANDLE_EINTR(read(fd, &record_, sizeof(record_))) == sizeof(record_)) {
    if (record_.count() > 0) {
    if (record_.count() >= 0) {
      record_state_ = kRecordValid;
      return;
    }
@@ -106,7 +111,7 @@ void TaggedCounter::ReadRecord(int fd) {
void TaggedCounter::ReportRecord(int tag, bool flush) {
  // If no valid record, there's nothing to report.
  if (record_state_ != kRecordValid) {
    DCHECK(record_state_ == kRecordNull);
    DCHECK_EQ(record_state_, kRecordNull);
    return;
  }

@@ -121,9 +126,11 @@ void TaggedCounter::ReportRecord(int tag, bool flush) {
  record_state_ = kRecordNullDirty;
}

void TaggedCounter::UpdateRecord(int tag, int count) {
  if (count <= 0)
void TaggedCounter::UpdateRecord(int tag, int count, bool flush) {
  if (flush) {
    DCHECK(record_state_ == kRecordNull || record_state_ == kRecordNullDirty);
    return;
  }

  switch (record_state_) {
    case kRecordNull:
@@ -137,8 +144,10 @@ void TaggedCounter::UpdateRecord(int tag, int count) {
      // If there's an existing record for the current tag,
      // accumulates the counts.
      DCHECK_EQ(record_.tag(), tag);
      if (count > 0) {
        record_.Add(count);
        record_state_ = kRecordValidDirty;
      }
      break;

    default:
+4 −4
Original line number Diff line number Diff line
@@ -22,8 +22,7 @@ class TaggedCounterInterface {
 public:
  // Callback type used for reporting aggregated or flushed data.
  // Once this callback is invoked by the counter, the reported
  // aggregated data is discarded. Only aggregated data with positive
  // counts is reported.
  // aggregated data is discarded.
  //
  // |handle| is the |reporter_handle| pointer passed through Init.
  // |tag| is the tag associated with the aggregated count.
@@ -124,8 +123,9 @@ class TaggedCounter : public TaggedCounterInterface {

  // Updates the cached record given the new |tag| and |count|. This
  // method expects either a null cached record, or a valid cached
  // record with the same tag as |tag|.
  void UpdateRecord(int tag, int count);
  // record with the same tag as |tag|. If |flush| is true, the method
  // asserts that the cached record is null and returns.
  void UpdateRecord(int tag, int count, bool flush);

  // If the cached record state is dirty, updates the persistent
  // storage specified through file descriptor |fd| and switches the
+4 −4
Original line number Diff line number Diff line
@@ -230,8 +230,8 @@ TEST_F(TaggedCounterTest, InitFromFile) {
  ExpectReporterCall(/* tag */ 31, /* count */ 60);
  counter_.Init(kTestRecordFile, &Reporter, this);
  counter_.Update(/* tag */ 32, /* count */ 0);
  EXPECT_TRUE(AssertNoOrEmptyRecordFile());
  EXPECT_EQ(TaggedCounter::kRecordNull, counter_.record_state_);
  EXPECT_PRED_FORMAT2(AssertRecord, /* day */ 32, /* seconds */ 0);
  EXPECT_EQ(TaggedCounter::kRecordValid, counter_.record_state_);
}

TEST_F(TaggedCounterTest, Update) {
@@ -250,8 +250,8 @@ TEST_F(TaggedCounterTest, Update) {

  ExpectReporterCall(/* tag */ 21, /* count */ 15);
  counter_.Update(/* tag */ 22, /* count */ 0);
  EXPECT_TRUE(AssertNoOrEmptyRecordFile());
  EXPECT_EQ(TaggedCounter::kRecordNull, counter_.record_state_);
  EXPECT_PRED_FORMAT2(AssertRecord, /* day */ 22, /* seconds */ 0);
  EXPECT_EQ(TaggedCounter::kRecordValid, counter_.record_state_);
}

}  // namespace chromeos_metrics
+48 −2
Original line number Diff line number Diff line
@@ -15,6 +15,7 @@ using base::TimeDelta;
using base::TimeTicks;

#define SAFE_MESSAGE(e) (e.message ? e.message : "unknown error")
#define DBUS_IFACE_CRASH_REPORTER "org.chromium.CrashReporter"
#define DBUS_IFACE_FLIMFLAM_MANAGER "org.chromium.flimflam.Manager"
#define DBUS_IFACE_POWER_MANAGER "org.chromium.PowerManager"
#define DBUS_IFACE_SESSION_MANAGER "org.chromium.SessionManagerInterface"
@@ -23,11 +24,16 @@ using base::TimeTicks;
// TODO(petkov): This file should probably live in a user-specific stateful
// location, e.g., /home/chronos/user.
static const char kDailyUseRecordFile[] = "/var/log/metrics/daily-usage";
static const char kUserCrashIntervalRecordFile[] =
    "/var/log/metrics/user-crash-interval";

static const int kSecondsPerMinute = 60;
static const int kMinutesPerHour = 60;
static const int kHoursPerDay = 24;
static const int kMinutesPerDay = kHoursPerDay * kMinutesPerHour;
static const int kSecondsPerDay = kSecondsPerMinute * kMinutesPerDay;
static const int kDaysPerWeek = 7;
static const int kSecondsPerWeek = kSecondsPerDay * kDaysPerWeek;

// The daily use monitor is scheduled to a 1-minute interval after
// initial user activity and then it's exponentially backed off to
@@ -52,8 +58,19 @@ const int MetricsDaemon::kMetricTimeToNetworkDropMax =
    8 /* hours */ * kMinutesPerHour * kSecondsPerMinute;
const int MetricsDaemon::kMetricTimeToNetworkDropBuckets = 50;

const char MetricsDaemon::kMetricUserCrashIntervalName[] =
    "Logging.UserCrashInterval";
const int MetricsDaemon::kMetricUserCrashIntervalMin = 1;
const int MetricsDaemon::kMetricUserCrashIntervalMax = 4 * kSecondsPerWeek;
const int MetricsDaemon::kMetricUserCrashIntervalBuckets = 50;

// static
const char* MetricsDaemon::kDBusMatches_[] = {
  "type='signal',"
  "interface='" DBUS_IFACE_CRASH_REPORTER "',"
  "path='/',"
  "member='UserCrash'",

  "type='signal',"
  "sender='org.chromium.flimflam',"
  "interface='" DBUS_IFACE_FLIMFLAM_MANAGER "',"
@@ -111,6 +128,9 @@ void MetricsDaemon::Init(bool testing, MetricsLibraryInterface* metrics_lib) {
  metrics_lib_ = metrics_lib;
  daily_use_.reset(new chromeos_metrics::TaggedCounter());
  daily_use_->Init(kDailyUseRecordFile, &DailyUseReporter, this);
  user_crash_interval_.reset(new chromeos_metrics::TaggedCounter());
  user_crash_interval_->Init(kUserCrashIntervalRecordFile,
                             &UserCrashIntervalReporter, this);

  // Don't setup D-Bus and GLib in test mode.
  if (testing)
@@ -130,7 +150,7 @@ void MetricsDaemon::Init(bool testing, MetricsLibraryInterface* metrics_lib) {
  dbus_connection_setup_with_g_main(connection, NULL);

  // Registers D-Bus matches for the signals we would like to catch.
  for (unsigned int m = 0; m < sizeof(kDBusMatches_) / sizeof(char*); m++) {
  for (unsigned int m = 0; m < arraysize(kDBusMatches_); m++) {
    const char* match = kDBusMatches_[m];
    DLOG(INFO) << "adding dbus match: " << match;
    dbus_bus_add_match(connection, match, &error);
@@ -171,7 +191,11 @@ DBusHandlerResult MetricsDaemon::MessageFilter(DBusConnection* connection,

  DBusMessageIter iter;
  dbus_message_iter_init(message, &iter);
  if (strcmp(interface, DBUS_IFACE_FLIMFLAM_MANAGER) == 0) {
  if (strcmp(interface, DBUS_IFACE_CRASH_REPORTER) == 0) {
    CHECK(strcmp(dbus_message_get_member(message),
                 "UserCrash") == 0);
    daemon->ProcessUserCrash();
  } else if (strcmp(interface, DBUS_IFACE_FLIMFLAM_MANAGER) == 0) {
    CHECK(strcmp(dbus_message_get_member(message),
                 "StateChanged") == 0);

@@ -295,6 +319,7 @@ void MetricsDaemon::SetUserActiveState(bool active, Time now) {
  TimeDelta since_epoch = now - Time();
  int day = since_epoch.InDays();
  daily_use_->Update(day, seconds);
  user_crash_interval_->Update(0, seconds);

  // Schedules a use monitor on inactive->active transitions and
  // unschedules it on active->inactive transitions.
@@ -309,6 +334,14 @@ void MetricsDaemon::SetUserActiveState(bool active, Time now) {
  user_active_last_ = now;
}

void MetricsDaemon::ProcessUserCrash() {
  // Counts the active use time up to now.
  SetUserActiveState(user_active_, Time::Now());

  // Reports the active use time since the last crash and resets it.
  user_crash_interval_->Flush();
}

// static
gboolean MetricsDaemon::UseMonitorStatic(gpointer data) {
  return static_cast<MetricsDaemon*>(data)->UseMonitor() ? TRUE : FALSE;
@@ -371,6 +404,9 @@ void MetricsDaemon::UnscheduleUseMonitor() {

// static
void MetricsDaemon::DailyUseReporter(void* handle, int tag, int count) {
  if (count <= 0)
    return;

  MetricsDaemon* daemon = static_cast<MetricsDaemon*>(handle);
  int minutes = (count + kSecondsPerMinute / 2) / kSecondsPerMinute;
  daemon->SendMetric(kMetricDailyUseTimeName, minutes,
@@ -379,6 +415,16 @@ void MetricsDaemon::DailyUseReporter(void* handle, int tag, int count) {
                     kMetricDailyUseTimeBuckets);
}

// static
void MetricsDaemon::UserCrashIntervalReporter(void* handle,
                                              int tag, int count) {
  MetricsDaemon* daemon = static_cast<MetricsDaemon*>(handle);
  daemon->SendMetric(kMetricUserCrashIntervalName, count,
                     kMetricUserCrashIntervalMin,
                     kMetricUserCrashIntervalMax,
                     kMetricUserCrashIntervalBuckets);
}

void MetricsDaemon::SendMetric(const std::string& name, int sample,
                               int min, int max, int nbuckets) {
  DLOG(INFO) << "received metric: " << name << " " << sample << " "
+23 −3
Original line number Diff line number Diff line
@@ -40,11 +40,13 @@ class MetricsDaemon {
  FRIEND_TEST(MetricsDaemonTest, NetStateChangedSimpleDrop);
  FRIEND_TEST(MetricsDaemonTest, NetStateChangedSuspend);
  FRIEND_TEST(MetricsDaemonTest, PowerStateChanged);
  FRIEND_TEST(MetricsDaemonTest, ProcessUserCrash);
  FRIEND_TEST(MetricsDaemonTest, ScreenSaverStateChanged);
  FRIEND_TEST(MetricsDaemonTest, SendMetric);
  FRIEND_TEST(MetricsDaemonTest, SessionStateChanged);
  FRIEND_TEST(MetricsDaemonTest, SetUserActiveState);
  FRIEND_TEST(MetricsDaemonTest, SetUserActiveStateTimeJump);
  FRIEND_TEST(MetricsDaemonTest, UserCrashIntervalReporter);

  // The network states (see network_states.h).
  enum NetworkState {
@@ -87,6 +89,10 @@ class MetricsDaemon {
  static const int kMetricTimeToNetworkDropMin;
  static const int kMetricTimeToNetworkDropMax;
  static const int kMetricTimeToNetworkDropBuckets;
  static const char kMetricUserCrashIntervalName[];
  static const int kMetricUserCrashIntervalMin;
  static const int kMetricUserCrashIntervalMax;
  static const int kMetricUserCrashIntervalBuckets;

  // D-Bus message match strings.
  static const char* kDBusMatches_[];
@@ -142,6 +148,10 @@ class MetricsDaemon {
  // the usage file, the |seconds| are accumulated.
  void LogDailyUseRecord(int day, int seconds);

  // Updates the active use time and logs time between user-space
  // process crashes.
  void ProcessUserCrash();

  // Callbacks for the daily use monitor. The daily use monitor uses
  // LogDailyUseRecord to aggregate current usage data and send it to
  // UMA, if necessary. It also reschedules itself using an
@@ -169,8 +179,14 @@ class MetricsDaemon {
  void SendMetric(const std::string& name, int sample,
                  int min, int max, int nbuckets);

  // TaggedCounter callback to process aggregated daily usage data and
  // send to UMA.
  static void DailyUseReporter(void* data, int tag, int count);

  // TaggedCounter callback to process time between user-space process
  // crashes and send to UMA.
  static void UserCrashIntervalReporter(void* data, int tag, int count);

  // Test mode.
  bool testing_;

@@ -195,13 +211,17 @@ class MetricsDaemon {
  // started, screen is not locked.
  bool user_active_;

  // Timestamps last user active update.  Active use time is
  // aggregated each day before sending to UMA so using time since the
  // epoch as the timestamp.
  // Timestamps last user active update. Active use time is aggregated
  // each day before sending to UMA so using time since the epoch as
  // the timestamp.
  base::Time user_active_last_;

  // Daily active use time in seconds.
  scoped_ptr<chromeos_metrics::TaggedCounterInterface> daily_use_;

  // Active use time between user-space process crashes.
  scoped_ptr<chromeos_metrics::TaggedCounterInterface> user_crash_interval_;

  // Sleep period until the next daily usage aggregation performed by
  // the daily use monitor (see ScheduleUseMonitor).
  int usemon_interval_;
Loading