Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 2e414b99 authored by David Chen's avatar David Chen
Browse files

Adds anomaly detection to max duration metrics.

Statsd supports anomaly detection for max duration, which is
conceptually like asking for an anomaly on the current duration.
There was a bug previously in the logic for max duration since we
recorded a duration every time a dimension went into pause. Now, we
only record a max duration when the dimension goes into stop.

When a dimension inside the MaxDurationTracker leaves the started
state, we need to check if we still need to keep an anomaly alarm
set. It's possible that we just immediately set an alarm with the
same timestamp as what was just deleted.

Test: Added some unit-tests and tested on marlin-eng.
Change-Id: I09c82cd266b4cc6e1c893ad166e602bf08baca0e
parent f4546316
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
@@ -72,7 +72,8 @@ protected:
    FRIEND_TEST(OringDurationTrackerTest, TestAnomalyDetectionExpiredAlarm);
    FRIEND_TEST(OringDurationTrackerTest, TestAnomalyDetectionFiredAlarm);
    FRIEND_TEST(MaxDurationTrackerTest, TestAnomalyDetection);
    FRIEND_TEST(MaxDurationTrackerTest, TestAnomalyDetection);
    FRIEND_TEST(MaxDurationTrackerTest, TestAnomalyPredictedTimestamp);
    FRIEND_TEST(MaxDurationTrackerTest, TestAnomalyPredictedTimestamp_UpdatedOnStop);
};

}  // namespace statsd
+41 −8
Original line number Diff line number Diff line
@@ -93,6 +93,7 @@ void MaxDurationTracker::noteStart(const HashableDimensionKey& key, bool conditi
            } else {
                duration.state = DurationState::kStarted;
                duration.lastStartTime = eventTime;
                startAnomalyAlarm(eventTime);
            }
            duration.startCount = 1;
            break;
@@ -116,12 +117,18 @@ void MaxDurationTracker::noteStop(const HashableDimensionKey& key, const uint64_
        case DurationState::kStarted: {
            duration.startCount--;
            if (forceStop || !mNested || duration.startCount <= 0) {
                stopAnomalyAlarm();
                duration.state = DurationState::kStopped;
                int64_t durationTime = eventTime - duration.lastStartTime;
                VLOG("Max, key %s, Stop %lld %lld %lld", key.c_str(),
                     (long long)duration.lastStartTime, (long long)eventTime,
                     (long long)durationTime);
                duration.lastDuration += durationTime;
                if (anyStarted()) {
                    // In case any other dimensions are still started, we need to keep the alarm
                    // set.
                    startAnomalyAlarm(eventTime);
                }
                VLOG("  record duration: %lld ", (long long)duration.lastDuration);
            }
            break;
@@ -146,6 +153,15 @@ void MaxDurationTracker::noteStop(const HashableDimensionKey& key, const uint64_
    }
}

bool MaxDurationTracker::anyStarted() {
    for (auto& pair : mInfos) {
        if (pair.second.state == kStarted) {
            return true;
        }
    }
    return false;
}

void MaxDurationTracker::noteStopAll(const uint64_t eventTime) {
    std::set<HashableDimensionKey> keys;
    for (const auto& pair : mInfos) {
@@ -251,35 +267,52 @@ void MaxDurationTracker::noteConditionChanged(const HashableDimensionKey& key, b

    switch (it->second.state) {
        case kStarted:
            // if condition becomes false, kStarted -> kPaused. Record the current duration.
            // If condition becomes false, kStarted -> kPaused. Record the current duration and
            // stop anomaly alarm.
            if (!conditionMet) {
                stopAnomalyAlarm();
                it->second.state = DurationState::kPaused;
                it->second.lastDuration += (timestamp - it->second.lastStartTime);
                if (anyStarted()) {
                    // In case any other dimensions are still started, we need to set the alarm.
                    startAnomalyAlarm(timestamp);
                }
                VLOG("MaxDurationTracker Key: %s Started->Paused ", key.c_str());
            }
            break;
        case kStopped:
            // nothing to do if it's stopped.
            // Nothing to do if it's stopped.
            break;
        case kPaused:
            // if condition becomes true, kPaused -> kStarted. and the start time is the condition
            // If condition becomes true, kPaused -> kStarted. and the start time is the condition
            // change time.
            if (conditionMet) {
                it->second.state = DurationState::kStarted;
                it->second.lastStartTime = timestamp;
                startAnomalyAlarm(timestamp);
                VLOG("MaxDurationTracker Key: %s Paused->Started", key.c_str());
            }
            break;
    }
    if (it->second.lastDuration > mDuration) {
        mDuration = it->second.lastDuration;
    }
    // Note that we don't update mDuration here since it's only updated during noteStop.
}

int64_t MaxDurationTracker::predictAnomalyTimestampNs(const DurationAnomalyTracker& anomalyTracker,
                                                      const uint64_t currentTimestamp) const {
    ALOGE("Max duration producer does not support anomaly timestamp prediction!!!");
    return currentTimestamp;
    // The allowed time we can continue in the current state is the
    // (anomaly threshold) - max(elapsed time of the started mInfos).
    int64_t maxElapsed = 0;
    for (auto it = mInfos.begin(); it != mInfos.end(); ++it) {
        if (it->second.state == DurationState::kStarted) {
            int64_t duration =
                    it->second.lastDuration + (currentTimestamp - it->second.lastStartTime);
            if (duration > maxElapsed) {
                maxElapsed = duration;
            }
        }
    }
    int64_t threshold = anomalyTracker.getAnomalyThreshold();
    return currentTimestamp + threshold - maxElapsed;
}

void MaxDurationTracker::dumpStates(FILE* out, bool verbose) const {
+5 −0
Original line number Diff line number Diff line
@@ -60,6 +60,9 @@ public:
    void dumpStates(FILE* out, bool verbose) const override;

private:
    // Returns true if at least one of the mInfos is started.
    bool anyStarted();

    std::unordered_map<HashableDimensionKey, DurationInfo> mInfos;

    void noteConditionChanged(const HashableDimensionKey& key, bool conditionMet,
@@ -72,6 +75,8 @@ private:
    FRIEND_TEST(MaxDurationTrackerTest, TestCrossBucketBoundary);
    FRIEND_TEST(MaxDurationTrackerTest, TestMaxDurationWithCondition);
    FRIEND_TEST(MaxDurationTrackerTest, TestStopAll);
    FRIEND_TEST(MaxDurationTrackerTest, TestAnomalyDetection);
    FRIEND_TEST(MaxDurationTrackerTest, TestAnomalyPredictedTimestamp);
};

}  // namespace statsd
+173 −13
Original line number Diff line number Diff line
@@ -204,8 +204,53 @@ TEST(MaxDurationTrackerTest, TestCrossBucketBoundary_nested) {
}

TEST(MaxDurationTrackerTest, TestMaxDurationWithCondition) {
    const std::vector<HashableDimensionKey> conditionKey = {key1};

    vector<Matcher> dimensionInCondition;
    sp<MockConditionWizard> wizard = new NaggyMock<MockConditionWizard>();

    ConditionKey conditionKey1;
    MetricDimensionKey eventKey = getMockedMetricDimensionKey(TagId, 1, "1");
    conditionKey1[StringToId("APP_BACKGROUND")] = conditionKey;

    /**
    Start in first bucket, stop in second bucket. Condition turns on and off in the first bucket
    and again turns on and off in the second bucket.
    */
    uint64_t bucketStartTimeNs = 10000000000;
    uint64_t bucketEndTimeNs = bucketStartTimeNs + bucketSizeNs;
    uint64_t eventStartTimeNs = bucketStartTimeNs + 1 * NS_PER_SEC;
    uint64_t conditionStarts1 = bucketStartTimeNs + 11 * NS_PER_SEC;
    uint64_t conditionStops1 = bucketStartTimeNs + 14 * NS_PER_SEC;
    uint64_t conditionStarts2 = bucketStartTimeNs + bucketSizeNs + 5 * NS_PER_SEC;
    uint64_t conditionStops2 = conditionStarts2 + 10 * NS_PER_SEC;
    uint64_t eventStopTimeNs = conditionStops2 + 8 * NS_PER_SEC;

    int64_t metricId = 1;
    MaxDurationTracker tracker(kConfigKey, metricId, eventKey, wizard, 1, dimensionInCondition,
                               false, bucketStartTimeNs, 0, bucketStartTimeNs, bucketSizeNs, true,
                               {});
    EXPECT_TRUE(tracker.mAnomalyTrackers.empty());

    tracker.noteStart(key1, false, eventStartTimeNs, conditionKey1);
    tracker.noteConditionChanged(key1, true, conditionStarts1);
    tracker.noteConditionChanged(key1, false, conditionStops1);
    unordered_map<MetricDimensionKey, vector<DurationBucket>> buckets;
    tracker.flushIfNeeded(bucketStartTimeNs + bucketSizeNs + 1, &buckets);
    EXPECT_EQ(0U, buckets.size());

    tracker.noteConditionChanged(key1, true, conditionStarts2);
    tracker.noteConditionChanged(key1, false, conditionStops2);
    tracker.noteStop(key1, eventStopTimeNs, false);
    tracker.flushIfNeeded(bucketStartTimeNs + 2 * bucketSizeNs + 1, &buckets);
    EXPECT_EQ(1U, buckets.size());
    vector<DurationBucket> item = buckets.begin()->second;
    EXPECT_EQ(1UL, item.size());
    EXPECT_EQ(13ULL * NS_PER_SEC, item[0].mDuration);
}

TEST(MaxDurationTrackerTest, TestAnomalyDetection) {
    const std::vector<HashableDimensionKey> conditionKey = {getMockedDimensionKey(TagId, 4, "1")};
    const HashableDimensionKey key1 = getMockedDimensionKey(TagId, 1, "1");

    vector<Matcher> dimensionInCondition;
    sp<MockConditionWizard> wizard = new NaggyMock<MockConditionWizard>();
@@ -214,34 +259,149 @@ TEST(MaxDurationTrackerTest, TestMaxDurationWithCondition) {
    MetricDimensionKey eventKey = getMockedMetricDimensionKey(TagId, 2, "maps");
    conditionKey1[StringToId("APP_BACKGROUND")] = conditionKey;

    EXPECT_CALL(*wizard, query(_, conditionKey1, _, _))  // #4
            .WillOnce(Return(ConditionState::kFalse));

    unordered_map<MetricDimensionKey, vector<DurationBucket>> buckets;

    uint64_t bucketSizeNs = 30 * 1000 * 1000 * 1000LL;
    uint64_t bucketStartTimeNs = 10000000000;
    uint64_t bucketEndTimeNs = bucketStartTimeNs + bucketSizeNs;
    uint64_t bucketNum = 0;
    uint64_t eventStartTimeNs = bucketStartTimeNs + 1;
    uint64_t eventStartTimeNs = 13000000000;
    int64_t durationTimeNs = 2 * 1000;

    int64_t metricId = 1;
    Alert alert;
    alert.set_id(101);
    alert.set_metric_id(1);
    alert.set_trigger_if_sum_gt(40 * NS_PER_SEC);
    alert.set_num_buckets(2);
    const int32_t refPeriodSec = 45;
    alert.set_refractory_period_secs(refPeriodSec);
    sp<DurationAnomalyTracker> anomalyTracker = new DurationAnomalyTracker(alert, kConfigKey);
    MaxDurationTracker tracker(kConfigKey, metricId, eventKey, wizard, 1, dimensionInCondition,
                               false, bucketStartTimeNs, bucketNum, bucketStartTimeNs, bucketSizeNs,
                               true, {});
    EXPECT_TRUE(tracker.mAnomalyTrackers.empty());
                               true, {anomalyTracker});

    tracker.noteStart(key1, true, eventStartTimeNs, conditionKey1);
    sp<const AnomalyAlarm> alarm = anomalyTracker->mAlarms.begin()->second;
    EXPECT_EQ((long long)(53ULL * NS_PER_SEC), (long long)(alarm->timestampSec * NS_PER_SEC));

    // Remove the anomaly alarm when the duration is no longer fully met.
    tracker.noteConditionChanged(key1, false, eventStartTimeNs + 15 * NS_PER_SEC);
    EXPECT_EQ(0U, anomalyTracker->mAlarms.size());

    // Since the condition was off for 10 seconds, the anomaly should trigger 10 sec later.
    tracker.noteConditionChanged(key1, true, eventStartTimeNs + 25 * NS_PER_SEC);
    EXPECT_EQ(1U, anomalyTracker->mAlarms.size());
    alarm = anomalyTracker->mAlarms.begin()->second;
    EXPECT_EQ((long long)(63ULL * NS_PER_SEC), (long long)(alarm->timestampSec * NS_PER_SEC));
}

// This tests that we correctly compute the predicted time of an anomaly assuming that the current
// state continues forward as-is.
TEST(MaxDurationTrackerTest, TestAnomalyPredictedTimestamp) {
    const std::vector<HashableDimensionKey> conditionKey = {getMockedDimensionKey(TagId, 4, "1")};

    tracker.onSlicedConditionMayChange(eventStartTimeNs + 5);
    vector<Matcher> dimensionInCondition;
    sp<MockConditionWizard> wizard = new NaggyMock<MockConditionWizard>();

    tracker.noteStop(key1, eventStartTimeNs + durationTimeNs, false);
    ConditionKey conditionKey1;
    MetricDimensionKey eventKey = getMockedMetricDimensionKey(TagId, 2, "maps");
    conditionKey1[StringToId("APP_BACKGROUND")] = conditionKey;
    ConditionKey conditionKey2;
    conditionKey2[StringToId("APP_BACKGROUND")] = {getMockedDimensionKey(TagId, 4, "2")};

    tracker.flushIfNeeded(bucketStartTimeNs + bucketSizeNs + 1, &buckets);
    EXPECT_TRUE(buckets.find(eventKey) != buckets.end());
    EXPECT_EQ(1u, buckets[eventKey].size());
    EXPECT_EQ(5ULL, buckets[eventKey][0].mDuration);
    unordered_map<MetricDimensionKey, vector<DurationBucket>> buckets;

    /**
     * Suppose we have two sub-dimensions that we're taking the MAX over. In the first of these
     * nested dimensions, we enter the pause state after 3 seconds. When we resume, the second
     * dimension has already been running for 4 seconds. Thus, we have 40-4=36 seconds remaining
     * before we trigger the anomaly.
     */
    uint64_t bucketSizeNs = 30 * 1000 * 1000 * 1000LL;
    uint64_t bucketStartTimeNs = 10000000000;
    uint64_t bucketEndTimeNs = bucketStartTimeNs + bucketSizeNs;
    uint64_t bucketNum = 0;
    uint64_t eventStartTimeNs = bucketStartTimeNs + 5 * NS_PER_SEC;  // Condition is off at start.
    uint64_t conditionStarts1 = bucketStartTimeNs + 11 * NS_PER_SEC;
    uint64_t conditionStops1 = bucketStartTimeNs + 14 * NS_PER_SEC;
    uint64_t conditionStarts2 = bucketStartTimeNs + 20 * NS_PER_SEC;
    uint64_t eventStartTimeNs2 = conditionStarts2 - 4 * NS_PER_SEC;

    int64_t metricId = 1;
    Alert alert;
    alert.set_id(101);
    alert.set_metric_id(1);
    alert.set_trigger_if_sum_gt(40 * NS_PER_SEC);
    alert.set_num_buckets(2);
    const int32_t refPeriodSec = 45;
    alert.set_refractory_period_secs(refPeriodSec);
    sp<DurationAnomalyTracker> anomalyTracker = new DurationAnomalyTracker(alert, kConfigKey);
    MaxDurationTracker tracker(kConfigKey, metricId, eventKey, wizard, 1, dimensionInCondition,
                               false, bucketStartTimeNs, bucketNum, bucketStartTimeNs, bucketSizeNs,
                               true, {anomalyTracker});

    tracker.noteStart(key1, false, eventStartTimeNs, conditionKey1);
    tracker.noteConditionChanged(key1, true, conditionStarts1);
    tracker.noteConditionChanged(key1, false, conditionStops1);
    tracker.noteStart(key2, true, eventStartTimeNs2, conditionKey2);  // Condition is on already.
    tracker.noteConditionChanged(key1, true, conditionStarts2);
    EXPECT_EQ(1U, anomalyTracker->mAlarms.size());
    auto alarm = anomalyTracker->mAlarms.begin()->second;
    EXPECT_EQ(conditionStarts2 + 36 * NS_PER_SEC,
              (unsigned long long)(alarm->timestampSec * NS_PER_SEC));
}

// Suppose that within one tracker there are two dimensions A and B.
// Suppose A starts, then B starts, and then A stops. We still need to set an anomaly based on the
// elapsed duration of B.
TEST(MaxDurationTrackerTest, TestAnomalyPredictedTimestamp_UpdatedOnStop) {
    const std::vector<HashableDimensionKey> conditionKey = {getMockedDimensionKey(TagId, 4, "1")};

    vector<Matcher> dimensionInCondition;
    sp<MockConditionWizard> wizard = new NaggyMock<MockConditionWizard>();

    ConditionKey conditionKey1;
    MetricDimensionKey eventKey = getMockedMetricDimensionKey(TagId, 2, "maps");
    conditionKey1[StringToId("APP_BACKGROUND")] = conditionKey;
    ConditionKey conditionKey2;
    conditionKey2[StringToId("APP_BACKGROUND")] = {getMockedDimensionKey(TagId, 4, "2")};

    unordered_map<MetricDimensionKey, vector<DurationBucket>> buckets;

    /**
     * Suppose we have two sub-dimensions that we're taking the MAX over. In the first of these
     * nested dimensions, are started for 8 seconds. When we stop, the other nested dimension has
     * been started for 5 seconds. So we can only allow 35 more seconds from now.
     */
    uint64_t bucketSizeNs = 30 * 1000 * 1000 * 1000LL;
    uint64_t bucketStartTimeNs = 10000000000;
    uint64_t bucketEndTimeNs = bucketStartTimeNs + bucketSizeNs;
    uint64_t bucketNum = 0;
    uint64_t eventStartTimeNs1 = bucketStartTimeNs + 5 * NS_PER_SEC;  // Condition is off at start.
    uint64_t eventStopTimeNs1 = bucketStartTimeNs + 13 * NS_PER_SEC;
    uint64_t eventStartTimeNs2 = bucketStartTimeNs + 8 * NS_PER_SEC;

    int64_t metricId = 1;
    Alert alert;
    alert.set_id(101);
    alert.set_metric_id(1);
    alert.set_trigger_if_sum_gt(40 * NS_PER_SEC);
    alert.set_num_buckets(2);
    const int32_t refPeriodSec = 45;
    alert.set_refractory_period_secs(refPeriodSec);
    sp<DurationAnomalyTracker> anomalyTracker = new DurationAnomalyTracker(alert, kConfigKey);
    MaxDurationTracker tracker(kConfigKey, metricId, eventKey, wizard, 1, dimensionInCondition,
                               false, bucketStartTimeNs, bucketNum, bucketStartTimeNs, bucketSizeNs,
                               true, {anomalyTracker});

    tracker.noteStart(key1, true, eventStartTimeNs1, conditionKey1);
    tracker.noteStart(key2, true, eventStartTimeNs2, conditionKey2);
    tracker.noteStop(key1, eventStopTimeNs1, false);
    EXPECT_EQ(1U, anomalyTracker->mAlarms.size());
    auto alarm = anomalyTracker->mAlarms.begin()->second;
    EXPECT_EQ(eventStopTimeNs1 + 35 * NS_PER_SEC,
              (unsigned long long)(alarm->timestampSec * NS_PER_SEC));
}

}  // namespace statsd