Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 1bf94382 authored by Bookatz's avatar Bookatz
Browse files

Anomaly detection is per dimension

Anomalies are now detected and declared per dimension. This means that
declareAnomaly now gets access to the key that is responsible for the
anomaly. Moreover, the refractory period is per dimension, not overall.
So a second anomaly for the same metric but a different dimension can
fire within the first dimension's refractory period. Thus, if app A
misbehaves and app B misbehaves shortly thereafter, they will both be
detected.

Eventually this key will be passed to the subscribers, although this cl
doesn't do anything with it.

Test: adb shell data/nativetest64/statsd_test/statsd_test
Change-Id: Id76856dc44fe9ecf91ac81a423e84f97c81d30ab
parent 26d5b41f
Loading
Loading
Loading
Loading
+18 −39
Original line number Original line Diff line number Diff line
@@ -162,45 +162,23 @@ int64_t AnomalyTracker::getSumOverPastBuckets(const HashableDimensionKey& key) c
    return 0;
    return 0;
}
}


bool AnomalyTracker::detectAnomaly(const int64_t& currentBucketNum,
                                   const DimToValMap& currentBucket) {
    if (currentBucketNum > mMostRecentBucketNum + 1) {
        addPastBucket(nullptr, currentBucketNum - 1);
    }
    for (auto itr = currentBucket.begin(); itr != currentBucket.end(); itr++) {
        if (itr->second + getSumOverPastBuckets(itr->first) > mAlert.trigger_if_sum_gt()) {
            return true;
        }
    }
    // In theory, we also need to check the dimsions not in the current bucket. In single-thread
    // mode, usually we could avoid the following loops.
    for (auto itr = mSumOverPastBuckets.begin(); itr != mSumOverPastBuckets.end(); itr++) {
        if (itr->second > mAlert.trigger_if_sum_gt()) {
            return true;
        }
    }
    return false;
}

bool AnomalyTracker::detectAnomaly(const int64_t& currentBucketNum, const HashableDimensionKey& key,
bool AnomalyTracker::detectAnomaly(const int64_t& currentBucketNum, const HashableDimensionKey& key,
                                   const int64_t& currentBucketValue) {
                                   const int64_t& currentBucketValue) {
    if (currentBucketNum > mMostRecentBucketNum + 1) {
    if (currentBucketNum > mMostRecentBucketNum + 1) {
        // TODO: This creates a needless 0 entry in mSumOverPastBuckets. Fix this.
        addPastBucket(key, 0, currentBucketNum - 1);
        addPastBucket(key, 0, currentBucketNum - 1);
    }
    }
    return mAlert.has_trigger_if_sum_gt()
    return mAlert.has_trigger_if_sum_gt()
            && getSumOverPastBuckets(key) + currentBucketValue > mAlert.trigger_if_sum_gt();
            && getSumOverPastBuckets(key) + currentBucketValue > mAlert.trigger_if_sum_gt();
}
}


void AnomalyTracker::declareAnomaly(const uint64_t& timestampNs) {
void AnomalyTracker::declareAnomaly(const uint64_t& timestampNs, const HashableDimensionKey& key) {
    // TODO: This should also take in the const HashableDimensionKey& key, to pass
    //       more details to incidentd and to make mRefractoryPeriodEndsSec key-specific.
    // TODO: Why receive timestamp? RefractoryPeriod should always be based on real time right now.
    // TODO: Why receive timestamp? RefractoryPeriod should always be based on real time right now.
    if (isInRefractoryPeriod(timestampNs)) {
    if (isInRefractoryPeriod(timestampNs, key)) {
        VLOG("Skipping anomaly declaration since within refractory period");
        VLOG("Skipping anomaly declaration since within refractory period");
        return;
        return;
    }
    }
    // TODO(guardrail): Consider guarding against too short refractory periods.
    mRefractoryPeriodEndsSec[key] = (timestampNs / NS_PER_SEC) + mAlert.refractory_period_secs();
    mLastAnomalyTimestampNs = timestampNs;


    // TODO: If we had access to the bucket_size_millis, consider calling resetStorage()
    // TODO: If we had access to the bucket_size_millis, consider calling resetStorage()
    // if (mAlert.refractory_period_secs() > mNumOfPastBuckets * bucketSizeNs) { resetStorage(); }
    // if (mAlert.refractory_period_secs() > mNumOfPastBuckets * bucketSizeNs) { resetStorage(); }
@@ -208,7 +186,7 @@ void AnomalyTracker::declareAnomaly(const uint64_t& timestampNs) {
    if (!mSubscriptions.empty()) {
    if (!mSubscriptions.empty()) {
        if (mAlert.has_id()) {
        if (mAlert.has_id()) {
            ALOGI("An anomaly (%llu) has occurred! Informing subscribers.",mAlert.id());
            ALOGI("An anomaly (%llu) has occurred! Informing subscribers.",mAlert.id());
            informSubscribers();
            informSubscribers(key);
        } else {
        } else {
            ALOGI("An anomaly (with no id) has occurred! Not informing any subscribers.");
            ALOGI("An anomaly (with no id) has occurred! Not informing any subscribers.");
        }
        }
@@ -218,6 +196,7 @@ void AnomalyTracker::declareAnomaly(const uint64_t& timestampNs) {


    StatsdStats::getInstance().noteAnomalyDeclared(mConfigKey, mAlert.id());
    StatsdStats::getInstance().noteAnomalyDeclared(mConfigKey, mAlert.id());


    // TODO: This should also take in the const HashableDimensionKey& key?
    android::util::stats_write(android::util::ANOMALY_DETECTED, mConfigKey.GetUid(),
    android::util::stats_write(android::util::ANOMALY_DETECTED, mConfigKey.GetUid(),
                               mConfigKey.GetId(), mAlert.id());
                               mConfigKey.GetId(), mAlert.id());
}
}
@@ -227,24 +206,24 @@ void AnomalyTracker::detectAndDeclareAnomaly(const uint64_t& timestampNs,
                                             const HashableDimensionKey& key,
                                             const HashableDimensionKey& key,
                                             const int64_t& currentBucketValue) {
                                             const int64_t& currentBucketValue) {
    if (detectAnomaly(currBucketNum, key, currentBucketValue)) {
    if (detectAnomaly(currBucketNum, key, currentBucketValue)) {
        declareAnomaly(timestampNs);
        declareAnomaly(timestampNs, key);
    }
    }
}
}


void AnomalyTracker::detectAndDeclareAnomaly(const uint64_t& timestampNs,
bool AnomalyTracker::isInRefractoryPeriod(const uint64_t& timestampNs,
                                             const int64_t& currBucketNum,
                                          const HashableDimensionKey& key) {
                                             const DimToValMap& currentBucket) {
    const auto& it = mRefractoryPeriodEndsSec.find(key);
    if (detectAnomaly(currBucketNum, currentBucket)) {
    if (it != mRefractoryPeriodEndsSec.end()) {
        declareAnomaly(timestampNs);
        if ((timestampNs / NS_PER_SEC) <= it->second) {
            return true;
        } else {
            mRefractoryPeriodEndsSec.erase(key);
        }
        }
    }
    }

    return false;
bool AnomalyTracker::isInRefractoryPeriod(const uint64_t& timestampNs) const {
    return mLastAnomalyTimestampNs >= 0 &&
            timestampNs - mLastAnomalyTimestampNs <= mAlert.refractory_period_secs() * NS_PER_SEC;
}
}


void AnomalyTracker::informSubscribers() {
void AnomalyTracker::informSubscribers(const HashableDimensionKey& key) {
    VLOG("informSubscribers called.");
    VLOG("informSubscribers called.");
    if (mSubscriptions.empty()) {
    if (mSubscriptions.empty()) {
        ALOGE("Attempt to call with no subscribers.");
        ALOGE("Attempt to call with no subscribers.");
+14 −12
Original line number Original line Diff line number Diff line
@@ -52,16 +52,13 @@ public:
                       const int64_t& bucketNum);
                       const int64_t& bucketNum);


    // Returns true if detected anomaly for the existing buckets on one or more dimension keys.
    // Returns true if detected anomaly for the existing buckets on one or more dimension keys.
    bool detectAnomaly(const int64_t& currBucketNum, const DimToValMap& currentBucket);
    bool detectAnomaly(const int64_t& currBucketNum, const HashableDimensionKey& key,
    bool detectAnomaly(const int64_t& currBucketNum, const HashableDimensionKey& key,
                       const int64_t& currentBucketValue);
                       const int64_t& currentBucketValue);


    // Informs incidentd about the detected alert.
    // Informs incidentd about the detected alert.
    void declareAnomaly(const uint64_t& timestampNs);
    void declareAnomaly(const uint64_t& timestampNs, const HashableDimensionKey& key);


    // Detects the alert and informs the incidentd when applicable.
    // Detects the alert and informs the incidentd when applicable.
    void detectAndDeclareAnomaly(const uint64_t& timestampNs, const int64_t& currBucketNum,
                                 const DimToValMap& currentBucket);
    void detectAndDeclareAnomaly(const uint64_t& timestampNs, const int64_t& currBucketNum,
    void detectAndDeclareAnomaly(const uint64_t& timestampNs, const int64_t& currBucketNum,
                                 const HashableDimensionKey& key,
                                 const HashableDimensionKey& key,
                                 const int64_t& currentBucketValue);
                                 const int64_t& currentBucketValue);
@@ -82,9 +79,11 @@ public:
        return mAlert.trigger_if_sum_gt();
        return mAlert.trigger_if_sum_gt();
    }
    }


    // Helper function to return the timestamp of the last detected anomaly.
    // Returns the refractory period timestamp (in seconds) for the given key.
    inline int64_t getLastAnomalyTimestampNs() const {
    // If there is no stored refractory period ending timestamp, returns 0.
        return mLastAnomalyTimestampNs;
    uint32_t getRefractoryPeriodEndsSec(const HashableDimensionKey& key) const {
        const auto& it = mRefractoryPeriodEndsSec.find(key);
        return it != mRefractoryPeriodEndsSec.end() ? it->second : 0;
    }
    }


    inline int getNumOfPastBuckets() const {
    inline int getNumOfPastBuckets() const {
@@ -121,8 +120,11 @@ protected:
    // The bucket number of the last added bucket.
    // The bucket number of the last added bucket.
    int64_t mMostRecentBucketNum = -1;
    int64_t mMostRecentBucketNum = -1;


    // The timestamp when the last anomaly was declared.
    // Map from each dimension to the timestamp that its refractory period (if this anomaly was
    int64_t mLastAnomalyTimestampNs = -1;
    // declared for that dimension) ends, in seconds. Only anomalies that occur after this period
    // ends will be declared.
    // Entries may be, but are not guaranteed to be, removed after the period is finished.
    unordered_map<HashableDimensionKey, uint32_t> mRefractoryPeriodEndsSec;


    void flushPastBuckets(const int64_t& currBucketNum);
    void flushPastBuckets(const int64_t& currBucketNum);


@@ -133,7 +135,7 @@ protected:
    // and remove any items with value 0.
    // and remove any items with value 0.
    void subtractBucketFromSum(const shared_ptr<DimToValMap>& bucket);
    void subtractBucketFromSum(const shared_ptr<DimToValMap>& bucket);


    bool isInRefractoryPeriod(const uint64_t& timestampNs) const;
    bool isInRefractoryPeriod(const uint64_t& timestampNs, const HashableDimensionKey& key);


    // Calculates the corresponding bucket index within the circular array.
    // Calculates the corresponding bucket index within the circular array.
    size_t index(int64_t bucketNum) const;
    size_t index(int64_t bucketNum) const;
@@ -142,12 +144,12 @@ protected:
    virtual void resetStorage();
    virtual void resetStorage();


    // Informs the subscribers that an anomaly has occurred.
    // Informs the subscribers that an anomaly has occurred.
    void informSubscribers();
    void informSubscribers(const HashableDimensionKey& key);


    FRIEND_TEST(AnomalyTrackerTest, TestConsecutiveBuckets);
    FRIEND_TEST(AnomalyTrackerTest, TestConsecutiveBuckets);
    FRIEND_TEST(AnomalyTrackerTest, TestSparseBuckets);
    FRIEND_TEST(AnomalyTrackerTest, TestSparseBuckets);
    FRIEND_TEST(GaugeMetricProducerTest, TestAnomalyDetection);
    FRIEND_TEST(GaugeMetricProducerTest, TestAnomalyDetection);
    FRIEND_TEST(CountMetricProducerTest, TestAnomalyDetection);
    FRIEND_TEST(CountMetricProducerTest, TestAnomalyDetectionUnSliced);
};
};


}  // namespace statsd
}  // namespace statsd
+3 −3
Original line number Original line Diff line number Diff line
@@ -46,7 +46,7 @@ void DurationAnomalyTracker::declareAnomalyIfAlarmExpired(const HashableDimensio


    if (itr->second != nullptr &&
    if (itr->second != nullptr &&
        static_cast<uint32_t>(timestampNs / NS_PER_SEC) >= itr->second->timestampSec) {
        static_cast<uint32_t>(timestampNs / NS_PER_SEC) >= itr->second->timestampSec) {
        declareAnomaly(timestampNs);
        declareAnomaly(timestampNs, dimensionKey);
        stopAlarm(dimensionKey);
        stopAlarm(dimensionKey);
    }
    }
}
}
@@ -55,7 +55,7 @@ void DurationAnomalyTracker::startAlarm(const HashableDimensionKey& dimensionKey
                                const uint64_t& timestampNs) {
                                const uint64_t& timestampNs) {


    uint32_t timestampSec = static_cast<uint32_t>(timestampNs / NS_PER_SEC);
    uint32_t timestampSec = static_cast<uint32_t>(timestampNs / NS_PER_SEC);
    if (isInRefractoryPeriod(timestampNs)) {
    if (isInRefractoryPeriod(timestampNs, dimensionKey)) {
        VLOG("Skipping setting anomaly alarm since it'd fall in the refractory period");
        VLOG("Skipping setting anomaly alarm since it'd fall in the refractory period");
        return;
        return;
    }
    }
@@ -104,7 +104,7 @@ void DurationAnomalyTracker::informAlarmsFired(const uint64_t& timestampNs,


    // Now declare each of these alarms to have fired.
    // Now declare each of these alarms to have fired.
    for (const auto& kv : matchedAlarms) {
    for (const auto& kv : matchedAlarms) {
        declareAnomaly(timestampNs /* TODO: , kv.first */);
        declareAnomaly(timestampNs, kv.first);
        mAlarms.erase(kv.first);
        mAlarms.erase(kv.first);
        firedAlarms.erase(kv.second);  // No one else can also own it, so we're done with it.
        firedAlarms.erase(kv.second);  // No one else can also own it, so we're done with it.
    }
    }
+3 −3
Original line number Original line Diff line number Diff line
@@ -50,7 +50,7 @@ public:
                                      const uint64_t& timestampNs);
                                      const uint64_t& timestampNs);


    // Declares an anomaly for each alarm in firedAlarms that belongs to this DurationAnomalyTracker
    // Declares an anomaly for each alarm in firedAlarms that belongs to this DurationAnomalyTracker
    // and removes it from firedAlarms. Does NOT remove the alarm from the AnomalyMonitor.
    // and removes it from firedAlarms.
    // TODO: This will actually be called from a different thread, so make it thread-safe!
    // TODO: This will actually be called from a different thread, so make it thread-safe!
    //          This means that almost every function in DurationAnomalyTracker needs to be locked.
    //          This means that almost every function in DurationAnomalyTracker needs to be locked.
    //          But this should be done at the level of StatsLogProcessor, which needs to lock
    //          But this should be done at the level of StatsLogProcessor, which needs to lock
@@ -70,10 +70,10 @@ protected:
    void resetStorage() override;
    void resetStorage() override;


    FRIEND_TEST(OringDurationTrackerTest, TestPredictAnomalyTimestamp);
    FRIEND_TEST(OringDurationTrackerTest, TestPredictAnomalyTimestamp);
    FRIEND_TEST(OringDurationTrackerTest, TestAnomalyDetection);
    FRIEND_TEST(OringDurationTrackerTest, TestAnomalyDetectionExpiredAlarm);
    FRIEND_TEST(OringDurationTrackerTest, TestAnomalyDetectionFiredAlarm);
    FRIEND_TEST(MaxDurationTrackerTest, TestAnomalyDetection);
    FRIEND_TEST(MaxDurationTrackerTest, TestAnomalyDetection);
    FRIEND_TEST(MaxDurationTrackerTest, TestAnomalyDetection);
    FRIEND_TEST(MaxDurationTrackerTest, TestAnomalyDetection);
    FRIEND_TEST(OringDurationTrackerTest, TestAnomalyDetection);
};
};


}  // namespace statsd
}  // namespace statsd
+1 −1
Original line number Original line Diff line number Diff line
@@ -84,7 +84,7 @@ private:
    FRIEND_TEST(CountMetricProducerTest, TestNonDimensionalEvents);
    FRIEND_TEST(CountMetricProducerTest, TestNonDimensionalEvents);
    FRIEND_TEST(CountMetricProducerTest, TestEventsWithNonSlicedCondition);
    FRIEND_TEST(CountMetricProducerTest, TestEventsWithNonSlicedCondition);
    FRIEND_TEST(CountMetricProducerTest, TestEventsWithSlicedCondition);
    FRIEND_TEST(CountMetricProducerTest, TestEventsWithSlicedCondition);
    FRIEND_TEST(CountMetricProducerTest, TestAnomalyDetection);
    FRIEND_TEST(CountMetricProducerTest, TestAnomalyDetectionUnSliced);
};
};


}  // namespace statsd
}  // namespace statsd
Loading