Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 5222946e authored by Misha Wagner's avatar Misha Wagner Committed by Android (Google) Code Review
Browse files

Merge "Add more statistics on failure cases for value metrics"

parents 131f0814 1eee2210
Loading
Loading
Loading
Loading
+4 −0
Original line number Diff line number Diff line
@@ -55,6 +55,10 @@ bool StatsCompanionServicePuller::PullInternal(vector<shared_ptr<LogEvent> >* da
        Status status = statsCompanionServiceCopy->pullData(mTagId, &returned_value);
        if (!status.isOk()) {
            ALOGW("StatsCompanionServicePuller::pull failed for %d", mTagId);
            StatsdStats::getInstance().noteStatsCompanionPullFailed(mTagId);
            if (status.exceptionCode() == Status::Exception::EX_TRANSACTION_FAILED) {
                StatsdStats::getInstance().noteStatsCompanionPullBinderTransactionFailed(mTagId);
            }
            return false;
        }
        data->clear();
+3 −0
Original line number Diff line number Diff line
@@ -231,6 +231,9 @@ bool StatsPullerManager::Pull(int tagId, vector<shared_ptr<LogEvent>>* data) {
    if (kAllPullAtomInfo.find(tagId) != kAllPullAtomInfo.end()) {
        bool ret = kAllPullAtomInfo.find(tagId)->second.puller->Pull(data);
        VLOG("pulled %d items", (int)data->size());
        if (!ret) {
            StatsdStats::getInstance().notePullFailed(tagId);
        }
        return ret;
    } else {
        VLOG("Unknown tagId %d", tagId);
+59 −0
Original line number Diff line number Diff line
@@ -403,6 +403,60 @@ void StatsdStats::noteSystemServerRestart(int32_t timeSec) {
    mSystemServerRestartSec.push_back(timeSec);
}

void StatsdStats::notePullFailed(int atomId) {
    lock_guard<std::mutex> lock(mLock);
    mPulledAtomStats[atomId].pullFailed++;
}

void StatsdStats::noteStatsCompanionPullFailed(int atomId) {
    lock_guard<std::mutex> lock(mLock);
    mPulledAtomStats[atomId].statsCompanionPullFailed++;
}

void StatsdStats::noteStatsCompanionPullBinderTransactionFailed(int atomId) {
    lock_guard<std::mutex> lock(mLock);
    mPulledAtomStats[atomId].statsCompanionPullBinderTransactionFailed++;
}

void StatsdStats::noteEmptyData(int atomId) {
    lock_guard<std::mutex> lock(mLock);
    mPulledAtomStats[atomId].emptyData++;
}

void StatsdStats::noteHardDimensionLimitReached(int metricId) {
    lock_guard<std::mutex> lock(mLock);
    getAtomMetricStats(metricId).hardDimensionLimitReached++;
}

void StatsdStats::noteLateLogEventSkipped(int metricId) {
    lock_guard<std::mutex> lock(mLock);
    getAtomMetricStats(metricId).lateLogEventSkipped++;
}

void StatsdStats::noteSkippedForwardBuckets(int metricId) {
    lock_guard<std::mutex> lock(mLock);
    getAtomMetricStats(metricId).skippedForwardBuckets++;
}

void StatsdStats::noteBadValueType(int metricId) {
    lock_guard<std::mutex> lock(mLock);
    getAtomMetricStats(metricId).badValueType++;
}

void StatsdStats::noteConditionChangeInNextBucket(int metricId) {
    lock_guard<std::mutex> lock(mLock);
    getAtomMetricStats(metricId).conditionChangeInNextBucket++;
}

// Returns the mutable AtomMetricStats entry for metricId, inserting an entry with all counters
// at zero if the metric has none yet.
//
// This function does not take mLock itself; the note*() callers in this file acquire it before
// calling. The returned reference stays valid until the entry is erased from mAtomMetricStats
// (for example when the map is cleared on reset).
//
// NOTE(review): callers pass mMetricId, which is logged elsewhere as a long long. If metric IDs
// are 64-bit, the int parameter (and the int map key) narrows them -- confirm and widen the
// declaration, definition and map key together if so.
StatsdStats::AtomMetricStats& StatsdStats::getAtomMetricStats(int metricId) {
    // operator[] finds the entry or value-initializes a new one in a single lookup, replacing
    // the previous find() followed by emplace().
    return mAtomMetricStats[metricId];
}

void StatsdStats::reset() {
    lock_guard<std::mutex> lock(mLock);
    resetInternalLocked();
@@ -442,6 +496,7 @@ void StatsdStats::resetInternalLocked() {
        pullStats.second.pullTimeout = 0;
        pullStats.second.pullExceedMaxDelay = 0;
    }
    mAtomMetricStats.clear();
}

string buildTimeString(int64_t timeSec) {
@@ -713,6 +768,10 @@ void StatsdStats::dumpStats(std::vector<uint8_t>* output, bool reset) {
        android::os::statsd::writePullerStatsToStream(pair, &proto);
    }

    for (const auto& pair : mAtomMetricStats) {
        android::os::statsd::writeAtomMetricStatsToStream(pair, &proto);
    }

    if (mAnomalyAlarmRegisteredStats > 0) {
        uint64_t token = proto.start(FIELD_TYPE_MESSAGE | FIELD_ID_ANOMALY_ALARM_STATS);
        proto.write(FIELD_TYPE_INT32 | FIELD_ID_ANOMALY_ALARMS_REGISTERED,
+68 −0
Original line number Diff line number Diff line
@@ -317,6 +317,53 @@ public:
     */
    void noteLogLost(int32_t wallClockTimeSec, int32_t count, int lastError);

    /**
     * Records that the pull of an atom has failed. Increments the pullFailed counter kept for
     * atomId.
     */
    void notePullFailed(int atomId);

    /**
     * Records that the pull of a StatsCompanionService atom has failed. Increments the
     * statsCompanionPullFailed counter kept for atomId.
     */
    void noteStatsCompanionPullFailed(int atomId);

    /**
     * Records that the pull of a StatsCompanionService atom has failed due to a failed binder
     * transaction. This can happen when StatsCompanionService returns too
     * much data (the max Binder parcel size is 1MB). Increments the
     * statsCompanionPullBinderTransactionFailed counter kept for atomId.
     */
    void noteStatsCompanionPullBinderTransactionFailed(int atomId);

    /**
     * Records that a pull of the given atom returned no data. Increments the emptyData counter
     * kept for atomId.
     */
    void noteEmptyData(int atomId);

    /**
     * Records that a metric hit the hard limit on its dimension cardinality and dropped data.
     * The counter is kept per metric ID, not per atom ID.
     */
    void noteHardDimensionLimitReached(int metricId);

    /**
     * Records that a log event for a metric was too late, arrived in the wrong bucket and was
     * skipped. The counter is kept per metric ID, not per atom ID.
     */
    void noteLateLogEventSkipped(int metricId);

    /**
     * Records that a metric skipped buckets because time elapsed without any data for them.
     * The counter is kept per metric ID, not per atom ID.
     */
    void noteSkippedForwardBuckets(int metricId);

    /**
     * Records that a metric received a value of an unsupported type. The counter is kept per
     * metric ID, not per atom ID.
     */
    void noteBadValueType(int metricId);

    /**
     * Records that a condition change for a metric was too late, arrived in the wrong bucket
     * and was skipped. The counter is kept per metric ID, not per atom ID.
     */
    void noteConditionChangeInNextBucket(int metricId);

    /**
     * Reset the historical stats. Including all stats in icebox, and the tracked stats about
     * metrics, matchers, and atoms. The active configs will be kept and StatsdStats will continue
@@ -349,8 +396,20 @@ public:
        long dataError = 0;
        long pullTimeout = 0;
        long pullExceedMaxDelay = 0;
        long pullFailed = 0;
        long statsCompanionPullFailed = 0;
        long statsCompanionPullBinderTransactionFailed = 0;
        long emptyData = 0;
    } PulledAtomStats;

    /**
     * Per-metric failure counters. Every counter starts at zero and is incremented by the
     * matching note*() method.
     *
     * Declared as a named struct rather than `typedef struct { ... } Name;`: an unnamed class
     * with default member initializers may not be given a typedef name for linkage purposes.
     */
    struct AtomMetricStats {
        // Data was dropped because the hard dimension limit was exceeded.
        long hardDimensionLimitReached = 0;
        // Data was skipped because its timestamp preceded the start of the current bucket.
        long lateLogEventSkipped = 0;
        // More than one bucket was skipped forward at once.
        long skippedForwardBuckets = 0;
        // A value could not be read as a double or a long.
        long badValueType = 0;
        // A condition change was skipped because it preceded the start of the current bucket.
        long conditionChangeInNextBucket = 0;
    };

private:
    StatsdStats();

@@ -378,6 +437,9 @@ private:
    // Maps PullAtomId to its stats. The size is capped by the puller atom counts.
    std::map<int, PulledAtomStats> mPulledAtomStats;

    // Maps metric ID to its stats. The size is capped by the number of metrics.
    std::map<int, AtomMetricStats> mAtomMetricStats;

    struct LogLossStats {
        LogLossStats(int32_t sec, int32_t count, int32_t error)
            : mWallClockSec(sec), mCount(count), mLastError(error) {
@@ -414,6 +476,12 @@ private:

    void addToIceBoxLocked(std::shared_ptr<ConfigStats>& stats);

    /**
     * Get a reference to AtomMetricStats for a metric. If none exists, create it with all
     * counters at zero. The reference stays valid only until the entry is removed from
     * mAtomMetricStats; resetInternalLocked() clears that map, so do not hold the reference
     * across a reset. Does not lock: the note*() callers acquire mLock before calling.
     */
    StatsdStats::AtomMetricStats& getAtomMetricStats(int metricId);

    FRIEND_TEST(StatsdStatsTest, TestValidConfigAdd);
    FRIEND_TEST(StatsdStatsTest, TestInvalidConfigAdd);
    FRIEND_TEST(StatsdStatsTest, TestConfigRemove);
+12 −0
Original line number Diff line number Diff line
@@ -322,6 +322,7 @@ void ValueMetricProducer::onConditionChangedLocked(const bool condition,
    if (eventTimeNs < mCurrentBucketStartTimeNs) {
        VLOG("Skip event due to late arrival: %lld vs %lld", (long long)eventTimeNs,
             (long long)mCurrentBucketStartTimeNs);
        StatsdStats::getInstance().noteConditionChangeInNextBucket(mMetricId);
        return;
    }

@@ -359,6 +360,12 @@ void ValueMetricProducer::pullAndMatchEventsLocked(const int64_t timestampNs) {
    }
    StatsdStats::getInstance().notePullDelay(mPullTagId, pullDelayNs);

    if (timestampNs < mCurrentBucketStartTimeNs) {
        // The data will be skipped in onMatchedLogEventInternalLocked, but we don't want to report
        // for every event, just the pull
        StatsdStats::getInstance().noteLateLogEventSkipped(mMetricId);
    }

    for (const auto& data : allData) {
        // make a copy before doing any changes
        LogEvent localCopy = data->makeCopy();
@@ -380,6 +387,7 @@ void ValueMetricProducer::onDataPulled(const std::vector<std::shared_ptr<LogEven
    if (mCondition) {
        if (allData.size() == 0) {
            VLOG("Data pulled is empty");
            StatsdStats::getInstance().noteEmptyData(mPullTagId);
            return;
        }
        // For scheduled pulled data, the effective event time is snap to the nearest
@@ -394,6 +402,7 @@ void ValueMetricProducer::onDataPulled(const std::vector<std::shared_ptr<LogEven
        if (bucketEndTime < mCurrentBucketStartTimeNs) {
            VLOG("Skip bucket end pull due to late arrival: %lld vs %lld", (long long)bucketEndTime,
                 (long long)mCurrentBucketStartTimeNs);
            StatsdStats::getInstance().noteLateLogEventSkipped(mMetricId);
            return;
        }
        for (const auto& data : allData) {
@@ -442,6 +451,7 @@ bool ValueMetricProducer::hitGuardRailLocked(const MetricDimensionKey& newKey) {
        if (newTupleCount > mDimensionHardLimit) {
            ALOGE("ValueMetric %lld dropping data for dimension key %s", (long long)mMetricId,
                  newKey.toString().c_str());
            StatsdStats::getInstance().noteHardDimensionLimitReached(mMetricId);
            return true;
        }
    }
@@ -539,6 +549,7 @@ void ValueMetricProducer::onMatchedLogEventInternalLocked(const size_t matcherIn
        Value value;
        if (!getDoubleOrLong(event, matcher, value)) {
            VLOG("Failed to get value %d from event %s", i, event.ToString().c_str());
            StatsdStats::getInstance().noteBadValueType(mMetricId);
            return;
        }
        interval.seenNewData = true;
@@ -656,6 +667,7 @@ void ValueMetricProducer::flushIfNeededLocked(const int64_t& eventTimeNs) {

    if (numBucketsForward > 1) {
        VLOG("Skipping forward %lld buckets", (long long)numBucketsForward);
        StatsdStats::getInstance().noteSkippedForwardBuckets(mMetricId);
        // take base again in future good bucket.
        resetBase();
    }
Loading