Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 16f58fe4 authored by Parth Sane's avatar Parth Sane Committed by Android (Google) Code Review
Browse files

Merge "Add latency stats to binder stats pusher" into main

parents 368995e6 1bf45bab
Loading
Loading
Loading
Loading
+68 −25
Original line number Diff line number Diff line
@@ -24,6 +24,7 @@
namespace android {
// defined in stats/atoms/framework/framework_extension_atoms.proto
constexpr int32_t kBinderSpamAtomId = 1064;
constexpr int32_t kBinderLatencyAtomId = 1090;
[[clang::no_destroy]] static const StaticString16 kStatsBootstrapServiceName(u"statsbootstrap");

sp<os::IStatsBootstrapAtomService> BinderStatsPusher::getBootstrapAtomServiceLocked(
@@ -57,47 +58,89 @@ sp<os::IStatsBootstrapAtomService> BinderStatsPusher::getBootstrapAtomServiceLoc
    return service;
}

void BinderStatsPusher::aggregateBinderSpamLocked(const std::vector<BinderCallData>& data,
__attribute__((no_sanitize("signed-integer-overflow")))
void BinderStatsPusher::aggregateStatsLocked(const std::vector<BinderCallData>& data,
                                             const sp<os::IStatsBootstrapAtomService>& service,
                                             const int64_t nowSec) {
    for (const auto& datum : data) {
        int64_t timeSec = static_cast<int64_t>(datum.startTimeNanos) / 1000'000'000;
        mSpamStatsBuffer[datum][timeSec]++;
        int64_t startTimeSec = datum.startTimeNanos / 1000'000'000;
        // Check if the buffer period has passed.
        auto [it, inserted] = mStatsBuffer[datum].try_emplace(startTimeSec, AidlTargetMetrics());
        it->second.totalCalls++;
        if (datum.hasLatencyData()) {
            it->second.callsWithLatency++;
            it->second.durationSumMicros += (datum.endTimeNanos - datum.startTimeNanos) / 1000;
        }
    }
    // Ensure that if this is a local binder and this thread isn't attached
    // to the VM then skip pushing. This is required since StatsBootstrap is
    // a Java service and needs a JNI interface to be called from native code.
    // TODO(b/299356196): Ensure that mSpamStatsBuffer doesn't grow indefinitely.
    if (!service || (IInterface::asBinder(service)->localBinder() && getJavaVM() == nullptr)) {
        return;
    }

    for (auto outerIt = mSpamStatsBuffer.begin(); outerIt != mSpamStatsBuffer.end();
    for (auto outerIt = mStatsBuffer.begin(); outerIt != mStatsBuffer.end();
         /* no increment */) {
        bool hasSpam = false;
        int32_t secondsWithAtLeast125Calls = 0;
        int32_t secondsWithAtLeast250Calls = 0;
        const BinderCallData& datum = outerIt->first;
        std::unordered_map<int64_t, uint32_t>& innerMap = outerIt->second;
        for (auto innerIt = innerMap.begin(); innerIt != innerMap.end(); /* no increment */) {
            int64_t startTimeSec = innerIt->first;
            uint32_t count = innerIt->second;

            // Check if the delay period has passed.
            if (nowSec - startTimeSec >= kSpamAggregationWindowSec) {
                if (count >= kSpamFirstWatermark) {
                    hasSpam = true;
        uint64_t callsWithLatency = 0;
        uint64_t durationSumMicros = 0;
        uint32_t secondsWithAtLeast10Calls = 0;
        uint32_t secondsWithAtLeast50Calls = 0;
        for (auto innerIt = outerIt->second.begin(); innerIt != outerIt->second.end();
             /* no increment */) {
            // Check if the buffer period has passed.
            int64_t startTimeSec = innerIt->first;
            if (nowSec - startTimeSec >= kAggregationWindowSec) {
                uint64_t totalCalls = innerIt->second.totalCalls;
                if (totalCalls > kSpamFirstWatermark) {
                    secondsWithAtLeast125Calls++;
                    if (count >= kSpamSecondWatermark) {
                    if (totalCalls > kSpamSecondWatermark) {
                        secondsWithAtLeast250Calls++;
                    }
                }
                innerIt = innerMap.erase(innerIt);
                if (innerIt->second.callsWithLatency > 0) {
                    callsWithLatency += innerIt->second.callsWithLatency;
                    durationSumMicros += innerIt->second.durationSumMicros;
                    if (innerIt->second.callsWithLatency >= kLatencyCountFirstWatermark) {
                        secondsWithAtLeast10Calls++;
                        if (innerIt->second.callsWithLatency >= kLatencyCountSecondWatermark) {
                            secondsWithAtLeast50Calls++;
                        }
                    }
                }
                // Erase the datum from the buffer so we don't aggregate it again
                innerIt = outerIt->second.erase(innerIt);
            } else {
                ++innerIt;
            }
        }
        if (hasSpam) {
        if (callsWithLatency > 0) {
            auto datum = outerIt->first;
            auto atom = os::StatsBootstrapAtom();
            atom.atomId = kBinderLatencyAtomId;
            std::vector<os::StatsBootstrapAtomValue> values;
            atom.values.push_back(createPrimitiveValue(static_cast<int64_t>(datum.senderUid)));
            atom.values.push_back(createPrimitiveValue(static_cast<int64_t>(getuid()))); // host uid
            atom.values.push_back(createPrimitiveValue(datum.interfaceDescriptor));
            // TODO (b/299356196): get actual method name.
            atom.values.push_back(
                    createPrimitiveValue(std::to_string(datum.transactionCode))); // aidl method
            atom.values.push_back(
                    createPrimitiveValue(static_cast<int64_t>(callsWithLatency))); // call count
            atom.values.push_back(
                    createPrimitiveValue(static_cast<int64_t>(durationSumMicros))); // dur sum in us
            atom.values.push_back(createPrimitiveValue(static_cast<int32_t>(
                    secondsWithAtLeast10Calls))); // seconds_with_at_least_10_calls
            atom.values.push_back(createPrimitiveValue(static_cast<int32_t>(
                    secondsWithAtLeast50Calls))); // seconds_with_at_least_50_calls

            // TODO(b/414551350): combine multiple binder calls into one.
            service->reportBootstrapAtom(atom);
        }

        if (secondsWithAtLeast125Calls > 0) {
            auto datum = outerIt->first;
            auto atom = os::StatsBootstrapAtom();
            atom.atomId = kBinderSpamAtomId;
            std::vector<os::StatsBootstrapAtomValue> values;
@@ -114,9 +157,9 @@ void BinderStatsPusher::aggregateBinderSpamLocked(const std::vector<BinderCallDa
            // TODO(b/414551350): combine multiple binder calls into one.
            service->reportBootstrapAtom(atom);
        }
        // If the inner map is now empty after removing expired entries, remove the outer entry.
        if (innerMap.empty()) {
            outerIt = mSpamStatsBuffer.erase(outerIt);

        if (outerIt->second.empty()) {
            outerIt = mStatsBuffer.erase(outerIt);
        } else {
            ++outerIt;
        }
@@ -125,7 +168,7 @@ void BinderStatsPusher::aggregateBinderSpamLocked(const std::vector<BinderCallDa

void BinderStatsPusher::pushLocked(const std::vector<BinderCallData>& data, const int64_t nowSec) {
    auto service = getBootstrapAtomServiceLocked(nowSec);
    aggregateBinderSpamLocked(data, service, nowSec);
    aggregateStatsLocked(data, service, nowSec);
}

} // namespace android
+22 −9
Original line number Diff line number Diff line
@@ -42,11 +42,15 @@ private:

    // timeout for checking the service.
    static const int32_t kCheckServiceTimeoutSec = 5;
    static const int32_t kLatencyCountFirstWatermark = 10;
    static const int32_t kLatencyCountSecondWatermark = 50;
    static const int32_t kSpamFirstWatermark = 125;
    static const int32_t kSpamSecondWatermark = 250;
    // Time window to aggregate the data in. Once the data point's startTime
    // is older than the Aggregation window, we will allow the data to be sent.
    static const int64_t kSpamAggregationWindowSec = 5;
    // Time window (in seconds) for aggregating per-second call counts.
    // Data for a specific second (startTimeSec) is processed for spam detection
    // and to update latency-related 'secondsWithAtLeastXCalls' counts
    // once 'nowSec - startTimeSec >= kAggregationWindowSec'.
    static const int64_t kAggregationWindowSec = 5;

    // KeyEqual function for Binder Spam aggregation of BinderCallData
    struct SpamStatsKeyEqual {
@@ -65,9 +69,17 @@ private:
        }
    };

    using SpamStatsMap =
    // Metrics aggregated for reporting.
    struct AidlTargetMetrics {
        AidlTargetMetrics() = default;
        uint32_t totalCalls = 0;
        uint32_t callsWithLatency = 0;
        int64_t durationSumMicros = 0;
    };

    using StatsBufferMap =
            std::unordered_map<BinderCallData,
                               std::unordered_map<int64_t /*startTimeSec*/, uint32_t /*count*/>,
                               std::unordered_map<int64_t /*startTimeSec*/, AidlTargetMetrics>,
                               SpamStatsKeyHash,   // Hash
                               SpamStatsKeyEqual>; // KeyEqual

@@ -78,11 +90,12 @@ private:
    // time of last check
    int64_t mServiceCheckTimeSec = -kCheckServiceTimeoutSec - 1;
    // Aggregates binder transaction data into BinderSpamReport objects.
    void aggregateBinderSpamLocked(const std::vector<BinderCallData>& data,
    void aggregateStatsLocked(const std::vector<BinderCallData>& data,
                              const sp<os::IStatsBootstrapAtomService>& service,
                              const int64_t nowSec);

    // The stats which are not sent to StatsBootStrap
    SpamStatsMap mSpamStatsBuffer;
    StatsBufferMap mStatsBuffer;
};

} // namespace android
+2 −0
Original line number Diff line number Diff line
@@ -63,6 +63,8 @@ struct BinderCallData {
    int64_t startTimeNanos;
    int64_t endTimeNanos;
    uid_t senderUid;

    bool hasLatencyData() const { return endTimeNanos > startTimeNanos; }
};

} // namespace android
 No newline at end of file
+158 −3
Original line number Diff line number Diff line
@@ -34,6 +34,7 @@ using namespace testing;

// --- Mocks ---
constexpr int64_t kSpamAggregationWindowSec = 5;
constexpr int64_t kLatencyAggregationWindowSec = 5; // Same as spam for now
// Mock for IStatsBootstrapAtomService
class MockStatsBootstrapAtomService : public os::BnStatsBootstrapAtomService {
public:
@@ -63,11 +64,22 @@ BinderCallData createStatsData(uid_t uid, uint32_t code, const char* desc, int64
    return {.interfaceDescriptor = String16(desc),
            .transactionCode = code,
            .startTimeNanos = startNanos,
            .endTimeNanos = startNanos + 1000,
            .endTimeNanos = 0,
            .senderUid = uid};
}

// Helper function to create a StatsBootstrapAtom for comparison
BinderCallData createStatsData(uid_t uid, const char* desc, uint32_t code, int64_t startNanos,
                               int64_t endNanos) {
    return BinderCallData{
            .interfaceDescriptor = String16(desc),
            .transactionCode = code,
            .startTimeNanos = startNanos,
            .endTimeNanos = endNanos,
            .senderUid = uid,
    };
}

// Helper function to create a StatsBootstrapAtom for spam comparison
os::StatsBootstrapAtom createExpectedAtom(const BinderCallData& datum, int count125, int count250) {
    auto atom = os::StatsBootstrapAtom();
    // TODO: use directly from stats/atoms/framework/framework_extension_atoms.proto
@@ -81,6 +93,25 @@ os::StatsBootstrapAtom createExpectedAtom(const BinderCallData& datum, int count
    return atom;
}

// Helper function to create a StatsBootstrapAtom for latency comparison
os::StatsBootstrapAtom createExpectedLatencyAtom(const BinderCallData& datum, int64_t callCount,
                                                 int64_t durationSumMicros,
                                                 int32_t secondsWithAtLeast10Calls,
                                                 int32_t secondsWithAtLeast50Calls) {
    auto atom = os::StatsBootstrapAtom();
    // TODO: use directly from stats/atoms/framework/framework_extension_atoms.proto
    atom.atomId = 1090; // kBinderLatencyAtomId
    atom.values.push_back(createPrimitiveValue(static_cast<int64_t>(datum.senderUid)));
    atom.values.push_back(createPrimitiveValue(static_cast<int64_t>(getuid()))); // host uid
    atom.values.push_back(createPrimitiveValue(datum.interfaceDescriptor));
    atom.values.push_back(createPrimitiveValue(std::to_string(datum.transactionCode)));
    atom.values.push_back(createPrimitiveValue(callCount));
    atom.values.push_back(createPrimitiveValue(durationSumMicros));
    atom.values.push_back(createPrimitiveValue(secondsWithAtLeast10Calls));
    atom.values.push_back(createPrimitiveValue(secondsWithAtLeast50Calls));
    return atom;
}

// Matcher for StatsBootstrapAtom
MATCHER_P(StatsAtomEq, expectedAtom, "") {
    if (arg.atomId != expectedAtom.atomId) return false;
@@ -135,7 +166,8 @@ TEST_F(BinderStatsPusherTest, AggregateSpamNoSpamBelowThreshold) {
    EXPECT_CALL(*mockStatsService, reportBootstrapAtom(_)).Times(0);
    EXPECT_CALL(*mockStatsService, localBinder()).Times(1);

    pusher.pushLocked(data, currentTimeSec); //  pushLocked calls aggregateBinderSpamLocked
    pusher.pushLocked(data, currentTimeSec); //  pushLocked calls
                                             //  aggregateBinderSpamLocked
}

TEST_F(BinderStatsPusherTest, AggregateSpamOneSecondSpam) {
@@ -339,3 +371,126 @@ TEST_F(BinderStatsPusherTest, DataNotDroppedWhenPushIsSkippedThenSucceeds) {

    pusher.pushLocked(spamCallData2, timeSec2);
}

// --- Latency Tests ---

TEST_F(BinderStatsPusherTest, AggregateLatencyNoLatencyBelowThreshold) {
    std::vector<BinderCallData> data;
    int64_t currentTimeSec = 14;
    // Create data within the delay window
    for (int i = 0; i < 5; ++i) { // Less than kLatencyCountFirstWatermark (10)
        data.push_back(
                createStatsData(2001, "ILatencyFoo", 1,
                                (currentTimeSec - kLatencyAggregationWindowSec + 1) * 1000000000LL,
                                (currentTimeSec - kLatencyAggregationWindowSec + 1) * 1000000000LL +
                                        1000000 /* 1ms */));
    }

    // Expect no calls to reportBootstrapAtom
    EXPECT_CALL(*mockStatsService, reportBootstrapAtom(_)).Times(0);
    EXPECT_CALL(*mockStatsService, localBinder()).Times(1);

    pusher.pushLocked(data, currentTimeSec);
}

TEST_F(BinderStatsPusherTest, AggregateLatencyOneSecondLatency) {
    std::vector<BinderCallData> data;
    int64_t callTimeNanos = 2'000'000'000LL; // 2s
    int64_t currentTimeSec =
            (callTimeNanos / 1000'000'000LL) + kLatencyAggregationWindowSec + 1; // 2 + 5 + 1 = 8s
    int64_t totalDurationMicros = 0;

    // Create enough data in the *same second* to trigger latency reporting
    for (int i = 0; i < 15; ++i) {              // More than kLatencyCountFirstWatermark (10)
        int64_t durationMicros = (i + 1) * 100; // e.g. 100us, 200us ..
        data.push_back(createStatsData(2002, "ILatencyBar", 2, callTimeNanos,
                                       callTimeNanos + durationMicros * 1000));
        totalDurationMicros += durationMicros;
    }

    auto expectedAtom = createExpectedLatencyAtom(data[0], 15, totalDurationMicros, 1, 0);
    EXPECT_CALL(*mockStatsService, reportBootstrapAtom(StatsAtomEq(expectedAtom))).Times(1);
    EXPECT_CALL(*mockStatsService, localBinder()).Times(1);

    pusher.pushLocked(data, currentTimeSec);
}

TEST_F(BinderStatsPusherTest, AggregateLatencyDelayedLatency) {
    std::vector<BinderCallData> data;
    int64_t currentTimeNanos = 9'100'000'000;

    // Create latency data within the aggregation window
    for (int i = 0; i < 15; ++i) {
        data.push_back(createStatsData(2003, "ILatencyBaz", 3, currentTimeNanos - 1000'000'000,
                                       currentTimeNanos - 1000'000'000 + 500000 /* 0.5ms */));
    }

    // Expect no calls, data is delayed
    EXPECT_CALL(*mockStatsService, reportBootstrapAtom(_)).Times(0);
    EXPECT_CALL(*mockStatsService, localBinder()).Times(1);
    pusher.pushLocked(data, currentTimeNanos / 1000'000'000);
}

TEST_F(BinderStatsPusherTest, AggregateLatencyProcessesDelayedDataOnSubsequentCall) {
    std::vector<BinderCallData> callData1;
    int64_t callTimeSec1 = 10;
    int64_t latencyDataTimeNanos = (callTimeSec1 - 2) * 1000'000'000LL; // 8s, will be delayed
    int64_t totalDurationMicros1 = 0;

    for (int i = 0; i < 15; ++i) {
        int64_t durationMicros = (i + 1) * 150;
        callData1.push_back(createStatsData(2004, "ILatencyDelayed", 4, latencyDataTimeNanos,
                                            latencyDataTimeNanos + durationMicros * 1000));
        totalDurationMicros1 += durationMicros;
    }

    // First push: data should be buffered as it's too recent
    EXPECT_CALL(*mockStatsService, reportBootstrapAtom(_)).Times(0);
    EXPECT_CALL(*mockStatsService, localBinder()).Times(1);
    pusher.pushLocked(callData1, callTimeSec1);

    // Second push: advance time so the previous data is now outside the aggregation window
    std::vector<BinderCallData> callData2;                                    // Can be empty
    int64_t call2_time_sec = callTimeSec1 + kLatencyAggregationWindowSec + 1; // 10 + 5 + 1 = 16s

    auto expectedAtom = createExpectedLatencyAtom(callData1[0], 15, totalDurationMicros1, 1, 0);
    EXPECT_CALL(*mockStatsService, reportBootstrapAtom(StatsAtomEq(expectedAtom))).Times(1);
    EXPECT_CALL(*mockStatsService, localBinder()).Times(1);

    pusher.pushLocked(callData2, call2_time_sec);
}

TEST_F(BinderStatsPusherTest, AggregateLatencyMultipleSeconds) {
    std::vector<BinderCallData> data;
    int64_t firstCallSecondNanos = 2'000'000'000LL;  // 2s
    int64_t secondCallSecondNanos = 3'000'000'000LL; // 3s
    int64_t currentTimeSec = (secondCallSecondNanos / 1000'000'000LL) +
            kLatencyAggregationWindowSec + 1; // 3 + 5 + 1 = 9s
    int64_t totalDurationMicros = 0;

    // Data for the first second
    for (int i = 0; i < 12; ++i) { // 12 calls
        int64_t durationMicros = (i + 1) * 100;
        data.push_back(createStatsData(2005, "ILatencyMultiSec", 5, firstCallSecondNanos,
                                       firstCallSecondNanos + durationMicros * 1000));
        totalDurationMicros += durationMicros;
    }
    // Data for the second second
    for (int i = 0; i < 8; ++i) { // 8 calls
        int64_t durationMicros = (i + 1) * 120;
        // Use the same UID, code, desc for aggregation
        data.push_back(createStatsData(2005, "ILatencyMultiSec", 5, secondCallSecondNanos,
                                       secondCallSecondNanos + durationMicros * 1000));
        totalDurationMicros += durationMicros;
    }

    // Expect one atom representing aggregated data across 2 seconds
    // Total calls = 12 + 8 = 20
    // secondsWithAtLeast10Calls = 1 (only the first second has >= 10 calls)
    // secondsWithAtLeast50Calls = 0
    auto expectedAtom = createExpectedLatencyAtom(data[0], 20, totalDurationMicros, 1, 0);
    EXPECT_CALL(*mockStatsService, reportBootstrapAtom(StatsAtomEq(expectedAtom))).Times(1);
    EXPECT_CALL(*mockStatsService, localBinder()).Times(1);

    pusher.pushLocked(data, currentTimeSec);
}