Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 1bf45bab authored by Parth Sane's avatar Parth Sane
Browse files

Add latency stats to binder stats pusher

Test: atest binderStatsPusherUnitTest
Bug: 299356196
Flag: build.RELEASE_LIBBINDER_BINDER_OBSERVER
Change-Id: I5a3f8914bac3de1e058ada5bc55179e18f41dfe1
parent 9ff64f6a
Loading
Loading
Loading
Loading
+68 −25
Original line number Diff line number Diff line
@@ -24,6 +24,7 @@
namespace android {
// defined in stats/atoms/framework/framework_extension_atoms.proto
constexpr int32_t kBinderSpamAtomId = 1064;
constexpr int32_t kBinderLatencyAtomId = 1090;
[[clang::no_destroy]] static const StaticString16 kStatsBootstrapServiceName(u"statsbootstrap");

sp<os::IStatsBootstrapAtomService> BinderStatsPusher::getBootstrapAtomServiceLocked(
@@ -57,47 +58,89 @@ sp<os::IStatsBootstrapAtomService> BinderStatsPusher::getBootstrapAtomServiceLoc
    return service;
}

void BinderStatsPusher::aggregateBinderSpamLocked(const std::vector<BinderCallData>& data,
__attribute__((no_sanitize("signed-integer-overflow")))
void BinderStatsPusher::aggregateStatsLocked(const std::vector<BinderCallData>& data,
                                             const sp<os::IStatsBootstrapAtomService>& service,
                                             const int64_t nowSec) {
    for (const auto& datum : data) {
        int64_t timeSec = static_cast<int64_t>(datum.startTimeNanos) / 1000'000'000;
        mSpamStatsBuffer[datum][timeSec]++;
        int64_t startTimeSec = datum.startTimeNanos / 1000'000'000;
        // Check if the buffer period has passed.
        auto [it, inserted] = mStatsBuffer[datum].try_emplace(startTimeSec, AidlTargetMetrics());
        it->second.totalCalls++;
        if (datum.hasLatencyData()) {
            it->second.callsWithLatency++;
            it->second.durationSumMicros += (datum.endTimeNanos - datum.startTimeNanos) / 1000;
        }
    }
    // Ensure that if this is a local binder and this thread isn't attached
    // to the VM then skip pushing. This is required since StatsBootstrap is
    // a Java service and needs a JNI interface to be called from native code.
    // TODO(b/299356196): Ensure that mSpamStatsBuffer doesn't grow indefinitely.
    if (!service || (IInterface::asBinder(service)->localBinder() && getJavaVM() == nullptr)) {
        return;
    }

    for (auto outerIt = mSpamStatsBuffer.begin(); outerIt != mSpamStatsBuffer.end();
    for (auto outerIt = mStatsBuffer.begin(); outerIt != mStatsBuffer.end();
         /* no increment */) {
        bool hasSpam = false;
        int32_t secondsWithAtLeast125Calls = 0;
        int32_t secondsWithAtLeast250Calls = 0;
        const BinderCallData& datum = outerIt->first;
        std::unordered_map<int64_t, uint32_t>& innerMap = outerIt->second;
        for (auto innerIt = innerMap.begin(); innerIt != innerMap.end(); /* no increment */) {
            int64_t startTimeSec = innerIt->first;
            uint32_t count = innerIt->second;

            // Check if the delay period has passed.
            if (nowSec - startTimeSec >= kSpamAggregationWindowSec) {
                if (count >= kSpamFirstWatermark) {
                    hasSpam = true;
        uint64_t callsWithLatency = 0;
        uint64_t durationSumMicros = 0;
        uint32_t secondsWithAtLeast10Calls = 0;
        uint32_t secondsWithAtLeast50Calls = 0;
        for (auto innerIt = outerIt->second.begin(); innerIt != outerIt->second.end();
             /* no increment */) {
            // Check if the buffer period has passed.
            int64_t startTimeSec = innerIt->first;
            if (nowSec - startTimeSec >= kAggregationWindowSec) {
                uint64_t totalCalls = innerIt->second.totalCalls;
                if (totalCalls > kSpamFirstWatermark) {
                    secondsWithAtLeast125Calls++;
                    if (count >= kSpamSecondWatermark) {
                    if (totalCalls > kSpamSecondWatermark) {
                        secondsWithAtLeast250Calls++;
                    }
                }
                innerIt = innerMap.erase(innerIt);
                if (innerIt->second.callsWithLatency > 0) {
                    callsWithLatency += innerIt->second.callsWithLatency;
                    durationSumMicros += innerIt->second.durationSumMicros;
                    if (innerIt->second.callsWithLatency >= kLatencyCountFirstWatermark) {
                        secondsWithAtLeast10Calls++;
                        if (innerIt->second.callsWithLatency >= kLatencyCountSecondWatermark) {
                            secondsWithAtLeast50Calls++;
                        }
                    }
                }
                // Erase the datum from the buffer so we don't aggregate it again
                innerIt = outerIt->second.erase(innerIt);
            } else {
                ++innerIt;
            }
        }
        if (hasSpam) {
        if (callsWithLatency > 0) {
            auto datum = outerIt->first;
            auto atom = os::StatsBootstrapAtom();
            atom.atomId = kBinderLatencyAtomId;
            std::vector<os::StatsBootstrapAtomValue> values;
            atom.values.push_back(createPrimitiveValue(static_cast<int64_t>(datum.senderUid)));
            atom.values.push_back(createPrimitiveValue(static_cast<int64_t>(getuid()))); // host uid
            atom.values.push_back(createPrimitiveValue(datum.interfaceDescriptor));
            // TODO (b/299356196): get actual method name.
            atom.values.push_back(
                    createPrimitiveValue(std::to_string(datum.transactionCode))); // aidl method
            atom.values.push_back(
                    createPrimitiveValue(static_cast<int64_t>(callsWithLatency))); // call count
            atom.values.push_back(
                    createPrimitiveValue(static_cast<int64_t>(durationSumMicros))); // dur sum in us
            atom.values.push_back(createPrimitiveValue(static_cast<int32_t>(
                    secondsWithAtLeast10Calls))); // seconds_with_at_least_10_calls
            atom.values.push_back(createPrimitiveValue(static_cast<int32_t>(
                    secondsWithAtLeast50Calls))); // seconds_with_at_least_50_calls

            // TODO(b/414551350): combine multiple binder calls into one.
            service->reportBootstrapAtom(atom);
        }

        if (secondsWithAtLeast125Calls > 0) {
            auto datum = outerIt->first;
            auto atom = os::StatsBootstrapAtom();
            atom.atomId = kBinderSpamAtomId;
            std::vector<os::StatsBootstrapAtomValue> values;
@@ -114,9 +157,9 @@ void BinderStatsPusher::aggregateBinderSpamLocked(const std::vector<BinderCallDa
            // TODO(b/414551350): combine multiple binder calls into one.
            service->reportBootstrapAtom(atom);
        }
        // If the inner map is now empty after removing expired entries, remove the outer entry.
        if (innerMap.empty()) {
            outerIt = mSpamStatsBuffer.erase(outerIt);

        if (outerIt->second.empty()) {
            outerIt = mStatsBuffer.erase(outerIt);
        } else {
            ++outerIt;
        }
@@ -125,7 +168,7 @@ void BinderStatsPusher::aggregateBinderSpamLocked(const std::vector<BinderCallDa

void BinderStatsPusher::pushLocked(const std::vector<BinderCallData>& data, const int64_t nowSec) {
    auto service = getBootstrapAtomServiceLocked(nowSec);
    aggregateBinderSpamLocked(data, service, nowSec);
    aggregateStatsLocked(data, service, nowSec);
}

} // namespace android
+22 −9
Original line number Diff line number Diff line
@@ -42,11 +42,15 @@ private:

    // timeout for checking the service.
    static const int32_t kCheckServiceTimeoutSec = 5;
    static const int32_t kLatencyCountFirstWatermark = 10;
    static const int32_t kLatencyCountSecondWatermark = 50;
    static const int32_t kSpamFirstWatermark = 125;
    static const int32_t kSpamSecondWatermark = 250;
    // Time window to aggregate the data in. Once the data point's startTime
    // is older than the Aggregation window, we will allow the data to be sent.
    static const int64_t kSpamAggregationWindowSec = 5;
    // Time window (in seconds) for aggregating per-second call counts.
    // Data for a specific second (startTimeSec) is processed for spam detection
    // and to update latency-related 'secondsWithAtLeastXCalls' counts
    // once 'nowSec - startTimeSec >= kAggregationWindowSec'.
    static const int64_t kAggregationWindowSec = 5;

    // KeyEqual function for Binder Spam aggregation of BinderCallData
    struct SpamStatsKeyEqual {
@@ -65,9 +69,17 @@ private:
        }
    };

    using SpamStatsMap =
    // Metrics aggregated for reporting.
    struct AidlTargetMetrics {
        AidlTargetMetrics() = default;
        uint32_t totalCalls = 0;
        uint32_t callsWithLatency = 0;
        int64_t durationSumMicros = 0;
    };

    using StatsBufferMap =
            std::unordered_map<BinderCallData,
                               std::unordered_map<int64_t /*startTimeSec*/, uint32_t /*count*/>,
                               std::unordered_map<int64_t /*startTimeSec*/, AidlTargetMetrics>,
                               SpamStatsKeyHash,   // Hash
                               SpamStatsKeyEqual>; // KeyEqual

@@ -78,11 +90,12 @@ private:
    // time of last check
    int64_t mServiceCheckTimeSec = -kCheckServiceTimeoutSec - 1;
    // Aggregates binder transaction data into BinderSpamReport objects.
    void aggregateBinderSpamLocked(const std::vector<BinderCallData>& data,
    void aggregateStatsLocked(const std::vector<BinderCallData>& data,
                              const sp<os::IStatsBootstrapAtomService>& service,
                              const int64_t nowSec);

    // The stats which are not sent to StatsBootStrap
    SpamStatsMap mSpamStatsBuffer;
    StatsBufferMap mStatsBuffer;
};

} // namespace android
+2 −0
Original line number Diff line number Diff line
@@ -63,6 +63,8 @@ struct BinderCallData {
    int64_t startTimeNanos;
    int64_t endTimeNanos;
    uid_t senderUid;

    bool hasLatencyData() const { return endTimeNanos > startTimeNanos; }
};

} // namespace android
 No newline at end of file
+158 −3
Original line number Diff line number Diff line
@@ -34,6 +34,7 @@ using namespace testing;

// --- Mocks ---
constexpr int64_t kSpamAggregationWindowSec = 5;
constexpr int64_t kLatencyAggregationWindowSec = 5; // Same as spam for now
// Mock for IStatsBootstrapAtomService
class MockStatsBootstrapAtomService : public os::BnStatsBootstrapAtomService {
public:
@@ -63,11 +64,22 @@ BinderCallData createStatsData(uid_t uid, uint32_t code, const char* desc, int64
    return {.interfaceDescriptor = String16(desc),
            .transactionCode = code,
            .startTimeNanos = startNanos,
            .endTimeNanos = startNanos + 1000,
            .endTimeNanos = 0,
            .senderUid = uid};
}

// Helper function to create a StatsBootstrapAtom for comparison
BinderCallData createStatsData(uid_t uid, const char* desc, uint32_t code, int64_t startNanos,
                               int64_t endNanos) {
    return BinderCallData{
            .interfaceDescriptor = String16(desc),
            .transactionCode = code,
            .startTimeNanos = startNanos,
            .endTimeNanos = endNanos,
            .senderUid = uid,
    };
}

// Helper function to create a StatsBootstrapAtom for spam comparison
os::StatsBootstrapAtom createExpectedAtom(const BinderCallData& datum, int count125, int count250) {
    auto atom = os::StatsBootstrapAtom();
    // TODO: use directly from stats/atoms/framework/framework_extension_atoms.proto
@@ -81,6 +93,25 @@ os::StatsBootstrapAtom createExpectedAtom(const BinderCallData& datum, int count
    return atom;
}

// Helper function to create a StatsBootstrapAtom for latency comparison
os::StatsBootstrapAtom createExpectedLatencyAtom(const BinderCallData& datum, int64_t callCount,
                                                 int64_t durationSumMicros,
                                                 int32_t secondsWithAtLeast10Calls,
                                                 int32_t secondsWithAtLeast50Calls) {
    auto atom = os::StatsBootstrapAtom();
    // TODO: use directly from stats/atoms/framework/framework_extension_atoms.proto
    atom.atomId = 1090; // kBinderLatencyAtomId
    atom.values.push_back(createPrimitiveValue(static_cast<int64_t>(datum.senderUid)));
    atom.values.push_back(createPrimitiveValue(static_cast<int64_t>(getuid()))); // host uid
    atom.values.push_back(createPrimitiveValue(datum.interfaceDescriptor));
    atom.values.push_back(createPrimitiveValue(std::to_string(datum.transactionCode)));
    atom.values.push_back(createPrimitiveValue(callCount));
    atom.values.push_back(createPrimitiveValue(durationSumMicros));
    atom.values.push_back(createPrimitiveValue(secondsWithAtLeast10Calls));
    atom.values.push_back(createPrimitiveValue(secondsWithAtLeast50Calls));
    return atom;
}

// Matcher for StatsBootstrapAtom
MATCHER_P(StatsAtomEq, expectedAtom, "") {
    if (arg.atomId != expectedAtom.atomId) return false;
@@ -135,7 +166,8 @@ TEST_F(BinderStatsPusherTest, AggregateSpamNoSpamBelowThreshold) {
    EXPECT_CALL(*mockStatsService, reportBootstrapAtom(_)).Times(0);
    EXPECT_CALL(*mockStatsService, localBinder()).Times(1);

    pusher.pushLocked(data, currentTimeSec); //  pushLocked calls aggregateBinderSpamLocked
    pusher.pushLocked(data, currentTimeSec); //  pushLocked calls
                                             //  aggregateBinderSpamLocked
}

TEST_F(BinderStatsPusherTest, AggregateSpamOneSecondSpam) {
@@ -339,3 +371,126 @@ TEST_F(BinderStatsPusherTest, DataNotDroppedWhenPushIsSkippedThenSucceeds) {

    pusher.pushLocked(spamCallData2, timeSec2);
}

// --- Latency Tests ---

TEST_F(BinderStatsPusherTest, AggregateLatencyNoLatencyBelowThreshold) {
    std::vector<BinderCallData> data;
    int64_t currentTimeSec = 14;
    // Create data within the delay window
    for (int i = 0; i < 5; ++i) { // Less than kLatencyCountFirstWatermark (10)
        data.push_back(
                createStatsData(2001, "ILatencyFoo", 1,
                                (currentTimeSec - kLatencyAggregationWindowSec + 1) * 1000000000LL,
                                (currentTimeSec - kLatencyAggregationWindowSec + 1) * 1000000000LL +
                                        1000000 /* 1ms */));
    }

    // Expect no calls to reportBootstrapAtom
    EXPECT_CALL(*mockStatsService, reportBootstrapAtom(_)).Times(0);
    EXPECT_CALL(*mockStatsService, localBinder()).Times(1);

    pusher.pushLocked(data, currentTimeSec);
}

TEST_F(BinderStatsPusherTest, AggregateLatencyOneSecondLatency) {
    std::vector<BinderCallData> data;
    int64_t callTimeNanos = 2'000'000'000LL; // 2s
    int64_t currentTimeSec =
            (callTimeNanos / 1000'000'000LL) + kLatencyAggregationWindowSec + 1; // 2 + 5 + 1 = 8s
    int64_t totalDurationMicros = 0;

    // Create enough data in the *same second* to trigger latency reporting
    for (int i = 0; i < 15; ++i) {              // More than kLatencyCountFirstWatermark (10)
        int64_t durationMicros = (i + 1) * 100; // e.g. 100us, 200us ..
        data.push_back(createStatsData(2002, "ILatencyBar", 2, callTimeNanos,
                                       callTimeNanos + durationMicros * 1000));
        totalDurationMicros += durationMicros;
    }

    auto expectedAtom = createExpectedLatencyAtom(data[0], 15, totalDurationMicros, 1, 0);
    EXPECT_CALL(*mockStatsService, reportBootstrapAtom(StatsAtomEq(expectedAtom))).Times(1);
    EXPECT_CALL(*mockStatsService, localBinder()).Times(1);

    pusher.pushLocked(data, currentTimeSec);
}

TEST_F(BinderStatsPusherTest, AggregateLatencyDelayedLatency) {
    std::vector<BinderCallData> data;
    int64_t currentTimeNanos = 9'100'000'000;

    // Create latency data within the aggregation window
    for (int i = 0; i < 15; ++i) {
        data.push_back(createStatsData(2003, "ILatencyBaz", 3, currentTimeNanos - 1000'000'000,
                                       currentTimeNanos - 1000'000'000 + 500000 /* 0.5ms */));
    }

    // Expect no calls, data is delayed
    EXPECT_CALL(*mockStatsService, reportBootstrapAtom(_)).Times(0);
    EXPECT_CALL(*mockStatsService, localBinder()).Times(1);
    pusher.pushLocked(data, currentTimeNanos / 1000'000'000);
}

TEST_F(BinderStatsPusherTest, AggregateLatencyProcessesDelayedDataOnSubsequentCall) {
    std::vector<BinderCallData> callData1;
    int64_t callTimeSec1 = 10;
    int64_t latencyDataTimeNanos = (callTimeSec1 - 2) * 1000'000'000LL; // 8s, will be delayed
    int64_t totalDurationMicros1 = 0;

    for (int i = 0; i < 15; ++i) {
        int64_t durationMicros = (i + 1) * 150;
        callData1.push_back(createStatsData(2004, "ILatencyDelayed", 4, latencyDataTimeNanos,
                                            latencyDataTimeNanos + durationMicros * 1000));
        totalDurationMicros1 += durationMicros;
    }

    // First push: data should be buffered as it's too recent
    EXPECT_CALL(*mockStatsService, reportBootstrapAtom(_)).Times(0);
    EXPECT_CALL(*mockStatsService, localBinder()).Times(1);
    pusher.pushLocked(callData1, callTimeSec1);

    // Second push: advance time so the previous data is now outside the aggregation window
    std::vector<BinderCallData> callData2;                                    // Can be empty
    int64_t call2_time_sec = callTimeSec1 + kLatencyAggregationWindowSec + 1; // 10 + 5 + 1 = 16s

    auto expectedAtom = createExpectedLatencyAtom(callData1[0], 15, totalDurationMicros1, 1, 0);
    EXPECT_CALL(*mockStatsService, reportBootstrapAtom(StatsAtomEq(expectedAtom))).Times(1);
    EXPECT_CALL(*mockStatsService, localBinder()).Times(1);

    pusher.pushLocked(callData2, call2_time_sec);
}

TEST_F(BinderStatsPusherTest, AggregateLatencyMultipleSeconds) {
    std::vector<BinderCallData> data;
    int64_t firstCallSecondNanos = 2'000'000'000LL;  // 2s
    int64_t secondCallSecondNanos = 3'000'000'000LL; // 3s
    int64_t currentTimeSec = (secondCallSecondNanos / 1000'000'000LL) +
            kLatencyAggregationWindowSec + 1; // 3 + 5 + 1 = 9s
    int64_t totalDurationMicros = 0;

    // Data for the first second
    for (int i = 0; i < 12; ++i) { // 12 calls
        int64_t durationMicros = (i + 1) * 100;
        data.push_back(createStatsData(2005, "ILatencyMultiSec", 5, firstCallSecondNanos,
                                       firstCallSecondNanos + durationMicros * 1000));
        totalDurationMicros += durationMicros;
    }
    // Data for the second second
    for (int i = 0; i < 8; ++i) { // 8 calls
        int64_t durationMicros = (i + 1) * 120;
        // Use the same UID, code, desc for aggregation
        data.push_back(createStatsData(2005, "ILatencyMultiSec", 5, secondCallSecondNanos,
                                       secondCallSecondNanos + durationMicros * 1000));
        totalDurationMicros += durationMicros;
    }

    // Expect one atom representing aggregated data across 2 seconds
    // Total calls = 12 + 8 = 20
    // secondsWithAtLeast10Calls = 1 (only the first second has >= 10 calls)
    // secondsWithAtLeast50Calls = 0
    auto expectedAtom = createExpectedLatencyAtom(data[0], 20, totalDurationMicros, 1, 0);
    EXPECT_CALL(*mockStatsService, reportBootstrapAtom(StatsAtomEq(expectedAtom))).Times(1);
    EXPECT_CALL(*mockStatsService, localBinder()).Times(1);

    pusher.pushLocked(data, currentTimeSec);
}