Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 57f04172 authored by Luke Huang's avatar Luke Huang Committed by android-build-merger
Browse files

Subsampling DNS events to reduce logs am: 8d246298b7 am: 03ff79ce0f

am: 6ef2820cdc

Change-Id: Ibbe3c653bdf45fc03b56a61e3401fad11c05a010
parents 99f1fe56 ae05cd66
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -203,9 +203,11 @@ cc_test {
    shared_libs: [
        "libbase",
        "libcrypto",
        "libcutils",
        "libssl",
    ],
    static_libs: [
        "libgmock",
        "libnetd_resolv",
        "libnetd_test_dnsresponder",
        "libnetdutils",
+20 −9
Original line number Diff line number Diff line
@@ -32,7 +32,6 @@
#define LOG_TAG "resolv"

#include <algorithm>
#include <list>
#include <vector>

#include <NetdClient.h>  // NETID_USE_LOCAL_NAMESERVERS
@@ -57,6 +56,7 @@
#include "gethnamaddr.h"
#include "netd_resolv/stats.h"  // RCODE_TIMEOUT
#include "res_send.h"
#include "resolv_cache.h"
#include "resolv_private.h"
#include "stats.pb.h"

@@ -305,17 +305,28 @@ void initDnsEvent(NetworkDnsEventReported* event) {
    event->set_res_nsend_flags(-1);
}

// Randomly decides whether a DNS event for (netid, returnCode) gets logged.
// Returns the subsampling denominator when the event is selected, and 0 when
// the event must be dropped.
uint32_t getDnsEventSubsamplingRate(int netid, int returnCode) {
    const uint32_t denom = resolv_cache_get_subsampling_denom(netid, returnCode);
    // A zero denominator means "never log"; otherwise pick the event with a
    // chance of 1 / denom. The short-circuit keeps the RNG from seeing 0.
    if (denom != 0 && arc4random_uniform(denom) == 0) {
        return denom;
    }
    return 0;
}

void reportDnsEvent(int eventType, const android_net_context& netContext, int latencyUs,
                    int returnCode, NetworkDnsEventReported& event, const std::string& query_name,
                    const std::vector<std::string>& ip_addrs = {}, int total_ip_addr_count = 0) {
    if (uint32_t rate = getDnsEventSubsamplingRate(netContext.dns_netid, returnCode)) {
        const std::string& dnsQueryStats = event.dns_query_events().SerializeAsString();
        stats::BytesField dnsQueryBytesField{dnsQueryStats.c_str(), dnsQueryStats.size()};
        event.set_return_code(static_cast<ReturnCode>(returnCode));
    android::net::stats::stats_write(
            android::net::stats::NETWORK_DNS_EVENT_REPORTED, event.event_type(),
            event.return_code(), event.latency_micros(), event.hints_ai_flags(),
            event.res_nsend_flags(), event.network_type(), event.private_dns_modes(),
            dnsQueryBytesField, event.sampling_rate_denom());
        android::net::stats::stats_write(android::net::stats::NETWORK_DNS_EVENT_REPORTED,
                                         event.event_type(), event.return_code(),
                                         event.latency_micros(), event.hints_ai_flags(),
                                         event.res_nsend_flags(), event.network_type(),
                                         event.private_dns_modes(), dnsQueryBytesField, rate);
    }

    const auto& listeners = ResolverEventReporter::getInstance().getListeners();
    if (listeners.size() == 0) {
+2 −0
Original line number Diff line number Diff line
@@ -306,6 +306,8 @@ void ResolverController::dump(DumpWriter& dw, unsigned netId) {
        if (servers.empty()) {
            dw.println("No DNS servers defined");
        } else {
            dw.println("DnsEvent subsampling map: " +
                       android::base::Join(resolv_cache_dump_subsampling_map(netId), ' '));
            dw.println(
                    "DNS servers: # IP (total, successes, errors, timeouts, internal errors, "
                    "RTT avg, last sample)");
+84 −0
Original line number Diff line number Diff line
@@ -39,6 +39,7 @@
#include <mutex>
#include <set>
#include <string>
#include <unordered_map>
#include <vector>

#include <arpa/inet.h>
@@ -50,6 +51,7 @@

#include <android-base/logging.h>
#include <android-base/parseint.h>
#include <android-base/stringprintf.h>
#include <android-base/strings.h>
#include <android-base/thread_annotations.h>
#include <android/multinetwork.h>  // ResNsendFlags
@@ -1148,6 +1150,8 @@ struct resolv_cache_info {
    struct res_stats nsstats[MAXNS];
    std::vector<std::string> search_domains;
    int wait_for_pending_req_timeout_count;
    // Map format: ReturnCode:rate_denom
    std::unordered_map<int, uint32_t> dns_event_subsampling_map;
};

// A helper class for the Clang Thread Safety Analysis to deal with
@@ -1606,6 +1610,49 @@ bool resolv_has_nameservers(unsigned netid) {
    return (info != nullptr) && (info->nscount > 0);
}

namespace {

// Default value of the "dns_event_subsample_map" flag, used when no experiment flag is set.
// Map format: space-separated "ReturnCode:rate_denom" pairs. The literal key "default"
// supplies the rate_denom for every ReturnCode that has no entry of its own.
// Sampling rate varies by return code; events to log are chosen randomly, each with a
// chance of 1 / rate_denom. A rate_denom of 0 disables logging for that return code.
constexpr const char DEFAULT_SUBSAMPLING_MAP[] = "default:1 0:100 7:10";

std::unordered_map<int, uint32_t> resolv_get_dns_event_subsampling_map() {
    using android::base::ParseInt;
    using android::base::ParseUint;
    using android::base::Split;
    using server_configurable_flags::GetServerConfigurableFlag;
    std::unordered_map<int, uint32_t> sampling_rate_map{};
    std::vector<std::string> subsampling_vector =
            Split(GetServerConfigurableFlag("netd_native", "dns_event_subsample_map",
                                            DEFAULT_SUBSAMPLING_MAP),
                  " ");
    for (const auto& pair : subsampling_vector) {
        std::vector<std::string> rate_denom = Split(pair, ":");
        int return_code;
        uint32_t denom;
        if (rate_denom.size() != 2) {
            LOG(ERROR) << __func__ << ": invalid subsampling_pair = " << pair;
            continue;
        }
        if (rate_denom[0] == "default") {
            return_code = DNSEVENT_SUBSAMPLING_MAP_DEFAULT_KEY;
        } else if (!ParseInt(rate_denom[0], &return_code)) {
            LOG(ERROR) << __func__ << ": parse subsampling_pair failed = " << pair;
            continue;
        }
        if (!ParseUint(rate_denom[1], &denom)) {
            LOG(ERROR) << __func__ << ": parse subsampling_pair failed = " << pair;
            continue;
        }
        sampling_rate_map[return_code] = denom;
    }
    return sampling_rate_map;
}

}  // namespace

static int resolv_create_cache_for_net_locked(unsigned netid) {
    resolv_cache* cache = find_named_cache_locked(netid);
    // Should not happen
@@ -1623,6 +1670,7 @@ static int resolv_create_cache_for_net_locked(unsigned netid) {
    }
    cache_info->cache = cache;
    cache_info->netid = netid;
    cache_info->dns_event_subsampling_map = resolv_get_dns_event_subsampling_map();
    insert_cache_info_locked(cache_info);

    return 0;
@@ -1963,6 +2011,42 @@ int android_net_res_stats_get_info_for_net(unsigned netid, int* nscount,
    return revision_id;
}

// Returns the DNS event subsampling map of the given network as a list of
// "ReturnCode:rate_denom" strings, in unspecified order. The entry stored under
// DNSEVENT_SUBSAMPLING_MAP_DEFAULT_KEY is rendered with the key "default".
// Returns an empty list when no cache exists for |netid|.
std::vector<std::string> resolv_cache_dump_subsampling_map(unsigned netid) {
    std::lock_guard guard(cache_mutex);
    resolv_cache_info* cache_info = find_cache_info_locked(netid);
    if (cache_info == nullptr) return {};
    const auto& subsampling_map = cache_info->dns_event_subsampling_map;
    std::vector<std::string> result;
    result.reserve(subsampling_map.size());
    for (const auto& [return_code, denom] : subsampling_map) {
        const std::string key = (return_code == DNSEVENT_SUBSAMPLING_MAP_DEFAULT_KEY)
                                        ? std::string("default")
                                        : std::to_string(return_code);
        // std::to_string picks the unsigned overload for the uint32_t denominator, so
        // large values are not misprinted the way a signed "%d" conversion would.
        result.push_back(key + ":" + std::to_string(denom));
    }
    return result;
}

// Decides whether an event should be sampled using a random number generator and
// a sampling factor derived from the netid and the return code.
//
// Returns the subsampling rate if the event should be sampled, or 0 if it should be discarded.
uint32_t resolv_cache_get_subsampling_denom(unsigned netid, int return_code) {
    std::lock_guard guard(cache_mutex);
    resolv_cache_info* cache_info = find_cache_info_locked(netid);
    if (cache_info == nullptr) return 0;  // Don't log anything at all.
    const auto& subsampling_map = cache_info->dns_event_subsampling_map;
    auto search_returnCode = subsampling_map.find(return_code);
    uint32_t denom;
    if (search_returnCode != subsampling_map.end()) {
        denom = search_returnCode->second;
    } else {
        auto search_default = subsampling_map.find(DNSEVENT_SUBSAMPLING_MAP_DEFAULT_KEY);
        denom = (search_default == subsampling_map.end()) ? 0 : search_default->second;
    }
    return denom;
}

int resolv_cache_get_resolver_stats(unsigned netid, res_params* params, res_stats stats[MAXNS]) {
    std::lock_guard guard(cache_mutex);
    resolv_cache_info* info = find_cache_info_locked(netid);
+73 −1
Original line number Diff line number Diff line
@@ -14,7 +14,7 @@
 * limitations under the License.
 */

#include <gtest/gtest.h>
#include <netdb.h>

#include <array>
#include <atomic>
@@ -25,6 +25,9 @@
#include <android-base/logging.h>
#include <android-base/stringprintf.h>
#include <android/multinetwork.h>
#include <cutils/properties.h>
#include <gmock/gmock-matchers.h>
#include <gtest/gtest.h>

#include "dns_responder/dns_responder.h"
#include "netd_resolv/stats.h"
@@ -711,6 +714,75 @@ TEST_F(ResolvCacheTest, GetStats) {
    expectCacheStats("GetStats", TEST_NETID, cacheStats);
}

namespace {

// Return code 0, named so it reads alongside the EAI_* codes used as subsampling-map
// keys in the test below.
constexpr int EAI_OK = 0;
// System property that ScopedCacheCreate overrides to control the subsampling map.
// NOTE(review): presumably the property backing the "netd_native" /
// "dns_event_subsample_map" server-configurable flag -- confirm.
constexpr char DNS_EVENT_SUBSAMPLING_MAP_FLAG[] =
        "persist.device_config.netd_native.dns_event_subsample_map";

// Test helper that creates a resolver cache for |netid| while |property| is temporarily
// set to |subsampling_map|. On destruction the cache is deleted and the property is
// restored to the value it had before construction.
class ScopedCacheCreate {
  public:
    explicit ScopedCacheCreate(unsigned netid, const char* subsampling_map,
                               const char* property = DNS_EVENT_SUBSAMPLING_MAP_FLAG)
        : mStoredNetId(netid), mStoredProperty(property) {
        // Remember the current value ("" if unset) so the destructor can put it back.
        property_get(property, mStoredMap, "");
        property_set(property, subsampling_map);
        EXPECT_EQ(0, resolv_create_cache_for_net(netid));
    }
    ~ScopedCacheCreate() {
        resolv_delete_cache_for_net(mStoredNetId);
        property_set(mStoredProperty, mStoredMap);
    }

    // Not copyable: a copy would delete the cache and restore the property a second time.
    ScopedCacheCreate(const ScopedCacheCreate&) = delete;
    ScopedCacheCreate& operator=(const ScopedCacheCreate&) = delete;

  private:
    unsigned mStoredNetId;
    // Not owned; the pointed-to name must outlive this object.
    const char* mStoredProperty;
    // Property value captured at construction time.
    char mStoredMap[PROPERTY_VALUE_MAX]{};
};

}  // namespace

// Checks how the subsampling map is built from the experiment flag: the built-in
// defaults, a custom map, an unparsable flag, and negative / zero denominators.
TEST_F(ResolvCacheTest, DnsEventSubsampling) {
    // Shorthand for querying the denominator of a return code on the test network.
    const auto denomFor = [&](int returnCode) {
        return resolv_cache_get_subsampling_denom(TEST_NETID, returnCode);
    };

    // No experiment flag set: the built-in map "default:1 0:100 7:10" applies.
    {
        ScopedCacheCreate cache(TEST_NETID, "");
        EXPECT_EQ(10U, denomFor(EAI_NODATA));
        EXPECT_EQ(100U, denomFor(EAI_OK));
        EXPECT_EQ(1U, denomFor(EAI_BADFLAGS));  // falls back to "default"
        EXPECT_THAT(resolv_cache_dump_subsampling_map(TEST_NETID),
                    testing::UnorderedElementsAreArray({"default:1", "0:100", "7:10"}));
    }

    // Experiment flag set to "0:42 default:666".
    {
        ScopedCacheCreate cache(TEST_NETID, "0:42 default:666");
        EXPECT_EQ(42U, denomFor(EAI_OK));
        EXPECT_EQ(666U, denomFor(EAI_NODATA));  // falls back to "default"
        EXPECT_THAT(resolv_cache_dump_subsampling_map(TEST_NETID),
                    testing::UnorderedElementsAreArray({"default:666", "0:42"}));
    }

    // Experiment flag that cannot be parsed at all: the map ends up empty, so every
    // lookup yields 0 (logging disabled).
    {
        ScopedCacheCreate cache(TEST_NETID, "asvaxx");
        EXPECT_EQ(0U, denomFor(EAI_OK));
        EXPECT_EQ(0U, denomFor(EAI_NODATA));
        EXPECT_TRUE(resolv_cache_dump_subsampling_map(TEST_NETID).empty());
    }

    // Negative denominators are rejected; a zero denominator is stored as-is.
    // With no valid "default" entry, unmapped return codes yield 0 (logging disabled).
    {
        ScopedCacheCreate cache(TEST_NETID, "0:-42 default:-666 7:10 10:0");
        EXPECT_EQ(0U, denomFor(EAI_OK));
        EXPECT_EQ(0U, denomFor(EAI_BADFLAGS));
        EXPECT_EQ(10U, denomFor(EAI_NODATA));
        EXPECT_EQ(0U, denomFor(EAI_SOCKTYPE));
        EXPECT_THAT(resolv_cache_dump_subsampling_map(TEST_NETID),
                    testing::UnorderedElementsAreArray({"7:10", "10:0"}));
    }
}

// TODO: Tests for struct resolv_cache_info, including:
//     - res_params
//         -- resolv_cache_get_resolver_stats()
Loading