Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit ca9c1ad8 authored by Mike Yu's avatar Mike Yu
Browse files

Calculate the stats rtt based on the elapsed time since socket creation time

With keep_listening_udp enabled, it's possible to receive a DNS
response from a previous DNS server. Current calculation of the
stats rtt of DNS response is no longer correct. It's not trivial
to change the current implementation to make the rtt exact
correct because we are unable to know whether a received DNS
answer is a reponse of the first DNS query or a resent DNS query.
For simplicity, we always assume that it's the response of the
first DNS query, and calculate the rtt based on the elapsed time
since the socket was created.

After this CL, every res_sample's rtt field is supposed to be set
with a positive value, while it used to be set with a positive
value only when a DNS response is received. This change should not
be a problem because only the rtt of the res_sample with rcode
NOERROR, NOTAUTH, and NXDOMAIN, which must be from a received DNS
response, is considered effective. (See
android_net_res_stats_aggregate() for more details)

Apart from that, the avg rtt of the stats might be larger than it
used to be. It should not be a problem either because none of the
current features/functions rely on avg rtt.

Only the stats is involved in this CL, not including the metrics.

Bug: 271406438
Test: atest
Change-Id: I3c25ba77c256c5a758d66a1127c397bfd6ab9e90
parent 136ffc17
Loading
Loading
Loading
Loading
+17 −19
Original line number Diff line number Diff line
@@ -156,10 +156,9 @@ const std::vector<IPSockAddr> mdns_addrs = {IPSockAddr::toIPSockAddr("ff02::fb",

static int setupUdpSocket(ResState* statp, const sockaddr* sockap, unique_fd* fd_out, int* terrno);
static int send_dg(ResState* statp, res_params* params, span<const uint8_t> msg, span<uint8_t> ans,
                   int* terrno, size_t* ns, int* v_circuit, int* gotsomewhere, int* rcode,
                   int* delay);
                   int* terrno, size_t* ns, int* v_circuit, int* gotsomewhere, int* rcode);
static int send_vc(ResState* statp, res_params* params, span<const uint8_t> msg, span<uint8_t> ans,
                   int* terrno, size_t ns, int* rcode, int* delay);
                   int* terrno, size_t ns, int* rcode);
static int send_mdns(ResState* statp, span<const uint8_t> msg, span<uint8_t> ans, int* terrno,
                     int* rcode);
static void dump_error(const char*, const struct sockaddr*);
@@ -173,6 +172,7 @@ static int res_private_dns_send(ResState*, const Slice query, const Slice answer
static int res_tls_send(const std::list<DnsTlsServer>& tlsServers, ResState*, const Slice query,
                        const Slice answer, int* rcode, PrivateDnsMode mode);
static ssize_t res_doh_send(ResState*, const Slice query, const Slice answer, int* rcode);
static int elapsedTimeInMs(const timespec& from);

NsType getQueryType(span<const uint8_t> msg) {
    ns_msg handle;
@@ -597,7 +597,8 @@ int res_nsend(ResState* statp, span<const uint8_t> msg, span<uint8_t> ans, int*
            if (useTcp) {
                // TCP; at most one attempt per server.
                attempt = retryTimes;
                resplen = send_vc(statp, &params, msg, ans, &terrno, ns, rcode, &delay);
                resplen = send_vc(statp, &params, msg, ans, &terrno, ns, rcode);
                delay = elapsedTimeInMs(statp->tcp_nssock_ts);

                if (msg.size() <= PACKETSZ && resplen <= 0 &&
                    statp->tc_mode == aidl::android::net::IDnsResolver::TC_MODE_UDP_TCP) {
@@ -609,7 +610,8 @@ int res_nsend(ResState* statp, span<const uint8_t> msg, span<uint8_t> ans, int*
            } else {
                // UDP
                resplen = send_dg(statp, &params, msg, ans, &terrno, &actualNs, &useTcp,
                                  &gotsomewhere, rcode, &delay);
                                  &gotsomewhere, rcode);
                delay = elapsedTimeInMs(statp->udpsocks_ts[actualNs]);
                fallbackTCP = useTcp ? true : false;
                retry_count_for_event = attempt;
                LOG(INFO) << __func__ << ": used send_dg " << resplen << " terrno: " << terrno;
@@ -643,9 +645,6 @@ int res_nsend(ResState* statp, span<const uint8_t> msg, span<uint8_t> ans, int*
                if (!isNetworkRestricted(terrno)) {
                    res_sample sample;
                    res_stats_set_sample(&sample, query_time, *rcode, delay);
                    // KeepListening UDP mechanism is incompatible with usable_servers of legacy
                    // stats, so keep the old logic for now.
                    // TODO: Replace usable_servers of legacy stats with new one.
                    resolv_cache_add_resolver_stats_sample(statp->netid, revision_id,
                                                           receivedServerAddr, sample,
                                                           params.max_samples);
@@ -707,8 +706,7 @@ static struct timespec get_timeout(ResState* statp, const res_params* params, co
}

static int send_vc(ResState* statp, res_params* params, span<const uint8_t> msg, span<uint8_t> ans,
                   int* terrno, size_t ns, int* rcode, int* delay) {
    *delay = 0;
                   int* terrno, size_t ns, int* rcode) {
    const HEADER* hp = (const HEADER*)(const void*)msg.data();
    HEADER* anhp = (HEADER*)(void*)ans.data();
    struct sockaddr* nsap;
@@ -733,8 +731,6 @@ static int send_vc(ResState* statp, res_params* params, span<const uint8_t> msg,
same_ns:
    truncating = 0;

    struct timespec start_time = evNowTime();

    /* Are we still talking to whom we want to talk to? */
    if (statp->tcp_nssock >= 0 && (statp->flags & RES_F_VC) != 0) {
        struct sockaddr_storage peer;
@@ -765,6 +761,7 @@ same_ns:
                    return -1;
            }
        }
        statp->tcp_nssock_ts = evNowTime();
        const uid_t uid = statp->enforce_dns_uid ? AID_DNS : statp->uid;
        resolv_tag_socket(statp->tcp_nssock, uid, statp->pid);
        if (statp->mark != MARK_UNSET) {
@@ -910,8 +907,6 @@ read_len:
     * next nameserver ought not be tried.
     */
    if (resplen > 0) {
        struct timespec done = evNowTime();
        *delay = res_stats_calculate_rtt(&done, &start_time);
        *rcode = anhp->rcode;
    }
    *terrno = 0;
@@ -1088,8 +1083,7 @@ static int setupUdpSocket(ResState* statp, const sockaddr* sockap, unique_fd* fd
}

static int send_dg(ResState* statp, res_params* params, span<const uint8_t> msg, span<uint8_t> ans,
                   int* terrno, size_t* ns, int* v_circuit, int* gotsomewhere, int* rcode,
                   int* delay) {
                   int* terrno, size_t* ns, int* v_circuit, int* gotsomewhere, int* rcode) {
    // It should never happen, but just in case.
    if (*ns >= statp->nsaddrs.size()) {
        LOG(ERROR) << __func__ << ": Out-of-bound indexing: " << ns;
@@ -1097,13 +1091,13 @@ static int send_dg(ResState* statp, res_params* params, span<const uint8_t> msg,
        return -1;
    }

    *delay = 0;
    const sockaddr_storage ss = statp->nsaddrs[*ns];
    const sockaddr* nsap = reinterpret_cast<const sockaddr*>(&ss);

    if (statp->udpsocks[*ns] == -1) {
        int result = setupUdpSocket(statp, nsap, &statp->udpsocks[*ns], terrno);
        if (result <= 0) return result;
        statp->udpsocks_ts[*ns] = evNowTime();

        // Use a "connected" datagram socket to receive an ECONNREFUSED error
        // on the next socket operation when the server responds with an
@@ -1182,8 +1176,6 @@ static int send_dg(ResState* statp, res_params* params, span<const uint8_t> msg,
                continue;
            }

            timespec done = evNowTime();
            *delay = res_stats_calculate_rtt(&done, &start_time);
            if (anhp->rcode == SERVFAIL || anhp->rcode == NOTIMP || anhp->rcode == REFUSED) {
                LOG(DEBUG) << __func__ << ": server rejected query:";
                res_pquery({ans.data(), (resplen > ans.size()) ? ans.size() : resplen});
@@ -1468,3 +1460,9 @@ int resolv_res_nsend(const android_net_context* netContext, span<const uint8_t>
    *rcode = NOERROR;
    return res_nsend(&res, msg, ans, rcode, flags);
}

// Returns the elapsed time in milliseconds since the given time `from`.
int elapsedTimeInMs(const timespec& from) {
    const timespec now = evNowTime();
    return res_stats_calculate_rtt(&now, &from);
}
+4 −0
Original line number Diff line number Diff line
@@ -107,8 +107,10 @@ struct ResState {
        copy.pid = pid;
        copy.search_domains = search_domains;
        copy.nsaddrs = nsaddrs;
        copy.udpsocks_ts = udpsocks_ts;
        copy.ndots = ndots;
        copy.mark = mark;
        copy.tcp_nssock_ts = tcp_nssock_ts;
        copy.flags = flags;
        copy.event = (dnsEvent == nullptr) ? event : dnsEvent;
        copy.netcontext_flags = netcontext_flags;
@@ -134,10 +136,12 @@ struct ResState {
    pid_t pid;                                  // pid of the app that sent the DNS lookup
    std::vector<std::string> search_domains{};  // domains to search
    std::vector<android::netdutils::IPSockAddr> nsaddrs;
    std::array<timespec, MAXNS> udpsocks_ts;    // The creation time of the UDP sockets
    android::base::unique_fd udpsocks[MAXNS];   // UDP sockets to nameservers
    unsigned ndots : 4 = 1;                     // threshold for initial abs. query
    unsigned mark;                              // Socket mark to be used by all DNS query sockets
    android::base::unique_fd tcp_nssock;        // TCP socket (but why not one per nameserver?)
    timespec tcp_nssock_ts = {};                // The creation time of the TCP socket
    uint32_t flags = 0;                         // See RES_F_* defines below
    android::net::NetworkDnsEventReported* event;
    uint32_t netcontext_flags;
+1 −3
Original line number Diff line number Diff line
@@ -6175,9 +6175,7 @@ TEST_F(ResolverTest, KeepListeningUDP) {
                    NameserverStats(listen_addr1)
                            .setSuccesses(cfg.expectedDns1Successes)
                            .setTimeouts(cfg.expectedDns1Timeouts)
                            // TODO(271406438): Fix the latency bug in the stats and correct the
                            // value to be 1500.
                            .setRttAvg(cfg.retryCount == 1 ? 500 : -1),
                            .setRttAvg(cfg.retryCount == 1 ? 1500 : -1),
                    NameserverStats(listen_addr2)
                            .setTimeouts(cfg.expectedDns2Timeouts)
                            .setRttAvg(-1),