Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 83e0e84d authored by Christopher Ferris's avatar Christopher Ferris
Browse files

Make stack dumping errors display better.

Currently, the code first checks if a process is cached
and then checks if the process should be backtraced. For
example, the code should not backtrace the zygote process.
This CL moves the cached process check until after the
check to see if the process should be backtraced. Before
this change, the zygote was always in the list of processes,
and is always cached so it shows up as this anonymous
process that can't be backtraced.  After this change, since
the zygote is not supposed to be backtraced, it is not present.

For any pids that would have been dumped but are bypassed or have
an error, make the header include the timestamp and the command line.

Improve the message about why a cached process is skipped.

For an unwind that fails:
- If it's a java process, try a native unwind.
- Display the time it took for the failed unwind.
- Check if the process is now cached and display that error message.

Bug: 339473422

Test: Trigger bugreport and verify everything continues to work.
Test: Fake an unwind failure and verify the output.
Test: Fake an unwind failure and fake a cached process and verify the output.
Test: Fake an unwind failure for a java thread, and do a native unwind.
Test: Ran the bugreports from above through the bugreport parser.
Change-Id: I3082f2ac619e179621ad3ff39f2e839d000b2576
parent 77cd3a2b
Loading
Loading
Loading
Loading
+91 −18
Original line number Diff line number Diff line
@@ -2186,6 +2186,74 @@ static void DumpstateOnboardingOnly() {
    ds.AddDir(LOGPERSIST_DATA_DIR, false);
}

static std::string GetTimestamp(const timespec& ts) {
    tm tm;
    localtime_r(&ts.tv_sec, &tm);

    // Reserve enough space for the entire time string, includes the space
    // for the '\0' to make the calculations below easier by using size for
    // the total string size.
    std::string str(sizeof("1970-01-01 00:00:00.123456789+0830"), '\0');
    size_t n = strftime(str.data(), str.size(), "%F %H:%M", &tm);
    if (n == 0) {
        return "TIMESTAMP FAILURE";
    }
    int num_chars = snprintf(&str[n], str.size() - n, ":%02d.%09ld", tm.tm_sec, ts.tv_nsec);
    if (num_chars > str.size() - n) {
        return "TIMESTAMP FAILURE";
    }
    n += static_cast<size_t>(num_chars);
    if (strftime(&str[n], str.size() - n, "%z", &tm) == 0) {
        return "TIMESTAMP FAILURE";
    }
    return str;
}

static std::string GetCmdline(pid_t pid) {
    std::string cmdline;
    if (!android::base::ReadFileToString(android::base::StringPrintf("/proc/%d/cmdline", pid),
                                         &cmdline)) {
        return "UNKNOWN";
    }
    // There are '\0' terminators between arguments, convert them to spaces.
    // But start by skipping all trailing '\0' values.
    size_t cur = cmdline.size() - 1;
    while (cur != 0 && cmdline[cur] == '\0') {
        cur--;
    }
    if (cur == 0) {
        return "UNKNOWN";
    }
    while ((cur = cmdline.rfind('\0', cur)) != std::string::npos) {
        cmdline[cur] = ' ';
    }
    return cmdline;
}

static void DumpPidHeader(int fd, pid_t pid, const timespec& ts) {
    // For consistency, the header to this message matches the one
    // dumped by debuggerd.
    dprintf(fd, "\n----- pid %d at %s -----\n", pid, GetTimestamp(ts).c_str());
    dprintf(fd, "Cmd line: %s\n", GetCmdline(pid).c_str());
}

static void DumpPidFooter(int fd, pid_t pid) {
    // For consistency, the footer to this message matches the one
    // dumped by debuggerd.
    dprintf(fd, "----- end %d -----\n", pid);
}

static bool DumpBacktrace(int fd, pid_t pid, bool is_java_process) {
    int ret = dump_backtrace_to_file_timeout(
        pid, is_java_process ? kDebuggerdJavaBacktrace : kDebuggerdNativeBacktrace, 3, fd);
    if (ret == -1 && is_java_process) {
        // Tried to unwind as a java process, try a native unwind.
        dprintf(fd, "Java unwind failed for pid %d, trying a native unwind.\n", pid);
        ret = dump_backtrace_to_file_timeout(pid, kDebuggerdNativeBacktrace, 3, fd);
    }
    return ret != -1;
}

Dumpstate::RunStatus Dumpstate::DumpTraces(const char** path) {
    const std::string temp_file_pattern = ds.bugreport_internal_dir_ + "/dumptrace_XXXXXX";
    const size_t buf_size = temp_file_pattern.length() + 1;
@@ -2234,16 +2302,6 @@ Dumpstate::RunStatus Dumpstate::DumpTraces(const char** path) {
            continue;
        }

        // Skip cached processes.
        if (IsCached(pid)) {
            // For consistency, the header and footer to this message match those
            // dumped by debuggerd in the success case.
            dprintf(fd, "\n---- pid %d at [unknown] ----\n", pid);
            dprintf(fd, "Dump skipped for cached process.\n");
            dprintf(fd, "---- end %d ----", pid);
            continue;
        }

        const std::string link_name = android::base::StringPrintf("/proc/%d/exe", pid);
        std::string exe;
        if (!android::base::Readlink(link_name, &exe)) {
@@ -2272,16 +2330,31 @@ Dumpstate::RunStatus Dumpstate::DumpTraces(const char** path) {
            break;
        }

        timespec start_timespec;
        clock_gettime(CLOCK_REALTIME, &start_timespec);
        if (IsCached(pid)) {
            DumpPidHeader(fd, pid, start_timespec);
            dprintf(fd, "Process is cached, skipping backtrace due to high chance of timeout.\n");
            DumpPidFooter(fd, pid);
            continue;
        }

        const uint64_t start = Nanotime();
        const int ret = dump_backtrace_to_file_timeout(
            pid, is_java_process ? kDebuggerdJavaBacktrace : kDebuggerdNativeBacktrace, 3, fd);
        if (!DumpBacktrace(fd, pid, is_java_process)) {
            if (IsCached(pid)) {
                DumpPidHeader(fd, pid, start_timespec);
                dprintf(fd, "Backtrace failed, but process has become cached.\n");
                DumpPidFooter(fd, pid);
                continue;
            }

        if (ret == -1) {
            // For consistency, the header and footer to this message match those
            // dumped by debuggerd in the success case.
            dprintf(fd, "\n---- pid %d at [unknown] ----\n", pid);
            dprintf(fd, "Dump failed, likely due to a timeout.\n");
            dprintf(fd, "---- end %d ----", pid);
            DumpPidHeader(fd, pid, start_timespec);
            dprintf(fd, "Backtrace gathering failed, likely due to a timeout.\n");
            DumpPidFooter(fd, pid);

            dprintf(fd, "\n[dump %s stack %d: %.3fs elapsed]\n",
                    is_java_process ? "dalvik" : "native", pid,
                    (float)(Nanotime() - start) / NANOS_PER_SEC);
            timeout_failures++;
            continue;
        }