Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 8f788290 authored by Narayan Kamath's avatar Narayan Kamath
Browse files

dumpstate: use tombstoned/debuggerd for java traces.

- debuggerd_dump_traces now supports Java traces as well, so
  use that when dalvik.vm.stack-trace-dir is set. The output
  FD we use for the intercept is currently a regular file
  written by mkostemp, but this can be cleaned up once the
  old way of doing things is removed.

- We're no longer writing traces to a global trace file, so
  add bug report entries for all java traces written in the past
  30 minutes just as we do for native traces.

The minor refactoring that this CL undertakes was done under the
assumption that the old way of doing things will be removed in the
near future.

Bug: 32064548
Test: manual

Change-Id: I8af6f0a644115296dc41affc3b7cd98a2db32c48
parent a1af5b5e
Loading
Loading
Loading
Loading
+6 −0
Original line number Diff line number Diff line
@@ -32,6 +32,12 @@
    ALOGI(__VA_ARGS__);
#endif

#ifndef MYLOGW
#define MYLOGW(...)               \
    fprintf(stderr, __VA_ARGS__); \
    ALOGW(__VA_ARGS__);
#endif

#ifndef MYLOGE
#define MYLOGE(...)               \
    fprintf(stderr, __VA_ARGS__); \
+177 −84
Original line number Diff line number Diff line
@@ -21,13 +21,9 @@
#include <fcntl.h>
#include <libgen.h>
#include <limits.h>
#include <memory>
#include <regex>
#include <set>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string>
#include <string.h>
#include <sys/prctl.h>
#include <sys/resource.h>
@@ -35,6 +31,11 @@
#include <sys/time.h>
#include <sys/wait.h>
#include <unistd.h>
#include <memory>
#include <regex>
#include <set>
#include <string>
#include <vector>

#include <android-base/file.h>
#include <android-base/properties.h>
@@ -78,19 +79,29 @@ void add_mountinfo();
#define LOGPERSIST_DATA_DIR "/data/misc/logd"
#define PROFILE_DATA_DIR_CUR "/data/misc/profiles/cur"
#define PROFILE_DATA_DIR_REF "/data/misc/profiles/ref"
#define TOMBSTONE_DIR "/data/tombstones"
#define TOMBSTONE_FILE_PREFIX TOMBSTONE_DIR "/tombstone_"
/* Can accomodate a tombstone number up to 9999. */
#define TOMBSTONE_MAX_LEN (sizeof(TOMBSTONE_FILE_PREFIX) + 4)
#define NUM_TOMBSTONES  10
#define WLUTIL "/vendor/xbin/wlutil"

typedef struct {
  char name[TOMBSTONE_MAX_LEN];
// TODO(narayan): Since this information has to be kept in sync
// with tombstoned, we should just put it in a common header.
//
// File: system/core/debuggerd/tombstoned/tombstoned.cpp
static const std::string TOMBSTONE_DIR = "/data/tombstones";
static const std::string TOMBSTONE_FILE_PREFIX = "/data/tombstones/tombstone_";
static const std::string ANR_DIR = "/data/anr";
static const std::string ANR_FILE_PREFIX = "/data/anr/anr_";

struct DumpData {
    std::string name;
    int fd;
} tombstone_data_t;
    time_t mtime;
};

static bool operator<(const DumpData& d1, const DumpData& d2) {
    return d1.mtime < d2.mtime;
}

static tombstone_data_t tombstone_data[NUM_TOMBSTONES];
static std::unique_ptr<std::vector<DumpData>> tombstone_data;
static std::unique_ptr<std::vector<DumpData>> anr_data;

// TODO: temporary variables and functions used during C++ refactoring
static Dumpstate& ds = Dumpstate::GetInstance();
@@ -122,23 +133,73 @@ static constexpr char PROPERTY_EXTRA_DESCRIPTION[] = "dumpstate.options.descript

static const CommandOptions AS_ROOT_20 = CommandOptions::WithTimeout(20).AsRoot().Build();

/* gets the tombstone data, according to the bugreport type: if zipped, gets all tombstones;
 * otherwise, gets just those modified in the last half an hour. */
static void get_tombstone_fds(tombstone_data_t data[NUM_TOMBSTONES]) {
    time_t thirty_minutes_ago = ds.now_ - 60 * 30;
    for (size_t i = 0; i < NUM_TOMBSTONES; i++) {
        snprintf(data[i].name, sizeof(data[i].name), "%s%02zu", TOMBSTONE_FILE_PREFIX, i);
        int fd = TEMP_FAILURE_RETRY(open(data[i].name,
                                         O_RDONLY | O_CLOEXEC | O_NOFOLLOW | O_NONBLOCK));
/*
 * Returns a vector of dump fds under |file_prefix|. The returned vector
 * is sorted by the mtimes of the dumps. If |limit_by_mtime| is set, the
 * vector only contains files that were written in the last 30 minutes.
 */
static std::vector<DumpData>* GetDumpFds(const std::string& file_prefix, bool limit_by_mtime) {
    const time_t thirty_minutes_ago = ds.now_ - 60 * 30;

    size_t i = 0;
    std::unique_ptr<std::vector<DumpData>> dump_data(new std::vector<DumpData>());
    while (true) {
        const std::string name = android::base::StringPrintf("%s%02zu", file_prefix.c_str(), i++);
        android::base::unique_fd fd(
            TEMP_FAILURE_RETRY(open(name.c_str(), O_RDONLY | O_CLOEXEC | O_NOFOLLOW | O_NONBLOCK)));
        if (fd == -1) {
            if (errno != ENOENT) {
                MYLOGW("Unable to open dump file: %s %s\n", name.c_str(), strerror(errno));
            }

            break;
        }

        struct stat st;
        if (fstat(fd, &st) == 0 && S_ISREG(st.st_mode) && st.st_size > 0 &&
            (ds.IsZipping() || st.st_mtime >= thirty_minutes_ago)) {
            data[i].fd = fd;
        if (fstat(fd, &st) == -1) {
            MYLOGW("Unable to stat dump file: %s %s\n", name.c_str(), strerror(errno));
            continue;
        }

        if (!S_ISREG(st.st_mode)) {
            MYLOGW("Unexpected mode for dump file: %s %x\n", name.c_str(), st.st_mode);
            continue;
        }

        if (limit_by_mtime && st.st_mtime >= thirty_minutes_ago) {
            MYLOGI("Excluding stale dump file: %s\n", name.c_str());
            continue;
        }

        DumpData data = {.name = name, .fd = fd.release(), .mtime = st.st_mtime};

        dump_data->push_back(data);
    }

    std::sort(dump_data->begin(), dump_data->end());

    return dump_data.release();
}

static bool AddDumps(const std::vector<DumpData>& dump_list, const char* type_name,
                     const bool add_to_zip) {
    bool dumped = false;
    for (size_t i = 0; i < dump_list.size(); i++) {
        const std::string& name = dump_list[i].name;
        const int fd = dump_list[i].fd;
        dumped = true;
        if (ds.IsZipping() && add_to_zip) {
            if (!ds.AddZipEntryFromFd(ZIP_ROOT_DIR + name, fd)) {
                MYLOGE("Unable to add %s %s to zip file\n", name.c_str(), type_name);
            }
        } else {
            close(fd);
            data[i].fd = -1;
            dump_file_from_fd(type_name, name.c_str(), fd);
        }

        close(fd);
    }

    return dumped;
}

// for_each_pid() callback to get mount info about a process.
@@ -860,11 +921,10 @@ static void DumpIpTables() {
    RunCommand("IP6TABLES RAW", {"ip6tables", "-t", "raw", "-L", "-nvx"});
}

static void AddAnrTraceFiles() {
    bool add_to_zip = ds.IsZipping() && ds.version_ == VERSION_SPLIT_ANR;
static void AddGlobalAnrTraceFile(const bool add_to_zip, const std::string& anr_traces_file,
                                  const std::string& anr_traces_dir) {
    std::string dump_traces_dir;

    /* show the traces we collected in main(), if that was done */
    if (dump_traces_path != nullptr) {
        if (add_to_zip) {
            dump_traces_dir = dirname(dump_traces_path);
@@ -877,8 +937,6 @@ static void AddAnrTraceFiles() {
        }
    }

    std::string anr_traces_path = android::base::GetProperty("dalvik.vm.stack-trace-file", "");
    std::string anr_traces_dir = dirname(anr_traces_path.c_str());

    // Make sure directory is not added twice.
    // TODO: this is an overzealous check because it's relying on dump_traces_path - which is
@@ -888,54 +946,101 @@ static void AddAnrTraceFiles() {
    // be revisited.
    bool already_dumped = anr_traces_dir == dump_traces_dir;

    MYLOGD("AddAnrTraceFiles(): dump_traces_dir=%s, anr_traces_dir=%s, already_dumped=%d\n",
    MYLOGD("AddGlobalAnrTraceFile(): dump_traces_dir=%s, anr_traces_dir=%s, already_dumped=%d\n",
           dump_traces_dir.c_str(), anr_traces_dir.c_str(), already_dumped);

    if (anr_traces_path.empty()) {
        printf("*** NO VM TRACES FILE DEFINED (dalvik.vm.stack-trace-file)\n\n");
    } else {
    int fd = TEMP_FAILURE_RETRY(
            open(anr_traces_path.c_str(), O_RDONLY | O_CLOEXEC | O_NOFOLLOW | O_NONBLOCK));
        open(anr_traces_file.c_str(), O_RDONLY | O_CLOEXEC | O_NOFOLLOW | O_NONBLOCK));
    if (fd < 0) {
            printf("*** NO ANR VM TRACES FILE (%s): %s\n\n", anr_traces_path.c_str(),
                   strerror(errno));
        printf("*** NO ANR VM TRACES FILE (%s): %s\n\n", anr_traces_file.c_str(), strerror(errno));
    } else {
        if (add_to_zip) {
            if (!already_dumped) {
                MYLOGD("Adding dalvik ANR traces (directory %s) to the zip file\n",
                       anr_traces_dir.c_str());
                ds.AddDir(anr_traces_dir, true);
                    already_dumped = true;
            }
        } else {
            MYLOGD("Dumping last ANR traces (%s) to the main bugreport entry\n",
                       anr_traces_path.c_str());
                dump_file_from_fd("VM TRACES AT LAST ANR", anr_traces_path.c_str(), fd);
                   anr_traces_file.c_str());
            dump_file_from_fd("VM TRACES AT LAST ANR", anr_traces_file.c_str(), fd);
        }
    }
}

static void AddAnrTraceDir(const bool add_to_zip, const std::string& anr_traces_dir) {
    MYLOGD("AddAnrTraceDir(): dump_traces_file=%s, anr_traces_dir=%s\n", dump_traces_path,
           anr_traces_dir.c_str());

    // If we're here, dump_traces_path will always be a temporary file
    // (created with mkostemp or similar) that contains dumps taken earlier
    // on in the process.
    if (dump_traces_path != nullptr) {
        if (add_to_zip) {
            ds.AddZipEntry(ZIP_ROOT_DIR + anr_traces_dir + "/traces-just-now.txt", dump_traces_path);
        } else {
            MYLOGD("Dumping current ANR traces (%s) to the main bugreport entry\n",
                   dump_traces_path);
            ds.DumpFile("VM TRACES JUST NOW", dump_traces_path);
        }

        const int ret = unlink(dump_traces_path);
        if (ret == -1) {
            MYLOGW("Error unlinking temporary trace path %s: %s\n", dump_traces_path,
                   strerror(errno));
        }
    }

    const bool anr_traces_dumped = AddDumps(*anr_data, "ANR", add_to_zip);
    if (!anr_traces_dumped) {
        printf("*** NO ANRs to dump in %s\n\n", ANR_DIR.c_str());
    }
}

static void AddAnrTraceFiles() {
    const bool add_to_zip = ds.IsZipping() && ds.version_ == VERSION_SPLIT_ANR;

    std::string anr_traces_file;
    std::string anr_traces_dir;
    bool is_global_trace_file = true;

    // First check whether the stack-trace-dir property is set. When it's set,
    // each ANR trace will be written to a separate file and not to a global
    // stack trace file.
    anr_traces_dir = android::base::GetProperty("dalvik.vm.stack-trace-dir", "");
    if (anr_traces_dir.empty()) {
        anr_traces_file = android::base::GetProperty("dalvik.vm.stack-trace-file", "");
        if (!anr_traces_file.empty()) {
            is_global_trace_file = true;
            anr_traces_dir = dirname(anr_traces_file.c_str());
        }
    }

    if (add_to_zip && already_dumped) {
        MYLOGD("Already dumped directory %s to the zip file\n", anr_traces_dir.c_str());
    // We have neither configured a global trace file nor a trace directory,
    // there will be nothing to dump.
    if (anr_traces_file.empty() && anr_traces_dir.empty()) {
        printf("*** NO VM TRACES FILE DEFINED (dalvik.vm.stack-trace-file)\n\n");
        return;
    }

    if (is_global_trace_file) {
        AddGlobalAnrTraceFile(add_to_zip, anr_traces_file, anr_traces_dir);
    } else {
        AddAnrTraceDir(add_to_zip, anr_traces_dir);
    }

    /* slow traces for slow operations */
    struct stat st;
    if (!anr_traces_path.empty()) {
        int tail = anr_traces_path.size() - 1;
        while (tail > 0 && anr_traces_path.at(tail) != '/') {
            tail--;
        }
    if (!anr_traces_dir.empty()) {
        int i = 0;
        while (1) {
            anr_traces_path = anr_traces_path.substr(0, tail + 1) +
                              android::base::StringPrintf("slow%02d.txt", i);
            if (stat(anr_traces_path.c_str(), &st)) {
        while (true) {
            const std::string slow_trace_path =
                anr_traces_dir + android::base::StringPrintf("slow%02d.txt", i);
            if (stat(slow_trace_path.c_str(), &st)) {
                // No traces file at this index, done with the files.
                break;
            }
            ds.DumpFile("VM TRACES WHEN SLOW", anr_traces_path.c_str());
            ds.DumpFile("VM TRACES WHEN SLOW", slow_trace_path.c_str());
            i++;
        }
    }
@@ -1010,25 +1115,11 @@ static void dumpstate() {

    AddAnrTraceFiles();

    int dumped = 0;
    for (size_t i = 0; i < NUM_TOMBSTONES; i++) {
        if (tombstone_data[i].fd != -1) {
            const char *name = tombstone_data[i].name;
            int fd = tombstone_data[i].fd;
            dumped = 1;
            if (ds.IsZipping()) {
                if (!ds.AddZipEntryFromFd(ZIP_ROOT_DIR + name, fd)) {
                    MYLOGE("Unable to add tombstone %s to zip file\n", name);
                }
            } else {
                dump_file_from_fd("TOMBSTONE", name, fd);
            }
            close(fd);
            tombstone_data[i].fd = -1;
        }
    }
    if (!dumped) {
        printf("*** NO TOMBSTONES to dump in %s\n\n", TOMBSTONE_DIR);
    // NOTE: tombstones are always added as separate entries in the zip archive
    // and are not interspersed with the main report.
    const bool tombstones_dumped = AddDumps(*tombstone_data, "TOMBSTONE", true /* add_to_zip */);
    if (!tombstones_dumped) {
        printf("*** NO TOMBSTONES to dump in %s\n\n", TOMBSTONE_DIR.c_str());
    }

    DumpFile("NETWORK DEV INFO", "/proc/net/dev");
@@ -1707,7 +1798,9 @@ int main(int argc, char *argv[]) {
        dump_traces_path = dump_traces();

        /* Run some operations that require root. */
        get_tombstone_fds(tombstone_data);
        tombstone_data.reset(GetDumpFds(TOMBSTONE_FILE_PREFIX, !ds.IsZipping()));
        anr_data.reset(GetDumpFds(ANR_FILE_PREFIX, !ds.IsZipping()));

        ds.AddDir(RECOVERY_DIR, true);
        ds.AddDir(RECOVERY_DATA_DIR, true);
        ds.AddDir(LOGPERSIST_DATA_DIR, false);
+133 −3
Original line number Diff line number Diff line
@@ -38,6 +38,7 @@
#include <time.h>
#include <unistd.h>

#include <memory>
#include <set>
#include <string>
#include <vector>
@@ -46,6 +47,7 @@
#include <android-base/properties.h>
#include <android-base/stringprintf.h>
#include <android-base/strings.h>
#include <android-base/unique_fd.h>
#include <android/hidl/manager/1.0/IServiceManager.h>
#include <cutils/properties.h>
#include <cutils/sockets.h>
@@ -852,15 +854,143 @@ std::set<int> get_interesting_hal_pids() {
    return pids; // whether it was okay or not
}

const char* DumpTraces(const std::string& traces_path);
const char* DumpTracesTombstoned(const std::string& traces_dir);

/* dump Dalvik and native stack traces, return the trace file location (NULL if none) */
const char *dump_traces() {
    DurationReporter duration_reporter("DUMP TRACES");

    const char* result = nullptr;
    const std::string traces_dir = android::base::GetProperty("dalvik.vm.stack-trace-dir", "");
    if (!traces_dir.empty()) {
        return DumpTracesTombstoned(traces_dir);
    }

    const std::string traces_file = android::base::GetProperty("dalvik.vm.stack-trace-file", "");
    if (!traces_file.empty()) {
        return DumpTraces(traces_file);
    }

    return nullptr;
}

static bool IsZygote(int pid) {
    static const std::string kZygotePrefix = "zygote";

    std::string traces_path = android::base::GetProperty("dalvik.vm.stack-trace-file", "");
    if (traces_path.empty()) return nullptr;
    std::string cmdline;
    if (!android::base::ReadFileToString(android::base::StringPrintf("/proc/%d/cmdline", pid),
                                         &cmdline)) {
        return true;
    }

    return (cmdline.find(kZygotePrefix) == 0);
}

const char* DumpTracesTombstoned(const std::string& traces_dir) {
    const std::string temp_file_pattern = traces_dir + "/dumptrace_XXXXXX";

    const size_t buf_size = temp_file_pattern.length() + 1;
    std::unique_ptr<char[]> file_name_buf(new char[buf_size]);
    memcpy(file_name_buf.get(), temp_file_pattern.c_str(), buf_size);

    // Create a new, empty file to receive all trace dumps.
    //
    // TODO: This can be simplified once we remove support for the old style
    // dumps. We can have a file descriptor passed in to dump_traces instead
    // of creating a file, closing it and then reopening it again.
    android::base::unique_fd fd(mkostemp(file_name_buf.get(), O_APPEND | O_CLOEXEC));
    if (fd < 0) {
        MYLOGE("mkostemp on pattern %s: %s\n", file_name_buf.get(), strerror(errno));
        return nullptr;
    }

    // Nobody should have access to this temporary file except dumpstate, but we
    // temporarily grant 'read' to 'others' here because this file is created
    // when tombstoned is still running as root, but dumped after dropping. This
    // can go away once support for old style dumping has.
    const int chmod_ret = fchmod(fd, 0666);
    if (chmod_ret < 0) {
        MYLOGE("fchmod on %s failed: %s\n", file_name_buf.get(), strerror(errno));
        return nullptr;
    }

    std::unique_ptr<DIR, decltype(&closedir)> proc(opendir("/proc"), closedir);
    if (proc.get() == nullptr) {
        MYLOGE("opendir /proc failed: %s\n", strerror(errno));
        return nullptr;
    }

    // Number of times process dumping has timed out. If we encounter too many
    // failures, we'll give up.
    int timeout_failures = 0;
    bool dalvik_found = false;

    const std::set<int> hal_pids = get_interesting_hal_pids();

    struct dirent* d;
    while ((d = readdir(proc.get()))) {
        int pid = atoi(d->d_name);
        if (pid <= 0) {
            continue;
        }

        const std::string link_name = android::base::StringPrintf("/proc/%d/exe", pid);
        std::string exe;
        if (!android::base::Readlink(link_name, &exe)) {
            continue;
        }

        bool is_java_process;
        if (exe == "/system/bin/app_process32" || exe == "/system/bin/app_process64") {
            // Don't bother dumping backtraces for the zygote.
            if (IsZygote(pid)) {
                continue;
            }

            dalvik_found = true;
            is_java_process = true;
        } else if (should_dump_native_traces(exe.c_str()) || hal_pids.find(pid) != hal_pids.end()) {
            is_java_process = false;
        } else {
            // Probably a native process we don't care about, continue.
            continue;
        }

        // If 3 backtrace dumps fail in a row, consider debuggerd dead.
        if (timeout_failures == 3) {
            dprintf(fd, "ERROR: Too many stack dump failures, exiting.\n");
            break;
        }

        const uint64_t start = Nanotime();
        const int ret = dump_backtrace_to_file_timeout(
            pid, is_java_process ? kDebuggerdJavaBacktrace : kDebuggerdNativeBacktrace,
            is_java_process ? 5 : 20, fd);

        if (ret == -1) {
            dprintf(fd, "dumping failed, likely due to a timeout\n");
            timeout_failures++;
            continue;
        }

        // We've successfully dumped stack traces, reset the failure count
        // and write a summary of the elapsed time to the file and continue with the
        // next process.
        timeout_failures = 0;

        dprintf(fd, "[dump %s stack %d: %.3fs elapsed]\n", is_java_process ? "dalvik" : "native",
                pid, (float)(Nanotime() - start) / NANOS_PER_SEC);
    }

    if (!dalvik_found) {
        MYLOGE("Warning: no Dalvik processes found to dump stacks\n");
    }

    return file_name_buf.release();
}

const char* DumpTraces(const std::string& traces_path) {
    const char* result = NULL;
    /* move the old traces.txt (if any) out of the way temporarily */
    std::string anrtraces_path = traces_path + ".anr";
    if (rename(traces_path.c_str(), anrtraces_path.c_str()) && errno != ENOENT) {