Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 307b6509 authored by Christopher Ferris's avatar Christopher Ferris Committed by Automerger Merge Worker
Browse files

Merge "Create minimal remap table for symbol binary search." am: 126b010f

Change-Id: I6fb6ed4f5a958c88ebf9e3fe754f3f33d1f8e900
parents 32fb7917 126b010f
Loading
Loading
Loading
Loading
+94 −53
Original line number Diff line number Diff line
@@ -19,6 +19,7 @@

#include <algorithm>
#include <string>
#include <vector>

#include <unwindstack/Memory.h>

@@ -29,23 +30,55 @@ namespace unwindstack {

Symbols::Symbols(uint64_t offset, uint64_t size, uint64_t entry_size, uint64_t str_offset,
                 uint64_t str_size)
    : cur_offset_(offset),
      offset_(offset),
      end_(offset + size),
    : offset_(offset),
      count_(entry_size != 0 ? size / entry_size : 0),
      entry_size_(entry_size),
      str_offset_(str_offset),
      str_end_(str_offset_ + str_size) {}

const Symbols::Info* Symbols::GetInfoFromCache(uint64_t addr) {
  // Binary search the table.
// Returns true if the symbol table entry is a function symbol (STT_FUNC) that is
// defined in some section, i.e. its section index is not SHN_UNDEF.
template <typename SymType>
static bool IsFunc(const SymType* entry) {
  if (entry->st_shndx == SHN_UNDEF) {
    return false;  // Undefined symbols are never treated as functions.
  }
  const auto sym_type = ELF32_ST_TYPE(entry->st_info);
  return sym_type == STT_FUNC;
}

// Returns the Info for the symbol at the given index. A previously read symbol is
// served from the symbols_ cache; otherwise the entry is read from elf_memory and
// added to the cache. Returns nullptr if the entry cannot be fully read.
template <typename SymType>
inline __attribute__((__always_inline__)) const Symbols::Info* Symbols::ReadFuncInfo(
    uint32_t symbol_index, Memory* elf_memory) {
  if (auto cached = symbols_.find(symbol_index); cached != symbols_.end()) {
    return &cached->second;
  }

  SymType entry;
  const uint64_t entry_offset = offset_ + symbol_index * entry_size_;
  if (!elf_memory->ReadFully(entry_offset, &entry, sizeof(entry))) {
    return nullptr;
  }

  // Non-function symbols keep their address, which the binary search still needs for
  // ordering, but get a size of zero so that no address can ever match them.
  const uint32_t func_size = IsFunc(&entry) ? static_cast<uint32_t>(entry.st_size) : 0;
  Info info{.addr = entry.st_value, .size = func_size, .name = entry.st_name};
  return &symbols_.emplace(symbol_index, info).first->second;
}

// Binary search the symbol table to find function containing the given address.
// Without remap, the symbol table is assumed to be sorted and accessed directly.
// If the symbol table is not sorted this method might fail but should not crash.
// When the indices are remapped, they are guaranteed to be sorted by address.
template <typename SymType, bool RemapIndices>
const Symbols::Info* Symbols::BinarySearch(uint64_t addr, Memory* elf_memory) {
  size_t first = 0;
  size_t last = symbols_.size();
  size_t last = RemapIndices ? remap_->size() : count_;
  while (first < last) {
    size_t current = first + (last - first) / 2;
    const Info* info = &symbols_[current];
    if (addr < info->start_offset) {
    size_t symbol_index = RemapIndices ? remap_.value()[current] : current;
    const Info* info = ReadFuncInfo<SymType>(symbol_index, elf_memory);
    if (info == nullptr) {
      return nullptr;
    }
    if (addr < info->addr) {
      last = current;
    } else if (addr < info->end_offset) {
    } else if (addr < info->addr + info->size) {
      return info;
    } else {
      first = current + 1;
@@ -54,64 +87,72 @@ const Symbols::Info* Symbols::GetInfoFromCache(uint64_t addr) {
  return nullptr;
}

// Create remapping table which allows us to access symbols as if they were sorted by address.
template <typename SymType>
bool Symbols::GetName(uint64_t addr, Memory* elf_memory, std::string* name, uint64_t* func_offset) {
  if (symbols_.size() != 0) {
    const Info* info = GetInfoFromCache(addr);
    if (info) {
      CHECK(addr >= info->start_offset && addr <= info->end_offset);
      *func_offset = addr - info->start_offset;
      return elf_memory->ReadString(info->str_offset, name, str_end_ - info->str_offset);
    }
void Symbols::BuildRemapTable(Memory* elf_memory) {
  // Fills remap_ with the indices of the non-empty function symbols, sorted by symbol
  // address and holding at most one index per distinct address. Reading stops early
  // (leaving a partial table) if a batch read returns less than one full symbol entry.
  std::vector<uint64_t> addrs;  // Addresses of all symbols (addrs[i] == symbols[i].st_value).
  addrs.reserve(count_);
  remap_.emplace();  // Construct the optional remap table.
  remap_->reserve(count_);
  for (size_t symbol_idx = 0; symbol_idx < count_;) {
    // Read symbols from memory.  We intentionally bypass the cache to save memory.
    // Do the reads in batches so that we minimize the number of memory read calls.
    uint8_t buffer[1024];
    size_t read = std::min<size_t>(sizeof(buffer), (count_ - symbol_idx) * entry_size_);
    size_t size = elf_memory->Read(offset_ + symbol_idx * entry_size_, buffer, read);
    if (size < sizeof(SymType)) {
      break;  // Stop processing, something looks like it is corrupted.
    }
    for (size_t offset = 0; offset + sizeof(SymType) <= size; offset += entry_size_, symbol_idx++) {
      SymType sym;
      memcpy(&sym, &buffer[offset], sizeof(SymType));  // Copy to ensure alignment.
      addrs.push_back(sym.st_value);  // Always insert so it is indexable by symbol index.
      // Skip zero-sized function symbols: they can never contain an address, but if one
      // shares its address with a real function and has a lower index, the de-duplication
      // below would keep the empty symbol and drop the real function from the table.
      if (IsFunc(&sym) && sym.st_size != 0) {
        remap_->push_back(symbol_idx);  // Indices of non-empty function symbols only.
      }
    }
  }
  // Sort by address to make the remap list binary searchable (stable due to the a<b tie break).
  auto comp = [&addrs](auto a, auto b) { return std::tie(addrs[a], a) < std::tie(addrs[b], b); };
  std::sort(remap_->begin(), remap_->end(), comp);
  // Remove duplicate entries (methods de-duplicated by the linker).
  auto pred = [&addrs](auto a, auto b) { return addrs[a] == addrs[b]; };
  remap_->erase(std::unique(remap_->begin(), remap_->end(), pred), remap_->end());
  remap_->shrink_to_fit();
}

  bool symbol_added = false;
  bool return_value = false;
  while (cur_offset_ + entry_size_ <= end_) {
    SymType entry;
    if (!elf_memory->ReadFully(cur_offset_, &entry, sizeof(entry))) {
      // Stop all processing, something looks like it is corrupted.
      cur_offset_ = UINT64_MAX;
      return false;
    }
    cur_offset_ += entry_size_;

    if (entry.st_shndx != SHN_UNDEF && ELF32_ST_TYPE(entry.st_info) == STT_FUNC) {
      // Treat st_value as virtual address.
      uint64_t start_offset = entry.st_value;
      uint64_t end_offset = start_offset + entry.st_size;

      // Cache the value.
      symbols_.emplace_back(start_offset, end_offset, str_offset_ + entry.st_name);
      symbol_added = true;

      if (addr >= start_offset && addr < end_offset) {
        *func_offset = addr - start_offset;
        uint64_t offset = str_offset_ + entry.st_name;
        if (offset < str_end_) {
          return_value = elf_memory->ReadString(offset, name, str_end_ - offset);
        }
        break;
      }
template <typename SymType>
bool Symbols::GetName(uint64_t addr, Memory* elf_memory, std::string* name, uint64_t* func_offset) {
  const Info* info;
  if (!remap_.has_value()) {
    // Assume the symbol table is sorted. If it is not, this will gracefully fail.
    info = BinarySearch<SymType, false>(addr, elf_memory);
    if (info == nullptr) {
      // Create the remapping table and retry the search.
      BuildRemapTable<SymType>(elf_memory);
      symbols_.clear();  // Remove cached symbols since the access pattern will be different.
      info = BinarySearch<SymType, true>(addr, elf_memory);
    }
  } else {
    // Fast search using the previously created remap table.
    info = BinarySearch<SymType, true>(addr, elf_memory);
  }

  if (symbol_added) {
    std::sort(symbols_.begin(), symbols_.end(),
              [](const Info& a, const Info& b) { return a.start_offset < b.start_offset; });
  if (info == nullptr) {
    return false;
  }
  return return_value;
  // Read the function name from the string table.
  *func_offset = addr - info->addr;
  uint64_t str = str_offset_ + info->name;
  return str < str_end_ && elf_memory->ReadString(str, name, str_end_ - str);
}

template <typename SymType>
bool Symbols::GetGlobal(Memory* elf_memory, const std::string& name, uint64_t* memory_address) {
  uint64_t cur_offset = offset_;
  while (cur_offset + entry_size_ <= end_) {
  for (uint32_t i = 0; i < count_; i++) {
    SymType entry;
    if (!elf_memory->ReadFully(cur_offset, &entry, sizeof(entry))) {
    if (!elf_memory->ReadFully(offset_ + i * entry_size_, &entry, sizeof(entry))) {
      return false;
    }
    cur_offset += entry_size_;

    if (entry.st_shndx != SHN_UNDEF && ELF32_ST_TYPE(entry.st_info) == STT_OBJECT &&
        ELF32_ST_BIND(entry.st_info) == STB_GLOBAL) {
+23 −17
Original line number Diff line number Diff line
@@ -19,8 +19,9 @@

#include <stdint.h>

#include <optional>
#include <string>
#include <vector>
#include <unordered_map>

namespace unwindstack {

@@ -29,11 +30,9 @@ class Memory;

class Symbols {
  struct Info {
    Info(uint64_t start_offset, uint64_t end_offset, uint64_t str_offset)
        : start_offset(start_offset), end_offset(end_offset), str_offset(str_offset) {}
    uint64_t start_offset;
    uint64_t end_offset;
    uint64_t str_offset;
    uint64_t addr;  // Symbol address.
    uint32_t size;  // Symbol size in bytes. Zero if not a function.
    uint32_t name;  // Offset in .strtab.
  };

 public:
@@ -41,8 +40,6 @@ class Symbols {
          uint64_t str_size);
  virtual ~Symbols() = default;

  const Info* GetInfoFromCache(uint64_t addr);

  template <typename SymType>
  bool GetName(uint64_t addr, Memory* elf_memory, std::string* name, uint64_t* func_offset);

@@ -51,18 +48,27 @@ class Symbols {

  void ClearCache() {
    symbols_.clear();
    cur_offset_ = offset_;
    remap_.reset();
  }

 private:
  uint64_t cur_offset_;
  uint64_t offset_;
  uint64_t end_;
  uint64_t entry_size_;
  uint64_t str_offset_;
  uint64_t str_end_;

  std::vector<Info> symbols_;
  template <typename SymType>
  const Info* ReadFuncInfo(uint32_t symbol_index, Memory* elf_memory);

  template <typename SymType, bool RemapIndices>
  const Info* BinarySearch(uint64_t addr, Memory* elf_memory);

  template <typename SymType>
  void BuildRemapTable(Memory* elf_memory);

  const uint64_t offset_;
  const uint64_t count_;
  const uint64_t entry_size_;
  const uint64_t str_offset_;
  const uint64_t str_end_;

  std::unordered_map<uint32_t, Info> symbols_;  // Cache of read symbols (keyed by symbol index).
  std::optional<std::vector<uint32_t>> remap_;  // Indices of function symbols sorted by address.
};

}  // namespace unwindstack
+6 −1
Original line number Diff line number Diff line
@@ -185,18 +185,21 @@ TYPED_TEST_P(SymbolsTest, multiple_entries_nonstandard_size) {
  std::string fake_name;

  this->InitSym(&sym, 0x5000, 0x10, 0x40);
  this->memory_.SetMemoryBlock(offset, entry_size, 0);
  this->memory_.SetMemory(offset, &sym, sizeof(sym));
  fake_name = "function_one";
  this->memory_.SetMemory(0x2040, fake_name.c_str(), fake_name.size() + 1);
  offset += entry_size;

  this->InitSym(&sym, 0x3004, 0x200, 0x100);
  this->memory_.SetMemoryBlock(offset, entry_size, 0);
  this->memory_.SetMemory(offset, &sym, sizeof(sym));
  fake_name = "function_two";
  this->memory_.SetMemory(0x2100, fake_name.c_str(), fake_name.size() + 1);
  offset += entry_size;

  this->InitSym(&sym, 0xa010, 0x20, 0x230);
  this->memory_.SetMemoryBlock(offset, entry_size, 0);
  this->memory_.SetMemory(offset, &sym, sizeof(sym));
  fake_name = "function_three";
  this->memory_.SetMemory(0x2230, fake_name.c_str(), fake_name.size() + 1);
@@ -274,7 +277,9 @@ TYPED_TEST_P(SymbolsTest, symtab_read_cached) {
  // Do call that should cache all of the entries (except the string data).
  std::string name;
  uint64_t func_offset;
  ASSERT_FALSE(symbols.GetName<TypeParam>(0x6000, &this->memory_, &name, &func_offset));
  ASSERT_FALSE(symbols.GetName<TypeParam>(0x5000, &this->memory_, &name, &func_offset));
  ASSERT_FALSE(symbols.GetName<TypeParam>(0x2000, &this->memory_, &name, &func_offset));
  ASSERT_FALSE(symbols.GetName<TypeParam>(0x1000, &this->memory_, &name, &func_offset));
  this->memory_.Clear();
  ASSERT_FALSE(symbols.GetName<TypeParam>(0x6000, &this->memory_, &name, &func_offset));