Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 126b010f authored by Christopher Ferris's avatar Christopher Ferris Committed by Gerrit Code Review
Browse files

Merge "Create minimal remap table for symbol binary search."

parents cec90ea2 af41960a
Loading
Loading
Loading
Loading
+94 −53
Original line number Original line Diff line number Diff line
@@ -19,6 +19,7 @@


#include <algorithm>
#include <algorithm>
#include <string>
#include <string>
#include <vector>


#include <unwindstack/Memory.h>
#include <unwindstack/Memory.h>


@@ -29,23 +30,55 @@ namespace unwindstack {


Symbols::Symbols(uint64_t offset, uint64_t size, uint64_t entry_size, uint64_t str_offset,
Symbols::Symbols(uint64_t offset, uint64_t size, uint64_t entry_size, uint64_t str_offset,
                 uint64_t str_size)
                 uint64_t str_size)
    : cur_offset_(offset),
    : offset_(offset),
      offset_(offset),
      count_(entry_size != 0 ? size / entry_size : 0),
      end_(offset + size),
      entry_size_(entry_size),
      entry_size_(entry_size),
      str_offset_(str_offset),
      str_offset_(str_offset),
      str_end_(str_offset_ + str_size) {}
      str_end_(str_offset_ + str_size) {}


const Symbols::Info* Symbols::GetInfoFromCache(uint64_t addr) {
template <typename SymType>
  // Binary search the table.
static bool IsFunc(const SymType* entry) {
  return entry->st_shndx != SHN_UNDEF && ELF32_ST_TYPE(entry->st_info) == STT_FUNC;
}

// Read symbol entry from memory and cache it so we don't have to read it again.
//
// Looks up symbol_index in the symbols_ cache first; on a miss, reads one SymType
// entry from elf_memory at offset_ + symbol_index * entry_size_, converts it to an
// Info record and stores it in symbols_ under symbol_index.
//
// Returns a pointer to the cached Info, or nullptr if the entry could not be fully
// read (nothing is cached in that case). The pointer refers to an element stored in
// symbols_, so it stops being valid once that cache is cleared.
template <typename SymType>
inline __attribute__((__always_inline__)) const Symbols::Info* Symbols::ReadFuncInfo(
    uint32_t symbol_index, Memory* elf_memory) {
  // Fast path: this symbol index has already been read and cached.
  auto it = symbols_.find(symbol_index);
  if (it != symbols_.end()) {
    return &it->second;
  }
  // Cache miss: read exactly sizeof(SymType) bytes, even though consecutive entries
  // are spaced entry_size_ bytes apart.
  SymType sym;
  if (!elf_memory->ReadFully(offset_ + symbol_index * entry_size_, &sym, sizeof(sym))) {
    return nullptr;
  }
  if (!IsFunc(&sym)) {
    // We need the address for binary search, but we don't want it to be matched.
    // A zero size makes the range [addr, addr + size) empty.
    sym.st_size = 0;
  }
  // Info stores the size as 32 bits; if SymType's st_size is wider, the cast truncates it.
  Info info{.addr = sym.st_value, .size = static_cast<uint32_t>(sym.st_size), .name = sym.st_name};
  // emplace() returns {iterator, inserted}; hand back the address of the stored value.
  return &symbols_.emplace(symbol_index, info).first->second;
}

// Binary search the symbol table to find function containing the given address.
// Without remap, the symbol table is assumed to be sorted and accessed directly.
// If the symbol table is not sorted this method might fail but should not crash.
// When the indices are remapped, they are guaranteed to be sorted by address.
template <typename SymType, bool RemapIndices>
const Symbols::Info* Symbols::BinarySearch(uint64_t addr, Memory* elf_memory) {
  size_t first = 0;
  size_t first = 0;
  size_t last = symbols_.size();
  size_t last = RemapIndices ? remap_->size() : count_;
  while (first < last) {
  while (first < last) {
    size_t current = first + (last - first) / 2;
    size_t current = first + (last - first) / 2;
    const Info* info = &symbols_[current];
    size_t symbol_index = RemapIndices ? remap_.value()[current] : current;
    if (addr < info->start_offset) {
    const Info* info = ReadFuncInfo<SymType>(symbol_index, elf_memory);
    if (info == nullptr) {
      return nullptr;
    }
    if (addr < info->addr) {
      last = current;
      last = current;
    } else if (addr < info->end_offset) {
    } else if (addr < info->addr + info->size) {
      return info;
      return info;
    } else {
    } else {
      first = current + 1;
      first = current + 1;
@@ -54,64 +87,72 @@ const Symbols::Info* Symbols::GetInfoFromCache(uint64_t addr) {
  return nullptr;
  return nullptr;
}
}


// Create remapping table which allows us to access symbols as if they were sorted by address.
template <typename SymType>
template <typename SymType>
bool Symbols::GetName(uint64_t addr, Memory* elf_memory, std::string* name, uint64_t* func_offset) {
void Symbols::BuildRemapTable(Memory* elf_memory) {
  if (symbols_.size() != 0) {
  std::vector<uint64_t> addrs;  // Addresses of all symbols (addrs[i] == symbols[i].st_value).
    const Info* info = GetInfoFromCache(addr);
  addrs.reserve(count_);
    if (info) {
  remap_.emplace();  // Construct the optional remap table.
      CHECK(addr >= info->start_offset && addr <= info->end_offset);
  remap_->reserve(count_);
      *func_offset = addr - info->start_offset;
  for (size_t symbol_idx = 0; symbol_idx < count_;) {
      return elf_memory->ReadString(info->str_offset, name, str_end_ - info->str_offset);
    // Read symbols from memory.  We intentionally bypass the cache to save memory.
    }
    // Do the reads in batches so that we minimize the number of memory read calls.
    uint8_t buffer[1024];
    size_t read = std::min<size_t>(sizeof(buffer), (count_ - symbol_idx) * entry_size_);
    size_t size = elf_memory->Read(offset_ + symbol_idx * entry_size_, buffer, read);
    if (size < sizeof(SymType)) {
      break;  // Stop processing, something looks like it is corrupted.
    }
    for (size_t offset = 0; offset + sizeof(SymType) <= size; offset += entry_size_, symbol_idx++) {
      SymType sym;
      memcpy(&sym, &buffer[offset], sizeof(SymType));  // Copy to ensure alignment.
      addrs.push_back(sym.st_value);  // Always insert so it is indexable by symbol index.
      if (IsFunc(&sym)) {
        remap_->push_back(symbol_idx);  // Indices of function symbols only.
      }
    }
  }
  // Sort by address to make the remap list binary searchable (stable due to the a<b tie break).
  auto comp = [&addrs](auto a, auto b) { return std::tie(addrs[a], a) < std::tie(addrs[b], b); };
  std::sort(remap_->begin(), remap_->end(), comp);
  // Remove duplicate entries (methods de-duplicated by the linker).
  auto pred = [&addrs](auto a, auto b) { return addrs[a] == addrs[b]; };
  remap_->erase(std::unique(remap_->begin(), remap_->end(), pred), remap_->end());
  remap_->shrink_to_fit();
}
}


  bool symbol_added = false;
template <typename SymType>
  bool return_value = false;
bool Symbols::GetName(uint64_t addr, Memory* elf_memory, std::string* name, uint64_t* func_offset) {
  while (cur_offset_ + entry_size_ <= end_) {
  const Info* info;
    SymType entry;
  if (!remap_.has_value()) {
    if (!elf_memory->ReadFully(cur_offset_, &entry, sizeof(entry))) {
    // Assume the symbol table is sorted. If it is not, this will gracefully fail.
      // Stop all processing, something looks like it is corrupted.
    info = BinarySearch<SymType, false>(addr, elf_memory);
      cur_offset_ = UINT64_MAX;
    if (info == nullptr) {
      return false;
      // Create the remapping table and retry the search.
    }
      BuildRemapTable<SymType>(elf_memory);
    cur_offset_ += entry_size_;
      symbols_.clear();  // Remove cached symbols since the access pattern will be different.

      info = BinarySearch<SymType, true>(addr, elf_memory);
    if (entry.st_shndx != SHN_UNDEF && ELF32_ST_TYPE(entry.st_info) == STT_FUNC) {
      // Treat st_value as virtual address.
      uint64_t start_offset = entry.st_value;
      uint64_t end_offset = start_offset + entry.st_size;

      // Cache the value.
      symbols_.emplace_back(start_offset, end_offset, str_offset_ + entry.st_name);
      symbol_added = true;

      if (addr >= start_offset && addr < end_offset) {
        *func_offset = addr - start_offset;
        uint64_t offset = str_offset_ + entry.st_name;
        if (offset < str_end_) {
          return_value = elf_memory->ReadString(offset, name, str_end_ - offset);
        }
        break;
      }
    }
    }
  } else {
    // Fast search using the previously created remap table.
    info = BinarySearch<SymType, true>(addr, elf_memory);
  }
  }

  if (info == nullptr) {
  if (symbol_added) {
    return false;
    std::sort(symbols_.begin(), symbols_.end(),
              [](const Info& a, const Info& b) { return a.start_offset < b.start_offset; });
  }
  }
  return return_value;
  // Read the function name from the string table.
  *func_offset = addr - info->addr;
  uint64_t str = str_offset_ + info->name;
  return str < str_end_ && elf_memory->ReadString(str, name, str_end_ - str);
}
}


template <typename SymType>
template <typename SymType>
bool Symbols::GetGlobal(Memory* elf_memory, const std::string& name, uint64_t* memory_address) {
bool Symbols::GetGlobal(Memory* elf_memory, const std::string& name, uint64_t* memory_address) {
  uint64_t cur_offset = offset_;
  for (uint32_t i = 0; i < count_; i++) {
  while (cur_offset + entry_size_ <= end_) {
    SymType entry;
    SymType entry;
    if (!elf_memory->ReadFully(cur_offset, &entry, sizeof(entry))) {
    if (!elf_memory->ReadFully(offset_ + i * entry_size_, &entry, sizeof(entry))) {
      return false;
      return false;
    }
    }
    cur_offset += entry_size_;


    if (entry.st_shndx != SHN_UNDEF && ELF32_ST_TYPE(entry.st_info) == STT_OBJECT &&
    if (entry.st_shndx != SHN_UNDEF && ELF32_ST_TYPE(entry.st_info) == STT_OBJECT &&
        ELF32_ST_BIND(entry.st_info) == STB_GLOBAL) {
        ELF32_ST_BIND(entry.st_info) == STB_GLOBAL) {
+23 −17
Original line number Original line Diff line number Diff line
@@ -19,8 +19,9 @@


#include <stdint.h>
#include <stdint.h>


#include <optional>
#include <string>
#include <string>
#include <vector>
#include <unordered_map>


namespace unwindstack {
namespace unwindstack {


@@ -29,11 +30,9 @@ class Memory;


class Symbols {
class Symbols {
  struct Info {
  struct Info {
    Info(uint64_t start_offset, uint64_t end_offset, uint64_t str_offset)
    uint64_t addr;  // Symbol address.
        : start_offset(start_offset), end_offset(end_offset), str_offset(str_offset) {}
    uint32_t size;  // Symbol size in bytes. Zero if not a function.
    uint64_t start_offset;
    uint32_t name;  // Offset in .strtab.
    uint64_t end_offset;
    uint64_t str_offset;
  };
  };


 public:
 public:
@@ -41,8 +40,6 @@ class Symbols {
          uint64_t str_size);
          uint64_t str_size);
  virtual ~Symbols() = default;
  virtual ~Symbols() = default;


  const Info* GetInfoFromCache(uint64_t addr);

  template <typename SymType>
  template <typename SymType>
  bool GetName(uint64_t addr, Memory* elf_memory, std::string* name, uint64_t* func_offset);
  bool GetName(uint64_t addr, Memory* elf_memory, std::string* name, uint64_t* func_offset);


@@ -51,18 +48,27 @@ class Symbols {


  void ClearCache() {
  void ClearCache() {
    symbols_.clear();
    symbols_.clear();
    cur_offset_ = offset_;
    remap_.reset();
  }
  }


 private:
 private:
  uint64_t cur_offset_;
  template <typename SymType>
  uint64_t offset_;
  const Info* ReadFuncInfo(uint32_t symbol_index, Memory* elf_memory);
  uint64_t end_;

  uint64_t entry_size_;
  template <typename SymType, bool RemapIndices>
  uint64_t str_offset_;
  const Info* BinarySearch(uint64_t addr, Memory* elf_memory);
  uint64_t str_end_;


  template <typename SymType>
  std::vector<Info> symbols_;
  void BuildRemapTable(Memory* elf_memory);

  const uint64_t offset_;
  const uint64_t count_;
  const uint64_t entry_size_;
  const uint64_t str_offset_;
  const uint64_t str_end_;

  std::unordered_map<uint32_t, Info> symbols_;  // Cache of read symbols (keyed by symbol index).
  std::optional<std::vector<uint32_t>> remap_;  // Indices of function symbols sorted by address.
};
};


}  // namespace unwindstack
}  // namespace unwindstack
+6 −1
Original line number Original line Diff line number Diff line
@@ -185,18 +185,21 @@ TYPED_TEST_P(SymbolsTest, multiple_entries_nonstandard_size) {
  std::string fake_name;
  std::string fake_name;


  this->InitSym(&sym, 0x5000, 0x10, 0x40);
  this->InitSym(&sym, 0x5000, 0x10, 0x40);
  this->memory_.SetMemoryBlock(offset, entry_size, 0);
  this->memory_.SetMemory(offset, &sym, sizeof(sym));
  this->memory_.SetMemory(offset, &sym, sizeof(sym));
  fake_name = "function_one";
  fake_name = "function_one";
  this->memory_.SetMemory(0x2040, fake_name.c_str(), fake_name.size() + 1);
  this->memory_.SetMemory(0x2040, fake_name.c_str(), fake_name.size() + 1);
  offset += entry_size;
  offset += entry_size;


  this->InitSym(&sym, 0x3004, 0x200, 0x100);
  this->InitSym(&sym, 0x3004, 0x200, 0x100);
  this->memory_.SetMemoryBlock(offset, entry_size, 0);
  this->memory_.SetMemory(offset, &sym, sizeof(sym));
  this->memory_.SetMemory(offset, &sym, sizeof(sym));
  fake_name = "function_two";
  fake_name = "function_two";
  this->memory_.SetMemory(0x2100, fake_name.c_str(), fake_name.size() + 1);
  this->memory_.SetMemory(0x2100, fake_name.c_str(), fake_name.size() + 1);
  offset += entry_size;
  offset += entry_size;


  this->InitSym(&sym, 0xa010, 0x20, 0x230);
  this->InitSym(&sym, 0xa010, 0x20, 0x230);
  this->memory_.SetMemoryBlock(offset, entry_size, 0);
  this->memory_.SetMemory(offset, &sym, sizeof(sym));
  this->memory_.SetMemory(offset, &sym, sizeof(sym));
  fake_name = "function_three";
  fake_name = "function_three";
  this->memory_.SetMemory(0x2230, fake_name.c_str(), fake_name.size() + 1);
  this->memory_.SetMemory(0x2230, fake_name.c_str(), fake_name.size() + 1);
@@ -274,7 +277,9 @@ TYPED_TEST_P(SymbolsTest, symtab_read_cached) {
  // Do call that should cache all of the entries (except the string data).
  // Do call that should cache all of the entries (except the string data).
  std::string name;
  std::string name;
  uint64_t func_offset;
  uint64_t func_offset;
  ASSERT_FALSE(symbols.GetName<TypeParam>(0x6000, &this->memory_, &name, &func_offset));
  ASSERT_FALSE(symbols.GetName<TypeParam>(0x5000, &this->memory_, &name, &func_offset));
  ASSERT_FALSE(symbols.GetName<TypeParam>(0x2000, &this->memory_, &name, &func_offset));
  ASSERT_FALSE(symbols.GetName<TypeParam>(0x1000, &this->memory_, &name, &func_offset));
  this->memory_.Clear();
  this->memory_.Clear();
  ASSERT_FALSE(symbols.GetName<TypeParam>(0x6000, &this->memory_, &name, &func_offset));
  ASSERT_FALSE(symbols.GetName<TypeParam>(0x6000, &this->memory_, &name, &func_offset));