Create minimal remap table for symbol binary search.
ELF symbols are not sorted by address. Create remap table which reshuffles the indices into sorted-by-address order. This saves over 6x of memory (the remap table needs just uint32_t per entry, as opposed the FuncInfo cache entry). ART symbols are sorted. Make use of that fact. Bug: 110133331 Test: libunwindstack_test Test: art/test.py -b --host -r -t 137-cfi Change-Id: I1812d2dd3ad6a69ae93ed50ca387749c649289b9
This commit is contained in:
parent
451197cb64
commit
af41960acb
3 changed files with 122 additions and 70 deletions
|
@ -19,6 +19,7 @@
|
|||
|
||||
#include <algorithm>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <unwindstack/Memory.h>
|
||||
|
||||
|
@ -29,23 +30,55 @@ namespace unwindstack {
|
|||
|
||||
Symbols::Symbols(uint64_t offset, uint64_t size, uint64_t entry_size, uint64_t str_offset,
|
||||
uint64_t str_size)
|
||||
: cur_offset_(offset),
|
||||
offset_(offset),
|
||||
end_(offset + size),
|
||||
: offset_(offset),
|
||||
count_(entry_size != 0 ? size / entry_size : 0),
|
||||
entry_size_(entry_size),
|
||||
str_offset_(str_offset),
|
||||
str_end_(str_offset_ + str_size) {}
|
||||
|
||||
const Symbols::Info* Symbols::GetInfoFromCache(uint64_t addr) {
|
||||
// Binary search the table.
|
||||
template <typename SymType>
|
||||
static bool IsFunc(const SymType* entry) {
|
||||
return entry->st_shndx != SHN_UNDEF && ELF32_ST_TYPE(entry->st_info) == STT_FUNC;
|
||||
}
|
||||
|
||||
// Read symbol entry from memory and cache it so we don't have to read it again.
|
||||
template <typename SymType>
|
||||
inline __attribute__((__always_inline__)) const Symbols::Info* Symbols::ReadFuncInfo(
|
||||
uint32_t symbol_index, Memory* elf_memory) {
|
||||
auto it = symbols_.find(symbol_index);
|
||||
if (it != symbols_.end()) {
|
||||
return &it->second;
|
||||
}
|
||||
SymType sym;
|
||||
if (!elf_memory->ReadFully(offset_ + symbol_index * entry_size_, &sym, sizeof(sym))) {
|
||||
return nullptr;
|
||||
}
|
||||
if (!IsFunc(&sym)) {
|
||||
// We need the address for binary search, but we don't want it to be matched.
|
||||
sym.st_size = 0;
|
||||
}
|
||||
Info info{.addr = sym.st_value, .size = static_cast<uint32_t>(sym.st_size), .name = sym.st_name};
|
||||
return &symbols_.emplace(symbol_index, info).first->second;
|
||||
}
|
||||
|
||||
// Binary search the symbol table to find function containing the given address.
|
||||
// Without remap, the symbol table is assumed to be sorted and accessed directly.
|
||||
// If the symbol table is not sorted this method might fail but should not crash.
|
||||
// When the indices are remapped, they are guaranteed to be sorted by address.
|
||||
template <typename SymType, bool RemapIndices>
|
||||
const Symbols::Info* Symbols::BinarySearch(uint64_t addr, Memory* elf_memory) {
|
||||
size_t first = 0;
|
||||
size_t last = symbols_.size();
|
||||
size_t last = RemapIndices ? remap_->size() : count_;
|
||||
while (first < last) {
|
||||
size_t current = first + (last - first) / 2;
|
||||
const Info* info = &symbols_[current];
|
||||
if (addr < info->start_offset) {
|
||||
size_t symbol_index = RemapIndices ? remap_.value()[current] : current;
|
||||
const Info* info = ReadFuncInfo<SymType>(symbol_index, elf_memory);
|
||||
if (info == nullptr) {
|
||||
return nullptr;
|
||||
}
|
||||
if (addr < info->addr) {
|
||||
last = current;
|
||||
} else if (addr < info->end_offset) {
|
||||
} else if (addr < info->addr + info->size) {
|
||||
return info;
|
||||
} else {
|
||||
first = current + 1;
|
||||
|
@ -54,64 +87,72 @@ const Symbols::Info* Symbols::GetInfoFromCache(uint64_t addr) {
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
// Create remapping table which allows us to access symbols as if they were sorted by address.
|
||||
template <typename SymType>
|
||||
bool Symbols::GetName(uint64_t addr, Memory* elf_memory, std::string* name, uint64_t* func_offset) {
|
||||
if (symbols_.size() != 0) {
|
||||
const Info* info = GetInfoFromCache(addr);
|
||||
if (info) {
|
||||
CHECK(addr >= info->start_offset && addr <= info->end_offset);
|
||||
*func_offset = addr - info->start_offset;
|
||||
return elf_memory->ReadString(info->str_offset, name, str_end_ - info->str_offset);
|
||||
void Symbols::BuildRemapTable(Memory* elf_memory) {
|
||||
std::vector<uint64_t> addrs; // Addresses of all symbols (addrs[i] == symbols[i].st_value).
|
||||
addrs.reserve(count_);
|
||||
remap_.emplace(); // Construct the optional remap table.
|
||||
remap_->reserve(count_);
|
||||
for (size_t symbol_idx = 0; symbol_idx < count_;) {
|
||||
// Read symbols from memory. We intentionally bypass the cache to save memory.
|
||||
// Do the reads in batches so that we minimize the number of memory read calls.
|
||||
uint8_t buffer[1024];
|
||||
size_t read = std::min<size_t>(sizeof(buffer), (count_ - symbol_idx) * entry_size_);
|
||||
size_t size = elf_memory->Read(offset_ + symbol_idx * entry_size_, buffer, read);
|
||||
if (size < sizeof(SymType)) {
|
||||
break; // Stop processing, something looks like it is corrupted.
|
||||
}
|
||||
}
|
||||
|
||||
bool symbol_added = false;
|
||||
bool return_value = false;
|
||||
while (cur_offset_ + entry_size_ <= end_) {
|
||||
SymType entry;
|
||||
if (!elf_memory->ReadFully(cur_offset_, &entry, sizeof(entry))) {
|
||||
// Stop all processing, something looks like it is corrupted.
|
||||
cur_offset_ = UINT64_MAX;
|
||||
return false;
|
||||
}
|
||||
cur_offset_ += entry_size_;
|
||||
|
||||
if (entry.st_shndx != SHN_UNDEF && ELF32_ST_TYPE(entry.st_info) == STT_FUNC) {
|
||||
// Treat st_value as virtual address.
|
||||
uint64_t start_offset = entry.st_value;
|
||||
uint64_t end_offset = start_offset + entry.st_size;
|
||||
|
||||
// Cache the value.
|
||||
symbols_.emplace_back(start_offset, end_offset, str_offset_ + entry.st_name);
|
||||
symbol_added = true;
|
||||
|
||||
if (addr >= start_offset && addr < end_offset) {
|
||||
*func_offset = addr - start_offset;
|
||||
uint64_t offset = str_offset_ + entry.st_name;
|
||||
if (offset < str_end_) {
|
||||
return_value = elf_memory->ReadString(offset, name, str_end_ - offset);
|
||||
}
|
||||
break;
|
||||
for (size_t offset = 0; offset + sizeof(SymType) <= size; offset += entry_size_, symbol_idx++) {
|
||||
SymType sym;
|
||||
memcpy(&sym, &buffer[offset], sizeof(SymType)); // Copy to ensure alignment.
|
||||
addrs.push_back(sym.st_value); // Always insert so it is indexable by symbol index.
|
||||
if (IsFunc(&sym)) {
|
||||
remap_->push_back(symbol_idx); // Indices of function symbols only.
|
||||
}
|
||||
}
|
||||
}
|
||||
// Sort by address to make the remap list binary searchable (stable due to the a<b tie break).
|
||||
auto comp = [&addrs](auto a, auto b) { return std::tie(addrs[a], a) < std::tie(addrs[b], b); };
|
||||
std::sort(remap_->begin(), remap_->end(), comp);
|
||||
// Remove duplicate entries (methods de-duplicated by the linker).
|
||||
auto pred = [&addrs](auto a, auto b) { return addrs[a] == addrs[b]; };
|
||||
remap_->erase(std::unique(remap_->begin(), remap_->end(), pred), remap_->end());
|
||||
remap_->shrink_to_fit();
|
||||
}
|
||||
|
||||
if (symbol_added) {
|
||||
std::sort(symbols_.begin(), symbols_.end(),
|
||||
[](const Info& a, const Info& b) { return a.start_offset < b.start_offset; });
|
||||
template <typename SymType>
|
||||
bool Symbols::GetName(uint64_t addr, Memory* elf_memory, std::string* name, uint64_t* func_offset) {
|
||||
const Info* info;
|
||||
if (!remap_.has_value()) {
|
||||
// Assume the symbol table is sorted. If it is not, this will gracefully fail.
|
||||
info = BinarySearch<SymType, false>(addr, elf_memory);
|
||||
if (info == nullptr) {
|
||||
// Create the remapping table and retry the search.
|
||||
BuildRemapTable<SymType>(elf_memory);
|
||||
symbols_.clear(); // Remove cached symbols since the access pattern will be different.
|
||||
info = BinarySearch<SymType, true>(addr, elf_memory);
|
||||
}
|
||||
} else {
|
||||
// Fast search using the previously created remap table.
|
||||
info = BinarySearch<SymType, true>(addr, elf_memory);
|
||||
}
|
||||
return return_value;
|
||||
if (info == nullptr) {
|
||||
return false;
|
||||
}
|
||||
// Read the function name from the string table.
|
||||
*func_offset = addr - info->addr;
|
||||
uint64_t str = str_offset_ + info->name;
|
||||
return str < str_end_ && elf_memory->ReadString(str, name, str_end_ - str);
|
||||
}
|
||||
|
||||
template <typename SymType>
|
||||
bool Symbols::GetGlobal(Memory* elf_memory, const std::string& name, uint64_t* memory_address) {
|
||||
uint64_t cur_offset = offset_;
|
||||
while (cur_offset + entry_size_ <= end_) {
|
||||
for (uint32_t i = 0; i < count_; i++) {
|
||||
SymType entry;
|
||||
if (!elf_memory->ReadFully(cur_offset, &entry, sizeof(entry))) {
|
||||
if (!elf_memory->ReadFully(offset_ + i * entry_size_, &entry, sizeof(entry))) {
|
||||
return false;
|
||||
}
|
||||
cur_offset += entry_size_;
|
||||
|
||||
if (entry.st_shndx != SHN_UNDEF && ELF32_ST_TYPE(entry.st_info) == STT_OBJECT &&
|
||||
ELF32_ST_BIND(entry.st_info) == STB_GLOBAL) {
|
||||
|
|
|
@ -19,8 +19,9 @@
|
|||
|
||||
#include <stdint.h>
|
||||
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <unordered_map>
|
||||
|
||||
namespace unwindstack {
|
||||
|
||||
|
@ -29,11 +30,9 @@ class Memory;
|
|||
|
||||
class Symbols {
|
||||
struct Info {
|
||||
Info(uint64_t start_offset, uint64_t end_offset, uint64_t str_offset)
|
||||
: start_offset(start_offset), end_offset(end_offset), str_offset(str_offset) {}
|
||||
uint64_t start_offset;
|
||||
uint64_t end_offset;
|
||||
uint64_t str_offset;
|
||||
uint64_t addr; // Symbol address.
|
||||
uint32_t size; // Symbol size in bytes. Zero if not a function.
|
||||
uint32_t name; // Offset in .strtab.
|
||||
};
|
||||
|
||||
public:
|
||||
|
@ -41,8 +40,6 @@ class Symbols {
|
|||
uint64_t str_size);
|
||||
virtual ~Symbols() = default;
|
||||
|
||||
const Info* GetInfoFromCache(uint64_t addr);
|
||||
|
||||
template <typename SymType>
|
||||
bool GetName(uint64_t addr, Memory* elf_memory, std::string* name, uint64_t* func_offset);
|
||||
|
||||
|
@ -51,18 +48,27 @@ class Symbols {
|
|||
|
||||
void ClearCache() {
|
||||
symbols_.clear();
|
||||
cur_offset_ = offset_;
|
||||
remap_.reset();
|
||||
}
|
||||
|
||||
private:
|
||||
uint64_t cur_offset_;
|
||||
uint64_t offset_;
|
||||
uint64_t end_;
|
||||
uint64_t entry_size_;
|
||||
uint64_t str_offset_;
|
||||
uint64_t str_end_;
|
||||
template <typename SymType>
|
||||
const Info* ReadFuncInfo(uint32_t symbol_index, Memory* elf_memory);
|
||||
|
||||
std::vector<Info> symbols_;
|
||||
template <typename SymType, bool RemapIndices>
|
||||
const Info* BinarySearch(uint64_t addr, Memory* elf_memory);
|
||||
|
||||
template <typename SymType>
|
||||
void BuildRemapTable(Memory* elf_memory);
|
||||
|
||||
const uint64_t offset_;
|
||||
const uint64_t count_;
|
||||
const uint64_t entry_size_;
|
||||
const uint64_t str_offset_;
|
||||
const uint64_t str_end_;
|
||||
|
||||
std::unordered_map<uint32_t, Info> symbols_; // Cache of read symbols (keyed by symbol index).
|
||||
std::optional<std::vector<uint32_t>> remap_; // Indices of function symbols sorted by address.
|
||||
};
|
||||
|
||||
} // namespace unwindstack
|
||||
|
|
|
@ -185,18 +185,21 @@ TYPED_TEST_P(SymbolsTest, multiple_entries_nonstandard_size) {
|
|||
std::string fake_name;
|
||||
|
||||
this->InitSym(&sym, 0x5000, 0x10, 0x40);
|
||||
this->memory_.SetMemoryBlock(offset, entry_size, 0);
|
||||
this->memory_.SetMemory(offset, &sym, sizeof(sym));
|
||||
fake_name = "function_one";
|
||||
this->memory_.SetMemory(0x2040, fake_name.c_str(), fake_name.size() + 1);
|
||||
offset += entry_size;
|
||||
|
||||
this->InitSym(&sym, 0x3004, 0x200, 0x100);
|
||||
this->memory_.SetMemoryBlock(offset, entry_size, 0);
|
||||
this->memory_.SetMemory(offset, &sym, sizeof(sym));
|
||||
fake_name = "function_two";
|
||||
this->memory_.SetMemory(0x2100, fake_name.c_str(), fake_name.size() + 1);
|
||||
offset += entry_size;
|
||||
|
||||
this->InitSym(&sym, 0xa010, 0x20, 0x230);
|
||||
this->memory_.SetMemoryBlock(offset, entry_size, 0);
|
||||
this->memory_.SetMemory(offset, &sym, sizeof(sym));
|
||||
fake_name = "function_three";
|
||||
this->memory_.SetMemory(0x2230, fake_name.c_str(), fake_name.size() + 1);
|
||||
|
@ -274,7 +277,9 @@ TYPED_TEST_P(SymbolsTest, symtab_read_cached) {
|
|||
// Do call that should cache all of the entries (except the string data).
|
||||
std::string name;
|
||||
uint64_t func_offset;
|
||||
ASSERT_FALSE(symbols.GetName<TypeParam>(0x6000, &this->memory_, &name, &func_offset));
|
||||
ASSERT_FALSE(symbols.GetName<TypeParam>(0x5000, &this->memory_, &name, &func_offset));
|
||||
ASSERT_FALSE(symbols.GetName<TypeParam>(0x2000, &this->memory_, &name, &func_offset));
|
||||
ASSERT_FALSE(symbols.GetName<TypeParam>(0x1000, &this->memory_, &name, &func_offset));
|
||||
this->memory_.Clear();
|
||||
ASSERT_FALSE(symbols.GetName<TypeParam>(0x6000, &this->memory_, &name, &func_offset));
|
||||
|
||||
|
|
Loading…
Reference in a new issue