From af41960acb56efd5fb7d1fb1b18dae21b4d6b122 Mon Sep 17 00:00:00 2001 From: David Srbecky Date: Tue, 26 Mar 2019 14:38:28 +0000 Subject: [PATCH] Create minimal remap table for symbol binary search. ELF symbols are not sorted by address. Create remap table which reshuffles the indices into sorted-by-address order. This saves over 6x of memory (the remap table needs just uint32_t per entry, as opposed the FuncInfo cache entry). ART symbols are sorted. Make use of that fact. Bug: 110133331 Test: libunwindstack_test Test: art/test.py -b --host -r -t 137-cfi Change-Id: I1812d2dd3ad6a69ae93ed50ca387749c649289b9 --- libunwindstack/Symbols.cpp | 147 +++++++++++++++++---------- libunwindstack/Symbols.h | 38 ++++--- libunwindstack/tests/SymbolsTest.cpp | 7 +- 3 files changed, 122 insertions(+), 70 deletions(-) diff --git a/libunwindstack/Symbols.cpp b/libunwindstack/Symbols.cpp index e3c15a298..2117ebd3e 100644 --- a/libunwindstack/Symbols.cpp +++ b/libunwindstack/Symbols.cpp @@ -19,6 +19,7 @@ #include #include +#include #include @@ -29,23 +30,55 @@ namespace unwindstack { Symbols::Symbols(uint64_t offset, uint64_t size, uint64_t entry_size, uint64_t str_offset, uint64_t str_size) - : cur_offset_(offset), - offset_(offset), - end_(offset + size), + : offset_(offset), + count_(entry_size != 0 ? size / entry_size : 0), entry_size_(entry_size), str_offset_(str_offset), str_end_(str_offset_ + str_size) {} -const Symbols::Info* Symbols::GetInfoFromCache(uint64_t addr) { - // Binary search the table. +template +static bool IsFunc(const SymType* entry) { + return entry->st_shndx != SHN_UNDEF && ELF32_ST_TYPE(entry->st_info) == STT_FUNC; +} + +// Read symbol entry from memory and cache it so we don't have to read it again. +template +inline __attribute__((__always_inline__)) const Symbols::Info* Symbols::ReadFuncInfo( + uint32_t symbol_index, Memory* elf_memory) { + auto it = symbols_.find(symbol_index); + if (it != symbols_.end()) { + return &it->second; + } + SymType sym; + if (!elf_memory->ReadFully(offset_ + symbol_index * entry_size_, &sym, sizeof(sym))) { + return nullptr; + } + if (!IsFunc(&sym)) { + // We need the address for binary search, but we don't want it to be matched. + sym.st_size = 0; + } + Info info{.addr = sym.st_value, .size = static_cast(sym.st_size), .name = sym.st_name}; + return &symbols_.emplace(symbol_index, info).first->second; +} + +// Binary search the symbol table to find function containing the given address. +// Without remap, the symbol table is assumed to be sorted and accessed directly. +// If the symbol table is not sorted this method might fail but should not crash. +// When the indices are remapped, they are guaranteed to be sorted by address. +template +const Symbols::Info* Symbols::BinarySearch(uint64_t addr, Memory* elf_memory) { size_t first = 0; - size_t last = symbols_.size(); + size_t last = RemapIndices ? remap_->size() : count_; while (first < last) { size_t current = first + (last - first) / 2; - const Info* info = &symbols_[current]; - if (addr < info->start_offset) { + size_t symbol_index = RemapIndices ? remap_.value()[current] : current; + const Info* info = ReadFuncInfo(symbol_index, elf_memory); + if (info == nullptr) { + return nullptr; + } + if (addr < info->addr) { last = current; - } else if (addr < info->end_offset) { + } else if (addr < info->addr + info->size) { return info; } else { first = current + 1; @@ -54,64 +87,72 @@ const Symbols::Info* Symbols::GetInfoFromCache(uint64_t addr) { return nullptr; } +// Create remapping table which allows us to access symbols as if they were sorted by address. template -bool Symbols::GetName(uint64_t addr, Memory* elf_memory, std::string* name, uint64_t* func_offset) { - if (symbols_.size() != 0) { - const Info* info = GetInfoFromCache(addr); - if (info) { - CHECK(addr >= info->start_offset && addr <= info->end_offset); - *func_offset = addr - info->start_offset; - return elf_memory->ReadString(info->str_offset, name, str_end_ - info->str_offset); +void Symbols::BuildRemapTable(Memory* elf_memory) { + std::vector addrs; // Addresses of all symbols (addrs[i] == symbols[i].st_value). + addrs.reserve(count_); + remap_.emplace(); // Construct the optional remap table. + remap_->reserve(count_); + for (size_t symbol_idx = 0; symbol_idx < count_;) { + // Read symbols from memory. We intentionally bypass the cache to save memory. + // Do the reads in batches so that we minimize the number of memory read calls. + uint8_t buffer[1024]; + size_t read = std::min(sizeof(buffer), (count_ - symbol_idx) * entry_size_); + size_t size = elf_memory->Read(offset_ + symbol_idx * entry_size_, buffer, read); + if (size < sizeof(SymType)) { + break; // Stop processing, something looks like it is corrupted. } - } - - bool symbol_added = false; - bool return_value = false; - while (cur_offset_ + entry_size_ <= end_) { - SymType entry; - if (!elf_memory->ReadFully(cur_offset_, &entry, sizeof(entry))) { - // Stop all processing, something looks like it is corrupted. - cur_offset_ = UINT64_MAX; - return false; - } - cur_offset_ += entry_size_; - - if (entry.st_shndx != SHN_UNDEF && ELF32_ST_TYPE(entry.st_info) == STT_FUNC) { - // Treat st_value as virtual address. - uint64_t start_offset = entry.st_value; - uint64_t end_offset = start_offset + entry.st_size; - - // Cache the value. - symbols_.emplace_back(start_offset, end_offset, str_offset_ + entry.st_name); - symbol_added = true; - - if (addr >= start_offset && addr < end_offset) { - *func_offset = addr - start_offset; - uint64_t offset = str_offset_ + entry.st_name; - if (offset < str_end_) { - return_value = elf_memory->ReadString(offset, name, str_end_ - offset); - } - break; + for (size_t offset = 0; offset + sizeof(SymType) <= size; offset += entry_size_, symbol_idx++) { + SymType sym; + memcpy(&sym, &buffer[offset], sizeof(SymType)); // Copy to ensure alignment. + addrs.push_back(sym.st_value); // Always insert so it is indexable by symbol index. + if (IsFunc(&sym)) { + remap_->push_back(symbol_idx); // Indices of function symbols only. } } } + // Sort by address to make the remap list binary searchable (stable due to the abegin(), remap_->end(), comp); + // Remove duplicate entries (methods de-duplicated by the linker). + auto pred = [&addrs](auto a, auto b) { return addrs[a] == addrs[b]; }; + remap_->erase(std::unique(remap_->begin(), remap_->end(), pred), remap_->end()); + remap_->shrink_to_fit(); +} - if (symbol_added) { - std::sort(symbols_.begin(), symbols_.end(), - [](const Info& a, const Info& b) { return a.start_offset < b.start_offset; }); +template +bool Symbols::GetName(uint64_t addr, Memory* elf_memory, std::string* name, uint64_t* func_offset) { + const Info* info; + if (!remap_.has_value()) { + // Assume the symbol table is sorted. If it is not, this will gracefully fail. + info = BinarySearch(addr, elf_memory); + if (info == nullptr) { + // Create the remapping table and retry the search. + BuildRemapTable(elf_memory); + symbols_.clear(); // Remove cached symbols since the access pattern will be different. + info = BinarySearch(addr, elf_memory); + } + } else { + // Fast search using the previously created remap table. + info = BinarySearch(addr, elf_memory); } - return return_value; + if (info == nullptr) { + return false; + } + // Read the function name from the string table. + *func_offset = addr - info->addr; + uint64_t str = str_offset_ + info->name; + return str < str_end_ && elf_memory->ReadString(str, name, str_end_ - str); } template bool Symbols::GetGlobal(Memory* elf_memory, const std::string& name, uint64_t* memory_address) { - uint64_t cur_offset = offset_; - while (cur_offset + entry_size_ <= end_) { + for (uint32_t i = 0; i < count_; i++) { SymType entry; - if (!elf_memory->ReadFully(cur_offset, &entry, sizeof(entry))) { + if (!elf_memory->ReadFully(offset_ + i * entry_size_, &entry, sizeof(entry))) { return false; } - cur_offset += entry_size_; if (entry.st_shndx != SHN_UNDEF && ELF32_ST_TYPE(entry.st_info) == STT_OBJECT && ELF32_ST_BIND(entry.st_info) == STB_GLOBAL) { diff --git a/libunwindstack/Symbols.h b/libunwindstack/Symbols.h index 7fcd067d3..3b3f20b1c 100644 --- a/libunwindstack/Symbols.h +++ b/libunwindstack/Symbols.h @@ -19,8 +19,9 @@ #include +#include #include -#include +#include namespace unwindstack { @@ -29,11 +30,9 @@ class Memory; class Symbols { struct Info { - Info(uint64_t start_offset, uint64_t end_offset, uint64_t str_offset) - : start_offset(start_offset), end_offset(end_offset), str_offset(str_offset) {} - uint64_t start_offset; - uint64_t end_offset; - uint64_t str_offset; + uint64_t addr; // Symbol address. + uint32_t size; // Symbol size in bytes. Zero if not a function. + uint32_t name; // Offset in .strtab. }; public: @@ -41,8 +40,6 @@ class Symbols { uint64_t str_size); virtual ~Symbols() = default; - const Info* GetInfoFromCache(uint64_t addr); - template bool GetName(uint64_t addr, Memory* elf_memory, std::string* name, uint64_t* func_offset); @@ -51,18 +48,27 @@ class Symbols { void ClearCache() { symbols_.clear(); - cur_offset_ = offset_; + remap_.reset(); } private: - uint64_t cur_offset_; - uint64_t offset_; - uint64_t end_; - uint64_t entry_size_; - uint64_t str_offset_; - uint64_t str_end_; + template + const Info* ReadFuncInfo(uint32_t symbol_index, Memory* elf_memory); - std::vector symbols_; + template + const Info* BinarySearch(uint64_t addr, Memory* elf_memory); + + template + void BuildRemapTable(Memory* elf_memory); + + const uint64_t offset_; + const uint64_t count_; + const uint64_t entry_size_; + const uint64_t str_offset_; + const uint64_t str_end_; + + std::unordered_map symbols_; // Cache of read symbols (keyed by symbol index). + std::optional> remap_; // Indices of function symbols sorted by address. }; } // namespace unwindstack diff --git a/libunwindstack/tests/SymbolsTest.cpp b/libunwindstack/tests/SymbolsTest.cpp index c58aeff71..9afbeec55 100644 --- a/libunwindstack/tests/SymbolsTest.cpp +++ b/libunwindstack/tests/SymbolsTest.cpp @@ -185,18 +185,21 @@ TYPED_TEST_P(SymbolsTest, multiple_entries_nonstandard_size) { std::string fake_name; this->InitSym(&sym, 0x5000, 0x10, 0x40); + this->memory_.SetMemoryBlock(offset, entry_size, 0); this->memory_.SetMemory(offset, &sym, sizeof(sym)); fake_name = "function_one"; this->memory_.SetMemory(0x2040, fake_name.c_str(), fake_name.size() + 1); offset += entry_size; this->InitSym(&sym, 0x3004, 0x200, 0x100); + this->memory_.SetMemoryBlock(offset, entry_size, 0); this->memory_.SetMemory(offset, &sym, sizeof(sym)); fake_name = "function_two"; this->memory_.SetMemory(0x2100, fake_name.c_str(), fake_name.size() + 1); offset += entry_size; this->InitSym(&sym, 0xa010, 0x20, 0x230); + this->memory_.SetMemoryBlock(offset, entry_size, 0); this->memory_.SetMemory(offset, &sym, sizeof(sym)); fake_name = "function_three"; this->memory_.SetMemory(0x2230, fake_name.c_str(), fake_name.size() + 1); @@ -274,7 +277,9 @@ TYPED_TEST_P(SymbolsTest, symtab_read_cached) { // Do call that should cache all of the entries (except the string data). std::string name; uint64_t func_offset; - ASSERT_FALSE(symbols.GetName(0x6000, &this->memory_, &name, &func_offset)); + ASSERT_FALSE(symbols.GetName(0x5000, &this->memory_, &name, &func_offset)); + ASSERT_FALSE(symbols.GetName(0x2000, &this->memory_, &name, &func_offset)); + ASSERT_FALSE(symbols.GetName(0x1000, &this->memory_, &name, &func_offset)); this->memory_.Clear(); ASSERT_FALSE(symbols.GetName(0x6000, &this->memory_, &name, &func_offset));