248 lines
8.3 KiB
Python
248 lines
8.3 KiB
Python
|
#!/usr/bin/env python3
|
||
|
#
|
||
|
# Copyright (C) 2019 The Android Open Source Project
|
||
|
# All rights reserved.
|
||
|
#
|
||
|
# Redistribution and use in source and binary forms, with or without
|
||
|
# modification, are permitted provided that the following conditions
|
||
|
# are met:
|
||
|
# * Redistributions of source code must retain the above copyright
|
||
|
# notice, this list of conditions and the following disclaimer.
|
||
|
# * Redistributions in binary form must reproduce the above copyright
|
||
|
# notice, this list of conditions and the following disclaimer in
|
||
|
# the documentation and/or other materials provided with the
|
||
|
# distribution.
|
||
|
#
|
||
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||
|
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||
|
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||
|
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||
|
# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||
|
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||
|
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||
|
# OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||
|
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||
|
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
||
|
# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||
|
# SUCH DAMAGE.
|
||
|
|
||
|
# Scan an ELF file and its tree of DT_NEEDED ELF files, and dump out a JSON file listing:
|
||
|
# - each ELF file
|
||
|
# - its DT_NEEDED entries
|
||
|
# - its defined symbols
|
||
|
# - its relocations
|
||
|
|
||
|
import argparse
|
||
|
import json
|
||
|
import os
|
||
|
import re
|
||
|
import shlex
|
||
|
import shutil
|
||
|
import subprocess
|
||
|
import sys
|
||
|
import tempfile
|
||
|
import textwrap
|
||
|
import typing
|
||
|
from enum import Enum
|
||
|
from typing import Any, Set, List, Dict, Optional
|
||
|
from subprocess import PIPE, DEVNULL
|
||
|
from pathlib import Path
|
||
|
|
||
|
from common_types import LoadedLibrary, SymBind, SymKind, DynSymbol, DynSymbols, Relocations, \
|
||
|
SymbolRef, bfs_walk, elf_tree_to_json
|
||
|
|
||
|
|
||
|
# Memoized llvm-readelf output: maps repr() of the full command line to the raw stdout text.
g_readelf_cache: Dict[str, str] = dict()

# Memoized get_elf_soname() results, keyed by the path that was queried.
g_path_to_soname_cache: Dict[Path, str] = dict()
|
||
|
|
||
|
def do_readelf_query(arguments: List[str]) -> List[str]:
    """Run llvm-readelf with `arguments` and return its stdout split into lines.

    The raw output is memoized in g_readelf_cache under the repr() of the full
    command line, so repeating a query does not spawn another process.

    Raises:
        subprocess.CalledProcessError: if llvm-readelf exits with a non-zero status.
    """
    command = ['llvm-readelf'] + arguments
    cache_key = repr(command)
    if cache_key not in g_readelf_cache:
        completed = subprocess.run(command, check=True, stdout=PIPE)
        g_readelf_cache[cache_key] = completed.stdout.decode()
    return g_readelf_cache[cache_key].splitlines()
|
||
|
|
||
|
|
||
|
def get_elf_soname(path: Path) -> str:
    """Return the soname of the ELF file at `path`.

    The soname is taken from the first (SONAME) entry printed by
    `llvm-readelf -d`; when there is no such entry the file's basename is used
    instead. Results are memoized per path in g_path_to_soname_cache.
    """
    try:
        return g_path_to_soname_cache[path]
    except KeyError:
        pass

    soname_re = re.compile(r'\(SONAME\)\s+Library soname: \[(.+)\]$')
    hits = (soname_re.search(entry) for entry in do_readelf_query(['-d', str(path)]))
    # Only the first matching entry is used.
    soname = next((hit.group(1) for hit in hits if hit), None)
    if soname is None:
        # No (SONAME) entry: fall back to the file name.
        soname = os.path.basename(path)

    g_path_to_soname_cache[path] = soname
    return soname
|
||
|
|
||
|
|
||
|
def get_elf_needed(path: Path) -> List[str]:
    """Return the library names from the (NEEDED) entries of `path`.

    Names are listed in the order `llvm-readelf -d` prints them.
    """
    needed_re = re.compile(r'\(NEEDED\)\s+Shared library: \[(.+)\]$')
    dynamic_lines = do_readelf_query(['-d', str(path)])
    hits = (needed_re.search(entry) for entry in dynamic_lines)
    return [hit.group(1) for hit in hits if hit]
|
||
|
|
||
|
|
||
|
# Matches one symbol row of `llvm-readelf --dyn-syms` output. Only FUNC/IFUNC/OBJECT/NOTYPE
# symbols with GLOBAL or WEAK binding match; any other line (headers, other binds or types)
# is rejected.
#
# Captured groups, in order:
#   1. symbol number
#   2. type
#   3. bind
#   4. ndx (a section number, or UND)
#   5. name
#   6. version separator ('@' or '@@'), or None when the symbol is unversioned
#   7. version name, or None when the symbol is unversioned
#
# The version name accepts dots (e.g. GLIBC_2.17), the same character set kRelocationMatcher
# accepts, so that a dotted version does not make the whole row fail to match.
kSymbolMatcher = re.compile(r'''
    \s+ (\d+) : \s*                     # number
    [0-9a-f]+ \s+                       # value
    [0-9a-f]+ \s+                       # size
    (FUNC|IFUNC|OBJECT|NOTYPE) \s+      # type
    (GLOBAL|WEAK) \s+                   # bind
    \w+ \s+                             # vis
    (\d+|UND) \s+                       # ndx
    ([\.\w]+)                           # name
    (?:(@@?)([\.\w]+))?                 # version
    $
''', re.VERBOSE)
|
||
|
|
||
|
|
||
|
def get_dyn_symbols(path: Path) -> DynSymbols:
    """Parse the `llvm-readelf --dyn-syms` output for `path` into a dict keyed by symbol number.

    Only rows accepted by kSymbolMatcher are kept, and the `__cfi_check` symbol is
    dropped. IFUNC and NOTYPE symbols are recorded as SymKind.Func.

    Exits with an error when the output looks like it came from an obsolete
    llvm-readelf.
    """
    symbol_kinds = {
        'FUNC': SymKind.Func,
        'IFUNC': SymKind.Func,
        'OBJECT': SymKind.Var,
        'NOTYPE': SymKind.Func,
    }
    symbol_binds = {'GLOBAL': SymBind.Global, 'WEAK': SymBind.Weak}

    symbols = {}
    for row in do_readelf_query(['--dyn-syms', str(path)]):
        parsed = kSymbolMatcher.match(row)
        if parsed is None:
            # gLinux currently has a version of llvm-readelf whose output is very different from
            # the current versions of llvm-readelf (or GNU readelf).
            if 'Symbol table of .gnu.hash for image:' in row:
                sys.exit(f'error: obsolete version of llvm-readelf')
            continue

        index, kind, bind, section, name, ver_type, ver_name = parsed.groups()

        if name == '__cfi_check':
            # The linker gives an error like:
            #    CANNOT LINK EXECUTABLE "/data/local/tmp/out-linker-bench/b_libandroid_servers": unaligned __cfi_check in the library "(null)"
            # I am probably breaking some kind of CFI invariant, so strip these out for now.
            continue

        # True unless the ndx column reads UND.
        has_section = section != 'UND'
        symbols[int(index)] = DynSymbol(name, symbol_kinds[kind], symbol_binds[bind], has_section,
                                        ver_type, ver_name)

    return symbols
|
||
|
|
||
|
|
||
|
# Matches one relocation row of `llvm-readelf -r` output, anchored at the start of the line.
#
# Captured groups, in order:
#   1. offset (hex digits)
#   2. info (hex digits)
#   3. relocation type name
#   4. symbol name, or None when the row has no "<symbol value> <symbol name>" part
#   5. symbol version, or None
kRelocationMatcher = re.compile(r'''
    ([0-9a-f]+) \s+                     # offset
    ([0-9a-f]+) \s+                     # info
    (\w+)                               # type
    (?:
        \s+ [0-9a-f]+ \s+               # symbol value
        ([\.\w]+)                       # symbol name
        (?: @@? ([\.\w]+) )?            # version
    )?
    \b
''', re.VERBOSE)
|
||
|
|
||
|
|
||
|
def scan_relocations(path: Path, syms: DynSymbols) -> Relocations:
    """Collect the relocations of the ELF file at `path` from `llvm-readelf -r` output.

    Args:
        path: the ELF file to inspect.
        syms: the file's dynamic symbols, indexed by symbol number; used to decide
            whether a relocation's target symbol is weak.

    Returns:
        A Relocations object with:
          - relative: scaled offsets of R_*_RELATIVE relocations
          - jump_slots: SymbolRefs of R_*_JUMP_SLOT relocations
          - got: SymbolRefs of R_*_GLOB_DAT relocations
          - symbolic: (scaled offset, SymbolRef) pairs of R_ARM_ABS32 / R_AARCH64_ABS64
        Offsets are the printed byte offset divided by 4 (8-digit offsets) or by 8
        (16-digit offsets).

    Exits with an error message on any row that cannot be interpreted: an unexpected
    offset width, a relative relocation that names a symbol, a symbolic relocation
    without a symbol or with a symbol number missing from `syms`, or an unrecognized
    relocation type.
    """
    result: Relocations = Relocations()
    for line in do_readelf_query(['-r', str(path)]):
        m = kRelocationMatcher.match(line)
        if m is None:
            continue

        offset_str, info_str, reloc_name, sym_name, ver = m.groups()

        # The printed width of the offset selects the word size and where the symbol
        # number sits inside the info field (presumably 32-bit vs. 64-bit ELF).
        if len(offset_str) == 8:
            offset = int(offset_str, 16) // 4
            sym_idx = int(info_str, 16) >> 8
        elif len(offset_str) == 16:
            offset = int(offset_str, 16) // 8
            sym_idx = int(info_str, 16) >> 32
        else:
            sys.exit(f'error: invalid offset length: {repr(offset_str)}')

        # TODO: R_ARM_IRELATIVE doesn't work, so skip it.
        if reloc_name == 'R_ARM_IRELATIVE':
            continue

        if reloc_name in ('R_ARM_RELATIVE', 'R_AARCH64_RELATIVE'):
            # Checked explicitly rather than with `assert`, which is stripped under -O.
            if sym_name is not None:
                sys.exit(f'error: unexpected symbol for relative reloc {m.groups()} in {path}')
            result.relative.append(offset)
            continue

        if sym_name is None:
            sys.exit(f'error: missing symbol for reloc {m.groups()} in {path}')

        # `syms` only holds the symbols get_dyn_symbols kept, so the number may be absent.
        try:
            target = syms[sym_idx]
        except KeyError:
            sys.exit(f'error: reloc {m.groups()} in {path} refers to unknown symbol index {sym_idx}')

        symbol = SymbolRef(sym_name, target.bind == SymBind.Weak, ver)

        if reloc_name in ('R_ARM_JUMP_SLOT', 'R_AARCH64_JUMP_SLOT'):
            result.jump_slots.append(symbol)
        elif reloc_name in ('R_ARM_GLOB_DAT', 'R_AARCH64_GLOB_DAT'):
            result.got.append(symbol)
        elif reloc_name in ('R_ARM_ABS32', 'R_AARCH64_ABS64'):
            result.symbolic.append((offset, symbol))
        else:
            sys.exit(f'error: unrecognized reloc {m.groups()} in {path}')

    return result
|
||
|
|
||
|
|
||
|
def load_elf_tree(search_path: List[Path], path: Path) -> LoadedLibrary:
    """Load the ELF file at `path` plus everything it needs, and return the root library.

    Each needed name is looked up among the libraries loaded so far and otherwise
    resolved to the first directory of `search_path` that contains a file with that
    name. Exits with an error when a needed library cannot be found or when two
    loaded files report the same soname.
    """
    loaded: Dict[str, LoadedLibrary] = {}

    def load(elf_path: Path) -> LoadedLibrary:
        lib = LoadedLibrary()
        lib.soname = get_elf_soname(elf_path)
        if lib.soname in loaded:
            sys.exit(f'soname already loaded: {lib.soname}')
        # Register before walking the dependencies so that a later request for this
        # name returns this entry instead of loading the file again.
        loaded[lib.soname] = lib

        lib.syms = get_dyn_symbols(elf_path)
        lib.rels = scan_relocations(elf_path, lib.syms)

        for dep_name in get_elf_needed(elf_path):
            dep = find_library(dep_name)
            if dep is not None:
                lib.needed.append(dep)

        return lib

    def find_library(needed: str) -> Optional[LoadedLibrary]:
        if needed in loaded:
            return loaded[needed]

        candidates = (directory / needed for directory in search_path)
        found = next((candidate for candidate in candidates if candidate.exists()), None)
        if found is None:
            sys.exit(f'error: missing DT_NEEDED lib {needed}!')
        return load(found)

    return load(path)
|
||
|
|
||
|
|
||
|
def main() -> None:
    """Scan an ELF file and its needed libraries, then write the result as JSON.

    Command line: `input output [-L PATH]...`, where each -L adds a directory that
    is searched for needed libraries.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('input', type=str)
    parser.add_argument('output', type=str)
    parser.add_argument('-L', dest='search_path', metavar='PATH', action='append', type=str, default=[])

    args = parser.parse_args()
    search_path = [Path(p) for p in args.search_path]

    # Do all the scanning before touching the output file, so that a failed scan does
    # not leave behind an empty file or truncate the output of an earlier run.
    root = load_elf_tree(search_path, Path(args.input))
    tree_json = elf_tree_to_json(root)

    with open(Path(args.output), 'w') as f:
        json.dump(tree_json, f, sort_keys=True, indent=2)
|
||
|
|
||
|
|
||
|
# Run the scanner only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()
|