Home | History | Annotate | Download | only in tools
      1 #!/usr/bin/env python
      2 #
      3 # Copyright (C) 2019 The Android Open Source Project
      4 #
      5 # Licensed under the Apache License, Version 2.0 (the "License");
      6 # you may not use this file except in compliance with the License.
      7 # You may obtain a copy of the License at
      8 #
      9 #      http://www.apache.org/licenses/LICENSE-2.0
     10 #
     11 # Unless required by applicable law or agreed to in writing, software
     12 # distributed under the License is distributed on an "AS IS" BASIS,
     13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 # See the License for the specific language governing permissions and
     15 # limitations under the License.
     16 
     17 """ELF file checker.
     18 
     19 This command ensures all undefined symbols in an ELF file can be resolved to
     20 global (or weak) symbols defined in shared objects specified in DT_NEEDED
     21 entries.
     22 """
     23 
     24 from __future__ import print_function
     25 
     26 import argparse
     27 import collections
     28 import os
     29 import os.path
     30 import re
     31 import struct
     32 import subprocess
     33 import sys
     34 
     35 
# Expected value of the `ei_magic` header field; files whose first four bytes
# differ are rejected as non-ELF by `ELFParser.open`.
_ELF_MAGIC = b'\x7fELF'


# Known machines (values compared against the `e_machine` header field).
_EM_386 = 3
_EM_ARM = 40
_EM_X86_64 = 62
_EM_AARCH64 = 183

# Machines the checker knows about; `Checker.load_file_under_test` can skip
# files whose `e_machine` is not in this set.
_KNOWN_MACHINES = {_EM_386, _EM_ARM, _EM_X86_64, _EM_AARCH64}


# ELF header struct
# Each entry is a (field name, `struct` format code) pair describing the
# leading fields of the ELF header in file order.
_ELF_HEADER_STRUCT = (
  ('ei_magic', '4s'),
  ('ei_class', 'B'),
  ('ei_data', 'B'),
  ('ei_version', 'B'),
  ('ei_osabi', 'B'),
  ('ei_pad', '8s'),
  ('e_type', 'H'),
  ('e_machine', 'H'),
  ('e_version', 'I'),
)

# Concatenated format string for `struct.unpack`. It carries no byte-order
# prefix, so `struct` applies native byte order and alignment.
_ELF_HEADER_STRUCT_FMT = ''.join(_fmt for _, _fmt in _ELF_HEADER_STRUCT)


# Parsed ELF header; one attribute per entry of `_ELF_HEADER_STRUCT`.
ELFHeader = collections.namedtuple(
  'ELFHeader', [_name for _name, _ in _ELF_HEADER_STRUCT])


# Parsed ELF file: DT_SONAME, the DT_NEEDED list, the imported and exported
# symbol tables, and the `ELFHeader`.
ELF = collections.namedtuple(
  'ELF',
  ('dt_soname', 'dt_needed', 'imported', 'exported', 'header'))
     71 
     72 
     73 def _get_os_name():
     74   """Get the host OS name."""
     75   if sys.platform == 'linux2':
     76     return 'linux'
     77   if sys.platform == 'darwin':
     78     return 'darwin'
     79   raise ValueError(sys.platform + ' is not supported')
     80 
     81 
     82 def _get_build_top():
     83   """Find the build top of the source tree ($ANDROID_BUILD_TOP)."""
     84   prev_path = None
     85   curr_path = os.path.abspath(os.getcwd())
     86   while prev_path != curr_path:
     87     if os.path.exists(os.path.join(curr_path, '.repo')):
     88       return curr_path
     89     prev_path = curr_path
     90     curr_path = os.path.dirname(curr_path)
     91   return None
     92 
     93 
     94 def _select_latest_llvm_version(versions):
     95   """Select the latest LLVM prebuilts version from a set of versions."""
     96   pattern = re.compile('clang-r([0-9]+)([a-z]?)')
     97   found_rev = 0
     98   found_ver = None
     99   for curr_ver in versions:
    100     match = pattern.match(curr_ver)
    101     if not match:
    102       continue
    103     curr_rev = int(match.group(1))
    104     if not found_ver or curr_rev > found_rev or (
    105         curr_rev == found_rev and curr_ver > found_ver):
    106       found_rev = curr_rev
    107       found_ver = curr_ver
    108   return found_ver
    109 
    110 
    111 def _get_latest_llvm_version(llvm_dir):
    112   """Find the latest LLVM prebuilts version from `llvm_dir`."""
    113   return _select_latest_llvm_version(os.listdir(llvm_dir))
    114 
    115 
    116 def _get_llvm_dir():
    117   """Find the path to LLVM prebuilts."""
    118   build_top = _get_build_top()
    119 
    120   llvm_prebuilts_base = os.environ.get('LLVM_PREBUILTS_BASE')
    121   if not llvm_prebuilts_base:
    122     llvm_prebuilts_base = os.path.join('prebuilts', 'clang', 'host')
    123 
    124   llvm_dir = os.path.join(
    125     build_top, llvm_prebuilts_base, _get_os_name() + '-x86')
    126 
    127   if not os.path.exists(llvm_dir):
    128     return None
    129 
    130   llvm_prebuilts_version = os.environ.get('LLVM_PREBUILTS_VERSION')
    131   if not llvm_prebuilts_version:
    132     llvm_prebuilts_version = _get_latest_llvm_version(llvm_dir)
    133 
    134   llvm_dir = os.path.join(llvm_dir, llvm_prebuilts_version)
    135 
    136   if not os.path.exists(llvm_dir):
    137     return None
    138 
    139   return llvm_dir
    140 
    141 
    142 def _get_llvm_readobj():
    143   """Find the path to llvm-readobj executable."""
    144   llvm_dir = _get_llvm_dir()
    145   llvm_readobj = os.path.join(llvm_dir, 'bin', 'llvm-readobj')
    146   return llvm_readobj if os.path.exists(llvm_readobj) else 'llvm-readobj'
    147 
    148 
    149 class ELFError(ValueError):
    150   """Generic ELF parse error"""
    151   pass
    152 
    153 
    154 class ELFInvalidMagicError(ELFError):
    155   """Invalid ELF magic word error"""
    156   def __init__(self):
    157     super(ELFInvalidMagicError, self).__init__('bad ELF magic')
    158 
    159 
    160 class ELFParser(object):
    161   """ELF file parser"""
    162 
    163   @classmethod
    164   def _read_elf_header(cls, elf_file_path):
    165     """Read the ELF magic word from the beginning of the file."""
    166     with open(elf_file_path, 'rb') as elf_file:
    167       buf = elf_file.read(struct.calcsize(_ELF_HEADER_STRUCT_FMT))
    168       try:
    169         return ELFHeader(*struct.unpack(_ELF_HEADER_STRUCT_FMT, buf))
    170       except struct.error:
    171         return None
    172 
    173 
    174   @classmethod
    175   def open(cls, elf_file_path, llvm_readobj):
    176     """Open and parse the ELF file."""
    177     # Parse the ELF header for simple sanity checks.
    178     header = cls._read_elf_header(elf_file_path)
    179     if not header or header.ei_magic != _ELF_MAGIC:
    180       raise ELFInvalidMagicError()
    181 
    182     # Run llvm-readobj and parse the output.
    183     return cls._read_llvm_readobj(elf_file_path, header, llvm_readobj)
    184 
    185 
    186   @classmethod
    187   def _find_prefix(cls, pattern, lines_it):
    188     """Iterate `lines_it` until finding a string that starts with `pattern`."""
    189     for line in lines_it:
    190       if line.startswith(pattern):
    191         return True
    192     return False
    193 
    194 
    195   @classmethod
    196   def _read_llvm_readobj(cls, elf_file_path, header, llvm_readobj):
    197     """Run llvm-readobj and parse the output."""
    198     proc = subprocess.Popen(
    199       [llvm_readobj, '-dynamic-table', '-dyn-symbols', elf_file_path],
    200       stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    201     out, _ = proc.communicate()
    202     lines = out.splitlines()
    203     return cls._parse_llvm_readobj(elf_file_path, header, lines)
    204 
    205 
    206   @classmethod
    207   def _parse_llvm_readobj(cls, elf_file_path, header, lines):
    208     """Parse the output of llvm-readobj."""
    209     lines_it = iter(lines)
    210     imported, exported = cls._parse_dynamic_symbols(lines_it)
    211     dt_soname, dt_needed = cls._parse_dynamic_table(elf_file_path, lines_it)
    212     return ELF(dt_soname, dt_needed, imported, exported, header)
    213 
    214 
    215   _DYNAMIC_SECTION_START_PATTERN = 'DynamicSection ['
    216 
    217   _DYNAMIC_SECTION_NEEDED_PATTERN = re.compile(
    218     '^  0x[0-9a-fA-F]+\\s+NEEDED\\s+Shared library: \\[(.*)\\]$')
    219 
    220   _DYNAMIC_SECTION_SONAME_PATTERN = re.compile(
    221     '^  0x[0-9a-fA-F]+\\s+SONAME\\s+Library soname: \\[(.*)\\]$')
    222 
    223   _DYNAMIC_SECTION_END_PATTERN = ']'
    224 
    225 
    226   @classmethod
    227   def _parse_dynamic_table(cls, elf_file_path, lines_it):
    228     """Parse the dynamic table section."""
    229     dt_soname = os.path.basename(elf_file_path)
    230     dt_needed = []
    231 
    232     dynamic = cls._find_prefix(cls._DYNAMIC_SECTION_START_PATTERN, lines_it)
    233     if not dynamic:
    234       return (dt_soname, dt_needed)
    235 
    236     for line in lines_it:
    237       if line == cls._DYNAMIC_SECTION_END_PATTERN:
    238         break
    239 
    240       match = cls._DYNAMIC_SECTION_NEEDED_PATTERN.match(line)
    241       if match:
    242         dt_needed.append(match.group(1))
    243         continue
    244 
    245       match = cls._DYNAMIC_SECTION_SONAME_PATTERN.match(line)
    246       if match:
    247         dt_soname = match.group(1)
    248         continue
    249 
    250     return (dt_soname, dt_needed)
    251 
    252 
    253   _DYNAMIC_SYMBOLS_START_PATTERN = 'DynamicSymbols ['
    254   _DYNAMIC_SYMBOLS_END_PATTERN = ']'
    255 
    256   _SYMBOL_ENTRY_START_PATTERN = '  Symbol {'
    257   _SYMBOL_ENTRY_PATTERN = re.compile('^    ([A-Za-z0-9_]+): (.*)$')
    258   _SYMBOL_ENTRY_PAREN_PATTERN = re.compile(
    259     '\\s+\\((?:(?:\\d+)|(?:0x[0-9a-fA-F]+))\\)$')
    260   _SYMBOL_ENTRY_END_PATTERN = '  }'
    261 
    262 
    263   @staticmethod
    264   def _parse_symbol_name(name_with_version):
    265     """Split `name_with_version` into name and version. This function may split
    266     at last occurrence of `@@` or `@`."""
    267     pos = name_with_version.rfind('@')
    268     if pos == -1:
    269       name = name_with_version
    270       version = ''
    271     else:
    272       if pos > 0 and name_with_version[pos - 1] == '@':
    273         name = name_with_version[0:pos - 1]
    274       else:
    275         name = name_with_version[0:pos]
    276       version = name_with_version[pos + 1:]
    277     return (name, version)
    278 
    279 
    280   @classmethod
    281   def _parse_dynamic_symbols(cls, lines_it):
    282     """Parse dynamic symbol table and collect imported and exported symbols."""
    283     imported = collections.defaultdict(set)
    284     exported = collections.defaultdict(set)
    285 
    286     for symbol in cls._parse_dynamic_symbols_internal(lines_it):
    287       name, version = cls._parse_symbol_name(symbol['Name'])
    288       if name:
    289         if symbol['Section'] == 'Undefined':
    290           if symbol['Binding'] != 'Weak':
    291             imported[name].add(version)
    292         else:
    293           if symbol['Binding'] != 'Local':
    294             exported[name].add(version)
    295 
    296     # Freeze the returned imported/exported dict.
    297     return (dict(imported), dict(exported))
    298 
    299 
    300   @classmethod
    301   def _parse_dynamic_symbols_internal(cls, lines_it):
    302     """Parse symbols entries and yield each symbols."""
    303 
    304     if not cls._find_prefix(cls._DYNAMIC_SYMBOLS_START_PATTERN, lines_it):
    305       return
    306 
    307     for line in lines_it:
    308       if line == cls._DYNAMIC_SYMBOLS_END_PATTERN:
    309         return
    310 
    311       if line == cls._SYMBOL_ENTRY_START_PATTERN:
    312         symbol = {}
    313         continue
    314 
    315       if line == cls._SYMBOL_ENTRY_END_PATTERN:
    316         yield symbol
    317         symbol = None
    318         continue
    319 
    320       match = cls._SYMBOL_ENTRY_PATTERN.match(line)
    321       if match:
    322         key = match.group(1)
    323         value = cls._SYMBOL_ENTRY_PAREN_PATTERN.sub('', match.group(2))
    324         symbol[key] = value
    325         continue
    326 
    327 
    328 class Checker(object):
    329   """ELF file checker that checks DT_SONAME, DT_NEEDED, and symbols."""
    330 
    331   def __init__(self, llvm_readobj):
    332     self._file_path = ''
    333     self._file_under_test = None
    334     self._shared_libs = []
    335 
    336     self._llvm_readobj = llvm_readobj
    337 
    338 
    339   if sys.stderr.isatty():
    340     _ERROR_TAG = '\033[0;1;31merror:\033[m'  # Red error
    341     _NOTE_TAG = '\033[0;1;30mnote:\033[m'  # Black note
    342   else:
    343     _ERROR_TAG = 'error:'  # Red error
    344     _NOTE_TAG = 'note:'  # Black note
    345 
    346 
    347   def _error(self, *args):
    348     """Emit an error to stderr."""
    349     print(self._file_path + ': ' + self._ERROR_TAG, *args, file=sys.stderr)
    350 
    351 
    352   def _note(self, *args):
    353     """Emit a note to stderr."""
    354     print(self._file_path + ': ' + self._NOTE_TAG, *args, file=sys.stderr)
    355 
    356 
    357   def _load_elf_file(self, path, skip_bad_elf_magic):
    358     """Load an ELF file from the `path`."""
    359     try:
    360       return ELFParser.open(path, self._llvm_readobj)
    361     except (IOError, OSError):
    362       self._error('Failed to open "{}".'.format(path))
    363       sys.exit(2)
    364     except ELFInvalidMagicError:
    365       if skip_bad_elf_magic:
    366         sys.exit(0)
    367       else:
    368         self._error('File "{}" must have a valid ELF magic word.'.format(path))
    369         sys.exit(2)
    370     except:
    371       self._error('An unknown error occurred while opening "{}".'.format(path))
    372       raise
    373 
    374 
    375   def load_file_under_test(self, path, skip_bad_elf_magic,
    376                            skip_unknown_elf_machine):
    377     """Load file-under-test (either an executable or a shared lib)."""
    378     self._file_path = path
    379     self._file_under_test = self._load_elf_file(path, skip_bad_elf_magic)
    380 
    381     if skip_unknown_elf_machine and \
    382         self._file_under_test.header.e_machine not in _KNOWN_MACHINES:
    383       sys.exit(0)
    384 
    385 
    386   def load_shared_libs(self, shared_lib_paths):
    387     """Load shared libraries."""
    388     for path in shared_lib_paths:
    389       self._shared_libs.append(self._load_elf_file(path, False))
    390 
    391 
    392   def check_dt_soname(self, soname):
    393     """Check whether DT_SONAME matches installation file name."""
    394     if self._file_under_test.dt_soname != soname:
    395       self._error('DT_SONAME "{}" must be equal to the file name "{}".'
    396                   .format(self._file_under_test.dt_soname, soname))
    397       sys.exit(2)
    398 
    399 
    400   def check_dt_needed(self):
    401     """Check whether all DT_NEEDED entries are specified in the build
    402     system."""
    403 
    404     missing_shared_libs = False
    405 
    406     # Collect the DT_SONAMEs from shared libs specified in the build system.
    407     specified_sonames = {lib.dt_soname for lib in self._shared_libs}
    408 
    409     # Chech whether all DT_NEEDED entries are specified.
    410     for lib in self._file_under_test.dt_needed:
    411       if lib not in specified_sonames:
    412         self._error('DT_NEEDED "{}" is not specified in shared_libs.'
    413                     .format(lib.decode('utf-8')))
    414         missing_shared_libs = True
    415 
    416     if missing_shared_libs:
    417       dt_needed = sorted(set(self._file_under_test.dt_needed))
    418       modules = [re.sub('\\.so$', '', lib) for lib in dt_needed]
    419 
    420       self._note()
    421       self._note('Fix suggestions:')
    422       self._note(
    423         '  Android.bp: shared_libs: [' +
    424         ', '.join('"' + module + '"' for module in modules) + '],')
    425       self._note(
    426         '  Android.mk: LOCAL_SHARED_LIBRARIES := ' + ' '.join(modules))
    427 
    428       self._note()
    429       self._note('If the fix above doesn\'t work, bypass this check with:')
    430       self._note('  Android.bp: check_elf_files: false,')
    431       self._note('  Android.mk: LOCAL_CHECK_ELF_FILES := false')
    432 
    433       sys.exit(2)
    434 
    435 
    436   @staticmethod
    437   def _find_symbol(lib, name, version):
    438     """Check whether the symbol name and version matches a definition in
    439     lib."""
    440     try:
    441       lib_sym_vers = lib.exported[name]
    442     except KeyError:
    443       return False
    444     if version == '':  # Symbol version is not requested
    445       return True
    446     return version in lib_sym_vers
    447 
    448 
    449   @classmethod
    450   def _find_symbol_from_libs(cls, libs, name, version):
    451     """Check whether the symbol name and version is defined in one of the
    452     shared libraries in libs."""
    453     for lib in libs:
    454       if cls._find_symbol(lib, name, version):
    455         return lib
    456     return None
    457 
    458 
    459   def check_symbols(self):
    460     """Check whether all undefined symbols are resolved to a definition."""
    461     all_elf_files = [self._file_under_test] + self._shared_libs
    462     missing_symbols = []
    463     for sym, imported_vers in self._file_under_test.imported.iteritems():
    464       for imported_ver in imported_vers:
    465         lib = self._find_symbol_from_libs(all_elf_files, sym, imported_ver)
    466         if not lib:
    467           missing_symbols.append((sym, imported_ver))
    468 
    469     if missing_symbols:
    470       for sym, ver in sorted(missing_symbols):
    471         sym = sym.decode('utf-8')
    472         if ver:
    473           sym += '@' + ver.decode('utf-8')
    474         self._error('Unresolved symbol: {}'.format(sym))
    475 
    476       self._note()
    477       self._note('Some dependencies might be changed, thus the symbol(s) '
    478                  'above cannot be resolved.')
    479       self._note('Please re-build the prebuilt file: "{}".'
    480                  .format(self._file_path))
    481 
    482       self._note()
    483       self._note('If this is a new prebuilt file and it is designed to have '
    484                  'unresolved symbols, add one of the following properties:')
    485       self._note('  Android.bp: allow_undefined_symbols: true,')
    486       self._note('  Android.mk: LOCAL_ALLOW_UNDEFINED_SYMBOLS := true')
    487 
    488       sys.exit(2)
    489 
    490 
    491 def _parse_args():
    492   """Parse command line options."""
    493   parser = argparse.ArgumentParser()
    494 
    495   # Input file
    496   parser.add_argument('file',
    497                       help='Path to the input file to be checked')
    498   parser.add_argument('--soname',
    499                       help='Shared object name of the input file')
    500 
    501   # Shared library dependencies
    502   parser.add_argument('--shared-lib', action='append', default=[],
    503                       help='Path to shared library dependencies')
    504 
    505   # Check options
    506   parser.add_argument('--skip-bad-elf-magic', action='store_true',
    507                       help='Ignore the input file without the ELF magic word')
    508   parser.add_argument('--skip-unknown-elf-machine', action='store_true',
    509                       help='Ignore the input file with unknown machine ID')
    510   parser.add_argument('--allow-undefined-symbols', action='store_true',
    511                       help='Ignore unresolved undefined symbols')
    512 
    513   # Other options
    514   parser.add_argument('--llvm-readobj',
    515                       help='Path to the llvm-readobj executable')
    516 
    517   return parser.parse_args()
    518 
    519 
    520 def main():
    521   """Main function"""
    522   args = _parse_args()
    523 
    524   llvm_readobj = args.llvm_readobj
    525   if not llvm_readobj:
    526     llvm_readobj = _get_llvm_readobj()
    527 
    528   # Load ELF files
    529   checker = Checker(llvm_readobj)
    530   checker.load_file_under_test(
    531     args.file, args.skip_bad_elf_magic, args.skip_unknown_elf_machine)
    532   checker.load_shared_libs(args.shared_lib)
    533 
    534   # Run checks
    535   if args.soname:
    536     checker.check_dt_soname(args.soname)
    537 
    538   checker.check_dt_needed()
    539 
    540   if not args.allow_undefined_symbols:
    541     checker.check_symbols()
    542 
    543 
    544 if __name__ == '__main__':
    545   main()
    546