Home | History | Annotate | Download | only in binary_search_tool
      1 # Copyright 2016 Google Inc. All Rights Reserved.
      2 #
      3 # This script is used to help the compiler wrapper in the Android build system
      4 # bisect for bad object files.
      5 """Utilities for bisection of Android object files.
      6 
      7 This module contains a set of utilities to allow bisection between
      8 two sets (good and bad) of object files. Mostly used to find compiler
      9 bugs.
     10 
     11 Reference page:
     12 https://sites.google.com/a/google.com/chromeos-toolchain-team-home2/home/team-tools-and-scripts/bisecting-chromeos-compiler-problems/bisection-compiler-wrapper
     13 
     14 Design doc:
     15 https://docs.google.com/document/d/1yDgaUIa2O5w6dc3sSTe1ry-1ehKajTGJGQCbyn0fcEM
     16 """
     17 
     18 from __future__ import print_function
     19 
     20 import contextlib
     21 import fcntl
     22 import os
     23 import shutil
     24 import subprocess
     25 import sys
     26 
     27 VALID_MODES = ['POPULATE_GOOD', 'POPULATE_BAD', 'TRIAGE']
     28 GOOD_CACHE = 'good'
     29 BAD_CACHE = 'bad'
     30 LIST_FILE = os.path.join(GOOD_CACHE, '_LIST')
     31 
     32 CONTINUE_ON_MISSING = os.environ.get('BISECT_CONTINUE_ON_MISSING', None) == '1'
     33 WRAPPER_SAFE_MODE = os.environ.get('BISECT_WRAPPER_SAFE_MODE', None) == '1'
     34 
     35 
     36 class Error(Exception):
     37   """The general compiler wrapper error class."""
     38   pass
     39 
     40 
     41 @contextlib.contextmanager
     42 def lock_file(path, mode):
     43   """Lock file and block if other process has lock on file.
     44 
     45   Acquire exclusive lock for file. Only blocks other processes if they attempt
     46   to also acquire lock through this method. If only reading (modes 'r' and 'rb')
     47   then the lock is shared (i.e. many reads can happen concurrently, but only one
     48   process may write at a time).
     49 
     50   This function is a contextmanager, meaning it's meant to be used with the
     51   "with" statement in Python. This is so cleanup and setup happens automatically
     52   and cleanly. Execution of the outer "with" statement happens at the "yield"
     53   statement. Execution resumes after the yield when the outer "with" statement
     54   ends.
     55 
     56   Args:
     57     path: path to file being locked
     58     mode: mode to open file with ('w', 'r', etc.)
     59   """
     60   with open(path, mode) as f:
     61     # Share the lock if just reading, make lock exclusive if writing
     62     if f.mode == 'r' or f.mode == 'rb':
     63       lock_type = fcntl.LOCK_SH
     64     else:
     65       lock_type = fcntl.LOCK_EX
     66 
     67     try:
     68       fcntl.lockf(f, lock_type)
     69       yield f
     70       f.flush()
     71     except:
     72       raise
     73     finally:
     74       fcntl.lockf(f, fcntl.LOCK_UN)
     75 
     76 
     77 def log_to_file(path, execargs, link_from=None, link_to=None):
     78   """Common logging function.
     79 
     80   Log current working directory, current execargs, and a from-to relationship
     81   between files.
     82   """
     83   with lock_file(path, 'a') as log:
     84     log.write('cd: %s; %s\n' % (os.getcwd(), ' '.join(execargs)))
     85     if link_from and link_to:
     86       log.write('%s -> %s\n' % (link_from, link_to))
     87 
     88 
     89 def exec_and_return(execargs):
     90   """Execute process and return.
     91 
     92   Execute according to execargs and return immediately. Don't inspect
     93   stderr or stdout.
     94   """
     95   return subprocess.call(execargs)
     96 
     97 
     98 def which_cache(obj_file):
     99   """Determine which cache an object belongs to.
    100 
    101   The binary search tool creates two files for each search iteration listing
    102   the full set of bad objects and full set of good objects. We use this to
    103   determine where an object file should be linked from (good or bad).
    104   """
    105   bad_set_file = os.environ.get('BISECT_BAD_SET')
    106   ret = subprocess.call(['grep', '-x', '-q', obj_file, bad_set_file])
    107   if ret == 0:
    108     return BAD_CACHE
    109   else:
    110     return GOOD_CACHE
    111 
    112 
    113 def makedirs(path):
    114   """Try to create directories in path."""
    115   try:
    116     os.makedirs(path)
    117   except os.error:
    118     if not os.path.isdir(path):
    119       raise
    120 
    121 
    122 def get_obj_path(execargs):
    123   """Get the object path for the object file in the list of arguments.
    124 
    125   Returns:
    126     Absolute object path from execution args (-o argument). If no object being
    127     outputted or output doesn't end in ".o" then return empty string.
    128   """
    129   try:
    130     i = execargs.index('-o')
    131   except ValueError:
    132     return ''
    133 
    134   obj_path = execargs[i + 1]
    135   if not obj_path.endswith(('.o',)):
    136     # TODO: what suffixes do we need to contemplate
    137     # TODO: add this as a warning
    138     # TODO: need to handle -r compilations
    139     return ''
    140 
    141   return os.path.abspath(obj_path)
    142 
    143 
    144 def get_dep_path(execargs):
    145   """Get the dep file path for the dep file in the list of arguments.
    146 
    147   Returns:
    148     Absolute path of dependency file path from execution args (-o argument). If
    149     no dependency being outputted then return empty string.
    150   """
    151   if '-MD' not in execargs and '-MMD' not in execargs:
    152     return ''
    153 
    154   # If -MF given this is the path of the dependency file. Otherwise the
    155   # dependency file is the value of -o but with a .d extension
    156   if '-MF' in execargs:
    157     i = execargs.index('-MF')
    158     dep_path = execargs[i + 1]
    159     return os.path.abspath(dep_path)
    160 
    161   full_obj_path = get_obj_path(execargs)
    162   if not full_obj_path:
    163     return ''
    164 
    165   return full_obj_path[:-2] + '.d'
    166 
    167 
    168 def get_dwo_path(execargs):
    169   """Get the dwo file path for the dwo file in the list of arguments.
    170 
    171   Returns:
    172     Absolute dwo file path from execution args (-gsplit-dwarf argument) If no
    173     dwo file being outputted then return empty string.
    174   """
    175   if '-gsplit-dwarf' not in execargs:
    176     return ''
    177 
    178   full_obj_path = get_obj_path(execargs)
    179   if not full_obj_path:
    180     return ''
    181 
    182   return full_obj_path[:-2] + '.dwo'
    183 
    184 
    185 def in_object_list(obj_name, list_filename):
    186   """Check if object file name exist in file with object list."""
    187   if not obj_name:
    188     return False
    189 
    190   with lock_file(list_filename, 'r') as list_file:
    191     for line in list_file:
    192       if line.strip() == obj_name:
    193         return True
    194 
    195     return False
    196 
    197 
    198 def get_side_effects(execargs):
    199   """Determine side effects generated by compiler
    200 
    201   Returns:
    202     List of paths of objects that the compiler generates as side effects.
    203   """
    204   side_effects = []
    205 
    206   # Cache dependency files
    207   full_dep_path = get_dep_path(execargs)
    208   if full_dep_path:
    209     side_effects.append(full_dep_path)
    210 
    211   # Cache dwo files
    212   full_dwo_path = get_dwo_path(execargs)
    213   if full_dwo_path:
    214     side_effects.append(full_dwo_path)
    215 
    216   return side_effects
    217 
    218 
    219 def cache_file(execargs, bisect_dir, cache, abs_file_path):
    220   """Cache compiler output file (.o/.d/.dwo)."""
    221   # os.path.join fails with absolute paths, use + instead
    222   bisect_path = os.path.join(bisect_dir, cache) + abs_file_path
    223   bisect_path_dir = os.path.dirname(bisect_path)
    224   makedirs(bisect_path_dir)
    225   pop_log = os.path.join(bisect_dir, cache, '_POPULATE_LOG')
    226   log_to_file(pop_log, execargs, abs_file_path, bisect_path)
    227 
    228   try:
    229     if os.path.exists(abs_file_path):
    230       shutil.copy2(abs_file_path, bisect_path)
    231   except Exception:
    232     print('Could not cache file %s' % abs_file_path, file=sys.stderr)
    233     raise
    234 
    235 
    236 def restore_file(bisect_dir, cache, abs_file_path):
    237   """Restore file from cache (.o/.d/.dwo)."""
    238   # os.path.join fails with absolute paths, use + instead
    239   cached_path = os.path.join(bisect_dir, cache) + abs_file_path
    240   if os.path.exists(cached_path):
    241     if os.path.exists(abs_file_path):
    242       os.remove(abs_file_path)
    243     os.link(cached_path, abs_file_path)
    244   else:
    245     raise Error(('%s is missing from %s cache! Unsure how to proceed. Make '
    246                  'will now crash.' % (cache, cached_path)))
    247 
    248 
    249 def bisect_populate(execargs, bisect_dir, population_name):
    250   """Add necessary information to the bisect cache for the given execution.
    251 
    252   Extract the necessary information for bisection from the compiler
    253   execution arguments and put it into the bisection cache. This
    254   includes copying the created object file, adding the object
    255   file path to the cache list and keeping a log of the execution.
    256 
    257   Args:
    258     execargs: compiler execution arguments.
    259     bisect_dir: bisection directory.
    260     population_name: name of the cache being populated (good/bad).
    261   """
    262   retval = exec_and_return(execargs)
    263   if retval:
    264     return retval
    265 
    266   full_obj_path = get_obj_path(execargs)
    267   # If not a normal compiler call then just exit
    268   if not full_obj_path:
    269     return
    270 
    271   cache_file(execargs, bisect_dir, population_name, full_obj_path)
    272 
    273   population_dir = os.path.join(bisect_dir, population_name)
    274   with lock_file(os.path.join(population_dir, '_LIST'), 'a') as object_list:
    275     object_list.write('%s\n' % full_obj_path)
    276 
    277   for side_effect in get_side_effects(execargs):
    278     cache_file(execargs, bisect_dir, population_name, side_effect)
    279 
    280 
    281 def bisect_triage(execargs, bisect_dir):
    282   full_obj_path = get_obj_path(execargs)
    283   obj_list = os.path.join(bisect_dir, LIST_FILE)
    284 
    285   # If the output isn't an object file just call compiler
    286   if not full_obj_path:
    287     return exec_and_return(execargs)
    288 
    289   # If this isn't a bisected object just call compiler
    290   # This shouldn't happen!
    291   if not in_object_list(full_obj_path, obj_list):
    292     if CONTINUE_ON_MISSING:
    293       log_file = os.path.join(bisect_dir, '_MISSING_CACHED_OBJ_LOG')
    294       log_to_file(log_file, execargs, '? compiler', full_obj_path)
    295       return exec_and_return(execargs)
    296     else:
    297       raise Error(('%s is missing from cache! To ignore export '
    298                    'BISECT_CONTINUE_ON_MISSING=1. See documentation for more '
    299                    'details on this option.' % full_obj_path))
    300 
    301   cache = which_cache(full_obj_path)
    302 
    303   # If using safe WRAPPER_SAFE_MODE option call compiler and overwrite the
    304   # result from the good/bad cache. This option is safe and covers all compiler
    305   # side effects, but is very slow!
    306   if WRAPPER_SAFE_MODE:
    307     retval = exec_and_return(execargs)
    308     if retval:
    309       return retval
    310     os.remove(full_obj_path)
    311     restore_file(bisect_dir, cache, full_obj_path)
    312     return
    313 
    314   # Generate compiler side effects. Trick Make into thinking compiler was
    315   # actually executed.
    316   for side_effect in get_side_effects(execargs):
    317     restore_file(bisect_dir, cache, side_effect)
    318 
    319   # If generated object file happened to be pruned/cleaned by Make then link it
    320   # over from cache again.
    321   if not os.path.exists(full_obj_path):
    322     restore_file(bisect_dir, cache, full_obj_path)
    323 
    324 
    325 def bisect_driver(bisect_stage, bisect_dir, execargs):
    326   """Call appropriate bisection stage according to value in bisect_stage."""
    327   if bisect_stage == 'POPULATE_GOOD':
    328     bisect_populate(execargs, bisect_dir, GOOD_CACHE)
    329   elif bisect_stage == 'POPULATE_BAD':
    330     bisect_populate(execargs, bisect_dir, BAD_CACHE)
    331   elif bisect_stage == 'TRIAGE':
    332     bisect_triage(execargs, bisect_dir)
    333   else:
    334     raise ValueError('wrong value for BISECT_STAGE: %s' % bisect_stage)
    335