Home | History | Annotate | Download | only in bin
      1 # Copyright 2016 Google Inc. All Rights Reserved.
      2 #
      3 # This script is used to help the compiler wrapper in the Android build system
      4 # bisect for bad object files.
      5 """Utilities for bisection of Android object files.
      6 
      7 This module contains a set of utilities to allow bisection between
      8 two sets (good and bad) of object files. Mostly used to find compiler
      9 bugs.
     10 
     11 Reference page:
     12 https://sites.google.com/a/google.com/chromeos-toolchain-team-home2/home/team-tools-and-scripts/bisecting-chromeos-compiler-problems/bisection-compiler-wrapper
     13 
     14 Design doc:
     15 https://docs.google.com/document/d/1yDgaUIa2O5w6dc3sSTe1ry-1ehKajTGJGQCbyn0fcEM
     16 """
     17 
     18 from __future__ import print_function
     19 
     20 import contextlib
     21 import fcntl
     22 import os
     23 import shutil
     24 import subprocess
     25 import sys
     26 
     27 VALID_MODES = ['POPULATE_GOOD', 'POPULATE_BAD', 'TRIAGE']
     28 GOOD_CACHE = 'good'
     29 BAD_CACHE = 'bad'
     30 LIST_FILE = os.path.join(GOOD_CACHE, '_LIST')
     31 
     32 CONTINUE_ON_MISSING = os.environ.get('BISECT_CONTINUE_ON_MISSING', None) == '1'
     33 WRAPPER_SAFE_MODE = os.environ.get('BISECT_WRAPPER_SAFE_MODE', None) == '1'
     34 
     35 
     36 class Error(Exception):
     37   """The general compiler wrapper error class."""
     38   pass
     39 
     40 
     41 @contextlib.contextmanager
     42 def lock_file(path, mode):
     43   """Lock file and block if other process has lock on file.
     44 
     45   Acquire exclusive lock for file. Only blocks other processes if they attempt
     46   to also acquire lock through this method. If only reading (modes 'r' and 'rb')
     47   then the lock is shared (i.e. many reads can happen concurrently, but only one
     48   process may write at a time).
     49 
     50   This function is a contextmanager, meaning it's meant to be used with the
     51   "with" statement in Python. This is so cleanup and setup happens automatically
     52   and cleanly. Execution of the outer "with" statement happens at the "yield"
     53   statement. Execution resumes after the yield when the outer "with" statement
     54   ends.
     55 
     56   Args:
     57     path: path to file being locked
     58     mode: mode to open file with ('w', 'r', etc.)
     59   """
     60   with open(path, mode) as f:
     61     # Share the lock if just reading, make lock exclusive if writing
     62     if f.mode == 'r' or f.mode == 'rb':
     63       lock_type = fcntl.LOCK_SH
     64     else:
     65       lock_type = fcntl.LOCK_EX
     66 
     67     try:
     68       fcntl.lockf(f, lock_type)
     69       yield f
     70       f.flush()
     71     except:
     72       raise
     73     finally:
     74       fcntl.lockf(f, fcntl.LOCK_UN)
     75 
     76 
     77 def log_to_file(path, execargs, link_from=None, link_to=None):
     78   """Common logging function.
     79 
     80   Log current working directory, current execargs, and a from-to relationship
     81   between files.
     82   """
     83   with lock_file(path, 'a') as log:
     84     log.write('cd: %s; %s\n' % (os.getcwd(), ' '.join(execargs)))
     85     if link_from and link_to:
     86       log.write('%s -> %s\n' % (link_from, link_to))
     87 
     88 
     89 def exec_and_return(execargs):
     90   """Execute process and return.
     91 
     92   Execute according to execargs and return immediately. Don't inspect
     93   stderr or stdout.
     94   """
     95   return subprocess.call(execargs)
     96 
     97 
     98 def which_cache(obj_file):
     99   """Determine which cache an object belongs to.
    100 
    101   The binary search tool creates two files for each search iteration listing
    102   the full set of bad objects and full set of good objects. We use this to
    103   determine where an object file should be linked from (good or bad).
    104   """
    105   bad_set_file = os.environ.get('BISECT_BAD_SET')
    106   ret = subprocess.call(['grep', '-x', '-q', obj_file, bad_set_file])
    107   if ret == 0:
    108     return BAD_CACHE
    109   else:
    110     return GOOD_CACHE
    111 
    112 
    113 def makedirs(path):
    114   """Try to create directories in path."""
    115   try:
    116     os.makedirs(path)
    117   except os.error:
    118     if not os.path.isdir(path):
    119       raise
    120 
    121 
    122 def get_obj_path(execargs):
    123   """Get the object path for the object file in the list of arguments.
    124 
    125   Returns:
    126     Absolute object path from execution args (-o argument). If no object being
    127     outputted or output doesn't end in ".o" then return empty string.
    128   """
    129   try:
    130     i = execargs.index('-o')
    131   except ValueError:
    132     return ''
    133 
    134   obj_path = execargs[i + 1]
    135   if not obj_path.endswith(('.o',)):
    136     # TODO: what suffixes do we need to contemplate
    137     # TODO: add this as a warning
    138     # TODO: need to handle -r compilations
    139     return ''
    140 
    141   return os.path.abspath(obj_path)
    142 
    143 
    144 def get_dep_path(execargs):
    145   """Get the dep file path for the dep file in the list of arguments.
    146 
    147   Returns:
    148     Absolute path of dependency file path from execution args (-o argument). If
    149     no dependency being outputted then return empty string.
    150   """
    151   if '-MD' not in execargs and '-MMD' not in execargs:
    152     return ''
    153 
    154   # If -MF given this is the path of the dependency file. Otherwise the
    155   # dependency file is the value of -o but with a .d extension
    156   if '-MF' in execargs:
    157     i = execargs.index('-MF')
    158     dep_path = execargs[i + 1]
    159     return os.path.abspath(dep_path)
    160 
    161   full_obj_path = get_obj_path(execargs)
    162   if not full_obj_path:
    163     return ''
    164 
    165   return full_obj_path[:-2] + '.d'
    166 
    167 
    168 def get_dwo_path(execargs):
    169   """Get the dwo file path for the dwo file in the list of arguments.
    170 
    171   Returns:
    172     Absolute dwo file path from execution args (-gsplit-dwarf argument) If no
    173     dwo file being outputted then return empty string.
    174   """
    175   if '-gsplit-dwarf' not in execargs:
    176     return ''
    177 
    178   full_obj_path = get_obj_path(execargs)
    179   if not full_obj_path:
    180     return ''
    181 
    182   return full_obj_path[:-2] + '.dwo'
    183 
    184 
    185 def in_object_list(obj_name, list_filename):
    186   """Check if object file name exist in file with object list."""
    187   if not obj_name:
    188     return False
    189 
    190   with lock_file(list_filename, 'r') as list_file:
    191     for line in list_file:
    192       if line.strip() == obj_name:
    193         return True
    194 
    195     return False
    196 
    197 
    198 def get_side_effects(execargs):
    199   """Determine side effects generated by compiler
    200 
    201   Returns:
    202     List of paths of objects that the compiler generates as side effects.
    203   """
    204   side_effects = []
    205 
    206   # Cache dependency files
    207   full_dep_path = get_dep_path(execargs)
    208   if full_dep_path:
    209     side_effects.append(full_dep_path)
    210 
    211   # Cache dwo files
    212   full_dwo_path = get_dwo_path(execargs)
    213   if full_dwo_path:
    214     side_effects.append(full_dwo_path)
    215 
    216   return side_effects
    217 
    218 
    219 def cache_file(execargs, bisect_dir, cache, abs_file_path):
    220   """Cache compiler output file (.o/.d/.dwo)."""
    221   # os.path.join fails with absolute paths, use + instead
    222   bisect_path = os.path.join(bisect_dir, cache) + abs_file_path
    223   bisect_path_dir = os.path.dirname(bisect_path)
    224   makedirs(bisect_path_dir)
    225   pop_log = os.path.join(bisect_dir, cache, '_POPULATE_LOG')
    226   log_to_file(pop_log, execargs, abs_file_path, bisect_path)
    227 
    228   try:
    229     if os.path.exists(abs_file_path):
    230       shutil.copy2(abs_file_path, bisect_path)
    231   except Exception:
    232     print('Could not cache file %s' % abs_file_path, file=sys.stderr)
    233     raise
    234 
    235 
    236 def restore_file(bisect_dir, cache, abs_file_path):
    237   """Restore file from cache (.o/.d/.dwo)."""
    238   # os.path.join fails with absolute paths, use + instead
    239   cached_path = os.path.join(bisect_dir, cache) + abs_file_path
    240   if os.path.exists(cached_path):
    241     if os.path.exists(abs_file_path):
    242       os.remove(abs_file_path)
    243     try:
    244       os.link(cached_path, abs_file_path)
    245     except OSError:
    246       shutil.copyfile(cached_path, abs_file_path)
    247   else:
    248     raise Error(('%s is missing from %s cache! Unsure how to proceed. Make '
    249                  'will now crash.' % (cache, cached_path)))
    250 
    251 
    252 def bisect_populate(execargs, bisect_dir, population_name):
    253   """Add necessary information to the bisect cache for the given execution.
    254 
    255   Extract the necessary information for bisection from the compiler
    256   execution arguments and put it into the bisection cache. This
    257   includes copying the created object file, adding the object
    258   file path to the cache list and keeping a log of the execution.
    259 
    260   Args:
    261     execargs: compiler execution arguments.
    262     bisect_dir: bisection directory.
    263     population_name: name of the cache being populated (good/bad).
    264   """
    265   retval = exec_and_return(execargs)
    266   if retval:
    267     return retval
    268 
    269   full_obj_path = get_obj_path(execargs)
    270   # If not a normal compiler call then just exit
    271   if not full_obj_path:
    272     return
    273 
    274   cache_file(execargs, bisect_dir, population_name, full_obj_path)
    275 
    276   population_dir = os.path.join(bisect_dir, population_name)
    277   with lock_file(os.path.join(population_dir, '_LIST'), 'a') as object_list:
    278     object_list.write('%s\n' % full_obj_path)
    279 
    280   for side_effect in get_side_effects(execargs):
    281     cache_file(execargs, bisect_dir, population_name, side_effect)
    282 
    283 
    284 def bisect_triage(execargs, bisect_dir):
    285   full_obj_path = get_obj_path(execargs)
    286   obj_list = os.path.join(bisect_dir, LIST_FILE)
    287 
    288   # If the output isn't an object file just call compiler
    289   if not full_obj_path:
    290     return exec_and_return(execargs)
    291 
    292   # If this isn't a bisected object just call compiler
    293   # This shouldn't happen!
    294   if not in_object_list(full_obj_path, obj_list):
    295     if CONTINUE_ON_MISSING:
    296       log_file = os.path.join(bisect_dir, '_MISSING_CACHED_OBJ_LOG')
    297       log_to_file(log_file, execargs, '? compiler', full_obj_path)
    298       return exec_and_return(execargs)
    299     else:
    300       raise Error(('%s is missing from cache! To ignore export '
    301                    'BISECT_CONTINUE_ON_MISSING=1. See documentation for more '
    302                    'details on this option.' % full_obj_path))
    303 
    304   cache = which_cache(full_obj_path)
    305 
    306   # If using safe WRAPPER_SAFE_MODE option call compiler and overwrite the
    307   # result from the good/bad cache. This option is safe and covers all compiler
    308   # side effects, but is very slow!
    309   if WRAPPER_SAFE_MODE:
    310     retval = exec_and_return(execargs)
    311     if retval:
    312       return retval
    313     os.remove(full_obj_path)
    314     restore_file(bisect_dir, cache, full_obj_path)
    315     return
    316 
    317   # Generate compiler side effects. Trick Make into thinking compiler was
    318   # actually executed.
    319   for side_effect in get_side_effects(execargs):
    320     restore_file(bisect_dir, cache, side_effect)
    321 
    322   # If generated object file happened to be pruned/cleaned by Make then link it
    323   # over from cache again.
    324   if not os.path.exists(full_obj_path):
    325     restore_file(bisect_dir, cache, full_obj_path)
    326 
    327 
    328 def bisect_driver(bisect_stage, bisect_dir, execargs):
    329   """Call appropriate bisection stage according to value in bisect_stage."""
    330   if bisect_stage == 'POPULATE_GOOD':
    331     bisect_populate(execargs, bisect_dir, GOOD_CACHE)
    332   elif bisect_stage == 'POPULATE_BAD':
    333     bisect_populate(execargs, bisect_dir, BAD_CACHE)
    334   elif bisect_stage == 'TRIAGE':
    335     bisect_triage(execargs, bisect_dir)
    336   else:
    337     raise ValueError('wrong value for BISECT_STAGE: %s' % bisect_stage)
    338