Home | History | Annotate | Download | only in scripts
      1 #!/usr/bin/env python
      2 # Copyright (c) 2013 The Chromium Authors. All rights reserved.
      3 # Use of this source code is governed by a BSD-style license that can be
      4 # found in the LICENSE file.
      5 
      6 """Wrapper script to help run clang tools across Chromium code.
      7 
      8 How to use this tool:
      9 If you want to run the tool across all Chromium code:
     10 run_tool.py <tool> <path/to/compiledb>
     11 
     12 If you only want to run the tool across just chrome/browser and content/browser:
     13 run_tool.py <tool> <path/to/compiledb> chrome/browser content/browser
     14 
     15 Please see https://code.google.com/p/chromium/wiki/ClangToolRefactoring for more
     16 information, which documents the entire automated refactoring flow in Chromium.
     17 
     18 Why use this tool:
     19 The clang tool implementation doesn't take advantage of multiple cores, and if
     20 it fails mysteriously in the middle, all the generated replacements will be
     21 lost.
     22 
     23 Unfortunately, if the work is simply sharded across multiple cores by running
     24 multiple RefactoringTools, problems arise when they attempt to rewrite a file at
     25 the same time. To work around that, clang tools that are run using this tool
     26 should output edits to stdout in the following format:
     27 
     28 ==== BEGIN EDITS ====
     29 r:<file path>:<offset>:<length>:<replacement text>
     30 r:<file path>:<offset>:<length>:<replacement text>
     31 ...etc...
     32 ==== END EDITS ====
     33 
     34 Any generated edits are applied once the clang tool has finished running
     35 across Chromium, regardless of whether some instances failed or not.
     36 """
     37 
     38 import collections
     39 import functools
     40 import multiprocessing
     41 import os.path
     42 import subprocess
     43 import sys
     44 
     45 
     46 Edit = collections.namedtuple(
     47     'Edit', ('edit_type', 'offset', 'length', 'replacement'))
     48 
     49 
     50 def _GetFilesFromGit(paths = None):
     51   """Gets the list of files in the git repository.
     52 
     53   Args:
     54     paths: Prefix filter for the returned paths. May contain multiple entries.
     55   """
     56   args = ['git', 'ls-files']
     57   if paths:
     58     args.extend(paths)
     59   command = subprocess.Popen(args, stdout=subprocess.PIPE)
     60   output, _ = command.communicate()
     61   return output.splitlines()
     62 
     63 
     64 def _ExtractEditsFromStdout(build_directory, stdout):
     65   """Extracts generated list of edits from the tool's stdout.
     66 
     67   The expected format is documented at the top of this file.
     68 
     69   Args:
     70     build_directory: Directory that contains the compile database. Used to
     71       normalize the filenames.
     72     stdout: The stdout from running the clang tool.
     73 
     74   Returns:
     75     A dictionary mapping filenames to the associated edits.
     76   """
     77   lines = stdout.splitlines()
     78   start_index = lines.index('==== BEGIN EDITS ====')
     79   end_index = lines.index('==== END EDITS ====')
     80   edits = collections.defaultdict(list)
     81   for line in lines[start_index + 1:end_index]:
     82     try:
     83       edit_type, path, offset, length, replacement = line.split(':', 4)
     84       # Normalize the file path emitted by the clang tool to be relative to the
     85       # current working directory.
     86       path = os.path.relpath(os.path.join(build_directory, path))
     87       edits[path].append(Edit(edit_type, int(offset), int(length), replacement))
     88     except ValueError:
     89       print 'Unable to parse edit: %s' % line
     90   return edits
     91 
     92 
     93 def _ExecuteTool(toolname, build_directory, filename):
     94   """Executes the tool.
     95 
     96   This is defined outside the class so it can be pickled for the multiprocessing
     97   module.
     98 
     99   Args:
    100     toolname: Path to the tool to execute.
    101     build_directory: Directory that contains the compile database.
    102     filename: The file to run the tool over.
    103 
    104   Returns:
    105     A dictionary that must contain the key "status" and a boolean value
    106     associated with it.
    107 
    108     If status is True, then the generated edits are stored with the key "edits"
    109     in the dictionary.
    110 
    111     Otherwise, the filename and the output from stderr are associated with the
    112     keys "filename" and "stderr" respectively.
    113   """
    114   command = subprocess.Popen((toolname, '-p', build_directory, filename),
    115                              stdout=subprocess.PIPE,
    116                              stderr=subprocess.PIPE)
    117   stdout, stderr = command.communicate()
    118   if command.returncode != 0:
    119     return {'status': False, 'filename': filename, 'stderr': stderr}
    120   else:
    121     return {'status': True,
    122             'edits': _ExtractEditsFromStdout(build_directory, stdout)}
    123 
    124 
    125 class _CompilerDispatcher(object):
    126   """Multiprocessing controller for running clang tools in parallel."""
    127 
    128   def __init__(self, toolname, build_directory, filenames):
    129     """Initializer method.
    130 
    131     Args:
    132       toolname: Path to the tool to execute.
    133       build_directory: Directory that contains the compile database.
    134       filenames: The files to run the tool over.
    135     """
    136     self.__toolname = toolname
    137     self.__build_directory = build_directory
    138     self.__filenames = filenames
    139     self.__success_count = 0
    140     self.__failed_count = 0
    141     self.__edits = collections.defaultdict(list)
    142 
    143   @property
    144   def edits(self):
    145     return self.__edits
    146 
    147   @property
    148   def failed_count(self):
    149     return self.__failed_count
    150 
    151   def Run(self):
    152     """Does the grunt work."""
    153     pool = multiprocessing.Pool()
    154     result_iterator = pool.imap_unordered(
    155         functools.partial(_ExecuteTool, self.__toolname,
    156                           self.__build_directory),
    157         self.__filenames)
    158     for result in result_iterator:
    159       self.__ProcessResult(result)
    160     sys.stdout.write('\n')
    161     sys.stdout.flush()
    162 
    163   def __ProcessResult(self, result):
    164     """Handles result processing.
    165 
    166     Args:
    167       result: The result dictionary returned by _ExecuteTool.
    168     """
    169     if result['status']:
    170       self.__success_count += 1
    171       for k, v in result['edits'].iteritems():
    172         self.__edits[k].extend(v)
    173     else:
    174       self.__failed_count += 1
    175       sys.stdout.write('\nFailed to process %s\n' % result['filename'])
    176       sys.stdout.write(result['stderr'])
    177       sys.stdout.write('\n')
    178     percentage = (
    179         float(self.__success_count + self.__failed_count) /
    180         len(self.__filenames)) * 100
    181     sys.stdout.write('Succeeded: %d, Failed: %d [%.2f%%]\r' % (
    182         self.__success_count, self.__failed_count, percentage))
    183     sys.stdout.flush()
    184 
    185 
    186 def _ApplyEdits(edits, clang_format_diff_path):
    187   """Apply the generated edits.
    188 
    189   Args:
    190     edits: A dict mapping filenames to Edit instances that apply to that file.
    191     clang_format_diff_path: Path to the clang-format-diff.py helper to help
    192       automatically reformat diffs to avoid style violations. Pass None if the
    193       clang-format step should be skipped.
    194   """
    195   edit_count = 0
    196   for k, v in edits.iteritems():
    197     # Sort the edits and iterate through them in reverse order. Sorting allows
    198     # duplicate edits to be quickly skipped, while reversing means that
    199     # subsequent edits don't need to have their offsets updated with each edit
    200     # applied.
    201     v.sort()
    202     last_edit = None
    203     with open(k, 'rb+') as f:
    204       contents = bytearray(f.read())
    205       for edit in reversed(v):
    206         if edit == last_edit:
    207           continue
    208         last_edit = edit
    209         contents[edit.offset:edit.offset + edit.length] = edit.replacement
    210         if not edit.replacement:
    211           _ExtendDeletionIfElementIsInList(contents, edit.offset)
    212         edit_count += 1
    213       f.seek(0)
    214       f.truncate()
    215       f.write(contents)
    216     if clang_format_diff_path:
    217       if subprocess.call('git diff -U0 %s | python %s -style=Chromium' % (
    218           k, clang_format_diff_path), shell=True) != 0:
    219         print 'clang-format failed for %s' % k
    220   print 'Applied %d edits to %d files' % (edit_count, len(edits))
    221 
    222 
    223 _WHITESPACE_BYTES = frozenset((ord('\t'), ord('\n'), ord('\r'), ord(' ')))
    224 
    225 
    226 def _ExtendDeletionIfElementIsInList(contents, offset):
    227   """Extends the range of a deletion if the deleted element was part of a list.
    228 
    229   This rewriter helper makes it easy for refactoring tools to remove elements
    230   from a list. Even if a matcher callback knows that it is removing an element
    231   from a list, it may not have enough information to accurately remove the list
    232   element; for example, another matcher callback may end up removing an adjacent
    233   list element, or all the list elements may end up being removed.
    234 
    235   With this helper, refactoring tools can simply remove the list element and not
    236   worry about having to include the comma in the replacement.
    237 
    238   Args:
    239     contents: A bytearray with the deletion already applied.
    240     offset: The offset in the bytearray where the deleted range used to be.
    241   """
    242   char_before = char_after = None
    243   left_trim_count = 0
    244   for byte in reversed(contents[:offset]):
    245     left_trim_count += 1
    246     if byte in _WHITESPACE_BYTES:
    247       continue
    248     if byte in (ord(','), ord(':'), ord('('), ord('{')):
    249       char_before = chr(byte)
    250     break
    251 
    252   right_trim_count = 0
    253   for byte in contents[offset:]:
    254     right_trim_count += 1
    255     if byte in _WHITESPACE_BYTES:
    256       continue
    257     if byte == ord(','):
    258       char_after = chr(byte)
    259     break
    260 
    261   if char_before:
    262     if char_after:
    263       del contents[offset:offset + right_trim_count]
    264     elif char_before in (',', ':'):
    265       del contents[offset - left_trim_count:offset]
    266 
    267 
    268 def main(argv):
    269   if len(argv) < 2:
    270     print 'Usage: run_tool.py <clang tool> <compile DB> <path 1> <path 2> ...'
    271     print '  <clang tool> is the clang tool that should be run.'
    272     print '  <compile db> is the directory that contains the compile database'
    273     print '  <path 1> <path2> ... can be used to filter what files are edited'
    274     return 1
    275 
    276   clang_format_diff_path = os.path.join(
    277       os.path.dirname(os.path.realpath(__file__)),
    278       '../../../third_party/llvm/tools/clang/tools/clang-format',
    279       'clang-format-diff.py')
    280   # TODO(dcheng): Allow this to be controlled with a flag as well.
    281   if not os.path.isfile(clang_format_diff_path):
    282     clang_format_diff_path = None
    283 
    284   filenames = frozenset(_GetFilesFromGit(argv[2:]))
    285   # Filter out files that aren't C/C++/Obj-C/Obj-C++.
    286   extensions = frozenset(('.c', '.cc', '.m', '.mm'))
    287   dispatcher = _CompilerDispatcher(argv[0], argv[1],
    288                                    [f for f in filenames
    289                                     if os.path.splitext(f)[1] in extensions])
    290   dispatcher.Run()
    291   # Filter out edits to files that aren't in the git repository, since it's not
    292   # useful to modify files that aren't under source control--typically, these
    293   # are generated files or files in a git submodule that's not part of Chromium.
    294   _ApplyEdits({k : v for k, v in dispatcher.edits.iteritems()
    295                     if k in filenames},
    296               clang_format_diff_path)
    297   if dispatcher.failed_count != 0:
    298     return 2
    299   return 0
    300 
    301 
    302 if __name__ == '__main__':
    303   sys.exit(main(sys.argv[1:]))
    304