1 #!/usr/bin/env python 2 # Copyright (c) 2013 The Chromium Authors. All rights reserved. 3 # Use of this source code is governed by a BSD-style license that can be 4 # found in the LICENSE file. 5 6 """Wrapper script to help run clang tools across Chromium code. 7 8 How to use this tool: 9 If you want to run the tool across all Chromium code: 10 run_tool.py <tool> <path/to/compiledb> 11 12 If you only want to run the tool across just chrome/browser and content/browser: 13 run_tool.py <tool> <path/to/compiledb> chrome/browser content/browser 14 15 Please see https://code.google.com/p/chromium/wiki/ClangToolRefactoring for more 16 information, which documents the entire automated refactoring flow in Chromium. 17 18 Why use this tool: 19 The clang tool implementation doesn't take advantage of multiple cores, and if 20 it fails mysteriously in the middle, all the generated replacements will be 21 lost. 22 23 Unfortunately, if the work is simply sharded across multiple cores by running 24 multiple RefactoringTools, problems arise when they attempt to rewrite a file at 25 the same time. To work around that, clang tools that are run using this tool 26 should output edits to stdout in the following format: 27 28 ==== BEGIN EDITS ==== 29 r:<file path>:<offset>:<length>:<replacement text> 30 r:<file path>:<offset>:<length>:<replacement text> 31 ...etc... 32 ==== END EDITS ==== 33 34 Any generated edits are applied once the clang tool has finished running 35 across Chromium, regardless of whether some instances failed or not. 36 """ 37 38 import collections 39 import functools 40 import multiprocessing 41 import os.path 42 import subprocess 43 import sys 44 45 46 Edit = collections.namedtuple( 47 'Edit', ('edit_type', 'offset', 'length', 'replacement')) 48 49 50 def _GetFilesFromGit(paths = None): 51 """Gets the list of files in the git repository. 52 53 Args: 54 paths: Prefix filter for the returned paths. May contain multiple entries. 55 """ 56 args = ['git', 'ls-files'] 57 if paths: 58 args.extend(paths) 59 command = subprocess.Popen(args, stdout=subprocess.PIPE) 60 output, _ = command.communicate() 61 return output.splitlines() 62 63 64 def _ExtractEditsFromStdout(build_directory, stdout): 65 """Extracts generated list of edits from the tool's stdout. 66 67 The expected format is documented at the top of this file. 68 69 Args: 70 build_directory: Directory that contains the compile database. Used to 71 normalize the filenames. 72 stdout: The stdout from running the clang tool. 73 74 Returns: 75 A dictionary mapping filenames to the associated edits. 76 """ 77 lines = stdout.splitlines() 78 start_index = lines.index('==== BEGIN EDITS ====') 79 end_index = lines.index('==== END EDITS ====') 80 edits = collections.defaultdict(list) 81 for line in lines[start_index + 1:end_index]: 82 try: 83 edit_type, path, offset, length, replacement = line.split(':', 4) 84 # Normalize the file path emitted by the clang tool to be relative to the 85 # current working directory. 86 path = os.path.relpath(os.path.join(build_directory, path)) 87 edits[path].append(Edit(edit_type, int(offset), int(length), replacement)) 88 except ValueError: 89 print 'Unable to parse edit: %s' % line 90 return edits 91 92 93 def _ExecuteTool(toolname, build_directory, filename): 94 """Executes the tool. 95 96 This is defined outside the class so it can be pickled for the multiprocessing 97 module. 98 99 Args: 100 toolname: Path to the tool to execute. 101 build_directory: Directory that contains the compile database. 102 filename: The file to run the tool over. 103 104 Returns: 105 A dictionary that must contain the key "status" and a boolean value 106 associated with it. 107 108 If status is True, then the generated edits are stored with the key "edits" 109 in the dictionary. 110 111 Otherwise, the filename and the output from stderr are associated with the 112 keys "filename" and "stderr" respectively. 113 """ 114 command = subprocess.Popen((toolname, '-p', build_directory, filename), 115 stdout=subprocess.PIPE, 116 stderr=subprocess.PIPE) 117 stdout, stderr = command.communicate() 118 if command.returncode != 0: 119 return {'status': False, 'filename': filename, 'stderr': stderr} 120 else: 121 return {'status': True, 122 'edits': _ExtractEditsFromStdout(build_directory, stdout)} 123 124 125 class _CompilerDispatcher(object): 126 """Multiprocessing controller for running clang tools in parallel.""" 127 128 def __init__(self, toolname, build_directory, filenames): 129 """Initializer method. 130 131 Args: 132 toolname: Path to the tool to execute. 133 build_directory: Directory that contains the compile database. 134 filenames: The files to run the tool over. 135 """ 136 self.__toolname = toolname 137 self.__build_directory = build_directory 138 self.__filenames = filenames 139 self.__success_count = 0 140 self.__failed_count = 0 141 self.__edits = collections.defaultdict(list) 142 143 @property 144 def edits(self): 145 return self.__edits 146 147 @property 148 def failed_count(self): 149 return self.__failed_count 150 151 def Run(self): 152 """Does the grunt work.""" 153 pool = multiprocessing.Pool() 154 result_iterator = pool.imap_unordered( 155 functools.partial(_ExecuteTool, self.__toolname, 156 self.__build_directory), 157 self.__filenames) 158 for result in result_iterator: 159 self.__ProcessResult(result) 160 sys.stdout.write('\n') 161 sys.stdout.flush() 162 163 def __ProcessResult(self, result): 164 """Handles result processing. 165 166 Args: 167 result: The result dictionary returned by _ExecuteTool. 168 """ 169 if result['status']: 170 self.__success_count += 1 171 for k, v in result['edits'].iteritems(): 172 self.__edits[k].extend(v) 173 else: 174 self.__failed_count += 1 175 sys.stdout.write('\nFailed to process %s\n' % result['filename']) 176 sys.stdout.write(result['stderr']) 177 sys.stdout.write('\n') 178 percentage = ( 179 float(self.__success_count + self.__failed_count) / 180 len(self.__filenames)) * 100 181 sys.stdout.write('Succeeded: %d, Failed: %d [%.2f%%]\r' % ( 182 self.__success_count, self.__failed_count, percentage)) 183 sys.stdout.flush() 184 185 186 def _ApplyEdits(edits, clang_format_diff_path): 187 """Apply the generated edits. 188 189 Args: 190 edits: A dict mapping filenames to Edit instances that apply to that file. 191 clang_format_diff_path: Path to the clang-format-diff.py helper to help 192 automatically reformat diffs to avoid style violations. Pass None if the 193 clang-format step should be skipped. 194 """ 195 edit_count = 0 196 for k, v in edits.iteritems(): 197 # Sort the edits and iterate through them in reverse order. Sorting allows 198 # duplicate edits to be quickly skipped, while reversing means that 199 # subsequent edits don't need to have their offsets updated with each edit 200 # applied. 201 v.sort() 202 last_edit = None 203 with open(k, 'rb+') as f: 204 contents = bytearray(f.read()) 205 for edit in reversed(v): 206 if edit == last_edit: 207 continue 208 last_edit = edit 209 contents[edit.offset:edit.offset + edit.length] = edit.replacement 210 if not edit.replacement: 211 _ExtendDeletionIfElementIsInList(contents, edit.offset) 212 edit_count += 1 213 f.seek(0) 214 f.truncate() 215 f.write(contents) 216 if clang_format_diff_path: 217 if subprocess.call('git diff -U0 %s | python %s -style=Chromium' % ( 218 k, clang_format_diff_path), shell=True) != 0: 219 print 'clang-format failed for %s' % k 220 print 'Applied %d edits to %d files' % (edit_count, len(edits)) 221 222 223 _WHITESPACE_BYTES = frozenset((ord('\t'), ord('\n'), ord('\r'), ord(' '))) 224 225 226 def _ExtendDeletionIfElementIsInList(contents, offset): 227 """Extends the range of a deletion if the deleted element was part of a list. 228 229 This rewriter helper makes it easy for refactoring tools to remove elements 230 from a list. Even if a matcher callback knows that it is removing an element 231 from a list, it may not have enough information to accurately remove the list 232 element; for example, another matcher callback may end up removing an adjacent 233 list element, or all the list elements may end up being removed. 234 235 With this helper, refactoring tools can simply remove the list element and not 236 worry about having to include the comma in the replacement. 237 238 Args: 239 contents: A bytearray with the deletion already applied. 240 offset: The offset in the bytearray where the deleted range used to be. 241 """ 242 char_before = char_after = None 243 left_trim_count = 0 244 for byte in reversed(contents[:offset]): 245 left_trim_count += 1 246 if byte in _WHITESPACE_BYTES: 247 continue 248 if byte in (ord(','), ord(':'), ord('('), ord('{')): 249 char_before = chr(byte) 250 break 251 252 right_trim_count = 0 253 for byte in contents[offset:]: 254 right_trim_count += 1 255 if byte in _WHITESPACE_BYTES: 256 continue 257 if byte == ord(','): 258 char_after = chr(byte) 259 break 260 261 if char_before: 262 if char_after: 263 del contents[offset:offset + right_trim_count] 264 elif char_before in (',', ':'): 265 del contents[offset - left_trim_count:offset] 266 267 268 def main(argv): 269 if len(argv) < 2: 270 print 'Usage: run_tool.py <clang tool> <compile DB> <path 1> <path 2> ...' 271 print ' <clang tool> is the clang tool that should be run.' 272 print ' <compile db> is the directory that contains the compile database' 273 print ' <path 1> <path2> ... can be used to filter what files are edited' 274 return 1 275 276 clang_format_diff_path = os.path.join( 277 os.path.dirname(os.path.realpath(__file__)), 278 '../../../third_party/llvm/tools/clang/tools/clang-format', 279 'clang-format-diff.py') 280 # TODO(dcheng): Allow this to be controlled with a flag as well. 281 if not os.path.isfile(clang_format_diff_path): 282 clang_format_diff_path = None 283 284 filenames = frozenset(_GetFilesFromGit(argv[2:])) 285 # Filter out files that aren't C/C++/Obj-C/Obj-C++. 286 extensions = frozenset(('.c', '.cc', '.m', '.mm')) 287 dispatcher = _CompilerDispatcher(argv[0], argv[1], 288 [f for f in filenames 289 if os.path.splitext(f)[1] in extensions]) 290 dispatcher.Run() 291 # Filter out edits to files that aren't in the git repository, since it's not 292 # useful to modify files that aren't under source control--typically, these 293 # are generated files or files in a git submodule that's not part of Chromium. 294 _ApplyEdits({k : v for k, v in dispatcher.edits.iteritems() 295 if k in filenames}, 296 clang_format_diff_path) 297 if dispatcher.failed_count != 0: 298 return 2 299 return 0 300 301 302 if __name__ == '__main__': 303 sys.exit(main(sys.argv[1:])) 304