#!/usr/bin/python
#
#===- git-clang-format - ClangFormat Git Integration ---------*- python -*--===#
#
#                     The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
#===------------------------------------------------------------------------===#

r"""
clang-format git integration
============================

This file provides a clang-format integration for git. Put it somewhere in your
path and ensure that it is executable. Then, "git clang-format" will invoke
clang-format on the changes in current files or a specific commit.

For further details, run:
  git clang-format -h

Requires Python 2.7 or Python 3
"""

import argparse
import collections
import contextlib
import errno
import os
import re
import subprocess
import sys

usage = 'git clang-format [OPTIONS] [<commit>] [--] [<file>...]'

desc = '''
Run clang-format on all lines that differ between the working directory
and <commit>, which defaults to HEAD. Changes are only applied to the working
directory.

The following git-config settings set the default of the corresponding option:
  clangFormat.binary
  clangFormat.commit
  clangFormat.extensions
  clangFormat.style
'''

# Name of the temporary index file in which to save the output of clang-format.
# This file is created within the .git directory.
temp_index_basename = 'clang-format-index'


# A run of changed lines: 1-based first line and number of lines.
Range = collections.namedtuple('Range', 'start, count')


def main():
  """Entry point: parse the command line and format the changed lines.

  Computes the lines that differ from the selected commit, runs clang-format
  on those lines only, and then either prints the resulting diff (--diff) or
  applies it to the working directory."""
  config = load_git_config()

  # In order to keep '--' yet allow options after positionals, we need to
  # check for '--' ourselves.  (Setting nargs='*' throws away the '--', while
  # nargs=argparse.REMAINDER disallows options after positionals.)
  argv = sys.argv[1:]
  try:
    idx = argv.index('--')
  except ValueError:
    dash_dash = []
  else:
    dash_dash = argv[idx:]
    argv = argv[:idx]

  default_extensions = ','.join([
      # From clang/lib/Frontend/FrontendOptions.cpp, all lower case
      'c', 'h',  # C
      'm',  # ObjC
      'mm',  # ObjC++
      'cc', 'cp', 'cpp', 'c++', 'cxx', 'hpp',  # C++
      # Other languages that clang-format supports
      'proto', 'protodevel',  # Protocol Buffers
      'js',  # JavaScript
      ])

  p = argparse.ArgumentParser(
      usage=usage, formatter_class=argparse.RawDescriptionHelpFormatter,
      description=desc)
  p.add_argument('--binary',
                 default=config.get('clangformat.binary', 'clang-format'),
                 help='path to clang-format')
  p.add_argument('--commit',
                 default=config.get('clangformat.commit', 'HEAD'),
                 help='default commit to use if none is specified')
  p.add_argument('--diff', action='store_true',
                 help='print a diff instead of applying the changes')
  p.add_argument('--extensions',
                 default=config.get('clangformat.extensions',
                                    default_extensions),
                 help=('comma-separated list of file extensions to format, '
                       'excluding the period and case-insensitive'))
  p.add_argument('-f', '--force', action='store_true',
                 help='allow changes to unstaged files')
  p.add_argument('-p', '--patch', action='store_true',
                 help='select hunks interactively')
  p.add_argument('-q', '--quiet', action='count', default=0,
                 help='print less information')
  p.add_argument('--style',
                 default=config.get('clangformat.style', None),
                 help='passed to clang-format')
  p.add_argument('-v', '--verbose', action='count', default=0,
                 help='print extra information')
  # We gather all the remaining positional arguments into 'args' since we need
  # to use some heuristics to determine whether or not <commit> was present.
  # However, to print pretty messages, we make use of metavar and help.
  p.add_argument('args', nargs='*', metavar='<commit>',
                 help='revision from which to compute the diff')
  p.add_argument('ignored', nargs='*', metavar='<file>...',
                 help='if specified, only consider differences in these files')
  opts = p.parse_args(argv)

  # Fold -q into -v so that a single signed verbosity level is used below.
  opts.verbose -= opts.quiet
  del opts.quiet

  commit, files = interpret_args(opts.args, dash_dash, opts.commit)
  changed_lines = compute_diff_and_extract_lines(commit, files)
  if opts.verbose >= 1:
    # Remember every changed file so the ones dropped by the extension filter
    # can be reported.
    ignored_files = set(changed_lines)
  filter_by_extension(changed_lines, opts.extensions.lower().split(','))
  if opts.verbose >= 1:
    ignored_files.difference_update(changed_lines)
    if ignored_files:
      print('Ignoring changes in the following files (wrong extension):')
      for filename in ignored_files:
        print('  %s' % filename)
    if changed_lines:
      print('Running clang-format on the following files:')
      for filename in changed_lines:
        print('  %s' % filename)
  if not changed_lines:
    print('no modified files to format')
    return
  # The computed diff outputs absolute paths, so we must cd before accessing
  # those files.
  cd_to_toplevel()
  old_tree = create_tree_from_workdir(changed_lines)
  new_tree = run_clang_format_and_save_to_tree(changed_lines,
                                               binary=opts.binary,
                                               style=opts.style)
  if opts.verbose >= 1:
    print('old tree: %s' % old_tree)
    print('new tree: %s' % new_tree)
  if old_tree == new_tree:
    if opts.verbose >= 0:
      print('clang-format did not modify any files')
  elif opts.diff:
    print_diff(old_tree, new_tree)
  else:
    changed_files = apply_changes(old_tree, new_tree, force=opts.force,
                                  patch_mode=opts.patch)
    if (opts.verbose >= 0 and not opts.patch) or opts.verbose >= 1:
      print('changed files:')
      for filename in changed_files:
        print('  %s' % filename)


def load_git_config(non_string_options=None):
  """Return the git configuration as a dictionary.

  All options are assumed to be strings unless in `non_string_options`, which
  is a dictionary mapping option name (in lower case) to either "--bool" or
  "--int"."""
  if non_string_options is None:
    non_string_options = {}
  out = {}
  for entry in run('git', 'config', '--list', '--null').split('\0'):
    if entry:
      # With --null each entry is "<name>\n<value>".  A key that was set
      # without any value is printed as just "<name>", so partition (rather
      # than a two-way unpack of split) keeps such entries from crashing.
      name, _, value = entry.partition('\n')
      if name in non_string_options:
        value = run('git', 'config', non_string_options[name], name)
      out[name] = value
  return out


def interpret_args(args, dash_dash, default_commit):
  """Interpret `args` as "[commit] [--] [files...]" and return (commit, files).

  It is assumed that "--" and everything that follows has been removed from
  args and placed in `dash_dash`.

  If "--" is present (i.e., `dash_dash` is non-empty), the argument to its
  left (if present) is taken as commit.  Otherwise, the first argument is
  checked if it is a commit or a file.  If commit is not given,
  `default_commit` is used."""
  if dash_dash:
    if not args:
      commit = default_commit
    elif len(args) > 1:
      die('at most one commit allowed; %d given' % len(args))
    else:
      commit = args[0]
      object_type = get_object_type(commit)
      if object_type not in ('commit', 'tag'):
        if object_type is None:
          die("'%s' is not a commit" % commit)
        else:
          die("'%s' is a %s, but a commit was expected" % (commit, object_type))
    # dash_dash[0] is the '--' itself.
    files = dash_dash[1:]
  elif args:
    if disambiguate_revision(args[0]):
      commit = args[0]
      files = args[1:]
    else:
      commit = default_commit
      files = args
  else:
    commit = default_commit
    files = []
  return commit, files


def disambiguate_revision(value):
  """Returns True if `value` is a revision, False if it is a file, or dies."""
  # If `value` is ambiguous (neither a commit nor a file), the following
  # command will die with an appropriate error message.
  run('git', 'rev-parse', value, verbose=False)
  object_type = get_object_type(value)
  if object_type is None:
    return False
  if object_type in ('commit', 'tag'):
    return True
  die('`%s` is a %s, but a commit or filename was expected' %
      (value, object_type))


def get_object_type(value):
  """Returns a string description of an object's type, or None if it is not
  a valid git object."""
  cmd = ['git', 'cat-file', '-t', value]
  p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  stdout, stderr = p.communicate()
  if p.returncode != 0:
    return None
  return _to_text(stdout.strip())


def compute_diff_and_extract_lines(commit, files):
  """Calls compute_diff() followed by extract_lines().

  Exits with status 2 if the diff command fails."""
  diff_process = compute_diff(commit, files)
  changed_lines = extract_lines(diff_process.stdout)
  diff_process.stdout.close()
  diff_process.wait()
  if diff_process.returncode != 0:
    # Assume error was already printed to stderr.
    sys.exit(2)
  return changed_lines


def compute_diff(commit, files):
  """Return a subprocess object producing the diff from `commit`.

  The return value's `stdout` file object will produce a patch with the
  differences between the working directory and `commit`, filtered on `files`
  (if non-empty).  Zero context lines are used in the patch."""
  cmd = ['git', 'diff-index', '-p', '-U0', commit, '--']
  cmd.extend(files)
  p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
  p.stdin.close()
  return p


def extract_lines(patch_file):
  """Extract the changed lines in `patch_file`.

  `patch_file` is an iterable of patch lines (text or bytes).  The input must
  have been produced with ``-U0``, meaning unidiff format with zero lines of
  context.

  The return value is a dict mapping filename to a list of line `Range`s.
  Hunks that add no lines (count of 0) are skipped."""
  matches = {}
  for line in patch_file:
    # A subprocess pipe yields bytes on Python 3.
    line = _to_text(line)
    match = re.search(r'^\+\+\+\ [^/]+/(.*)', line)
    if match:
      filename = match.group(1).rstrip('\r\n')
    match = re.search(r'^@@ -[0-9,]+ \+(\d+)(,(\d+))?', line)
    if match:
      start_line = int(match.group(1))
      # The count is omitted from the hunk header when it is exactly 1.
      line_count = 1
      if match.group(3):
        line_count = int(match.group(3))
      if line_count > 0:
        matches.setdefault(filename, []).append(Range(start_line, line_count))
  return matches


def filter_by_extension(dictionary, allowed_extensions):
  """Delete every key in `dictionary` that doesn't have an allowed extension.

  `allowed_extensions` must be a collection of lowercase file extensions,
  excluding the period.  Keys without any extension are always deleted.  The
  dictionary is modified in place."""
  allowed_extensions = frozenset(allowed_extensions)
  # Iterate over a snapshot of the keys: entries are deleted inside the loop.
  for filename in list(dictionary):
    base_ext = filename.rsplit('.', 1)
    if len(base_ext) == 1 or base_ext[1].lower() not in allowed_extensions:
      del dictionary[filename]


def cd_to_toplevel():
  """Change to the top level of the git repository."""
  toplevel = run('git', 'rev-parse', '--show-toplevel')
  os.chdir(toplevel)


def create_tree_from_workdir(filenames):
  """Create a new git tree with the given files from the working directory.

  Returns the object ID (SHA-1) of the created tree."""
  return create_tree(filenames, '--stdin')


def run_clang_format_and_save_to_tree(changed_lines, binary='clang-format',
                                      style=None):
  """Run clang-format on each file and save the result to a git tree.

  `changed_lines` maps filename to a list of line `Range`s to format.

  Returns the object ID (SHA-1) of the created tree."""
  def index_info_generator():
    for filename, line_ranges in changed_lines.items():
      mode = oct(os.stat(filename).st_mode)
      # Python 3 spells octal numbers '0o100644'; rewrite that to the
      # '0100644' form that Python 2 produces and that is passed to git.
      if mode.startswith('0o'):
        mode = '0' + mode[2:]
      blob_id = clang_format_to_blob(filename, line_ranges, binary=binary,
                                     style=style)
      yield '%s %s\t%s' % (mode, blob_id, filename)
  return create_tree(index_info_generator(), '--index-info')


def create_tree(input_lines, mode):
  """Create a tree object from the given input.

  If mode is '--stdin', it must be a list of filenames.  If mode is
  '--index-info' it must be a list of values suitable for "git update-index
  --index-info", such as "<mode> <SP> <sha1> <TAB> <filename>".  Any other mode
  is invalid.

  Returns the object ID (SHA-1) of the created tree."""
  assert mode in ('--stdin', '--index-info')
  cmd = ['git', 'update-index', '--add', '-z', mode]
  with temporary_index_file():
    p = subprocess.Popen(cmd, stdin=subprocess.PIPE)
    for line in input_lines:
      # -z: entries are NUL-terminated.
      p.stdin.write(_to_bytes('%s\0' % line))
    p.stdin.close()
    if p.wait() != 0:
      die('`%s` failed' % ' '.join(cmd))
    tree_id = run('git', 'write-tree')
    return tree_id


def clang_format_to_blob(filename, line_ranges, binary='clang-format',
                         style=None):
  """Run clang-format on the given file and save the result to a git blob.

  Only the lines covered by `line_ranges` are reformatted.

  Returns the object ID (SHA-1) of the created blob."""
  clang_format_cmd = [binary, filename]
  if style:
    clang_format_cmd.extend(['-style='+style])
  clang_format_cmd.extend([
      '-lines=%s:%s' % (start_line, start_line+line_count-1)
      for start_line, line_count in line_ranges])
  try:
    clang_format = subprocess.Popen(clang_format_cmd, stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE)
  except OSError as e:
    if e.errno == errno.ENOENT:
      die('cannot find executable "%s"' % binary)
    else:
      raise
  clang_format.stdin.close()
  # Pipe clang-format's output straight into git so the formatted contents
  # never pass through this process.
  hash_object_cmd = ['git', 'hash-object', '-w', '--path='+filename, '--stdin']
  hash_object = subprocess.Popen(hash_object_cmd, stdin=clang_format.stdout,
                                 stdout=subprocess.PIPE)
  clang_format.stdout.close()
  stdout = hash_object.communicate()[0]
  if hash_object.returncode != 0:
    die('`%s` failed' % ' '.join(hash_object_cmd))
  if clang_format.wait() != 0:
    die('`%s` failed' % ' '.join(clang_format_cmd))
  return _to_text(stdout).rstrip('\r\n')


@contextlib.contextmanager
def temporary_index_file(tree=None):
  """Context manager for setting GIT_INDEX_FILE to a temporary file and deleting
  the file afterward.

  The previous value of GIT_INDEX_FILE (or its absence) is restored on exit."""
  index_path = create_temporary_index(tree)
  old_index_path = os.environ.get('GIT_INDEX_FILE')
  os.environ['GIT_INDEX_FILE'] = index_path
  try:
    yield
  finally:
    if old_index_path is None:
      del os.environ['GIT_INDEX_FILE']
    else:
      os.environ['GIT_INDEX_FILE'] = old_index_path
    os.remove(index_path)


def create_temporary_index(tree=None):
  """Create a temporary index file and return the created file's path.

  If `tree` is not None, use that as the tree to read in.  Otherwise, an
  empty index is created."""
  gitdir = run('git', 'rev-parse', '--git-dir')
  path = os.path.join(gitdir, temp_index_basename)
  if tree is None:
    tree = '--empty'
  run('git', 'read-tree', '--index-output='+path, tree)
  return path


def print_diff(old_tree, new_tree):
  """Print the diff between the two trees to stdout."""
  # We use the porcelain 'diff' and not plumbing 'diff-tree' because the output
  # is expected to be viewed by the user, and only the former does nice things
  # like color and pagination.
  subprocess.check_call(['git', 'diff', old_tree, new_tree, '--'])


def apply_changes(old_tree, new_tree, force=False, patch_mode=False):
  """Apply the changes in `new_tree` to the working directory.

  Bails if there are local changes in those files and not `force`.  If
  `patch_mode`, runs `git checkout --patch` to select hunks interactively.

  Returns the list of files that differ between the two trees."""
  changed_files = run('git', 'diff-tree', '-r', '-z', '--name-only', old_tree,
                      new_tree).rstrip('\0').split('\0')
  if not force:
    # '--' keeps a filename that starts with '-' from being read as an option.
    unstaged_files = run('git', 'diff-files', '--name-status', '--',
                         *changed_files)
    if unstaged_files:
      _print_stderr('The following files would be modified but '
                    'have unstaged changes:')
      _print_stderr(unstaged_files)
      _print_stderr('Please commit, stage, or stash them first.')
      sys.exit(2)
  if patch_mode:
    # In patch mode, we could just as well create an index from the new tree
    # and checkout from that, but then the user will be presented with a
    # message saying "Discard ... from worktree".  Instead, we use the old
    # tree as the index and checkout from new_tree, which gives the slightly
    # better message, "Apply ... to index and worktree".  This is not quite
    # right, since it won't be applied to the user's index, but oh well.
    with temporary_index_file(old_tree):
      subprocess.check_call(['git', 'checkout', '--patch', new_tree])
  else:
    with temporary_index_file(new_tree):
      run('git', 'checkout-index', '-a', '-f')
  return changed_files


def run(*args, **kwargs):
  """Run the command `args` and return its standard output as a string.

  Keyword arguments:
    stdin: data to send to the command's standard input (default: nothing).
    verbose: if true (the default), print a header line naming the command
      before its stderr output or when it fails.
    strip: if true (the default), strip trailing newlines from the output.

  Anything the command writes to stderr is forwarded to stderr.  If the
  command exits with a non-zero status, the script exits with status 2."""
  stdin = kwargs.pop('stdin', '')
  verbose = kwargs.pop('verbose', True)
  strip = kwargs.pop('strip', True)
  for name in kwargs:
    raise TypeError("run() got an unexpected keyword argument '%s'" % name)
  p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                       stdin=subprocess.PIPE)
  # The pipes carry bytes; convert at the boundary so that callers only ever
  # see native strings.
  stdout, stderr = p.communicate(input=_to_bytes(stdin))
  stdout = _to_text(stdout)
  stderr = _to_text(stderr)
  if p.returncode == 0:
    if stderr:
      if verbose:
        _print_stderr('`%s` printed to stderr:' % ' '.join(args))
      _print_stderr(stderr.rstrip())
    if strip:
      stdout = stdout.rstrip('\r\n')
    return stdout
  if verbose:
    _print_stderr('`%s` returned %s' % (' '.join(args), p.returncode))
  if stderr:
    _print_stderr(stderr.rstrip())
  sys.exit(2)


def die(message):
  """Print `message` to stderr as an error and exit with status 2."""
  _print_stderr('error: %s' % message)
  sys.exit(2)


def _print_stderr(message):
  """Write `message` followed by a newline to stderr."""
  sys.stderr.write('%s\n' % message)


def _to_text(data):
  """Return `data` as the native `str` type.

  On Python 2 byte strings already are `str` and are returned unchanged.  On
  Python 3 `bytes` are decoded as UTF-8; undecodable bytes are kept using
  surrogate escapes so that _to_bytes() can restore them exactly."""
  if isinstance(data, bytes) and not isinstance(data, str):
    return data.decode('utf-8', 'surrogateescape')
  return data


def _to_bytes(text):
  """Return `text` as a byte string suitable for writing to a pipe.

  Byte strings are returned unchanged; text is encoded as UTF-8."""
  if isinstance(text, bytes):
    return text
  if str is bytes:
    # Python 2 `unicode`: the 'surrogateescape' handler does not exist there.
    return text.encode('utf-8')
  return text.encode('utf-8', 'surrogateescape')


if __name__ == '__main__':
  main()