#!/usr/bin/env python
#
#===- git-clang-format - ClangFormat Git Integration ---------*- python -*--===#
#
#                     The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
#===------------------------------------------------------------------------===#

r"""
clang-format git integration
============================

This file provides a clang-format integration for git. Put it somewhere in your
path and ensure that it is executable. Then, "git clang-format" will invoke
clang-format on the changes in current files or a specific commit.

For further details, run:
    git clang-format -h

Requires Python 2.7 or Python 3
"""

import argparse
import collections
import contextlib
import errno
import os
import re
import subprocess
import sys

# NOTE: to stay source-compatible with both Python 2.7 and Python 3 without a
# `from __future__` import, print() is always called with exactly one
# pre-formatted string, and messages for stderr go through print_stderr().

usage = 'git clang-format [OPTIONS] [<commit>] [<commit>] [--] [<file>...]'

desc = '''
If zero or one commits are given, run clang-format on all lines that differ
between the working directory and <commit>, which defaults to HEAD. Changes are
only applied to the working directory.

If two commits are given (requires --diff), run clang-format on all lines in the
second <commit> that differ from the first <commit>.

The following git-config settings set the default of the corresponding option:
  clangFormat.binary
  clangFormat.commit
  clangFormat.extensions
  clangFormat.style
'''

# Name of the temporary index file in which to save the output of clang-format.
# This file is created within the .git directory.
temp_index_basename = 'clang-format-index'


# A run of consecutive lines: first line number (1-based, as found in a diff
# hunk header) and number of lines.
Range = collections.namedtuple('Range', 'start, count')


def main():
    """Parse the command line and format the changed lines accordingly.

    Depending on the options this prints a diff of the formatting changes
    (--diff), applies them interactively (--patch), or writes them to the
    working directory.  Exits with status 2 (via die() or run()) on errors."""
    config = load_git_config()

    # In order to keep '--' yet allow options after positionals, we need to
    # check for '--' ourselves.  (Setting nargs='*' throws away the '--', while
    # nargs=argparse.REMAINDER disallows options after positionals.)
    argv = sys.argv[1:]
    try:
        idx = argv.index('--')
    except ValueError:
        dash_dash = []
    else:
        dash_dash = argv[idx:]
        argv = argv[:idx]

    default_extensions = ','.join([
        # From clang/lib/Frontend/FrontendOptions.cpp, all lower case
        'c', 'h',  # C
        'm',  # ObjC
        'mm',  # ObjC++
        'cc', 'cp', 'cpp', 'c++', 'cxx', 'hpp',  # C++
        # Other languages that clang-format supports
        'proto', 'protodevel',  # Protocol Buffers
        'java',  # Java
        'js',  # JavaScript
        'ts',  # TypeScript
    ])

    p = argparse.ArgumentParser(
        usage=usage, formatter_class=argparse.RawDescriptionHelpFormatter,
        description=desc)
    p.add_argument('--binary',
                   default=config.get('clangformat.binary', 'clang-format'),
                   help='path to clang-format')
    p.add_argument('--commit',
                   default=config.get('clangformat.commit', 'HEAD'),
                   help='default commit to use if none is specified')
    p.add_argument('--diff', action='store_true',
                   help='print a diff instead of applying the changes')
    p.add_argument('--extensions',
                   default=config.get('clangformat.extensions',
                                      default_extensions),
                   help=('comma-separated list of file extensions to format, '
                         'excluding the period and case-insensitive'))
    p.add_argument('-f', '--force', action='store_true',
                   help='allow changes to unstaged files')
    p.add_argument('-p', '--patch', action='store_true',
                   help='select hunks interactively')
    p.add_argument('-q', '--quiet', action='count', default=0,
                   help='print less information')
    p.add_argument('--style',
                   default=config.get('clangformat.style', None),
                   help='passed to clang-format')
    p.add_argument('-v', '--verbose', action='count', default=0,
                   help='print extra information')
    # We gather all the remaining positional arguments into 'args' since we
    # need to use some heuristics to determine whether or not <commit> was
    # present.  However, to print pretty messages, we make use of metavar and
    # help.
    p.add_argument('args', nargs='*', metavar='<commit>',
                   help='revision from which to compute the diff')
    p.add_argument('ignored', nargs='*', metavar='<file>...',
                   help='if specified, only consider differences in these '
                        'files')
    opts = p.parse_args(argv)

    # Fold -q into -v so that a single signed verbosity level remains.
    opts.verbose -= opts.quiet
    del opts.quiet

    commits, files = interpret_args(opts.args, dash_dash, opts.commit)
    error = commit_count_error(commits, opts.diff)
    if error:
        die(error)
    changed_lines = compute_diff_and_extract_lines(commits, files)
    if opts.verbose >= 1:
        ignored_files = set(changed_lines)
    filter_by_extension(changed_lines, opts.extensions.lower().split(','))
    if opts.verbose >= 1:
        ignored_files.difference_update(changed_lines)
        if ignored_files:
            print('Ignoring changes in the following files (wrong extension):')
            for filename in ignored_files:
                print('  %s' % filename)
        if changed_lines:
            print('Running clang-format on the following files:')
            for filename in changed_lines:
                print('  %s' % filename)
    if not changed_lines:
        print('no modified files to format')
        return
    # The paths in the computed diff are relative to the top level of the
    # repository, so we must cd there before accessing those files.
    cd_to_toplevel()
    if len(commits) > 1:
        old_tree = commits[1]
        new_tree = run_clang_format_and_save_to_tree(changed_lines,
                                                     revision=commits[1],
                                                     binary=opts.binary,
                                                     style=opts.style)
    else:
        old_tree = create_tree_from_workdir(changed_lines)
        new_tree = run_clang_format_and_save_to_tree(changed_lines,
                                                     binary=opts.binary,
                                                     style=opts.style)
    if opts.verbose >= 1:
        print('old tree: %s' % old_tree)
        print('new tree: %s' % new_tree)
    if old_tree == new_tree:
        if opts.verbose >= 0:
            print('clang-format did not modify any files')
    elif opts.diff:
        print_diff(old_tree, new_tree)
    else:
        changed_files = apply_changes(old_tree, new_tree, force=opts.force,
                                      patch_mode=opts.patch)
        if (opts.verbose >= 0 and not opts.patch) or opts.verbose >= 1:
            print('changed files:')
            for filename in changed_files:
                print('  %s' % filename)


def commit_count_error(commits, diff):
    """Return an error message if `commits` is not a usable set of commits.

    `commits` is the list of commits given on the command line (or the
    default) and `diff` tells whether --diff was requested.  Returns None when
    the combination is acceptable, otherwise the message to die() with."""
    if len(commits) > 2:
        return 'at most two commits allowed; %d given' % len(commits)
    if len(commits) == 2 and not diff:
        return '--diff is required when two commits are given'
    return None


def load_git_config(non_string_options=None):
    """Return the git configuration as a dictionary.

    All options are assumed to be strings unless listed in
    `non_string_options`, which is a dictionary mapping option name (in lower
    case) to either "--bool" or "--int"."""
    if non_string_options is None:
        non_string_options = {}
    out = {}
    for entry in run('git', 'config', '--list', '--null').split('\0'):
        if not entry:
            continue
        if '\n' in entry:
            name, value = entry.split('\n', 1)
        else:
            # With --null, key and value are separated by a newline.  A
            # setting written without '= value' has no newline at all; git
            # treats such a setting as boolean true.
            name, value = entry, 'true'
        if name in non_string_options:
            value = run('git', 'config', non_string_options[name], name)
        out[name] = value
    return out


def interpret_args(args, dash_dash, default_commit):
    """Interpret `args` as "[commits] [--] [files]" and return (commits, files).

    It is assumed that "--" and everything that follows has been removed from
    args and placed in `dash_dash`.

    If "--" is present (i.e., `dash_dash` is non-empty), the arguments to its
    left (if present) are taken as commits.  Otherwise, the arguments are
    checked from left to right if they are commits or files.  If commits are
    not given, a list with `default_commit` is used.

    Note that without "--" the leading commits are popped off `args` in
    place."""
    if dash_dash:
        if not args:
            commits = [default_commit]
        else:
            commits = args
        for commit in commits:
            object_type = get_object_type(commit)
            if object_type not in ('commit', 'tag'):
                if object_type is None:
                    die("'%s' is not a commit" % commit)
                else:
                    die("'%s' is a %s, but a commit was expected" %
                        (commit, object_type))
        files = dash_dash[1:]
    elif args:
        commits = []
        while args:
            if not disambiguate_revision(args[0]):
                break
            commits.append(args.pop(0))
        if not commits:
            commits = [default_commit]
        files = args
    else:
        commits = [default_commit]
        files = []
    return commits, files


def disambiguate_revision(value):
    """Returns True if `value` is a revision, False if it is a file, or dies."""
    # If `value` is ambiguous (neither a commit nor a file), the following
    # command will die with an appropriate error message.
    run('git', 'rev-parse', value, verbose=False)
    object_type = get_object_type(value)
    if object_type is None:
        return False
    if object_type in ('commit', 'tag'):
        return True
    die('`%s` is a %s, but a commit or filename was expected' %
        (value, object_type))


def get_object_type(value):
    """Returns a string description of an object's type, or None if it is not
    a valid git object."""
    cmd = ['git', 'cat-file', '-t', value]
    # stderr is captured only to keep git's complaint off the terminal.
    p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout = p.communicate()[0]
    if p.returncode != 0:
        return None
    return to_text(stdout.strip())


def compute_diff_and_extract_lines(commits, files):
    """Calls compute_diff() followed by extract_lines()."""
    diff_process = compute_diff(commits, files)
    changed_lines = extract_lines(diff_process.stdout)
    diff_process.stdout.close()
    diff_process.wait()
    if diff_process.returncode != 0:
        # Assume error was already printed to stderr.
        sys.exit(2)
    return changed_lines


def compute_diff(commits, files):
    """Return a subprocess object producing the diff from `commits`.

    The return value's `stdout` file object will produce a patch with the
    differences between the working directory and the first commit if a single
    one was specified, or the difference between both specified commits,
    filtered on `files` (if non-empty).  Zero context lines are used in the
    patch."""
    git_tool = 'diff-index'
    if len(commits) > 1:
        git_tool = 'diff-tree'
    cmd = ['git', git_tool, '-p', '-U0'] + commits + ['--']
    cmd.extend(files)
    p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    p.stdin.close()
    return p


def extract_lines(patch_file):
    """Extract the changed lines in `patch_file`.

    `patch_file` is an iterable of lines (text or bytes).  The input must have
    been produced with ``-U0``, meaning unidiff format with zero lines of
    context.

    The return value is a dict mapping filename to a list of line `Range`s.
    Hunks that add no lines (count 0, i.e. pure deletions) are omitted."""
    matches = {}
    for line in patch_file:
        line = to_text(line)
        # '+++ b/<path>': remember the file the following hunks belong to.
        match = re.search(r'^\+\+\+\ [^/]+/(.*)', line)
        if match:
            filename = match.group(1).rstrip('\r\n')
        # '@@ -a,b +c,d @@': the count 'd' defaults to 1 when omitted.
        match = re.search(r'^@@ -[0-9,]+ \+(\d+)(,(\d+))?', line)
        if match:
            start_line = int(match.group(1))
            line_count = 1
            if match.group(3):
                line_count = int(match.group(3))
            if line_count > 0:
                matches.setdefault(filename, []).append(
                    Range(start_line, line_count))
    return matches


def filter_by_extension(dictionary, allowed_extensions):
    """Delete every key in `dictionary` that doesn't have an allowed extension.

    `allowed_extensions` must be a collection of lowercase file extensions,
    excluding the period.  `dictionary` is modified in place."""
    allowed_extensions = frozenset(allowed_extensions)
    # Iterate over a snapshot of the keys since entries are deleted on the go.
    for filename in list(dictionary):
        base_ext = filename.rsplit('.', 1)
        if len(base_ext) == 1 or base_ext[1].lower() not in allowed_extensions:
            del dictionary[filename]


def cd_to_toplevel():
    """Change to the top level of the git repository."""
    toplevel = run('git', 'rev-parse', '--show-toplevel')
    os.chdir(toplevel)


def create_tree_from_workdir(filenames):
    """Create a new git tree with the given files from the working directory.

    Returns the object ID (SHA-1) of the created tree."""
    return create_tree(filenames, '--stdin')


def git_mode_string(mode):
    """Return the integer file `mode` as a zero-prefixed octal string.

    This yields e.g. '0100644' on every Python version, whereas oct() gives
    '0100644' on Python 2 but '0o100644' on Python 3."""
    return '0%o' % mode


def run_clang_format_and_save_to_tree(changed_lines, revision=None,
                                      binary='clang-format', style=None):
    """Run clang-format on each file and save the result to a git tree.

    `changed_lines` maps filename to the list of line ranges to format.  If
    `revision` is given, file contents and modes are taken from that revision;
    otherwise they are taken from the working directory.

    Returns the object ID (SHA-1) of the created tree."""
    def index_info_generator():
        for filename, line_ranges in changed_lines.items():
            if revision:
                git_metadata_cmd = ['git', 'ls-tree',
                                    '%s:%s' % (revision,
                                               os.path.dirname(filename)),
                                    os.path.basename(filename)]
                git_metadata = subprocess.Popen(git_metadata_cmd,
                                                stdin=subprocess.PIPE,
                                                stdout=subprocess.PIPE)
                stdout = to_text(git_metadata.communicate()[0])
                # The first field of the ls-tree output is the octal mode.
                mode = git_mode_string(int(stdout.split()[0], 8))
            else:
                mode = git_mode_string(os.stat(filename).st_mode)
            blob_id = clang_format_to_blob(filename, line_ranges,
                                           revision=revision,
                                           binary=binary,
                                           style=style)
            yield '%s %s\t%s' % (mode, blob_id, filename)
    return create_tree(index_info_generator(), '--index-info')


def create_tree(input_lines, mode):
    """Create a tree object from the given input.

    If mode is '--stdin', it must be a list of filenames.  If mode is
    '--index-info' it must be a list of values suitable for "git update-index
    --index-info", such as "<mode> <SP> <sha1> <TAB> <filename>".  Any other
    mode is invalid.

    Returns the object ID of the tree written from a temporary index."""
    assert mode in ('--stdin', '--index-info')
    cmd = ['git', 'update-index', '--add', '-z', mode]
    with temporary_index_file():
        p = subprocess.Popen(cmd, stdin=subprocess.PIPE)
        for line in input_lines:
            # -z: entries are NUL-terminated.
            p.stdin.write(to_bytes('%s\0' % line))
        p.stdin.close()
        if p.wait() != 0:
            die('`%s` failed' % ' '.join(cmd))
        tree_id = run('git', 'write-tree')
        return tree_id


def clang_format_to_blob(filename, line_ranges, revision=None,
                         binary='clang-format', style=None):
    """Run clang-format on the given file and save the result to a git blob.

    Runs on the file in `revision` if not None, or on the file in the working
    directory if `revision` is None.

    Returns the object ID (SHA-1) of the created blob."""
    clang_format_cmd = [binary]
    if style:
        clang_format_cmd.extend(['-style=' + style])
    clang_format_cmd.extend([
        '-lines=%s:%s' % (start_line, start_line + line_count - 1)
        for start_line, line_count in line_ranges])
    if revision:
        # Feed the blob from `revision` to clang-format on stdin and tell it
        # which filename to assume.
        clang_format_cmd.extend(['-assume-filename=' + filename])
        git_show_cmd = ['git', 'cat-file', 'blob',
                        '%s:%s' % (revision, filename)]
        git_show = subprocess.Popen(git_show_cmd, stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE)
        git_show.stdin.close()
        clang_format_stdin = git_show.stdout
    else:
        clang_format_cmd.extend([filename])
        git_show = None
        clang_format_stdin = subprocess.PIPE
    try:
        clang_format = subprocess.Popen(clang_format_cmd,
                                        stdin=clang_format_stdin,
                                        stdout=subprocess.PIPE)
        if clang_format_stdin == subprocess.PIPE:
            clang_format_stdin = clang_format.stdin
    except OSError as e:
        if e.errno == errno.ENOENT:
            die('cannot find executable "%s"' % binary)
        else:
            raise
    # Close our copy of clang-format's input: either the write end of its
    # stdin pipe, or our handle on git-show's output.
    clang_format_stdin.close()
    hash_object_cmd = ['git', 'hash-object', '-w', '--path=' + filename,
                       '--stdin']
    hash_object = subprocess.Popen(hash_object_cmd, stdin=clang_format.stdout,
                                   stdout=subprocess.PIPE)
    clang_format.stdout.close()
    stdout = hash_object.communicate()[0]
    if hash_object.returncode != 0:
        die('`%s` failed' % ' '.join(hash_object_cmd))
    if clang_format.wait() != 0:
        die('`%s` failed' % ' '.join(clang_format_cmd))
    if git_show and git_show.wait() != 0:
        die('`%s` failed' % ' '.join(git_show_cmd))
    return to_text(stdout).rstrip('\r\n')


@contextlib.contextmanager
def temporary_index_file(tree=None):
    """Context manager for setting GIT_INDEX_FILE to a temporary file and
    deleting the file afterward.

    The previous value of GIT_INDEX_FILE (or its absence) is restored on
    exit."""
    index_path = create_temporary_index(tree)
    old_index_path = os.environ.get('GIT_INDEX_FILE')
    os.environ['GIT_INDEX_FILE'] = index_path
    try:
        yield
    finally:
        if old_index_path is None:
            del os.environ['GIT_INDEX_FILE']
        else:
            os.environ['GIT_INDEX_FILE'] = old_index_path
        os.remove(index_path)


def create_temporary_index(tree=None):
    """Create a temporary index file and return the created file's path.

    If `tree` is not None, use that as the tree to read in.  Otherwise, an
    empty index is created."""
    gitdir = run('git', 'rev-parse', '--git-dir')
    path = os.path.join(gitdir, temp_index_basename)
    if tree is None:
        tree = '--empty'
    run('git', 'read-tree', '--index-output=' + path, tree)
    return path


def print_diff(old_tree, new_tree):
    """Print the diff between the two trees to stdout."""
    # We use the porcelain 'diff' and not plumbing 'diff-tree' because the
    # output is expected to be viewed by the user, and only the former does
    # nice things like color and pagination.
    #
    # We also only print modified files since `new_tree` only contains the
    # files that were modified, so unmodified files would show as deleted
    # without the filter.
    subprocess.check_call(['git', 'diff', '--diff-filter=M', old_tree,
                           new_tree, '--'])


def apply_changes(old_tree, new_tree, force=False, patch_mode=False):
    """Apply the changes in `new_tree` to the working directory.

    Bails if there are local changes in those files and not `force`.  If
    `patch_mode`, runs `git checkout --patch` to select hunks interactively.

    Returns the list of files that differ between `old_tree` and
    `new_tree`."""
    changed_files = run('git', 'diff-tree', '--diff-filter=M', '-r', '-z',
                        '--name-only', old_tree,
                        new_tree).rstrip('\0').split('\0')
    if not force:
        unstaged_files = run('git', 'diff-files', '--name-status',
                             *changed_files)
        if unstaged_files:
            print_stderr('The following files would be modified but '
                         'have unstaged changes:')
            print_stderr(unstaged_files)
            print_stderr('Please commit, stage, or stash them first.')
            sys.exit(2)
    if patch_mode:
        # In patch mode, we could just as well create an index from the new
        # tree and checkout from that, but then the user will be presented
        # with a message saying "Discard ... from worktree".  Instead, we use
        # the old tree as the index and checkout from new_tree, which gives
        # the slightly better message, "Apply ... to index and worktree".
        # This is not quite right, since it won't be applied to the user's
        # index, but oh well.
        with temporary_index_file(old_tree):
            subprocess.check_call(['git', 'checkout', '--patch', new_tree])
    else:
        with temporary_index_file(new_tree):
            run('git', 'checkout-index', '-a', '-f')
    return changed_files


def run(*args, **kwargs):
    """Run the command `args` and return its standard output as text.

    Keyword arguments:
      stdin   -- data to feed to the command's standard input (default '').
      verbose -- if true (default), report stderr output and failures on
                 stderr.
      strip   -- if true (default), strip trailing CR/LF from the output.

    Exits with status 2 if the command returns a non-zero exit status."""
    stdin = kwargs.pop('stdin', '')
    verbose = kwargs.pop('verbose', True)
    strip = kwargs.pop('strip', True)
    for name in kwargs:
        raise TypeError("run() got an unexpected keyword argument '%s'" % name)
    p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                         stdin=subprocess.PIPE)
    stdout, stderr = p.communicate(input=to_bytes(stdin))
    stdout = to_text(stdout)
    stderr = to_text(stderr)
    if p.returncode == 0:
        if stderr:
            if verbose:
                print_stderr('`%s` printed to stderr:' % ' '.join(args))
            print_stderr(stderr.rstrip())
        if strip:
            stdout = stdout.rstrip('\r\n')
        return stdout
    if verbose:
        print_stderr('`%s` returned %s' % (' '.join(args), p.returncode))
    if stderr:
        print_stderr(stderr.rstrip())
    sys.exit(2)


def die(message):
    """Print `message` as an error on stderr and exit with status 2."""
    print_stderr('error: %s' % message)
    sys.exit(2)


def print_stderr(message):
    """Write `message` followed by a newline to stderr."""
    sys.stderr.write('%s\n' % message)


def to_text(data):
    """Return `data` as a native `str`.

    Subprocess pipes yield `str` on Python 2 (returned unchanged) and `bytes`
    on Python 3 (decoded as UTF-8; undecodable bytes are kept as surrogate
    escapes so that they survive a round trip through to_bytes())."""
    if isinstance(data, str):
        return data
    if isinstance(data, bytes):
        # Only reachable on Python 3, where bytes is not str.
        return data.decode('utf-8', 'surrogateescape')
    # Python 2 `unicode`.
    return data.encode('utf-8')


def to_bytes(data):
    """Return `data` as bytes suitable for writing to a subprocess pipe."""
    if isinstance(data, bytes):
        return data
    return data.encode('utf-8', 'surrogateescape')


if __name__ == '__main__':
    main()