#!/usr/bin/env python
#
#===- git-clang-format - ClangFormat Git Integration ---------*- python -*--===#
#
#                     The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
#===------------------------------------------------------------------------===#

r"""
clang-format git integration
============================

This file provides a clang-format integration for git. Put it somewhere in your
path and ensure that it is executable. Then, "git clang-format" will invoke
clang-format on the changes in current files or a specific commit.

For further details, run:
    git clang-format -h

Requires Python 2.7 or Python 3
"""

from __future__ import print_function
import argparse
import collections
import contextlib
import errno
import os
import re
import subprocess
import sys

usage = 'git clang-format [OPTIONS] [<commit>] [<commit>] [--] [<file>...]'

desc = '''
If zero or one commits are given, run clang-format on all lines that differ
between the working directory and <commit>, which defaults to HEAD. Changes are
only applied to the working directory.

If two commits are given (requires --diff), run clang-format on all lines in the
second <commit> that differ from the first <commit>.

The following git-config settings set the default of the corresponding option:
  clangFormat.binary
  clangFormat.commit
  clangFormat.extensions
  clangFormat.style
'''

# Name of the temporary index file in which to save the output of clang-format.
# This file is created within the .git directory.
temp_index_basename = 'clang-format-index'


# A run of consecutive lines: 1-based `start` line and number of lines `count`.
Range = collections.namedtuple('Range', 'start, count')


def main():
  """Parse the command line and run clang-format on the changed lines.

  Depending on the options, the result is either printed as a diff or applied
  to the working directory.  Exits with status 2 (via `die` or `sys.exit`) on
  errors."""
  config = load_git_config()

  # In order to keep '--' yet allow options after positionals, we need to
  # check for '--' ourselves.  (Setting nargs='*' throws away the '--', while
  # nargs=argparse.REMAINDER disallows options after positionals.)
  argv = sys.argv[1:]
  try:
    idx = argv.index('--')
  except ValueError:
    dash_dash = []
  else:
    dash_dash = argv[idx:]
    argv = argv[:idx]

  default_extensions = ','.join([
      # From clang/lib/Frontend/FrontendOptions.cpp, all lower case
      'c', 'h',  # C
      'm',  # ObjC
      'mm',  # ObjC++
      'cc', 'cp', 'cpp', 'c++', 'cxx', 'hpp',  # C++
      # Other languages that clang-format supports
      'proto', 'protodevel',  # Protocol Buffers
      'java',  # Java
      'js',  # JavaScript
      'ts',  # TypeScript
      ])

  p = argparse.ArgumentParser(
    usage=usage, formatter_class=argparse.RawDescriptionHelpFormatter,
    description=desc)
  p.add_argument('--binary',
                 default=config.get('clangformat.binary', 'clang-format'),
                 help='path to clang-format')
  p.add_argument('--commit',
                 default=config.get('clangformat.commit', 'HEAD'),
                 help='default commit to use if none is specified')
  p.add_argument('--diff', action='store_true',
                 help='print a diff instead of applying the changes')
  p.add_argument('--extensions',
                 default=config.get('clangformat.extensions',
                                    default_extensions),
                 help=('comma-separated list of file extensions to format, '
                       'excluding the period and case-insensitive'))
  p.add_argument('-f', '--force', action='store_true',
                 help='allow changes to unstaged files')
  p.add_argument('-p', '--patch', action='store_true',
                 help='select hunks interactively')
  p.add_argument('-q', '--quiet', action='count', default=0,
                 help='print less information')
  p.add_argument('--style',
                 default=config.get('clangformat.style', None),
                 help='passed to clang-format')
  p.add_argument('-v', '--verbose', action='count', default=0,
                 help='print extra information')
  # We gather all the remaining positional arguments into 'args' since we need
  # to use some heuristics to determine whether or not <commit> was present.
  # However, to print pretty messages, we make use of metavar and help.
  p.add_argument('args', nargs='*', metavar='<commit>',
                 help='revision from which to compute the diff')
  p.add_argument('ignored', nargs='*', metavar='<file>...',
                 help='if specified, only consider differences in these files')
  opts = p.parse_args(argv)

  # Fold -q into -v so that a single signed verbosity level is used below.
  opts.verbose -= opts.quiet
  del opts.quiet

  commits, files = interpret_args(opts.args, dash_dash, opts.commit)
  # Check the upper bound first: nesting this check under the "one commit or
  # fewer" branch would make it unreachable.
  if len(commits) > 2:
    die('at most two commits allowed; %d given' % len(commits))
  if len(commits) > 1 and not opts.diff:
    die('--diff is required when two commits are given')
  changed_lines = compute_diff_and_extract_lines(commits, files)
  if opts.verbose >= 1:
    ignored_files = set(changed_lines)
  filter_by_extension(changed_lines, opts.extensions.lower().split(','))
  if opts.verbose >= 1:
    ignored_files.difference_update(changed_lines)
    if ignored_files:
      print('Ignoring changes in the following files (wrong extension):')
      for filename in ignored_files:
        print('    %s' % filename)
    if changed_lines:
      print('Running clang-format on the following files:')
      for filename in changed_lines:
        print('    %s' % filename)
  if not changed_lines:
    print('no modified files to format')
    return
  # The computed diff outputs absolute paths, so we must cd before accessing
  # those files.
  cd_to_toplevel()
  if len(commits) > 1:
    old_tree = commits[1]
    new_tree = run_clang_format_and_save_to_tree(changed_lines,
                                                 revision=commits[1],
                                                 binary=opts.binary,
                                                 style=opts.style)
  else:
    old_tree = create_tree_from_workdir(changed_lines)
    new_tree = run_clang_format_and_save_to_tree(changed_lines,
                                                 binary=opts.binary,
                                                 style=opts.style)
  if opts.verbose >= 1:
    print('old tree: %s' % old_tree)
    print('new tree: %s' % new_tree)
  if old_tree == new_tree:
    if opts.verbose >= 0:
      print('clang-format did not modify any files')
  elif opts.diff:
    print_diff(old_tree, new_tree)
  else:
    changed_files = apply_changes(old_tree, new_tree, force=opts.force,
                                  patch_mode=opts.patch)
    if (opts.verbose >= 0 and not opts.patch) or opts.verbose >= 1:
      print('changed files:')
      for filename in changed_files:
        print('    %s' % filename)


def load_git_config(non_string_options=None):
  """Return the git configuration as a dictionary.

  All options are assumed to be strings unless in `non_string_options`, which
  is a dictionary mapping option name (in lower case) to either "--bool" or
  "--int"."""
  if non_string_options is None:
    non_string_options = {}
  out = {}
  for entry in run('git', 'config', '--list', '--null').split('\0'):
    if entry:
      if '\n' in entry:
        name, value = entry.split('\n', 1)
      else:
        # With --null, a setting written without '=' has no '\n' separator;
        # such a setting is implicitly 'true'.
        name = entry
        value = 'true'
      if name in non_string_options:
        value = run('git', 'config', non_string_options[name], name)
      out[name] = value
  return out


def interpret_args(args, dash_dash, default_commit):
  """Interpret `args` as "[commits] [--] [files]" and return (commits, files).

  It is assumed that "--" and everything that follows has been removed from
  args and placed in `dash_dash`.

  If "--" is present (i.e., `dash_dash` is non-empty), the arguments to its
  left (if present) are taken as commits.  Otherwise, the arguments are checked
  from left to right if they are commits or files.  If commits are not given,
  a list with `default_commit` is used.

  Note that in the heuristic case the leading commits are popped off `args`
  in place."""
  if dash_dash:
    if len(args) == 0:
      commits = [default_commit]
    else:
      commits = args
    for commit in commits:
      object_type = get_object_type(commit)
      if object_type not in ('commit', 'tag'):
        if object_type is None:
          die("'%s' is not a commit" % commit)
        else:
          die("'%s' is a %s, but a commit was expected" % (commit, object_type))
    files = dash_dash[1:]
  elif args:
    commits = []
    while args:
      if not disambiguate_revision(args[0]):
        break
      commits.append(args.pop(0))
    if not commits:
      commits = [default_commit]
    files = args
  else:
    commits = [default_commit]
    files = []
  return commits, files


def disambiguate_revision(value):
  """Returns True if `value` is a revision, False if it is a file, or dies."""
  # If `value` is ambiguous (neither a commit nor a file), the following
  # command will die with an appropriate error message.
  run('git', 'rev-parse', value, verbose=False)
  object_type = get_object_type(value)
  if object_type is None:
    return False
  if object_type in ('commit', 'tag'):
    return True
  die('`%s` is a %s, but a commit or filename was expected' %
      (value, object_type))


def get_object_type(value):
  """Returns a string description of an object's type, or None if it is not
  a valid git object."""
  cmd = ['git', 'cat-file', '-t', value]
  p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  stdout, stderr = p.communicate()
  if p.returncode != 0:
    return None
  return convert_string(stdout.strip())


def compute_diff_and_extract_lines(commits, files):
  """Calls compute_diff() followed by extract_lines().

  Exits with status 2 if the diff command fails."""
  diff_process = compute_diff(commits, files)
  changed_lines = extract_lines(diff_process.stdout)
  diff_process.stdout.close()
  diff_process.wait()
  if diff_process.returncode != 0:
    # Assume error was already printed to stderr.
    sys.exit(2)
  return changed_lines


def compute_diff(commits, files):
  """Return a subprocess object producing the diff from `commits`.

  The return value's `stdout` file object will produce a patch with the
  differences between the working directory and the first commit if a single
  one was specified, or the difference between both specified commits, filtered
  on `files` (if non-empty).  Zero context lines are used in the patch."""
  git_tool = 'diff-index'
  if len(commits) > 1:
    git_tool = 'diff-tree'
  cmd = ['git', git_tool, '-p', '-U0'] + commits + ['--']
  cmd.extend(files)
  p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
  p.stdin.close()
  return p


def extract_lines(patch_file):
  """Extract the changed lines in `patch_file`.

  The input must have been produced with ``-U0``, meaning unidiff format with
  zero lines of context.  The return value is a dict mapping filename to a
  list of line `Range`s (start line and line count); hunks that add no lines
  are omitted."""
  matches = {}
  for line in patch_file:
    line = convert_string(line)
    # '+++ b/<path>' header: remember the file the following hunks belong to.
    match = re.search(r'^\+\+\+\ [^/]+/(.*)', line)
    if match:
      filename = match.group(1).rstrip('\r\n')
    # '@@ -a,b +c,d @@' hunk header: only the new-side start/count matter.
    match = re.search(r'^@@ -[0-9,]+ \+(\d+)(,(\d+))?', line)
    if match:
      start_line = int(match.group(1))
      # An omitted count in a hunk header means exactly one line.
      line_count = 1
      if match.group(3):
        line_count = int(match.group(3))
      if line_count > 0:
        matches.setdefault(filename, []).append(Range(start_line, line_count))
  return matches


def filter_by_extension(dictionary, allowed_extensions):
  """Delete every key in `dictionary` that doesn't have an allowed extension.

  `allowed_extensions` must be a collection of lowercase file extensions,
  excluding the period.  If it contains the empty string, filenames without
  any extension are kept as well.  `dictionary` is modified in place."""
  allowed_extensions = frozenset(allowed_extensions)
  for filename in list(dictionary.keys()):
    base_ext = filename.rsplit('.', 1)
    if len(base_ext) == 1 and '' in allowed_extensions:
      continue
    if len(base_ext) == 1 or base_ext[1].lower() not in allowed_extensions:
      del dictionary[filename]


def cd_to_toplevel():
  """Change to the top level of the git repository."""
  toplevel = run('git', 'rev-parse', '--show-toplevel')
  os.chdir(toplevel)


def create_tree_from_workdir(filenames):
  """Create a new git tree with the given files from the working directory.

  Returns the object ID (SHA-1) of the created tree."""
  return create_tree(filenames, '--stdin')


def run_clang_format_and_save_to_tree(changed_lines, revision=None,
                                      binary='clang-format', style=None):
  """Run clang-format on each file and save the result to a git tree.

  `changed_lines` maps filename to the line ranges to format.  If `revision`
  is given, file contents and modes are taken from that revision instead of
  the working directory.

  Returns the object ID (SHA-1) of the created tree."""
  def iteritems(container):
    try:
      return container.iteritems()  # Python 2
    except AttributeError:
      return container.items()  # Python 3
  def index_info_generator():
    for filename, line_ranges in iteritems(changed_lines):
      if revision:
        git_metadata_cmd = ['git', 'ls-tree',
                            '%s:%s' % (revision, os.path.dirname(filename)),
                            os.path.basename(filename)]
        git_metadata = subprocess.Popen(git_metadata_cmd, stdin=subprocess.PIPE,
                                        stdout=subprocess.PIPE)
        stdout = git_metadata.communicate()[0]
        # The first field of the ls-tree output is the file mode in octal.
        mode = oct(int(stdout.split()[0], 8))
      else:
        mode = oct(os.stat(filename).st_mode)
      # Adjust python3 octal format so that it matches what git expects
      if mode.startswith('0o'):
        mode = '0' + mode[2:]
      blob_id = clang_format_to_blob(filename, line_ranges,
                                     revision=revision,
                                     binary=binary,
                                     style=style)
      yield '%s %s\t%s' % (mode, blob_id, filename)
  return create_tree(index_info_generator(), '--index-info')


def create_tree(input_lines, mode):
  """Create a tree object from the given input.

  If mode is '--stdin', it must be a list of filenames.  If mode is
  '--index-info' it must be a list of values suitable for "git update-index
  --index-info", such as "<mode> <SP> <sha1> <TAB> <filename>".  Any other mode
  is invalid.

  Returns the object ID of the tree written from a temporary index."""
  assert mode in ('--stdin', '--index-info')
  cmd = ['git', 'update-index', '--add', '-z', mode]
  with temporary_index_file():
    p = subprocess.Popen(cmd, stdin=subprocess.PIPE)
    for line in input_lines:
      # '-z' makes update-index expect NUL-terminated records.
      p.stdin.write(to_bytes('%s\0' % line))
    p.stdin.close()
    if p.wait() != 0:
      die('`%s` failed' % ' '.join(cmd))
    tree_id = run('git', 'write-tree')
    return tree_id


def clang_format_to_blob(filename, line_ranges, revision=None,
                         binary='clang-format', style=None):
  """Run clang-format on the given file and save the result to a git blob.

  Runs on the file in `revision` if not None, or on the file in the working
  directory if `revision` is None.

  Returns the object ID (SHA-1) of the created blob."""
  clang_format_cmd = [binary]
  if style:
    clang_format_cmd.extend(['-style='+style])
  clang_format_cmd.extend([
      '-lines=%s:%s' % (start_line, start_line+line_count-1)
      for start_line, line_count in line_ranges])
  if revision:
    # Feed the file contents from the revision through stdin; clang-format is
    # told the filename via -assume-filename.
    clang_format_cmd.extend(['-assume-filename='+filename])
    git_show_cmd = ['git', 'cat-file', 'blob', '%s:%s' % (revision, filename)]
    git_show = subprocess.Popen(git_show_cmd, stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE)
    git_show.stdin.close()
    clang_format_stdin = git_show.stdout
  else:
    clang_format_cmd.extend([filename])
    git_show = None
    clang_format_stdin = subprocess.PIPE
  try:
    clang_format = subprocess.Popen(clang_format_cmd, stdin=clang_format_stdin,
                                    stdout=subprocess.PIPE)
    if clang_format_stdin == subprocess.PIPE:
      clang_format_stdin = clang_format.stdin
  except OSError as e:
    if e.errno == errno.ENOENT:
      die('cannot find executable "%s"' % binary)
    else:
      raise
  # Close our handle on clang-format's input (either its own stdin pipe or our
  # copy of git's stdout).
  clang_format_stdin.close()
  hash_object_cmd = ['git', 'hash-object', '-w', '--path='+filename, '--stdin']
  hash_object = subprocess.Popen(hash_object_cmd, stdin=clang_format.stdout,
                                 stdout=subprocess.PIPE)
  clang_format.stdout.close()
  stdout = hash_object.communicate()[0]
  if hash_object.returncode != 0:
    die('`%s` failed' % ' '.join(hash_object_cmd))
  if clang_format.wait() != 0:
    die('`%s` failed' % ' '.join(clang_format_cmd))
  if git_show and git_show.wait() != 0:
    die('`%s` failed' % ' '.join(git_show_cmd))
  return convert_string(stdout).rstrip('\r\n')


@contextlib.contextmanager
def temporary_index_file(tree=None):
  """Context manager for setting GIT_INDEX_FILE to a temporary file and deleting
  the file afterward.

  The previous value of GIT_INDEX_FILE (or its absence) is restored on exit."""
  index_path = create_temporary_index(tree)
  old_index_path = os.environ.get('GIT_INDEX_FILE')
  os.environ['GIT_INDEX_FILE'] = index_path
  try:
    yield
  finally:
    if old_index_path is None:
      del os.environ['GIT_INDEX_FILE']
    else:
      os.environ['GIT_INDEX_FILE'] = old_index_path
    os.remove(index_path)


def create_temporary_index(tree=None):
  """Create a temporary index file and return the created file's path.

  If `tree` is not None, use that as the tree to read in.  Otherwise, an
  empty index is created."""
  gitdir = run('git', 'rev-parse', '--git-dir')
  path = os.path.join(gitdir, temp_index_basename)
  if tree is None:
    tree = '--empty'
  run('git', 'read-tree', '--index-output='+path, tree)
  return path


def print_diff(old_tree, new_tree):
  """Print the diff between the two trees to stdout."""
  # We use the porcelain 'diff' and not plumbing 'diff-tree' because the output
  # is expected to be viewed by the user, and only the former does nice things
  # like color and pagination.
  #
  # We also only print modified files since `new_tree` only contains the files
  # that were modified, so unmodified files would show as deleted without the
  # filter.
  subprocess.check_call(['git', 'diff', '--diff-filter=M', old_tree, new_tree,
                         '--'])


def apply_changes(old_tree, new_tree, force=False, patch_mode=False):
  """Apply the changes in `new_tree` to the working directory.

  Bails if there are local changes in those files and not `force`.  If
  `patch_mode`, runs `git checkout --patch` to select hunks interactively.

  Returns the list of files that differ between `old_tree` and `new_tree`."""
  changed_files = run('git', 'diff-tree', '--diff-filter=M', '-r', '-z',
                      '--name-only', old_tree,
                      new_tree).rstrip('\0').split('\0')
  if not force:
    unstaged_files = run('git', 'diff-files', '--name-status', *changed_files)
    if unstaged_files:
      print('The following files would be modified but '
            'have unstaged changes:', file=sys.stderr)
      print(unstaged_files, file=sys.stderr)
      print('Please commit, stage, or stash them first.', file=sys.stderr)
      sys.exit(2)
  if patch_mode:
    # In patch mode, we could just as well create an index from the new tree
    # and checkout from that, but then the user will be presented with a
    # message saying "Discard ... from worktree".  Instead, we use the old
    # tree as the index and checkout from new_tree, which gives the slightly
    # better message, "Apply ... to index and worktree".  This is not quite
    # right, since it won't be applied to the user's index, but oh well.
    with temporary_index_file(old_tree):
      subprocess.check_call(['git', 'checkout', '--patch', new_tree])
  else:
    with temporary_index_file(new_tree):
      run('git', 'checkout-index', '-a', '-f')
  return changed_files


def run(*args, **kwargs):
  """Run the command given by `args` and return its stdout as a string.

  Keyword arguments:
    stdin: data passed to the command's standard input (default: '').
    verbose: if true (default), report stderr output of a successful command
      and the exit status of a failed one on stderr.
    strip: if true (default), strip trailing '\\r' and '\\n' from the output.

  Any other keyword argument raises TypeError.  If the command exits with a
  non-zero status, its stderr is printed and the script exits with status
  2."""
  stdin = kwargs.pop('stdin', '')
  verbose = kwargs.pop('verbose', True)
  strip = kwargs.pop('strip', True)
  for name in kwargs:
    raise TypeError("run() got an unexpected keyword argument '%s'" % name)
  p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                       stdin=subprocess.PIPE)
  stdout, stderr = p.communicate(input=stdin)

  stdout = convert_string(stdout)
  stderr = convert_string(stderr)

  if p.returncode == 0:
    if stderr:
      if verbose:
        print('`%s` printed to stderr:' % ' '.join(args), file=sys.stderr)
      print(stderr.rstrip(), file=sys.stderr)
    if strip:
      stdout = stdout.rstrip('\r\n')
    return stdout
  if verbose:
    print('`%s` returned %s' % (' '.join(args), p.returncode), file=sys.stderr)
  if stderr:
    print(stderr.rstrip(), file=sys.stderr)
  sys.exit(2)


def die(message):
  """Print `message` to stderr prefixed with 'error:' and exit with status 2."""
  print('error:', message, file=sys.stderr)
  sys.exit(2)


def to_bytes(str_input):
  """Return `str_input` as bytes; text is encoded as UTF-8."""
  # Encode to UTF-8 to get binary data.
  if isinstance(str_input, bytes):
    return str_input
  return str_input.encode('utf-8')


def to_string(bytes_input):
  """Return `bytes_input` as the native `str` type.

  Native strings are returned unchanged.  Python 3 `bytes` are decoded as
  UTF-8; any other input (Python 2 `unicode`) is encoded to UTF-8."""
  if isinstance(bytes_input, str):
    return bytes_input
  if isinstance(bytes_input, bytes):
    # Only reachable on Python 3, where bytes has no encode() method.
    return bytes_input.decode('utf-8')
  return bytes_input.encode('utf-8')


def convert_string(bytes_input):
  """Decode `bytes_input` as UTF-8 into a native string.

  Falls back to `str(bytes_input)` if the input has no `decode` method or is
  not valid UTF-8."""
  try:
    return to_string(bytes_input.decode('utf-8'))
  except AttributeError:  # 'str' object has no attribute 'decode'.
    return str(bytes_input)
  except UnicodeError:
    return str(bytes_input)

if __name__ == '__main__':
  main()