#!/usr/bin/env python
#
#===- git-clang-format - ClangFormat Git Integration ---------*- python -*--===#
#
#                     The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
#===------------------------------------------------------------------------===#

r"""
clang-format git integration
============================

This file provides a clang-format integration for git. Put it somewhere in your
path and ensure that it is executable. Then, "git clang-format" will invoke
clang-format on the changes in current files or a specific commit.

For further details, run:
    git clang-format -h

Requires Python 2.7 or Python 3.
"""

import argparse
import collections
import contextlib
import errno
import os
import re
import subprocess
import sys

usage = 'git clang-format [OPTIONS] [<commit>] [--] [<file>...]'

desc = '''
Run clang-format on all lines that differ between the working directory
and <commit>, which defaults to HEAD. Changes are only applied to the working
directory.

The following git-config settings set the default of the corresponding option:
  clangFormat.binary
  clangFormat.commit
  clangFormat.extensions
  clangFormat.style
'''

# Name of the temporary index file in which to save the output of
# clang-format. This file is created within the .git directory.
temp_index_basename = 'clang-format-index'


# A run of `count` consecutive changed lines beginning at line `start`.
Range = collections.namedtuple('Range', 'start, count')


def main():
    """Entry point: format the lines that differ from the selected commit.

    Parses the command line (defaults come from the git configuration),
    computes the changed lines, drops files whose extension is not allowed,
    runs clang-format on the rest and then either prints the resulting diff
    (--diff) or applies it to the working directory."""
    config = load_git_config()

    # In order to keep '--' yet allow options after positionals, we need to
    # check for '--' ourselves. (Setting nargs='*' throws away the '--', while
    # nargs=argparse.REMAINDER disallows options after positionals.)
    argv = sys.argv[1:]
    try:
        idx = argv.index('--')
    except ValueError:
        dash_dash = []
    else:
        dash_dash = argv[idx:]
        argv = argv[:idx]

    default_extensions = ','.join([
        # From clang/lib/Frontend/FrontendOptions.cpp, all lower case
        'c', 'h',  # C
        'm',  # ObjC
        'mm',  # ObjC++
        'cc', 'cp', 'cpp', 'c++', 'cxx', 'hpp',  # C++
        # Other languages that clang-format supports
        'proto', 'protodevel',  # Protocol Buffers
        'js',  # JavaScript
        'ts',  # TypeScript
    ])

    p = argparse.ArgumentParser(
        usage=usage, formatter_class=argparse.RawDescriptionHelpFormatter,
        description=desc)
    p.add_argument('--binary',
                   default=config.get('clangformat.binary', 'clang-format'),
                   help='path to clang-format')
    p.add_argument('--commit',
                   default=config.get('clangformat.commit', 'HEAD'),
                   help='default commit to use if none is specified')
    p.add_argument('--diff', action='store_true',
                   help='print a diff instead of applying the changes')
    p.add_argument('--extensions',
                   default=config.get('clangformat.extensions',
                                      default_extensions),
                   help=('comma-separated list of file extensions to format, '
                         'excluding the period and case-insensitive'))
    p.add_argument('-f', '--force', action='store_true',
                   help='allow changes to unstaged files')
    p.add_argument('-p', '--patch', action='store_true',
                   help='select hunks interactively')
    p.add_argument('-q', '--quiet', action='count', default=0,
                   help='print less information')
    p.add_argument('--style',
                   default=config.get('clangformat.style', None),
                   help='passed to clang-format')
    p.add_argument('-v', '--verbose', action='count', default=0,
                   help='print extra information')
    # We gather all the remaining positional arguments into 'args' since we
    # need to use some heuristics to determine whether or not <commit> was
    # present. However, to print pretty messages, we make use of metavar and
    # help.
    p.add_argument('args', nargs='*', metavar='<commit>',
                   help='revision from which to compute the diff')
    p.add_argument('ignored', nargs='*', metavar='<file>...',
                   help='if specified, only consider differences in these '
                        'files')
    opts = p.parse_args(argv)

    # Fold -q into -v so that a single signed verbosity level remains.
    opts.verbose -= opts.quiet
    del opts.quiet

    commit, files = interpret_args(opts.args, dash_dash, opts.commit)
    changed_lines = compute_diff_and_extract_lines(commit, files)
    if opts.verbose >= 1:
        ignored_files = set(changed_lines)
    filter_by_extension(changed_lines, opts.extensions.lower().split(','))
    if opts.verbose >= 1:
        ignored_files.difference_update(changed_lines)
        if ignored_files:
            print('Ignoring changes in the following files (wrong extension):')
            for filename in ignored_files:
                print('  %s' % filename)
        if changed_lines:
            print('Running clang-format on the following files:')
            for filename in changed_lines:
                print('  %s' % filename)
    if not changed_lines:
        print('no modified files to format')
        return
    # The computed diff outputs absolute paths, so we must cd before accessing
    # those files.
    cd_to_toplevel()
    old_tree = create_tree_from_workdir(changed_lines)
    new_tree = run_clang_format_and_save_to_tree(changed_lines,
                                                 binary=opts.binary,
                                                 style=opts.style)
    if opts.verbose >= 1:
        print('old tree: %s' % old_tree)
        print('new tree: %s' % new_tree)
    if old_tree == new_tree:
        if opts.verbose >= 0:
            print('clang-format did not modify any files')
    elif opts.diff:
        print_diff(old_tree, new_tree)
    else:
        changed_files = apply_changes(old_tree, new_tree, force=opts.force,
                                      patch_mode=opts.patch)
        if (opts.verbose >= 0 and not opts.patch) or opts.verbose >= 1:
            print('changed files:')
            for filename in changed_files:
                print('  %s' % filename)


def load_git_config(non_string_options=None):
    """Return the git configuration as a dictionary.

    All options are assumed to be strings unless in `non_string_options`,
    which is a dictionary mapping option name (in lower case) to either
    "--bool" or "--int"."""
    if non_string_options is None:
        non_string_options = {}
    out = {}
    for entry in run('git', 'config', '--list', '--null').split('\0'):
        if not entry:
            continue
        if '\n' in entry:
            name, value = entry.split('\n', 1)
        else:
            # With --null the name and value are separated by '\n'. A setting
            # written without '=' has no value part at all and is implicitly
            # 'true'.
            name, value = entry, 'true'
        if name in non_string_options:
            value = run('git', 'config', non_string_options[name], name)
        out[name] = value
    return out


def interpret_args(args, dash_dash, default_commit):
    """Interpret `args` as "[commit] [--] [files...]" and return (commit, files).

    It is assumed that "--" and everything that follows has been removed from
    args and placed in `dash_dash`.

    If "--" is present (i.e., `dash_dash` is non-empty), the argument to its
    left (if present) is taken as commit. Otherwise, the first argument is
    checked if it is a commit or a file. If commit is not given,
    `default_commit` is used."""
    if dash_dash:
        if len(args) == 0:
            commit = default_commit
        elif len(args) > 1:
            die('at most one commit allowed; %d given' % len(args))
        else:
            commit = args[0]
        object_type = get_object_type(commit)
        if object_type not in ('commit', 'tag'):
            if object_type is None:
                die("'%s' is not a commit" % commit)
            else:
                die("'%s' is a %s, but a commit was expected" %
                    (commit, object_type))
        # dash_dash[0] is the '--' separator itself.
        files = dash_dash[1:]
    elif args:
        if disambiguate_revision(args[0]):
            commit = args[0]
            files = args[1:]
        else:
            commit = default_commit
            files = args
    else:
        commit = default_commit
        files = []
    return commit, files


def disambiguate_revision(value):
    """Returns True if `value` is a revision, False if it is a file, or dies."""
    # If `value` is ambiguous (neither a commit nor a file), the following
    # command will die with an appropriate error message.
    run('git', 'rev-parse', value, verbose=False)
    object_type = get_object_type(value)
    if object_type is None:
        return False
    if object_type in ('commit', 'tag'):
        return True
    die('`%s` is a %s, but a commit or filename was expected' %
        (value, object_type))


def get_object_type(value):
    """Returns a string description of an object's type, or None if it is not
    a valid git object."""
    cmd = ['git', 'cat-file', '-t', value]
    p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # stderr is captured only to keep git's complaint off the terminal.
    stdout, _ = p.communicate()
    if p.returncode != 0:
        return None
    return _to_text(stdout).strip()


def compute_diff_and_extract_lines(commit, files):
    """Calls compute_diff() followed by extract_lines()."""
    diff_process = compute_diff(commit, files)
    changed_lines = extract_lines(diff_process.stdout)
    diff_process.stdout.close()
    diff_process.wait()
    if diff_process.returncode != 0:
        # Assume error was already printed to stderr.
        sys.exit(2)
    return changed_lines


def compute_diff(commit, files):
    """Return a subprocess object producing the diff from `commit`.

    The return value's `stdout` file object will produce a patch with the
    differences between the working directory and `commit`, filtered on
    `files` (if non-empty). Zero context lines are used in the patch."""
    cmd = ['git', 'diff-index', '-p', '-U0', commit, '--']
    cmd.extend(files)
    p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    p.stdin.close()
    return p


def extract_lines(patch_file):
    """Extract the changed lines in `patch_file`.

    `patch_file` is an iterable of patch lines (text or byte strings). The
    input must have been produced with ``-U0``, meaning unidiff format with
    zero lines of context.

    The return value is a dict mapping filename to a list of line `Range`s.
    Hunks that add no lines (count of zero) are not recorded."""
    header_re = re.compile(r'^\+\+\+\ [^/]+/(.*)')
    hunk_re = re.compile(r'^@@ -[0-9,]+ \+(\d+)(,(\d+))?')
    matches = {}
    filename = None
    for line in patch_file:
        # A pipe opened by subprocess yields byte strings on Python 3.
        line = _to_text(line)
        match = header_re.search(line)
        if match:
            filename = match.group(1).rstrip('\r\n')
        match = hunk_re.search(line)
        if match and filename is not None:
            start_line = int(match.group(1))
            # An omitted count in a hunk header means exactly one line.
            line_count = 1
            if match.group(3):
                line_count = int(match.group(3))
            if line_count > 0:
                matches.setdefault(filename, []).append(
                    Range(start_line, line_count))
    return matches


def filter_by_extension(dictionary, allowed_extensions):
    """Delete every key in `dictionary` that doesn't have an allowed extension.

    `allowed_extensions` must be a collection of lowercase file extensions,
    excluding the period. The dictionary is modified in place."""
    allowed_extensions = frozenset(allowed_extensions)
    # Iterate over a snapshot of the keys: entries are deleted inside the
    # loop, which is an error on a live key view in Python 3.
    for filename in list(dictionary):
        base_ext = filename.rsplit('.', 1)
        if len(base_ext) == 1 or base_ext[1].lower() not in allowed_extensions:
            del dictionary[filename]


def cd_to_toplevel():
    """Change to the top level of the git repository."""
    toplevel = run('git', 'rev-parse', '--show-toplevel')
    os.chdir(toplevel)


def create_tree_from_workdir(filenames):
    """Create a new git tree with the given files from the working directory.

    Returns the object ID (SHA-1) of the created tree."""
    return create_tree(filenames, '--stdin')


def run_clang_format_and_save_to_tree(changed_lines, binary='clang-format',
                                      style=None):
    """Run clang-format on each file and save the result to a git tree.

    `changed_lines` maps filename to the line ranges to format.

    Returns the object ID (SHA-1) of the created tree."""
    def index_info_generator():
        for filename, line_ranges in changed_lines.items():
            # Octal mode with a single leading zero: exactly what Python 2's
            # oct() produced, whereas Python 3's oct() would emit '0o...'.
            mode = '0%o' % os.stat(filename).st_mode
            blob_id = clang_format_to_blob(filename, line_ranges,
                                           binary=binary, style=style)
            yield '%s %s\t%s' % (mode, blob_id, filename)
    return create_tree(index_info_generator(), '--index-info')


def create_tree(input_lines, mode):
    """Create a tree object from the given input.

    If mode is '--stdin', it must be a list of filenames. If mode is
    '--index-info' it must be a list of values suitable for "git update-index
    --index-info", such as "<mode> <SP> <sha1> <TAB> <filename>". Any other
    mode is invalid.

    Returns the object ID of the tree written from a temporary index."""
    assert mode in ('--stdin', '--index-info')
    cmd = ['git', 'update-index', '--add', '-z', mode]
    with temporary_index_file():
        p = subprocess.Popen(cmd, stdin=subprocess.PIPE)
        for line in input_lines:
            # -z: every record is terminated by NUL; the pipe wants bytes.
            p.stdin.write(_to_bytes('%s\0' % line))
        p.stdin.close()
        if p.wait() != 0:
            die('`%s` failed' % ' '.join(cmd))
        tree_id = run('git', 'write-tree')
        return tree_id


def clang_format_to_blob(filename, line_ranges, binary='clang-format',
                         style=None):
    """Run clang-format on the given file and save the result to a git blob.

    Only the lines covered by `line_ranges` are passed to clang-format via
    -lines=. The formatter's output is piped straight into
    "git hash-object -w".

    Returns the object ID (SHA-1) of the created blob."""
    clang_format_cmd = [binary, filename]
    if style:
        clang_format_cmd.extend(['-style=' + style])
    clang_format_cmd.extend([
        '-lines=%s:%s' % (start_line, start_line + line_count - 1)
        for start_line, line_count in line_ranges])
    try:
        clang_format = subprocess.Popen(clang_format_cmd,
                                        stdin=subprocess.PIPE,
                                        stdout=subprocess.PIPE)
    except OSError as e:
        if e.errno == errno.ENOENT:
            die('cannot find executable "%s"' % binary)
        else:
            raise
    clang_format.stdin.close()
    hash_object_cmd = ['git', 'hash-object', '-w', '--path=' + filename,
                       '--stdin']
    hash_object = subprocess.Popen(hash_object_cmd, stdin=clang_format.stdout,
                                   stdout=subprocess.PIPE)
    # hash-object now owns the read end of the pipe; drop our copy.
    clang_format.stdout.close()
    stdout = hash_object.communicate()[0]
    if hash_object.returncode != 0:
        die('`%s` failed' % ' '.join(hash_object_cmd))
    if clang_format.wait() != 0:
        die('`%s` failed' % ' '.join(clang_format_cmd))
    return _to_text(stdout).rstrip('\r\n')


@contextlib.contextmanager
def temporary_index_file(tree=None):
    """Context manager for setting GIT_INDEX_FILE to a temporary file and
    deleting the file afterward.

    The previous value of GIT_INDEX_FILE (or its absence) is restored on
    exit."""
    index_path = create_temporary_index(tree)
    old_index_path = os.environ.get('GIT_INDEX_FILE')
    os.environ['GIT_INDEX_FILE'] = index_path
    try:
        yield
    finally:
        if old_index_path is None:
            del os.environ['GIT_INDEX_FILE']
        else:
            os.environ['GIT_INDEX_FILE'] = old_index_path
        os.remove(index_path)


def create_temporary_index(tree=None):
    """Create a temporary index file and return the created file's path.

    If `tree` is not None, use that as the tree to read in. Otherwise, an
    empty index is created."""
    gitdir = run('git', 'rev-parse', '--git-dir')
    path = os.path.join(gitdir, temp_index_basename)
    if tree is None:
        tree = '--empty'
    run('git', 'read-tree', '--index-output=' + path, tree)
    return path


def print_diff(old_tree, new_tree):
    """Print the diff between the two trees to stdout."""
    # We use the porcelain 'diff' and not plumbing 'diff-tree' because the
    # output is expected to be viewed by the user, and only the former does
    # nice things like color and pagination.
    subprocess.check_call(['git', 'diff', old_tree, new_tree, '--'])


def apply_changes(old_tree, new_tree, force=False, patch_mode=False):
    """Apply the changes in `new_tree` to the working directory.

    Bails if there are local changes in those files and not `force`. If
    `patch_mode`, runs `git checkout --patch` to select hunks interactively.

    Returns the list of files that differ between the two trees."""
    changed_files = run('git', 'diff-tree', '-r', '-z', '--name-only',
                        old_tree, new_tree).rstrip('\0').split('\0')
    if not force:
        unstaged_files = run('git', 'diff-files', '--name-status',
                             *changed_files)
        if unstaged_files:
            _print_error('The following files would be modified but '
                         'have unstaged changes:')
            _print_error(unstaged_files)
            _print_error('Please commit, stage, or stash them first.')
            sys.exit(2)
    if patch_mode:
        # In patch mode, we could just as well create an index from the new
        # tree and checkout from that, but then the user will be presented
        # with a message saying "Discard ... from worktree". Instead, we use
        # the old tree as the index and checkout from new_tree, which gives
        # the slightly better message, "Apply ... to index and worktree". This
        # is not quite right, since it won't be applied to the user's index,
        # but oh well.
        with temporary_index_file(old_tree):
            subprocess.check_call(['git', 'checkout', '--patch', new_tree])
    else:
        with temporary_index_file(new_tree):
            run('git', 'checkout-index', '-a', '-f')
    return changed_files


def _to_text(data):
    """Return `data` as a native string.

    On Python 3, byte strings are decoded as UTF-8; undecodable bytes are
    kept as surrogate escapes so that `_to_bytes` can restore them. On
    Python 2, where `bytes` is `str`, `data` is returned unchanged."""
    if isinstance(data, bytes) and not isinstance(data, str):
        return data.decode('utf-8', 'surrogateescape')
    return data


def _to_bytes(text):
    """Return `text` as a byte string suitable for writing to a pipe."""
    if isinstance(text, bytes):
        return text
    return text.encode('utf-8', 'surrogateescape')


def _print_error(message):
    """Write `message` followed by a newline to stderr."""
    sys.stderr.write('%s\n' % (message,))


def run(*args, **kwargs):
    """Run the command `args` and return what it wrote to stdout.

    Keyword arguments:
      stdin   -- data fed to the command's standard input (default: '').
      verbose -- if true (default), print a line naming the command when it
                 writes to stderr or fails.
      strip   -- if true (default), strip trailing newline characters from
                 the returned output.

    Anything the command writes to stderr is forwarded to stderr. If the
    command exits with a non-zero status, the script exits with status 2."""
    stdin = kwargs.pop('stdin', '')
    verbose = kwargs.pop('verbose', True)
    strip = kwargs.pop('strip', True)
    for name in kwargs:
        raise TypeError("run() got an unexpected keyword argument '%s'" % name)
    p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                         stdin=subprocess.PIPE)
    stdout, stderr = p.communicate(input=_to_bytes(stdin))
    stdout = _to_text(stdout)
    stderr = _to_text(stderr)
    if p.returncode == 0:
        if stderr:
            if verbose:
                _print_error('`%s` printed to stderr:' % ' '.join(args))
            _print_error(stderr.rstrip())
        if strip:
            stdout = stdout.rstrip('\r\n')
        return stdout
    if verbose:
        _print_error('`%s` returned %s' % (' '.join(args), p.returncode))
    if stderr:
        _print_error(stderr.rstrip())
    sys.exit(2)


def die(message):
    """Print `message` to stderr as an error and exit with status 2."""
    _print_error('error: %s' % (message,))
    sys.exit(2)


if __name__ == '__main__':
    main()