#!/usr/bin/env python
#
#===- git-clang-format - ClangFormat Git Integration ---------*- python -*--===#
#
#                     The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
#===------------------------------------------------------------------------===#

r"""
clang-format git integration
============================

This file provides a clang-format integration for git. Put it somewhere in your
path and ensure that it is executable. Then, "git clang-format" will invoke
clang-format on the changes in current files or a specific commit.

For further details, run:
git clang-format -h

Requires Python 2.7 or Python 3
"""

import argparse
import collections
import contextlib
import errno
import os
import re
import subprocess
import sys

usage = 'git clang-format [OPTIONS] [<commit>] [--] [<file>...]'

desc = '''
Run clang-format on all lines that differ between the working directory
and <commit>, which defaults to HEAD. Changes are only applied to the working
directory.

The following git-config settings set the default of the corresponding option:
  clangFormat.binary
  clangFormat.commit
  clangFormat.extensions
  clangFormat.style
'''

# Name of the temporary index file in which to save the output of clang-format.
# This file is created within the .git directory.
temp_index_basename = 'clang-format-index'


# A run of `count` consecutive lines beginning at the 1-based line `start`.
Range = collections.namedtuple('Range', 'start, count')


def main():
    """Parse the command line and format the changed lines of the work tree."""
    config = load_git_config()

    # In order to keep '--' yet allow options after positionals, we need to
    # check for '--' ourselves.  (Setting nargs='*' throws away the '--', while
    # nargs=argparse.REMAINDER disallows options after positionals.)
    argv = sys.argv[1:]
    try:
        idx = argv.index('--')
    except ValueError:
        dash_dash = []
    else:
        dash_dash = argv[idx:]
        argv = argv[:idx]

    default_extensions = ','.join([
        # From clang/lib/Frontend/FrontendOptions.cpp, all lower case
        'c', 'h',  # C
        'm',  # ObjC
        'mm',  # ObjC++
        'cc', 'cp', 'cpp', 'c++', 'cxx', 'hpp',  # C++
    ])

    p = argparse.ArgumentParser(
        usage=usage, formatter_class=argparse.RawDescriptionHelpFormatter,
        description=desc)
    p.add_argument('--binary',
                   default=config.get('clangformat.binary', 'clang-format'),
                   help='path to clang-format')
    p.add_argument('--commit',
                   default=config.get('clangformat.commit', 'HEAD'),
                   help='default commit to use if none is specified')
    p.add_argument('--diff', action='store_true',
                   help='print a diff instead of applying the changes')
    p.add_argument('--extensions',
                   default=config.get('clangformat.extensions',
                                      default_extensions),
                   help=('comma-separated list of file extensions to format, '
                         'excluding the period and case-insensitive'))
    p.add_argument('-f', '--force', action='store_true',
                   help='allow changes to unstaged files')
    p.add_argument('-p', '--patch', action='store_true',
                   help='select hunks interactively')
    p.add_argument('-q', '--quiet', action='count', default=0,
                   help='print less information')
    p.add_argument('--style',
                   default=config.get('clangformat.style', None),
                   help='passed to clang-format')
    p.add_argument('-v', '--verbose', action='count', default=0,
                   help='print extra information')
    # We gather all the remaining positional arguments into 'args' since we
    # need to use some heuristics to determine whether or not <commit> was
    # present.  However, to print pretty messages, we make use of metavar and
    # help.
    p.add_argument('args', nargs='*', metavar='<commit>',
                   help='revision from which to compute the diff')
    p.add_argument('ignored', nargs='*', metavar='<file>...',
                   help='if specified, only consider differences in these '
                        'files')
    opts = p.parse_args(argv)

    # Fold -q into -v so that a single signed verbosity level remains.
    opts.verbose -= opts.quiet
    del opts.quiet

    commit, files = interpret_args(opts.args, dash_dash, opts.commit)
    changed_lines = compute_diff_and_extract_lines(commit, files)
    if opts.verbose >= 1:
        ignored_files = set(changed_lines)
    filter_by_extension(changed_lines, opts.extensions.lower().split(','))
    if opts.verbose >= 1:
        ignored_files.difference_update(changed_lines)
        if ignored_files:
            print('Ignoring changes in the following files (wrong extension):')
            for filename in ignored_files:
                print('    %s' % filename)
        if changed_lines:
            print('Running clang-format on the following files:')
            for filename in changed_lines:
                print('    %s' % filename)
    if not changed_lines:
        print('no modified files to format')
        return
    # The computed diff outputs paths relative to the top level of the
    # repository, so we must cd there before accessing those files.
    cd_to_toplevel()
    old_tree = create_tree_from_workdir(changed_lines)
    new_tree = run_clang_format_and_save_to_tree(changed_lines,
                                                 binary=opts.binary,
                                                 style=opts.style)
    if opts.verbose >= 1:
        print('old tree: %s' % old_tree)
        print('new tree: %s' % new_tree)
    if old_tree == new_tree:
        if opts.verbose >= 0:
            print('clang-format did not modify any files')
    elif opts.diff:
        print_diff(old_tree, new_tree)
    else:
        changed_files = apply_changes(old_tree, new_tree, force=opts.force,
                                      patch_mode=opts.patch)
        if (opts.verbose >= 0 and not opts.patch) or opts.verbose >= 1:
            print('changed files:')
            for filename in changed_files:
                print('    %s' % filename)


def load_git_config(non_string_options=None):
    """Return the git configuration as a dictionary.

    All options are assumed to be strings unless in `non_string_options`,
    which is a dictionary mapping option name (in lower case) to either
    "--bool" or "--int"."""
    if non_string_options is None:
        non_string_options = {}
    out = {}
    for entry in run('git', 'config', '--list', '--null').split('\0'):
        if entry:
            # Each entry is "<name>\n<value>".  A key that was set without a
            # value has no newline at all, so partition() is used instead of
            # an unpacking split() that would raise on such entries.
            name, _, value = entry.partition('\n')
            if name in non_string_options:
                value = run('git', 'config', non_string_options[name], name)
            out[name] = value
    return out


def interpret_args(args, dash_dash, default_commit):
    """Interpret `args` as "[commit] [--] [files...]"; return (commit, files).

    It is assumed that "--" and everything that follows has been removed from
    args and placed in `dash_dash`.

    If "--" is present (i.e., `dash_dash` is non-empty), the argument to its
    left (if present) is taken as commit.  Otherwise, the first argument is
    checked if it is a commit or a file.  If commit is not given,
    `default_commit` is used."""
    if dash_dash:
        if len(args) == 0:
            commit = default_commit
        elif len(args) > 1:
            die('at most one commit allowed; %d given' % len(args))
        else:
            commit = args[0]
            object_type = get_object_type(commit)
            if object_type not in ('commit', 'tag'):
                if object_type is None:
                    die("'%s' is not a commit" % commit)
                else:
                    die("'%s' is a %s, but a commit was expected" %
                        (commit, object_type))
        # dash_dash[0] is the "--" separator itself.
        files = dash_dash[1:]
    elif args:
        if disambiguate_revision(args[0]):
            commit = args[0]
            files = args[1:]
        else:
            commit = default_commit
            files = args
    else:
        commit = default_commit
        files = []
    return commit, files


def disambiguate_revision(value):
    """Return True if `value` is a revision, False if it is a file, or die."""
    # If `value` is ambiguous (neither a commit nor a file), the following
    # command will die with an appropriate error message.
    run('git', 'rev-parse', value, verbose=False)
    object_type = get_object_type(value)
    if object_type is None:
        return False
    if object_type in ('commit', 'tag'):
        return True
    die('`%s` is a %s, but a commit or filename was expected' %
        (value, object_type))


def get_object_type(value):
    """Return a string description of an object's type, or None if it is not
    a valid git object."""
    cmd = ['git', 'cat-file', '-t', value]
    p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, stderr = p.communicate()
    if p.returncode != 0:
        return None
    return _to_text(stdout.strip())


def compute_diff_and_extract_lines(commit, files):
    """Call compute_diff() followed by extract_lines().

    Exits with status 2 if the diff command fails."""
    diff_process = compute_diff(commit, files)
    changed_lines = extract_lines(diff_process.stdout)
    diff_process.stdout.close()
    diff_process.wait()
    if diff_process.returncode != 0:
        # Assume error was already printed to stderr.
        sys.exit(2)
    return changed_lines


def compute_diff(commit, files):
    """Return a subprocess object producing the diff from `commit`.

    The return value's `stdout` file object will produce a patch with the
    differences between the working directory and `commit`, filtered on
    `files` (if non-empty).  Zero context lines are used in the patch."""
    cmd = ['git', 'diff-index', '-p', '-U0', commit, '--']
    cmd.extend(files)
    p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    p.stdin.close()
    return p


def extract_lines(patch_file):
    """Extract the changed lines in `patch_file`.

    `patch_file` is an iterable of patch lines (text or UTF-8 bytes).  The
    input must have been produced with ``-U0``, meaning unidiff format with
    zero lines of context.

    The return value is a dict mapping filename to a list of line `Range`s.
    Hunks that add no lines (pure deletions) are omitted."""
    matches = {}
    for line in patch_file:
        line = _to_text(line)
        match = re.search(r'^\+\+\+\ [^/]+/(.*)', line)
        if match:
            # git appends a TAB to header paths that contain whitespace; it is
            # not part of the filename and must go along with the newline.
            filename = match.group(1).rstrip('\r\n\t')
        match = re.search(r'^@@ -[0-9,]+ \+(\d+)(,(\d+))?', line)
        if match:
            start_line = int(match.group(1))
            # An omitted count in a hunk header means exactly one line.
            line_count = 1
            if match.group(3):
                line_count = int(match.group(3))
            if line_count > 0:
                matches.setdefault(filename, []).append(
                    Range(start_line, line_count))
    return matches


def filter_by_extension(dictionary, allowed_extensions):
    """Delete every key in `dictionary` that doesn't have an allowed extension.

    `allowed_extensions` must be a collection of lowercase file extensions,
    excluding the period.  `dictionary` is modified in place."""
    allowed_extensions = frozenset(allowed_extensions)
    # Iterate over a snapshot of the keys because entries are deleted below.
    for filename in list(dictionary):
        base_ext = filename.rsplit('.', 1)
        if len(base_ext) == 1 or base_ext[1].lower() not in allowed_extensions:
            del dictionary[filename]


def cd_to_toplevel():
    """Change to the top level of the git repository."""
    toplevel = run('git', 'rev-parse', '--show-toplevel')
    os.chdir(toplevel)


def create_tree_from_workdir(filenames):
    """Create a new git tree with the given files from the working directory.

    Returns the object ID (SHA-1) of the created tree."""
    return create_tree(filenames, '--stdin')


def run_clang_format_and_save_to_tree(changed_lines, binary='clang-format',
                                      style=None):
    """Run clang-format on each file and save the result to a git tree.

    `changed_lines` maps filename to the list of `Range`s to format.

    Returns the object ID (SHA-1) of the created tree."""
    def index_info_generator():
        for filename, line_ranges in changed_lines.items():
            # Octal mode with a single leading zero.  oct() is avoided because
            # Python 3 renders it with a "0o" prefix that git does not accept.
            mode = '0%o' % os.stat(filename).st_mode
            blob_id = clang_format_to_blob(filename, line_ranges,
                                           binary=binary, style=style)
            yield '%s %s\t%s' % (mode, blob_id, filename)
    return create_tree(index_info_generator(), '--index-info')


def create_tree(input_lines, mode):
    """Create a tree object from the given input.

    If mode is '--stdin', it must be a list of filenames.  If mode is
    '--index-info' it must be a list of values suitable for "git update-index
    --index-info", such as "<mode> <SP> <sha1> <TAB> <filename>".  Any other
    mode is invalid.

    Returns the object ID (SHA-1) of the created tree."""
    assert mode in ('--stdin', '--index-info')
    cmd = ['git', 'update-index', '--add', '-z', mode]
    with temporary_index_file():
        p = subprocess.Popen(cmd, stdin=subprocess.PIPE)
        for line in input_lines:
            # -z: entries are NUL-terminated.
            p.stdin.write(_to_bytes('%s\0' % line))
        p.stdin.close()
        if p.wait() != 0:
            die('`%s` failed' % ' '.join(cmd))
        tree_id = run('git', 'write-tree')
        return tree_id


def clang_format_to_blob(filename, line_ranges, binary='clang-format',
                         style=None):
    """Run clang-format on the given file and save the result to a git blob.

    Only the lines covered by `line_ranges` are reformatted.

    Returns the object ID (SHA-1) of the created blob."""
    clang_format_cmd = [binary, filename]
    if style:
        clang_format_cmd.extend(['-style=' + style])
    clang_format_cmd.extend([
        '-lines=%s:%s' % (start_line, start_line + line_count - 1)
        for start_line, line_count in line_ranges])
    try:
        clang_format = subprocess.Popen(clang_format_cmd,
                                        stdin=subprocess.PIPE,
                                        stdout=subprocess.PIPE)
    except OSError as e:
        if e.errno == errno.ENOENT:
            die('cannot find executable "%s"' % binary)
        else:
            raise
    clang_format.stdin.close()
    # Pipe clang-format's output straight into `git hash-object`.
    hash_object_cmd = ['git', 'hash-object', '-w', '--path=' + filename,
                       '--stdin']
    hash_object = subprocess.Popen(hash_object_cmd, stdin=clang_format.stdout,
                                   stdout=subprocess.PIPE)
    # Drop our copy of the pipe's read end; hash-object holds its own.
    clang_format.stdout.close()
    stdout = hash_object.communicate()[0]
    if hash_object.returncode != 0:
        die('`%s` failed' % ' '.join(hash_object_cmd))
    if clang_format.wait() != 0:
        die('`%s` failed' % ' '.join(clang_format_cmd))
    return _to_text(stdout).rstrip('\r\n')


@contextlib.contextmanager
def temporary_index_file(tree=None):
    """Context manager for setting GIT_INDEX_FILE to a temporary file and
    deleting the file afterward.

    The previous value of GIT_INDEX_FILE (or its absence) is restored on
    exit."""
    index_path = create_temporary_index(tree)
    old_index_path = os.environ.get('GIT_INDEX_FILE')
    os.environ['GIT_INDEX_FILE'] = index_path
    try:
        yield
    finally:
        if old_index_path is None:
            del os.environ['GIT_INDEX_FILE']
        else:
            os.environ['GIT_INDEX_FILE'] = old_index_path
        os.remove(index_path)


def create_temporary_index(tree=None):
    """Create a temporary index file and return the created file's path.

    If `tree` is not None, use that as the tree to read in.  Otherwise, an
    empty index is created."""
    gitdir = run('git', 'rev-parse', '--git-dir')
    path = os.path.join(gitdir, temp_index_basename)
    if tree is None:
        tree = '--empty'
    run('git', 'read-tree', '--index-output=' + path, tree)
    return path


def print_diff(old_tree, new_tree):
    """Print the diff between the two trees to stdout."""
    # We use the porcelain 'diff' and not plumbing 'diff-tree' because the
    # output is expected to be viewed by the user, and only the former does
    # nice things like color and pagination.
    subprocess.check_call(['git', 'diff', old_tree, new_tree, '--'])


def apply_changes(old_tree, new_tree, force=False, patch_mode=False):
    """Apply the changes in `new_tree` to the working directory.

    Bails if there are local changes in those files and not `force`.  If
    `patch_mode`, runs `git checkout --patch` to select hunks interactively.

    Returns the list of files that differ between the two trees."""
    changed_files = run('git', 'diff-tree', '-r', '-z', '--name-only',
                        old_tree, new_tree).rstrip('\0').split('\0')
    if not force:
        # '--' keeps a filename that starts with '-' from being read as an
        # option.
        unstaged_files = run('git', 'diff-files', '--name-status', '--',
                             *changed_files)
        if unstaged_files:
            _eprint('The following files would be modified but '
                    'have unstaged changes:')
            _eprint(unstaged_files)
            _eprint('Please commit, stage, or stash them first.')
            sys.exit(2)
    if patch_mode:
        # In patch mode, we could just as well create an index from the new
        # tree and checkout from that, but then the user will be presented
        # with a message saying "Discard ... from worktree".  Instead, we use
        # the old tree as the index and checkout from new_tree, which gives
        # the slightly better message, "Apply ... to index and worktree".
        # This is not quite right, since it won't be applied to the user's
        # index, but oh well.
        with temporary_index_file(old_tree):
            subprocess.check_call(['git', 'checkout', '--patch', new_tree])
    else:
        with temporary_index_file(new_tree):
            run('git', 'checkout-index', '-a', '-f')
    return changed_files


def run(*args, **kwargs):
    """Run the command `args` and return its standard output as a string.

    Keyword arguments:
      stdin:   data to feed to the command's standard input (default: none).
      verbose: if true (the default), echo anything a successful command wrote
               to stderr and report the exit status of a failing one.
      strip:   if true (the default), strip trailing newlines from the output.

    If the command exits with a non-zero status, its stderr is printed and the
    script exits with status 2.  Any other keyword raises TypeError."""
    stdin = kwargs.pop('stdin', '')
    verbose = kwargs.pop('verbose', True)
    strip = kwargs.pop('strip', True)
    for name in kwargs:
        raise TypeError("run() got an unexpected keyword argument '%s'" % name)
    p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                         stdin=subprocess.PIPE)
    # The pipes are binary: encode what goes in, decode what comes out.
    stdout, stderr = p.communicate(input=_to_bytes(stdin))
    stdout = _to_text(stdout)
    stderr = _to_text(stderr)
    if p.returncode == 0:
        if stderr:
            if verbose:
                _eprint('`%s` printed to stderr:' % ' '.join(args))
            _eprint(stderr.rstrip())
        if strip:
            stdout = stdout.rstrip('\r\n')
        return stdout
    if verbose:
        _eprint('`%s` returned %s' % (' '.join(args), p.returncode))
    if stderr:
        _eprint(stderr.rstrip())
    sys.exit(2)


def die(message):
    """Print `message` to stderr as an error and exit with status 2."""
    _eprint('error: %s' % message)
    sys.exit(2)


def _eprint(message):
    """Write `message` followed by a newline to stderr."""
    sys.stderr.write('%s\n' % message)


def _to_text(data):
    """Return `data` as a native `str`.

    On Python 2 subprocess output already is `str`; on Python 3 it is `bytes`
    and is decoded as UTF-8.  Undecodable bytes are kept as surrogate escapes
    so that filenames survive a round trip through `_to_bytes`."""
    if isinstance(data, str):
        return data
    return data.decode('utf-8', 'surrogateescape')


def _to_bytes(text):
    """Return `text` as a byte string suitable for writing to a pipe."""
    if isinstance(text, bytes):
        return text
    return text.encode('utf-8', 'surrogateescape')


if __name__ == '__main__':
    main()