#!/usr/bin/env python
#
#===- git-clang-format - ClangFormat Git Integration ---------*- python -*--===#
#
#                     The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
#===------------------------------------------------------------------------===#

r"""
clang-format git integration
============================

This file provides a clang-format integration for git. Put it somewhere in your
path and ensure that it is executable. Then, "git clang-format" will invoke
clang-format on the changes in current files or a specific commit.

For further details, run:
git clang-format -h

Requires Python 2.7
"""
     25 
     26 import argparse
     27 import collections
     28 import contextlib
     29 import errno
     30 import os
     31 import re
     32 import subprocess
     33 import sys
     34 
     35 usage = 'git clang-format [OPTIONS] [<commit>] [<commit>] [--] [<file>...]'
     36 
     37 desc = '''
     38 If zero or one commits are given, run clang-format on all lines that differ
     39 between the working directory and <commit>, which defaults to HEAD.  Changes are
     40 only applied to the working directory.
     41 
     42 If two commits are given (requires --diff), run clang-format on all lines in the
     43 second <commit> that differ from the first <commit>.
     44 
     45 The following git-config settings set the default of the corresponding option:
     46   clangFormat.binary
     47   clangFormat.commit
     48   clangFormat.extension
     49   clangFormat.style
     50 '''
     51 
     52 # Name of the temporary index file in which save the output of clang-format.
     53 # This file is created within the .git directory.
     54 temp_index_basename = 'clang-format-index'
     55 
     56 
     57 Range = collections.namedtuple('Range', 'start, count')
     58 
     59 
     60 def main():
     61   config = load_git_config()
     62 
     63   # In order to keep '--' yet allow options after positionals, we need to
     64   # check for '--' ourselves.  (Setting nargs='*' throws away the '--', while
     65   # nargs=argparse.REMAINDER disallows options after positionals.)
     66   argv = sys.argv[1:]
     67   try:
     68     idx = argv.index('--')
     69   except ValueError:
     70     dash_dash = []
     71   else:
     72     dash_dash = argv[idx:]
     73     argv = argv[:idx]
     74 
     75   default_extensions = ','.join([
     76       # From clang/lib/Frontend/FrontendOptions.cpp, all lower case
     77       'c', 'h',  # C
     78       'm',  # ObjC
     79       'mm',  # ObjC++
     80       'cc', 'cp', 'cpp', 'c++', 'cxx', 'hpp',  # C++
     81       # Other languages that clang-format supports
     82       'proto', 'protodevel',  # Protocol Buffers
     83       'java',  # Java
     84       'js',  # JavaScript
     85       'ts',  # TypeScript
     86       ])
     87 
     88   p = argparse.ArgumentParser(
     89     usage=usage, formatter_class=argparse.RawDescriptionHelpFormatter,
     90     description=desc)
     91   p.add_argument('--binary',
     92                  default=config.get('clangformat.binary', 'clang-format'),
     93                  help='path to clang-format'),
     94   p.add_argument('--commit',
     95                  default=config.get('clangformat.commit', 'HEAD'),
     96                  help='default commit to use if none is specified'),
     97   p.add_argument('--diff', action='store_true',
     98                  help='print a diff instead of applying the changes')
     99   p.add_argument('--extensions',
    100                  default=config.get('clangformat.extensions',
    101                                     default_extensions),
    102                  help=('comma-separated list of file extensions to format, '
    103                        'excluding the period and case-insensitive')),
    104   p.add_argument('-f', '--force', action='store_true',
    105                  help='allow changes to unstaged files')
    106   p.add_argument('-p', '--patch', action='store_true',
    107                  help='select hunks interactively')
    108   p.add_argument('-q', '--quiet', action='count', default=0,
    109                  help='print less information')
    110   p.add_argument('--style',
    111                  default=config.get('clangformat.style', None),
    112                  help='passed to clang-format'),
    113   p.add_argument('-v', '--verbose', action='count', default=0,
    114                  help='print extra information')
    115   # We gather all the remaining positional arguments into 'args' since we need
    116   # to use some heuristics to determine whether or not <commit> was present.
    117   # However, to print pretty messages, we make use of metavar and help.
    118   p.add_argument('args', nargs='*', metavar='<commit>',
    119                  help='revision from which to compute the diff')
    120   p.add_argument('ignored', nargs='*', metavar='<file>...',
    121                  help='if specified, only consider differences in these files')
    122   opts = p.parse_args(argv)
    123 
    124   opts.verbose -= opts.quiet
    125   del opts.quiet
    126 
    127   commits, files = interpret_args(opts.args, dash_dash, opts.commit)
    128   if len(commits) > 1:
    129     if not opts.diff:
    130       die('--diff is required when two commits are given')
    131   else:
    132     if len(commits) > 2:
    133       die('at most two commits allowed; %d given' % len(commits))
    134   changed_lines = compute_diff_and_extract_lines(commits, files)
    135   if opts.verbose >= 1:
    136     ignored_files = set(changed_lines)
    137   filter_by_extension(changed_lines, opts.extensions.lower().split(','))
    138   if opts.verbose >= 1:
    139     ignored_files.difference_update(changed_lines)
    140     if ignored_files:
    141       print 'Ignoring changes in the following files (wrong extension):'
    142       for filename in ignored_files:
    143         print '   ', filename
    144     if changed_lines:
    145       print 'Running clang-format on the following files:'
    146       for filename in changed_lines:
    147         print '   ', filename
    148   if not changed_lines:
    149     print 'no modified files to format'
    150     return
    151   # The computed diff outputs absolute paths, so we must cd before accessing
    152   # those files.
    153   cd_to_toplevel()
    154   if len(commits) > 1:
    155     old_tree = commits[1]
    156     new_tree = run_clang_format_and_save_to_tree(changed_lines,
    157                                                  revision=commits[1],
    158                                                  binary=opts.binary,
    159                                                  style=opts.style)
    160   else:
    161     old_tree = create_tree_from_workdir(changed_lines)
    162     new_tree = run_clang_format_and_save_to_tree(changed_lines,
    163                                                  binary=opts.binary,
    164                                                  style=opts.style)
    165   if opts.verbose >= 1:
    166     print 'old tree:', old_tree
    167     print 'new tree:', new_tree
    168   if old_tree == new_tree:
    169     if opts.verbose >= 0:
    170       print 'clang-format did not modify any files'
    171   elif opts.diff:
    172     print_diff(old_tree, new_tree)
    173   else:
    174     changed_files = apply_changes(old_tree, new_tree, force=opts.force,
    175                                   patch_mode=opts.patch)
    176     if (opts.verbose >= 0 and not opts.patch) or opts.verbose >= 1:
    177       print 'changed files:'
    178       for filename in changed_files:
    179         print '   ', filename
    180 
    181 
    182 def load_git_config(non_string_options=None):
    183   """Return the git configuration as a dictionary.
    184 
    185   All options are assumed to be strings unless in `non_string_options`, in which
    186   is a dictionary mapping option name (in lower case) to either "--bool" or
    187   "--int"."""
    188   if non_string_options is None:
    189     non_string_options = {}
    190   out = {}
    191   for entry in run('git', 'config', '--list', '--null').split('\0'):
    192     if entry:
    193       name, value = entry.split('\n', 1)
    194       if name in non_string_options:
    195         value = run('git', 'config', non_string_options[name], name)
    196       out[name] = value
    197   return out
    198 
    199 
    200 def interpret_args(args, dash_dash, default_commit):
    201   """Interpret `args` as "[commits] [--] [files]" and return (commits, files).
    202 
    203   It is assumed that "--" and everything that follows has been removed from
    204   args and placed in `dash_dash`.
    205 
    206   If "--" is present (i.e., `dash_dash` is non-empty), the arguments to its
    207   left (if present) are taken as commits.  Otherwise, the arguments are checked
    208   from left to right if they are commits or files.  If commits are not given,
    209   a list with `default_commit` is used."""
    210   if dash_dash:
    211     if len(args) == 0:
    212       commits = [default_commit]
    213     else:
    214       commits = args
    215     for commit in commits:
    216       object_type = get_object_type(commit)
    217       if object_type not in ('commit', 'tag'):
    218         if object_type is None:
    219           die("'%s' is not a commit" % commit)
    220         else:
    221           die("'%s' is a %s, but a commit was expected" % (commit, object_type))
    222     files = dash_dash[1:]
    223   elif args:
    224     commits = []
    225     while args:
    226       if not disambiguate_revision(args[0]):
    227         break
    228       commits.append(args.pop(0))
    229     if not commits:
    230       commits = [default_commit]
    231     files = args
    232   else:
    233     commits = [default_commit]
    234     files = []
    235   return commits, files
    236 
    237 
    238 def disambiguate_revision(value):
    239   """Returns True if `value` is a revision, False if it is a file, or dies."""
    240   # If `value` is ambiguous (neither a commit nor a file), the following
    241   # command will die with an appropriate error message.
    242   run('git', 'rev-parse', value, verbose=False)
    243   object_type = get_object_type(value)
    244   if object_type is None:
    245     return False
    246   if object_type in ('commit', 'tag'):
    247     return True
    248   die('`%s` is a %s, but a commit or filename was expected' %
    249       (value, object_type))
    250 
    251 
    252 def get_object_type(value):
    253   """Returns a string description of an object's type, or None if it is not
    254   a valid git object."""
    255   cmd = ['git', 'cat-file', '-t', value]
    256   p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    257   stdout, stderr = p.communicate()
    258   if p.returncode != 0:
    259     return None
    260   return stdout.strip()
    261 
    262 
    263 def compute_diff_and_extract_lines(commits, files):
    264   """Calls compute_diff() followed by extract_lines()."""
    265   diff_process = compute_diff(commits, files)
    266   changed_lines = extract_lines(diff_process.stdout)
    267   diff_process.stdout.close()
    268   diff_process.wait()
    269   if diff_process.returncode != 0:
    270     # Assume error was already printed to stderr.
    271     sys.exit(2)
    272   return changed_lines
    273 
    274 
    275 def compute_diff(commits, files):
    276   """Return a subprocess object producing the diff from `commits`.
    277 
    278   The return value's `stdin` file object will produce a patch with the
    279   differences between the working directory and the first commit if a single
    280   one was specified, or the difference between both specified commits, filtered
    281   on `files` (if non-empty).  Zero context lines are used in the patch."""
    282   git_tool = 'diff-index'
    283   if len(commits) > 1:
    284     git_tool = 'diff-tree'
    285   cmd = ['git', git_tool, '-p', '-U0'] + commits + ['--']
    286   cmd.extend(files)
    287   p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    288   p.stdin.close()
    289   return p
    290 
    291 
    292 def extract_lines(patch_file):
    293   """Extract the changed lines in `patch_file`.
    294 
    295   The return value is a dictionary mapping filename to a list of (start_line,
    296   line_count) pairs.
    297 
    298   The input must have been produced with ``-U0``, meaning unidiff format with
    299   zero lines of context.  The return value is a dict mapping filename to a
    300   list of line `Range`s."""
    301   matches = {}
    302   for line in patch_file:
    303     match = re.search(r'^\+\+\+\ [^/]+/(.*)', line)
    304     if match:
    305       filename = match.group(1).rstrip('\r\n')
    306     match = re.search(r'^@@ -[0-9,]+ \+(\d+)(,(\d+))?', line)
    307     if match:
    308       start_line = int(match.group(1))
    309       line_count = 1
    310       if match.group(3):
    311         line_count = int(match.group(3))
    312       if line_count > 0:
    313         matches.setdefault(filename, []).append(Range(start_line, line_count))
    314   return matches
    315 
    316 
    317 def filter_by_extension(dictionary, allowed_extensions):
    318   """Delete every key in `dictionary` that doesn't have an allowed extension.
    319 
    320   `allowed_extensions` must be a collection of lowercase file extensions,
    321   excluding the period."""
    322   allowed_extensions = frozenset(allowed_extensions)
    323   for filename in dictionary.keys():
    324     base_ext = filename.rsplit('.', 1)
    325     if len(base_ext) == 1 or base_ext[1].lower() not in allowed_extensions:
    326       del dictionary[filename]
    327 
    328 
    329 def cd_to_toplevel():
    330   """Change to the top level of the git repository."""
    331   toplevel = run('git', 'rev-parse', '--show-toplevel')
    332   os.chdir(toplevel)
    333 
    334 
    335 def create_tree_from_workdir(filenames):
    336   """Create a new git tree with the given files from the working directory.
    337 
    338   Returns the object ID (SHA-1) of the created tree."""
    339   return create_tree(filenames, '--stdin')
    340 
    341 
    342 def run_clang_format_and_save_to_tree(changed_lines, revision=None,
    343                                       binary='clang-format', style=None):
    344   """Run clang-format on each file and save the result to a git tree.
    345 
    346   Returns the object ID (SHA-1) of the created tree."""
    347   def index_info_generator():
    348     for filename, line_ranges in changed_lines.iteritems():
    349       mode = oct(os.stat(filename).st_mode)
    350       blob_id = clang_format_to_blob(filename, line_ranges,
    351                                      revision=revision,
    352                                      binary=binary,
    353                                      style=style)
    354       yield '%s %s\t%s' % (mode, blob_id, filename)
    355   return create_tree(index_info_generator(), '--index-info')
    356 
    357 
    358 def create_tree(input_lines, mode):
    359   """Create a tree object from the given input.
    360 
    361   If mode is '--stdin', it must be a list of filenames.  If mode is
    362   '--index-info' is must be a list of values suitable for "git update-index
    363   --index-info", such as "<mode> <SP> <sha1> <TAB> <filename>".  Any other mode
    364   is invalid."""
    365   assert mode in ('--stdin', '--index-info')
    366   cmd = ['git', 'update-index', '--add', '-z', mode]
    367   with temporary_index_file():
    368     p = subprocess.Popen(cmd, stdin=subprocess.PIPE)
    369     for line in input_lines:
    370       p.stdin.write('%s\0' % line)
    371     p.stdin.close()
    372     if p.wait() != 0:
    373       die('`%s` failed' % ' '.join(cmd))
    374     tree_id = run('git', 'write-tree')
    375     return tree_id
    376 
    377 
def clang_format_to_blob(filename, line_ranges, revision=None,
                         binary='clang-format', style=None):
  """Run clang-format on the given file and save the result to a git blob.

  Runs on the file in `revision` if not None, or on the file in the working
  directory if `revision` is None.

  `line_ranges` is an iterable of (start_line, line_count) pairs; each one is
  passed to clang-format as a `-lines=<first>:<last>` argument.  `binary` is
  the clang-format executable and `style`, when set, is passed as `-style=`.

  Exits through die() when the clang-format executable cannot be found or when
  any process in the pipeline returns a non-zero status.

  Returns the object ID (SHA-1) of the created blob."""
  clang_format_cmd = [binary]
  if style:
    clang_format_cmd.extend(['-style='+style])
  # Convert (start, count) into the inclusive first:last form.
  clang_format_cmd.extend([
      '-lines=%s:%s' % (start_line, start_line+line_count-1)
      for start_line, line_count in line_ranges])
  if revision:
    # The contents are piped in from git, so clang-format is given the file
    # name separately through -assume-filename.
    clang_format_cmd.extend(['-assume-filename='+filename])
    git_show_cmd = ['git', 'cat-file', 'blob', '%s:%s' % (revision, filename)]
    git_show = subprocess.Popen(git_show_cmd, stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE)
    git_show.stdin.close()
    clang_format_stdin = git_show.stdout
  else:
    # clang-format reads the working-directory file itself.
    clang_format_cmd.extend([filename])
    git_show = None
    clang_format_stdin = subprocess.PIPE
  try:
    clang_format = subprocess.Popen(clang_format_cmd, stdin=clang_format_stdin,
                                    stdout=subprocess.PIPE)
    if clang_format_stdin == subprocess.PIPE:
      # Swap the PIPE marker for the real pipe object so it can be closed below.
      clang_format_stdin = clang_format.stdin
  except OSError as e:
    if e.errno == errno.ENOENT:
      die('cannot find executable "%s"' % binary)
    else:
      raise
  # Close this process's handle on clang-format's input (either the unused
  # stdin pipe or our copy of git's output).
  clang_format_stdin.close()
  # `git hash-object -w` stores what it reads from stdin as a blob and prints
  # the blob's object ID.
  hash_object_cmd = ['git', 'hash-object', '-w', '--path='+filename, '--stdin']
  hash_object = subprocess.Popen(hash_object_cmd, stdin=clang_format.stdout,
                                 stdout=subprocess.PIPE)
  # hash-object now holds its own handle on the pipe; drop ours.
  clang_format.stdout.close()
  stdout = hash_object.communicate()[0]
  # Check the pipeline from its last stage back to its first.
  if hash_object.returncode != 0:
    die('`%s` failed' % ' '.join(hash_object_cmd))
  if clang_format.wait() != 0:
    die('`%s` failed' % ' '.join(clang_format_cmd))
  if git_show and git_show.wait() != 0:
    die('`%s` failed' % ' '.join(git_show_cmd))
  return stdout.rstrip('\r\n')
    426 
    427 
    428 @contextlib.contextmanager
    429 def temporary_index_file(tree=None):
    430   """Context manager for setting GIT_INDEX_FILE to a temporary file and deleting
    431   the file afterward."""
    432   index_path = create_temporary_index(tree)
    433   old_index_path = os.environ.get('GIT_INDEX_FILE')
    434   os.environ['GIT_INDEX_FILE'] = index_path
    435   try:
    436     yield
    437   finally:
    438     if old_index_path is None:
    439       del os.environ['GIT_INDEX_FILE']
    440     else:
    441       os.environ['GIT_INDEX_FILE'] = old_index_path
    442     os.remove(index_path)
    443 
    444 
    445 def create_temporary_index(tree=None):
    446   """Create a temporary index file and return the created file's path.
    447 
    448   If `tree` is not None, use that as the tree to read in.  Otherwise, an
    449   empty index is created."""
    450   gitdir = run('git', 'rev-parse', '--git-dir')
    451   path = os.path.join(gitdir, temp_index_basename)
    452   if tree is None:
    453     tree = '--empty'
    454   run('git', 'read-tree', '--index-output='+path, tree)
    455   return path
    456 
    457 
    458 def print_diff(old_tree, new_tree):
    459   """Print the diff between the two trees to stdout."""
    460   # We use the porcelain 'diff' and not plumbing 'diff-tree' because the output
    461   # is expected to be viewed by the user, and only the former does nice things
    462   # like color and pagination.
    463   #
    464   # We also only print modified files since `new_tree` only contains the files
    465   # that were modified, so unmodified files would show as deleted without the
    466   # filter.
    467   subprocess.check_call(['git', 'diff', '--diff-filter=M', old_tree, new_tree,
    468                          '--'])
    469 
    470 
    471 def apply_changes(old_tree, new_tree, force=False, patch_mode=False):
    472   """Apply the changes in `new_tree` to the working directory.
    473 
    474   Bails if there are local changes in those files and not `force`.  If
    475   `patch_mode`, runs `git checkout --patch` to select hunks interactively."""
    476   changed_files = run('git', 'diff-tree', '--diff-filter=M', '-r', '-z',
    477                       '--name-only', old_tree,
    478                       new_tree).rstrip('\0').split('\0')
    479   if not force:
    480     unstaged_files = run('git', 'diff-files', '--name-status', *changed_files)
    481     if unstaged_files:
    482       print >>sys.stderr, ('The following files would be modified but '
    483                            'have unstaged changes:')
    484       print >>sys.stderr, unstaged_files
    485       print >>sys.stderr, 'Please commit, stage, or stash them first.'
    486       sys.exit(2)
    487   if patch_mode:
    488     # In patch mode, we could just as well create an index from the new tree
    489     # and checkout from that, but then the user will be presented with a
    490     # message saying "Discard ... from worktree".  Instead, we use the old
    491     # tree as the index and checkout from new_tree, which gives the slightly
    492     # better message, "Apply ... to index and worktree".  This is not quite
    493     # right, since it won't be applied to the user's index, but oh well.
    494     with temporary_index_file(old_tree):
    495       subprocess.check_call(['git', 'checkout', '--patch', new_tree])
    496     index_tree = old_tree
    497   else:
    498     with temporary_index_file(new_tree):
    499       run('git', 'checkout-index', '-a', '-f')
    500   return changed_files
    501 
    502 
    503 def run(*args, **kwargs):
    504   stdin = kwargs.pop('stdin', '')
    505   verbose = kwargs.pop('verbose', True)
    506   strip = kwargs.pop('strip', True)
    507   for name in kwargs:
    508     raise TypeError("run() got an unexpected keyword argument '%s'" % name)
    509   p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
    510                        stdin=subprocess.PIPE)
    511   stdout, stderr = p.communicate(input=stdin)
    512   if p.returncode == 0:
    513     if stderr:
    514       if verbose:
    515         print >>sys.stderr, '`%s` printed to stderr:' % ' '.join(args)
    516       print >>sys.stderr, stderr.rstrip()
    517     if strip:
    518       stdout = stdout.rstrip('\r\n')
    519     return stdout
    520   if verbose:
    521     print >>sys.stderr, '`%s` returned %s' % (' '.join(args), p.returncode)
    522   if stderr:
    523     print >>sys.stderr, stderr.rstrip()
    524   sys.exit(2)
    525 
    526 
    527 def die(message):
    528   print >>sys.stderr, 'error:', message
    529   sys.exit(2)
    530 
    531 
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
  main()
    534