Home | History | Annotate | Download | only in bin
      1 #!/usr/bin/env python
      2 #
      3 #===- git-clang-format - ClangFormat Git Integration ---------*- python -*--===#
      4 #
      5 #                     The LLVM Compiler Infrastructure
      6 #
      7 # This file is distributed under the University of Illinois Open Source
      8 # License. See LICENSE.TXT for details.
      9 #
     10 #===------------------------------------------------------------------------===#
     11 
     12 r"""                                                                             
     13 clang-format git integration                                                     
     14 ============================                                                     
     15                                                                                  
     16 This file provides a clang-format integration for git. Put it somewhere in your  
     17 path and ensure that it is executable. Then, "git clang-format" will invoke      
     18 clang-format on the changes in current files or a specific commit.               
     19                                                                                  
     20 For further details, run:                                                        
     21 git clang-format -h                                                              
     22                                                                                  
     23 Requires Python 2.7                                                              
     24 """               
     25 
     26 import argparse
     27 import collections
     28 import contextlib
     29 import errno
     30 import os
     31 import re
     32 import subprocess
     33 import sys
     34 
     35 usage = 'git clang-format [OPTIONS] [<commit>] [<commit>] [--] [<file>...]'
     36 
     37 desc = '''
     38 If zero or one commits are given, run clang-format on all lines that differ
     39 between the working directory and <commit>, which defaults to HEAD.  Changes are
     40 only applied to the working directory.
     41 
     42 If two commits are given (requires --diff), run clang-format on all lines in the
     43 second <commit> that differ from the first <commit>.
     44 
     45 The following git-config settings set the default of the corresponding option:
     46   clangFormat.binary
     47   clangFormat.commit
     48   clangFormat.extension
     49   clangFormat.style
     50 '''
     51 
     52 # Name of the temporary index file in which save the output of clang-format.
     53 # This file is created within the .git directory.
     54 temp_index_basename = 'clang-format-index'
     55 
     56 
     57 Range = collections.namedtuple('Range', 'start, count')
     58 
     59 
     60 def main():
     61   config = load_git_config()
     62 
     63   # In order to keep '--' yet allow options after positionals, we need to
     64   # check for '--' ourselves.  (Setting nargs='*' throws away the '--', while
     65   # nargs=argparse.REMAINDER disallows options after positionals.)
     66   argv = sys.argv[1:]
     67   try:
     68     idx = argv.index('--')
     69   except ValueError:
     70     dash_dash = []
     71   else:
     72     dash_dash = argv[idx:]
     73     argv = argv[:idx]
     74 
     75   default_extensions = ','.join([
     76       # From clang/lib/Frontend/FrontendOptions.cpp, all lower case
     77       'c', 'h',  # C
     78       'm',  # ObjC
     79       'mm',  # ObjC++
     80       'cc', 'cp', 'cpp', 'c++', 'cxx', 'hpp',  # C++
     81       # Other languages that clang-format supports
     82       'proto', 'protodevel',  # Protocol Buffers
     83       'java',  # Java
     84       'js',  # JavaScript
     85       'ts',  # TypeScript
     86       ])
     87 
     88   p = argparse.ArgumentParser(
     89     usage=usage, formatter_class=argparse.RawDescriptionHelpFormatter,
     90     description=desc)
     91   p.add_argument('--binary',
     92                  default=config.get('clangformat.binary', 'clang-format'),
     93                  help='path to clang-format'),
     94   p.add_argument('--commit',
     95                  default=config.get('clangformat.commit', 'HEAD'),
     96                  help='default commit to use if none is specified'),
     97   p.add_argument('--diff', action='store_true',
     98                  help='print a diff instead of applying the changes')
     99   p.add_argument('--extensions',
    100                  default=config.get('clangformat.extensions',
    101                                     default_extensions),
    102                  help=('comma-separated list of file extensions to format, '
    103                        'excluding the period and case-insensitive')),
    104   p.add_argument('-f', '--force', action='store_true',
    105                  help='allow changes to unstaged files')
    106   p.add_argument('-p', '--patch', action='store_true',
    107                  help='select hunks interactively')
    108   p.add_argument('-q', '--quiet', action='count', default=0,
    109                  help='print less information')
    110   p.add_argument('--style',
    111                  default=config.get('clangformat.style', None),
    112                  help='passed to clang-format'),
    113   p.add_argument('-v', '--verbose', action='count', default=0,
    114                  help='print extra information')
    115   # We gather all the remaining positional arguments into 'args' since we need
    116   # to use some heuristics to determine whether or not <commit> was present.
    117   # However, to print pretty messages, we make use of metavar and help.
    118   p.add_argument('args', nargs='*', metavar='<commit>',
    119                  help='revision from which to compute the diff')
    120   p.add_argument('ignored', nargs='*', metavar='<file>...',
    121                  help='if specified, only consider differences in these files')
    122   opts = p.parse_args(argv)
    123 
    124   opts.verbose -= opts.quiet
    125   del opts.quiet
    126 
    127   commits, files = interpret_args(opts.args, dash_dash, opts.commit)
    128   if len(commits) > 1:
    129     if not opts.diff:
    130       die('--diff is required when two commits are given')
    131   else:
    132     if len(commits) > 2:
    133       die('at most two commits allowed; %d given' % len(commits))
    134   changed_lines = compute_diff_and_extract_lines(commits, files)
    135   if opts.verbose >= 1:
    136     ignored_files = set(changed_lines)
    137   filter_by_extension(changed_lines, opts.extensions.lower().split(','))
    138   if opts.verbose >= 1:
    139     ignored_files.difference_update(changed_lines)
    140     if ignored_files:
    141       print 'Ignoring changes in the following files (wrong extension):'
    142       for filename in ignored_files:
    143         print '   ', filename
    144     if changed_lines:
    145       print 'Running clang-format on the following files:'
    146       for filename in changed_lines:
    147         print '   ', filename
    148   if not changed_lines:
    149     print 'no modified files to format'
    150     return
    151   # The computed diff outputs absolute paths, so we must cd before accessing
    152   # those files.
    153   cd_to_toplevel()
    154   if len(commits) > 1:
    155     old_tree = commits[1]
    156     new_tree = run_clang_format_and_save_to_tree(changed_lines,
    157                                                  revision=commits[1],
    158                                                  binary=opts.binary,
    159                                                  style=opts.style)
    160   else:
    161     old_tree = create_tree_from_workdir(changed_lines)
    162     new_tree = run_clang_format_and_save_to_tree(changed_lines,
    163                                                  binary=opts.binary,
    164                                                  style=opts.style)
    165   if opts.verbose >= 1:
    166     print 'old tree:', old_tree
    167     print 'new tree:', new_tree
    168   if old_tree == new_tree:
    169     if opts.verbose >= 0:
    170       print 'clang-format did not modify any files'
    171   elif opts.diff:
    172     print_diff(old_tree, new_tree)
    173   else:
    174     changed_files = apply_changes(old_tree, new_tree, force=opts.force,
    175                                   patch_mode=opts.patch)
    176     if (opts.verbose >= 0 and not opts.patch) or opts.verbose >= 1:
    177       print 'changed files:'
    178       for filename in changed_files:
    179         print '   ', filename
    180 
    181 
    182 def load_git_config(non_string_options=None):
    183   """Return the git configuration as a dictionary.
    184 
    185   All options are assumed to be strings unless in `non_string_options`, in which
    186   is a dictionary mapping option name (in lower case) to either "--bool" or
    187   "--int"."""
    188   if non_string_options is None:
    189     non_string_options = {}
    190   out = {}
    191   for entry in run('git', 'config', '--list', '--null').split('\0'):
    192     if entry:
    193       name, value = entry.split('\n', 1)
    194       if name in non_string_options:
    195         value = run('git', 'config', non_string_options[name], name)
    196       out[name] = value
    197   return out
    198 
    199 
    200 def interpret_args(args, dash_dash, default_commit):
    201   """Interpret `args` as "[commits] [--] [files]" and return (commits, files).
    202 
    203   It is assumed that "--" and everything that follows has been removed from
    204   args and placed in `dash_dash`.
    205 
    206   If "--" is present (i.e., `dash_dash` is non-empty), the arguments to its
    207   left (if present) are taken as commits.  Otherwise, the arguments are checked
    208   from left to right if they are commits or files.  If commits are not given,
    209   a list with `default_commit` is used."""
    210   if dash_dash:
    211     if len(args) == 0:
    212       commits = [default_commit]
    213     else:
    214       commits = args
    215     for commit in commits:
    216       object_type = get_object_type(commit)
    217       if object_type not in ('commit', 'tag'):
    218         if object_type is None:
    219           die("'%s' is not a commit" % commit)
    220         else:
    221           die("'%s' is a %s, but a commit was expected" % (commit, object_type))
    222     files = dash_dash[1:]
    223   elif args:
    224     commits = []
    225     while args:
    226       if not disambiguate_revision(args[0]):
    227         break
    228       commits.append(args.pop(0))
    229     if not commits:
    230       commits = [default_commit]
    231     files = args
    232   else:
    233     commits = [default_commit]
    234     files = []
    235   return commits, files
    236 
    237 
    238 def disambiguate_revision(value):
    239   """Returns True if `value` is a revision, False if it is a file, or dies."""
    240   # If `value` is ambiguous (neither a commit nor a file), the following
    241   # command will die with an appropriate error message.
    242   run('git', 'rev-parse', value, verbose=False)
    243   object_type = get_object_type(value)
    244   if object_type is None:
    245     return False
    246   if object_type in ('commit', 'tag'):
    247     return True
    248   die('`%s` is a %s, but a commit or filename was expected' %
    249       (value, object_type))
    250 
    251 
    252 def get_object_type(value):
    253   """Returns a string description of an object's type, or None if it is not
    254   a valid git object."""
    255   cmd = ['git', 'cat-file', '-t', value]
    256   p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    257   stdout, stderr = p.communicate()
    258   if p.returncode != 0:
    259     return None
    260   return stdout.strip()
    261 
    262 
    263 def compute_diff_and_extract_lines(commits, files):
    264   """Calls compute_diff() followed by extract_lines()."""
    265   diff_process = compute_diff(commits, files)
    266   changed_lines = extract_lines(diff_process.stdout)
    267   diff_process.stdout.close()
    268   diff_process.wait()
    269   if diff_process.returncode != 0:
    270     # Assume error was already printed to stderr.
    271     sys.exit(2)
    272   return changed_lines
    273 
    274 
    275 def compute_diff(commits, files):
    276   """Return a subprocess object producing the diff from `commits`.
    277 
    278   The return value's `stdin` file object will produce a patch with the
    279   differences between the working directory and the first commit if a single
    280   one was specified, or the difference between both specified commits, filtered
    281   on `files` (if non-empty).  Zero context lines are used in the patch."""
    282   git_tool = 'diff-index'
    283   if len(commits) > 1:
    284     git_tool = 'diff-tree'
    285   cmd = ['git', git_tool, '-p', '-U0'] + commits + ['--']
    286   cmd.extend(files)
    287   p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    288   p.stdin.close()
    289   return p
    290 
    291 
    292 def extract_lines(patch_file):
    293   """Extract the changed lines in `patch_file`.
    294 
    295   The return value is a dictionary mapping filename to a list of (start_line,
    296   line_count) pairs.
    297 
    298   The input must have been produced with ``-U0``, meaning unidiff format with
    299   zero lines of context.  The return value is a dict mapping filename to a
    300   list of line `Range`s."""
    301   matches = {}
    302   for line in patch_file:
    303     match = re.search(r'^\+\+\+\ [^/]+/(.*)', line)
    304     if match:
    305       filename = match.group(1).rstrip('\r\n')
    306     match = re.search(r'^@@ -[0-9,]+ \+(\d+)(,(\d+))?', line)
    307     if match:
    308       start_line = int(match.group(1))
    309       line_count = 1
    310       if match.group(3):
    311         line_count = int(match.group(3))
    312       if line_count > 0:
    313         matches.setdefault(filename, []).append(Range(start_line, line_count))
    314   return matches
    315 
    316 
    317 def filter_by_extension(dictionary, allowed_extensions):
    318   """Delete every key in `dictionary` that doesn't have an allowed extension.
    319 
    320   `allowed_extensions` must be a collection of lowercase file extensions,
    321   excluding the period."""
    322   allowed_extensions = frozenset(allowed_extensions)
    323   for filename in dictionary.keys():
    324     base_ext = filename.rsplit('.', 1)
    325     if len(base_ext) == 1 or base_ext[1].lower() not in allowed_extensions:
    326       del dictionary[filename]
    327 
    328 
    329 def cd_to_toplevel():
    330   """Change to the top level of the git repository."""
    331   toplevel = run('git', 'rev-parse', '--show-toplevel')
    332   os.chdir(toplevel)
    333 
    334 
    335 def create_tree_from_workdir(filenames):
    336   """Create a new git tree with the given files from the working directory.
    337 
    338   Returns the object ID (SHA-1) of the created tree."""
    339   return create_tree(filenames, '--stdin')
    340 
    341 
    342 def run_clang_format_and_save_to_tree(changed_lines, revision=None,
    343                                       binary='clang-format', style=None):
    344   """Run clang-format on each file and save the result to a git tree.
    345 
    346   Returns the object ID (SHA-1) of the created tree."""
    347   def index_info_generator():
    348     for filename, line_ranges in changed_lines.iteritems():
    349       if revision:
    350         git_metadata_cmd = ['git', 'ls-tree',
    351                             '%s:%s' % (revision, os.path.dirname(filename)),
    352                             os.path.basename(filename)]
    353         git_metadata = subprocess.Popen(git_metadata_cmd, stdin=subprocess.PIPE,
    354                                         stdout=subprocess.PIPE)
    355         stdout = git_metadata.communicate()[0]
    356         mode = oct(int(stdout.split()[0], 8))
    357       else:
    358         mode = oct(os.stat(filename).st_mode)
    359       blob_id = clang_format_to_blob(filename, line_ranges,
    360                                      revision=revision,
    361                                      binary=binary,
    362                                      style=style)
    363       yield '%s %s\t%s' % (mode, blob_id, filename)
    364   return create_tree(index_info_generator(), '--index-info')
    365 
    366 
    367 def create_tree(input_lines, mode):
    368   """Create a tree object from the given input.
    369 
    370   If mode is '--stdin', it must be a list of filenames.  If mode is
    371   '--index-info' is must be a list of values suitable for "git update-index
    372   --index-info", such as "<mode> <SP> <sha1> <TAB> <filename>".  Any other mode
    373   is invalid."""
    374   assert mode in ('--stdin', '--index-info')
    375   cmd = ['git', 'update-index', '--add', '-z', mode]
    376   with temporary_index_file():
    377     p = subprocess.Popen(cmd, stdin=subprocess.PIPE)
    378     for line in input_lines:
    379       p.stdin.write('%s\0' % line)
    380     p.stdin.close()
    381     if p.wait() != 0:
    382       die('`%s` failed' % ' '.join(cmd))
    383     tree_id = run('git', 'write-tree')
    384     return tree_id
    385 
    386 
    387 def clang_format_to_blob(filename, line_ranges, revision=None,
    388                          binary='clang-format', style=None):
    389   """Run clang-format on the given file and save the result to a git blob.
    390 
    391   Runs on the file in `revision` if not None, or on the file in the working
    392   directory if `revision` is None.
    393 
    394   Returns the object ID (SHA-1) of the created blob."""
    395   clang_format_cmd = [binary]
    396   if style:
    397     clang_format_cmd.extend(['-style='+style])
    398   clang_format_cmd.extend([
    399       '-lines=%s:%s' % (start_line, start_line+line_count-1)
    400       for start_line, line_count in line_ranges])
    401   if revision:
    402     clang_format_cmd.extend(['-assume-filename='+filename])
    403     git_show_cmd = ['git', 'cat-file', 'blob', '%s:%s' % (revision, filename)]
    404     git_show = subprocess.Popen(git_show_cmd, stdin=subprocess.PIPE,
    405                                 stdout=subprocess.PIPE)
    406     git_show.stdin.close()
    407     clang_format_stdin = git_show.stdout
    408   else:
    409     clang_format_cmd.extend([filename])
    410     git_show = None
    411     clang_format_stdin = subprocess.PIPE
    412   try:
    413     clang_format = subprocess.Popen(clang_format_cmd, stdin=clang_format_stdin,
    414                                     stdout=subprocess.PIPE)
    415     if clang_format_stdin == subprocess.PIPE:
    416       clang_format_stdin = clang_format.stdin
    417   except OSError as e:
    418     if e.errno == errno.ENOENT:
    419       die('cannot find executable "%s"' % binary)
    420     else:
    421       raise
    422   clang_format_stdin.close()
    423   hash_object_cmd = ['git', 'hash-object', '-w', '--path='+filename, '--stdin']
    424   hash_object = subprocess.Popen(hash_object_cmd, stdin=clang_format.stdout,
    425                                  stdout=subprocess.PIPE)
    426   clang_format.stdout.close()
    427   stdout = hash_object.communicate()[0]
    428   if hash_object.returncode != 0:
    429     die('`%s` failed' % ' '.join(hash_object_cmd))
    430   if clang_format.wait() != 0:
    431     die('`%s` failed' % ' '.join(clang_format_cmd))
    432   if git_show and git_show.wait() != 0:
    433     die('`%s` failed' % ' '.join(git_show_cmd))
    434   return stdout.rstrip('\r\n')
    435 
    436 
    437 @contextlib.contextmanager
    438 def temporary_index_file(tree=None):
    439   """Context manager for setting GIT_INDEX_FILE to a temporary file and deleting
    440   the file afterward."""
    441   index_path = create_temporary_index(tree)
    442   old_index_path = os.environ.get('GIT_INDEX_FILE')
    443   os.environ['GIT_INDEX_FILE'] = index_path
    444   try:
    445     yield
    446   finally:
    447     if old_index_path is None:
    448       del os.environ['GIT_INDEX_FILE']
    449     else:
    450       os.environ['GIT_INDEX_FILE'] = old_index_path
    451     os.remove(index_path)
    452 
    453 
    454 def create_temporary_index(tree=None):
    455   """Create a temporary index file and return the created file's path.
    456 
    457   If `tree` is not None, use that as the tree to read in.  Otherwise, an
    458   empty index is created."""
    459   gitdir = run('git', 'rev-parse', '--git-dir')
    460   path = os.path.join(gitdir, temp_index_basename)
    461   if tree is None:
    462     tree = '--empty'
    463   run('git', 'read-tree', '--index-output='+path, tree)
    464   return path
    465 
    466 
    467 def print_diff(old_tree, new_tree):
    468   """Print the diff between the two trees to stdout."""
    469   # We use the porcelain 'diff' and not plumbing 'diff-tree' because the output
    470   # is expected to be viewed by the user, and only the former does nice things
    471   # like color and pagination.
    472   #
    473   # We also only print modified files since `new_tree` only contains the files
    474   # that were modified, so unmodified files would show as deleted without the
    475   # filter.
    476   subprocess.check_call(['git', 'diff', '--diff-filter=M', old_tree, new_tree,
    477                          '--'])
    478 
    479 
    480 def apply_changes(old_tree, new_tree, force=False, patch_mode=False):
    481   """Apply the changes in `new_tree` to the working directory.
    482 
    483   Bails if there are local changes in those files and not `force`.  If
    484   `patch_mode`, runs `git checkout --patch` to select hunks interactively."""
    485   changed_files = run('git', 'diff-tree', '--diff-filter=M', '-r', '-z',
    486                       '--name-only', old_tree,
    487                       new_tree).rstrip('\0').split('\0')
    488   if not force:
    489     unstaged_files = run('git', 'diff-files', '--name-status', *changed_files)
    490     if unstaged_files:
    491       print >>sys.stderr, ('The following files would be modified but '
    492                            'have unstaged changes:')
    493       print >>sys.stderr, unstaged_files
    494       print >>sys.stderr, 'Please commit, stage, or stash them first.'
    495       sys.exit(2)
    496   if patch_mode:
    497     # In patch mode, we could just as well create an index from the new tree
    498     # and checkout from that, but then the user will be presented with a
    499     # message saying "Discard ... from worktree".  Instead, we use the old
    500     # tree as the index and checkout from new_tree, which gives the slightly
    501     # better message, "Apply ... to index and worktree".  This is not quite
    502     # right, since it won't be applied to the user's index, but oh well.
    503     with temporary_index_file(old_tree):
    504       subprocess.check_call(['git', 'checkout', '--patch', new_tree])
    505     index_tree = old_tree
    506   else:
    507     with temporary_index_file(new_tree):
    508       run('git', 'checkout-index', '-a', '-f')
    509   return changed_files
    510 
    511 
    512 def run(*args, **kwargs):
    513   stdin = kwargs.pop('stdin', '')
    514   verbose = kwargs.pop('verbose', True)
    515   strip = kwargs.pop('strip', True)
    516   for name in kwargs:
    517     raise TypeError("run() got an unexpected keyword argument '%s'" % name)
    518   p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
    519                        stdin=subprocess.PIPE)
    520   stdout, stderr = p.communicate(input=stdin)
    521   if p.returncode == 0:
    522     if stderr:
    523       if verbose:
    524         print >>sys.stderr, '`%s` printed to stderr:' % ' '.join(args)
    525       print >>sys.stderr, stderr.rstrip()
    526     if strip:
    527       stdout = stdout.rstrip('\r\n')
    528     return stdout
    529   if verbose:
    530     print >>sys.stderr, '`%s` returned %s' % (' '.join(args), p.returncode)
    531   if stderr:
    532     print >>sys.stderr, stderr.rstrip()
    533   sys.exit(2)
    534 
    535 
    536 def die(message):
    537   print >>sys.stderr, 'error:', message
    538   sys.exit(2)
    539 
    540 
# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
  main()
    543