Home | History | Annotate | Download | only in clang-format
      1 #!/usr/bin/python
      2 #
      3 #===- git-clang-format - ClangFormat Git Integration ---------*- python -*--===#
      4 #
      5 #                     The LLVM Compiler Infrastructure
      6 #
      7 # This file is distributed under the University of Illinois Open Source
      8 # License. See LICENSE.TXT for details.
      9 #
     10 #===------------------------------------------------------------------------===#
     11 
     12 r"""                                                                             
     13 clang-format git integration                                                     
     14 ============================                                                     
     15                                                                                  
     16 This file provides a clang-format integration for git. Put it somewhere in your  
     17 path and ensure that it is executable. Then, "git clang-format" will invoke      
     18 clang-format on the changes in current files or a specific commit.               
     19                                                                                  
     20 For further details, run:                                                        
     21 git clang-format -h                                                              
     22                                                                                  
     23 Requires Python 2.7                                                              
     24 """               
     25 
     26 import argparse
     27 import collections
     28 import contextlib
     29 import errno
     30 import os
     31 import re
     32 import subprocess
     33 import sys
     34 
     35 usage = 'git clang-format [OPTIONS] [<commit>] [--] [<file>...]'
     36 
     37 desc = '''
     38 Run clang-format on all lines that differ between the working directory
     39 and <commit>, which defaults to HEAD.  Changes are only applied to the working
     40 directory.
     41 
     42 The following git-config settings set the default of the corresponding option:
     43   clangFormat.binary
     44   clangFormat.commit
     45   clangFormat.extension
     46   clangFormat.style
     47 '''
     48 
     49 # Name of the temporary index file in which save the output of clang-format.
     50 # This file is created within the .git directory.
     51 temp_index_basename = 'clang-format-index'
     52 
     53 
     54 Range = collections.namedtuple('Range', 'start, count')
     55 
     56 
     57 def main():
     58   config = load_git_config()
     59 
     60   # In order to keep '--' yet allow options after positionals, we need to
     61   # check for '--' ourselves.  (Setting nargs='*' throws away the '--', while
     62   # nargs=argparse.REMAINDER disallows options after positionals.)
     63   argv = sys.argv[1:]
     64   try:
     65     idx = argv.index('--')
     66   except ValueError:
     67     dash_dash = []
     68   else:
     69     dash_dash = argv[idx:]
     70     argv = argv[:idx]
     71 
     72   default_extensions = ','.join([
     73       # From clang/lib/Frontend/FrontendOptions.cpp, all lower case
     74       'c', 'h',  # C
     75       'm',  # ObjC
     76       'mm',  # ObjC++
     77       'cc', 'cp', 'cpp', 'c++', 'cxx', 'hpp',  # C++
     78       # Other languages that clang-format supports
     79       'proto', 'protodevel',  # Protocol Buffers
     80       'js',  # JavaScript
     81       ])
     82 
     83   p = argparse.ArgumentParser(
     84     usage=usage, formatter_class=argparse.RawDescriptionHelpFormatter,
     85     description=desc)
     86   p.add_argument('--binary',
     87                  default=config.get('clangformat.binary', 'clang-format'),
     88                  help='path to clang-format'),
     89   p.add_argument('--commit',
     90                  default=config.get('clangformat.commit', 'HEAD'),
     91                  help='default commit to use if none is specified'),
     92   p.add_argument('--diff', action='store_true',
     93                  help='print a diff instead of applying the changes')
     94   p.add_argument('--extensions',
     95                  default=config.get('clangformat.extensions',
     96                                     default_extensions),
     97                  help=('comma-separated list of file extensions to format, '
     98                        'excluding the period and case-insensitive')),
     99   p.add_argument('-f', '--force', action='store_true',
    100                  help='allow changes to unstaged files')
    101   p.add_argument('-p', '--patch', action='store_true',
    102                  help='select hunks interactively')
    103   p.add_argument('-q', '--quiet', action='count', default=0,
    104                  help='print less information')
    105   p.add_argument('--style',
    106                  default=config.get('clangformat.style', None),
    107                  help='passed to clang-format'),
    108   p.add_argument('-v', '--verbose', action='count', default=0,
    109                  help='print extra information')
    110   # We gather all the remaining positional arguments into 'args' since we need
    111   # to use some heuristics to determine whether or not <commit> was present.
    112   # However, to print pretty messages, we make use of metavar and help.
    113   p.add_argument('args', nargs='*', metavar='<commit>',
    114                  help='revision from which to compute the diff')
    115   p.add_argument('ignored', nargs='*', metavar='<file>...',
    116                  help='if specified, only consider differences in these files')
    117   opts = p.parse_args(argv)
    118 
    119   opts.verbose -= opts.quiet
    120   del opts.quiet
    121 
    122   commit, files = interpret_args(opts.args, dash_dash, opts.commit)
    123   changed_lines = compute_diff_and_extract_lines(commit, files)
    124   if opts.verbose >= 1:
    125     ignored_files = set(changed_lines)
    126   filter_by_extension(changed_lines, opts.extensions.lower().split(','))
    127   if opts.verbose >= 1:
    128     ignored_files.difference_update(changed_lines)
    129     if ignored_files:
    130       print 'Ignoring changes in the following files (wrong extension):'
    131       for filename in ignored_files:
    132         print '   ', filename
    133     if changed_lines:
    134       print 'Running clang-format on the following files:'
    135       for filename in changed_lines:
    136         print '   ', filename
    137   if not changed_lines:
    138     print 'no modified files to format'
    139     return
    140   # The computed diff outputs absolute paths, so we must cd before accessing
    141   # those files.
    142   cd_to_toplevel()
    143   old_tree = create_tree_from_workdir(changed_lines)
    144   new_tree = run_clang_format_and_save_to_tree(changed_lines,
    145                                                binary=opts.binary,
    146                                                style=opts.style)
    147   if opts.verbose >= 1:
    148     print 'old tree:', old_tree
    149     print 'new tree:', new_tree
    150   if old_tree == new_tree:
    151     if opts.verbose >= 0:
    152       print 'clang-format did not modify any files'
    153   elif opts.diff:
    154     print_diff(old_tree, new_tree)
    155   else:
    156     changed_files = apply_changes(old_tree, new_tree, force=opts.force,
    157                                   patch_mode=opts.patch)
    158     if (opts.verbose >= 0 and not opts.patch) or opts.verbose >= 1:
    159       print 'changed files:'
    160       for filename in changed_files:
    161         print '   ', filename
    162 
    163 
    164 def load_git_config(non_string_options=None):
    165   """Return the git configuration as a dictionary.
    166 
    167   All options are assumed to be strings unless in `non_string_options`, in which
    168   is a dictionary mapping option name (in lower case) to either "--bool" or
    169   "--int"."""
    170   if non_string_options is None:
    171     non_string_options = {}
    172   out = {}
    173   for entry in run('git', 'config', '--list', '--null').split('\0'):
    174     if entry:
    175       name, value = entry.split('\n', 1)
    176       if name in non_string_options:
    177         value = run('git', 'config', non_string_options[name], name)
    178       out[name] = value
    179   return out
    180 
    181 
    182 def interpret_args(args, dash_dash, default_commit):
    183   """Interpret `args` as "[commit] [--] [files...]" and return (commit, files).
    184 
    185   It is assumed that "--" and everything that follows has been removed from
    186   args and placed in `dash_dash`.
    187 
    188   If "--" is present (i.e., `dash_dash` is non-empty), the argument to its
    189   left (if present) is taken as commit.  Otherwise, the first argument is
    190   checked if it is a commit or a file.  If commit is not given,
    191   `default_commit` is used."""
    192   if dash_dash:
    193     if len(args) == 0:
    194       commit = default_commit
    195     elif len(args) > 1:
    196       die('at most one commit allowed; %d given' % len(args))
    197     else:
    198       commit = args[0]
    199     object_type = get_object_type(commit)
    200     if object_type not in ('commit', 'tag'):
    201       if object_type is None:
    202         die("'%s' is not a commit" % commit)
    203       else:
    204         die("'%s' is a %s, but a commit was expected" % (commit, object_type))
    205     files = dash_dash[1:]
    206   elif args:
    207     if disambiguate_revision(args[0]):
    208       commit = args[0]
    209       files = args[1:]
    210     else:
    211       commit = default_commit
    212       files = args
    213   else:
    214     commit = default_commit
    215     files = []
    216   return commit, files
    217 
    218 
    219 def disambiguate_revision(value):
    220   """Returns True if `value` is a revision, False if it is a file, or dies."""
    221   # If `value` is ambiguous (neither a commit nor a file), the following
    222   # command will die with an appropriate error message.
    223   run('git', 'rev-parse', value, verbose=False)
    224   object_type = get_object_type(value)
    225   if object_type is None:
    226     return False
    227   if object_type in ('commit', 'tag'):
    228     return True
    229   die('`%s` is a %s, but a commit or filename was expected' %
    230       (value, object_type))
    231 
    232 
    233 def get_object_type(value):
    234   """Returns a string description of an object's type, or None if it is not
    235   a valid git object."""
    236   cmd = ['git', 'cat-file', '-t', value]
    237   p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    238   stdout, stderr = p.communicate()
    239   if p.returncode != 0:
    240     return None
    241   return stdout.strip()
    242 
    243 
    244 def compute_diff_and_extract_lines(commit, files):
    245   """Calls compute_diff() followed by extract_lines()."""
    246   diff_process = compute_diff(commit, files)
    247   changed_lines = extract_lines(diff_process.stdout)
    248   diff_process.stdout.close()
    249   diff_process.wait()
    250   if diff_process.returncode != 0:
    251     # Assume error was already printed to stderr.
    252     sys.exit(2)
    253   return changed_lines
    254 
    255 
    256 def compute_diff(commit, files):
    257   """Return a subprocess object producing the diff from `commit`.
    258 
    259   The return value's `stdin` file object will produce a patch with the
    260   differences between the working directory and `commit`, filtered on `files`
    261   (if non-empty).  Zero context lines are used in the patch."""
    262   cmd = ['git', 'diff-index', '-p', '-U0', commit, '--']
    263   cmd.extend(files)
    264   p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    265   p.stdin.close()
    266   return p
    267 
    268 
    269 def extract_lines(patch_file):
    270   """Extract the changed lines in `patch_file`.
    271 
    272   The return value is a dictionary mapping filename to a list of (start_line,
    273   line_count) pairs.
    274 
    275   The input must have been produced with ``-U0``, meaning unidiff format with
    276   zero lines of context.  The return value is a dict mapping filename to a
    277   list of line `Range`s."""
    278   matches = {}
    279   for line in patch_file:
    280     match = re.search(r'^\+\+\+\ [^/]+/(.*)', line)
    281     if match:
    282       filename = match.group(1).rstrip('\r\n')
    283     match = re.search(r'^@@ -[0-9,]+ \+(\d+)(,(\d+))?', line)
    284     if match:
    285       start_line = int(match.group(1))
    286       line_count = 1
    287       if match.group(3):
    288         line_count = int(match.group(3))
    289       if line_count > 0:
    290         matches.setdefault(filename, []).append(Range(start_line, line_count))
    291   return matches
    292 
    293 
    294 def filter_by_extension(dictionary, allowed_extensions):
    295   """Delete every key in `dictionary` that doesn't have an allowed extension.
    296 
    297   `allowed_extensions` must be a collection of lowercase file extensions,
    298   excluding the period."""
    299   allowed_extensions = frozenset(allowed_extensions)
    300   for filename in dictionary.keys():
    301     base_ext = filename.rsplit('.', 1)
    302     if len(base_ext) == 1 or base_ext[1].lower() not in allowed_extensions:
    303       del dictionary[filename]
    304 
    305 
    306 def cd_to_toplevel():
    307   """Change to the top level of the git repository."""
    308   toplevel = run('git', 'rev-parse', '--show-toplevel')
    309   os.chdir(toplevel)
    310 
    311 
    312 def create_tree_from_workdir(filenames):
    313   """Create a new git tree with the given files from the working directory.
    314 
    315   Returns the object ID (SHA-1) of the created tree."""
    316   return create_tree(filenames, '--stdin')
    317 
    318 
    319 def run_clang_format_and_save_to_tree(changed_lines, binary='clang-format',
    320                                       style=None):
    321   """Run clang-format on each file and save the result to a git tree.
    322 
    323   Returns the object ID (SHA-1) of the created tree."""
    324   def index_info_generator():
    325     for filename, line_ranges in changed_lines.iteritems():
    326       mode = oct(os.stat(filename).st_mode)
    327       blob_id = clang_format_to_blob(filename, line_ranges, binary=binary,
    328                                      style=style)
    329       yield '%s %s\t%s' % (mode, blob_id, filename)
    330   return create_tree(index_info_generator(), '--index-info')
    331 
    332 
    333 def create_tree(input_lines, mode):
    334   """Create a tree object from the given input.
    335 
    336   If mode is '--stdin', it must be a list of filenames.  If mode is
    337   '--index-info' is must be a list of values suitable for "git update-index
    338   --index-info", such as "<mode> <SP> <sha1> <TAB> <filename>".  Any other mode
    339   is invalid."""
    340   assert mode in ('--stdin', '--index-info')
    341   cmd = ['git', 'update-index', '--add', '-z', mode]
    342   with temporary_index_file():
    343     p = subprocess.Popen(cmd, stdin=subprocess.PIPE)
    344     for line in input_lines:
    345       p.stdin.write('%s\0' % line)
    346     p.stdin.close()
    347     if p.wait() != 0:
    348       die('`%s` failed' % ' '.join(cmd))
    349     tree_id = run('git', 'write-tree')
    350     return tree_id
    351 
    352 
    353 def clang_format_to_blob(filename, line_ranges, binary='clang-format',
    354                          style=None):
    355   """Run clang-format on the given file and save the result to a git blob.
    356 
    357   Returns the object ID (SHA-1) of the created blob."""
    358   clang_format_cmd = [binary, filename]
    359   if style:
    360     clang_format_cmd.extend(['-style='+style])
    361   clang_format_cmd.extend([
    362       '-lines=%s:%s' % (start_line, start_line+line_count-1)
    363       for start_line, line_count in line_ranges])
    364   try:
    365     clang_format = subprocess.Popen(clang_format_cmd, stdin=subprocess.PIPE,
    366                                     stdout=subprocess.PIPE)
    367   except OSError as e:
    368     if e.errno == errno.ENOENT:
    369       die('cannot find executable "%s"' % binary)
    370     else:
    371       raise
    372   clang_format.stdin.close()
    373   hash_object_cmd = ['git', 'hash-object', '-w', '--path='+filename, '--stdin']
    374   hash_object = subprocess.Popen(hash_object_cmd, stdin=clang_format.stdout,
    375                                  stdout=subprocess.PIPE)
    376   clang_format.stdout.close()
    377   stdout = hash_object.communicate()[0]
    378   if hash_object.returncode != 0:
    379     die('`%s` failed' % ' '.join(hash_object_cmd))
    380   if clang_format.wait() != 0:
    381     die('`%s` failed' % ' '.join(clang_format_cmd))
    382   return stdout.rstrip('\r\n')
    383 
    384 
    385 @contextlib.contextmanager
    386 def temporary_index_file(tree=None):
    387   """Context manager for setting GIT_INDEX_FILE to a temporary file and deleting
    388   the file afterward."""
    389   index_path = create_temporary_index(tree)
    390   old_index_path = os.environ.get('GIT_INDEX_FILE')
    391   os.environ['GIT_INDEX_FILE'] = index_path
    392   try:
    393     yield
    394   finally:
    395     if old_index_path is None:
    396       del os.environ['GIT_INDEX_FILE']
    397     else:
    398       os.environ['GIT_INDEX_FILE'] = old_index_path
    399     os.remove(index_path)
    400 
    401 
    402 def create_temporary_index(tree=None):
    403   """Create a temporary index file and return the created file's path.
    404 
    405   If `tree` is not None, use that as the tree to read in.  Otherwise, an
    406   empty index is created."""
    407   gitdir = run('git', 'rev-parse', '--git-dir')
    408   path = os.path.join(gitdir, temp_index_basename)
    409   if tree is None:
    410     tree = '--empty'
    411   run('git', 'read-tree', '--index-output='+path, tree)
    412   return path
    413 
    414 
    415 def print_diff(old_tree, new_tree):
    416   """Print the diff between the two trees to stdout."""
    417   # We use the porcelain 'diff' and not plumbing 'diff-tree' because the output
    418   # is expected to be viewed by the user, and only the former does nice things
    419   # like color and pagination.
    420   subprocess.check_call(['git', 'diff', old_tree, new_tree, '--'])
    421 
    422 
    423 def apply_changes(old_tree, new_tree, force=False, patch_mode=False):
    424   """Apply the changes in `new_tree` to the working directory.
    425 
    426   Bails if there are local changes in those files and not `force`.  If
    427   `patch_mode`, runs `git checkout --patch` to select hunks interactively."""
    428   changed_files = run('git', 'diff-tree', '-r', '-z', '--name-only', old_tree,
    429                       new_tree).rstrip('\0').split('\0')
    430   if not force:
    431     unstaged_files = run('git', 'diff-files', '--name-status', *changed_files)
    432     if unstaged_files:
    433       print >>sys.stderr, ('The following files would be modified but '
    434                            'have unstaged changes:')
    435       print >>sys.stderr, unstaged_files
    436       print >>sys.stderr, 'Please commit, stage, or stash them first.'
    437       sys.exit(2)
    438   if patch_mode:
    439     # In patch mode, we could just as well create an index from the new tree
    440     # and checkout from that, but then the user will be presented with a
    441     # message saying "Discard ... from worktree".  Instead, we use the old
    442     # tree as the index and checkout from new_tree, which gives the slightly
    443     # better message, "Apply ... to index and worktree".  This is not quite
    444     # right, since it won't be applied to the user's index, but oh well.
    445     with temporary_index_file(old_tree):
    446       subprocess.check_call(['git', 'checkout', '--patch', new_tree])
    447     index_tree = old_tree
    448   else:
    449     with temporary_index_file(new_tree):
    450       run('git', 'checkout-index', '-a', '-f')
    451   return changed_files
    452 
    453 
    454 def run(*args, **kwargs):
    455   stdin = kwargs.pop('stdin', '')
    456   verbose = kwargs.pop('verbose', True)
    457   strip = kwargs.pop('strip', True)
    458   for name in kwargs:
    459     raise TypeError("run() got an unexpected keyword argument '%s'" % name)
    460   p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
    461                        stdin=subprocess.PIPE)
    462   stdout, stderr = p.communicate(input=stdin)
    463   if p.returncode == 0:
    464     if stderr:
    465       if verbose:
    466         print >>sys.stderr, '`%s` printed to stderr:' % ' '.join(args)
    467       print >>sys.stderr, stderr.rstrip()
    468     if strip:
    469       stdout = stdout.rstrip('\r\n')
    470     return stdout
    471   if verbose:
    472     print >>sys.stderr, '`%s` returned %s' % (' '.join(args), p.returncode)
    473   if stderr:
    474     print >>sys.stderr, stderr.rstrip()
    475   sys.exit(2)
    476 
    477 
    478 def die(message):
    479   print >>sys.stderr, 'error:', message
    480   sys.exit(2)
    481 
    482 
# Run main() only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
  main()
    485