Home | History | Annotate | Download | only in clang-format
      1 #!/usr/bin/python
      2 #
      3 #===- git-clang-format - ClangFormat Git Integration ---------*- python -*--===#
      4 #
      5 #                     The LLVM Compiler Infrastructure
      6 #
      7 # This file is distributed under the University of Illinois Open Source
      8 # License. See LICENSE.TXT for details.
      9 #
     10 #===------------------------------------------------------------------------===#
     11 
     12 r"""                                                                             
     13 clang-format git integration                                                     
     14 ============================                                                     
     15                                                                                  
     16 This file provides a clang-format integration for git. Put it somewhere in your  
     17 path and ensure that it is executable. Then, "git clang-format" will invoke      
     18 clang-format on the changes in current files or a specific commit.               
     19                                                                                  
     20 For further details, run:                                                        
     21 git clang-format -h                                                              
     22                                                                                  
     23 Requires Python 2.7                                                              
     24 """               
     25 
     26 import argparse
     27 import collections
     28 import contextlib
     29 import errno
     30 import os
     31 import re
     32 import subprocess
     33 import sys
     34 
     35 usage = 'git clang-format [OPTIONS] [<commit>] [--] [<file>...]'
     36 
     37 desc = '''
     38 Run clang-format on all lines that differ between the working directory
     39 and <commit>, which defaults to HEAD.  Changes are only applied to the working
     40 directory.
     41 
     42 The following git-config settings set the default of the corresponding option:
     43   clangFormat.binary
     44   clangFormat.commit
     45   clangFormat.extension
     46   clangFormat.style
     47 '''
     48 
     49 # Name of the temporary index file in which save the output of clang-format.
     50 # This file is created within the .git directory.
     51 temp_index_basename = 'clang-format-index'
     52 
     53 
     54 Range = collections.namedtuple('Range', 'start, count')
     55 
     56 
     57 def main():
     58   config = load_git_config()
     59 
     60   # In order to keep '--' yet allow options after positionals, we need to
     61   # check for '--' ourselves.  (Setting nargs='*' throws away the '--', while
     62   # nargs=argparse.REMAINDER disallows options after positionals.)
     63   argv = sys.argv[1:]
     64   try:
     65     idx = argv.index('--')
     66   except ValueError:
     67     dash_dash = []
     68   else:
     69     dash_dash = argv[idx:]
     70     argv = argv[:idx]
     71 
     72   default_extensions = ','.join([
     73       # From clang/lib/Frontend/FrontendOptions.cpp, all lower case
     74       'c', 'h',  # C
     75       'm',  # ObjC
     76       'mm',  # ObjC++
     77       'cc', 'cp', 'cpp', 'c++', 'cxx', 'hpp',  # C++
     78       ])
     79 
     80   p = argparse.ArgumentParser(
     81     usage=usage, formatter_class=argparse.RawDescriptionHelpFormatter,
     82     description=desc)
     83   p.add_argument('--binary',
     84                  default=config.get('clangformat.binary', 'clang-format'),
     85                  help='path to clang-format'),
     86   p.add_argument('--commit',
     87                  default=config.get('clangformat.commit', 'HEAD'),
     88                  help='default commit to use if none is specified'),
     89   p.add_argument('--diff', action='store_true',
     90                  help='print a diff instead of applying the changes')
     91   p.add_argument('--extensions',
     92                  default=config.get('clangformat.extensions',
     93                                     default_extensions),
     94                  help=('comma-separated list of file extensions to format, '
     95                        'excluding the period and case-insensitive')),
     96   p.add_argument('-f', '--force', action='store_true',
     97                  help='allow changes to unstaged files')
     98   p.add_argument('-p', '--patch', action='store_true',
     99                  help='select hunks interactively')
    100   p.add_argument('-q', '--quiet', action='count', default=0,
    101                  help='print less information')
    102   p.add_argument('--style',
    103                  default=config.get('clangformat.style', None),
    104                  help='passed to clang-format'),
    105   p.add_argument('-v', '--verbose', action='count', default=0,
    106                  help='print extra information')
    107   # We gather all the remaining positional arguments into 'args' since we need
    108   # to use some heuristics to determine whether or not <commit> was present.
    109   # However, to print pretty messages, we make use of metavar and help.
    110   p.add_argument('args', nargs='*', metavar='<commit>',
    111                  help='revision from which to compute the diff')
    112   p.add_argument('ignored', nargs='*', metavar='<file>...',
    113                  help='if specified, only consider differences in these files')
    114   opts = p.parse_args(argv)
    115 
    116   opts.verbose -= opts.quiet
    117   del opts.quiet
    118 
    119   commit, files = interpret_args(opts.args, dash_dash, opts.commit)
    120   changed_lines = compute_diff_and_extract_lines(commit, files)
    121   if opts.verbose >= 1:
    122     ignored_files = set(changed_lines)
    123   filter_by_extension(changed_lines, opts.extensions.lower().split(','))
    124   if opts.verbose >= 1:
    125     ignored_files.difference_update(changed_lines)
    126     if ignored_files:
    127       print 'Ignoring changes in the following files (wrong extension):'
    128       for filename in ignored_files:
    129         print '   ', filename
    130     if changed_lines:
    131       print 'Running clang-format on the following files:'
    132       for filename in changed_lines:
    133         print '   ', filename
    134   if not changed_lines:
    135     print 'no modified files to format'
    136     return
    137   # The computed diff outputs absolute paths, so we must cd before accessing
    138   # those files.
    139   cd_to_toplevel()
    140   old_tree = create_tree_from_workdir(changed_lines)
    141   new_tree = run_clang_format_and_save_to_tree(changed_lines,
    142                                                binary=opts.binary,
    143                                                style=opts.style)
    144   if opts.verbose >= 1:
    145     print 'old tree:', old_tree
    146     print 'new tree:', new_tree
    147   if old_tree == new_tree:
    148     if opts.verbose >= 0:
    149       print 'clang-format did not modify any files'
    150   elif opts.diff:
    151     print_diff(old_tree, new_tree)
    152   else:
    153     changed_files = apply_changes(old_tree, new_tree, force=opts.force,
    154                                   patch_mode=opts.patch)
    155     if (opts.verbose >= 0 and not opts.patch) or opts.verbose >= 1:
    156       print 'changed files:'
    157       for filename in changed_files:
    158         print '   ', filename
    159 
    160 
    161 def load_git_config(non_string_options=None):
    162   """Return the git configuration as a dictionary.
    163 
    164   All options are assumed to be strings unless in `non_string_options`, in which
    165   is a dictionary mapping option name (in lower case) to either "--bool" or
    166   "--int"."""
    167   if non_string_options is None:
    168     non_string_options = {}
    169   out = {}
    170   for entry in run('git', 'config', '--list', '--null').split('\0'):
    171     if entry:
    172       name, value = entry.split('\n', 1)
    173       if name in non_string_options:
    174         value = run('git', 'config', non_string_options[name], name)
    175       out[name] = value
    176   return out
    177 
    178 
    179 def interpret_args(args, dash_dash, default_commit):
    180   """Interpret `args` as "[commit] [--] [files...]" and return (commit, files).
    181 
    182   It is assumed that "--" and everything that follows has been removed from
    183   args and placed in `dash_dash`.
    184 
    185   If "--" is present (i.e., `dash_dash` is non-empty), the argument to its
    186   left (if present) is taken as commit.  Otherwise, the first argument is
    187   checked if it is a commit or a file.  If commit is not given,
    188   `default_commit` is used."""
    189   if dash_dash:
    190     if len(args) == 0:
    191       commit = default_commit
    192     elif len(args) > 1:
    193       die('at most one commit allowed; %d given' % len(args))
    194     else:
    195       commit = args[0]
    196     object_type = get_object_type(commit)
    197     if object_type not in ('commit', 'tag'):
    198       if object_type is None:
    199         die("'%s' is not a commit" % commit)
    200       else:
    201         die("'%s' is a %s, but a commit was expected" % (commit, object_type))
    202     files = dash_dash[1:]
    203   elif args:
    204     if disambiguate_revision(args[0]):
    205       commit = args[0]
    206       files = args[1:]
    207     else:
    208       commit = default_commit
    209       files = args
    210   else:
    211     commit = default_commit
    212     files = []
    213   return commit, files
    214 
    215 
    216 def disambiguate_revision(value):
    217   """Returns True if `value` is a revision, False if it is a file, or dies."""
    218   # If `value` is ambiguous (neither a commit nor a file), the following
    219   # command will die with an appropriate error message.
    220   run('git', 'rev-parse', value, verbose=False)
    221   object_type = get_object_type(value)
    222   if object_type is None:
    223     return False
    224   if object_type in ('commit', 'tag'):
    225     return True
    226   die('`%s` is a %s, but a commit or filename was expected' %
    227       (value, object_type))
    228 
    229 
    230 def get_object_type(value):
    231   """Returns a string description of an object's type, or None if it is not
    232   a valid git object."""
    233   cmd = ['git', 'cat-file', '-t', value]
    234   p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    235   stdout, stderr = p.communicate()
    236   if p.returncode != 0:
    237     return None
    238   return stdout.strip()
    239 
    240 
    241 def compute_diff_and_extract_lines(commit, files):
    242   """Calls compute_diff() followed by extract_lines()."""
    243   diff_process = compute_diff(commit, files)
    244   changed_lines = extract_lines(diff_process.stdout)
    245   diff_process.stdout.close()
    246   diff_process.wait()
    247   if diff_process.returncode != 0:
    248     # Assume error was already printed to stderr.
    249     sys.exit(2)
    250   return changed_lines
    251 
    252 
    253 def compute_diff(commit, files):
    254   """Return a subprocess object producing the diff from `commit`.
    255 
    256   The return value's `stdin` file object will produce a patch with the
    257   differences between the working directory and `commit`, filtered on `files`
    258   (if non-empty).  Zero context lines are used in the patch."""
    259   cmd = ['git', 'diff-index', '-p', '-U0', commit, '--']
    260   cmd.extend(files)
    261   p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    262   p.stdin.close()
    263   return p
    264 
    265 
    266 def extract_lines(patch_file):
    267   """Extract the changed lines in `patch_file`.
    268 
    269   The return value is a dictionary mapping filename to a list of (start_line,
    270   line_count) pairs.
    271 
    272   The input must have been produced with ``-U0``, meaning unidiff format with
    273   zero lines of context.  The return value is a dict mapping filename to a
    274   list of line `Range`s."""
    275   matches = {}
    276   for line in patch_file:
    277     match = re.search(r'^\+\+\+\ [^/]+/(.*)', line)
    278     if match:
    279       filename = match.group(1).rstrip('\r\n')
    280     match = re.search(r'^@@ -[0-9,]+ \+(\d+)(,(\d+))?', line)
    281     if match:
    282       start_line = int(match.group(1))
    283       line_count = 1
    284       if match.group(3):
    285         line_count = int(match.group(3))
    286       if line_count > 0:
    287         matches.setdefault(filename, []).append(Range(start_line, line_count))
    288   return matches
    289 
    290 
    291 def filter_by_extension(dictionary, allowed_extensions):
    292   """Delete every key in `dictionary` that doesn't have an allowed extension.
    293 
    294   `allowed_extensions` must be a collection of lowercase file extensions,
    295   excluding the period."""
    296   allowed_extensions = frozenset(allowed_extensions)
    297   for filename in dictionary.keys():
    298     base_ext = filename.rsplit('.', 1)
    299     if len(base_ext) == 1 or base_ext[1].lower() not in allowed_extensions:
    300       del dictionary[filename]
    301 
    302 
    303 def cd_to_toplevel():
    304   """Change to the top level of the git repository."""
    305   toplevel = run('git', 'rev-parse', '--show-toplevel')
    306   os.chdir(toplevel)
    307 
    308 
    309 def create_tree_from_workdir(filenames):
    310   """Create a new git tree with the given files from the working directory.
    311 
    312   Returns the object ID (SHA-1) of the created tree."""
    313   return create_tree(filenames, '--stdin')
    314 
    315 
    316 def run_clang_format_and_save_to_tree(changed_lines, binary='clang-format',
    317                                       style=None):
    318   """Run clang-format on each file and save the result to a git tree.
    319 
    320   Returns the object ID (SHA-1) of the created tree."""
    321   def index_info_generator():
    322     for filename, line_ranges in changed_lines.iteritems():
    323       mode = oct(os.stat(filename).st_mode)
    324       blob_id = clang_format_to_blob(filename, line_ranges, binary=binary,
    325                                      style=style)
    326       yield '%s %s\t%s' % (mode, blob_id, filename)
    327   return create_tree(index_info_generator(), '--index-info')
    328 
    329 
    330 def create_tree(input_lines, mode):
    331   """Create a tree object from the given input.
    332 
    333   If mode is '--stdin', it must be a list of filenames.  If mode is
    334   '--index-info' is must be a list of values suitable for "git update-index
    335   --index-info", such as "<mode> <SP> <sha1> <TAB> <filename>".  Any other mode
    336   is invalid."""
    337   assert mode in ('--stdin', '--index-info')
    338   cmd = ['git', 'update-index', '--add', '-z', mode]
    339   with temporary_index_file():
    340     p = subprocess.Popen(cmd, stdin=subprocess.PIPE)
    341     for line in input_lines:
    342       p.stdin.write('%s\0' % line)
    343     p.stdin.close()
    344     if p.wait() != 0:
    345       die('`%s` failed' % ' '.join(cmd))
    346     tree_id = run('git', 'write-tree')
    347     return tree_id
    348 
    349 
    350 def clang_format_to_blob(filename, line_ranges, binary='clang-format',
    351                          style=None):
    352   """Run clang-format on the given file and save the result to a git blob.
    353 
    354   Returns the object ID (SHA-1) of the created blob."""
    355   clang_format_cmd = [binary, filename]
    356   if style:
    357     clang_format_cmd.extend(['-style='+style])
    358   clang_format_cmd.extend([
    359       '-lines=%s:%s' % (start_line, start_line+line_count-1)
    360       for start_line, line_count in line_ranges])
    361   try:
    362     clang_format = subprocess.Popen(clang_format_cmd, stdin=subprocess.PIPE,
    363                                     stdout=subprocess.PIPE)
    364   except OSError as e:
    365     if e.errno == errno.ENOENT:
    366       die('cannot find executable "%s"' % binary)
    367     else:
    368       raise
    369   clang_format.stdin.close()
    370   hash_object_cmd = ['git', 'hash-object', '-w', '--path='+filename, '--stdin']
    371   hash_object = subprocess.Popen(hash_object_cmd, stdin=clang_format.stdout,
    372                                  stdout=subprocess.PIPE)
    373   clang_format.stdout.close()
    374   stdout = hash_object.communicate()[0]
    375   if hash_object.returncode != 0:
    376     die('`%s` failed' % ' '.join(hash_object_cmd))
    377   if clang_format.wait() != 0:
    378     die('`%s` failed' % ' '.join(clang_format_cmd))
    379   return stdout.rstrip('\r\n')
    380 
    381 
    382 @contextlib.contextmanager
    383 def temporary_index_file(tree=None):
    384   """Context manager for setting GIT_INDEX_FILE to a temporary file and deleting
    385   the file afterward."""
    386   index_path = create_temporary_index(tree)
    387   old_index_path = os.environ.get('GIT_INDEX_FILE')
    388   os.environ['GIT_INDEX_FILE'] = index_path
    389   try:
    390     yield
    391   finally:
    392     if old_index_path is None:
    393       del os.environ['GIT_INDEX_FILE']
    394     else:
    395       os.environ['GIT_INDEX_FILE'] = old_index_path
    396     os.remove(index_path)
    397 
    398 
    399 def create_temporary_index(tree=None):
    400   """Create a temporary index file and return the created file's path.
    401 
    402   If `tree` is not None, use that as the tree to read in.  Otherwise, an
    403   empty index is created."""
    404   gitdir = run('git', 'rev-parse', '--git-dir')
    405   path = os.path.join(gitdir, temp_index_basename)
    406   if tree is None:
    407     tree = '--empty'
    408   run('git', 'read-tree', '--index-output='+path, tree)
    409   return path
    410 
    411 
    412 def print_diff(old_tree, new_tree):
    413   """Print the diff between the two trees to stdout."""
    414   # We use the porcelain 'diff' and not plumbing 'diff-tree' because the output
    415   # is expected to be viewed by the user, and only the former does nice things
    416   # like color and pagination.
    417   subprocess.check_call(['git', 'diff', old_tree, new_tree, '--'])
    418 
    419 
    420 def apply_changes(old_tree, new_tree, force=False, patch_mode=False):
    421   """Apply the changes in `new_tree` to the working directory.
    422 
    423   Bails if there are local changes in those files and not `force`.  If
    424   `patch_mode`, runs `git checkout --patch` to select hunks interactively."""
    425   changed_files = run('git', 'diff-tree', '-r', '-z', '--name-only', old_tree,
    426                       new_tree).rstrip('\0').split('\0')
    427   if not force:
    428     unstaged_files = run('git', 'diff-files', '--name-status', *changed_files)
    429     if unstaged_files:
    430       print >>sys.stderr, ('The following files would be modified but '
    431                            'have unstaged changes:')
    432       print >>sys.stderr, unstaged_files
    433       print >>sys.stderr, 'Please commit, stage, or stash them first.'
    434       sys.exit(2)
    435   if patch_mode:
    436     # In patch mode, we could just as well create an index from the new tree
    437     # and checkout from that, but then the user will be presented with a
    438     # message saying "Discard ... from worktree".  Instead, we use the old
    439     # tree as the index and checkout from new_tree, which gives the slightly
    440     # better message, "Apply ... to index and worktree".  This is not quite
    441     # right, since it won't be applied to the user's index, but oh well.
    442     with temporary_index_file(old_tree):
    443       subprocess.check_call(['git', 'checkout', '--patch', new_tree])
    444     index_tree = old_tree
    445   else:
    446     with temporary_index_file(new_tree):
    447       run('git', 'checkout-index', '-a', '-f')
    448   return changed_files
    449 
    450 
    451 def run(*args, **kwargs):
    452   stdin = kwargs.pop('stdin', '')
    453   verbose = kwargs.pop('verbose', True)
    454   strip = kwargs.pop('strip', True)
    455   for name in kwargs:
    456     raise TypeError("run() got an unexpected keyword argument '%s'" % name)
    457   p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
    458                        stdin=subprocess.PIPE)
    459   stdout, stderr = p.communicate(input=stdin)
    460   if p.returncode == 0:
    461     if stderr:
    462       if verbose:
    463         print >>sys.stderr, '`%s` printed to stderr:' % ' '.join(args)
    464       print >>sys.stderr, stderr.rstrip()
    465     if strip:
    466       stdout = stdout.rstrip('\r\n')
    467     return stdout
    468   if verbose:
    469     print >>sys.stderr, '`%s` returned %s' % (' '.join(args), p.returncode)
    470   if stderr:
    471     print >>sys.stderr, stderr.rstrip()
    472   sys.exit(2)
    473 
    474 
    475 def die(message):
    476   print >>sys.stderr, 'error:', message
    477   sys.exit(2)
    478 
    479 
    480 if __name__ == '__main__':
    481   main()
    482