Home | History | Annotate | Download | only in scripts
      1 #!/usr/bin/env python2
      2 #
      3 #===- git-clang-format - ClangFormat Git Integration ---------*- python -*--===#
      4 #
      5 #                     The LLVM Compiler Infrastructure
      6 #
      7 # This file is distributed under the University of Illinois Open Source
      8 # License. See LICENSE.TXT for details.
      9 #
     10 #===------------------------------------------------------------------------===#
     11 
     12 r"""                                                                             
     13 clang-format git integration                                                     
     14 ============================                                                     
     15                                                                                  
     16 This file provides a clang-format integration for git. Put it somewhere in your  
     17 path and ensure that it is executable. Then, "git clang-format" will invoke      
     18 clang-format on the changes in current files or a specific commit.               
     19                                                                                  
     20 For further details, run:                                                        
     21 git clang-format -h                                                              
     22                                                                                  
     23 Requires Python 2.7                                                              
     24 """               
     25 
     26 import argparse
     27 import collections
     28 import contextlib
     29 import errno
     30 import os
     31 import re
     32 import subprocess
     33 import sys
     34 
     35 usage = 'git clang-format [OPTIONS] [<commit>] [--] [<file>...]'
     36 
     37 desc = '''
     38 Run clang-format on all lines that differ between the working directory
     39 and <commit>, which defaults to HEAD.  Changes are only applied to the working
     40 directory.
     41 
     42 The following git-config settings set the default of the corresponding option:
     43   clangFormat.binary
     44   clangFormat.commit
     45   clangFormat.extension
     46   clangFormat.style
     47 '''
     48 
     49 # Name of the temporary index file in which save the output of clang-format.
     50 # This file is created within the .git directory.
     51 temp_index_basename = 'clang-format-index'
     52 
     53 
     54 Range = collections.namedtuple('Range', 'start, count')
     55 
     56 
     57 def main():
     58   config = load_git_config()
     59 
     60   # In order to keep '--' yet allow options after positionals, we need to
     61   # check for '--' ourselves.  (Setting nargs='*' throws away the '--', while
     62   # nargs=argparse.REMAINDER disallows options after positionals.)
     63   argv = sys.argv[1:]
     64   try:
     65     idx = argv.index('--')
     66   except ValueError:
     67     dash_dash = []
     68   else:
     69     dash_dash = argv[idx:]
     70     argv = argv[:idx]
     71 
     72   default_extensions = ','.join([
     73       # From clang/lib/Frontend/FrontendOptions.cpp, all lower case
     74       'c', 'h',  # C
     75       'm',  # ObjC
     76       'mm',  # ObjC++
     77       'cc', 'cp', 'cpp', 'c++', 'cxx', 'hpp',  # C++
     78       # Other languages that clang-format supports
     79       'proto', 'protodevel',  # Protocol Buffers
     80       'js',  # JavaScript
     81       'ts',  # TypeScript
     82       ])
     83 
     84   p = argparse.ArgumentParser(
     85     usage=usage, formatter_class=argparse.RawDescriptionHelpFormatter,
     86     description=desc)
     87   p.add_argument('--binary',
     88                  default=config.get('clangformat.binary', 'clang-format'),
     89                  help='path to clang-format'),
     90   p.add_argument('--commit',
     91                  default=config.get('clangformat.commit', 'HEAD'),
     92                  help='default commit to use if none is specified'),
     93   p.add_argument('--diff', action='store_true',
     94                  help='print a diff instead of applying the changes')
     95   p.add_argument('--extensions',
     96                  default=config.get('clangformat.extensions',
     97                                     default_extensions),
     98                  help=('comma-separated list of file extensions to format, '
     99                        'excluding the period and case-insensitive')),
    100   p.add_argument('-f', '--force', action='store_true',
    101                  help='allow changes to unstaged files')
    102   p.add_argument('-p', '--patch', action='store_true',
    103                  help='select hunks interactively')
    104   p.add_argument('-q', '--quiet', action='count', default=0,
    105                  help='print less information')
    106   p.add_argument('--style',
    107                  default=config.get('clangformat.style', None),
    108                  help='passed to clang-format'),
    109   p.add_argument('-v', '--verbose', action='count', default=0,
    110                  help='print extra information')
    111   # We gather all the remaining positional arguments into 'args' since we need
    112   # to use some heuristics to determine whether or not <commit> was present.
    113   # However, to print pretty messages, we make use of metavar and help.
    114   p.add_argument('args', nargs='*', metavar='<commit>',
    115                  help='revision from which to compute the diff')
    116   p.add_argument('ignored', nargs='*', metavar='<file>...',
    117                  help='if specified, only consider differences in these files')
    118   opts = p.parse_args(argv)
    119 
    120   opts.verbose -= opts.quiet
    121   del opts.quiet
    122 
    123   commit, files = interpret_args(opts.args, dash_dash, opts.commit)
    124   changed_lines = compute_diff_and_extract_lines(commit, files)
    125   if opts.verbose >= 1:
    126     ignored_files = set(changed_lines)
    127   filter_by_extension(changed_lines, opts.extensions.lower().split(','))
    128   if opts.verbose >= 1:
    129     ignored_files.difference_update(changed_lines)
    130     if ignored_files:
    131       print 'Ignoring changes in the following files (wrong extension):'
    132       for filename in ignored_files:
    133         print '   ', filename
    134     if changed_lines:
    135       print 'Running clang-format on the following files:'
    136       for filename in changed_lines:
    137         print '   ', filename
    138     else:
    139       print 'no modified files to format'
    140       return
    141   # The computed diff outputs absolute paths, so we must cd before accessing
    142   # those files.
    143   cd_to_toplevel()
    144   old_tree = create_tree_from_workdir(changed_lines)
    145   new_tree = run_clang_format_and_save_to_tree(changed_lines,
    146                                                binary=opts.binary,
    147                                                style=opts.style)
    148   if opts.verbose >= 1:
    149     print 'old tree:', old_tree
    150     print 'new tree:', new_tree
    151   if old_tree == new_tree:
    152     if opts.verbose >= 0:
    153       print 'clang-format did not modify any files'
    154   elif opts.diff:
    155     print_diff(old_tree, new_tree)
    156   else:
    157     changed_files = apply_changes(old_tree, new_tree, force=opts.force,
    158                                   patch_mode=opts.patch)
    159     if (opts.verbose >= 0 and not opts.patch) or opts.verbose >= 1:
    160       print 'changed files:'
    161       for filename in changed_files:
    162         print '   ', filename
    163 
    164 
    165 def load_git_config(non_string_options=None):
    166   """Return the git configuration as a dictionary.
    167 
    168   All options are assumed to be strings unless in `non_string_options`, in which
    169   is a dictionary mapping option name (in lower case) to either "--bool" or
    170   "--int"."""
    171   if non_string_options is None:
    172     non_string_options = {}
    173   out = {}
    174   for entry in run('git', 'config', '--list', '--null').split('\0'):
    175     if entry:
    176       name, value = entry.split('\n', 1)
    177       if name in non_string_options:
    178         value = run('git', 'config', non_string_options[name], name)
    179       out[name] = value
    180   return out
    181 
    182 
    183 def interpret_args(args, dash_dash, default_commit):
    184   """Interpret `args` as "[commit] [--] [files...]" and return (commit, files).
    185 
    186   It is assumed that "--" and everything that follows has been removed from
    187   args and placed in `dash_dash`.
    188 
    189   If "--" is present (i.e., `dash_dash` is non-empty), the argument to its
    190   left (if present) is taken as commit.  Otherwise, the first argument is
    191   checked if it is a commit or a file.  If commit is not given,
    192   `default_commit` is used."""
    193   if dash_dash:
    194     if len(args) == 0:
    195       commit = default_commit
    196     elif len(args) > 1:
    197       die('at most one commit allowed; %d given' % len(args))
    198     else:
    199       commit = args[0]
    200     object_type = get_object_type(commit)
    201     if object_type not in ('commit', 'tag'):
    202       if object_type is None:
    203         die("'%s' is not a commit" % commit)
    204       else:
    205         die("'%s' is a %s, but a commit was expected" % (commit, object_type))
    206     files = dash_dash[1:]
    207   elif args:
    208     if disambiguate_revision(args[0]):
    209       commit = args[0]
    210       files = args[1:]
    211     else:
    212       commit = default_commit
    213       files = args
    214   else:
    215     commit = default_commit
    216     files = []
    217   return commit, files
    218 
    219 
    220 def disambiguate_revision(value):
    221   """Returns True if `value` is a revision, False if it is a file, or dies."""
    222   # If `value` is ambiguous (neither a commit nor a file), the following
    223   # command will die with an appropriate error message.
    224   run('git', 'rev-parse', value, verbose=False)
    225   object_type = get_object_type(value)
    226   if object_type is None:
    227     return False
    228   if object_type in ('commit', 'tag'):
    229     return True
    230   die('`%s` is a %s, but a commit or filename was expected' %
    231       (value, object_type))
    232 
    233 
    234 def get_object_type(value):
    235   """Returns a string description of an object's type, or None if it is not
    236   a valid git object."""
    237   cmd = ['git', 'cat-file', '-t', value]
    238   p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    239   stdout, stderr = p.communicate()
    240   if p.returncode != 0:
    241     return None
    242   return stdout.strip()
    243 
    244 
    245 def compute_diff_and_extract_lines(commit, files):
    246   """Calls compute_diff() followed by extract_lines()."""
    247   diff_process = compute_diff(commit, files)
    248   changed_lines = extract_lines(diff_process.stdout)
    249   diff_process.stdout.close()
    250   diff_process.wait()
    251   if diff_process.returncode != 0:
    252     # Assume error was already printed to stderr.
    253     sys.exit(2)
    254   return changed_lines
    255 
    256 
    257 def compute_diff(commit, files):
    258   """Return a subprocess object producing the diff from `commit`.
    259 
    260   The return value's `stdin` file object will produce a patch with the
    261   differences between the working directory and `commit`, filtered on `files`
    262   (if non-empty).  Zero context lines are used in the patch."""
    263   cmd = ['git', 'diff-index', '-p', '-U0', commit, '--']
    264   cmd.extend(files)
    265   p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    266   p.stdin.close()
    267   return p
    268 
    269 
    270 def extract_lines(patch_file):
    271   """Extract the changed lines in `patch_file`.
    272 
    273   The return value is a dictionary mapping filename to a list of (start_line,
    274   line_count) pairs.
    275 
    276   The input must have been produced with ``-U0``, meaning unidiff format with
    277   zero lines of context.  The return value is a dict mapping filename to a
    278   list of line `Range`s."""
    279   matches = {}
    280   for line in patch_file:
    281     match = re.search(r'^\+\+\+\ [^/]+/(.*)', line)
    282     if match:
    283       filename = match.group(1).rstrip('\r\n')
    284     match = re.search(r'^@@ -[0-9,]+ \+(\d+)(,(\d+))?', line)
    285     if match:
    286       start_line = int(match.group(1))
    287       line_count = 1
    288       if match.group(3):
    289         line_count = int(match.group(3))
    290       if line_count > 0:
    291         matches.setdefault(filename, []).append(Range(start_line, line_count))
    292   return matches
    293 
    294 
    295 def filter_by_extension(dictionary, allowed_extensions):
    296   """Delete every key in `dictionary` that doesn't have an allowed extension.
    297 
    298   `allowed_extensions` must be a collection of lowercase file extensions,
    299   excluding the period."""
    300   allowed_extensions = frozenset(allowed_extensions)
    301   for filename in dictionary.keys():
    302     base_ext = filename.rsplit('.', 1)
    303     if len(base_ext) == 1 or base_ext[1].lower() not in allowed_extensions:
    304       del dictionary[filename]
    305 
    306 
    307 def cd_to_toplevel():
    308   """Change to the top level of the git repository."""
    309   toplevel = run('git', 'rev-parse', '--show-toplevel')
    310   os.chdir(toplevel)
    311 
    312 
    313 def create_tree_from_workdir(filenames):
    314   """Create a new git tree with the given files from the working directory.
    315 
    316   Returns the object ID (SHA-1) of the created tree."""
    317   return create_tree(filenames, '--stdin')
    318 
    319 
    320 def run_clang_format_and_save_to_tree(changed_lines, binary='clang-format',
    321                                       style=None):
    322   """Run clang-format on each file and save the result to a git tree.
    323 
    324   Returns the object ID (SHA-1) of the created tree."""
    325   def index_info_generator():
    326     for filename, line_ranges in changed_lines.iteritems():
    327       mode = oct(os.stat(filename).st_mode)
    328       blob_id = clang_format_to_blob(filename, line_ranges, binary=binary,
    329                                      style=style)
    330       yield '%s %s\t%s' % (mode, blob_id, filename)
    331   return create_tree(index_info_generator(), '--index-info')
    332 
    333 
    334 def create_tree(input_lines, mode):
    335   """Create a tree object from the given input.
    336 
    337   If mode is '--stdin', it must be a list of filenames.  If mode is
    338   '--index-info' is must be a list of values suitable for "git update-index
    339   --index-info", such as "<mode> <SP> <sha1> <TAB> <filename>".  Any other mode
    340   is invalid."""
    341   assert mode in ('--stdin', '--index-info')
    342   cmd = ['git', 'update-index', '--add', '-z', mode]
    343   with temporary_index_file():
    344     p = subprocess.Popen(cmd, stdin=subprocess.PIPE)
    345     for line in input_lines:
    346       p.stdin.write('%s\0' % line)
    347     p.stdin.close()
    348     if p.wait() != 0:
    349       die('`%s` failed' % ' '.join(cmd))
    350     tree_id = run('git', 'write-tree')
    351     return tree_id
    352 
    353 
    354 def clang_format_to_blob(filename, line_ranges, binary='clang-format',
    355                          style=None):
    356   """Run clang-format on the given file and save the result to a git blob.
    357 
    358   Returns the object ID (SHA-1) of the created blob."""
    359   clang_format_cmd = [binary, filename]
    360   if style:
    361     clang_format_cmd.extend(['-style='+style])
    362   clang_format_cmd.extend([
    363       '-lines=%s:%s' % (start_line, start_line+line_count-1)
    364       for start_line, line_count in line_ranges])
    365   try:
    366     clang_format = subprocess.Popen(clang_format_cmd, stdin=subprocess.PIPE,
    367                                     stdout=subprocess.PIPE)
    368   except OSError as e:
    369     if e.errno == errno.ENOENT:
    370       die('cannot find executable "%s"' % binary)
    371     else:
    372       raise
    373   clang_format.stdin.close()
    374   hash_object_cmd = ['git', 'hash-object', '-w', '--path='+filename, '--stdin']
    375   hash_object = subprocess.Popen(hash_object_cmd, stdin=clang_format.stdout,
    376                                  stdout=subprocess.PIPE)
    377   clang_format.stdout.close()
    378   stdout = hash_object.communicate()[0]
    379   if hash_object.returncode != 0:
    380     die('`%s` failed' % ' '.join(hash_object_cmd))
    381   if clang_format.wait() != 0:
    382     die('`%s` failed' % ' '.join(clang_format_cmd))
    383   return stdout.rstrip('\r\n')
    384 
    385 
    386 @contextlib.contextmanager
    387 def temporary_index_file(tree=None):
    388   """Context manager for setting GIT_INDEX_FILE to a temporary file and deleting
    389   the file afterward."""
    390   index_path = create_temporary_index(tree)
    391   old_index_path = os.environ.get('GIT_INDEX_FILE')
    392   os.environ['GIT_INDEX_FILE'] = index_path
    393   try:
    394     yield
    395   finally:
    396     if old_index_path is None:
    397       del os.environ['GIT_INDEX_FILE']
    398     else:
    399       os.environ['GIT_INDEX_FILE'] = old_index_path
    400     os.remove(index_path)
    401 
    402 
    403 def create_temporary_index(tree=None):
    404   """Create a temporary index file and return the created file's path.
    405 
    406   If `tree` is not None, use that as the tree to read in.  Otherwise, an
    407   empty index is created."""
    408   gitdir = run('git', 'rev-parse', '--git-dir')
    409   path = os.path.join(gitdir, temp_index_basename)
    410   if tree is None:
    411     tree = '--empty'
    412   run('git', 'read-tree', '--index-output='+path, tree)
    413   return path
    414 
    415 
    416 def print_diff(old_tree, new_tree):
    417   """Print the diff between the two trees to stdout."""
    418   # We use the porcelain 'diff' and not plumbing 'diff-tree' because the output
    419   # is expected to be viewed by the user, and only the former does nice things
    420   # like color and pagination.
    421   subprocess.check_call(['git', 'diff', old_tree, new_tree, '--'])
    422 
    423 
    424 def apply_changes(old_tree, new_tree, force=False, patch_mode=False):
    425   """Apply the changes in `new_tree` to the working directory.
    426 
    427   Bails if there are local changes in those files and not `force`.  If
    428   `patch_mode`, runs `git checkout --patch` to select hunks interactively."""
    429   changed_files = run('git', 'diff-tree', '-r', '-z', '--name-only', old_tree,
    430                       new_tree).rstrip('\0').split('\0')
    431   if not force:
    432     unstaged_files = run('git', 'diff-files', '--name-status', *changed_files)
    433     if unstaged_files:
    434       print >>sys.stderr, ('The following files would be modified but '
    435                            'have unstaged changes:')
    436       print >>sys.stderr, unstaged_files
    437       print >>sys.stderr, 'Please commit, stage, or stash them first.'
    438       sys.exit(2)
    439   if patch_mode:
    440     # In patch mode, we could just as well create an index from the new tree
    441     # and checkout from that, but then the user will be presented with a
    442     # message saying "Discard ... from worktree".  Instead, we use the old
    443     # tree as the index and checkout from new_tree, which gives the slightly
    444     # better message, "Apply ... to index and worktree".  This is not quite
    445     # right, since it won't be applied to the user's index, but oh well.
    446     with temporary_index_file(old_tree):
    447       subprocess.check_call(['git', 'checkout', '--patch', new_tree])
    448     index_tree = old_tree
    449   else:
    450     with temporary_index_file(new_tree):
    451       run('git', 'checkout-index', '-a', '-f')
    452   return changed_files
    453 
    454 
    455 def run(*args, **kwargs):
    456   stdin = kwargs.pop('stdin', '')
    457   verbose = kwargs.pop('verbose', True)
    458   strip = kwargs.pop('strip', True)
    459   for name in kwargs:
    460     raise TypeError("run() got an unexpected keyword argument '%s'" % name)
    461   p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
    462                        stdin=subprocess.PIPE)
    463   stdout, stderr = p.communicate(input=stdin)
    464   if p.returncode == 0:
    465     if stderr:
    466       if verbose:
    467         print >>sys.stderr, '`%s` printed to stderr:' % ' '.join(args)
    468       print >>sys.stderr, stderr.rstrip()
    469     if strip:
    470       stdout = stdout.rstrip('\r\n')
    471     return stdout
    472   if verbose:
    473     print >>sys.stderr, '`%s` returned %s' % (' '.join(args), p.returncode)
    474   if stderr:
    475     print >>sys.stderr, stderr.rstrip()
    476   sys.exit(2)
    477 
    478 
    479 def die(message):
    480   print >>sys.stderr, 'error:', message
    481   sys.exit(2)
    482 
    483 
    484 if __name__ == '__main__':
    485   main()
    486