Home | History | Annotate | Download | only in bin
      1 #!/usr/bin/env python
      2 #
      3 #===- git-clang-format - ClangFormat Git Integration ---------*- python -*--===#
      4 #
      5 #                     The LLVM Compiler Infrastructure
      6 #
      7 # This file is distributed under the University of Illinois Open Source
      8 # License. See LICENSE.TXT for details.
      9 #
     10 #===------------------------------------------------------------------------===#
     11 
     12 r"""                                                                             
     13 clang-format git integration                                                     
     14 ============================                                                     
     15                                                                                  
     16 This file provides a clang-format integration for git. Put it somewhere in your  
     17 path and ensure that it is executable. Then, "git clang-format" will invoke      
     18 clang-format on the changes in current files or a specific commit.               
     19                                                                                  
     20 For further details, run:                                                        
     21 git clang-format -h                                                              
     22                                                                                  
     23 Requires Python 2.7 or Python 3                                                  
     24 """               
     25 
     26 from __future__ import print_function
     27 import argparse
     28 import collections
     29 import contextlib
     30 import errno
     31 import os
     32 import re
     33 import subprocess
     34 import sys
     35 
     36 usage = 'git clang-format [OPTIONS] [<commit>] [<commit>] [--] [<file>...]'
     37 
     38 desc = '''
     39 If zero or one commits are given, run clang-format on all lines that differ
     40 between the working directory and <commit>, which defaults to HEAD.  Changes are
     41 only applied to the working directory.
     42 
     43 If two commits are given (requires --diff), run clang-format on all lines in the
     44 second <commit> that differ from the first <commit>.
     45 
     46 The following git-config settings set the default of the corresponding option:
     47   clangFormat.binary
     48   clangFormat.commit
     49   clangFormat.extension
     50   clangFormat.style
     51 '''
     52 
     53 # Name of the temporary index file in which save the output of clang-format.
     54 # This file is created within the .git directory.
     55 temp_index_basename = 'clang-format-index'
     56 
     57 
     58 Range = collections.namedtuple('Range', 'start, count')
     59 
     60 
     61 def main():
     62   config = load_git_config()
     63 
     64   # In order to keep '--' yet allow options after positionals, we need to
     65   # check for '--' ourselves.  (Setting nargs='*' throws away the '--', while
     66   # nargs=argparse.REMAINDER disallows options after positionals.)
     67   argv = sys.argv[1:]
     68   try:
     69     idx = argv.index('--')
     70   except ValueError:
     71     dash_dash = []
     72   else:
     73     dash_dash = argv[idx:]
     74     argv = argv[:idx]
     75 
     76   default_extensions = ','.join([
     77       # From clang/lib/Frontend/FrontendOptions.cpp, all lower case
     78       'c', 'h',  # C
     79       'm',  # ObjC
     80       'mm',  # ObjC++
     81       'cc', 'cp', 'cpp', 'c++', 'cxx', 'hpp',  # C++
     82       # Other languages that clang-format supports
     83       'proto', 'protodevel',  # Protocol Buffers
     84       'java',  # Java
     85       'js',  # JavaScript
     86       'ts',  # TypeScript
     87       ])
     88 
     89   p = argparse.ArgumentParser(
     90     usage=usage, formatter_class=argparse.RawDescriptionHelpFormatter,
     91     description=desc)
     92   p.add_argument('--binary',
     93                  default=config.get('clangformat.binary', 'clang-format'),
     94                  help='path to clang-format'),
     95   p.add_argument('--commit',
     96                  default=config.get('clangformat.commit', 'HEAD'),
     97                  help='default commit to use if none is specified'),
     98   p.add_argument('--diff', action='store_true',
     99                  help='print a diff instead of applying the changes')
    100   p.add_argument('--extensions',
    101                  default=config.get('clangformat.extensions',
    102                                     default_extensions),
    103                  help=('comma-separated list of file extensions to format, '
    104                        'excluding the period and case-insensitive')),
    105   p.add_argument('-f', '--force', action='store_true',
    106                  help='allow changes to unstaged files')
    107   p.add_argument('-p', '--patch', action='store_true',
    108                  help='select hunks interactively')
    109   p.add_argument('-q', '--quiet', action='count', default=0,
    110                  help='print less information')
    111   p.add_argument('--style',
    112                  default=config.get('clangformat.style', None),
    113                  help='passed to clang-format'),
    114   p.add_argument('-v', '--verbose', action='count', default=0,
    115                  help='print extra information')
    116   # We gather all the remaining positional arguments into 'args' since we need
    117   # to use some heuristics to determine whether or not <commit> was present.
    118   # However, to print pretty messages, we make use of metavar and help.
    119   p.add_argument('args', nargs='*', metavar='<commit>',
    120                  help='revision from which to compute the diff')
    121   p.add_argument('ignored', nargs='*', metavar='<file>...',
    122                  help='if specified, only consider differences in these files')
    123   opts = p.parse_args(argv)
    124 
    125   opts.verbose -= opts.quiet
    126   del opts.quiet
    127 
    128   commits, files = interpret_args(opts.args, dash_dash, opts.commit)
    129   if len(commits) > 1:
    130     if not opts.diff:
    131       die('--diff is required when two commits are given')
    132   else:
    133     if len(commits) > 2:
    134       die('at most two commits allowed; %d given' % len(commits))
    135   changed_lines = compute_diff_and_extract_lines(commits, files)
    136   if opts.verbose >= 1:
    137     ignored_files = set(changed_lines)
    138   filter_by_extension(changed_lines, opts.extensions.lower().split(','))
    139   if opts.verbose >= 1:
    140     ignored_files.difference_update(changed_lines)
    141     if ignored_files:
    142       print('Ignoring changes in the following files (wrong extension):')
    143       for filename in ignored_files:
    144         print('    %s' % filename)
    145     if changed_lines:
    146       print('Running clang-format on the following files:')
    147       for filename in changed_lines:
    148         print('    %s' % filename)
    149   if not changed_lines:
    150     print('no modified files to format')
    151     return
    152   # The computed diff outputs absolute paths, so we must cd before accessing
    153   # those files.
    154   cd_to_toplevel()
    155   if len(commits) > 1:
    156     old_tree = commits[1]
    157     new_tree = run_clang_format_and_save_to_tree(changed_lines,
    158                                                  revision=commits[1],
    159                                                  binary=opts.binary,
    160                                                  style=opts.style)
    161   else:
    162     old_tree = create_tree_from_workdir(changed_lines)
    163     new_tree = run_clang_format_and_save_to_tree(changed_lines,
    164                                                  binary=opts.binary,
    165                                                  style=opts.style)
    166   if opts.verbose >= 1:
    167     print('old tree: %s' % old_tree)
    168     print('new tree: %s' % new_tree)
    169   if old_tree == new_tree:
    170     if opts.verbose >= 0:
    171       print('clang-format did not modify any files')
    172   elif opts.diff:
    173     print_diff(old_tree, new_tree)
    174   else:
    175     changed_files = apply_changes(old_tree, new_tree, force=opts.force,
    176                                   patch_mode=opts.patch)
    177     if (opts.verbose >= 0 and not opts.patch) or opts.verbose >= 1:
    178       print('changed files:')
    179       for filename in changed_files:
    180         print('    %s' % filename)
    181 
    182 
    183 def load_git_config(non_string_options=None):
    184   """Return the git configuration as a dictionary.
    185 
    186   All options are assumed to be strings unless in `non_string_options`, in which
    187   is a dictionary mapping option name (in lower case) to either "--bool" or
    188   "--int"."""
    189   if non_string_options is None:
    190     non_string_options = {}
    191   out = {}
    192   for entry in run('git', 'config', '--list', '--null').split('\0'):
    193     if entry:
    194       name, value = entry.split('\n', 1)
    195       if name in non_string_options:
    196         value = run('git', 'config', non_string_options[name], name)
    197       out[name] = value
    198   return out
    199 
    200 
    201 def interpret_args(args, dash_dash, default_commit):
    202   """Interpret `args` as "[commits] [--] [files]" and return (commits, files).
    203 
    204   It is assumed that "--" and everything that follows has been removed from
    205   args and placed in `dash_dash`.
    206 
    207   If "--" is present (i.e., `dash_dash` is non-empty), the arguments to its
    208   left (if present) are taken as commits.  Otherwise, the arguments are checked
    209   from left to right if they are commits or files.  If commits are not given,
    210   a list with `default_commit` is used."""
    211   if dash_dash:
    212     if len(args) == 0:
    213       commits = [default_commit]
    214     else:
    215       commits = args
    216     for commit in commits:
    217       object_type = get_object_type(commit)
    218       if object_type not in ('commit', 'tag'):
    219         if object_type is None:
    220           die("'%s' is not a commit" % commit)
    221         else:
    222           die("'%s' is a %s, but a commit was expected" % (commit, object_type))
    223     files = dash_dash[1:]
    224   elif args:
    225     commits = []
    226     while args:
    227       if not disambiguate_revision(args[0]):
    228         break
    229       commits.append(args.pop(0))
    230     if not commits:
    231       commits = [default_commit]
    232     files = args
    233   else:
    234     commits = [default_commit]
    235     files = []
    236   return commits, files
    237 
    238 
    239 def disambiguate_revision(value):
    240   """Returns True if `value` is a revision, False if it is a file, or dies."""
    241   # If `value` is ambiguous (neither a commit nor a file), the following
    242   # command will die with an appropriate error message.
    243   run('git', 'rev-parse', value, verbose=False)
    244   object_type = get_object_type(value)
    245   if object_type is None:
    246     return False
    247   if object_type in ('commit', 'tag'):
    248     return True
    249   die('`%s` is a %s, but a commit or filename was expected' %
    250       (value, object_type))
    251 
    252 
    253 def get_object_type(value):
    254   """Returns a string description of an object's type, or None if it is not
    255   a valid git object."""
    256   cmd = ['git', 'cat-file', '-t', value]
    257   p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    258   stdout, stderr = p.communicate()
    259   if p.returncode != 0:
    260     return None
    261   return convert_string(stdout.strip())
    262 
    263 
    264 def compute_diff_and_extract_lines(commits, files):
    265   """Calls compute_diff() followed by extract_lines()."""
    266   diff_process = compute_diff(commits, files)
    267   changed_lines = extract_lines(diff_process.stdout)
    268   diff_process.stdout.close()
    269   diff_process.wait()
    270   if diff_process.returncode != 0:
    271     # Assume error was already printed to stderr.
    272     sys.exit(2)
    273   return changed_lines
    274 
    275 
    276 def compute_diff(commits, files):
    277   """Return a subprocess object producing the diff from `commits`.
    278 
    279   The return value's `stdin` file object will produce a patch with the
    280   differences between the working directory and the first commit if a single
    281   one was specified, or the difference between both specified commits, filtered
    282   on `files` (if non-empty).  Zero context lines are used in the patch."""
    283   git_tool = 'diff-index'
    284   if len(commits) > 1:
    285     git_tool = 'diff-tree'
    286   cmd = ['git', git_tool, '-p', '-U0'] + commits + ['--']
    287   cmd.extend(files)
    288   p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    289   p.stdin.close()
    290   return p
    291 
    292 
    293 def extract_lines(patch_file):
    294   """Extract the changed lines in `patch_file`.
    295 
    296   The return value is a dictionary mapping filename to a list of (start_line,
    297   line_count) pairs.
    298 
    299   The input must have been produced with ``-U0``, meaning unidiff format with
    300   zero lines of context.  The return value is a dict mapping filename to a
    301   list of line `Range`s."""
    302   matches = {}
    303   for line in patch_file:
    304     line = convert_string(line)
    305     match = re.search(r'^\+\+\+\ [^/]+/(.*)', line)
    306     if match:
    307       filename = match.group(1).rstrip('\r\n')
    308     match = re.search(r'^@@ -[0-9,]+ \+(\d+)(,(\d+))?', line)
    309     if match:
    310       start_line = int(match.group(1))
    311       line_count = 1
    312       if match.group(3):
    313         line_count = int(match.group(3))
    314       if line_count > 0:
    315         matches.setdefault(filename, []).append(Range(start_line, line_count))
    316   return matches
    317 
    318 
    319 def filter_by_extension(dictionary, allowed_extensions):
    320   """Delete every key in `dictionary` that doesn't have an allowed extension.
    321 
    322   `allowed_extensions` must be a collection of lowercase file extensions,
    323   excluding the period."""
    324   allowed_extensions = frozenset(allowed_extensions)
    325   for filename in list(dictionary.keys()):
    326     base_ext = filename.rsplit('.', 1)
    327     if len(base_ext) == 1 and '' in allowed_extensions:
    328         continue
    329     if len(base_ext) == 1 or base_ext[1].lower() not in allowed_extensions:
    330       del dictionary[filename]
    331 
    332 
    333 def cd_to_toplevel():
    334   """Change to the top level of the git repository."""
    335   toplevel = run('git', 'rev-parse', '--show-toplevel')
    336   os.chdir(toplevel)
    337 
    338 
    339 def create_tree_from_workdir(filenames):
    340   """Create a new git tree with the given files from the working directory.
    341 
    342   Returns the object ID (SHA-1) of the created tree."""
    343   return create_tree(filenames, '--stdin')
    344 
    345 
    346 def run_clang_format_and_save_to_tree(changed_lines, revision=None,
    347                                       binary='clang-format', style=None):
    348   """Run clang-format on each file and save the result to a git tree.
    349 
    350   Returns the object ID (SHA-1) of the created tree."""
    351   def iteritems(container):
    352       try:
    353           return container.iteritems() # Python 2
    354       except AttributeError:
    355           return container.items() # Python 3
    356   def index_info_generator():
    357     for filename, line_ranges in iteritems(changed_lines):
    358       if revision:
    359         git_metadata_cmd = ['git', 'ls-tree',
    360                             '%s:%s' % (revision, os.path.dirname(filename)),
    361                             os.path.basename(filename)]
    362         git_metadata = subprocess.Popen(git_metadata_cmd, stdin=subprocess.PIPE,
    363                                         stdout=subprocess.PIPE)
    364         stdout = git_metadata.communicate()[0]
    365         mode = oct(int(stdout.split()[0], 8))
    366       else:
    367         mode = oct(os.stat(filename).st_mode)
    368       # Adjust python3 octal format so that it matches what git expects
    369       if mode.startswith('0o'):
    370           mode = '0' + mode[2:]
    371       blob_id = clang_format_to_blob(filename, line_ranges,
    372                                      revision=revision,
    373                                      binary=binary,
    374                                      style=style)
    375       yield '%s %s\t%s' % (mode, blob_id, filename)
    376   return create_tree(index_info_generator(), '--index-info')
    377 
    378 
    379 def create_tree(input_lines, mode):
    380   """Create a tree object from the given input.
    381 
    382   If mode is '--stdin', it must be a list of filenames.  If mode is
    383   '--index-info' is must be a list of values suitable for "git update-index
    384   --index-info", such as "<mode> <SP> <sha1> <TAB> <filename>".  Any other mode
    385   is invalid."""
    386   assert mode in ('--stdin', '--index-info')
    387   cmd = ['git', 'update-index', '--add', '-z', mode]
    388   with temporary_index_file():
    389     p = subprocess.Popen(cmd, stdin=subprocess.PIPE)
    390     for line in input_lines:
    391       p.stdin.write(to_bytes('%s\0' % line))
    392     p.stdin.close()
    393     if p.wait() != 0:
    394       die('`%s` failed' % ' '.join(cmd))
    395     tree_id = run('git', 'write-tree')
    396     return tree_id
    397 
    398 
    399 def clang_format_to_blob(filename, line_ranges, revision=None,
    400                          binary='clang-format', style=None):
    401   """Run clang-format on the given file and save the result to a git blob.
    402 
    403   Runs on the file in `revision` if not None, or on the file in the working
    404   directory if `revision` is None.
    405 
    406   Returns the object ID (SHA-1) of the created blob."""
    407   clang_format_cmd = [binary]
    408   if style:
    409     clang_format_cmd.extend(['-style='+style])
    410   clang_format_cmd.extend([
    411       '-lines=%s:%s' % (start_line, start_line+line_count-1)
    412       for start_line, line_count in line_ranges])
    413   if revision:
    414     clang_format_cmd.extend(['-assume-filename='+filename])
    415     git_show_cmd = ['git', 'cat-file', 'blob', '%s:%s' % (revision, filename)]
    416     git_show = subprocess.Popen(git_show_cmd, stdin=subprocess.PIPE,
    417                                 stdout=subprocess.PIPE)
    418     git_show.stdin.close()
    419     clang_format_stdin = git_show.stdout
    420   else:
    421     clang_format_cmd.extend([filename])
    422     git_show = None
    423     clang_format_stdin = subprocess.PIPE
    424   try:
    425     clang_format = subprocess.Popen(clang_format_cmd, stdin=clang_format_stdin,
    426                                     stdout=subprocess.PIPE)
    427     if clang_format_stdin == subprocess.PIPE:
    428       clang_format_stdin = clang_format.stdin
    429   except OSError as e:
    430     if e.errno == errno.ENOENT:
    431       die('cannot find executable "%s"' % binary)
    432     else:
    433       raise
    434   clang_format_stdin.close()
    435   hash_object_cmd = ['git', 'hash-object', '-w', '--path='+filename, '--stdin']
    436   hash_object = subprocess.Popen(hash_object_cmd, stdin=clang_format.stdout,
    437                                  stdout=subprocess.PIPE)
    438   clang_format.stdout.close()
    439   stdout = hash_object.communicate()[0]
    440   if hash_object.returncode != 0:
    441     die('`%s` failed' % ' '.join(hash_object_cmd))
    442   if clang_format.wait() != 0:
    443     die('`%s` failed' % ' '.join(clang_format_cmd))
    444   if git_show and git_show.wait() != 0:
    445     die('`%s` failed' % ' '.join(git_show_cmd))
    446   return convert_string(stdout).rstrip('\r\n')
    447 
    448 
    449 @contextlib.contextmanager
    450 def temporary_index_file(tree=None):
    451   """Context manager for setting GIT_INDEX_FILE to a temporary file and deleting
    452   the file afterward."""
    453   index_path = create_temporary_index(tree)
    454   old_index_path = os.environ.get('GIT_INDEX_FILE')
    455   os.environ['GIT_INDEX_FILE'] = index_path
    456   try:
    457     yield
    458   finally:
    459     if old_index_path is None:
    460       del os.environ['GIT_INDEX_FILE']
    461     else:
    462       os.environ['GIT_INDEX_FILE'] = old_index_path
    463     os.remove(index_path)
    464 
    465 
    466 def create_temporary_index(tree=None):
    467   """Create a temporary index file and return the created file's path.
    468 
    469   If `tree` is not None, use that as the tree to read in.  Otherwise, an
    470   empty index is created."""
    471   gitdir = run('git', 'rev-parse', '--git-dir')
    472   path = os.path.join(gitdir, temp_index_basename)
    473   if tree is None:
    474     tree = '--empty'
    475   run('git', 'read-tree', '--index-output='+path, tree)
    476   return path
    477 
    478 
    479 def print_diff(old_tree, new_tree):
    480   """Print the diff between the two trees to stdout."""
    481   # We use the porcelain 'diff' and not plumbing 'diff-tree' because the output
    482   # is expected to be viewed by the user, and only the former does nice things
    483   # like color and pagination.
    484   #
    485   # We also only print modified files since `new_tree` only contains the files
    486   # that were modified, so unmodified files would show as deleted without the
    487   # filter.
    488   subprocess.check_call(['git', 'diff', '--diff-filter=M', old_tree, new_tree,
    489                          '--'])
    490 
    491 
    492 def apply_changes(old_tree, new_tree, force=False, patch_mode=False):
    493   """Apply the changes in `new_tree` to the working directory.
    494 
    495   Bails if there are local changes in those files and not `force`.  If
    496   `patch_mode`, runs `git checkout --patch` to select hunks interactively."""
    497   changed_files = run('git', 'diff-tree', '--diff-filter=M', '-r', '-z',
    498                       '--name-only', old_tree,
    499                       new_tree).rstrip('\0').split('\0')
    500   if not force:
    501     unstaged_files = run('git', 'diff-files', '--name-status', *changed_files)
    502     if unstaged_files:
    503       print('The following files would be modified but '
    504                 'have unstaged changes:', file=sys.stderr)
    505       print(unstaged_files, file=sys.stderr)
    506       print('Please commit, stage, or stash them first.', file=sys.stderr)
    507       sys.exit(2)
    508   if patch_mode:
    509     # In patch mode, we could just as well create an index from the new tree
    510     # and checkout from that, but then the user will be presented with a
    511     # message saying "Discard ... from worktree".  Instead, we use the old
    512     # tree as the index and checkout from new_tree, which gives the slightly
    513     # better message, "Apply ... to index and worktree".  This is not quite
    514     # right, since it won't be applied to the user's index, but oh well.
    515     with temporary_index_file(old_tree):
    516       subprocess.check_call(['git', 'checkout', '--patch', new_tree])
    517     index_tree = old_tree
    518   else:
    519     with temporary_index_file(new_tree):
    520       run('git', 'checkout-index', '-a', '-f')
    521   return changed_files
    522 
    523 
    524 def run(*args, **kwargs):
    525   stdin = kwargs.pop('stdin', '')
    526   verbose = kwargs.pop('verbose', True)
    527   strip = kwargs.pop('strip', True)
    528   for name in kwargs:
    529     raise TypeError("run() got an unexpected keyword argument '%s'" % name)
    530   p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
    531                        stdin=subprocess.PIPE)
    532   stdout, stderr = p.communicate(input=stdin)
    533 
    534   stdout = convert_string(stdout)
    535   stderr = convert_string(stderr)
    536 
    537   if p.returncode == 0:
    538     if stderr:
    539       if verbose:
    540         print('`%s` printed to stderr:' % ' '.join(args), file=sys.stderr)
    541       print(stderr.rstrip(), file=sys.stderr)
    542     if strip:
    543       stdout = stdout.rstrip('\r\n')
    544     return stdout
    545   if verbose:
    546     print('`%s` returned %s' % (' '.join(args), p.returncode), file=sys.stderr)
    547   if stderr:
    548     print(stderr.rstrip(), file=sys.stderr)
    549   sys.exit(2)
    550 
    551 
    552 def die(message):
    553   print('error:', message, file=sys.stderr)
    554   sys.exit(2)
    555 
    556 
    557 def to_bytes(str_input):
    558     # Encode to UTF-8 to get binary data.
    559     if isinstance(str_input, bytes):
    560         return str_input
    561     return str_input.encode('utf-8')
    562 
    563 
    564 def to_string(bytes_input):
    565     if isinstance(bytes_input, str):
    566         return bytes_input
    567     return bytes_input.encode('utf-8')
    568 
    569 
    570 def convert_string(bytes_input):
    571     try:
    572         return to_string(bytes_input.decode('utf-8'))
    573     except AttributeError: # 'str' object has no attribute 'decode'.
    574         return str(bytes_input)
    575     except UnicodeError:
    576         return str(bytes_input)
    577 
    578 if __name__ == '__main__':
    579   main()
    580