Home | History | Annotate | Download | only in repo_diff
      1 """List downstream commits that are not upstream and are visible in the diff.
      2 
      3 Only include changes that are visible when you diff
      4 the downstream and upstream branches.
      5 
      6 This will naturally exclude changes that already landed upstream
      7 in some form but were not merged or cherry picked.
      8 
      9 This will also exclude changes that were added then reverted downstream.
     10 
     11 """
     12 
     13 from __future__ import absolute_import
     14 from __future__ import division
     15 from __future__ import print_function
     16 import argparse
     17 import os
     18 import subprocess
     19 
     20 
     21 def git(args):
     22   """Git command.
     23 
     24   Args:
     25     args: A list of arguments to be sent to the git command.
     26 
     27   Returns:
     28     The output of the git command.
     29   """
     30 
     31   command = ['git']
     32   command.extend(args)
     33   with open(os.devnull, 'w') as devull:
     34     return subprocess.check_output(command, stderr=devull)
     35 
     36 
     37 class CommitFinder(object):
     38 
     39   def __init__(self, working_dir, upstream, downstream):
     40     self.working_dir = working_dir
     41     self.upstream = upstream
     42     self.downstream = downstream
     43 
     44   def __call__(self, filename):
     45     insertion_commits = set()
     46 
     47     if os.path.isfile(os.path.join(self.working_dir, filename)):
     48       blame_output = git(['-C', self.working_dir, 'blame', '-l',
     49                           '%s..%s' % (self.upstream, self.downstream),
     50                           '--', filename])
     51       for line in blame_output.splitlines():
     52         # The commit is the first field of a line
     53         blame_fields = line.split(' ', 1)
     54         # Some lines can be empty
     55         if blame_fields:
     56           insertion_commits.add(blame_fields[0])
     57 
     58     return insertion_commits
     59 
     60 
     61 def find_insertion_commits(upstream, downstream, working_dir):
     62   """Finds all commits that insert lines on top of the upstream baseline.
     63 
     64   Args:
     65     upstream: Upstream branch to be used as a baseline.
     66     downstream: Downstream branch to search for commits missing upstream.
     67     working_dir: Run as if git was started in this directory.
     68 
     69   Returns:
     70     A set of commits that insert lines on top of the upstream baseline.
     71   """
     72 
     73   insertion_commits = set()
     74 
     75   diff_files = git(['-C', working_dir, 'diff',
     76                     '--name-only',
     77                     '--diff-filter=d',
     78                     upstream,
     79                     downstream])
     80   diff_files = diff_files.splitlines()
     81 
     82   finder = CommitFinder(working_dir, upstream, downstream)
     83   commits_per_file = [finder(filename) for filename in diff_files]
     84 
     85   for commits in commits_per_file:
     86     insertion_commits.update(commits)
     87 
     88   return insertion_commits
     89 
     90 
     91 def find(upstream, downstream, working_dir):
     92   """Finds downstream commits that are not upstream and are visible in the diff.
     93 
     94   Args:
     95     upstream: Upstream branch to be used as a baseline.
     96     downstream: Downstream branch to search for commits missing upstream.
     97     working_dir: Run as if git was started in thid directory.
     98 
     99   Returns:
    100     A set of downstream commits missing upstream.
    101   """
    102 
    103   commits_not_upstreamed = set()
    104   revlist_output = git(['-C', working_dir, 'rev-list', '--no-merges',
    105                         '%s..%s' % (upstream, downstream)])
    106   downstream_only_commits = set(revlist_output.splitlines())
    107   insertion_commits = set()
    108 
    109   # If there are no downstream-only commits there's no point in
    110   # futher filtering
    111   if downstream_only_commits:
    112     insertion_commits = find_insertion_commits(upstream, downstream,
    113                                                working_dir)
    114 
    115   # The commits that are only downstream and are visible in 'git blame' are the
    116   # ones that insert lines in the diff between upstream and downstream.
    117   commits_not_upstreamed.update(
    118       downstream_only_commits.intersection(insertion_commits))
    119 
    120   # TODO(diegowilson) add commits that deleted lines
    121 
    122   return commits_not_upstreamed
    123 
    124 
    125 def main():
    126   parser = argparse.ArgumentParser(
    127       description='Finds commits yet to be applied upstream.')
    128   parser.add_argument(
    129       'upstream',
    130       help='Upstream branch to be used as a baseline.',
    131   )
    132   parser.add_argument(
    133       'downstream',
    134       help='Downstream branch to search for commits missing upstream.',
    135   )
    136   parser.add_argument(
    137       '-C',
    138       '--working_directory',
    139       help='Run as if git was started in thid directory',
    140       default='.',)
    141   args = parser.parse_args()
    142   upstream = args.upstream
    143   downstream = args.downstream
    144   working_dir = os.path.abspath(args.working_directory)
    145 
    146   print('\n'.join(find(upstream, downstream, working_dir)))
    147 
    148 
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
  main()
    151