Home | History | Annotate | Download | only in checkout
      1 # Copyright (C) 2009 Google Inc. All rights reserved.
      2 #
      3 # Redistribution and use in source and binary forms, with or without
      4 # modification, are permitted provided that the following conditions are
      5 # met:
      6 #
      7 #    * Redistributions of source code must retain the above copyright
      8 # notice, this list of conditions and the following disclaimer.
      9 #    * Redistributions in binary form must reproduce the above
     10 # copyright notice, this list of conditions and the following disclaimer
     11 # in the documentation and/or other materials provided with the
     12 # distribution.
     13 #    * Neither the name of Google Inc. nor the names of its
     14 # contributors may be used to endorse or promote products derived from
     15 # this software without specific prior written permission.
     16 #
     17 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     18 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     19 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     20 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     21 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     22 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     23 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     24 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     25 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     26 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     27 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     28 
     29 """WebKit's Python module for interacting with patches."""
     30 
     31 import logging
     32 import re
     33 
     34 _log = logging.getLogger(__name__)
     35 
     36 
     37 # FIXME: This is broken. We should compile our regexps up-front
     38 # instead of using a custom cache.
     39 _regexp_compile_cache = {}
     40 
     41 
     42 # FIXME: This function should be removed.
     43 def match(pattern, string):
     44     """Matches the string with the pattern, caching the compiled regexp."""
     45     if not pattern in _regexp_compile_cache:
     46         _regexp_compile_cache[pattern] = re.compile(pattern)
     47     return _regexp_compile_cache[pattern].match(string)
     48 
     49 
     50 # FIXME: This belongs on DiffParser (e.g. as to_svn_diff()).
     51 def git_diff_to_svn_diff(line):
     52     """Converts a git formatted diff line to a svn formatted line.
     53 
     54     Args:
     55       line: A string representing a line of the diff.
     56     """
     57     # FIXME: This list should be a class member on DiffParser.
     58     # These regexp patterns should be compiled once instead of every time.
     59     conversion_patterns = (("^diff --git \w/(.+) \w/(?P<FilePath>.+)", lambda matched: "Index: " + matched.group('FilePath') + "\n"),
     60                            ("^new file.*", lambda matched: "\n"),
     61                            ("^index (([0-9a-f]{7}\.\.[0-9a-f]{7})|([0-9a-f]{40}\.\.[0-9a-f]{40})) [0-9]{6}", lambda matched: "===================================================================\n"),
     62                            ("^--- \w/(?P<FilePath>.+)", lambda matched: "--- " + matched.group('FilePath') + "\n"),
     63                            ("^\+\+\+ \w/(?P<FilePath>.+)", lambda matched: "+++ " + matched.group('FilePath') + "\n"))
     64 
     65     for pattern, conversion in conversion_patterns:
     66         matched = match(pattern, line)
     67         if matched:
     68             return conversion(matched)
     69     return line
     70 
     71 
     72 # This function exists so we can unittest get_diff_converter function
     73 def svn_diff_to_svn_diff(line):
     74     return line
     75 
     76 
     77 # FIXME: This method belongs on DiffParser
     78 def get_diff_converter(lines):
     79     """Gets a converter function of diff lines.
     80 
     81     Args:
     82       lines: The lines of a diff file.
     83              If this line is git formatted, we'll return a
     84              converter from git to SVN.
     85     """
     86     for i, line in enumerate(lines[:-1]):
     87         # Stop when we find the first patch
     88         if line[:3] == "+++" and lines[i + 1] == "---":
     89             break
     90         if match(r"^diff --git \w/", line):
     91             return git_diff_to_svn_diff
     92     return svn_diff_to_svn_diff
     93 
# States used by DiffParser._parse_into_diff_files while walking a diff.
# Starting state; no "Index: " file declaration has been parsed yet.
_INITIAL_STATE = 1
# Set right after an "Index: " file declaration line.
_DECLARED_FILE_PATH = 2
# Set after an "@@ -a,b +c,d @@" chunk header; the lines that follow are
# read as chunk content.
_PROCESSING_CHUNK = 3
     97 
     98 
     99 class DiffFile(object):
    100     """Contains the information for one file in a patch.
    101 
    102     The field "lines" is a list which contains tuples in this format:
    103        (deleted_line_number, new_line_number, line_string)
    104     If deleted_line_number is zero, it means this line is newly added.
    105     If new_line_number is zero, it means this line is deleted.
    106     """
    107     # FIXME: Tuples generally grow into classes.  We should consider
    108     # adding a DiffLine object.
    109 
    110     def added_or_modified_line_numbers(self):
    111         # This logic was moved from patchreader.py, but may not be
    112         # the right API for this object long-term.
    113         return [line[1] for line in self.lines if not line[0]]
    114 
    115     def __init__(self, filename):
    116         self.filename = filename
    117         self.lines = []
    118 
    119     def add_new_line(self, line_number, line):
    120         self.lines.append((0, line_number, line))
    121 
    122     def add_deleted_line(self, line_number, line):
    123         self.lines.append((line_number, 0, line))
    124 
    125     def add_unchanged_line(self, deleted_line_number, new_line_number, line):
    126         self.lines.append((deleted_line_number, new_line_number, line))
    127 
    128 
    129 # If this is going to be called DiffParser, it should be a re-useable parser.
    130 # Otherwise we should rename it to ParsedDiff or just Diff.
    131 class DiffParser(object):
    132     """A parser for a patch file.
    133 
    134     The field "files" is a dict whose key is the filename and value is
    135     a DiffFile object.
    136     """
    137 
    138     def __init__(self, diff_input):
    139         """Parses a diff.
    140 
    141         Args:
    142           diff_input: An iterable object.
    143         """
    144         self.files = self._parse_into_diff_files(diff_input)
    145 
    146     # FIXME: This function is way too long and needs to be broken up.
    147     def _parse_into_diff_files(self, diff_input):
    148         files = {}
    149         state = _INITIAL_STATE
    150         current_file = None
    151         old_diff_line = None
    152         new_diff_line = None
    153         transform_line = get_diff_converter(diff_input)
    154         for line in diff_input:
    155             line = line.rstrip("\n")
    156             line = transform_line(line)
    157 
    158             file_declaration = match(r"^Index: (?P<FilePath>.+)", line)
    159             if file_declaration:
    160                 filename = file_declaration.group('FilePath')
    161                 current_file = DiffFile(filename)
    162                 files[filename] = current_file
    163                 state = _DECLARED_FILE_PATH
    164                 continue
    165 
    166             lines_changed = match(r"^@@ -(?P<OldStartLine>\d+)(,\d+)? \+(?P<NewStartLine>\d+)(,\d+)? @@", line)
    167             if lines_changed:
    168                 if state != _DECLARED_FILE_PATH and state != _PROCESSING_CHUNK:
    169                     _log.error('Unexpected line change without file path '
    170                                'declaration: %r' % line)
    171                 old_diff_line = int(lines_changed.group('OldStartLine'))
    172                 new_diff_line = int(lines_changed.group('NewStartLine'))
    173                 state = _PROCESSING_CHUNK
    174                 continue
    175 
    176             if state == _PROCESSING_CHUNK:
    177                 if line.startswith('+'):
    178                     current_file.add_new_line(new_diff_line, line[1:])
    179                     new_diff_line += 1
    180                 elif line.startswith('-'):
    181                     current_file.add_deleted_line(old_diff_line, line[1:])
    182                     old_diff_line += 1
    183                 elif line.startswith(' '):
    184                     current_file.add_unchanged_line(old_diff_line, new_diff_line, line[1:])
    185                     old_diff_line += 1
    186                     new_diff_line += 1
    187                 elif line == '\\ No newline at end of file':
    188                     # Nothing to do.  We may still have some added lines.
    189                     pass
    190                 else:
    191                     _log.error('Unexpected diff format when parsing a '
    192                                'chunk: %r' % line)
    193         return files
    194