Home | History | Annotate | Download | only in debug
      1 # (c) 2005 Ian Bicking and contributors; written for Paste (http://pythonpaste.org)
      2 # Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php
      3 """
      4 Module to find differences over time in a filesystem
      5 
      6 Basically this takes a snapshot of a directory, then sees what changes
      7 were made.  The contents of the files are not checked, so you can
      8 detect that the content was changed, but not what the old version of
      9 the file was.
     10 """
     11 
import os
from fnmatch import fnmatch
from datetime import datetime

try:
    # Python 3: ``UserDict`` is a class inside the ``collections`` module,
    # not a submodule, so it must be imported with ``from ... import``
    # (``import collections.UserDict`` always raises ImportError).
    from collections import UserDict as IterableUserDict
except ImportError:
    # Python 2.5-2.7
    from UserDict import IterableUserDict
import operator
import re
     24 
     25 __all__ = ['Diff', 'Snapshot', 'File', 'Dir', 'report_expected_diffs',
     26            'show_diff']
     27 
     28 class Diff(object):
     29 
     30     """
     31     Represents the difference between two snapshots
     32     """
     33 
     34     def __init__(self, before, after):
     35         self.before = before
     36         self.after = after
     37         self._calculate()
     38 
     39     def _calculate(self):
     40         before = self.before.data
     41         after = self.after.data
     42         self.deleted = {}
     43         self.updated = {}
     44         self.created = after.copy()
     45         for path, f in before.items():
     46             if path not in after:
     47                 self.deleted[path] = f
     48                 continue
     49             del self.created[path]
     50             if f.mtime < after[path].mtime:
     51                 self.updated[path] = after[path]
     52 
     53     def __str__(self):
     54         return self.report()
     55 
     56     def report(self, header=True, dates=False):
     57         s = []
     58         if header:
     59             s.append('Difference in %s from %s to %s:' %
     60                      (self.before.base_path,
     61                       self.before.calculated,
     62                       self.after.calculated))
     63         for name, files, show_size in [
     64             ('created', self.created, True),
     65             ('deleted', self.deleted, True),
     66             ('updated', self.updated, True)]:
     67             if files:
     68                 s.append('-- %s: -------------------' % name)
     69                 files = files.items()
     70                 files.sort()
     71                 last = ''
     72                 for path, f in files:
     73                     t = '  %s' % _space_prefix(last, path, indent=4,
     74                                                include_sep=False)
     75                     last = path
     76                     if show_size and f.size != 'N/A':
     77                         t += '  (%s bytes)' % f.size
     78                     if dates:
     79                         parts = []
     80                         if self.before.get(path):
     81                             parts.append(self.before[path].mtime)
     82                         if self.after.get(path):
     83                             parts.append(self.after[path].mtime)
     84                         t += ' (mtime: %s)' % ('->'.join(map(repr, parts)))
     85                     s.append(t)
     86         if len(s) == 1:
     87             s.append('  (no changes)')
     88         return '\n'.join(s)
     89 
     90 class Snapshot(IterableUserDict):
     91 
     92     """
     93     Represents a snapshot of a set of files.  Has a dictionary-like
     94     interface, keyed relative to ``base_path``
     95     """
     96 
     97     def __init__(self, base_path, files=None, ignore_wildcards=(),
     98                  ignore_paths=(), ignore_hidden=True):
     99         self.base_path = base_path
    100         self.ignore_wildcards = ignore_wildcards
    101         self.ignore_hidden = ignore_hidden
    102         self.ignore_paths = ignore_paths
    103         self.calculated = None
    104         self.data = files or {}
    105         if files is None:
    106             self.find_files()
    107 
    108     ############################################################
    109     ## File finding
    110     ############################################################
    111 
    112     def find_files(self):
    113         """
    114         Find all the files under the base path, and put them in
    115         ``self.data``
    116         """
    117         self._find_traverse('', self.data)
    118         self.calculated = datetime.now()
    119 
    120     def _ignore_file(self, fn):
    121         if fn in self.ignore_paths:
    122             return True
    123         if self.ignore_hidden and os.path.basename(fn).startswith('.'):
    124             return True
    125         for pat in self.ignore_wildcards:
    126             if fnmatch(fn, pat):
    127                 return True
    128         return False
    129 
    130     def _find_traverse(self, path, result):
    131         full = os.path.join(self.base_path, path)
    132         if os.path.isdir(full):
    133             if path:
    134                 # Don't actually include the base path
    135                 result[path] = Dir(self.base_path, path)
    136             for fn in os.listdir(full):
    137                 fn = os.path.join(path, fn)
    138                 if self._ignore_file(fn):
    139                     continue
    140                 self._find_traverse(fn, result)
    141         else:
    142             result[path] = File(self.base_path, path)
    143 
    144     def __repr__(self):
    145         return '<%s in %r from %r>' % (
    146             self.__class__.__name__, self.base_path,
    147             self.calculated or '(no calculation done)')
    148 
    149     def compare_expected(self, expected, comparison=operator.eq,
    150                          differ=None, not_found=None,
    151                          include_success=False):
    152         """
    153         Compares a dictionary of ``path: content`` to the
    154         found files.  Comparison is done by equality, or the
    155         ``comparison(actual_content, expected_content)`` function given.
    156 
    157         Returns dictionary of differences, keyed by path.  Each
    158         difference is either noted, or the output of
    159         ``differ(actual_content, expected_content)`` is given.
    160 
    161         If a file does not exist and ``not_found`` is given, then
    162         ``not_found(path)`` is put in.
    163         """
    164         result = {}
    165         for path in expected:
    166             orig_path = path
    167             path = path.strip('/')
    168             if path not in self.data:
    169                 if not_found:
    170                     msg = not_found(path)
    171                 else:
    172                     msg = 'not found'
    173                 result[path] = msg
    174                 continue
    175             expected_content = expected[orig_path]
    176             file = self.data[path]
    177             actual_content = file.bytes
    178             if not comparison(actual_content, expected_content):
    179                 if differ:
    180                     msg = differ(actual_content, expected_content)
    181                 else:
    182                     if len(actual_content) < len(expected_content):
    183                         msg = 'differ (%i bytes smaller)' % (
    184                             len(expected_content) - len(actual_content))
    185                     elif len(actual_content) > len(expected_content):
    186                         msg = 'differ (%i bytes larger)' % (
    187                             len(actual_content) - len(expected_content))
    188                     else:
    189                         msg = 'diff (same size)'
    190                 result[path] = msg
    191             elif include_success:
    192                 result[path] = 'same!'
    193         return result
    194 
    195     def diff_to_now(self):
    196         return Diff(self, self.clone())
    197 
    198     def clone(self):
    199         return self.__class__(base_path=self.base_path,
    200                               ignore_wildcards=self.ignore_wildcards,
    201                               ignore_paths=self.ignore_paths,
    202                               ignore_hidden=self.ignore_hidden)
    203 
    204 class File(object):
    205 
    206     """
    207     Represents a single file found as the result of a command.
    208 
    209     Has attributes:
    210 
    211     ``path``:
    212         The path of the file, relative to the ``base_path``
    213 
    214     ``full``:
    215         The full path
    216 
    217     ``stat``:
    218         The results of ``os.stat``.  Also ``mtime`` and ``size``
    219         contain the ``.st_mtime`` and ``st_size`` of the stat.
    220 
    221     ``bytes``:
    222         The contents of the file.
    223 
    224     You may use the ``in`` operator with these objects (tested against
    225     the contents of the file), and the ``.mustcontain()`` method.
    226     """
    227 
    228     file = True
    229     dir = False
    230 
    231     def __init__(self, base_path, path):
    232         self.base_path = base_path
    233         self.path = path
    234         self.full = os.path.join(base_path, path)
    235         self.stat = os.stat(self.full)
    236         self.mtime = self.stat.st_mtime
    237         self.size = self.stat.st_size
    238         self._bytes = None
    239 
    240     def bytes__get(self):
    241         if self._bytes is None:
    242             f = open(self.full, 'rb')
    243             self._bytes = f.read()
    244             f.close()
    245         return self._bytes
    246     bytes = property(bytes__get)
    247 
    248     def __contains__(self, s):
    249         return s in self.bytes
    250 
    251     def mustcontain(self, s):
    252         __tracebackhide__ = True
    253         bytes = self.bytes
    254         if s not in bytes:
    255             print('Could not find %r in:' % s)
    256             print(bytes)
    257             assert s in bytes
    258 
    259     def __repr__(self):
    260         return '<%s %s:%s>' % (
    261             self.__class__.__name__,
    262             self.base_path, self.path)
    263 
    264 class Dir(File):
    265 
    266     """
    267     Represents a directory created by a command.
    268     """
    269 
    270     file = False
    271     dir = True
    272 
    273     def __init__(self, base_path, path):
    274         self.base_path = base_path
    275         self.path = path
    276         self.full = os.path.join(base_path, path)
    277         self.size = 'N/A'
    278         self.mtime = 'N/A'
    279 
    280     def __repr__(self):
    281         return '<%s %s:%s>' % (
    282             self.__class__.__name__,
    283             self.base_path, self.path)
    284 
    285     def bytes__get(self):
    286         raise NotImplementedError(
    287             "Directory %r doesn't have content" % self)
    288 
    289     bytes = property(bytes__get)
    290 
    291 
    292 def _space_prefix(pref, full, sep=None, indent=None, include_sep=True):
    293     """
    294     Anything shared by pref and full will be replaced with spaces
    295     in full, and full returned.
    296 
    297     Example::
    298 
    299         >>> _space_prefix('/foo/bar', '/foo')
    300         '    /bar'
    301     """
    302     if sep is None:
    303         sep = os.path.sep
    304     pref = pref.split(sep)
    305     full = full.split(sep)
    306     padding = []
    307     while pref and full and pref[0] == full[0]:
    308         if indent is None:
    309             padding.append(' ' * (len(full[0]) + len(sep)))
    310         else:
    311             padding.append(' ' * indent)
    312         full.pop(0)
    313         pref.pop(0)
    314     if padding:
    315         if include_sep:
    316             return ''.join(padding) + sep + sep.join(full)
    317         else:
    318             return ''.join(padding) + sep.join(full)
    319     else:
    320         return sep.join(full)
    321 
    322 def report_expected_diffs(diffs, colorize=False):
    323     """
    324     Takes the output of compare_expected, and returns a string
    325     description of the differences.
    326     """
    327     if not diffs:
    328         return 'No differences'
    329     diffs = diffs.items()
    330     diffs.sort()
    331     s = []
    332     last = ''
    333     for path, desc in diffs:
    334         t = _space_prefix(last, path, indent=4, include_sep=False)
    335         if colorize:
    336             t = color_line(t, 11)
    337         last = path
    338         if len(desc.splitlines()) > 1:
    339             cur_indent = len(re.search(r'^[ ]*', t).group(0))
    340             desc = indent(cur_indent+2, desc)
    341             if colorize:
    342                 t += '\n'
    343                 for line in desc.splitlines():
    344                     if line.strip().startswith('+'):
    345                         line = color_line(line, 10)
    346                     elif line.strip().startswith('-'):
    347                         line = color_line(line, 9)
    348                     else:
    349                         line = color_line(line, 14)
    350                     t += line+'\n'
    351             else:
    352                 t += '\n' + desc
    353         else:
    354             t += ' '+desc
    355         s.append(t)
    356     s.append('Files with differences: %s' % len(diffs))
    357     return '\n'.join(s)
    358 
    359 def color_code(foreground=None, background=None):
    360     """
    361     0  black
    362     1  red
    363     2  green
    364     3  yellow
    365     4  blue
    366     5  magenta (purple)
    367     6  cyan
    368     7  white (gray)
    369 
    370     Add 8 to get high-intensity
    371     """
    372     if foreground is None and background is None:
    373         # Reset
    374         return '\x1b[0m'
    375     codes = []
    376     if foreground is None:
    377         codes.append('[39m')
    378     elif foreground > 7:
    379         codes.append('[1m')
    380         codes.append('[%im' % (22+foreground))
    381     else:
    382         codes.append('[%im' % (30+foreground))
    383     if background is None:
    384         codes.append('[49m')
    385     else:
    386         codes.append('[%im' % (40+background))
    387     return '\x1b' + '\x1b'.join(codes)
    388 
    389 def color_line(line, foreground=None, background=None):
    390     match = re.search(r'^(\s*)', line)
    391     return (match.group(1) + color_code(foreground, background)
    392             + line[match.end():] + color_code())
    393 
    394 def indent(indent, text):
    395     return '\n'.join(
    396         [' '*indent + l for l in text.splitlines()])
    397 
    398 def show_diff(actual_content, expected_content):
    399     actual_lines = [l.strip() for l in actual_content.splitlines()
    400                     if l.strip()]
    401     expected_lines = [l.strip() for l in expected_content.splitlines()
    402                       if l.strip()]
    403     if len(actual_lines) == len(expected_lines) == 1:
    404         return '%r not %r' % (actual_lines[0], expected_lines[0])
    405     if not actual_lines:
    406         return 'Empty; should have:\n'+expected_content
    407     import difflib
    408     return '\n'.join(difflib.ndiff(actual_lines, expected_lines))
    409