Home | History | Annotate | Download | only in coverage
      1 """File wrangling."""
      2 
      3 from coverage.backward import to_string
      4 from coverage.misc import CoverageException
      5 import fnmatch, os, re, sys
      6 
      7 class FileLocator(object):
      8     """Understand how filenames work."""
      9 
     10     def __init__(self):
     11         # The absolute path to our current directory.
     12         self.relative_dir = self.abs_file(os.curdir) + os.sep
     13 
     14         # Cache of results of calling the canonical_filename() method, to
     15         # avoid duplicating work.
     16         self.canonical_filename_cache = {}
     17 
     18     def abs_file(self, filename):
     19         """Return the absolute normalized form of `filename`."""
     20         return os.path.normcase(os.path.abspath(os.path.realpath(filename)))
     21 
     22     def relative_filename(self, filename):
     23         """Return the relative form of `filename`.
     24 
     25         The filename will be relative to the current directory when the
     26         `FileLocator` was constructed.
     27 
     28         """
     29         if filename.startswith(self.relative_dir):
     30             filename = filename.replace(self.relative_dir, "")
     31         return filename
     32 
     33     def canonical_filename(self, filename):
     34         """Return a canonical filename for `filename`.
     35 
     36         An absolute path with no redundant components and normalized case.
     37 
     38         """
     39         if filename not in self.canonical_filename_cache:
     40             f = filename
     41             if os.path.isabs(f) and not os.path.exists(f):
     42                 if self.get_zip_data(f) is None:
     43                     f = os.path.basename(f)
     44             if not os.path.isabs(f):
     45                 for path in [os.curdir] + sys.path:
     46                     if path is None:
     47                         continue
     48                     g = os.path.join(path, f)
     49                     if os.path.exists(g):
     50                         f = g
     51                         break
     52             cf = self.abs_file(f)
     53             self.canonical_filename_cache[filename] = cf
     54         return self.canonical_filename_cache[filename]
     55 
     56     def get_zip_data(self, filename):
     57         """Get data from `filename` if it is a zip file path.
     58 
     59         Returns the string data read from the zip file, or None if no zip file
     60         could be found or `filename` isn't in it.  The data returned will be
     61         an empty string if the file is empty.
     62 
     63         """
     64         import zipimport
     65         markers = ['.zip'+os.sep, '.egg'+os.sep]
     66         for marker in markers:
     67             if marker in filename:
     68                 parts = filename.split(marker)
     69                 try:
     70                     zi = zipimport.zipimporter(parts[0]+marker[:-1])
     71                 except zipimport.ZipImportError:
     72                     continue
     73                 try:
     74                     data = zi.get_data(parts[1])
     75                 except IOError:
     76                     continue
     77                 return to_string(data)
     78         return None
     79 
     80 
     81 class TreeMatcher(object):
     82     """A matcher for files in a tree."""
     83     def __init__(self, directories):
     84         self.dirs = directories[:]
     85 
     86     def __repr__(self):
     87         return "<TreeMatcher %r>" % self.dirs
     88 
     89     def add(self, directory):
     90         """Add another directory to the list we match for."""
     91         self.dirs.append(directory)
     92 
     93     def match(self, fpath):
     94         """Does `fpath` indicate a file in one of our trees?"""
     95         for d in self.dirs:
     96             if fpath.startswith(d):
     97                 if fpath == d:
     98                     # This is the same file!
     99                     return True
    100                 if fpath[len(d)] == os.sep:
    101                     # This is a file in the directory
    102                     return True
    103         return False
    104 
    105 
    106 class FnmatchMatcher(object):
    107     """A matcher for files by filename pattern."""
    108     def __init__(self, pats):
    109         self.pats = pats[:]
    110 
    111     def __repr__(self):
    112         return "<FnmatchMatcher %r>" % self.pats
    113 
    114     def match(self, fpath):
    115         """Does `fpath` match one of our filename patterns?"""
    116         for pat in self.pats:
    117             if fnmatch.fnmatch(fpath, pat):
    118                 return True
    119         return False
    120 
    121 
    122 def sep(s):
    123     """Find the path separator used in this string, or os.sep if none."""
    124     sep_match = re.search(r"[\\/]", s)
    125     if sep_match:
    126         the_sep = sep_match.group(0)
    127     else:
    128         the_sep = os.sep
    129     return the_sep
    130 
    131 
    132 class PathAliases(object):
    133     """A collection of aliases for paths.
    134 
    135     When combining data files from remote machines, often the paths to source
    136     code are different, for example, due to OS differences, or because of
    137     serialized checkouts on continuous integration machines.
    138 
    139     A `PathAliases` object tracks a list of pattern/result pairs, and can
    140     map a path through those aliases to produce a unified path.
    141 
    142     `locator` is a FileLocator that is used to canonicalize the results.
    143 
    144     """
    145     def __init__(self, locator=None):
    146         self.aliases = []
    147         self.locator = locator
    148 
    149     def add(self, pattern, result):
    150         """Add the `pattern`/`result` pair to the list of aliases.
    151 
    152         `pattern` is an `fnmatch`-style pattern.  `result` is a simple
    153         string.  When mapping paths, if a path starts with a match against
    154         `pattern`, then that match is replaced with `result`.  This models
    155         isomorphic source trees being rooted at different places on two
    156         different machines.
    157 
    158         `pattern` can't end with a wildcard component, since that would
    159         match an entire tree, and not just its root.
    160 
    161         """
    162         # The pattern can't end with a wildcard component.
    163         pattern = pattern.rstrip(r"\/")
    164         if pattern.endswith("*"):
    165             raise CoverageException("Pattern must not end with wildcards.")
    166         pattern_sep = sep(pattern)
    167         pattern += pattern_sep
    168 
    169         # Make a regex from the pattern.  fnmatch always adds a \Z or $ to
    170         # match the whole string, which we don't want.
    171         regex_pat = fnmatch.translate(pattern).replace(r'\Z(', '(')
    172         if regex_pat.endswith("$"):
    173             regex_pat = regex_pat[:-1]
    174         # We want */a/b.py to match on Windows to, so change slash to match
    175         # either separator.
    176         regex_pat = regex_pat.replace(r"\/", r"[\\/]")
    177         # We want case-insensitive matching, so add that flag.
    178         regex = re.compile("(?i)" + regex_pat)
    179 
    180         # Normalize the result: it must end with a path separator.
    181         result_sep = sep(result)
    182         result = result.rstrip(r"\/") + result_sep
    183         self.aliases.append((regex, result, pattern_sep, result_sep))
    184 
    185     def map(self, path):
    186         """Map `path` through the aliases.
    187 
    188         `path` is checked against all of the patterns.  The first pattern to
    189         match is used to replace the root of the path with the result root.
    190         Only one pattern is ever used.  If no patterns match, `path` is
    191         returned unchanged.
    192 
    193         The separator style in the result is made to match that of the result
    194         in the alias.
    195 
    196         """
    197         for regex, result, pattern_sep, result_sep in self.aliases:
    198             m = regex.match(path)
    199             if m:
    200                 new = path.replace(m.group(0), result)
    201                 if pattern_sep != result_sep:
    202                     new = new.replace(pattern_sep, result_sep)
    203                 if self.locator:
    204                     new = self.locator.canonical_filename(new)
    205                 return new
    206         return path
    207 
    208 
    209 def find_python_files(dirname):
    210     """Yield all of the importable Python files in `dirname`, recursively."""
    211     for dirpath, dirnames, filenames in os.walk(dirname, topdown=True):
    212         if '__init__.py' not in filenames:
    213             # If a directory doesn't have __init__.py, then it isn't
    214             # importable and neither are its files
    215             del dirnames[:]
    216             continue
    217         for filename in filenames:
    218             if fnmatch.fnmatch(filename, "*.py"):
    219                 yield os.path.join(dirpath, filename)
    220