Home | History | Annotate | Download | only in coverage
      1 """Coverage data for Coverage."""
      2 
      3 import os
      4 
      5 from coverage.backward import pickle, sorted        # pylint: disable=W0622
      6 from coverage.files import PathAliases
      7 
      8 
      9 class CoverageData(object):
     10     """Manages collected coverage data, including file storage.
     11 
     12     The data file format is a pickled dict, with these keys:
     13 
     14         * collector: a string identifying the collecting software
     15 
     16         * lines: a dict mapping filenames to sorted lists of line numbers
     17           executed:
     18             { 'file1': [17,23,45],  'file2': [1,2,3], ... }
     19 
     20         * arcs: a dict mapping filenames to sorted lists of line number pairs:
     21             { 'file1': [(17,23), (17,25), (25,26)], ... }
     22 
     23     """
     24 
     25     def __init__(self, basename=None, collector=None):
     26         """Create a CoverageData.
     27 
     28         `basename` is the name of the file to use for storing data.
     29 
     30         `collector` is a string describing the coverage measurement software.
     31 
     32         """
     33         self.collector = collector or 'unknown'
     34 
     35         self.use_file = True
     36 
     37         # Construct the filename that will be used for data file storage, if we
     38         # ever do any file storage.
     39         self.filename = basename or ".coverage"
     40         self.filename = os.path.abspath(self.filename)
     41 
     42         # A map from canonical Python source file name to a dictionary in
     43         # which there's an entry for each line number that has been
     44         # executed:
     45         #
     46         #   {
     47         #       'filename1.py': { 12: None, 47: None, ... },
     48         #       ...
     49         #       }
     50         #
     51         self.lines = {}
     52 
     53         # A map from canonical Python source file name to a dictionary with an
     54         # entry for each pair of line numbers forming an arc:
     55         #
     56         #   {
     57         #       'filename1.py': { (12,14): None, (47,48): None, ... },
     58         #       ...
     59         #       }
     60         #
     61         self.arcs = {}
     62 
     63         self.os = os
     64         self.sorted = sorted
     65         self.pickle = pickle
     66 
     67     def usefile(self, use_file=True):
     68         """Set whether or not to use a disk file for data."""
     69         self.use_file = use_file
     70 
     71     def read(self):
     72         """Read coverage data from the coverage data file (if it exists)."""
     73         if self.use_file:
     74             self.lines, self.arcs = self._read_file(self.filename)
     75         else:
     76             self.lines, self.arcs = {}, {}
     77 
     78     def write(self, suffix=None):
     79         """Write the collected coverage data to a file.
     80 
     81         `suffix` is a suffix to append to the base file name. This can be used
     82         for multiple or parallel execution, so that many coverage data files
     83         can exist simultaneously.  A dot will be used to join the base name and
     84         the suffix.
     85 
     86         """
     87         if self.use_file:
     88             filename = self.filename
     89             if suffix:
     90                 filename += "." + suffix
     91             self.write_file(filename)
     92 
     93     def erase(self):
     94         """Erase the data, both in this object, and from its file storage."""
     95         if self.use_file:
     96             if self.filename and os.path.exists(self.filename):
     97                 os.remove(self.filename)
     98         self.lines = {}
     99         self.arcs = {}
    100 
    101     def line_data(self):
    102         """Return the map from filenames to lists of line numbers executed."""
    103         return dict(
    104             [(f, self.sorted(lmap.keys())) for f, lmap in self.lines.items()]
    105             )
    106 
    107     def arc_data(self):
    108         """Return the map from filenames to lists of line number pairs."""
    109         return dict(
    110             [(f, self.sorted(amap.keys())) for f, amap in self.arcs.items()]
    111             )
    112 
    113     def write_file(self, filename):
    114         """Write the coverage data to `filename`."""
    115 
    116         # Create the file data.
    117         data = {}
    118 
    119         data['lines'] = self.line_data()
    120         arcs = self.arc_data()
    121         if arcs:
    122             data['arcs'] = arcs
    123 
    124         if self.collector:
    125             data['collector'] = self.collector
    126 
    127         # Write the pickle to the file.
    128         fdata = open(filename, 'wb')
    129         try:
    130             self.pickle.dump(data, fdata, 2)
    131         finally:
    132             fdata.close()
    133 
    134     def read_file(self, filename):
    135         """Read the coverage data from `filename`."""
    136         self.lines, self.arcs = self._read_file(filename)
    137 
    138     def raw_data(self, filename):
    139         """Return the raw pickled data from `filename`."""
    140         fdata = open(filename, 'rb')
    141         try:
    142             data = pickle.load(fdata)
    143         finally:
    144             fdata.close()
    145         return data
    146 
    147     def _read_file(self, filename):
    148         """Return the stored coverage data from the given file.
    149 
    150         Returns two values, suitable for assigning to `self.lines` and
    151         `self.arcs`.
    152 
    153         """
    154         lines = {}
    155         arcs = {}
    156         try:
    157             data = self.raw_data(filename)
    158             if isinstance(data, dict):
    159                 # Unpack the 'lines' item.
    160                 lines = dict([
    161                     (f, dict.fromkeys(linenos, None))
    162                         for f, linenos in data.get('lines', {}).items()
    163                     ])
    164                 # Unpack the 'arcs' item.
    165                 arcs = dict([
    166                     (f, dict.fromkeys(arcpairs, None))
    167                         for f, arcpairs in data.get('arcs', {}).items()
    168                     ])
    169         except Exception:
    170             pass
    171         return lines, arcs
    172 
    173     def combine_parallel_data(self, aliases=None):
    174         """Combine a number of data files together.
    175 
    176         Treat `self.filename` as a file prefix, and combine the data from all
    177         of the data files starting with that prefix plus a dot.
    178 
    179         If `aliases` is provided, it's a `PathAliases` object that is used to
    180         re-map paths to match the local machine's.
    181 
    182         """
    183         aliases = aliases or PathAliases()
    184         data_dir, local = os.path.split(self.filename)
    185         localdot = local + '.'
    186         for f in os.listdir(data_dir or '.'):
    187             if f.startswith(localdot):
    188                 full_path = os.path.join(data_dir, f)
    189                 new_lines, new_arcs = self._read_file(full_path)
    190                 for filename, file_data in new_lines.items():
    191                     filename = aliases.map(filename)
    192                     self.lines.setdefault(filename, {}).update(file_data)
    193                 for filename, file_data in new_arcs.items():
    194                     filename = aliases.map(filename)
    195                     self.arcs.setdefault(filename, {}).update(file_data)
    196                 if f != local:
    197                     os.remove(full_path)
    198 
    199     def add_line_data(self, line_data):
    200         """Add executed line data.
    201 
    202         `line_data` is { filename: { lineno: None, ... }, ...}
    203 
    204         """
    205         for filename, linenos in line_data.items():
    206             self.lines.setdefault(filename, {}).update(linenos)
    207 
    208     def add_arc_data(self, arc_data):
    209         """Add measured arc data.
    210 
    211         `arc_data` is { filename: { (l1,l2): None, ... }, ...}
    212 
    213         """
    214         for filename, arcs in arc_data.items():
    215             self.arcs.setdefault(filename, {}).update(arcs)
    216 
    217     def touch_file(self, filename):
    218         """Ensure that `filename` appears in the data, empty if needed."""
    219         self.lines.setdefault(filename, {})
    220 
    221     def measured_files(self):
    222         """A list of all files that had been measured."""
    223         return list(self.lines.keys())
    224 
    225     def executed_lines(self, filename):
    226         """A map containing all the line numbers executed in `filename`.
    227 
    228         If `filename` hasn't been collected at all (because it wasn't executed)
    229         then return an empty map.
    230 
    231         """
    232         return self.lines.get(filename) or {}
    233 
    234     def executed_arcs(self, filename):
    235         """A map containing all the arcs executed in `filename`."""
    236         return self.arcs.get(filename) or {}
    237 
    238     def add_to_hash(self, filename, hasher):
    239         """Contribute `filename`'s data to the Md5Hash `hasher`."""
    240         hasher.update(self.executed_lines(filename))
    241         hasher.update(self.executed_arcs(filename))
    242 
    243     def summary(self, fullpath=False):
    244         """Return a dict summarizing the coverage data.
    245 
    246         Keys are based on the filenames, and values are the number of executed
    247         lines.  If `fullpath` is true, then the keys are the full pathnames of
    248         the files, otherwise they are the basenames of the files.
    249 
    250         """
    251         summ = {}
    252         if fullpath:
    253             filename_fn = lambda f: f
    254         else:
    255             filename_fn = self.os.path.basename
    256         for filename, lines in self.lines.items():
    257             summ[filename_fn(filename)] = len(lines)
    258         return summ
    259 
    260     def has_arcs(self):
    261         """Does this data have arcs?"""
    262         return bool(self.arcs)
    263 
    264 
    265 if __name__ == '__main__':
    266     # Ad-hoc: show the raw data in a data file.
    267     import pprint, sys
    268     covdata = CoverageData()
    269     if sys.argv[1:]:
    270         fname = sys.argv[1]
    271     else:
    272         fname = covdata.filename
    273     pprint.pprint(covdata.raw_data(fname))
    274