Home | History | Annotate | Download | only in tools
      1 #!/usr/bin/env python
      2 # Copyright (c) 2015 The Chromium Authors. All rights reserved.
      3 # Use of this source code is governed by a BSD-style license that can be
      4 # found in the LICENSE file.
      5 
      6 
      7 """Parse an LLVM coverage report to generate useable results."""
      8 
      9 
     10 import argparse
     11 import json
     12 import os
     13 import re
     14 import subprocess
     15 import sys
     16 
     17 
     18 def _fix_filename(filename):
     19   """Return a filename which we can use to identify the file.
     20 
     21   The file paths printed by llvm-cov take the form:
     22 
     23       /path/to/repo/out/dir/../../src/filename.cpp
     24 
     25   And then they're truncated to 22 characters with leading ellipses:
     26 
     27       ...../../src/filename.cpp
     28 
     29   This makes it really tough to determine whether the file actually belongs in
     30   the Skia repo.  This function strips out the leading junk so that, if the file
     31   exists in the repo, the returned string matches the end of some relative path
     32   in the repo. This doesn't guarantee correctness, but it's about as close as
     33   we can get.
     34   """
     35   return filename.split('..')[-1].lstrip('./')
     36 
     37 
     38 def _file_in_repo(filename, all_files):
     39   """Return the name of the checked-in file matching the given filename.
     40 
     41   Use suffix matching to determine which checked-in files the given filename
     42   matches. If there are no matches or multiple matches, return None.
     43   """
     44   new_file = _fix_filename(filename)
     45   matched = []
     46   for f in all_files:
     47     if f.endswith(new_file):
     48       matched.append(f)
     49   if len(matched) == 1:
     50     return matched[0]
     51   elif len(matched) > 1:
     52     print >> sys.stderr, ('WARNING: multiple matches for %s; skipping:\n\t%s'
     53                           % (new_file, '\n\t'.join(matched)))
     54   return None
     55 
     56 
     57 def _get_per_file_per_line_coverage(report):
     58   """Return a dict whose keys are file names and values are coverage data.
     59 
     60   Values are lists which take the form (lineno, coverage, code).
     61   """
     62   all_files = subprocess.check_output(['git', 'ls-files']).splitlines()
     63   lines = report.splitlines()
     64   current_file = None
     65   file_lines = []
     66   files = {}
     67   not_checked_in = '%' # Use this as the file name for not-checked-in files.
     68   for line in lines:
     69     m = re.match('([a-zA-Z0-9\./_-]+):', line)
     70     if m:
     71       if current_file and current_file != not_checked_in:
     72         files[current_file] = file_lines
     73       match_filename = _file_in_repo(m.groups()[0], all_files)
     74       current_file = match_filename or not_checked_in
     75       file_lines = []
     76     else:
     77       if current_file != not_checked_in:
     78         skip = re.match('^\s{2}-+$|^\s{2}\|.+$', line)
     79         if line and not skip:
     80           cov, linenum, code = line.split('|', 2)
     81           cov = cov.strip()
     82           if cov:
     83             cov = int(cov)
     84           else:
     85             cov = None # We don't care about coverage for this line.
     86           linenum = int(linenum.strip())
     87           assert linenum == len(file_lines) + 1
     88           file_lines.append((linenum, cov, code.decode('utf-8', 'replace')))
     89   return files
     90 
     91 
     92 
     93 def _testname(filename):
     94   """Transform the file name into an ingestible test name."""
     95   return re.sub(r'[^a-zA-Z0-9]', '_', filename)
     96 
     97 
     98 def _nanobench_json(results, properties, key):
     99   """Return the results in JSON format like that produced by nanobench."""
    100   rv = {}
    101   # Copy over the properties first, then set the 'key' and 'results' keys,
    102   # in order to avoid bad formatting in case the user passes in a properties
    103   # dict containing those keys.
    104   rv.update(properties)
    105   rv['key'] = key
    106   rv['results'] = {
    107     _testname(f): {
    108       'coverage': {
    109         'percent': percent,
    110         'lines_not_covered': not_covered_lines,
    111         'options': {
    112           'fullname': f,
    113           'dir': os.path.dirname(f),
    114           'source_type': 'coverage',
    115         },
    116       },
    117     } for percent, not_covered_lines, f in results
    118   }
    119   return rv
    120 
    121 
    122 def _parse_key_value(kv_list):
    123   """Return a dict whose key/value pairs are derived from the given list.
    124 
    125   For example:
    126 
    127       ['k1', 'v1', 'k2', 'v2']
    128   becomes:
    129 
    130       {'k1': 'v1',
    131        'k2': 'v2'}
    132   """
    133   if len(kv_list) % 2 != 0:
    134     raise Exception('Invalid key/value pairs: %s' % kv_list)
    135 
    136   rv = {}
    137   for i in xrange(len(kv_list) / 2):
    138     rv[kv_list[i*2]] = kv_list[i*2+1]
    139   return rv
    140 
    141 
    142 def _get_per_file_summaries(line_by_line):
    143   """Summarize the full line-by-line coverage report by file."""
    144   per_file = []
    145   for filepath, lines in line_by_line.iteritems():
    146     total_lines = 0
    147     covered_lines = 0
    148     for _, cov, _ in lines:
    149       if cov is not None:
    150         total_lines += 1
    151         if cov > 0:
    152           covered_lines += 1
    153     if total_lines > 0:
    154       per_file.append((float(covered_lines)/float(total_lines)*100.0,
    155                        total_lines - covered_lines,
    156                        filepath))
    157   return per_file
    158 
    159 
    160 def main():
    161   """Generate useful data from a coverage report."""
    162   # Parse args.
    163   parser = argparse.ArgumentParser()
    164   parser.add_argument('--report', help='input file; an llvm coverage report.',
    165                       required=True)
    166   parser.add_argument('--nanobench', help='output file for nanobench data.')
    167   parser.add_argument(
    168       '--key', metavar='key_or_value', nargs='+',
    169       help='key/value pairs identifying this bot.')
    170   parser.add_argument(
    171       '--properties', metavar='key_or_value', nargs='+',
    172       help='key/value pairs representing properties of this build.')
    173   parser.add_argument('--linebyline',
    174                       help='output file for line-by-line JSON data.')
    175   args = parser.parse_args()
    176 
    177   if args.nanobench and not (args.key and args.properties):
    178     raise Exception('--key and --properties are required with --nanobench')
    179 
    180   with open(args.report) as f:
    181     report = f.read()
    182 
    183   line_by_line = _get_per_file_per_line_coverage(report)
    184 
    185   if args.linebyline:
    186     with open(args.linebyline, 'w') as f:
    187       json.dump(line_by_line, f)
    188 
    189   if args.nanobench:
    190     # Parse the key and properties for use in the nanobench JSON output.
    191     key = _parse_key_value(args.key)
    192     properties = _parse_key_value(args.properties)
    193 
    194     # Get per-file summaries.
    195     per_file = _get_per_file_summaries(line_by_line)
    196 
    197     # Write results.
    198     format_results = _nanobench_json(per_file, properties, key)
    199     with open(args.nanobench, 'w') as f:
    200       json.dump(format_results, f)
    201 
    202 
# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
  main()
    205