Home | History | Annotate | Download | only in tools
      1 #!/usr/bin/env python
      2 # Copyright (c) 2015 The Chromium Authors. All rights reserved.
      3 # Use of this source code is governed by a BSD-style license that can be
      4 # found in the LICENSE file.
      5 
      6 
      7 """Parse an LLVM coverage report to generate useable results."""
      8 
      9 
     10 import argparse
     11 import json
     12 import os
     13 import re
     14 import subprocess
     15 import sys
     16 
     17 
     18 def _fix_filename(filename):
     19   """Return a filename which we can use to identify the file.
     20 
     21   The file paths printed by llvm-cov take the form:
     22 
     23       /path/to/repo/out/dir/../../src/filename.cpp
     24 
     25   And then they're truncated to 22 characters with leading ellipses:
     26 
     27       ...../../src/filename.cpp
     28 
     29   This makes it really tough to determine whether the file actually belongs in
     30   the Skia repo.  This function strips out the leading junk so that, if the file
     31   exists in the repo, the returned string matches the end of some relative path
     32   in the repo. This doesn't guarantee correctness, but it's about as close as
     33   we can get.
     34   """
     35   return filename.split('..')[-1].lstrip('./')
     36 
     37 
     38 def _file_in_repo(filename, all_files):
     39   """Return the name of the checked-in file matching the given filename.
     40 
     41   Use suffix matching to determine which checked-in files the given filename
     42   matches. If there are no matches or multiple matches, return None.
     43   """
     44   new_file = _fix_filename(filename)
     45   matched = []
     46   for f in all_files:
     47     if f.endswith(new_file):
     48       matched.append(f)
     49   if len(matched) == 1:
     50     return matched[0]
     51   elif len(matched) > 1:
     52     print >> sys.stderr, ('WARNING: multiple matches for %s; skipping:\n\t%s'
     53                           % (new_file, '\n\t'.join(matched)))
     54   return None
     55 
     56 
     57 def _get_per_file_per_line_coverage(report):
     58   """Return a dict whose keys are file names and values are coverage data.
     59 
     60   Values are lists which take the form (lineno, coverage, code).
     61   """
     62   all_files = []
     63   for root, dirs, files in os.walk(os.getcwd()):
     64     if 'third_party/externals' in root:
     65       continue
     66     files = [f for f in files if not (f[0] == '.' or f.endswith('.pyc'))]
     67     dirs[:] = [d for d in dirs if not d[0] == '.']
     68     for name in files:
     69       all_files.append(os.path.join(root[(len(os.getcwd()) + 1):], name))
     70   all_files.sort()
     71 
     72   lines = report.splitlines()
     73   current_file = None
     74   file_lines = []
     75   files = {}
     76   not_checked_in = '%' # Use this as the file name for not-checked-in files.
     77   for line in lines:
     78     m = re.match('([a-zA-Z0-9\./_-]+):', line)
     79     if m:
     80       if current_file and current_file != not_checked_in:
     81         files[current_file] = file_lines
     82       match_filename = _file_in_repo(m.groups()[0], all_files)
     83       current_file = match_filename or not_checked_in
     84       file_lines = []
     85     else:
     86       if current_file != not_checked_in:
     87         skip = re.match('^\s{2}-+$|^\s{2}\|.+$', line)
     88         if line and not skip:
     89           cov, linenum, code = line.split('|', 2)
     90           cov = cov.strip()
     91           if cov:
     92             cov = int(cov)
     93           else:
     94             cov = None # We don't care about coverage for this line.
     95           linenum = int(linenum.strip())
     96           assert linenum == len(file_lines) + 1
     97           file_lines.append((linenum, cov, code.decode('utf-8', 'replace')))
     98   return files
     99 
    100 
    101 
    102 def _testname(filename):
    103   """Transform the file name into an ingestible test name."""
    104   return re.sub(r'[^a-zA-Z0-9]', '_', filename)
    105 
    106 
    107 def _nanobench_json(results, properties, key):
    108   """Return the results in JSON format like that produced by nanobench."""
    109   rv = {}
    110   # Copy over the properties first, then set the 'key' and 'results' keys,
    111   # in order to avoid bad formatting in case the user passes in a properties
    112   # dict containing those keys.
    113   rv.update(properties)
    114   rv['key'] = key
    115   rv['results'] = {
    116     _testname(f): {
    117       'coverage': {
    118         'percent': percent,
    119         'lines_not_covered': not_covered_lines,
    120         'options': {
    121           'fullname': f,
    122           'dir': os.path.dirname(f),
    123           'source_type': 'coverage',
    124         },
    125       },
    126     } for percent, not_covered_lines, f in results
    127   }
    128   return rv
    129 
    130 
    131 def _parse_key_value(kv_list):
    132   """Return a dict whose key/value pairs are derived from the given list.
    133 
    134   For example:
    135 
    136       ['k1', 'v1', 'k2', 'v2']
    137   becomes:
    138 
    139       {'k1': 'v1',
    140        'k2': 'v2'}
    141   """
    142   if len(kv_list) % 2 != 0:
    143     raise Exception('Invalid key/value pairs: %s' % kv_list)
    144 
    145   rv = {}
    146   for i in xrange(len(kv_list) / 2):
    147     rv[kv_list[i*2]] = kv_list[i*2+1]
    148   return rv
    149 
    150 
    151 def _get_per_file_summaries(line_by_line):
    152   """Summarize the full line-by-line coverage report by file."""
    153   per_file = []
    154   for filepath, lines in line_by_line.iteritems():
    155     total_lines = 0
    156     covered_lines = 0
    157     for _, cov, _ in lines:
    158       if cov is not None:
    159         total_lines += 1
    160         if cov > 0:
    161           covered_lines += 1
    162     if total_lines > 0:
    163       per_file.append((float(covered_lines)/float(total_lines)*100.0,
    164                        total_lines - covered_lines,
    165                        filepath))
    166   return per_file
    167 
    168 
    169 def main():
    170   """Generate useful data from a coverage report."""
    171   # Parse args.
    172   parser = argparse.ArgumentParser()
    173   parser.add_argument('--report', help='input file; an llvm coverage report.',
    174                       required=True)
    175   parser.add_argument('--nanobench', help='output file for nanobench data.')
    176   parser.add_argument(
    177       '--key', metavar='key_or_value', nargs='+',
    178       help='key/value pairs identifying this bot.')
    179   parser.add_argument(
    180       '--properties', metavar='key_or_value', nargs='+',
    181       help='key/value pairs representing properties of this build.')
    182   parser.add_argument('--linebyline',
    183                       help='output file for line-by-line JSON data.')
    184   args = parser.parse_args()
    185 
    186   if args.nanobench and not (args.key and args.properties):
    187     raise Exception('--key and --properties are required with --nanobench')
    188 
    189   with open(args.report) as f:
    190     report = f.read()
    191 
    192   line_by_line = _get_per_file_per_line_coverage(report)
    193 
    194   if args.linebyline:
    195     with open(args.linebyline, 'w') as f:
    196       json.dump(line_by_line, f)
    197 
    198   if args.nanobench:
    199     # Parse the key and properties for use in the nanobench JSON output.
    200     key = _parse_key_value(args.key)
    201     properties = _parse_key_value(args.properties)
    202 
    203     # Get per-file summaries.
    204     per_file = _get_per_file_summaries(line_by_line)
    205 
    206     # Write results.
    207     format_results = _nanobench_json(per_file, properties, key)
    208     with open(args.nanobench, 'w') as f:
    209       json.dump(format_results, f)
    210 
    211 
# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
  main()
    214