#!/usr/bin/env python
# Copyright (c) 2015 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.


"""Parse an LLVM coverage report to generate usable results."""


import argparse
import json
import os
import re
import subprocess
import sys


# Placeholder "file name" used for report sections whose file could not be
# matched to a checked-in file.
_NOT_CHECKED_IN = '%'

# A section header: a path-like token at the start of the line followed by ':'.
_FILE_HEADER_RE = re.compile(r'([a-zA-Z0-9\./_-]+):')

# Non-data lines inside a section: two whitespace characters followed either by
# nothing but dashes, or by a '|' and more text.
_SKIPPED_LINE_RE = re.compile(r'^\s{2}-+$|^\s{2}\|.+$')

# Split on CR/LF only. str.splitlines() on text also breaks on form feeds and
# other separators that may legitimately occur inside a line of source code.
_LINE_BREAK_RE = re.compile(r'\r\n|\r|\n')

_NON_ALNUM_RE = re.compile(r'[^a-zA-Z0-9]')


def _fix_filename(filename):
    """Return a filename which we can use to identify the file.

    The file paths printed by llvm-cov take the form:

        /path/to/repo/out/dir/../../src/filename.cpp

    And then they're truncated to 22 characters with leading ellipses:

        ...../../src/filename.cpp

    This makes it really tough to determine whether the file actually belongs
    in the Skia repo. This function strips out the leading junk so that, if
    the file exists in the repo, the returned string matches the end of some
    relative path in the repo. This doesn't guarantee correctness, but it's
    about as close as we can get.
    """
    # Keep whatever follows the last '..', then drop leading '.' and '/'.
    return filename.split('..')[-1].lstrip('./')


def _file_in_repo(filename, all_files):
    """Return the name of the checked-in file matching the given filename.

    Use suffix matching to determine which checked-in files the given filename
    matches. If there are no matches or multiple matches, return None; in the
    multiple-match case a warning is written to stderr.
    """
    new_file = _fix_filename(filename)
    matched = [f for f in all_files if f.endswith(new_file)]
    if len(matched) == 1:
        return matched[0]
    if len(matched) > 1:
        sys.stderr.write(
            'WARNING: multiple matches for %s; skipping:\n\t%s\n'
            % (new_file, '\n\t'.join(matched)))
    return None


def _get_per_file_per_line_coverage(report, all_files=None):
    """Return a dict whose keys are file names and values are coverage data.

    Values are lists whose items take the form (lineno, coverage, code), where
    coverage is an int, or None for lines with no coverage count.

    Args:
        report: The text of the llvm coverage report. Bytes are decoded as
            UTF-8 with undecodable sequences replaced.
        all_files: Optional list of checked-in file paths used to resolve the
            file names in the report. Defaults to the output of
            `git ls-files` run in the current directory.

    Raises:
        ValueError: if a data line is malformed or the line numbers within a
            file are not consecutive starting from 1.
    """
    if all_files is None:
        all_files = subprocess.check_output(
            ['git', 'ls-files'], universal_newlines=True).splitlines()
    if isinstance(report, bytes):
        report = report.decode('utf-8', 'replace')

    files = {}
    current_file = None
    file_lines = []
    for line in _LINE_BREAK_RE.split(report):
        header = _FILE_HEADER_RE.match(line)
        if header:
            # A new section starts; store the one we just finished.
            if current_file and current_file != _NOT_CHECKED_IN:
                files[current_file] = file_lines
            current_file = (_file_in_repo(header.group(1), all_files)
                            or _NOT_CHECKED_IN)
            file_lines = []
            continue

        # Lines outside any section, or inside a section for a file that is
        # not checked in, can never be reported; don't bother parsing them.
        if current_file is None or current_file == _NOT_CHECKED_IN:
            continue
        if not line or _SKIPPED_LINE_RE.match(line):
            continue

        cov, linenum, code = line.split('|', 2)
        cov = cov.strip()
        # An empty count means we don't care about coverage for this line.
        cov = int(cov) if cov else None
        linenum = int(linenum.strip())
        if linenum != len(file_lines) + 1:
            raise ValueError(
                'Unexpected line number %d in %s; expected %d'
                % (linenum, current_file, len(file_lines) + 1))
        file_lines.append((linenum, cov, code))

    # The final section has no following header to trigger the store above.
    if current_file and current_file != _NOT_CHECKED_IN:
        files[current_file] = file_lines
    return files


def _testname(filename):
    """Transform the file name into an ingestible test name."""
    return _NON_ALNUM_RE.sub('_', filename)


def _nanobench_json(results, properties, key):
    """Return the results in JSON format like that produced by nanobench.

    Args:
        results: Iterable of (percent, not_covered_lines, filename) tuples.
        properties: Dict of top-level properties to include in the output.
        key: Dict stored under the 'key' entry of the output.
    """
    rv = {}
    # Copy over the properties first, then set the 'key' and 'results' keys,
    # in order to avoid bad formatting in case the user passes in a properties
    # dict containing those keys.
    rv.update(properties)
    rv['key'] = key
    rv['results'] = {
        _testname(f): {
            'coverage': {
                'percent': percent,
                'lines_not_covered': not_covered_lines,
                'options': {
                    'fullname': f,
                    'dir': os.path.dirname(f),
                    'source_type': 'coverage',
                },
            },
        } for percent, not_covered_lines, f in results
    }
    return rv


def _parse_key_value(kv_list):
    """Return a dict whose key/value pairs are derived from the given list.

    For example:

        ['k1', 'v1', 'k2', 'v2']

    becomes:

        {'k1': 'v1',
         'k2': 'v2'}

    Raises:
        Exception: if the list has an odd number of items.
    """
    if len(kv_list) % 2 != 0:
        raise Exception('Invalid key/value pairs: %s' % kv_list)
    # Even-indexed items are keys; odd-indexed items are their values.
    return dict(zip(kv_list[::2], kv_list[1::2]))


def _get_per_file_summaries(line_by_line):
    """Summarize the full line-by-line coverage report by file.

    Returns a list of (percent_covered, lines_not_covered, filepath) tuples.
    Lines whose coverage is None are ignored, and files with no remaining
    lines are omitted.
    """
    per_file = []
    for filepath, lines in line_by_line.items():
        counts = [cov for _, cov, _ in lines if cov is not None]
        total_lines = len(counts)
        if total_lines == 0:
            continue
        covered_lines = sum(1 for cov in counts if cov > 0)
        per_file.append((float(covered_lines) / float(total_lines) * 100.0,
                         total_lines - covered_lines,
                         filepath))
    return per_file


def main():
    """Generate useful data from a coverage report."""
    # Parse args.
    parser = argparse.ArgumentParser()
    parser.add_argument('--report', help='input file; an llvm coverage report.',
                        required=True)
    parser.add_argument('--nanobench', help='output file for nanobench data.')
    parser.add_argument(
        '--key', metavar='key_or_value', nargs='+',
        help='key/value pairs identifying this bot.')
    parser.add_argument(
        '--properties', metavar='key_or_value', nargs='+',
        help='key/value pairs representing properties of this build.')
    parser.add_argument('--linebyline',
                        help='output file for line-by-line JSON data.')
    args = parser.parse_args()

    if args.nanobench and not (args.key and args.properties):
        raise Exception('--key and --properties are required with --nanobench')

    # Read raw bytes; the parser decodes them leniently so that stray
    # non-UTF-8 bytes in the report cannot abort the run.
    with open(args.report, 'rb') as f:
        report = f.read()

    line_by_line = _get_per_file_per_line_coverage(report)

    if args.linebyline:
        with open(args.linebyline, 'w') as f:
            json.dump(line_by_line, f)

    if args.nanobench:
        # Parse the key and properties for use in the nanobench JSON output.
        key = _parse_key_value(args.key)
        properties = _parse_key_value(args.properties)

        # Get per-file summaries.
        per_file = _get_per_file_summaries(line_by_line)

        # Write results.
        format_results = _nanobench_json(per_file, properties, key)
        with open(args.nanobench, 'w') as f:
            json.dump(format_results, f)


if __name__ == '__main__':
    main()