#!/usr/bin/env python
# Copyright 2016 the V8 project authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Script for merging sancov files in parallel.

When merging test runner output, the sancov files are expected
to be located in one directory with the file-name pattern:
<executable name>.test.<id>.<attempt>.sancov

For each executable, this script writes a new file:
<executable name>.result.sancov

When --swarming-output-dir is specified, this script instead merges the
result files found there into the coverage folder.

The sancov tool is expected to be in the llvm compiler-rt third-party
directory. It's not checked out by default and must be added as a
custom_deps entry:
'v8/third_party/llvm/projects/compiler-rt':
    'https://chromium.googlesource.com/external/llvm.org/compiler-rt.git'
"""

import argparse
import logging
import math
import os
import re
import subprocess
import sys

from multiprocessing import Pool, cpu_count


logging.basicConfig(level=logging.INFO)

# V8 checkout directory.
BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(
    os.path.abspath(__file__))))

# The sancov tool location.
SANCOV_TOOL = os.path.join(
    BASE_DIR, 'third_party', 'llvm', 'projects', 'compiler-rt',
    'lib', 'sanitizer_common', 'scripts', 'sancov.py')

# Number of CPUs.
CPUS = cpu_count()

# Regexp to find sancov files as output by the v8 test runner. Also grabs the
# executable name in group 1.
SANCOV_FILE_RE = re.compile(r'^(.*)\.test\.\d+\.\d+\.sancov$')
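# E.g. 'd8.test.42.1.sancov' matches, with 'd8' captured as the executable
# name (illustrative file name; 42 is the test id and 1 the attempt).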

# Regexp to find sancov result files as returned from swarming.
SANCOV_RESULTS_FILE_RE = re.compile(r'^.*\.result\.sancov$')
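# E.g. 'd8.result.sancov' (illustrative; this is the pattern produced by the
# final merge step below).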


def merge(args):
  """Merge several sancov files into one.

  Called through a multiprocessing pool. The args are expected to unpack to:
    keep: Whether source and intermediate sancov files should be kept.
    coverage_dir: Folder where to find the sancov files.
    executable: Name of the executable whose sancov files should be merged.
    index: A number to be put into the intermediate result file name.
           If None, this is a final result.
    bucket: The list of sancov files to be merged.
  Returns: A tuple with the executable name and the result file name.
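
  Example (hypothetical file names):
    merge((False, '/tmp/coverage', 'd8', 0,
           ['d8.test.1.0.sancov', 'd8.test.2.0.sancov']))
    returns ('d8', 'd8.result.0.sancov') and writes that file.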
  """
  keep, coverage_dir, executable, index, bucket = args
  process = subprocess.Popen(
      [SANCOV_TOOL, 'merge'] + bucket,
      stdout=subprocess.PIPE,
      stderr=subprocess.PIPE,
      cwd=coverage_dir,
  )
  output, err = process.communicate()
  assert process.returncode == 0, err
  if index is not None:
    # This is an intermediate result; add the bucket index to the file name.
    result_file_name = '%s.result.%d.sancov' % (executable, index)
  else:
    # This is the final result without bucket index.
    result_file_name = '%s.result.sancov' % executable
  with open(os.path.join(coverage_dir, result_file_name), 'wb') as f:
    f.write(output)
  if not keep:
    for f in bucket:
      os.remove(os.path.join(coverage_dir, f))
  return executable, result_file_name


def generate_inputs(keep, coverage_dir, file_map, cpus):
  """Generate inputs for multiprocessed merging.

  Splits the sancov files into several buckets, so that each bucket can be
  merged in a separate process. We have only a few executables in total,
  each typically with lots of associated files. In the general case, with
  many executables, we might need to avoid splitting the buckets of
  executables with few files.

  Returns: List of args as expected by merge above.
  """
  inputs = []
  for executable, files in file_map.items():
    # What's the bucket size for distributing files for merging? E.g. with
    # 2 cpus and 9 files we want bucket size 5.
    n = max(2, int(math.ceil(len(files) / float(cpus))))

    # Chop files into buckets.
    buckets = [files[i:i+n] for i in range(0, len(files), n)]
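    # E.g. the 9 files from the comment above, with n == 5, end up in two
    # buckets of sizes 5 and 4.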

    # Inputs for multiprocessing. List of tuples containing:
    # Keep-files option, base path, executable name, index of bucket,
    # list of files.
    inputs.extend([(keep, coverage_dir, executable, i, b)
                   for i, b in enumerate(buckets)])
  return inputs


def merge_parallel(inputs, merge_fun=merge):
  """Process several merge jobs in parallel."""
  pool = Pool(CPUS)
  try:
    return pool.map(merge_fun, inputs)
  finally:
    pool.close()


def merge_test_runner_output(options):
  # Map executable names to their respective sancov files.
  file_map = {}
  for f in os.listdir(options.coverage_dir):
    match = SANCOV_FILE_RE.match(f)
    if match:
      file_map.setdefault(match.group(1), []).append(f)

  inputs = generate_inputs(
      options.keep, options.coverage_dir, file_map, CPUS)

  logging.info('Executing %d merge jobs in parallel for %d executables.' %
               (len(inputs), len(file_map)))

  results = merge_parallel(inputs)

  # Map executable names to intermediate bucket result files.
  file_map = {}
  for executable, f in results:
    file_map.setdefault(executable, []).append(f)

  # Merge the bucket results for each executable.
  # The final result has index None, so no index will appear in the
  # file name.
  inputs = [(options.keep, options.coverage_dir, executable, None, files)
            for executable, files in file_map.items()]

  logging.info('Merging %d intermediate results.' % len(inputs))

  merge_parallel(inputs)


def merge_two(args):
  """Merge two sancov files.

  Called through a multiprocessing pool. The args are expected to unpack to:
    swarming_output_dir: Folder where to find the new file.
    coverage_dir: Folder where to find the existing file.
    f: File name of the file to be merged.
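
  Example (hypothetical paths):
    merge_two(('/tmp/swarming', '/tmp/coverage', 'd8.result.sancov'))
    merges the swarming copy of d8.result.sancov into the existing copy
    under /tmp/coverage.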
  """
  swarming_output_dir, coverage_dir, f = args
  input_file = os.path.join(swarming_output_dir, f)
  output_file = os.path.join(coverage_dir, f)
  process = subprocess.Popen(
      [SANCOV_TOOL, 'merge', input_file, output_file],
      stdout=subprocess.PIPE,
      stderr=subprocess.PIPE,
  )
  output, err = process.communicate()
  assert process.returncode == 0, err
  # Overwrite the existing file in the coverage folder with the merged data.
  with open(output_file, 'wb') as merged:
    merged.write(output)


def merge_swarming_output(options):
  # Iterate over the sancov result files from swarming.
  files = []
  for f in os.listdir(options.swarming_output_dir):
    match = SANCOV_RESULTS_FILE_RE.match(f)
    if match:
      if os.path.exists(os.path.join(options.coverage_dir, f)):
        # If the same file already exists, we'll merge the data.
        files.append(f)
      else:
        # No file yet? Just move it.
        os.rename(os.path.join(options.swarming_output_dir, f),
                  os.path.join(options.coverage_dir, f))

  inputs = [(options.swarming_output_dir, options.coverage_dir, f)
            for f in files]

  logging.info('Executing %d merge jobs in parallel.' % len(inputs))
  merge_parallel(inputs, merge_two)


def main():
  parser = argparse.ArgumentParser()
  parser.add_argument('--coverage-dir', required=True,
                      help='Path to the sancov output files.')
  parser.add_argument('--keep', default=False, action='store_true',
                      help='Keep sancov output files after merging.')
  parser.add_argument('--swarming-output-dir',
                      help='Folder containing a results shard from swarming.')
  options = parser.parse_args()

  # Check that the folder with coverage output exists.
  assert (os.path.exists(options.coverage_dir) and
          os.path.isdir(options.coverage_dir))

  if options.swarming_output_dir:
    # Check that the folder with swarming output exists.
    assert (os.path.exists(options.swarming_output_dir) and
            os.path.isdir(options.swarming_output_dir))
    merge_swarming_output(options)
  else:
    merge_test_runner_output(options)

  return 0


if __name__ == '__main__':
  sys.exit(main())