#!/usr/bin/python
# Copyright 2017 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""
This is a utility to build a summary of the given directory and save it to a
json file.

usage: utils.py [-h] [-p PATH] [-m MAX_SIZE_KB] [-d]

optional arguments:
  -p PATH         Path to build directory summary.
  -m MAX_SIZE_KB  Maximum result size in KB. Set to 0 to disable result
                  throttling.
  -d              Delete all existing summary files in the given path.
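
For example, to build a summary of /path/to/results and throttle the results
to 20000 KB:
  utils.py -p /path/to/results -m 20000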

The content of the json file looks like the following, where '/D' maps a
directory to the list of its entries and '/S' is a total size in bytes:
{'default': {'/D': [{'control': {'/S': 734}},
                    {'debug': {'/D': [{'client.0.DEBUG': {'/S': 5698}},
                                       {'client.0.ERROR': {'/S': 254}},
                                       {'client.0.INFO': {'/S': 1020}},
                                       {'client.0.WARNING': {'/S': 242}}],
                               '/S': 7214}}
                      ],
              '/S': 7948
            }
}
"""

import argparse
import copy
import fnmatch
import glob
import json
import logging
import os
import random
import sys
import time
import traceback

import dedupe_file_throttler
import delete_file_throttler
import result_info
import shrink_file_throttler
import throttler_lib
import utils_lib
import zip_file_throttler


# Do NOT import autotest_lib modules here. This module can be executed without
# dependency on other autotest modules. This is to keep the logic of result
# trimming on the server side, instead of depending on the autotest client
# module.

DEFAULT_SUMMARY_FILENAME_FMT = 'dir_summary_%d.json'
SUMMARY_FILE_PATTERN = 'dir_summary_*.json'
MERGED_SUMMARY_FILENAME = 'dir_summary_final.json'

# Minimum disk space that should remain available after saving the summary
# file.
MIN_FREE_DISK_BYTES = 10 * 1024 * 1024

# Autotest uses some state files to track process running state. These files
# are deleted from the test results, so they can be ignored when building the
# summary.
FILES_TO_IGNORE = set([
    'control.autoserv.state'
])

# Smallest file size to shrink a file to.
MIN_FILE_SIZE_LIMIT_BYTE = 10 * 1024


def get_unique_dir_summary_file(path):
    """Get a unique file path to save the directory summary json string.

    @param path: The directory path to save the summary file to.
    """
    summary_file = DEFAULT_SUMMARY_FILENAME_FMT % time.time()
    # The timestamp in the file name is truncated to whole seconds, so names
    # can collide. Append a counter until the name is unique.
    file_name = os.path.join(path, summary_file)
    if os.path.exists(file_name):
        count = 1
        name, ext = os.path.splitext(summary_file)
        while os.path.exists(file_name):
            file_name = os.path.join(path, '%s_%s%s' % (name, count, ext))
            count += 1
    return file_name


def _preprocess_result_dir_path(path):
    """Verify the result directory path is valid and make sure it ends with `/`.

    @param path: A path to the result directory.
    @return: A verified and processed path to the result directory.
    @raise IOError: If the path doesn't exist.
    @raise ValueError: If the path is not a directory.
    """
    if not os.path.exists(path):
        raise IOError('Path %s does not exist.' % path)

    if not os.path.isdir(path):
        raise ValueError('The given path %s is a file. It must be a '
                         'directory.' % path)

    # Make sure the path ends with `/` so the root key of the summary json is
    # always utils_lib.ROOT_DIR ('').
    if not path.endswith(os.sep):
        path = path + os.sep

    return path


def _delete_missing_entries(summary_old, summary_new):
    """Delete files/directories that exist only in the old summary.

    When the new summary is final, i.e., it's built from the final result
    directory, files or directories missing from it are considered to be
    deleted and trimmed to size 0.

    @param summary_old: Old directory summary.
    @param summary_new: New directory summary.
    """
    new_files = summary_new.get_file_names()
    old_files = summary_old.get_file_names()
    for name in old_files:
        old_file = summary_old.get_file(name)
        if name not in new_files:
            if old_file.is_dir:
                # Recurse with an empty summary to trim all entries under the
                # deleted sub-directory to size 0.
                with old_file.disable_updating_parent_size_info():
                    _delete_missing_entries(old_file, result_info.EMPTY)
                old_file.update_sizes()
            elif name in FILES_TO_IGNORE:
                # Remove the file from the summary as it can be ignored.
                summary_old.remove_file(name)
            else:
                with old_file.disable_updating_parent_size_info():
                    # Before setting the trimmed size to 0, update the collected
                    # size if it's not set yet.
                    if not old_file.is_collected_size_recorded:
                        old_file.collected_size = old_file.trimmed_size
                    old_file.trimmed_size = 0
        elif old_file.is_dir:
            # If `name` is a directory in the old summary but a file in the new
            # summary, trim the whole directory entry in the old summary.
            new_file = summary_new.get_file(name)
            if not new_file.is_dir:
                new_file = result_info.EMPTY
            _delete_missing_entries(old_file, new_file)


def _relocate_summary(result_dir, summary_file, summary):
    """Update the given summary with the path relative to the result_dir.

    @param result_dir: Path to the result directory.
    @param summary_file: Path to the summary file.
    @param summary: A directory summary inside the given result_dir or its
            sub-directory.
    @return: An updated summary with the path relative to the result_dir.
    """
    sub_path = os.path.dirname(summary_file).replace(
            result_dir.rstrip(os.sep), '')
    if sub_path == '':
        return summary

    folders = sub_path.split(os.sep)

    # The first folder is always '' because of the leading `/` in sub_path.
    parent = result_info.ResultInfo(
            result_dir, utils_lib.ROOT_DIR, parent_result_info=None)
    root = parent

    # Starting at index 1 skips that empty first folder, so root contains a
    # single entry for utils_lib.ROOT_DIR.
    for i in range(1, len(folders)):
        child = result_info.ResultInfo(
                parent.path, folders[i], parent_result_info=parent)
        if i == len(folders) - 1:
            # Add files in summary to child.
            for info in summary.files:
                child.files.append(info)

        parent.files.append(child)
        parent = child

    parent.update_sizes()
    return root


def merge_summaries(path):
    """Merge all directory summaries in the given path.

    This function calculates the total size of result files being collected for
    the test device and the files generated on the drone. It also returns the
    merged directory summary.

    @param path: A path to search for directory summaries.
    @return a tuple of (client_collected_bytes, merged_summary, files):
            client_collected_bytes: The total size of results collected from
                the DUT. The number can be larger than the total file size of
                the given path, as files can be overwritten or removed.
            merged_summary: The merged directory summary of the given path.
            files: All summary files in the given path, including
                sub-directories.
    """
    path = _preprocess_result_dir_path(path)
    # Find all directory summary files and sort them by modification time.
    summary_files = []
    for root, _, filenames in os.walk(path):
        for filename in fnmatch.filter(filenames, SUMMARY_FILE_PATTERN):
            summary_files.append(os.path.join(root, filename))
    summary_files = sorted(summary_files, key=os.path.getmtime)

    all_summaries = []
    for summary_file in summary_files:
        try:
            summary = result_info.load_summary_json_file(summary_file)
            summary = _relocate_summary(path, summary_file, summary)
            all_summaries.append(summary)
        except (IOError, ValueError) as e:
            utils_lib.LOG('Failed to load summary file %s. Error: %s' %
                          (summary_file, e))

    # Merge all summaries.
    merged_summary = all_summaries[0] if all_summaries else None
    for summary in all_summaries[1:]:
        merged_summary.merge(summary)
    # After all summaries from the test device (client side) are merged, we can
    # get the total size of result files being transferred from the test
    # device. If there is no directory summary collected, default
    # client_collected_bytes to 0.
    client_collected_bytes = 0
    if merged_summary:
        client_collected_bytes = merged_summary.collected_size

    # Get the summary of the current directory.
    last_summary = result_info.ResultInfo.build_from_path(path)

    if merged_summary:
        merged_summary.merge(last_summary, is_final=True)
        _delete_missing_entries(merged_summary, last_summary)
    else:
        merged_summary = last_summary

    return client_collected_bytes, merged_summary, summary_files


def _throttle_results(summary, max_result_size_KB):
    """Throttle the test results by limiting to the given maximum size.

    @param summary: A ResultInfo object containing result summary.
    @param max_result_size_KB: Maximum test result size in KB.
    """
    if throttler_lib.check_throttle_limit(summary, max_result_size_KB):
        utils_lib.LOG(
                'Result size is %s, which is less than %d KB. No need to '
                'throttle.' %
                (utils_lib.get_size_string(summary.trimmed_size),
                 max_result_size_KB))
        return

    args = {'summary': summary,
            'max_result_size_KB': max_result_size_KB}
    args_skip_autotest_log = copy.copy(args)
    args_skip_autotest_log['skip_autotest_log'] = True
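    # The first, gentler passes set `skip_autotest_log` so that autotest's own
    # log files are left untouched; later passes drop that restriction if more
    # space still needs to be reclaimed.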
    # Apply the throttlers in the following order.
    throttlers = [
            (shrink_file_throttler, copy.copy(args_skip_autotest_log)),
            (zip_file_throttler, copy.copy(args_skip_autotest_log)),
            (shrink_file_throttler, copy.copy(args)),
            (dedupe_file_throttler, copy.copy(args)),
            (zip_file_throttler, copy.copy(args)),
            ]

    # Add another zip_file_throttler to compress the files being shrunk.
    # The threshold is set to half of the DEFAULT_FILE_SIZE_LIMIT_BYTE of
    # shrink_file_throttler.
    new_args = copy.copy(args)
    new_args['file_size_threshold_byte'] = 50 * 1024
    throttlers.append((zip_file_throttler, new_args))

    # If the above throttlers still can't reduce the result size to be under
    # max_result_size_KB, try to delete files with various thresholds, starting
    # at 5MB then lowering to 100KB.
    delete_file_thresholds = [5*1024*1024, 1*1024*1024, 100*1024]
    # Try to keep tgz files first.
    exclude_file_patterns = [r'.*\.tgz']
    for threshold in delete_file_thresholds:
        new_args = copy.copy(args)
        new_args.update({'file_size_threshold_byte': threshold,
                         'exclude_file_patterns': exclude_file_patterns})
        throttlers.append((delete_file_throttler, new_args))
    # Add one more delete_file_throttler that does not skip tgz files.
    new_args = copy.copy(args)
    new_args.update({'file_size_threshold_byte': delete_file_thresholds[-1]})
    throttlers.append((delete_file_throttler, new_args))

    # Run the throttlers in order until the result size is under
    # max_result_size_KB.
    old_size = summary.trimmed_size
    for throttler, args in throttlers:
        try:
            args_without_summary = copy.copy(args)
            del args_without_summary['summary']
            utils_lib.LOG('Applying throttler %s, args: %s' %
                          (throttler.__name__, args_without_summary))
            throttler.throttle(**args)
            if throttler_lib.check_throttle_limit(summary, max_result_size_KB):
                return
        except Exception:
            utils_lib.LOG('Failed to apply throttler %s. Exception: %s' %
                          (throttler.__name__, traceback.format_exc()))
        finally:
            new_size = summary.trimmed_size
            if new_size == old_size:
                utils_lib.LOG('Result size was not changed: %s.' %
                              utils_lib.get_size_string(old_size))
            else:
                utils_lib.LOG('Result size was reduced from %s to %s.' %
                              (utils_lib.get_size_string(old_size),
                               utils_lib.get_size_string(new_size)))
            # Track the current size so the next iteration logs its own delta.
            old_size = new_size


def _setup_logging():
    """Set up logging to direct logs to stdout."""
    logger = logging.getLogger()
    logger.setLevel(logging.DEBUG)
    handler = logging.StreamHandler(sys.stdout)
    handler.setLevel(logging.DEBUG)
    formatter = logging.Formatter('%(asctime)s %(message)s')
    handler.setFormatter(formatter)
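    # Replace any existing handlers so log lines are not duplicated.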
    logger.handlers = []
    logger.addHandler(handler)


def _parse_options():
    """Options for the main script.

    @return: An option object containing arg values.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-p', type=str, dest='path',
                        help='Path to build directory summary.')
    parser.add_argument('-m', type=int, dest='max_size_KB', default=0,
                        help='Maximum result size in KB. Set to 0 to disable '
                        'result throttling.')
    parser.add_argument('-d', action='store_true', dest='delete_summaries',
                        default=False,
                        help='-d to delete all result summary files in the '
                        'given path.')
    return parser.parse_args()


def execute(path, max_size_KB):
    """Execute the script with given arguments.

    @param path: Path to build directory summary.
    @param max_size_KB: Maximum result size in KB.
    """
    utils_lib.LOG('Running result_tools/utils on path: %s' % path)
    if max_size_KB > 0:
        utils_lib.LOG('Throttle result size to: %s' %
                      utils_lib.get_size_string(max_size_KB * 1024))

    result_dir = path
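    # If the given path is a file, save the summary in its parent directory.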
    if not os.path.isdir(result_dir):
        result_dir = os.path.dirname(result_dir)
    summary = result_info.ResultInfo.build_from_path(path)
    summary_json = json.dumps(summary)
    summary_file = get_unique_dir_summary_file(result_dir)

    # Make sure there is enough free disk to write the file.
    stat = os.statvfs(path)
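    # f_frsize * f_bavail is the number of bytes available to unprivileged
    # processes.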
    free_space = stat.f_frsize * stat.f_bavail
    if free_space - len(summary_json) < MIN_FREE_DISK_BYTES:
        raise utils_lib.NotEnoughDiskError(
                'Not enough disk space after saving the summary file. '
                'Available free disk: %s bytes. Summary file size: %s bytes.' %
                (free_space, len(summary_json)))

    with open(summary_file, 'w') as f:
        f.write(summary_json)
    utils_lib.LOG('Directory summary of %s is saved to file %s.' %
                  (path, summary_file))

    if max_size_KB > 0 and summary.trimmed_size > 0:
        old_size = summary.trimmed_size
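        # Skip throttling at random with probability (max size / current
        # size): results already under the limit are always skipped, and the
        # more oversized the results are, the more likely they are to be
        # throttled.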
        throttle_probability = float(max_size_KB * 1024) / old_size
        if random.random() < throttle_probability:
            utils_lib.LOG(
                    'Skip throttling %s: size=%s, throttle_probability=%s' %
                    (path, old_size, throttle_probability))
        else:
            _throttle_results(summary, max_size_KB)
            if summary.trimmed_size < old_size:
                # Files are throttled, save the updated summary file.
                utils_lib.LOG('Overwrite the summary file: %s' % summary_file)
                result_info.save_summary(summary, summary_file)


def _delete_summaries(path):
    """Delete all directory summary files in the given directory.

    This cleans up the directory so no summary files are left behind to
    affect later tests.

    @param path: Path of the directory to clean up.
    """
    # Only summary files directly under the `path` need to be cleaned.
    summary_files = glob.glob(os.path.join(path, SUMMARY_FILE_PATTERN))
    for summary in summary_files:
        try:
            os.remove(summary)
        except (IOError, OSError) as e:
            utils_lib.LOG('Failed to delete summary: %s. Error: %s' %
                          (summary, e))


def main():
    """Main entry point of the script."""
    _setup_logging()
    options = _parse_options()
    if options.delete_summaries:
        _delete_summaries(options.path)
    else:
        execute(options.path, options.max_size_KB)


if __name__ == '__main__':
    main()