# Copyright 2017 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""This throttler tries to remove repeated files sharing the same prefix, for
example, screenshots or dumps in the same folder. The dedupe logic does not
compare file content; instead, it sorts the files with the same prefix and
removes the files in the middle.
"""
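# Illustrative example (hypothetical file names): given screenshot-01.png
# through screenshot-06.png in one folder, sorted by modification time, the
# throttler keeps the 2 oldest files and the 1 newest file and deletes the 3
# in the middle.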

import os
import re

import result_info_lib
import throttler_lib
import utils_lib


# Number of the oldest files to keep.
OLDEST_FILES_TO_KEEP_COUNT = 2
# Number of the newest files to keep.
NEWEST_FILES_TO_KEEP_COUNT = 1

# Files with paths matching the following patterns should not be deduped.
NO_DEDUPE_FILE_PATTERNS = [
        'debug/.*',
        r'.*perf\.data$',      # Performance test data.
        '.*/debug/.*',
        r'.*dir_summary_\d+\.json',
        ]

# Regex pattern to get the prefix of a file.
PREFIX_PATTERN = '([a-zA-Z_-]*).*'
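# For example, re.match(PREFIX_PATTERN, 'screenshot-10.png').group(1)
# returns 'screenshot-' (the file name is hypothetical).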

def _group_by(file_infos, keys):
    """Group the file infos by the given keys.

    @param file_infos: A list of ResultInfo objects.
    @param keys: A list of attribute names to group the file infos by.
    @return: A dictionary of grouped_key: [ResultInfo].
    """
    grouped_infos = {}
    for info in file_infos:
        # Build the key by joining the attribute values, e.g. parent_dir and
        # prefix, with the path separator.
        grouped_key = os.sep.join(getattr(info, key) for key in keys)
        grouped_infos.setdefault(grouped_key, []).append(info)
    return grouped_infos
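
# Example (hypothetical values): ResultInfo objects that share
# parent_dir='/results/sysinfo' and prefix='screenshot-' all collect under
# the single key '/results/sysinfo/screenshot-' when
# keys=['parent_dir', 'prefix'].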


def _dedupe_files(summary, file_infos, max_result_size_KB):
    """De-duplicate the given files and update the summary.

    @param summary: A ResultInfo object containing the result summary.
    @param file_infos: A list of ResultInfo objects to be de-duplicated.
    @param max_result_size_KB: Maximum test result size in KB.
    """
    # Sort file infos based on the modification time of the file.
    file_infos.sort(
            key=lambda f: result_info_lib.get_last_modification_time(f.path))
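    # For example (hypothetical names), with the default keep counts of 2
    # oldest and 1 newest, sorted files [f0, f1, f2, f3, f4, f5] leave
    # [f2, f3, f4] as deletion candidates.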
    file_infos_to_delete = file_infos[
            OLDEST_FILES_TO_KEEP_COUNT:-NEWEST_FILES_TO_KEEP_COUNT]

    for file_info in file_infos_to_delete:
        if throttler_lib.try_delete_file_on_disk(file_info.path):
            file_info.trimmed_size = 0

            # Stop deleting once the result size drops under the limit.
            if throttler_lib.check_throttle_limit(summary, max_result_size_KB):
                return


def throttle(summary, max_result_size_KB):
    """Throttle the files in summary by de-duplicating files.

    Throttling stops when all files are processed or the result size is
    already reduced to be under the given max_result_size_KB.

    @param summary: A ResultInfo object containing the result summary.
    @param max_result_size_KB: Maximum test result size in KB.
    """
    _, grouped_files = throttler_lib.sort_result_files(summary)
    for pattern in throttler_lib.RESULT_THROTTLE_PRIORITY:
        throttleable_files = list(throttler_lib.get_throttleable_files(
                grouped_files[pattern], NO_DEDUPE_FILE_PATTERNS))

        for info in throttleable_files:
            info.parent_dir = os.path.dirname(info.path)
            info.prefix = re.match(PREFIX_PATTERN, info.name).group(1)

        # Group files by parent directory and prefix.
        grouped_infos = _group_by(throttleable_files, ['parent_dir', 'prefix'])

        for infos in grouped_infos.values():
            if (len(infos) <=
                OLDEST_FILES_TO_KEEP_COUNT + NEWEST_FILES_TO_KEEP_COUNT):
                # No need to dedupe if there are too few files.
                continue

            # Remove the files that can be deduped.
            utils_lib.LOG('De-duplicating files in %s with the same prefix of '
                          '"%s"' % (infos[0].parent_dir, infos[0].prefix))
            _dedupe_files(summary, infos, max_result_size_KB)

            if throttler_lib.check_throttle_limit(summary, max_result_size_KB):
                return
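
# Minimal usage sketch, assuming `summary` is a ResultInfo already built for
# the result directory (constructing it is outside this module) and an
# illustrative 20 MB limit:
#
#     throttle(summary, max_result_size_KB=20 * 1024)
#     if throttler_lib.check_throttle_limit(summary, 20 * 1024):
#         utils_lib.LOG('Result size is now under the limit.')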
    110