# Copyright 2017 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""This throttler removes repeated files sharing the same prefix, for
example, screenshots or dumps in the same folder. The dedupe logic does not
compare file contents; instead, it sorts the files sharing a prefix by
modification time and removes the files in the middle of that order.
"""

import os
import re

import result_info_lib
import throttler_lib
import utils_lib


# Number of the oldest files to keep in each group.
OLDEST_FILES_TO_KEEP_COUNT = 2
# Number of the newest files to keep in each group.
NEWEST_FILES_TO_KEEP_COUNT = 1

# Files whose paths match any of the following patterns should not be deduped.
NO_DEDUPE_FILE_PATTERNS = [
        'debug/.*',
        r'.*perf\.data$',  # Performance test data.
        '.*/debug/.*',
        r'.*dir_summary_\d+\.json',
        ]

# Regex pattern to extract the prefix of a file name.
PREFIX_PATTERN = '([a-zA-Z_-]*).*'


def _group_by(file_infos, keys):
    """Group the file infos by the given keys.

    @param file_infos: A list of ResultInfo objects.
    @param keys: A list of names of the attributes to group the file infos by.
    @return: A dictionary of grouped_key: [ResultInfo].
    """
    grouped_infos = {}
    for info in file_infos:
        key_values = []
        for key in keys:
            key_values.append(getattr(info, key))
        grouped_key = os.sep.join(key_values)
        if grouped_key not in grouped_infos:
            grouped_infos[grouped_key] = []
        grouped_infos[grouped_key].append(info)
    return grouped_infos


def _dedupe_files(summary, file_infos, max_result_size_KB):
    """Delete the middle files in the given list and update the summary.

    @param summary: A ResultInfo object containing result summary.
    @param file_infos: A list of ResultInfo objects to be de-duplicated.
    @param max_result_size_KB: Maximum test result size in KB.
    """
    # Sort file infos by the modification time of the file, oldest first.
    file_infos.sort(
            key=lambda f: result_info_lib.get_last_modification_time(f.path))
    # Keep the oldest and newest files; everything in between is a candidate
    # for deletion.
    file_infos_to_delete = file_infos[
            OLDEST_FILES_TO_KEEP_COUNT:-NEWEST_FILES_TO_KEEP_COUNT]

    for file_info in file_infos_to_delete:
        if throttler_lib.try_delete_file_on_disk(file_info.path):
            file_info.trimmed_size = 0

        # Stop early once the result size is under the limit.
        if throttler_lib.check_throttle_limit(summary, max_result_size_KB):
            return
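
# A minimal sketch (illustration only; the 'shot_*.png' names are made up) of
# the keep/delete window computed by _dedupe_files. With
# OLDEST_FILES_TO_KEEP_COUNT = 2 and NEWEST_FILES_TO_KEEP_COUNT = 1, a group
# of six files sorted oldest-to-newest keeps the two oldest and the single
# newest:
#
#     files = ['shot_1.png', 'shot_2.png', 'shot_3.png',
#              'shot_4.png', 'shot_5.png', 'shot_6.png']
#     files[OLDEST_FILES_TO_KEEP_COUNT:-NEWEST_FILES_TO_KEEP_COUNT]
#     # -> ['shot_3.png', 'shot_4.png', 'shot_5.png'], the deletion candidates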


def throttle(summary, max_result_size_KB):
    """Throttle the files in summary by de-duplicating files.

    Throttling stops once all files are processed or the result size has been
    reduced below the given max_result_size_KB.

    @param summary: A ResultInfo object containing result summary.
    @param max_result_size_KB: Maximum test result size in KB.
    """
    _, grouped_files = throttler_lib.sort_result_files(summary)
    for pattern in throttler_lib.RESULT_THROTTLE_PRIORITY:
        throttleable_files = list(throttler_lib.get_throttleable_files(
                grouped_files[pattern], NO_DEDUPE_FILE_PATTERNS))

        for info in throttleable_files:
            info.parent_dir = os.path.dirname(info.path)
            info.prefix = re.match(PREFIX_PATTERN, info.name).group(1)

        # Group the files by parent directory and file name prefix.
        grouped_infos = _group_by(throttleable_files, ['parent_dir', 'prefix'])

        for infos in grouped_infos.values():
            if (len(infos) <=
                    OLDEST_FILES_TO_KEEP_COUNT + NEWEST_FILES_TO_KEEP_COUNT):
                # Too few files in the group to be worth de-duping.
                continue

            # Delete the files that can be deduped.
            utils_lib.LOG('De-duplicating files in %s with the same prefix of '
                          '"%s"' % (infos[0].parent_dir, infos[0].prefix))
            _dedupe_files(summary, infos, max_result_size_KB)

            if throttler_lib.check_throttle_limit(summary, max_result_size_KB):
                return
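

if __name__ == '__main__':
    # Hedged demo (not part of the production throttler; assumes the
    # repo-internal imports above resolve): show how PREFIX_PATTERN buckets
    # numbered files under a single prefix key, which drives the grouping in
    # throttle(). The file names below are made up for illustration.
    for name in ('screenshot_0001.png', 'screenshot_0002.png',
                 'chrome_dump.1234'):
        print('%s -> prefix %r' % (name,
                                   re.match(PREFIX_PATTERN, name).group(1)))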