Home | History | Annotate | Download | only in result_tools
      1 # Copyright 2017 The Chromium OS Authors. All rights reserved.
      2 # Use of this source code is governed by a BSD-style license that can be
      3 # found in the LICENSE file.
      4 
      5 import os
      6 import re
      7 
      8 import throttler_lib
      9 import utils_lib
     10 
     11 
     12 # File extensions that can not be shrunk., as partial content will corrupt the
     13 # file.
     14 UNSHRINKABLE_EXTENSIONS = set([
     15         '.bin',
     16         '.data',
     17         '.dmp',
     18         '.gz',
     19         '.htm',
     20         '.html',
     21         '.img',
     22         '.journal',
     23         '.jpg',
     24         '.json',
     25         '.png',
     26         '.tar',
     27         '.tgz',
     28         '.xml',
     29         '.xz',
     30         '.zip',
     31         ])
     32 
     33 # Regex for files that should not be shrunk.
     34 UNSHRINKABLE_FILE_PATTERNS = [
     35         ]
     36 
     37 TRIMMED_FILE_HEADER = '!!! This file is trimmed !!!\n'
     38 ORIGINAL_SIZE_TEMPLATE = 'Original size: %d bytes\n\n'
     39 # Regex pattern to retrieve the original size of the file.
     40 ORIGINAL_SIZE_REGEX = 'Original size: (\d+) bytes'
     41 TRIMMED_FILE_INJECT_TEMPLATE = """
     42 
     43 ========================================================================
     44   < %d > characters are trimmed here.
     45 ========================================================================
     46 
     47 """
     48 
     49 # Percent of file content to keep at the beginning and end of the file, default
     50 # to 20%.
     51 HEAD_SIZE_PERCENT = 0.20
     52 
     53 # Default size in byte to trim the file down to.
     54 DEFAULT_FILE_SIZE_LIMIT_BYTE = 100 * 1024
     55 
     56 def _trim_file(file_info, file_size_limit_byte):
     57     """Remove the file content in the middle to reduce the file size.
     58 
     59     @param file_info: A ResultInfo object containing summary for the file to be
     60             shrunk.
     61     @param file_size_limit_byte: Maximum file size in bytes after trimming.
     62     """
     63     utils_lib.LOG('Trimming file %s to reduce size from %d bytes to %d bytes' %
     64                   (file_info.path, file_info.original_size,
     65                    file_size_limit_byte))
     66     new_path = os.path.join(os.path.dirname(file_info.path),
     67                             file_info.name + '_trimmed')
     68     original_size_bytes = file_info.original_size
     69     with open(new_path, 'w') as new_file, open(file_info.path) as old_file:
     70         # Read the beginning part of the old file, if it's already started with
     71         # TRIMMED_FILE_HEADER, no need to add the header again.
     72         header =  old_file.read(len(TRIMMED_FILE_HEADER))
     73         if header != TRIMMED_FILE_HEADER:
     74             new_file.write(TRIMMED_FILE_HEADER)
     75             new_file.write(ORIGINAL_SIZE_TEMPLATE % file_info.original_size)
     76         else:
     77             line = old_file.readline()
     78             match = re.match(ORIGINAL_SIZE_REGEX, line)
     79             if match:
     80                 original_size_bytes = int(match.group(1))
     81         header_size_bytes = new_file.tell()
     82         # Move old file reader to the beginning of the file.
     83         old_file.seek(0, os.SEEK_SET)
     84 
     85         new_file.write(old_file.read(
     86                 int((file_size_limit_byte - header_size_bytes) *
     87                     HEAD_SIZE_PERCENT)))
     88         # Position to seek from the end of the file.
     89         seek_pos = -(file_size_limit_byte - new_file.tell() -
     90                      len(TRIMMED_FILE_INJECT_TEMPLATE))
     91         bytes_to_skip = original_size_bytes + seek_pos - old_file.tell()
     92         # Adjust seek position based on string TRIMMED_FILE_INJECT_TEMPLATE
     93         seek_pos += len(str(bytes_to_skip)) - 2
     94         bytes_to_skip = original_size_bytes + seek_pos - old_file.tell()
     95         new_file.write(TRIMMED_FILE_INJECT_TEMPLATE % bytes_to_skip)
     96         old_file.seek(seek_pos, os.SEEK_END)
     97         new_file.write(old_file.read())
     98     stat = os.stat(file_info.path)
     99     if not throttler_lib.try_delete_file_on_disk(file_info.path):
    100         # Clean up the intermediate file.
    101         throttler_lib.try_delete_file_on_disk(new_path)
    102         utils_lib.LOG('Failed to shrink %s' % file_info.path)
    103         return
    104 
    105     os.rename(new_path, file_info.path)
    106     # Modify the new file's timestamp to the old one.
    107     os.utime(file_info.path, (stat.st_atime, stat.st_mtime))
    108     # Update the trimmed_size.
    109     file_info.trimmed_size = file_info.size
    110 
    111 
    112 def _get_shrinkable_files(file_infos, file_size_limit_byte):
    113     """Filter the files that can be throttled.
    114 
    115     @param file_infos: A list of ResultInfo objects.
    116     @param file_size_limit_byte: Minimum file size in bytes to be throttled.
    117     @yield: ResultInfo objects that can be shrunk.
    118     """
    119     for info in file_infos:
    120         ext = os.path.splitext(info.name)[1].lower()
    121         if ext in UNSHRINKABLE_EXTENSIONS:
    122             continue
    123 
    124         match_found = False
    125         for pattern in UNSHRINKABLE_FILE_PATTERNS:
    126             if re.match(pattern, info.name):
    127                 match_found = True
    128                 break
    129         if match_found:
    130             continue
    131 
    132         if info.trimmed_size <= file_size_limit_byte:
    133             continue
    134 
    135         yield info
    136 
    137 
    138 def throttle(summary, max_result_size_KB,
    139              file_size_limit_byte=DEFAULT_FILE_SIZE_LIMIT_BYTE,
    140              skip_autotest_log=False):
    141     """Throttle the files in summary by trimming file content.
    142 
    143     Stop throttling until all files are processed or the result file size is
    144     already reduced to be under the given max_result_size_KB.
    145 
    146     @param summary: A ResultInfo object containing result summary.
    147     @param max_result_size_KB: Maximum test result size in KB.
    148     @param file_size_limit_byte: Limit each file's size in the summary to be
    149             under the given threshold, until all files are processed or the
    150             result size is under the given max_result_size_KB.
    151     @param skip_autotest_log: True to skip shrink Autotest logs, default is
    152             False.
    153     """
    154     file_infos, _ = throttler_lib.sort_result_files(summary)
    155     extra_patterns = ([throttler_lib.AUTOTEST_LOG_PATTERN] if skip_autotest_log
    156                       else [])
    157     file_infos = throttler_lib.get_throttleable_files(
    158             file_infos, extra_patterns)
    159     file_infos = _get_shrinkable_files(file_infos, file_size_limit_byte)
    160     for info in file_infos:
    161         _trim_file(info, file_size_limit_byte)
    162 
    163         if throttler_lib.check_throttle_limit(summary, max_result_size_KB):
    164             return
    165