Home | History | Annotate | Download | only in site_utils
      1 #!/usr/bin/env python
      2 
      3 # Copyright 2016 The Chromium OS Authors. All rights reserved.
      4 # Use of this source code is governed by a BSD-style license that can be
      5 # found in the LICENSE file.
      6 
      7 """
      8 This module is used to upload csv files generated by performance related tests
      9 to cns. More details about the implementation can be found in crbug.com/598504.
     10 
     11 The overall work flow is as follows.
     12 1. Query tko_test_attributes table for the perf_csv_folder attribute. The
     13 attribute contains a path to the csv files that need to be uploaded to cns.
     14 2. Keep only the perf_csv_folder attributes of test jobs that finished at least
     15 an hour ago. This makes sure the results have already been uploaded to GS.
     16 3. Locate the csv files in GS, and upload them to the desired cns location.
     17 
     18 After every run, the script saves the maximum test idx to a local file, and
     19 repeats the workflow.
     20 
     21 """
     22 
     23 import argparse
     24 import datetime
     25 import logging
     26 import os
     27 import shutil
     28 import tempfile
     29 import time
     30 
     31 import common
     32 from autotest_lib.client.bin import utils
     33 from autotest_lib.client.common_lib import logging_config
     34 from autotest_lib.client.common_lib.cros import retry
     35 from autotest_lib.frontend import setup_django_environment
     36 from autotest_lib.frontend.tko import models as tko_models
     37 
     38 
     39 # Number of hours that a test has to be finished for the script to process.
     40 # This allows gs_offloader to have enough time to upload the results to GS.
     41 CUTOFF_TIME_HOURS = 1
     42 
     43 # Default wait time in seconds after each run.
     44 DEFAULT_INTERVAL_SEC = 60
     45 
     46 # Timeout in minutes for upload attempts for a given folder.
     47 UPLOAD_TIMEOUT_MINS = 5
     48 
     49 class CsvNonexistenceException(Exception):
     50     """Exception raised when csv files not found in GS."""
     51 
     52 
     53 class CsvFolder(object):
     54     """A class contains the information of a folder storing csv files to be
     55     uploaded, and logic to upload the csv files.
     56     """
     57 
     58     # A class variable whose value is the GoogleStorage path to the test
     59     # results.
     60     gs_path = None
     61 
     62     # A class variable whose value is the cns path to upload the csv files to.
     63     cns_path = None
     64 
     65     def __init__(self, test_attribute_id, perf_csv_folder, test_view):
     66         """Initialize a CsvFolder object.
     67 
     68         @param test_attribute_id: ID of test attribute record.
     69         @param perf_csv_folder: Path of the folder contains csv files in test
     70                 results. It's the value of perf_csv_folder attribute from
     71                 tko_test_attributes table.
     72         @param test_view: A db object from querying tko_test_view_2 for the
     73                 related tko_test_attributes.
     74         """
     75         self.test_attribute_id = test_attribute_id
     76         self.perf_csv_folder = perf_csv_folder
     77         self.test_view = test_view
     78 
     79 
     80     def __str__(self):
     81         return '%s:%s:%s' % (self.test_view.job_name, self.test_view.job_tag,
     82                              self.perf_csv_folder)
     83 
     84 
     85     def _get_url(self):
     86         """Get the url to the folder storing csv files in GS.
     87 
     88         The url can be formulated based on csv folder, test_name and hostname.
     89         For example:
     90         gs://chromeos-autotest-results/123-chromeos-test/host1/
     91         gsutil is used to download the csv files with this gs url.
     92         """
     93         return os.path.join(self.gs_path, self.test_view.job_tag)
     94 
     95 
     96     def _download(self, dest_dir):
     97         """Download the folder containing csv files to the given dest_dir.
     98 
     99         @param dest_dir: A directory to store the downloaded csv files.
    100 
    101         @return: A list of strings, each is a path to a csv file in the
    102                  downloaded folder.
    103         @raise CsvNonexistenceException: If no csv file found in the GS.
    104         """
    105         gs_url = self._get_url()
    106         # Find all csv files in given GS url recursively
    107         files = utils.run('gsutil ls -r %s | grep -e .*\\\\.csv$' %
    108                           gs_url, ignore_status=True).stdout.strip().split('\n')
    109         if not files or files == ['']:
    110             raise CsvNonexistenceException('No csv file found in %s', gs_url)
    111 
    112         # Copy files from GS to temp_dir
    113         for f in files:
    114             utils.run('gsutil cp %s %s' % (f, dest_dir))
    115 
    116 
    117     @retry.retry(Exception, blacklist=[CsvNonexistenceException],
    118                  timeout_min=UPLOAD_TIMEOUT_MINS)
    119     def upload(self):
    120         """Upload the folder to cns.
    121         """
    122         temp_dir = tempfile.mkdtemp(suffix='perf_csv')
    123         try:
    124             self._download(temp_dir)
    125             files = os.listdir(temp_dir)
    126             # File in cns is stored under folder with format of:
    127             # <test_name>/<host_name>/YYYY/mm/dd/hh/mm
    128             path_in_cns = os.path.join(
    129                     self.cns_path,
    130                     self.test_view.test_name, self.test_view.hostname,
    131                     str(self.test_view.job_finished_time.year),
    132                     str(self.test_view.job_finished_time.month).zfill(2),
    133                     str(self.test_view.job_finished_time.day).zfill(2),
    134                     str(self.test_view.job_finished_time.hour).zfill(2),
    135                     str(self.test_view.job_finished_time.minute).zfill(2))
    136             utils.run('fileutil mkdir -p %s' % path_in_cns)
    137             for f in files:
    138                 utils.run('fileutil copytodir -f %s %s' %
    139                           (os.path.join(temp_dir, f), path_in_cns))
    140         finally:
    141             shutil.rmtree(temp_dir)
    142 
    143 
    144 class DBScanner(object):
    145     """Class contains the logic to query tko_test_attributes table for
    146     new perf_csv_folder attributes and create CsvFolder object for each
    147     new perf_csv_folder attribute.
    148     """
    149 
    150     # Minimum test_attribute id for querying tko_test_attributes table.
    151     min_test_attribute_id = -1
    152 
    153     @classmethod
    154     def get_perf_csv_folders(cls):
    155         """Query tko_test_attributes table for new entries of perf_csv_folder.
    156 
    157         @return: A list of CsvFolder objects for each new entry of
    158                  perf_csv_folder attribute in tko_test_attributes table.
    159         """
    160         attributes = tko_models.TestAttribute.objects.filter(
    161                 attribute='perf_csv_folder', id__gte=cls.min_test_attribute_id)
    162         folders = []
    163 
    164         cutoff_time = (datetime.datetime.now() -
    165                        datetime.timedelta(hours=CUTOFF_TIME_HOURS))
    166         for attribute in attributes:
    167             test_views = tko_models.TestView.objects.filter(
    168                     test_idx=attribute.test_id)
    169             if test_views[0].job_finished_time > cutoff_time:
    170                 continue
    171             folders.append(CsvFolder(attribute.id, attribute.value,
    172                                      test_views[0]))
    173         return folders
    174 
    175 
    176 def setup_logging(log_dir):
    177     """Setup logging information.
    178 
    179     @param log_dir: Path to the directory storing logs of this script.
    180     """
    181     config = logging_config.LoggingConfig()
    182     logfile = os.path.join(os.path.abspath(log_dir), 'perf_csv_uploader.log')
    183     config.add_file_handler(file_path=logfile, level=logging.DEBUG)
    184 
    185 
    186 def save_min_test_attribute_id(test_attribute_id_file):
    187     """Save the minimum test attribute id to a cached file.
    188 
    189     @param test_attribute_id_file: Path to the file storing the value of
    190             min_test_attribute_id.
    191     """
    192     with open(test_attribute_id_file, 'w') as f:
    193         return f.write(str(DBScanner.min_test_attribute_id))
    194 
    195 
    196 def get_min_test_attribute_id(test_attribute_id_file):
    197     """Get the minimum test attribute id from a cached file.
    198 
    199     @param test_attribute_id_file: Path to the file storing the value of
    200             min_test_attribute_id.
    201     """
    202     try:
    203         with open(test_attribute_id_file, 'r') as f:
    204             return int(f.read())
    205     except IOError:
    206         # min_test_attribute_id has not been set, default to -1.
    207         return -1
    208 
    209 
    210 def get_options():
    211     """Get the command line options.
    212 
    213     @return: Command line options of the script.
    214     """
    215     parser = argparse.ArgumentParser()
    216     parser.add_argument('--gs_path', type=str, dest='gs_path',
    217                         help='GoogleStorage path that stores test results.')
    218     parser.add_argument('--cns_path', type=str, dest='cns_path',
    219                         help='cns path to where csv files are uploaded to.')
    220     parser.add_argument('--log_dir', type=str, dest='log_dir',
    221                         help='Directory used to store logs.')
    222 
    223     options = parser.parse_args()
    224     CsvFolder.gs_path = options.gs_path
    225     CsvFolder.cns_path = options.cns_path
    226 
    227     return options
    228 
    229 
    230 def main():
    231     """Main process to repeat the workflow of searching/uploading csv files.
    232     """
    233     options = get_options()
    234     setup_logging(options.log_dir)
    235     test_attribute_id_file = os.path.join(options.log_dir,
    236                                           'perf_csv_uploader_test_attr_id')
    237     DBScanner.min_test_attribute_id = get_min_test_attribute_id(
    238             test_attribute_id_file)
    239 
    240     while True:
    241         folders = DBScanner.get_perf_csv_folders()
    242         if not folders:
    243             logging.info('No new folders found. Wait...')
    244             time.sleep(DEFAULT_INTERVAL_SEC)
    245             continue
    246 
    247         failed_folders = []
    248         for folder in folders:
    249             try:
    250                 logging.info('Uploading folder: %s', folder)
    251                 folder.upload()
    252             except CsvNonexistenceException:
    253                 # Ignore the failure if CSV files are not found in GS.
    254                 pass
    255             except Exception as e:
    256                 failed_folders.append(folder)
    257                 logging.error('Failed to upload folder %s, error: %s',
    258                               folder, e)
    259         if failed_folders:
    260             # Set the min_test_attribute_id to be the smallest one that failed
    261             # to upload.
    262             min_test_attribute_id = min([folder.test_attribute_id for folder in
    263                                          failed_folders])
    264         else:
    265             min_test_attribute_id = max([folder.test_attribute_id for folder in
    266                                          folders]) + 1
    267         if DBScanner.min_test_attribute_id != min_test_attribute_id:
    268             DBScanner.min_test_attribute_id = min_test_attribute_id
    269             save_min_test_attribute_id(test_attribute_id_file)
    270 
    271 
    272 if __name__ == '__main__':
    273     main()
    274