import abc
import datetime
import glob
import json
import os
import re
import shutil

import common
from autotest_lib.client.common_lib import time_utils
from autotest_lib.client.common_lib import utils
from autotest_lib.server.cros.dynamic_suite import constants
from autotest_lib.server.cros.dynamic_suite import frontend_wrappers


_AFE = frontend_wrappers.RetryingAFE()

SPECIAL_TASK_PATTERN = r'.*/hosts/[^/]+/(\d+)-[^/]+'
JOB_PATTERN = r'.*/(\d+)-[^/]+'
# Pattern of a job folder, e.g., 123-debug_user, where 123 is the job id and
# debug_user is the name of the user who started the job.
JOB_FOLDER_PATTERN = r'.*/(\d+-[^/]+)'
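
# Illustrative examples (added; not in the original module). The paths are
# made up but follow the results layout described in the docstrings below:
#
#   re.match(SPECIAL_TASK_PATTERN, '/results/hosts/host1/1343-cleanup').group(1)
#   # -> '1343'
#   re.findall(JOB_PATTERN, '/results/2032-chromeos-test/host1')[-1]
#   # -> '2032'
#   re.findall(JOB_FOLDER_PATTERN, '/results/2032-chromeos-test/host1')[-1]
#   # -> '2032-chromeos-test'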

def is_job_expired(age_limit, timestamp):
  """Check whether a job timestamp is older than an age limit.

  @param age_limit: Minimum age, measured in days.  If the value is
                    not positive, the job is always expired.
  @param timestamp: Timestamp of the job whose age we are checking.
                    The format must match time_utils.TIME_FMT.

  @returns True iff the job is old enough to be expired.
  """
  if age_limit <= 0:
    return True
  job_time = time_utils.time_string_to_datetime(timestamp)
  expiration = job_time + datetime.timedelta(days=age_limit)
  return datetime.datetime.now() >= expiration
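
# Usage sketch (added for illustration; the timestamp below is hypothetical
# and must be formatted per time_utils.TIME_FMT):
#
#   is_job_expired(30, '2014-01-01 00:00:00')  # True once 30 days have passed
#   is_job_expired(0, '2014-01-01 00:00:00')   # Always True: non-positive limit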


def get_job_id_or_task_id(result_dir):
    """Extract the job id or special task id from result_dir.

    @param result_dir: path to the result dir.
            For test job:
            /usr/local/autotest/results/2032-chromeos-test/chromeos1-rack5-host6
            The hostname at the end is optional.
            For special task:
            /usr/local/autotest/results/hosts/chromeos1-rack5-host6/1343-cleanup

    @returns: integer representing the job id or task id. Returns None if it
              fails to parse a job or task id from the result_dir.
    """
    if not result_dir:
        return
    result_dir = os.path.abspath(result_dir)
    # The result folder for a job running inside a container has only the
    # job id.
    ssp_job_pattern = r'.*/(\d+)$'
    # Try to get the job ID from the last pattern of number-text. This avoids
    # issues with paths like 123-results/456-debug_user, in which 456 is the
    # real job ID.
    m_job = re.findall(JOB_PATTERN, result_dir)
    if m_job:
        return int(m_job[-1])
    m_special_task = re.match(SPECIAL_TASK_PATTERN, result_dir)
    if m_special_task:
        return int(m_special_task.group(1))
    m_ssp_job_pattern = re.match(ssp_job_pattern, result_dir)
    if m_ssp_job_pattern and utils.is_in_container():
        return int(m_ssp_job_pattern.group(1))
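
# Usage sketch (added for illustration, using the example paths from the
# docstring above):
#
#   get_job_id_or_task_id(
#       '/usr/local/autotest/results/2032-chromeos-test/chromeos1-rack5-host6')
#   # -> 2032
#   get_job_id_or_task_id(
#       '/usr/local/autotest/results/hosts/chromeos1-rack5-host6/1343-cleanup')
#   # -> 1343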


def get_job_folder_name(result_dir):
    """Extract the folder name of a job from result_dir.

    @param result_dir: path to the result dir.
            For test job:
            /usr/local/autotest/results/2032-chromeos-test/chromeos1-rack5-host6
            The hostname at the end is optional.
            For special task:
            /usr/local/autotest/results/hosts/chromeos1-rack5-host6/1343-cleanup

    @returns: The name of the folder of a job. Returns None if it fails to
              parse a name matching pattern JOB_FOLDER_PATTERN from the
              result_dir.
    """
    if not result_dir:
        return
    m_job = re.findall(JOB_FOLDER_PATTERN, result_dir)
    if m_job:
        return m_job[-1]
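
# Usage sketch (added for illustration, using the example path from the
# docstring above):
#
#   get_job_folder_name(
#       '/usr/local/autotest/results/2032-chromeos-test/chromeos1-rack5-host6')
#   # -> '2032-chromeos-test'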


class _JobDirectory(object):
  """State associated with a job to be offloaded.

  The full life-cycle of a job (including failure events that
  normally don't occur) looks like this:
   1. The job's results directory is discovered by
      `get_job_directories()`, and a job instance is created for it.
   2. Calls to `offload()` have no effect so long as the job
      isn't complete in the database and the job isn't expired
      according to the `age_limit` parameter.
   3. Eventually, the job is both finished and expired.  The next
      call to `offload()` makes the first attempt to offload the
      directory to GS.  Offload is attempted, but fails to complete
      (e.g. because of a GS problem).
   4. Finally, a call to `offload()` succeeds, and the directory no
      longer exists.  Now `is_offloaded()` is true, so the job
      instance is deleted, and future failures will not mention this
      directory any more.

  Only steps 1. and 4. are guaranteed to occur.  The others depend
  on the timing of calls to `offload()`, and on the reliability of
  the actual offload process.

  """

  __metaclass__ = abc.ABCMeta

  GLOB_PATTERN = None   # must be redefined in subclass

  def __init__(self, resultsdir):
    self.dirname = resultsdir
    self._id = get_job_id_or_task_id(resultsdir)
    self.offload_count = 0
    self.first_offload_start = 0

  @classmethod
  def get_job_directories(cls):
    """Return a list of directories of jobs that need offloading."""
    return [d for d in glob.glob(cls.GLOB_PATTERN) if os.path.isdir(d)]

  @abc.abstractmethod
  def get_timestamp_if_finished(self):
    """Return this job's timestamp from the database.

    If the database has not marked the job as finished, return
    `None`.  Otherwise, return a timestamp for the job.  The
    timestamp is to be used to determine expiration in
    `is_job_expired()`.

    @return Return `None` if the job is still running; otherwise
            return a string with a timestamp in the appropriate
            format.
    """
    raise NotImplementedError("_JobDirectory.get_timestamp_if_finished")

  def process_gs_instructions(self):
    """Process any gs_offloader instructions for this job or special task.

    @returns True if there is anything left to offload, False otherwise.
    """
    # Default is to still offload the directory.
    return True
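
# Driver sketch (added for illustration only; the offload()/is_offloaded()
# calls mentioned in the class docstring belong to the offloader that consumes
# these objects, not to this module). Run from the results root, a consumer
# might do roughly:
#
#   for d in RegularJobDirectory.get_job_directories():
#       job = RegularJobDirectory(d)
#       if job.get_timestamp_if_finished() is None:
#           continue  # not finished yet; revisit on the next pass
#       if job.process_gs_instructions():
#           pass  # something is left on disk; hand the directory to the offloader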


NO_OFFLOAD_README = """These results have been deleted rather than offloaded.
This is the expected behavior for passing jobs from the Commit Queue."""


class RegularJobDirectory(_JobDirectory):
  """Subclass of _JobDirectory for regular test jobs."""

  GLOB_PATTERN = '[0-9]*-*'
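
  # Example (illustrative): relative to the results root, this glob matches
  # top-level job folders such as '2032-chromeos-test'; special task folders
  # under 'hosts/' are handled by SpecialJobDirectory below.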

  def process_gs_instructions(self):
    """Process any gs_offloader instructions for this job.

    @returns True if there is anything left to offload, False otherwise.
    """
    # Go through the gs_offloader instructions file for each test in this job.
    for path in glob.glob(os.path.join(self.dirname, '*',
                                       constants.GS_OFFLOADER_INSTRUCTIONS)):
      with open(path, 'r') as f:
        gs_off_instructions = json.load(f)
      if gs_off_instructions.get(constants.GS_OFFLOADER_NO_OFFLOAD):
        dirname = os.path.dirname(path)
        _remove_log_directory_contents(dirname)

    # Finally check if there's anything left to offload.
    if not os.listdir(self.dirname):
      shutil.rmtree(self.dirname)
      return False
    return True
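
  # Example instructions file (hypothetical contents; the real key string is
  # whatever constants.GS_OFFLOADER_NO_OFFLOAD resolves to). A per-test
  # directory holding such a file gets its logs removed instead of offloaded:
  #
  #   {"no_offload": true}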


  def get_timestamp_if_finished(self):
    """Get the timestamp to use for finished jobs.

    @returns the latest hqe finished_on time. If the finished_on times are
             null, returns the job's created_on time.
    """
    entry = _AFE.get_jobs(id=self._id, finished=True)
    if not entry:
      return None
    hqes = _AFE.get_host_queue_entries(finished_on__isnull=False,
                                       job_id=self._id)
    if not hqes:
      return entry[0].created_on
    # While most Jobs have 1 HQE, some can have multiple, so check them all.
    return max([hqe.finished_on for hqe in hqes])


def _remove_log_directory_contents(dirpath):
    """Remove log directory contents.

    Leave a note explaining what has happened to the logs.

    @param dirpath: Path to log directory.
    """
    shutil.rmtree(dirpath)
    os.mkdir(dirpath)
    breadcrumb_name = os.path.join(dirpath, 'logs-removed-readme.txt')
    with open(breadcrumb_name, 'w') as f:
      f.write(NO_OFFLOAD_README)
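
# After _remove_log_directory_contents(some_dir), the directory contains only
# the breadcrumb file 'logs-removed-readme.txt', whose contents are
# NO_OFFLOAD_README.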


class SpecialJobDirectory(_JobDirectory):
  """Subclass of _JobDirectory for special (per-host) jobs."""

  GLOB_PATTERN = 'hosts/*/[0-9]*-*'

  def __init__(self, resultsdir):
    super(SpecialJobDirectory, self).__init__(resultsdir)

  def get_timestamp_if_finished(self):
    """Return the task's time_finished from the AFE if complete, else None."""
    entry = _AFE.get_special_tasks(id=self._id, is_complete=True)
    return entry[0].time_finished if entry else None
    229