Home | History | Annotate | Download | only in lib
      1 # Copyright 2016 The Chromium OS Authors. All rights reserved.
      2 # Use of this source code is governed by a BSD-style license that can be
      3 # found in the LICENSE file.
      4 
      5 """Services relating to generating a suite timeline and report."""
      6 
      7 from __future__ import print_function
      8 
      9 import common
     10 import datetime
     11 import json
     12 
     13 from autotest_lib.client.common_lib import time_utils
     14 from autotest_lib.server import frontend
     15 from autotest_lib.server.lib import status_history
     16 from chromite.lib import cros_logging as logging
     17 
     18 
# Convenience alias for the host-history type used when walking per-host
# HQE records below.
HostJobHistory = status_history.HostJobHistory

# TODO: Handle other statuses like infra failures.
# Maps raw status strings from the TKO database (upper-case entries) and
# the HQE database (capitalized entries) onto the normalized status
# values stored in Cloud Datastore event entries.
TKO_STATUS_MAP = {
    'ERROR': 'fail',
    'FAIL': 'fail',
    'GOOD': 'pass',
    'PASS': 'pass',
    'ABORT': 'aborted',
    'Failed': 'fail',
    'Completed': 'pass',
    'Aborted': 'aborted',
}


# Default suite timeout in seconds
DEFAULT_SUITE_TIMEOUT = 90 * 60
     36 
     37 
     38 def to_epoch_time_int(value):
     39     """Convert the given value to epoch time int.
     40 
     41     @returns: epoch time in integer."""
     42     return int(time_utils.to_epoch_time(value))
     43 
     44 
     45 def parse_tko_status_string(status_string):
     46     """Parse a status string from TKO or the HQE databases.
     47 
     48     @param status_string: A status string from TKO or HQE databases.
     49 
     50     @return A status string suitable for inclusion within Cloud Datastore.
     51     """
     52     return TKO_STATUS_MAP.get(status_string, 'unknown:' + status_string)
     53 
     54 
     55 def make_entry(entry_id, name, status, start_time,
     56                finish_time=None, parent=None):
     57     """Generate an event log entry to be stored in Cloud Datastore.
     58 
     59     @param entry_id: A (Kind, id) tuple representing the key.
     60     @param name: A string identifying the event
     61     @param status: A string identifying the status of the event.
     62     @param start_time: A unix timestamp of the start of the event.
     63     @param finish_time: A unix timestamp of the finish of the event.
     64     @param parent: A (Kind, id) tuple representing the parent key.
     65 
     66     @return A dictionary representing the entry suitable for dumping via JSON.
     67     """
     68     entry = {
     69         'id': entry_id,
     70         'name': name,
     71         'status': status,
     72         'start_time': start_time,
     73     }
     74     if finish_time is not None:
     75         entry['finish_time'] = finish_time
     76     if parent is not None:
     77         entry['parent'] = parent
     78     return entry
     79 
     80 
     81 def find_start_finish_times(statuses):
     82     """Determines the start and finish times for a list of statuses.
     83 
     84     @param statuses: A list of job test statuses.
     85 
     86     @return (start_tme, finish_time) tuple of seconds past epoch.  If either
     87             cannot be determined, None for that time.
     88     """
     89     starts = {to_epoch_time_int(s.test_started_time)
     90               for s in statuses if s.test_started_time != 'None'}
     91     finishes = {to_epoch_time_int(s.test_finished_time)
     92                 for s in statuses if s.test_finished_time != 'None'}
     93     start_time = min(starts) if starts else None
     94     finish_time = max(finishes) if finishes else None
     95     return start_time, finish_time
     96 
     97 
     98 def make_job_entry(tko, job, parent=None, suite_job=False, job_entries=None):
     99     """Generate a Suite or HWTest event log entry.
    100 
    101     @param tko: TKO database handle.
    102     @param job: A frontend.Job to generate an entry for.
    103     @param parent: A (Kind, id) tuple representing the parent key.
    104     @param suite_job: A boolean indicating wheret this represents a suite job.
    105     @param job_entries: A dictionary mapping job id to earlier job entries.
    106 
    107     @return A dictionary representing the entry suitable for dumping via JSON.
    108     """
    109     statuses = tko.get_job_test_statuses_from_db(job.id)
    110     status = 'pass'
    111     dut = None
    112     for s in statuses:
    113         parsed_status = parse_tko_status_string(s.status)
    114         # TODO: Improve this generation of status.
    115         if parsed_status != 'pass':
    116             status = parsed_status
    117         if s.hostname:
    118             dut = s.hostname
    119         if s.test_started_time == 'None' or s.test_finished_time == 'None':
    120             logging.warn('TKO entry for %d missing time: %s' % (job.id, str(s)))
    121     start_time, finish_time = find_start_finish_times(statuses)
    122     entry = make_entry(('Suite' if suite_job else 'HWTest', int(job.id)),
    123                        job.name.split('/')[-1], status, start_time,
    124                        finish_time=finish_time, parent=parent)
    125 
    126     entry['job_id'] = int(job.id)
    127     if dut:
    128         entry['dut'] = dut
    129     if job.shard:
    130         entry['shard'] = job.shard
    131     # Determine the try of this job by looking back through what the
    132     # original job id is.
    133     if 'retry_original_job_id' in job.keyvals:
    134         original_job_id = int(job.keyvals['retry_original_job_id'])
    135         original_job = job_entries.get(original_job_id, None)
    136         if original_job:
    137             entry['try'] = original_job['try'] + 1
    138         else:
    139             entry['try'] = 0
    140     else:
    141         entry['try'] = 1
    142     entry['gs_url'] = status_history.get_job_gs_url(job)
    143     return entry
    144 
    145 
    146 def make_hqe_entry(hostname, hqe, hqe_statuses, parent=None):
    147     """Generate a HQE event log entry.
    148 
    149     @param hostname: A string of the hostname.
    150     @param hqe: A host history to generate an event for.
    151     @param hqe_statuses: A dictionary mapping HQE ids to job status.
    152     @param parent: A (Kind, id) tuple representing the parent key.
    153 
    154     @return A dictionary representing the entry suitable for dumping via JSON.
    155     """
    156     entry = make_entry(
    157         ('HQE', int(hqe.id)), hostname,
    158         hqe_statuses.get(hqe.id, parse_tko_status_string(hqe.job_status)),
    159         hqe.start_time, finish_time=hqe.end_time, parent=parent)
    160 
    161     entry['task_name'] = hqe.name.split('/')[-1]
    162     entry['in_suite'] = hqe.id in hqe_statuses
    163     entry['job_url'] = hqe.job_url
    164     entry['gs_url'] = hqe.gs_url
    165     if hqe.job_id is not None:
    166         entry['job_id'] = hqe.job_id
    167     entry['is_special'] = hqe.is_special
    168     return entry
    169 
    170 
    171 def generate_suite_report(suite_job_id, afe=None, tko=None,
    172                           reset_finish_time=False):
    173     """Generate a list of events corresonding to a single suite job.
    174 
    175     @param suite_job_id: The AFE id of the suite job.
    176     @param afe: AFE database handle.
    177     @param tko: TKO database handle.
    178     @reset_finish_time: Boolean indicating whether to reset the suite finish
    179                         to now.
    180 
    181     @return A list of entries suitable for dumping via JSON.
    182     """
    183     if afe is None:
    184         afe = frontend.AFE()
    185     if tko is None:
    186         tko = frontend.TKO()
    187 
    188     # Retrieve the main suite job.
    189     suite_job = afe.get_jobs(id=suite_job_id)[0]
    190 
    191     suite_entry = make_job_entry(tko, suite_job, suite_job=True)
    192     entries = [suite_entry]
    193 
    194     # Retrieve the child jobs and cache all their statuses
    195     logging.debug('Fetching child jobs...')
    196     child_jobs = afe.get_jobs(parent_job_id=suite_job_id)
    197     logging.debug('... fetched %s child jobs.' % len(child_jobs))
    198     job_statuses = {}
    199     job_entries = {}
    200     for j in child_jobs:
    201         job_entry = make_job_entry(tko, j, suite_entry['id'],
    202                                    job_entries=job_entries)
    203         entries.append(job_entry)
    204         job_statuses[j.id] = job_entry['status']
    205         job_entries[j.id] = job_entry
    206 
    207     # Retrieve the HQEs from all the child jobs, record statuses from
    208     # job statuses.
    209     child_job_ids = {j.id for j in child_jobs}
    210     logging.debug('Fetching HQEs...')
    211     hqes = afe.get_host_queue_entries(job_id__in=list(child_job_ids))
    212     logging.debug('... fetched %s HQEs.' % len(hqes))
    213     hqe_statuses = {h.id: job_statuses.get(h.job.id, None) for h in hqes}
    214 
    215     # Generate list of hosts.
    216     hostnames = {h.host.hostname for h in hqes if h.host}
    217     logging.debug('%s distinct hosts participated in the suite.' %
    218                   len(hostnames))
    219 
    220     suite_start_time = suite_entry.get('start_time')
    221     suite_finish_time = suite_entry.get('finish_time')
    222     # Retrieve histories for the time of the suite for all associated hosts.
    223     # TODO: Include all hosts in the pool.
    224     if suite_start_time and suite_finish_time:
    225 
    226         if reset_finish_time:
    227             suite_timeout_time = suite_start_time + DEFAULT_SUITE_TIMEOUT
    228             current_time = to_epoch_time_int(datetime.datetime.now())
    229             suite_finish_time = min(current_time, suite_timeout_time)
    230 
    231         histories = [HostJobHistory.get_host_history(afe, hostname,
    232                                                      suite_start_time,
    233                                                      suite_finish_time)
    234                      for hostname in sorted(hostnames)]
    235 
    236         for history in histories:
    237             entries.extend(make_hqe_entry(history.hostname, h, hqe_statuses,
    238                                           suite_entry['id']) for h in history)
    239 
    240     return entries
    241 
    242 def dump_entries_as_json(entries, output_file):
    243     """Dump event log entries as json to a file.
    244 
    245     @param entries: A list of event log entries to dump.
    246     @param output_file: The file to write to.
    247     """
    248     # Write the entries out as JSON.
    249     logging.debug('Dumping %d entries' % len(entries))
    250     for e in entries:
    251         json.dump(e, output_file, sort_keys=True)
    252         output_file.write('\n')
    253