Home | History | Annotate | Download | only in contrib
      1 #!/usr/bin/python -u
      2 
      3 """
      4 A script to help find the last few jobs that ran on a set of hosts that match
      5 the specified query, and rank them according to frequency across these hosts.
      6 Usage:
      7 1. Get last 5 jobs from 1 day ago running on all lumpies in pool suites that are
      8     currently in repair fail:
      9     ./sheriff_host_utils --days_back=1
     10     --query 'labels=pool:suites,board:lumpy status="Repair Failed"'
     11 
     12 2. Email someone about the last 5 jobs on all Repair Failed hosts.
     13     ./sheriff_host_utils --limit 5 --query 'status="Repair Failed"'
     14             --email someone@something.com
     15 """
     16 
     17 import argparse
     18 import collections
     19 import datetime
     20 import operator
     21 import shlex
     22 import sys
     23 
     24 import common
     25 
     26 from autotest_lib.client.common_lib import mail
     27 from autotest_lib.frontend import setup_django_environment
     28 from autotest_lib.frontend.afe import models
     29 from autotest_lib.server import frontend
     30 from autotest_lib.server.cros import repair_utils
     31 from django.utils import timezone as django_timezone
     32 
     33 
     34 def _parse_args(args):
     35     description=('./sheriff_host_utils.py --limit 5 --days_back 5 '
     36                  '--query \'status="Repair Failed" invalid=0 locked=0\'')
     37     if not args:
     38         print ('Too few arguments, execute %s, or try '
     39                './sheriff_host_utils.py --help' % description)
     40         sys.exit(1)
     41 
     42     parser = argparse.ArgumentParser(description=description)
     43     parser.add_argument('--limit', default=5,
     44                         help='The number of jobs per host.Eg: --limit 5')
     45     parser.add_argument('--days_back', default=5,
     46                         help='Number of days to search. Eg: --days_back 5')
     47     default_query = 'status="Repair Failed" labels=pool:bvt,board:lumpy'
     48     parser.add_argument('--query', default=default_query,
     49                         help='Search query.Eg: --query %s' % default_query)
     50     parser.add_argument('--email', default=None, help='send results to email.')
     51     return parser.parse_args(args)
     52 
     53 
     54 def _parse_query(query):
     55     """Parses query string for a host.
     56 
     57     All queries follow the format: 'key=value key2=value..' where all keys are
     58     are columns of the host table with the exception of labels. When specifying
     59     labels, the format is the same even though a label is a foreign key:
     60     --query 'lable=<comma seperated list of label names>'.
     61 
     62     @return: A dictionary into which the query has been parsed.
     63     """
     64     l = shlex.split(query)
     65     keys = [elem[:elem.find('=')] for elem in l]
     66     values = [elem[elem.find('=')+1:] for elem in l]
     67     payload = dict(zip(keys, values))
     68     return payload
     69 
     70 
     71 def _get_pool(host):
     72     """Returns the pool of a host.
     73     """
     74     labels = host.labels.all()
     75     for label_name in [label.name for label in labels]:
     76         if 'pool' in label_name:
     77             return label_name
     78 
     79 
     80 def retrieve_hosts(payload):
     81     """Retrieve hosts matching the payload.
     82 
     83     @param payload: A dict with selection criteria for hosts.
     84 
     85     @return: A queryset of hosts matching the payload.
     86     """
     87     # Replace label names with a foreign key query.
     88     query_hosts = models.Host.objects.all()
     89     if 'labels' in payload:
     90         for label in payload['labels'].split(','):
     91             query_hosts = query_hosts.filter(labels__name=label)
     92         del payload['labels']
     93     return query_hosts.filter(**payload)
     94 
     95 
     96 def analyze_jobs(hqes):
     97     """Perform some aggregation on the jobs that ran on matching hosts.
     98 
     99     @return: A string with the results of the analysis.
    100     """
    101     names = [hqe.job.name for hqe in hqes]
    102     ranking = collections.Counter([name[name.rfind('/')+1:] for name in names])
    103     sorted_rankings = sorted(ranking.iteritems(), key=operator.itemgetter(1))
    104     m = 'Ranking tests that ran on those hosts by frequency: \n\t'
    105     for job_stat in reversed(sorted_rankings):
    106         m += '%s test name: %s\n\t' % (job_stat[1], job_stat[0])
    107     return m
    108 
    109 
    110 def last_jobs_on_hosts(payload, limit_jobs, days_back):
    111     """Find the last limit_jobs on hosts with given status within days_back.
    112 
    113     @param payload: A dictionary specifiying the selection criteria of the hosts.
    114         Eg {'stauts': "Ready", 'id': 40}
    115     @param limit_jobs: The number of jobs per host.
    116     @param days_back: The days back to search for jobs.
    117 
    118     @retrurn: A string with information about the last jobs that ran on all
    119         hosts matching the query mentioned in the payload.
    120     """
    121     host_map = {}
    122     pool_less, job_less, jobs_to_analyze  = [], [], []
    123     hqes = models.HostQueueEntry.objects.all()
    124     cutoff = django_timezone.now().date() - datetime.timedelta(days=days_back)
    125     message = ''
    126 
    127     for host in retrieve_hosts(payload):
    128         pool = _get_pool(host)
    129         if not pool:
    130             pool_less.append(host.hostname)
    131             continue
    132         relevent_hqes = list(hqes.filter(host_id=host.id,
    133                 started_on__gte=cutoff).order_by('-started_on')[:limit_jobs])
    134         if relevent_hqes:
    135             jobs = ['name: %s, id: %s' %
    136                     (hqe.job.name, hqe.job_id) for hqe in relevent_hqes]
    137             message += '%s\n%s\n\t%s' % (pool, host, '\n\t'.join(jobs))
    138             jobs_to_analyze += relevent_hqes
    139         else:
    140             job_less.append(host.hostname)
    141 
    142     if job_less:
    143         message += ('\nNo jobs found for the following hosts within cutoff %s\n\t' %
    144                     cutoff)
    145         message += '\n\t'.join(job_less)
    146     if pool_less:
    147         message += '%s%s' % ('\nNo pools found on the following hosts:',
    148                             '\n\t'.join(pool_less))
    149     if jobs_to_analyze:
    150         message += '\n\n%s' % analyze_jobs(jobs_to_analyze)
    151 
    152     if message:
    153         return '%s\n%s' % ('Host information:', message)
    154     return 'No hosts matching query %s from %s days back' % (payload, days_back)
    155 
    156 
    157 if __name__ == '__main__':
    158     args = _parse_args(sys.argv[1:])
    159     message = last_jobs_on_hosts(_parse_query(args.query),
    160                                  int(args.limit), int(args.days_back))
    161     if args.email:
    162         mail.send('', args.email, '',
    163                   'Results from your sheirff script.', message)
    164     print message
    165