Home | History | Annotate | Download | only in contrib
      1 #!/usr/bin/env python
      2 
      3 """
This script prints a CSV of `suite,test,path/to/control.file` rows, where each
row is a test that has failed every time it ran during the past N days.  N is
the `_DAYS_NOT_RUNNING_CUTOFF` constant defined below.
      7 
      8 You run it like this
      9 
     10   ./always_failing_tests.py | tee output
     11 
     12 But please note that since we're using the models to do queries, you'll probably
     13 need to move your local shadow config out of the way before you run this script
     14 so that you point at prod.
     15 """
     16 
     17 import time
     18 import hashlib
     19 import re
     20 import datetime
     21 import sys
     22 
     23 import common
     24 from autotest_lib.frontend import setup_django_readonly_environment
     25 
     26 # Django and the models are only setup after
     27 # the setup_django_readonly_environment module is imported.
     28 from autotest_lib.frontend.tko import models as tko_models
     29 from autotest_lib.frontend.afe import models as afe_models
     30 from autotest_lib.server.cros.dynamic_suite import suite
     31 
     32 
# Size, in days, of the look-back window: main() only considers test results
# whose started_time is within this many days of today.
_DAYS_NOT_RUNNING_CUTOFF = 30
     34 
     35 
     36 def md5(s):
     37   m = hashlib.md5()
     38   m.update(s)
     39   return m.hexdigest()
     40 
     41 
     42 def main():
     43     cutoff_delta = datetime.timedelta(_DAYS_NOT_RUNNING_CUTOFF)
     44     cutoff_date = datetime.datetime.today() - cutoff_delta
     45     statuses = {s.status_idx: s.word for s in tko_models.Status.objects.all()}
     46     now = time.time()
     47 
     48     tests = tko_models.Test.objects.select_related('job'
     49             ).filter(started_time__gte=cutoff_date
     50             ).exclude(test__icontains='/'
     51             ).exclude(test__icontains='_JOB'
     52             ).exclude(test='provision'
     53             ).exclude(test__icontains='try_new_image')
     54     tests = list(tests)
     55     # These prints are vague profiling work.  We're handling a lot of data, so I
     56     # had to dump some decent work into making sure things chug along at a
     57     # decent speed.
     58     print "DB: %d -- len=%d" % (time.time()-now, len(tests))
     59 
     60     def only_failures(d, t):
     61       word = statuses[t.status_id]
     62       if word == 'TEST_NA':
     63         return d
     64       if word == 'GOOD' or word == 'WARN':
     65         passed = True
     66       else:
     67         passed = False
     68       d[t.test] = d.get(t.test, False) or passed
     69       return d
     70     dct = reduce(only_failures, tests, {})
     71     print "OF: %d -- len=%d" % (time.time()-now, len(dct))
     72 
     73     all_fail = filter(lambda x: x.test in dct and not dct[x.test], tests)
     74     print "AF: %d -- len=%d" % (time.time()-now, len(all_fail))
     75 
     76     hash_to_file = {}
     77     fs_getter = suite.Suite.create_fs_getter(common.autotest_dir)
     78     for control_file in fs_getter.get_control_file_list():
     79       with open(control_file, 'rb') as f:
     80         h = md5(f.read())
     81         hash_to_file[h] = control_file.replace(common.autotest_dir, '')\
     82                                       .lstrip('/')
     83     print "HF: %d -- len=%d" % (time.time()-now, len(hash_to_file))
     84 
     85     afe_job_ids = set(map(lambda t: t.job.afe_job_id, all_fail))
     86     afe_jobs = afe_models.Job.objects.select_related('parent_job')\
     87                                      .filter(id__in=afe_job_ids)
     88     print "AJ: %d -- len=%d" % (time.time()-now, len(afe_jobs))
     89 
     90     job_to_hash = {}
     91     for job in afe_jobs:
     92       job_to_hash[job.id] = md5(job.control_file)
     93     print "JH: %d -- len=%d" % (time.time()-now, len(job_to_hash))
     94 
     95     job_to_suite = {}
     96     rgx = re.compile("test_suites/control.(\w+)")
     97     for job in afe_jobs:
     98       job_id = job.parent_job
     99       if not job_id:
    100         job_id = job
    101       x = rgx.search(job_id.name)
    102       if not x:
    103         print job_id.name
    104         continue
    105       job_to_suite[job.id] = x.groups(1)[0]
    106 
    107     def collect_by_suite_name(d, t):
    108       s = job_to_suite.get(t.job.afe_job_id, None)
    109       d.setdefault((s, t.test), []).append(t)
    110       return d
    111     by_name = reduce(collect_by_suite_name, all_fail, {})
    112     print "BN: %d -- len=%d" % (time.time()-now, len(by_name))
    113 
    114     for (s, testname), tests in by_name.iteritems():
    115       for test in tests:
    116         h = job_to_hash[test.job.afe_job_id]
    117         if h in hash_to_file:
    118           print "%s,%s,%s" % (s, testname, hash_to_file[h])
    119           break
    120       else:
    121         print "%s,%s,?" % (s, testname)
    122 
    123 
    124 if __name__ == '__main__':
    125     sys.exit(main())
    126