#!/usr/bin/env python

"""
This script prints out a csv file of `suite,test,path/to/control.file` where
each row is a test that has failed every time that it ran for the past N days,
where N is the `_DAYS_NOT_RUNNING_CUTOFF` constant lower in this file.

You run it like this

  ./always_failing_tests.py | tee output

Only the csv rows are written to stdout. The timing/progress lines and the
names of jobs that could not be matched to a suite are written to stderr, so
the captured output stays a clean csv.

But please note that since we're using the models to do queries, you'll probably
need to move your local shadow config out of the way before you run this script
so that you point at prod.
"""

import datetime
import hashlib
import re
import sys
import time

import common
from autotest_lib.frontend import setup_django_readonly_environment

# Django and the models are only setup after
# the setup_django_readonly_environment module is imported.
from autotest_lib.frontend.tko import models as tko_models
from autotest_lib.frontend.afe import models as afe_models
from autotest_lib.server.cros.dynamic_suite import suite


_DAYS_NOT_RUNNING_CUTOFF = 30

# Status words that count as "the test passed".
_PASSING_STATUSES = frozenset(['GOOD', 'WARN'])

# Pulls the suite name out of a job name containing
# `test_suites/control.<suite>`.
_SUITE_NAME_RE = re.compile(r'test_suites/control\.(\w+)')


def md5(s):
    """Return the hexadecimal MD5 digest of `s`.

    @param s: The data to hash. Byte strings are hashed as-is; text is
              encoded as UTF-8 first, so non-ASCII text does not raise
              during hashing.

    @return The digest as a string of hexadecimal digits.
    """
    if not isinstance(s, bytes):
        # NOTE(review): assumes on-disk control files are UTF-8 so that
        # digests of database text and of file bytes stay comparable.
        s = s.encode('utf-8')
    return hashlib.md5(s).hexdigest()


def _report_progress(label, start_time, count):
    """Write one profiling line to stderr.

    @param label: Short tag naming the stage that just finished.
    @param start_time: time.time() value taken when the work started.
    @param count: Number of items the stage produced.
    """
    sys.stderr.write("%s: %d -- len=%d\n"
                     % (label, time.time() - start_time, count))


def main():
    """Print a `suite,test,control file` csv row per always-failing test.

    Loads the TKO test results started within the last
    `_DAYS_NOT_RUNNING_CUTOFF` days, keeps the tests that never had a GOOD or
    WARN result, groups them by (suite, test name), and matches each group to
    a control file on disk by comparing MD5 digests of the AFE job's control
    file against the control files under `common.autotest_dir`. A `?` is
    printed in place of the path when no control file matches.
    """
    cutoff_delta = datetime.timedelta(_DAYS_NOT_RUNNING_CUTOFF)
    cutoff_date = datetime.datetime.today() - cutoff_delta
    statuses = {s.status_idx: s.word for s in tko_models.Status.objects.all()}
    now = time.time()

    tests = list(tko_models.Test.objects.select_related('job')
                 .filter(started_time__gte=cutoff_date)
                 .exclude(test__icontains='/')
                 .exclude(test__icontains='_JOB')
                 .exclude(test='provision')
                 .exclude(test__icontains='try_new_image'))
    # These progress lines are vague profiling work. We're handling a lot of
    # data, so some decent work went into making sure things chug along at a
    # decent speed.
    _report_progress('DB', now, len(tests))

    # Test name -> True if any run in the window passed. TEST_NA runs are
    # ignored, so a test with only TEST_NA results never gets an entry.
    ever_passed = {}
    for result in tests:
        word = statuses[result.status_id]
        if word == 'TEST_NA':
            continue
        passed = word in _PASSING_STATUSES
        ever_passed[result.test] = ever_passed.get(result.test, False) or passed
    _report_progress('OF', now, len(ever_passed))

    # Defaulting to True drops tests that have no entry at all.
    all_fail = [result for result in tests
                if not ever_passed.get(result.test, True)]
    _report_progress('AF', now, len(all_fail))

    # Digest of each control file on disk -> path relative to autotest_dir.
    hash_to_file = {}
    fs_getter = suite.Suite.create_fs_getter(common.autotest_dir)
    for control_file in fs_getter.get_control_file_list():
        with open(control_file, 'rb') as f:
            digest = md5(f.read())
        relative_path = control_file.replace(common.autotest_dir, '')
        hash_to_file[digest] = relative_path.lstrip('/')
    _report_progress('HF', now, len(hash_to_file))

    afe_job_ids = set(result.job.afe_job_id for result in all_fail)
    # A missing AFE job id cannot match any AFE job row.
    afe_job_ids.discard(None)
    afe_jobs = list(afe_models.Job.objects.select_related('parent_job')
                    .filter(id__in=afe_job_ids))
    _report_progress('AJ', now, len(afe_jobs))

    job_to_hash = {job.id: md5(job.control_file) for job in afe_jobs}
    _report_progress('JH', now, len(job_to_hash))

    job_to_suite = {}
    for job in afe_jobs:
        # The suite name is read from the parent job's name; a job with no
        # parent is searched by its own name instead.
        suite_job = job.parent_job or job
        match = _SUITE_NAME_RE.search(suite_job.name)
        if not match:
            # Report the unmatched name without polluting the csv on stdout.
            sys.stderr.write("%s\n" % suite_job.name)
            continue
        job_to_suite[job.id] = match.group(1)

    # (suite name or None, test name) -> failing results in that group.
    by_name = {}
    for result in all_fail:
        key = (job_to_suite.get(result.job.afe_job_id), result.test)
        by_name.setdefault(key, []).append(result)
    _report_progress('BN', now, len(by_name))

    for (suite_name, testname), failures in by_name.items():
        control_path = '?'
        for result in failures:
            # .get(): the AFE job for this result may not have been returned
            # by the query above, in which case there is nothing to match.
            digest = job_to_hash.get(result.job.afe_job_id)
            if digest in hash_to_file:
                control_path = hash_to_file[digest]
                break
        sys.stdout.write("%s,%s,%s\n" % (suite_name, testname, control_path))


if __name__ == '__main__':
    sys.exit(main())