1 #! /usr/bin/python 2 3 # The tool gathers the time used by special tasks/test jobs to a tab-separated 4 # output. 5 # Import the output to google spreadsheet can generate a sheet like this: 6 # https://docs.google.com/a/google.com/spreadsheets/d/ 7 # 1iLPSRAgSVz9rGVusTb2yaHJ88iv0QY3hZI_ZI-RdatI/edit#gid=51630294 8 9 10 import os 11 import argparse 12 from datetime import datetime 13 import re 14 import subprocess 15 import sys 16 import urllib2 17 18 import common 19 from autotest_lib.client.common_lib import global_config 20 from autotest_lib.frontend import setup_django_environment 21 from autotest_lib.frontend.afe import models 22 from autotest_lib.frontend.tko import models as tko_models 23 from autotest_lib.server.cros.dynamic_suite import job_status 24 25 26 CONFIG = global_config.global_config 27 AUTOTEST_SERVER = CONFIG.get_config_value('SERVER', 'hostname', type=str) 28 29 LOG_BASE_URL = 'http://%s/tko/retrieve_logs.cgi?job=/results/' % AUTOTEST_SERVER 30 JOB_URL = LOG_BASE_URL + '%(job_id)s-%(owner)s/%(hostname)s' 31 LOG_PATH_FMT = 'hosts/%(hostname)s/%(task_id)d-%(taskname)s' 32 TASK_URL = LOG_BASE_URL + LOG_PATH_FMT 33 AUTOSERV_DEBUG_LOG = 'debug/autoserv.DEBUG' 34 HYPERLINK = '=HYPERLINK("%s","%0.1f")' 35 36 GS_URI = 'gs://chromeos-autotest-results' 37 38 # A cache of special tasks, hostname:[task] 39 tasks_cache = {} 40 41 def get_debug_log(autoserv_log_url, autoserv_log_path): 42 """Get a list of strings or a stream for autoserv.DEBUG log file. 43 44 @param autoserv_log_url: Url to the repair job's autoserv.DEBUG log. 45 @param autoserv_log_path: Path to autoserv.DEBUG log, e.g., 46 hosts/hostname/job_id-repair/debug/autoserv.DEBUG. 47 @return: A list of string if debug log was moved to GS already, or a 48 stream of the autoserv.DEBUG file. 49 """ 50 url = urllib2.urlopen(autoserv_log_url).geturl() 51 if not 'accounts.google.com' in url: 52 return urllib2.urlopen(url) 53 else: 54 # The file was moved to Google storage already, read the file from GS. 55 debug_log_link = '%s/%s' % (GS_URI, autoserv_log_path) 56 cmd = 'gsutil cat %s' % debug_log_link 57 proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, 58 stderr=subprocess.PIPE) 59 stdout, stderr = proc.communicate() 60 if proc.returncode == 0: 61 return stdout.split(os.linesep) 62 else: 63 print 'Failed to read %s: %s' % (debug_log_link, stderr) 64 65 66 class task_runtime(object): 67 """Details about the task run time. 68 """ 69 def __init__(self, task): 70 """Init task_runtime 71 """ 72 # Special task ID 73 self.id = task.id 74 # AFE special_task model 75 self.task = task 76 self.hostname = task.host.hostname 77 78 # Link to log 79 task_info = {'task_id': task.id, 'taskname': task.task.lower(), 80 'hostname': self.hostname} 81 self.log = TASK_URL % task_info 82 83 autoserv_log_url = '%s/%s' % (self.log, AUTOSERV_DEBUG_LOG) 84 log_path = LOG_PATH_FMT % task_info 85 autoserv_log_path = '%s/%s' % (log_path, AUTOSERV_DEBUG_LOG) 86 debug_log = get_debug_log(autoserv_log_url, autoserv_log_path) 87 lines = [line for line in debug_log if line] 88 # Task start time 89 self.start_time = _get_timestamp(lines[0]) 90 # Task end time 91 self.end_time = _get_timestamp(lines[-1]) 92 # Run time across end of a year. 93 if self.end_time < self.start_time: 94 self.end_time = self.end_time.replace(year=self.end_time.year+1) 95 self.time_used = self.end_time - self.start_time 96 97 # Start/end time from afe_special_tasks table. 98 # To support old special tasks, the value falls back to the ones from 99 # debug log. 100 self.start_time_db = (self.task.time_started if self.task.time_started 101 else self.start_time) 102 self.end_time_db = (self.task.time_finished if self.task.time_finished 103 else self.end_time) 104 self.time_used_db = self.end_time_db - self.start_time_db 105 106 107 class job_runtime(object): 108 """Details about the job run time, including time spent on special tasks. 109 """ 110 def __init__(self, job, suite_start_time=None, suite_end_time=None): 111 """Init job_run_time 112 """ 113 # AFE job ID 114 self.id = job.id 115 # AFE job model 116 self.job = job 117 # Name of the job, strip all build and suite info. 118 self.name = job.name.split('/')[-1] 119 120 try: 121 self.tko_job = tko_models.Job.objects.filter(afe_job_id=self.id)[0] 122 self.host_id = self.tko_job.machine_id 123 self.hostname = self.tko_job.machine.hostname 124 # Job start time 125 self.start_time = self.tko_job.started_time 126 # Job end time 127 self.end_time = self.tko_job.finished_time 128 self.time_used = self.end_time - self.start_time 129 except IndexError: 130 self.tko_job = None 131 self.host_id = 0 132 self.time_used = 0 133 134 # Link to log 135 self.log = JOB_URL % {'job_id': job.id, 'owner': job.owner, 136 'hostname': self.hostname} 137 138 # Special tasks run before job starts. 139 self.tasks_before = [] 140 # Special tasks run after job finished. 141 self.tasks_after = [] 142 143 # If job time used is not calculated, skip locating special tasks. 144 if not self.time_used: 145 return 146 147 # suite job has no special tasks 148 if not self.hostname: 149 return 150 151 tasks = tasks_cache.get(self.hostname, None) 152 if not tasks: 153 tasks = models.SpecialTask.objects.filter( 154 host__hostname=self.hostname, 155 time_started__gte=suite_start_time, 156 time_started__lte=suite_end_time) 157 tasks_cache[self.hostname] = tasks 158 for task in tasks: 159 if not task.queue_entry or task.queue_entry.job_id != self.id: 160 continue 161 t_runtime = task_runtime(task) 162 if task.time_started < self.start_time: 163 self.tasks_before.append(t_runtime) 164 else: 165 self.tasks_after.append(t_runtime) 166 167 168 def get_all_tasks(self): 169 return self.tasks_before + self.tasks_after 170 171 172 def get_first_task_start_time(self): 173 """Get the start time of first task, return test job start time if 174 there is no tasks_before 175 """ 176 start = self.start_time 177 for task in self.tasks_before: 178 if task.start_time_db < start: 179 start = task.start_time_db 180 return start 181 182 183 def get_last_task_end_time(self): 184 """Get the end time of last task, return test job end time if there is 185 no tasks_after. 186 """ 187 end = self.end_time 188 for task in self.tasks_after: 189 if end < task.end_time_db: 190 end = task.end_time_db 191 return end 192 193 194 def get_total_time(self): 195 return (self.get_last_task_end_time() - 196 self.get_first_task_start_time()).total_seconds() 197 198 199 def get_time_on_tasks(self): 200 time = 0 201 for task in self.tasks_before + self.tasks_after: 202 time += task.time_used.total_seconds() 203 return time 204 205 206 def get_time_on_tasks_from_db(self): 207 time = 0 208 for task in self.tasks_before + self.tasks_after: 209 time += task.time_used_db.total_seconds() 210 return time 211 212 213 def get_time_on_wait(self): 214 return (self.get_total_time() - 215 self.get_time_on_tasks() - 216 self.time_used.total_seconds()) 217 218 219 def get_time_on_wait_from_db(self): 220 return (self.get_total_time() - 221 self.get_time_on_tasks_from_db() - 222 self.time_used.total_seconds()) 223 224 225 def get_time_per_task_type(self, task_type): 226 """only used for suite job 227 """ 228 tasks = [] 229 for job in self.test_jobs: 230 tasks += [task for task in job.get_all_tasks() 231 if task.task.task == task_type] 232 if not tasks: 233 return None 234 235 task_min = tasks[0] 236 task_max = tasks[0] 237 total = 0 238 for task in tasks: 239 if task.time_used < task_min.time_used: 240 task_min = task 241 if task.time_used > task_max.time_used: 242 task_max = task 243 total += task.time_used.total_seconds() 244 return (task_min, task_max, total/len(tasks), len(tasks)) 245 246 247 def get_time_per_task_type_from_db(self, task_type): 248 """only used for suite job 249 """ 250 tasks = [] 251 for job in self.test_jobs: 252 tasks += [task for task in job.get_all_tasks() 253 if task.task.task == task_type] 254 if not tasks: 255 return None 256 257 task_min = tasks[0] 258 task_max = tasks[0] 259 total = 0 260 for task in tasks: 261 if task.time_used_db < task_min.time_used_db: 262 task_min = task 263 if task.time_used_db > task_max.time_used_db: 264 task_max = task 265 total += task.time_used_db.total_seconds() 266 return (task_min, task_max, total/len(tasks), len(tasks)) 267 268 269 def _get_timestamp(line): 270 """Get the time from the beginning of a log entry. 271 272 @param line: A line of log entry, e.g., "06/19 19:56:53.602 INFO |" 273 @return: A datetime value of the log entry. 274 """ 275 try: 276 time = re.match( '^(\d\d\/\d\d \d\d:\d\d:\d\d.\d+).*', line).group(1) 277 time = '%d/%s' % (datetime.now().year, time) 278 return datetime.strptime(time, '%Y/%m/%d %H:%M:%S.%f') 279 except: 280 return None 281 282 283 def _parse_args(args): 284 if not args: 285 print 'Try ./contrib/compare_suite.py --jobs 51,52,53' 286 sys.exit(0) 287 parser = argparse.ArgumentParser( 288 description=('A script to compare the performance of multiple suite' 289 ' runs.')) 290 parser.add_argument('--jobs', 291 help='A list of job IDs.') 292 return parser.parse_args(args) 293 294 295 def merge_time_dict(time_dict_list): 296 merged = {} 297 for time_dict in time_dict_list: 298 for key,val in time_dict.items(): 299 time_used = merged.get(key, 0) 300 merged[key] = time_used + val 301 return merged 302 303 304 def print_task_data(all_jobs, time_data): 305 percent_string = delimiter.join( 306 [str(100.0*data[2]*data[3]/suite_job.total_time) 307 if data else '_' for (data, suite_job) in 308 zip(time_data, all_jobs.keys())]) 309 print '%% on %s %s%s' % (task_type, delimiter, percent_string) 310 min_string = delimiter.join( 311 [(HYPERLINK % (data[0].log, data[0].time_used.total_seconds())) 312 if data else '_' for data in time_data]) 313 print ' %s min (seconds)%s%s' % (task_type, delimiter, min_string) 314 max_string = delimiter.join( 315 [HYPERLINK % (data[1].log, data[1].time_used.total_seconds()) 316 if data else '_' for data in time_data]) 317 print ' %s max (seconds)%s%s' % (task_type, delimiter, max_string) 318 ave_string = delimiter.join( 319 [str(data[2]) if data else '_' for data in time_data]) 320 print ' %s average (seconds)%s%s' % (task_type, delimiter, ave_string) 321 322 323 if __name__ == '__main__': 324 args = _parse_args(sys.argv[1:]) 325 print 'Comparing jobs: %s' % args.jobs 326 job_ids = [int(id) for id in args.jobs.split(',')] 327 328 # Make sure all jobs are suite jobs 329 parent_jobs = [job.parent_job_id for job in 330 models.Job.objects.filter(id__in=job_ids)] 331 if any(parent_jobs): 332 print ('Some jobs are not suite job. Please provide a list of suite job' 333 ' IDs.') 334 sys.exit(1) 335 336 # A dictionary of suite_job_runtime:{test_job_name:test_job_runtime} 337 all_jobs = {} 338 for job_id in job_ids: 339 suite_job = models.Job.objects.filter(id=job_id)[0] 340 suite_job_runtime = job_runtime(suite_job) 341 test_jobs = models.Job.objects.filter(parent_job_id=job_id) 342 if len(test_jobs) == 0: 343 print 'No child job found for suite job: %d' % job_id 344 sys.exit(1) 345 test_job_runtimes = [job_runtime(job, suite_job_runtime.start_time, 346 suite_job_runtime.end_time) 347 for job in test_jobs] 348 suite_job_runtime.test_jobs = test_job_runtimes 349 suite_job_runtime.hosts = set([job.host_id 350 for job in test_job_runtimes 351 if job.host_id != 0]) 352 suite_job_runtime.total_time = sum( 353 [job.get_total_time() for job in test_job_runtimes]) 354 suite_job_runtime.total_time_on_tasks = sum( 355 [job.get_time_on_tasks() for job in test_job_runtimes]) 356 suite_job_runtime.total_time_on_tasks_from_db = sum( 357 [job.get_time_on_tasks_from_db() for job in test_job_runtimes]) 358 suite_job_runtime.total_time_on_wait = sum( 359 [job.get_time_on_wait() for job in test_job_runtimes]) 360 suite_job_runtime.total_time_on_wait_from_db = sum( 361 [job.get_time_on_wait_from_db() for job in test_job_runtimes]) 362 suite_job_runtime.percent_on_tasks = 100*( 363 suite_job_runtime.total_time_on_tasks / 364 suite_job_runtime.total_time) 365 suite_job_runtime.percent_on_wait = 100*( 366 suite_job_runtime.total_time_on_wait / 367 suite_job_runtime.total_time) 368 suite_job_runtime.percent_on_tasks_from_db = 100*( 369 suite_job_runtime.total_time_on_tasks_from_db / 370 suite_job_runtime.total_time) 371 suite_job_runtime.percent_on_wait_from_db = 100*( 372 suite_job_runtime.total_time_on_wait_from_db / 373 suite_job_runtime.total_time) 374 all_jobs[suite_job_runtime] = {r.name:r for r in test_job_runtimes} 375 376 delimiter = '\t' 377 # Print a row of suite job IDs. 378 print ('job ID%s' % delimiter + 379 delimiter.join([str(suite_job.id) 380 for suite_job in all_jobs.keys()])) 381 382 # Print a row of platforms, e.g., lumpy-release. 383 print ('platform%s' % delimiter + 384 delimiter.join([suite_job.job.name.split('/')[0] 385 for suite_job in all_jobs.keys()])) 386 387 # Print time to run suite 388 print ('time(mins)%s' % delimiter + 389 delimiter.join([str(suite_job.time_used.total_seconds()/60) 390 for suite_job in all_jobs.keys()])) 391 392 # Print percent of time on tasks 393 print ('%% on special tasks%s' % delimiter + 394 delimiter.join([str(suite_job.percent_on_tasks) 395 for suite_job in all_jobs.keys()])) 396 397 # Print percent of time on tasks based on time data from afe_special_tasks 398 print ('%% on special tasks with data from DB%s' % delimiter + 399 delimiter.join([str(suite_job.percent_on_tasks_from_db) 400 for suite_job in all_jobs.keys()])) 401 402 # Print percent of time on tasks per task type 403 all_task_types = set() 404 for suite_job in all_jobs.keys(): 405 for job in suite_job.test_jobs: 406 all_task_types.update( 407 [task.task.task for task in job.get_all_tasks()]) 408 for task_type in all_task_types: 409 print 'Time data retrieved from debug log.' 410 time_data = [suite_job.get_time_per_task_type(task_type) 411 for suite_job in all_jobs.keys()] 412 print_task_data(all_jobs, time_data) 413 414 print 'Time data retrieved from database' 415 time_data = [suite_job.get_time_per_task_type_from_db(task_type) 416 for suite_job in all_jobs.keys()] 417 print_task_data(all_jobs, time_data) 418 419 count_string = delimiter.join( 420 [str(data[3]) if data else '_' for data in time_data]) 421 print ' %s count%s%s' % (task_type, delimiter, count_string) 422 423 # Print percent of time on wait 424 print ('%% on wait%s' % delimiter + 425 delimiter.join([str(suite_job.percent_on_wait) 426 for suite_job in all_jobs.keys()])) 427 428 # Print percent of time on wait based on special task time data from 429 # database 430 print ('%% on wait based on time data from db%s' % delimiter + 431 delimiter.join([str(suite_job.percent_on_wait_from_db) 432 for suite_job in all_jobs.keys()])) 433 434 # Print the number of duts used for suite. 435 print ('dut #%s' % delimiter + 436 delimiter.join([str(len(suite_job.hosts)) 437 for suite_job in all_jobs.keys()])) 438 439 for test_name in [job for job in all_jobs.values()[0].keys()]: 440 line = '%s%s' % (test_name, delimiter) 441 for suite_job in all_jobs.keys(): 442 test_job = all_jobs[suite_job].get(test_name) 443 if test_job: 444 line += (HYPERLINK % 445 (test_job.log, test_job.time_used.total_seconds()) 446 + delimiter) 447 else: 448 line += '_%s' % delimiter 449 print line 450