#!/usr/bin/env python

# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Poll the Chromium GPU bot waterfalls and report offline or failing bots.

Each poll fetches builder JSON from the waterfalls listed in `Waterfall`,
prints the offline and failed bots, compares them with the results stored by
the previous poll, and optionally emails a summary when something changed.
"""

import argparse
import datetime
import getpass
import json
import os
import smtplib
import sys
import time
import urllib

try:
    import urllib2
except ImportError:
    # Python 3: quote() and urlopen() live in urllib.parse / urllib.request.
    import urllib.parse
    import urllib.request
    _quote = urllib.parse.quote
    _urlopen = urllib.request.urlopen
else:
    _quote = urllib.quote
    _urlopen = urllib2.urlopen


class Emailer(object):
    """Sends plain-text notification emails through GMAIL_SMTP_SERVER."""

    DEFAULT_EMAIL_PASSWORD_FILE = '.email_password'
    GMAIL_SMTP_SERVER = 'smtp.gmail.com:587'
    SUBJECT = 'Chrome GPU Bots Notification'

    def __init__(self, email_from, email_to, email_password_file):
        """Store the addresses and load (or prompt for) the password.

        Args:
          email_from: sender address, also used as the SMTP login name.
          email_to: list of recipient addresses.
          email_password_file: path of a file holding the password, or None
              to use DEFAULT_EMAIL_PASSWORD_FILE.
        """
        self.email_from = email_from
        self.email_to = email_to
        self.email_password = Emailer._getEmailPassword(email_password_file)

    @staticmethod
    def format_email_body(time_str, offline_str, failed_str, noteworthy_str):
        """Return the four report sections concatenated in order."""
        return '%s%s%s%s' % (time_str, offline_str, failed_str,
                             noteworthy_str)

    def send_email(self, body):
        """Send `body` to all recipients; failures are printed, not raised."""
        message = 'From: %s\r\nTo: %s\r\nSubject: %s\r\n\r\n%s' % (
            self.email_from, ','.join(self.email_to), Emailer.SUBJECT, body)

        try:
            server = self._connect()
            try:
                server.sendmail(self.email_from, self.email_to, message)
            finally:
                # Close the session even when sendmail() raises.
                Emailer._disconnect(server)
        except Exception as e:
            print('Error sending email: %s' % str(e))

    def testEmailLogin(self):
        """Log in and out once; raises if the connection or login fails."""
        Emailer._disconnect(self._connect())

    def _connect(self):
        """Return a TLS-upgraded, logged-in SMTP session."""
        server = smtplib.SMTP(Emailer.GMAIL_SMTP_SERVER)
        try:
            server.starttls()
            server.login(self.email_from, self.email_password)
        except Exception:
            # Do not leak the socket when the handshake or login fails.
            server.close()
            raise
        return server

    @staticmethod
    def _disconnect(server):
        """End the SMTP session, falling back to a plain socket close."""
        try:
            server.quit()
        except (smtplib.SMTPException, EnvironmentError):
            server.close()

    @staticmethod
    def _getEmailPassword(email_password_file):
        """Read the password from file, or prompt if the file is missing."""
        password_file = email_password_file
        if password_file is None:
            password_file = Emailer.DEFAULT_EMAIL_PASSWORD_FILE

        if os.path.isfile(password_file):
            with open(password_file, 'r') as f:
                return f.read().strip()

        return getpass.getpass(
            'Please enter email password for source email account: ')


class GpuBot(object):
    """One builder on one waterfall, plus what was learned about it."""

    def __init__(self, waterfall_name, bot_name, bot_data):
        """
        Args:
          waterfall_name: name of the waterfall the bot belongs to.
          bot_name: builder name on that waterfall.
          bot_data: the builder's JSON dict from the waterfall, or None.
        """
        self.waterfall_name = waterfall_name
        self.bot_name = bot_name
        self.bot_data = bot_data
        self._end_time = None
        self._hours_since_last_run = None
        self.failure_string = None
        self.bot_url = None
        self.build_url = None

    def getEndTime(self):
        """Return the last build's end time (struct_time) or None."""
        return self._end_time

    def setEndTime(self, end_time):
        """Record the end time and the hours elapsed between it and now."""
        self._end_time = end_time
        self._hours_since_last_run = roughTimeDiffInHours(
            end_time, time.localtime())

    def getHoursSinceLastRun(self):
        """Return hours since the last build ended, or None if unknown."""
        return self._hours_since_last_run

    def toDict(self):
        """Return a JSON-serializable dict; unset fields are omitted."""
        result = {'waterfall_name': self.waterfall_name,
                  'bot_name': self.bot_name}

        if self._end_time is not None:
            result['end_time'] = serialTime(self._end_time)
            result['hours_since_last_run'] = self._hours_since_last_run

        if self.failure_string is not None:
            result['failure_string'] = self.failure_string

        if self.bot_url is not None:
            result['bot_url'] = self.bot_url

        if self.build_url is not None:
            result['build_url'] = self.build_url

        return result

    @staticmethod
    def fromDict(dict):
        """Rebuild a GpuBot from a dict produced by toDict().

        The parameter keeps its original name (which shadows the builtin)
        so existing callers are unaffected.
        """
        gpu_bot = GpuBot(dict['waterfall_name'], dict['bot_name'], None)

        if 'end_time' in dict:
            gpu_bot._end_time = unserializeTime(dict['end_time'])

        if 'hours_since_last_run' in dict:
            gpu_bot._hours_since_last_run = dict['hours_since_last_run']

        if 'failure_string' in dict:
            gpu_bot.failure_string = dict['failure_string']

        if 'bot_url' in dict:
            gpu_bot.bot_url = dict['bot_url']

        if 'build_url' in dict:
            gpu_bot.build_url = dict['build_url']

        return gpu_bot


def errorNoMostRecentBuild(waterfall_name, bot_name):
    """Print a notice that no completed build was found for the bot."""
    print('No most recent build available: %s::%s' %
          (waterfall_name, bot_name))


class Waterfall(object):
    """Static helpers that query the waterfall JSON endpoints."""

    BASE_URL = 'http://build.chromium.org/p/'
    BASE_BUILD_URL = BASE_URL + '%s/builders/%s'
    SPECIFIC_BUILD_URL = BASE_URL + '%s/builders/%s/builds/%s'
    BASE_JSON_BUILDERS_URL = BASE_URL + '%s/json/builders'
    BASE_JSON_BUILDS_URL = BASE_URL + '%s/json/builders/%s/builds'
    REGULAR_WATERFALLS = ['chromium.gpu',
                          'tryserver.chromium.gpu',
                          'chromium.gpu.fyi']
    WEBKIT_GPU_BOTS = ['GPU Win Builder',
                       'GPU Win Builder (dbg)',
                       'GPU Win7 (NVIDIA)',
                       'GPU Win7 (dbg) (NVIDIA)',
                       'GPU Mac Builder',
                       'GPU Mac Builder (dbg)',
                       'GPU Mac10.7',
                       'GPU Mac10.7 (dbg)',
                       'GPU Linux Builder',
                       'GPU Linux Builder (dbg)',
                       'GPU Linux (NVIDIA)',
                       'GPU Linux (dbg) (NVIDIA)']
    FILTERED_WATERFALLS = [('chromium.webkit', WEBKIT_GPU_BOTS)]

    @staticmethod
    def getJsonFromUrl(url):
        """Fetch `url` and return its body parsed as JSON."""
        conn = _urlopen(url)
        try:
            result = conn.read()
        finally:
            # Release the connection even if read() fails.
            conn.close()

        if not isinstance(result, str):
            # Python 3 returns bytes; older json.loads() only accepts text.
            result = result.decode('utf-8')

        return json.loads(result)

    @staticmethod
    def getBuildersJsonForWaterfall(waterfall):
        """Return the builders JSON for every builder on `waterfall`."""
        querystring = '?filter'
        return Waterfall.getJsonFromUrl(
            (Waterfall.BASE_JSON_BUILDERS_URL + '%s') %
            (waterfall, querystring))

    @staticmethod
    def getLastNBuildsForBuilder(n, waterfall, builder):
        """Return JSON for the last `n` builds, keyed '-1' .. '-n'.

        Returns an empty dict without contacting the server when n <= 0.
        """
        if n <= 0:
            return {}

        selects = ''.join('select=-%d&' % (i + 1) for i in range(n))
        querystring = '?' + selects + 'filter'

        return Waterfall.getJsonFromUrl(
            (Waterfall.BASE_JSON_BUILDS_URL + '%s') %
            (waterfall, _quote(builder), querystring))

    @staticmethod
    def getFilteredBuildersJsonForWaterfall(waterfall, filter):
        """Return the builders JSON restricted to the names in `filter`."""
        selects = ''.join('select=%s&' % _quote(bot_name)
                          for bot_name in filter)
        querystring = '?' + selects + 'filter'

        return Waterfall.getJsonFromUrl(
            (Waterfall.BASE_JSON_BUILDERS_URL + '%s') %
            (waterfall, querystring))

    @staticmethod
    def getAllGpuBots():
        """Return {waterfall_name: builders JSON} for all GPU waterfalls."""
        allbots = {k: Waterfall.getBuildersJsonForWaterfall(k)
                   for k in Waterfall.REGULAR_WATERFALLS}

        filteredbots = {
            name: Waterfall.getFilteredBuildersJsonForWaterfall(name, names)
            for name, names in Waterfall.FILTERED_WATERFALLS}

        allbots.update(filteredbots)

        return allbots

    @staticmethod
    def _newBot(waterfall_name, bot_name, bot_data):
        """Create a GpuBot with its builder page URL filled in."""
        gpu_bot = GpuBot(waterfall_name, bot_name, bot_data)
        gpu_bot.bot_url = Waterfall.BASE_BUILD_URL % (waterfall_name,
                                                      _quote(bot_name))
        return gpu_bot

    @staticmethod
    def getOfflineBots(bots):
        """Return a GpuBot for every builder whose state is 'offline'.

        Args:
          bots: mapping as returned by getAllGpuBots().
        """
        offline_bots = []

        for waterfall_name in bots:
            waterfall = bots[waterfall_name]

            for bot_name in waterfall:
                bot = waterfall[bot_name]

                if bot['state'] != 'offline':
                    continue

                gpu_bot = Waterfall._newBot(waterfall_name, bot_name, bot)

                most_recent_build = (
                    Waterfall.getMostRecentlyCompletedBuildForBot(gpu_bot))

                times = None
                if most_recent_build and 'times' in most_recent_build:
                    times = most_recent_build['times']

                # time.localtime(None) means "now", so a missing end time
                # must not be passed through as if it were a timestamp.
                if times and len(times) > 1 and times[1] is not None:
                    gpu_bot.setEndTime(time.localtime(times[1]))
                else:
                    errorNoMostRecentBuild(waterfall_name, bot_name)

                offline_bots.append(gpu_bot)

        return offline_bots

    @staticmethod
    def getMostRecentlyCompletedBuildForBot(bot):
        """Return the newest build of `bot` that has results, or None.

        The result is cached in bot.bot_data['most_recent_build'] when
        bot_data is available.
        """
        if bot.bot_data is not None and 'most_recent_build' in bot.bot_data:
            return bot.bot_data['most_recent_build']

        # Unfortunately, the JSON API doesn't provide a "most recent
        # completed build" call. We just have to get some number of the most
        # recent (including current, in-progress builds) and give up if
        # that's not enough.
        NUM_BUILDS = 10
        builds = Waterfall.getLastNBuildsForBuilder(
            NUM_BUILDS, bot.waterfall_name, bot.bot_name)

        for i in range(NUM_BUILDS):
            # The server may return fewer entries than were requested.
            current_build = builds.get('-%d' % (i + 1))

            if not isinstance(current_build, dict):
                continue

            if current_build.get('results') is not None:
                if bot.bot_data is not None:
                    bot.bot_data['most_recent_build'] = current_build

                return current_build

        return None

    @staticmethod
    def getFailedBots(bots):
        """Return a GpuBot for every builder whose last build text has
        a 'failed' entry.

        Args:
          bots: mapping as returned by getAllGpuBots().
        """
        failed_bots = []

        for waterfall_name in bots:
            waterfall = bots[waterfall_name]

            for bot_name in waterfall:
                gpu_bot = Waterfall._newBot(waterfall_name, bot_name,
                                            waterfall[bot_name])

                most_recent_build = (
                    Waterfall.getMostRecentlyCompletedBuildForBot(gpu_bot))

                if not most_recent_build:
                    errorNoMostRecentBuild(waterfall_name, bot_name)
                    continue

                # 'text' may be absent or None; treat both as "no text".
                text = most_recent_build.get('text') or []

                if 'failed' in text:
                    gpu_bot.failure_string = ' '.join(text)
                    gpu_bot.build_url = Waterfall.SPECIFIC_BUILD_URL % (
                        waterfall_name, _quote(bot_name),
                        most_recent_build['number'])
                    failed_bots.append(gpu_bot)

        return failed_bots


def formatTime(t):
    """Format a struct_time like 'Wed, 01 Jan 2014 12:00:00'."""
    return time.strftime("%a, %d %b %Y %H:%M:%S", t)


def roughTimeDiffInHours(t1, t2):
    """Return the absolute difference between two struct_times in hours.

    Only the year..second fields are used; DST and time zone are ignored.
    """
    first, second = [
        datetime.datetime(t.tm_year, t.tm_mon, t.tm_mday,
                          t.tm_hour, t.tm_min, t.tm_sec)
        for t in (t1, t2)]

    hours = float((first - second).total_seconds()) / 3600.0

    return abs(hours)


def getBotStr(bot):
    """Return a multi-line, human-readable description of one bot."""
    s = ' %s::%s\n' % (bot.waterfall_name, bot.bot_name)

    if bot.failure_string is not None:
        s += ' failure: %s\n' % bot.failure_string

    if bot.getEndTime() is not None:
        s += (' last build end time: %s (roughly %f hours ago)\n' %
              (formatTime(bot.getEndTime()), bot.getHoursSinceLastRun()))

    if bot.bot_url is not None:
        s += ' bot url: %s\n' % bot.bot_url

    if bot.build_url is not None:
        s += ' build url: %s\n' % bot.build_url

    s += '\n'
    return s


def getBotsStr(bots):
    """Return the descriptions of all `bots` followed by a blank line."""
    return ''.join(getBotStr(bot) for bot in bots) + '\n'


def getOfflineBotsStr(offline_bots):
    """Return the 'Offline bots' report section."""
    return 'Offline bots:\n%s' % getBotsStr(offline_bots)


def getFailedBotsStr(failed_bots):
    """Return the 'Failed bots' report section."""
    return 'Failed bots:\n%s' % getBotsStr(failed_bots)


def getBotDicts(bots):
    """Return the toDict() form of every bot in `bots`."""
    return [bot.toDict() for bot in bots]


def unserializeTime(t):
    """Rebuild a struct_time from a dict produced by serialTime().

    The weekday and day-of-year are derived from the date so that
    formatTime() prints the correct weekday for restored times.
    """
    return datetime.datetime(t['year'], t['mon'], t['day'],
                             t['hour'], t['min'], t['sec']).timetuple()


def serialTime(t):
    """Return the year..second fields of a struct_time as a dict."""
    return {'year': t.tm_year, 'mon': t.tm_mon, 'day': t.tm_mday,
            'hour': t.tm_hour, 'min': t.tm_min, 'sec': t.tm_sec}


def getSummary(offline_bots, failed_bots):
    """Return the JSON-serializable summary stored between polls."""
    offline_bot_dict = getBotDicts(offline_bots)
    failed_bot_dict = getBotDicts(failed_bots)
    return {'offline': offline_bot_dict, 'failed': failed_bot_dict}


def findBot(name, lst, waterfall_name=None):
    """Return the first bot in `lst` named `name`, or None.

    Args:
      name: builder name to look for.
      lst: iterable of GpuBot objects.
      waterfall_name: when given, the bot must also belong to this
          waterfall; builder names are not unique across waterfalls.
    """
    for bot in lst:
        if bot.bot_name != name:
            continue

        if waterfall_name is None or bot.waterfall_name == waterfall_name:
            return bot

    return None


def getNoteworthyEvents(offline_bots, failed_bots, previous_results):
    """Compare the current poll with the previous one.

    Args:
      offline_bots: GpuBots currently offline.
      failed_bots: GpuBots whose last completed build failed.
      previous_results: dict with optional 'offline' and 'failed' lists of
          GpuBots from the previous poll.

    Returns:
      A dict with the keys 'offline', 'failed', 'recovered_failures' and
      'recovered_offline', each mapping to a list of GpuBots.
    """
    CRITICAL_NUM_HOURS = 1.0

    def offlineTooLong(bot):
        # An unknown duration (None) never counts as "too long".
        hours = bot.getHoursSinceLastRun()
        return hours is not None and hours >= CRITICAL_NUM_HOURS

    def find(bot, lst):
        # Match on waterfall as well: several waterfalls share bot names.
        return findBot(bot.bot_name, lst, bot.waterfall_name)

    previous_offline = previous_results.get('offline', [])
    previous_failures = previous_results.get('failed', [])

    noteworthy_offline = []
    for bot in offline_bots:
        if not offlineTooLong(bot):
            continue

        previous_bot = find(bot, previous_offline)

        if previous_bot is None or not offlineTooLong(previous_bot):
            noteworthy_offline.append(bot)

    noteworthy_new_failures = [
        bot for bot in failed_bots
        if find(bot, previous_failures) is None]

    noteworthy_new_offline_recoveries = [
        bot for bot in previous_offline
        if offlineTooLong(bot) and find(bot, offline_bots) is None]

    noteworthy_new_failure_recoveries = [
        bot for bot in previous_failures
        if find(bot, failed_bots) is None]

    return {'offline': noteworthy_offline,
            'failed': noteworthy_new_failures,
            'recovered_failures': noteworthy_new_failure_recoveries,
            'recovered_offline': noteworthy_new_offline_recoveries}


def getNoteworthyStr(noteworthy_events):
    """Return the 'IMPORTANT ...' report sections for non-empty events."""
    sections = [
        ('offline', 'IMPORTANT bots newly offline for over an hour:\n'),
        ('failed', 'IMPORTANT new failing bots:\n'),
        ('recovered_offline',
         'IMPORTANT newly recovered previously offline bots:\n'),
        ('recovered_failures', 'IMPORTANT newly recovered failing bots:\n'),
    ]

    s = ''

    for key, header in sections:
        bots = noteworthy_events[key]

        if bots:
            s += header
            s += ''.join(getBotStr(bot) for bot in bots)
            s += '\n'

    return s


def dictsToBots(bots):
    """Convert a stored summary (see getSummary) back into GpuBots.

    Missing 'offline' or 'failed' keys are treated as empty lists.
    """
    offline_bots = [GpuBot.fromDict(bot) for bot in bots.get('offline', [])]
    failed_bots = [GpuBot.fromDict(bot) for bot in bots.get('failed', [])]

    return {'offline': offline_bots, 'failed': failed_bots}


class GpuBotPoller(object):
    """Runs one poll of the waterfalls and reports the outcome."""

    DEFAULT_PREVIOUS_RESULTS_FILE = '.check_gpu_bots_previous_results'

    def __init__(self, emailer, send_email_for_recovered_offline_bots,
                 send_email_for_recovered_failing_bots, send_email_on_error,
                 previous_results_file):
        """
        Args:
          emailer: an Emailer, or None to disable email.
          send_email_for_recovered_offline_bots: email on offline recovery.
          send_email_for_recovered_failing_bots: email on failure recovery.
          send_email_on_error: email when a poll raises.
          previous_results_file: path for stored results, or None to use
              DEFAULT_PREVIOUS_RESULTS_FILE.
        """
        self.emailer = emailer

        self.send_email_for_recovered_offline_bots = (
            send_email_for_recovered_offline_bots)

        self.send_email_for_recovered_failing_bots = (
            send_email_for_recovered_failing_bots)

        self.send_email_on_error = send_email_on_error
        self.previous_results_file = previous_results_file

    def _resultsFile(self):
        """Return the configured results path or the default one."""
        if self.previous_results_file is not None:
            return self.previous_results_file

        return GpuBotPoller.DEFAULT_PREVIOUS_RESULTS_FILE

    def shouldEmail(self, noteworthy_events):
        """Return True if `noteworthy_events` warrants sending an email."""
        if noteworthy_events['offline'] or noteworthy_events['failed']:
            return True

        if (self.send_email_for_recovered_offline_bots and
                noteworthy_events['recovered_offline']):
            return True

        if (self.send_email_for_recovered_failing_bots and
                noteworthy_events['recovered_failures']):
            return True

        return False

    def writeResults(self, summary):
        """Write `summary` as JSON to the results file."""
        with open(self._resultsFile(), 'w') as f:
            f.write(json.dumps(summary))

    def getPreviousResults(self):
        """Load the previous poll's results; {} if absent or unreadable."""
        previous_results_file = self._resultsFile()

        if not os.path.isfile(previous_results_file):
            return {}

        try:
            with open(previous_results_file, 'r') as f:
                return dictsToBots(json.loads(f.read()))
        except (ValueError, KeyError, TypeError, AttributeError) as e:
            # A corrupt file would otherwise fail every poll before the
            # file could be rewritten, so start from an empty history.
            print('Ignoring unreadable previous results file %s: %s' %
                  (previous_results_file, str(e)))
            return {}

    def checkBots(self):
        """Run one poll: fetch, print, store results and maybe email."""
        time_str = 'Current time: %s\n\n' % (formatTime(time.localtime()))
        print(time_str)

        try:
            bots = Waterfall.getAllGpuBots()

            offline_bots = Waterfall.getOfflineBots(bots)
            offline_str = getOfflineBotsStr(offline_bots)
            print(offline_str)

            failed_bots = Waterfall.getFailedBots(bots)
            failed_str = getFailedBotsStr(failed_bots)
            print(failed_str)

            previous_results = self.getPreviousResults()
            noteworthy_events = getNoteworthyEvents(
                offline_bots, failed_bots, previous_results)

            noteworthy_str = getNoteworthyStr(noteworthy_events)
            print(noteworthy_str)

            summary = getSummary(offline_bots, failed_bots)
            self.writeResults(summary)

            if (self.emailer is not None and
                    self.shouldEmail(noteworthy_events)):
                self.emailer.send_email(Emailer.format_email_body(
                    time_str, offline_str, failed_str, noteworthy_str))
        except Exception as e:
            # Top-level boundary: one failed poll must not stop the loop.
            error_str = 'Error: %s' % str(e)
            print(error_str)

            if self.send_email_on_error and self.emailer is not None:
                self.emailer.send_email(error_str)


def parseArgs(sys_args):
    """Parse and validate the command line.

    Args:
      sys_args: full argv list, program name first.

    Returns:
      The argparse namespace. Invalid combinations end the program through
      parser.error().
    """
    parser = argparse.ArgumentParser(
        prog=sys_args[0],
        description='Query the Chromium GPU Bots Waterfall, output ' +
        'potential problems, and optionally repeat automatically and/or ' +
        'email notifications of results.')

    parser.add_argument(
        '--repeat-delay', type=int, dest='repeat_delay', required=False,
        help='How often to automatically re-run the script, in minutes.')

    parser.add_argument(
        '--email-from', type=str, dest='email_from', required=False,
        help='Email address to send from. Requires also specifying ' +
        '\'--email-to\'.')

    parser.add_argument(
        '--email-to', type=str, dest='email_to', required=False, nargs='+',
        help='Email address(es) to send to. Requires also specifying ' +
        '\'--email-from\'')

    parser.add_argument(
        '--send-email-for-recovered-offline-bots',
        dest='send_email_for_recovered_offline_bots', action='store_true',
        default=False,
        help='Send an email out when a bot which has been offline for more ' +
        'than 1 hour goes back online.')

    parser.add_argument(
        '--send-email-for-recovered-failing-bots',
        dest='send_email_for_recovered_failing_bots',
        action='store_true', default=False,
        help='Send an email when a failing bot recovers.')

    parser.add_argument(
        '--send-email-on-error',
        dest='send_email_on_error',
        action='store_true', default=False,
        help='Send an email when the script has an error. For example, if ' +
        'the server is unreachable.')

    parser.add_argument(
        '--email-password-file',
        dest='email_password_file',
        required=False,
        help=(('File containing the plaintext password of the source email ' +
               'account. By default, \'%s\' will be tried. If it does not ' +
               'exist, you will be prompted. If you opt to store your ' +
               'password on disk in plaintext, use of a dummy account is ' +
               'strongly recommended.')
              % Emailer.DEFAULT_EMAIL_PASSWORD_FILE))

    parser.add_argument(
        '--previous-results-file',
        dest='previous_results_file',
        required=False,
        help=(('File to store the results of the previous invocation of ' +
               'this script. By default, \'%s\' will be used.')
              % GpuBotPoller.DEFAULT_PREVIOUS_RESULTS_FILE))

    args = parser.parse_args(sys_args[1:])

    if args.email_from is not None and args.email_to is None:
        parser.error('--email-from requires --email-to.')
    elif args.email_to is not None and args.email_from is None:
        parser.error('--email-to requires --email-from.')
    elif (args.email_from is None and
          args.send_email_for_recovered_offline_bots):
        parser.error('--send-email-for-recovered-offline-bots requires ' +
                     '--email-to and --email-from.')
    elif (args.email_from is None and
          args.send_email_for_recovered_failing_bots):
        parser.error('--send-email-for-recovered-failing-bots ' +
                     'requires --email-to and --email-from.')
    elif args.email_from is None and args.send_email_on_error:
        parser.error('--send-email-on-error ' +
                     'requires --email-to and --email-from.')
    elif (args.email_password_file and
          not os.path.isfile(args.email_password_file)):
        parser.error('File does not exist: %s' % args.email_password_file)

    return args


def main(sys_args):
    """Script entry point; returns the process exit code."""
    args = parseArgs(sys_args)

    emailer = None
    if args.email_from is not None and args.email_to is not None:
        emailer = Emailer(args.email_from, args.email_to,
                          args.email_password_file)

        # Fail fast on bad credentials instead of at the first notification.
        try:
            emailer.testEmailLogin()
        except Exception as e:
            print('Error logging into email account: %s' % str(e))
            return 1

    poller = GpuBotPoller(emailer,
                          args.send_email_for_recovered_offline_bots,
                          args.send_email_for_recovered_failing_bots,
                          args.send_email_on_error,
                          args.previous_results_file)

    while True:
        poller.checkBots()

        if args.repeat_delay is None:
            break

        print('Will run again in %d minutes...\n' % args.repeat_delay)
        time.sleep(args.repeat_delay * 60)

    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))