#!/usr/bin/env python
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""This script queries the Chromium issue tracker and e-mails the results.

It queries issue tracker using Issue Tracker API. The query
parameters can be specified by command-line arguments. For example, with the
following command:

  'python bug_hunter.py -q video Status:Unconfirmed OR audio Status:Unconfirmed
  -s sender@chromium.org -r receiver@chromium.org -v 100 -u days'

You will find all 'Unconfirmed' issues created in the last 100 days containing
'video' or 'audio' in their content/comments. The content of these issues is
sent to receiver@chromium.org.

TODO(imasaki): users can specify the interval as say: "100d" for "100 days".

There are two limitations in the current implementation of issue tracker API
and UI:
* only outermost OR is valid. For example, the query
  'video OR audio Status:Unconfirmed' is translated into
  'video OR (audio AND Status:Unconfirmed)'
* brackets are not supported. For example, the query
  '(video OR audio) Status:Unconfirmed' does not work.

You need to install following to run this script
  gdata-python-client (http://code.google.com/p/gdata-python-client/)
  rfc3339.py (http://henry.precheur.org/projects/rfc3339)

Links:
* Chromium issue tracker: http://code.google.com/p/chromium/issues/list
* Issue tracker API: http://code.google.com/p/support/wiki/IssueTrackerAPI
* Search tips for the issue tracker:
  http://code.google.com/p/chromium/issues/searchtips
"""

import csv
import datetime
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
import logging
from operator import itemgetter
import optparse
import re
import smtplib
import socket
import sys
import urllib

try:
  import gdata.data
  import gdata.projecthosting.client
except ImportError:
  logging.error('gdata-client needs to be installed. Please install\n'
                'and try again (http://code.google.com/p/gdata-python-client/)')
  sys.exit(1)

try:
  import rfc3339
except ImportError:
  logging.error('rfc3339 needs to be installed. Please install\n'
                'and try again (http://henry.precheur.org/projects/rfc3339)')
  sys.exit(1)

# A list of default values.
_DEFAULT_INTERVAL_UNIT = 'hours'
_DEFAULT_ISSUE_ELEMENT_IN_EMAIL = ('author', 'status', 'state', 'content',
                                   'comments', 'labels', 'urls')
_DEFAULT_PROJECT_NAME = 'chromium'
_DEFAULT_QUERY_TITLE = 'potential media bugs'
_DEFAULT_QUERY = ('video -has:Feature -has:Owner -label:nomedia '
                  'status:Unconfirmed OR audio -has:Feature -has:Owner '
                  '-label:nomedia status:Unconfirmed')
_DEFAULT_OUTPUT_FILENAME = 'output.csv'
_DEFAULT_MAX_COMMENTS = 1000
# Misspelled historical name, kept as an alias so that existing references
# to it keep working.
_DETAULT_MAX_COMMENTS = _DEFAULT_MAX_COMMENTS

_INTERVAL_UNIT_CHOICES = ('hours', 'days', 'weeks')

# URLs in this list are excluded from URL extraction from bug
# content/comments. Each list element should not contain the url ending in
# '/'. For example, the element should be 'http://www.google.com' but not
# 'http://www.google.com/'
_URL_EXCLUSION_LIST = ('http://www.youtube.com/html5',
                       'http://www.google.com')
_ISSUE_ELEMENT_IN_EMAIL_CHOICES = ('issue_id', 'author', 'status', 'state',
                                   'content', 'comments', 'labels', 'urls',
                                   'mstone')


def ParseArgs():
  """Parses the command-line arguments.

  Returns:
    The optparse.Values object produced by the parser. Its |email_entries|
    attribute is converted from the comma-separated option string into a
    list holding only entries found in _ISSUE_ELEMENT_IN_EMAIL_CHOICES; if
    nothing valid remains, the default entries are used instead.
  """
  parser = optparse.OptionParser()

  parser.add_option('-e', '--email-entries',
                    help=('A comma-separated list of issue entries that are '
                          'sent in the email content. '
                          'Possible strings are %s. Default: %%default.' %
                          ', '.join(_ISSUE_ELEMENT_IN_EMAIL_CHOICES)),
                    default=','.join(_DEFAULT_ISSUE_ELEMENT_IN_EMAIL))
  parser.add_option('-l', '--max-comments',
                    help=('The maximum number of comments returned for each '
                          'issue in a reverse chronological order. '
                          'Default: %default.'),
                    type='int', default=_DEFAULT_MAX_COMMENTS)
  parser.add_option('-o', '--output-filename',
                    help=('Filename for result output in CSV format. '
                          'Default: %default.'),
                    default=_DEFAULT_OUTPUT_FILENAME, metavar='FILE')
  parser.add_option('-p', '--project-name', default=_DEFAULT_PROJECT_NAME,
                    help='Project name string. Default: %default')
  parser.add_option('-q', '--query', default=_DEFAULT_QUERY,
                    help=('Query to be used to find bugs. The detail can be '
                          'found in Chromium Issue tracker page '
                          'http://code.google.com/p/chromium/issues/searchtips.'
                          ' Default: "%default".'))
  parser.add_option('-r', '--receiver-email-address',
                    help="Receiver's email address (Required).")
  parser.add_option('-s', '--sender-email-address',
                    help="Sender's email address (Required).")
  parser.add_option('-t', '--query-title',
                    default=_DEFAULT_QUERY_TITLE, dest='query_title',
                    help=('Query title string used in the subject of the '
                          'result email. Default: %default.'))
  # '--interval-unit' matches the hyphenated spelling of every other option;
  # the original '--interval_unit' spelling is still accepted.
  parser.add_option('-u', '--interval-unit', '--interval_unit',
                    dest='interval_unit', default=_DEFAULT_INTERVAL_UNIT,
                    choices=_INTERVAL_UNIT_CHOICES,
                    help=('Unit name for |interval_value|. Valid options are '
                          '%s. Default: %%default' % (
                              ', '.join(_INTERVAL_UNIT_CHOICES))))
  parser.add_option('-v', '--interval-value', type='int',
                    help=('Interval value to find bugs. '
                          'The script looks for bugs during '
                          'that interval (up to now). This option is used in '
                          'conjunction with |--interval_unit| option. '
                          'The script looks for all bugs if this is not '
                          'specified.'))

  options = parser.parse_args()[0]

  # Strip whitespace so that input such as "author, status" is accepted.
  requested_entries = [entry.strip()
                       for entry in options.email_entries.split(',')]
  options.email_entries = [entry for entry in requested_entries
                           if entry in _ISSUE_ELEMENT_IN_EMAIL_CHOICES]
  if not options.email_entries:
    logging.warning('No issue elements in email in option. '
                    'Default email entries will be used.')
    options.email_entries = list(_DEFAULT_ISSUE_ELEMENT_IN_EMAIL)
  # The entry list is passed as a logging argument; concatenating it onto the
  # format string would leave a literal '%s' in the logged message.
  logging.info('The following is the issue elements in email: %s',
               ', '.join(options.email_entries))
  return options


class BugHunter(object):
  """This class queries issue trackers and e-mails the results."""

  _ISSUE_SEARCH_LINK_BASE = ('http://code.google.com/p/chromium/issues/list?'
                             'can=2&colspec=ID+Pri+Mstone+ReleaseBlock+Area'
                             '+Feature+Status+Owner+Summary&cells=tiles'
                             '&sort=-id')
  # TODO(imasaki): Convert these into template library.
  _EMAIL_ISSUE_TEMPLATE = ('<li><a href="http://crbug.com/%(issue_id)s">'
                           '%(issue_id)s %(title)s</a> ')
  _EMAIL_SUBJECT_TEMPLATE = ('BugHunter found %(n_issues)d %(query_title)s '
                             'bug%(plural)s%(time_msg)s!')
  _EMAIL_MSG_TEMPLATE = ('<a href="%(link_base)s&q=%(unquote_query_text)s">'
                         'Used Query</a>: %(query_text)s<br><br>'
                         'The number of issues : %(n_issues)d<br>'
                         '<ul>%(issues)s</ul>')
  # Captures the milestone number from a label such as 'Mstone-21'.
  _MSTONE_LABEL_RE = re.compile(r'Mstone-(\d*)')
  # Finds http/https URLs; a URL ends at the first whitespace character.
  _URL_RE = re.compile(r'(https?://\S+)')

  def __init__(self, options):
    """Sets up initial state for Bug Hunter.

    Args:
      options: Command-line options, as returned by ParseArgs().
    """
    self._client = gdata.projecthosting.client.ProjectHostingClient()
    self._options = options
    # Per-issue HTML: the fixed link prefix followed by one '%(entry)s'
    # placeholder for every requested email entry.
    self._issue_template = BugHunter._EMAIL_ISSUE_TEMPLATE
    for entry in options.email_entries:
      self._issue_template += '%%(%s)s ' % entry
    self._issue_template += '</li>'

  def GetComments(self, issue_id, max_comments):
    """Get comments for a issue.

    Args:
      issue_id: Issue id for each issue in the issue tracker.
      max_comments: The maximum number of comments to be returned. The comments
        are returned in a reverse chronological order.

    Returns:
      A list of (comment text, author name, updated time) tuples.
    """
    comments_feed = self._client.get_comments(self._options.project_name,
                                              issue_id)
    # The feed order is reversed so that the slice keeps the entries from the
    # end of the feed.
    newest_first = list(reversed(comments_feed.entry))[:max_comments]
    return [(comment.content.text, comment.author[0].name.text,
             comment.updated.text)
            for comment in newest_first]

  @staticmethod
  def _FlattenComments(comment_list):
    """Flattens the tuples returned by GetComments() into a single string.

    Args:
      comment_list: A list of tuples of strings. Fields that are None or
        empty are skipped.

    Returns:
      All non-empty fields separated by single spaces. A separator is used
      so that a URL at the end of one field cannot run into the next field.
    """
    flattened = []
    for comment in comment_list:
      text = ' '.join([field for field in comment if field])
      if text:
        flattened.append(text)
    return ' '.join(flattened)

  def GetIssues(self):
    """Get issues from issue tracker and return them.

    Returns:
      A list of issues in descending (numeric) order by issue_id. Each element
      in the list is a dictionary where the keys are 'issue_id', 'title',
      'author', 'status', 'state', 'content', 'mstone', 'comments', 'labels',
      'urls'. Returns an empty list when there is no matching issue.
    """
    min_time = None
    if self._options.interval_value:
      # Issue Tracker Data API uses RFC 3339 timestamp format, For example:
      # 2005-08-09T10:57:00-08:00
      # (http://code.google.com/p/support/wiki/IssueTrackerAPIPython)
      delta = datetime.timedelta(
          **{self._options.interval_unit: self._options.interval_value})
      dt = datetime.datetime.now() - delta
      min_time = rfc3339.rfc3339(dt)

    query = gdata.projecthosting.client.Query(text_query=self._options.query,
                                              max_results=1000,
                                              published_min=min_time)

    feed = self._client.get_issues(self._options.project_name, query=query)
    if not feed.entry:
      logging.info('No issues available to match query %s.',
                   self._options.query)
      return []
    issues = []
    for entry in feed.entry:
      # The fully qualified id is a URL. We just want the number.
      issue_id = entry.id.text.split('/')[-1]
      if not issue_id.isdigit():
        logging.warning('Issue_id is not correct: %s. Skipping.', issue_id)
        continue
      label_list = [label.text for label in entry.label]
      comments = ''
      if 'comments' in self._options.email_entries:
        comments = BugHunter._FlattenComments(
            self.GetComments(issue_id, self._options.max_comments))
      # Guard against an issue without body text; re.sub() rejects None.
      content = BugHunterUtils.StripHTML(entry.content.text or '')
      # The space keeps a URL at the end of the content from merging with
      # the beginning of the comments.
      url_list = list(
          set(BugHunter._URL_RE.findall(content + ' ' + comments)))
      url_list = [url for url in url_list
                  if url.rstrip('/') not in _URL_EXCLUSION_LIST]
      # When several labels match, the last matching one wins.
      mstone = ''
      for label in label_list:
        m = BugHunter._MSTONE_LABEL_RE.search(label or '')
        if m:
          mstone = m.group(1)
      issues.append(
          {'issue_id': issue_id, 'title': entry.title.text,
           'author': entry.author[0].name.text,
           'status': entry.status.text if entry.status is not None else '',
           'state': entry.state.text if entry.state is not None else '',
           'content': content, 'mstone': mstone, 'comments': comments,
           'labels': label_list, 'urls': url_list})
    # issue_id is a string of digits; compare numerically, otherwise '9'
    # would sort above '10'.
    return sorted(issues, key=lambda issue: int(issue['issue_id']),
                  reverse=True)

  def _SetUpEmailSubjectMsg(self, issues):
    """Set up email subject and its content.

    Args:
      issues: Please refer to the return value in GetIssues().

    Returns:
      A tuple of two strings (email subject and email content).
    """
    time_msg = ''
    if self._options.interval_value:
      # interval_unit is a plural word ('hours', 'days', 'weeks'); drop its
      # final 's' and add it back only for values greater than one.
      time_msg = ' in the past %s %s%s' % (
          self._options.interval_value, self._options.interval_unit[:-1],
          's' if self._options.interval_value > 1 else '')
    n_issues = len(issues)
    subject = BugHunter._EMAIL_SUBJECT_TEMPLATE % {
        'n_issues': n_issues,
        'query_title': self._options.query_title,
        'plural': 's' if n_issues != 1 else '',
        'time_msg': time_msg}
    # NOTE(review): issue fields and the query text are inserted into the
    # HTML body without escaping.
    content = BugHunter._EMAIL_MSG_TEMPLATE % {
        'link_base': BugHunter._ISSUE_SEARCH_LINK_BASE,
        # Despite the key name, the query is URL-quoted for use in the link.
        'unquote_query_text': urllib.quote(self._options.query),
        'query_text': self._options.query,
        'n_issues': n_issues,
        'issues': ''.join(
            [self._issue_template % issue for issue in issues])}
    return (subject, content)

  def SendResultEmail(self, issues):
    """Send result email.

    Args:
      issues: Please refer to the return value in GetIssues().
    """
    subject, content = self._SetUpEmailSubjectMsg(issues)
    BugHunterUtils.SendEmail(
        content, self._options.sender_email_address,
        self._options.receiver_email_address, subject)

  def WriteIssuesToFileInCSV(self, issues, filename):
    """Write issues to a file in CSV format.

    The header row is taken from the keys of the first issue and every row is
    written in that same key order. Nothing is written when |issues| is
    empty.

    Args:
      issues: Please refer to the return value in GetIssues().
      filename: File name for CSV file.
    """
    if not issues:
      logging.warning('No issues to write to %s.', filename)
      return
    header = list(issues[0].keys())
    # The Python 2 csv module expects the file to be opened in binary mode.
    with open(filename, 'wb') as f:
      writer = csv.writer(f)
      # Write header first.
      writer.writerow(header)
      for issue in issues:
        # Look values up by header key so that each column lines up with
        # its header regardless of the dictionary's own ordering.
        writer.writerow(
            [unicode(issue.get(key, '')).encode('utf-8') for key in header])


class BugHunterUtils(object):
  """Utility class for Bug Hunter."""

  @staticmethod
  def StripHTML(string_with_html):
    """Strip HTML tags from string.

    Args:
      string_with_html: A string with HTML tags.

    Returns:
      A string without HTML tags.
    """
    return re.sub('<[^<]+?>', '', string_with_html)

  @staticmethod
  def SendEmail(message, sender_email_address, receivers_email_address,
                subject):
    """Send email using localhost's mail server.

    Args:
      message: Email message to be sent.
      sender_email_address: Sender's email address.
      receivers_email_address: Receiver's email address.
      subject: Email subject.

    Returns:
      True if successful; False, otherwise.
    """
    smtp_obj = None
    try:
      html = '<html><head></head><body>%s</body></html>' % message
      msg = MIMEMultipart('alternative')
      msg['Subject'] = subject
      msg['From'] = sender_email_address
      msg['To'] = receivers_email_address
      msg.attach(MIMEText(html.encode('utf-8'), 'html', _charset='utf-8'))
      smtp_obj = smtplib.SMTP('localhost')
      smtp_obj.sendmail(sender_email_address, receivers_email_address,
                        msg.as_string())
      logging.info('Successfully sent email.')
      return True
    except smtplib.SMTPException:
      logging.exception('Authentication failed, unable to send email.')
    except (socket.gaierror, socket.error, socket.herror):
      logging.exception('Unable to send email.')
    finally:
      # Always release the connection, including when sendmail() fails. A
      # failure while closing must not change the result of the send.
      if smtp_obj is not None:
        try:
          smtp_obj.quit()
        except (smtplib.SMTPException, socket.error):
          smtp_obj.close()
    return False


def Main():
  """Queries the issue tracker, then e-mails and saves any issues found.

  The e-mail is sent only when both the sender and the receiver addresses
  were given; the CSV file is written whenever at least one issue is found.
  """
  ops = ParseArgs()
  bh = BugHunter(ops)
  issues = bh.GetIssues()
  if issues and ops.sender_email_address and ops.receiver_email_address:
    bh.SendResultEmail(issues)
  if issues:
    bh.WriteIssuesToFileInCSV(issues, ops.output_filename)


if __name__ == '__main__':
  Main()