Home | History | Annotate | Download | only in bug_hunter
      1 #!/usr/bin/env python
      2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
      3 # Use of this source code is governed by a BSD-style license that can be
      4 # found in the LICENSE file.
      5 
      6 """This script queries the Chromium issue tracker and e-mails the results.
      7 
      8 It queries issue tracker using Issue Tracker API. The query
      9 parameters can be specified by command-line arguments. For example, with the
     10 following command:
     11 
     12   'python bug_hunter.py -q video Status:Unconfirmed OR audio Status:Unconfirmed
     13    -s sender@chromium.org -r receiver@chromium.org -v 100 -u days'
     14 
     15 You will find all 'Unconfirmed' issues created in the last 100 days containing
     16 'video' or 'audio' in their content/comments. The content of these issues is
     17 sent to receiver@chromium.org.
     18 
     19 TODO(imasaki): users can specify the interval as say: "100d" for "100 days".
     20 
     21 There are two limitations in the current implementation of issue tracker API
     22 and UI:
     23 * only outermost OR is valid. For example, the query
     24   'video OR audio Status:Unconfirmed' is translated into
     25   'video OR (audio AND Status:Unconfirmed)'
     26 * brackets are not supported. For example, the query
     27   '(video OR audio) Status:Unconfirmed' does not work.
     28 
     29 You need to install following to run this script
     30   gdata-python-client (http://code.google.com/p/gdata-python-client/)
     31   rfc3339.py (http://henry.precheur.org/projects/rfc3339)
     32 
     33 Links:
     34 * Chromium issue tracker: http://code.google.com/p/chromium/issues/list
     35 * Issue tracker API: http://code.google.com/p/support/wiki/IssueTrackerAPI
     36 * Search tips for the issue tracker:
     37     http://code.google.com/p/chromium/issues/searchtips
     38 """
     39 
     40 import csv
     41 import datetime
     42 from email.mime.multipart import MIMEMultipart
     43 from email.mime.text import MIMEText
     44 import logging
     45 from operator import itemgetter
     46 import optparse
     47 import re
     48 import smtplib
     49 import socket
     50 import sys
     51 import urllib
     52 
     53 try:
     54   import gdata.data
     55   import gdata.projecthosting.client
     56 except ImportError:
     57   logging.error('gdata-client needs to be installed. Please install\n'
     58                 'and try again (http://code.google.com/p/gdata-python-client/)')
     59   sys.exit(1)
     60 
     61 try:
     62   import rfc3339
     63 except ImportError:
     64   logging.error('rfc3339 needs to be installed. Please install\n'
     65                 'and try again (http://henry.precheur.org/projects/rfc3339)')
     66   sys.exit(1)
     67 
# A list of default values.
# Default for the -u/--interval_unit option.
_DEFAULT_INTERVAL_UNIT = 'hours'
# Default for the -e/--email-entries option: the issue fields appended to each
# issue line of the result email.
_DEFAULT_ISSUE_ELEMENT_IN_EMAIL = ('author', 'status', 'state', 'content',
                                   'comments', 'labels', 'urls')
# Default for the -p/--project-name option.
_DEFAULT_PROJECT_NAME = 'chromium'
# Default for the -t/--query-title option (used in the email subject).
_DEFAULT_QUERY_TITLE = 'potential media bugs'
# Default for the -q/--query option.
_DEFAULT_QUERY = ('video -has:Feature -has:Owner -label:nomedia '
                  'status:Unconfirmed OR audio -has:Feature -has:Owner '
                  '-label:nomedia status:Unconfirmed')
# Default for the -o/--output-filename option.
_DEFAULT_OUTPUT_FILENAME = 'output.csv'
# Default for the -l/--max-comments option.
# NOTE: the name is misspelled ("DETAULT"); it is kept as is because
# ParseArgs() refers to it by this name.
_DETAULT_MAX_COMMENTS = 1000

# Values accepted by -u/--interval_unit. Each one is also used as a keyword
# argument name for datetime.timedelta() in BugHunter.GetIssues().
_INTERVAL_UNIT_CHOICES = ('hours', 'days', 'weeks')

# URLs in this list are excluded from URL extraction from bug
# content/comments. Each list element should not contain the url ending in
# '/'. For example, the element should be 'http://www.google.com' but not
# 'http://www.google.com/'
_URL_EXCLUSION_LIST = ('http://www.youtube.com/html5',
                       'http://www.google.com')
# Names accepted in the -e/--email-entries list; ParseArgs() drops any other
# name.
_ISSUE_ELEMENT_IN_EMAIL_CHOICES = ('issue_id', 'author', 'status', 'state',
                                   'content', 'comments', 'labels', 'urls',
                                   'mstone')
     91 
     92 
     93 def ParseArgs():
     94   """Returns options dictionary from parsed command line arguments."""
     95   parser = optparse.OptionParser()
     96 
     97   parser.add_option('-e', '--email-entries',
     98                     help=('A comma-separated list of issue entries that are '
     99                           'sent in the email content. '
    100                           'Possible strings are %s. Default: %%default.' %
    101                           ', '.join(_ISSUE_ELEMENT_IN_EMAIL_CHOICES)),
    102                     default=','.join(_DEFAULT_ISSUE_ELEMENT_IN_EMAIL))
    103   parser.add_option('-l', '--max-comments',
    104                     help=('The maximum number of comments returned for each '
    105                           'issue in a reverse chronological order. '
    106                           'Default: %default.'),
    107                     type='int', default=_DETAULT_MAX_COMMENTS)
    108   parser.add_option('-o', '--output-filename',
    109                     help=('Filename for result output in CSV format. '
    110                           'Default: %default.'),
    111                     default=_DEFAULT_OUTPUT_FILENAME, metavar='FILE')
    112   parser.add_option('-p', '--project-name', default=_DEFAULT_PROJECT_NAME,
    113                     help='Project name string. Default: %default')
    114   parser.add_option('-q', '--query', default=_DEFAULT_QUERY,
    115                     help=('Query to be used to find bugs. The detail can be '
    116                           'found in Chromium Issue tracker page '
    117                           'http://code.google.com/p/chromium/issues/searchtips.'
    118                           ' Default: "%default".'))
    119   parser.add_option('-r', '--receiver-email-address',
    120                     help="Receiver's email address (Required).")
    121   parser.add_option('-s', '--sender-email-address',
    122                     help="Sender's email address (Required).")
    123   parser.add_option('-t', '--query-title',
    124                     default=_DEFAULT_QUERY_TITLE, dest='query_title',
    125                     help=('Query title string used in the subject of the '
    126                           'result email. Default: %default.'))
    127   parser.add_option('-u', '--interval_unit', default=_DEFAULT_INTERVAL_UNIT,
    128                     choices=_INTERVAL_UNIT_CHOICES,
    129                     help=('Unit name for |interval_value|. Valid options are '
    130                           '%s. Default: %%default' % (
    131                               ', '.join(_INTERVAL_UNIT_CHOICES))))
    132   parser.add_option('-v', '--interval-value', type='int',
    133                     help=('Interval value to find bugs. '
    134                           'The script looks for bugs during '
    135                           'that interval (up to now). This option is used in '
    136                           'conjunction with |--interval_unit| option. '
    137                           'The script looks for all bugs if this is not '
    138                           'specified.'))
    139 
    140   options = parser.parse_args()[0]
    141 
    142   options.email_entries = options.email_entries.split(',')
    143   options.email_entries = [entry for entry in options.email_entries
    144                            if entry in _ISSUE_ELEMENT_IN_EMAIL_CHOICES]
    145   if not options.email_entries:
    146     logging.warning('No issue elements in email in option. '
    147                     'Default email entries will be used.')
    148     options.email_entries = _DEFAULT_ISSUE_ELEMENT_IN_EMAIL
    149   logging.info('The following is the issue elements in email: %s ' + (
    150       ', '.join(options.email_entries)))
    151   return options
    152 
    153 
    154 class BugHunter(object):
    155   """This class queries issue trackers and e-mails the results."""
    156 
    157   _ISSUE_SEARCH_LINK_BASE = ('http://code.google.com/p/chromium/issues/list?'
    158                              'can=2&colspec=ID+Pri+Mstone+ReleaseBlock+Area'
    159                              '+Feature+Status+Owner+Summary&cells=tiles'
    160                              '&sort=-id')
    161   # TODO(imasaki): Convert these into template library.
    162   _EMAIL_ISSUE_TEMPLATE = ('<li><a href="http://crbug.com/%(issue_id)s">'
    163                            '%(issue_id)s %(title)s</a> ')
    164   _EMAIL_SUBJECT_TEMPLATE = ('BugHunter found %(n_issues)d %(query_title)s '
    165                              'bug%(plural)s%(time_msg)s!')
    166   _EMAIL_MSG_TEMPLATE = ('<a href="%(link_base)s&q=%(unquote_query_text)s">'
    167                          'Used Query</a>: %(query_text)s<br><br>'
    168                          'The number of issues : %(n_issues)d<br>'
    169                          '<ul>%(issues)s</ul>')
    170 
    171   def __init__(self, options):
    172     """Sets up initial state for Bug Hunter.
    173 
    174     Args:
    175       options: Command-line options.
    176     """
    177     self._client = gdata.projecthosting.client.ProjectHostingClient()
    178     self._options = options
    179     self._issue_template = BugHunter._EMAIL_ISSUE_TEMPLATE
    180     for entry in options.email_entries:
    181       self._issue_template += '%%(%s)s ' % entry
    182     self._issue_template += '</li>'
    183 
    184   def GetComments(self, issue_id, max_comments):
    185     """Get comments for a issue.
    186 
    187     Args:
    188       issue_id: Issue id for each issue in the issue tracker.
    189       max_comments: The maximum number of comments to be returned. The comments
    190         are returned in a reverse chronological order.
    191 
    192     Returns:
    193       A list of (author name, comments, updated time) tuples.
    194     """
    195     comments_feed = self._client.get_comments(self._options.project_name,
    196                                               issue_id)
    197     comment_list = [(comment.content.text, comment.author[0].name.text,
    198                      comment.updated.text)
    199                     for comment
    200                     in list(reversed(comments_feed.entry))[0:max_comments]]
    201     return comment_list
    202 
    203   def GetIssues(self):
    204     """Get issues from issue tracker and return them.
    205 
    206     Returns:
    207       A list of issues in descending order by issue_id. Each element in the
    208         list is a dictionary where the keys are 'issue_id', 'title', 'author',
    209         'status', 'state', 'content', 'comments', 'labels', 'urls'.
    210         Returns an empty list when there is no matching issue.
    211     """
    212     min_time = None
    213     if self._options.interval_value:
    214       # Issue Tracker Data API uses RFC 3339 timestamp format, For example:
    215       # 2005-08-09T10:57:00-08:00
    216       # (http://code.google.com/p/support/wiki/IssueTrackerAPIPython)
    217       delta = datetime.timedelta(
    218           **{self._options.interval_unit: self._options.interval_value})
    219       dt = datetime.datetime.now() - delta
    220       min_time = rfc3339.rfc3339(dt)
    221 
    222     query = gdata.projecthosting.client.Query(text_query=self._options.query,
    223                                               max_results=1000,
    224                                               published_min=min_time)
    225 
    226     feed = self._client.get_issues(self._options.project_name, query=query)
    227     if not feed.entry:
    228       logging.info('No issues available to match query %s.',
    229                    self._options.query)
    230       return []
    231     issues = []
    232     for entry in feed.entry:
    233       # The fully qualified id is a URL. We just want the number.
    234       issue_id = entry.id.text.split('/')[-1]
    235       if not issue_id.isdigit():
    236         logging.warning('Issue_id is not correct: %s. Skipping.', issue_id)
    237         continue
    238       label_list = [label.text for label in entry.label]
    239       comments = ''
    240       if 'comments' in self._options.email_entries:
    241         comments = ''.join(
    242             [''.join(comment) if not comment else ''
    243              for comment
    244              in self.GetComments(issue_id, self._options.max_comments)])
    245       content = BugHunterUtils.StripHTML(entry.content.text)
    246       url_list = list(
    247           set(re.findall(r'(https?://\S+)', content + comments)))
    248       url_list = [url for url in url_list
    249                   if not url.rstrip('/') in _URL_EXCLUSION_LIST]
    250       mstone = ''
    251       r = re.compile(r'Mstone-(\d*)')
    252       for label in label_list:
    253         m = r.search(label)
    254         if m:
    255           mstone = m.group(1)
    256       issues.append(
    257           {'issue_id': issue_id, 'title': entry.title.text,
    258            'author': entry.author[0].name.text,
    259            'status': entry.status.text if entry.status is not None else '',
    260            'state': entry.state.text if entry.state is not None else '',
    261            'content': content, 'mstone': mstone, 'comments': comments,
    262            'labels': label_list, 'urls': url_list})
    263     return sorted(issues, key=itemgetter('issue_id'), reverse=True)
    264 
    265   def _SetUpEmailSubjectMsg(self, issues):
    266     """Set up email subject and its content.
    267 
    268     Args:
    269       issues: Please refer to the return value in GetIssues().
    270 
    271     Returns:
    272       A tuple of two strings (email subject and email content).
    273     """
    274     time_msg = ''
    275     if self._options.interval_value:
    276       time_msg = ' in the past %s %s%s' % (
    277           self._options.interval_value, self._options.interval_unit[:-1],
    278           's' if self._options.interval_value > 1 else '')
    279     subject = BugHunter._EMAIL_SUBJECT_TEMPLATE % {
    280         'n_issues': len(issues),
    281         'query_title': self._options.query_title,
    282         'plural': 's' if len(issues) > 1 else '',
    283         'time_msg': time_msg}
    284     content = BugHunter._EMAIL_MSG_TEMPLATE % {
    285         'link_base': BugHunter._ISSUE_SEARCH_LINK_BASE,
    286         'unquote_query_text': urllib.quote(self._options.query),
    287         'query_text': self._options.query,
    288         'n_issues': len(issues),
    289         'issues': ''.join(
    290             [self._issue_template % issue for issue in issues])}
    291     return (subject, content)
    292 
    293   def SendResultEmail(self, issues):
    294     """Send result email.
    295 
    296     Args:
    297       issues: Please refer to the return value in GetIssues().
    298     """
    299     subject, content = self._SetUpEmailSubjectMsg(issues)
    300     BugHunterUtils.SendEmail(
    301         content, self._options.sender_email_address,
    302         self._options.receiver_email_address, subject)
    303 
    304   def WriteIssuesToFileInCSV(self, issues, filename):
    305     """Write issues to a file in CSV format.
    306 
    307     Args:
    308       issues: Please refer to the return value in GetIssues().
    309       filename: File name for CSV file.
    310     """
    311     with open(filename, 'w') as f:
    312       writer = csv.writer(f)
    313       # Write header first.
    314       writer.writerow(issues[0].keys())
    315       for issue in issues:
    316         writer.writerow(
    317             [unicode(value).encode('utf-8') for value in issue.values()])
    318 
    319 
    320 class BugHunterUtils(object):
    321   """Utility class for Bug Hunter."""
    322 
    323   @staticmethod
    324   def StripHTML(string_with_html):
    325     """Strip HTML tags from string.
    326 
    327     Args:
    328       string_with_html: A string with HTML tags.
    329 
    330     Returns:
    331       A string without HTML tags.
    332     """
    333     return re.sub('<[^<]+?>', '', string_with_html)
    334 
    335   @staticmethod
    336   def SendEmail(message, sender_email_address, receivers_email_address,
    337                 subject):
    338     """Send email using localhost's mail server.
    339 
    340     Args:
    341       message: Email message to be sent.
    342       sender_email_address: Sender's email address.
    343       receivers_email_address: Receiver's email address.
    344       subject: Email subject.
    345 
    346     Returns:
    347       True if successful; False, otherwise.
    348     """
    349     try:
    350       html = '<html><head></head><body>%s</body></html>' % message
    351       msg = MIMEMultipart('alternative')
    352       msg['Subject'] = subject
    353       msg['From'] = sender_email_address
    354       msg['To'] = receivers_email_address
    355       msg.attach(MIMEText(html.encode('utf-8'), 'html', _charset='utf-8'))
    356       smtp_obj = smtplib.SMTP('localhost')
    357       smtp_obj.sendmail(sender_email_address, receivers_email_address,
    358                         msg.as_string())
    359       logging.info('Successfully sent email.')
    360       smtp_obj.quit()
    361       return True
    362     except smtplib.SMTPException:
    363       logging.exception('Authentication failed, unable to send email.')
    364     except (socket.gaierror, socket.error, socket.herror):
    365       logging.exception('Unable to send email.')
    366     return False
    367 
    368 
    369 def Main():
    370   ops = ParseArgs()
    371   bh = BugHunter(ops)
    372   issues = bh.GetIssues()
    373   if issues and ops.sender_email_address and ops.receiver_email_address:
    374     bh.SendResultEmail(issues)
    375   if issues:
    376     bh.WriteIssuesToFileInCSV(issues, ops.output_filename)
    377 
    378 
    379 if __name__ == '__main__':
    380   Main()
    381