Home | History | Annotate | Download | only in dashboard
      1 # Copyright 2015 The Chromium Authors. All rights reserved.
      2 # Use of this source code is governed by a BSD-style license that can be
      3 # found in the LICENSE file.
      4 
      5 """URL endpoints to show and gather stats on performance and alerts.
      6 
      7 StatsHandler is the main entry point, and provides the interface for requesting
      8 statistics to be generated and viewing generated statistics.
      9 
     10 This module also contains other handlers for gathering statistics Test by Test,
     11 since querying all Tests at once puts us over the 60s timeout.
     12 """
     13 
     14 import collections
     15 import datetime
     16 import json
     17 import math
     18 
     19 from google.appengine.api import taskqueue
     20 from google.appengine.ext import ndb
     21 
     22 from dashboard import datastore_hooks
     23 from dashboard import math_utils
     24 from dashboard import request_handler
     25 from dashboard import utils
     26 from dashboard import xsrf
     27 from dashboard.models import anomaly
     28 from dashboard.models import graph_data
     29 from dashboard.models import sheriff
     30 
# Buckets to split alerts into based on relative change size. Each value is
# an exclusive upper bound on the absolute percent change of an alert; alerts
# at or above the last bound go into a catch-all "largest" bucket. See
# StatsForAlertsHandler._PercentChangedBucket.
_PERCENT_CHANGED_BUCKETS = [1, 2, 5, 10, 20]

# Task queue name, should be present in queue.yaml.
_QUEUE_NAME = 'stats-queue'
     36 
     37 
class StatContainer(ndb.Model):
  """Represents a set of statistics that is displayed together.

  The individual data points belonging to a set are stored as child
  IndividualStat entities of this container.
  """
  # Type of statistics, e.g. 'around_revision' or 'alert_summary'.
  stat_type = ndb.StringProperty()
  # A dictionary of information about the set of statistics overall.
  summary = ndb.JsonProperty()
  # Number of individual items in this set of statistics. May be larger than
  # the number of existing IndividualStat children while the task queue tasks
  # that produce them are still running.
  num_stats = ndb.IntegerProperty()
  # Time that this entity was created.
  timestamp = ndb.DateTimeProperty(auto_now_add=True, indexed=True)
     48 
     49 
class IndividualStat(ndb.Model):
  """Represents one item within a set of statistics.

  Each IndividualStat is created as a child entity of the StatContainer that
  it belongs to, and is fetched with an ancestor query on that container.
  """
  # A dictionary which could contain different things for different types of
  # statistics; could contain information about one Test or one day.
  details = ndb.JsonProperty()
     55 
     56 
     57 class StatsHandler(request_handler.RequestHandler):
     58   """URL endpoint to request/view stats."""
     59 
     60   def get(self):
     61     """Shows a set of statistics, or a form for producing stats."""
     62     if not utils.IsInternalUser():
     63       self.RenderHtml('result.html', {
     64           'errors': ['Only logged-in internal users can access stats.']
     65       })
     66       return
     67 
     68     key = self.request.get('key')
     69     if key:
     70       self._DisplayResults(key)
     71     else:
     72       self._DisplayForm()
     73 
     74   def _DisplayResults(self, key):
     75     """Displays a set of previously-generated statistics."""
     76     container = ndb.Key(urlsafe=key).get()
     77     stats = IndividualStat.query(ancestor=container.key).fetch()
     78     total = container.num_stats
     79     processed = len(stats)
     80     title = self._GetStatTitle(container)
     81 
     82     if processed < total:
     83       have_stats = False
     84       stat_type = None
     85       processed_stats = None
     86     else:
     87       have_stats = True
     88       stat_type = container.stat_type
     89       if stat_type == 'around_revision':
     90         processed_stats = self._GetAroundRevisionStats(container, stats)
     91       elif stat_type == 'alert_summary':
     92         processed_stats = self._GetAlertSummaryStats(container, stats)
     93 
     94     self.RenderHtml('stats.html', {
     95         'title': title,
     96         'waiting': not have_stats,
     97         'have_stats': have_stats,
     98         'type': stat_type,
     99         'stats': processed_stats,
    100         'processed': processed,
    101         'total': total,
    102     })
    103 
    104   def _DisplayForm(self):
    105     """Displays a form for requesting a set of statistics."""
    106     master = ndb.Key('Master', 'ChromiumPerf')
    107     bots = graph_data.Bot.query(ancestor=master).fetch(keys_only=True)
    108     bots = [b.string_id() for b in bots]
    109     sheriffs = sheriff.Sheriff.query().fetch(keys_only=True)
    110     sheriffs = [s.string_id() for s in sheriffs]
    111     recent_stats = StatContainer.query().order(
    112         -StatContainer.timestamp).fetch(limit=20)
    113     recent = []
    114     for r in recent_stats:
    115       recent.append({
    116           'key': r.key.urlsafe(),
    117           'title': self._GetStatTitle(r),
    118       })
    119 
    120     self.RenderHtml('stats.html', {
    121         'recent': recent,
    122         'bots': bots,
    123         'sheriffs': sheriffs,
    124     })
    125 
    126   def _GetStatTitle(self, container):
    127     """Returns a title string for the given stat container."""
    128     title_prefix = ''
    129     if container.summary.get('name'):
    130       title_prefix = '%s: ' % container.summary.get('name')
    131 
    132     if container.stat_type == 'around_revision':
    133       revision = container.summary.get('revision')
    134       num_around = container.summary.get('num_around')
    135       return ('%sChanges around revision %s (%s points each direction)' %
    136               (title_prefix, revision, num_around))
    137 
    138     if container.stat_type == 'alert_summary':
    139       start = container.summary.get('start_date')
    140       end = container.summary.get('end_date')
    141       return '%s: %s-%s' % (title_prefix, start, end)
    142 
    143   def _GetAroundRevisionStats(self, container, stats):
    144     """Fetches the template variables needed to display around-revision stats.
    145 
    146     Args:
    147       container: A StatContainer entity.
    148       stats: A list of IndividualStat entities.
    149 
    150     Returns:
    151       A dictionary.
    152     """
    153     data = {
    154         'revision': int(container.summary['revision']),
    155         'num_around': int(container.summary['num_around']),
    156         'tests': [],
    157     }
    158     for stat in stats:
    159       data['tests'].append(stat.details)
    160     return data
    161 
    162   def _GetAlertSummaryStats(self, container, stats):
    163     """Gets all the template variables needed to display alert summary stats.
    164 
    165     Args:
    166       container: A StatContainer entity.
    167       stats: A list of IndividualStat entities.
    168 
    169     Returns:
    170       A dictionary.
    171     """
    172     def IndividualStatTimeInt(individual_stat):
    173       date = individual_stat.details['date']
    174       return (int(date.split('-')[0]) * 10000 +
    175               int(date.split('-')[1]) * 100 +
    176               int(date.split('-')[2]))
    177 
    178     stats.sort(key=IndividualStatTimeInt)
    179     details = [s.details for s in stats]
    180     categories = [
    181         'bots',
    182         'test_suites',
    183         'traces',
    184         'bug_ids',
    185         'percent_changed_buckets',
    186     ]
    187     axis_map = {i: d['date'] for i, d in enumerate(details)}
    188     overall_summaries = {}
    189     daily_summaries = {}
    190     for category in categories:
    191       key_names = set()
    192       for d in details:
    193         key_names |= set(d.get(category, {}))
    194       overall_summaries[category] = []
    195       daily_summaries[category] = []
    196       for key_name in key_names:
    197         pie_dict = {
    198             'label': key_name,
    199             'data': sum(d.get(category, {}).get(key_name, 0) for d in details)
    200         }
    201         overall_summaries[category].append(pie_dict)
    202         daily_dict = {'label': key_name, 'data': []}
    203         for i, d in enumerate(details):
    204           yval = d.get(category, {}).get(key_name, 0)
    205           daily_dict['data'].append([i, yval])
    206         daily_summaries[category].append(daily_dict)
    207       # Sort by percent.
    208       if category != 'percent_changed_buckets':
    209         overall_summaries[category].sort(key=lambda d: d['data'])
    210 
    211     data = {
    212         'start_date': container.summary['start_date'],
    213         'end_date': container.summary['end_date'],
    214         'alert_summaries': [s.details for s in stats],
    215         'axis_map': json.dumps(axis_map),
    216         'overall_summaries': json.dumps(overall_summaries),
    217         'daily_summaries': json.dumps(daily_summaries),
    218     }
    219     return data
    220 
    221   @xsrf.TokenRequired
    222   def post(self):
    223     """Kicks off a task on the task queue to generate the requested stats."""
    224     if not utils.IsInternalUser():
    225       self.RenderHtml('result.html', {
    226           'errors': ['Only logged-in internal users can access stats.']
    227       })
    228       return
    229 
    230     datastore_hooks.SetPrivilegedRequest()
    231     stat_type = self.request.get('type')
    232     stat_container = StatContainer(stat_type=stat_type)
    233 
    234     if stat_type == 'around_revision':
    235       self._StartGeneratingStatsAroundRevision(stat_container)
    236     elif stat_type == 'alert_summary':
    237       self._StartGeneratingStatsForAlerts(stat_container)
    238     self.redirect('/stats?key=%s' % stat_container.key.urlsafe())
    239 
    240   def _StartGeneratingStatsAroundRevision(self, stat_container):
    241     """Adds tasks for generating around_revision stats to the task queue.
    242 
    243     Note: Master and sheriff are hard-coded below. If we want to use this
    244     to generate stats about other masters or sheriffs, we should:
    245       1. Make master and sheriff specified by parameters.
    246       2. Add fields on the form to specify these parameters.
    247 
    248     Args:
    249       stat_container: A StatContainer entity to populate.
    250     """
    251     bots = self.request.get_all('bots')
    252     bots = ['ChromiumPerf/' + bot for bot in bots]
    253     sheriff_key = ndb.Key('Sheriff', 'Chromium Perf Sheriff')
    254     test_query = graph_data.Test.query(graph_data.Test.sheriff == sheriff_key)
    255     test_keys = test_query.fetch(keys_only=True)
    256     test_keys = [k for k in test_keys if '/'.join(
    257         utils.TestPath(k).split('/')[:2]) in bots]
    258     summary = {
    259         'revision': int(self.request.get('rev')),
    260         'num_around': int(self.request.get('num_around')),
    261         'name': self.request.get('name', None),
    262     }
    263     stat_container.summary = summary
    264     stat_container.num_stats = len(test_keys)
    265     stat_container.put()
    266     for test_key in test_keys:
    267       taskqueue.add(url='/stats_around_revision',
    268                     params={
    269                         'revision': summary['revision'],
    270                         'num_around': summary['num_around'],
    271                         'test_key': test_key.urlsafe(),
    272                         'parent_key': stat_container.key.urlsafe(),
    273                     },
    274                     queue_name=_QUEUE_NAME)
    275 
    276   def _StartGeneratingStatsForAlerts(self, stat_container):
    277     """Adds tasks for generating alert_summary stats to the task queue.
    278 
    279     Args:
    280       stat_container: A StatContainer entity to populate.
    281     """
    282     def DateParts(date_string):
    283       """Returns the year, month, day numbers in a yyyy-mm-dd string."""
    284       return map(int, date_string.split('-'))
    285     start_date = datetime.datetime(*DateParts(self.request.get('start_date')))
    286     end_date = datetime.datetime(*DateParts(self.request.get('end_date')))
    287 
    288     sheriff_name = self.request.get('sheriff')
    289     stat_container.summary = {
    290         'name': self.request.get('name', None),
    291         'start_date': self.request.get('start_date'),
    292         'end_date': self.request.get('end_date'),
    293         'sheriff': sheriff_name,
    294     }
    295     stat_container.num_stats = 0
    296     stat_container.put()
    297 
    298     date_to_enqueue = start_date
    299     while date_to_enqueue <= end_date:
    300       taskqueue.add(url='/stats_for_alerts',
    301                     params={
    302                         'sheriff': sheriff_name,
    303                         'year': date_to_enqueue.year,
    304                         'month': date_to_enqueue.month,
    305                         'day': date_to_enqueue.day,
    306                         'parent_key': stat_container.key.urlsafe(),
    307                     },
    308                     queue_name=_QUEUE_NAME)
    309       date_to_enqueue += datetime.timedelta(days=1)
    310       stat_container.num_stats += 1
    311     stat_container.put()
    312 
    313 
    314 class StatsAroundRevisionHandler(request_handler.RequestHandler):
    315   """URL endpoint for tasks which generate stats before/after a revision."""
    316 
    317   def post(self):
    318     """Task queue task to get stats before/after a revision of a single Test.
    319 
    320     Request parameters:
    321       revision: A central revision to look around.
    322       num_around: The number of points before and after the given revision.
    323       test_key: The urlsafe string of a Test key.
    324       parent_key: The urlsafe string of a StatContainer key.
    325     """
    326     datastore_hooks.SetPrivilegedRequest()
    327 
    328     revision = int(self.request.get('revision'))
    329     num_around = int(self.request.get('num_around'), 10)
    330     test_key = ndb.Key(urlsafe=self.request.get('test_key'))
    331     container_key = ndb.Key(urlsafe=self.request.get('parent_key'))
    332 
    333     # Get the Rows and values before and starting from the given revision.
    334     before_revs = graph_data.Row.query(
    335         graph_data.Row.parent_test == test_key,
    336         graph_data.Row.revision < revision).order(
    337             -graph_data.Row.revision).fetch(limit=num_around)
    338     before_vals = [b.value for b in before_revs]
    339     after_revs = graph_data.Row.query(
    340         graph_data.Row.parent_test == test_key,
    341         graph_data.Row.revision >= revision).order(
    342             graph_data.Row.revision).fetch(limit=num_around)
    343     after_vals = [a.value for a in after_revs]
    344 
    345     # There may be no Row at the particular revision requested; if so, we use
    346     # the first revision after the given revision.
    347     actual_revision = None
    348     if after_vals:
    349       actual_revision = after_revs[0].revision
    350 
    351     test = test_key.get()
    352     improvement_direction = self._ImprovementDirection(test)
    353     median_before = math_utils.Median(before_vals)
    354     median_after = math_utils.Median(after_vals)
    355     mean_before = math_utils.Median(before_vals)
    356     mean_after = math_utils.Median(after_vals)
    357     details = {
    358         'test_path': utils.TestPath(test_key),
    359         'improvement_direction': improvement_direction,
    360         'actual_revision': actual_revision,
    361         'median_before': '%.2f' % median_before,
    362         'median_after': '%.2f' % median_after,
    363         'median_percent_improved': self._PercentImproved(
    364             median_before, median_after, improvement_direction),
    365         'mean_before': '%.2f' % mean_before,
    366         'mean_after': '%.2f' % mean_after,
    367         'mean_percent_improved': self._PercentImproved(
    368             mean_before, mean_after, improvement_direction),
    369         'std': '%.2f' % math_utils.StandardDeviation(before_vals + after_vals),
    370     }
    371     new_stat = IndividualStat(parent=container_key, details=details)
    372     new_stat.put()
    373 
    374   def _ImprovementDirection(self, test):
    375     """Returns a string describing improvement direction of a Test."""
    376     if test.improvement_direction == anomaly.UP:
    377       return 'up'
    378     if test.improvement_direction == anomaly.DOWN:
    379       return 'down'
    380     return 'unknown'
    381 
    382   def _PercentImproved(self, before, after, improvement_direction):
    383     """Returns a string containing percent improvement."""
    384     if math.isnan(before) or math.isnan(after):
    385       return 'NaN'
    386     if before == 0:
    387       return anomaly.FREAKIN_HUGE
    388     percent_improved = ((after - before) / before) * 100
    389     if improvement_direction == 'down' and percent_improved != 0:
    390       percent_improved = -percent_improved
    391     return '%.2f' % percent_improved
    392 
    393 
    394 class StatsForAlertsHandler(request_handler.RequestHandler):
    395   """URL endpoint for tasks which generate stats about alerts."""
    396 
    397   def post(self):
    398     """Task queue task to process a single day's alerts for a sheriff."""
    399     datastore_hooks.SetPrivilegedRequest()
    400     container_key = ndb.Key(urlsafe=self.request.get('parent_key'))
    401     sheriff_key = ndb.Key('Sheriff', self.request.get('sheriff'))
    402     year = int(self.request.get('year'))
    403     month = int(self.request.get('month'))
    404     day = int(self.request.get('day'))
    405 
    406     # Fetch all of the alerts for the day.
    407     start_time = datetime.datetime(year, month, day)
    408     end_time = start_time + datetime.timedelta(days=1)
    409     alerts = anomaly.Anomaly.query(
    410         anomaly.Anomaly.timestamp >= start_time,
    411         anomaly.Anomaly.timestamp < end_time,
    412         anomaly.Anomaly.sheriff == sheriff_key).fetch()
    413 
    414     details = collections.defaultdict(dict)
    415     details['date'] = '%s-%s-%s' % (year, month, day)
    416     for alert in alerts:
    417       self._AddAlert(alert, details)
    418     new_stat = IndividualStat(parent=container_key, details=details)
    419     new_stat.put()
    420 
    421   def _IncrementDict(self, dictionary, key):
    422     """Increments a count in a dictionary."""
    423     dictionary[key] = dictionary.get(key, 0) + 1
    424 
    425   def _AddAlert(self, anomaly_entity, details):
    426     """Adds the given Anomaly to the stats for the day.
    427 
    428     Args:
    429       anomaly_entity: An Anomaly entity.
    430       details: A dictionary of details for one IndividualStat.
    431     """
    432     test_path_parts = anomaly_entity.test.flat()[1::2]
    433     bot = '%s/%s' % (test_path_parts[0], test_path_parts[1])
    434     suite = test_path_parts[2]
    435     test = '/'.join(test_path_parts[2:])
    436     trace = anomaly_entity.test.string_id()
    437     percent_changed_bucket = self._PercentChangedBucket(
    438         anomaly_entity.percent_changed)
    439 
    440     # Increment counts for each category that this alert belongs to.
    441     self._IncrementDict(details['bots'], bot)
    442     self._IncrementDict(details['test_suites'], suite)
    443     self._IncrementDict(details['tests'], test)
    444     self._IncrementDict(details['traces'], trace)
    445     if anomaly_entity.bug_id:
    446       self._IncrementDict(details['bug_ids'], anomaly_entity.bug_id)
    447     self._IncrementDict(details['percent_changed_buckets'],
    448                         percent_changed_bucket)
    449 
    450   def _PercentChangedBucket(self, percent_changed):
    451     """Returns the name of a percent-changed bucket to put alerts into."""
    452     percent_changed = abs(percent_changed)
    453     percent_changed_bucket = None
    454     for bucket in _PERCENT_CHANGED_BUCKETS:
    455       if percent_changed < bucket:
    456         return '%02d%%' % bucket
    457     if not percent_changed_bucket:
    458       return 'largest'
    459