1 # Copyright 2015 The Chromium Authors. All rights reserved. 2 # Use of this source code is governed by a BSD-style license that can be 3 # found in the LICENSE file. 4 5 """URL endpoints to show and gather stats on performance and alerts. 6 7 StatsHandler is the main entry point, and provides the interface for requesting 8 statistics to be generated and viewing generated statistics. 9 10 This module also contains other handlers for gathering statistics Test by Test, 11 since querying all Tests at once puts us over the 60s timeout. 12 """ 13 14 import collections 15 import datetime 16 import json 17 import math 18 19 from google.appengine.api import taskqueue 20 from google.appengine.ext import ndb 21 22 from dashboard import datastore_hooks 23 from dashboard import math_utils 24 from dashboard import request_handler 25 from dashboard import utils 26 from dashboard import xsrf 27 from dashboard.models import anomaly 28 from dashboard.models import graph_data 29 from dashboard.models import sheriff 30 31 # Buckets to split alerts into based on relative change size. 32 _PERCENT_CHANGED_BUCKETS = [1, 2, 5, 10, 20] 33 34 # Task queue name, should be present in queue.yaml. 35 _QUEUE_NAME = 'stats-queue' 36 37 38 class StatContainer(ndb.Model): 39 """Represents a set of statistics that is displayed together.""" 40 # Type of statistics, e.g. 'around_revision' or 'alerts_summary'. 41 stat_type = ndb.StringProperty() 42 # A dictionary of information about the set of statistics overall. 43 summary = ndb.JsonProperty() 44 # Number of individual items in this set of statistics. 45 num_stats = ndb.IntegerProperty() 46 # Time that this entity was created. 47 timestamp = ndb.DateTimeProperty(auto_now_add=True, indexed=True) 48 49 50 class IndividualStat(ndb.Model): 51 """Represents one item within a set of statistics.""" 52 # A dictionary which could contain different things for different types of 53 # statistics; could contain information about one Test or one day. 54 details = ndb.JsonProperty() 55 56 57 class StatsHandler(request_handler.RequestHandler): 58 """URL endpoint to request/view stats.""" 59 60 def get(self): 61 """Shows a set of statistics, or a form for producing stats.""" 62 if not utils.IsInternalUser(): 63 self.RenderHtml('result.html', { 64 'errors': ['Only logged-in internal users can access stats.'] 65 }) 66 return 67 68 key = self.request.get('key') 69 if key: 70 self._DisplayResults(key) 71 else: 72 self._DisplayForm() 73 74 def _DisplayResults(self, key): 75 """Displays a set of previously-generated statistics.""" 76 container = ndb.Key(urlsafe=key).get() 77 stats = IndividualStat.query(ancestor=container.key).fetch() 78 total = container.num_stats 79 processed = len(stats) 80 title = self._GetStatTitle(container) 81 82 if processed < total: 83 have_stats = False 84 stat_type = None 85 processed_stats = None 86 else: 87 have_stats = True 88 stat_type = container.stat_type 89 if stat_type == 'around_revision': 90 processed_stats = self._GetAroundRevisionStats(container, stats) 91 elif stat_type == 'alert_summary': 92 processed_stats = self._GetAlertSummaryStats(container, stats) 93 94 self.RenderHtml('stats.html', { 95 'title': title, 96 'waiting': not have_stats, 97 'have_stats': have_stats, 98 'type': stat_type, 99 'stats': processed_stats, 100 'processed': processed, 101 'total': total, 102 }) 103 104 def _DisplayForm(self): 105 """Displays a form for requesting a set of statistics.""" 106 master = ndb.Key('Master', 'ChromiumPerf') 107 bots = graph_data.Bot.query(ancestor=master).fetch(keys_only=True) 108 bots = [b.string_id() for b in bots] 109 sheriffs = sheriff.Sheriff.query().fetch(keys_only=True) 110 sheriffs = [s.string_id() for s in sheriffs] 111 recent_stats = StatContainer.query().order( 112 -StatContainer.timestamp).fetch(limit=20) 113 recent = [] 114 for r in recent_stats: 115 recent.append({ 116 'key': r.key.urlsafe(), 117 'title': self._GetStatTitle(r), 118 }) 119 120 self.RenderHtml('stats.html', { 121 'recent': recent, 122 'bots': bots, 123 'sheriffs': sheriffs, 124 }) 125 126 def _GetStatTitle(self, container): 127 """Returns a title string for the given stat container.""" 128 title_prefix = '' 129 if container.summary.get('name'): 130 title_prefix = '%s: ' % container.summary.get('name') 131 132 if container.stat_type == 'around_revision': 133 revision = container.summary.get('revision') 134 num_around = container.summary.get('num_around') 135 return ('%sChanges around revision %s (%s points each direction)' % 136 (title_prefix, revision, num_around)) 137 138 if container.stat_type == 'alert_summary': 139 start = container.summary.get('start_date') 140 end = container.summary.get('end_date') 141 return '%s: %s-%s' % (title_prefix, start, end) 142 143 def _GetAroundRevisionStats(self, container, stats): 144 """Fetches the template variables needed to display around-revision stats. 145 146 Args: 147 container: A StatContainer entity. 148 stats: A list of IndividualStat entities. 149 150 Returns: 151 A dictionary. 152 """ 153 data = { 154 'revision': int(container.summary['revision']), 155 'num_around': int(container.summary['num_around']), 156 'tests': [], 157 } 158 for stat in stats: 159 data['tests'].append(stat.details) 160 return data 161 162 def _GetAlertSummaryStats(self, container, stats): 163 """Gets all the template variables needed to display alert summary stats. 164 165 Args: 166 container: A StatContainer entity. 167 stats: A list of IndividualStat entities. 168 169 Returns: 170 A dictionary. 171 """ 172 def IndividualStatTimeInt(individual_stat): 173 date = individual_stat.details['date'] 174 return (int(date.split('-')[0]) * 10000 + 175 int(date.split('-')[1]) * 100 + 176 int(date.split('-')[2])) 177 178 stats.sort(key=IndividualStatTimeInt) 179 details = [s.details for s in stats] 180 categories = [ 181 'bots', 182 'test_suites', 183 'traces', 184 'bug_ids', 185 'percent_changed_buckets', 186 ] 187 axis_map = {i: d['date'] for i, d in enumerate(details)} 188 overall_summaries = {} 189 daily_summaries = {} 190 for category in categories: 191 key_names = set() 192 for d in details: 193 key_names |= set(d.get(category, {})) 194 overall_summaries[category] = [] 195 daily_summaries[category] = [] 196 for key_name in key_names: 197 pie_dict = { 198 'label': key_name, 199 'data': sum(d.get(category, {}).get(key_name, 0) for d in details) 200 } 201 overall_summaries[category].append(pie_dict) 202 daily_dict = {'label': key_name, 'data': []} 203 for i, d in enumerate(details): 204 yval = d.get(category, {}).get(key_name, 0) 205 daily_dict['data'].append([i, yval]) 206 daily_summaries[category].append(daily_dict) 207 # Sort by percent. 208 if category != 'percent_changed_buckets': 209 overall_summaries[category].sort(key=lambda d: d['data']) 210 211 data = { 212 'start_date': container.summary['start_date'], 213 'end_date': container.summary['end_date'], 214 'alert_summaries': [s.details for s in stats], 215 'axis_map': json.dumps(axis_map), 216 'overall_summaries': json.dumps(overall_summaries), 217 'daily_summaries': json.dumps(daily_summaries), 218 } 219 return data 220 221 @xsrf.TokenRequired 222 def post(self): 223 """Kicks off a task on the task queue to generate the requested stats.""" 224 if not utils.IsInternalUser(): 225 self.RenderHtml('result.html', { 226 'errors': ['Only logged-in internal users can access stats.'] 227 }) 228 return 229 230 datastore_hooks.SetPrivilegedRequest() 231 stat_type = self.request.get('type') 232 stat_container = StatContainer(stat_type=stat_type) 233 234 if stat_type == 'around_revision': 235 self._StartGeneratingStatsAroundRevision(stat_container) 236 elif stat_type == 'alert_summary': 237 self._StartGeneratingStatsForAlerts(stat_container) 238 self.redirect('/stats?key=%s' % stat_container.key.urlsafe()) 239 240 def _StartGeneratingStatsAroundRevision(self, stat_container): 241 """Adds tasks for generating around_revision stats to the task queue. 242 243 Note: Master and sheriff are hard-coded below. If we want to use this 244 to generate stats about other masters or sheriffs, we should: 245 1. Make master and sheriff specified by parameters. 246 2. Add fields on the form to specify these parameters. 247 248 Args: 249 stat_container: A StatContainer entity to populate. 250 """ 251 bots = self.request.get_all('bots') 252 bots = ['ChromiumPerf/' + bot for bot in bots] 253 sheriff_key = ndb.Key('Sheriff', 'Chromium Perf Sheriff') 254 test_query = graph_data.Test.query(graph_data.Test.sheriff == sheriff_key) 255 test_keys = test_query.fetch(keys_only=True) 256 test_keys = [k for k in test_keys if '/'.join( 257 utils.TestPath(k).split('/')[:2]) in bots] 258 summary = { 259 'revision': int(self.request.get('rev')), 260 'num_around': int(self.request.get('num_around')), 261 'name': self.request.get('name', None), 262 } 263 stat_container.summary = summary 264 stat_container.num_stats = len(test_keys) 265 stat_container.put() 266 for test_key in test_keys: 267 taskqueue.add(url='/stats_around_revision', 268 params={ 269 'revision': summary['revision'], 270 'num_around': summary['num_around'], 271 'test_key': test_key.urlsafe(), 272 'parent_key': stat_container.key.urlsafe(), 273 }, 274 queue_name=_QUEUE_NAME) 275 276 def _StartGeneratingStatsForAlerts(self, stat_container): 277 """Adds tasks for generating alert_summary stats to the task queue. 278 279 Args: 280 stat_container: A StatContainer entity to populate. 281 """ 282 def DateParts(date_string): 283 """Returns the year, month, day numbers in a yyyy-mm-dd string.""" 284 return map(int, date_string.split('-')) 285 start_date = datetime.datetime(*DateParts(self.request.get('start_date'))) 286 end_date = datetime.datetime(*DateParts(self.request.get('end_date'))) 287 288 sheriff_name = self.request.get('sheriff') 289 stat_container.summary = { 290 'name': self.request.get('name', None), 291 'start_date': self.request.get('start_date'), 292 'end_date': self.request.get('end_date'), 293 'sheriff': sheriff_name, 294 } 295 stat_container.num_stats = 0 296 stat_container.put() 297 298 date_to_enqueue = start_date 299 while date_to_enqueue <= end_date: 300 taskqueue.add(url='/stats_for_alerts', 301 params={ 302 'sheriff': sheriff_name, 303 'year': date_to_enqueue.year, 304 'month': date_to_enqueue.month, 305 'day': date_to_enqueue.day, 306 'parent_key': stat_container.key.urlsafe(), 307 }, 308 queue_name=_QUEUE_NAME) 309 date_to_enqueue += datetime.timedelta(days=1) 310 stat_container.num_stats += 1 311 stat_container.put() 312 313 314 class StatsAroundRevisionHandler(request_handler.RequestHandler): 315 """URL endpoint for tasks which generate stats before/after a revision.""" 316 317 def post(self): 318 """Task queue task to get stats before/after a revision of a single Test. 319 320 Request parameters: 321 revision: A central revision to look around. 322 num_around: The number of points before and after the given revision. 323 test_key: The urlsafe string of a Test key. 324 parent_key: The urlsafe string of a StatContainer key. 325 """ 326 datastore_hooks.SetPrivilegedRequest() 327 328 revision = int(self.request.get('revision')) 329 num_around = int(self.request.get('num_around'), 10) 330 test_key = ndb.Key(urlsafe=self.request.get('test_key')) 331 container_key = ndb.Key(urlsafe=self.request.get('parent_key')) 332 333 # Get the Rows and values before and starting from the given revision. 334 before_revs = graph_data.Row.query( 335 graph_data.Row.parent_test == test_key, 336 graph_data.Row.revision < revision).order( 337 -graph_data.Row.revision).fetch(limit=num_around) 338 before_vals = [b.value for b in before_revs] 339 after_revs = graph_data.Row.query( 340 graph_data.Row.parent_test == test_key, 341 graph_data.Row.revision >= revision).order( 342 graph_data.Row.revision).fetch(limit=num_around) 343 after_vals = [a.value for a in after_revs] 344 345 # There may be no Row at the particular revision requested; if so, we use 346 # the first revision after the given revision. 347 actual_revision = None 348 if after_vals: 349 actual_revision = after_revs[0].revision 350 351 test = test_key.get() 352 improvement_direction = self._ImprovementDirection(test) 353 median_before = math_utils.Median(before_vals) 354 median_after = math_utils.Median(after_vals) 355 mean_before = math_utils.Median(before_vals) 356 mean_after = math_utils.Median(after_vals) 357 details = { 358 'test_path': utils.TestPath(test_key), 359 'improvement_direction': improvement_direction, 360 'actual_revision': actual_revision, 361 'median_before': '%.2f' % median_before, 362 'median_after': '%.2f' % median_after, 363 'median_percent_improved': self._PercentImproved( 364 median_before, median_after, improvement_direction), 365 'mean_before': '%.2f' % mean_before, 366 'mean_after': '%.2f' % mean_after, 367 'mean_percent_improved': self._PercentImproved( 368 mean_before, mean_after, improvement_direction), 369 'std': '%.2f' % math_utils.StandardDeviation(before_vals + after_vals), 370 } 371 new_stat = IndividualStat(parent=container_key, details=details) 372 new_stat.put() 373 374 def _ImprovementDirection(self, test): 375 """Returns a string describing improvement direction of a Test.""" 376 if test.improvement_direction == anomaly.UP: 377 return 'up' 378 if test.improvement_direction == anomaly.DOWN: 379 return 'down' 380 return 'unknown' 381 382 def _PercentImproved(self, before, after, improvement_direction): 383 """Returns a string containing percent improvement.""" 384 if math.isnan(before) or math.isnan(after): 385 return 'NaN' 386 if before == 0: 387 return anomaly.FREAKIN_HUGE 388 percent_improved = ((after - before) / before) * 100 389 if improvement_direction == 'down' and percent_improved != 0: 390 percent_improved = -percent_improved 391 return '%.2f' % percent_improved 392 393 394 class StatsForAlertsHandler(request_handler.RequestHandler): 395 """URL endpoint for tasks which generate stats about alerts.""" 396 397 def post(self): 398 """Task queue task to process a single day's alerts for a sheriff.""" 399 datastore_hooks.SetPrivilegedRequest() 400 container_key = ndb.Key(urlsafe=self.request.get('parent_key')) 401 sheriff_key = ndb.Key('Sheriff', self.request.get('sheriff')) 402 year = int(self.request.get('year')) 403 month = int(self.request.get('month')) 404 day = int(self.request.get('day')) 405 406 # Fetch all of the alerts for the day. 407 start_time = datetime.datetime(year, month, day) 408 end_time = start_time + datetime.timedelta(days=1) 409 alerts = anomaly.Anomaly.query( 410 anomaly.Anomaly.timestamp >= start_time, 411 anomaly.Anomaly.timestamp < end_time, 412 anomaly.Anomaly.sheriff == sheriff_key).fetch() 413 414 details = collections.defaultdict(dict) 415 details['date'] = '%s-%s-%s' % (year, month, day) 416 for alert in alerts: 417 self._AddAlert(alert, details) 418 new_stat = IndividualStat(parent=container_key, details=details) 419 new_stat.put() 420 421 def _IncrementDict(self, dictionary, key): 422 """Increments a count in a dictionary.""" 423 dictionary[key] = dictionary.get(key, 0) + 1 424 425 def _AddAlert(self, anomaly_entity, details): 426 """Adds the given Anomaly to the stats for the day. 427 428 Args: 429 anomaly_entity: An Anomaly entity. 430 details: A dictionary of details for one IndividualStat. 431 """ 432 test_path_parts = anomaly_entity.test.flat()[1::2] 433 bot = '%s/%s' % (test_path_parts[0], test_path_parts[1]) 434 suite = test_path_parts[2] 435 test = '/'.join(test_path_parts[2:]) 436 trace = anomaly_entity.test.string_id() 437 percent_changed_bucket = self._PercentChangedBucket( 438 anomaly_entity.percent_changed) 439 440 # Increment counts for each category that this alert belongs to. 441 self._IncrementDict(details['bots'], bot) 442 self._IncrementDict(details['test_suites'], suite) 443 self._IncrementDict(details['tests'], test) 444 self._IncrementDict(details['traces'], trace) 445 if anomaly_entity.bug_id: 446 self._IncrementDict(details['bug_ids'], anomaly_entity.bug_id) 447 self._IncrementDict(details['percent_changed_buckets'], 448 percent_changed_bucket) 449 450 def _PercentChangedBucket(self, percent_changed): 451 """Returns the name of a percent-changed bucket to put alerts into.""" 452 percent_changed = abs(percent_changed) 453 percent_changed_bucket = None 454 for bucket in _PERCENT_CHANGED_BUCKETS: 455 if percent_changed < bucket: 456 return '%02d%%' % bucket 457 if not percent_changed_bucket: 458 return 'largest' 459