# Home | History | Annotate | Download | only in perf_expectations
      1 #!/usr/bin/env python
      2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
      3 # Use of this source code is governed by a BSD-style license that can be
      4 # found in the LICENSE file.
      5 
      6 # For instructions see:
      7 # http://www.chromium.org/developers/tree-sheriffs/perf-sheriffs
      8 
      9 import hashlib
     10 import math
     11 import optparse
     12 import os
     13 import re
     14 import subprocess
     15 import sys
     16 import time
     17 import urllib2
     18 
     19 
     20 try:
     21   import json
     22 except ImportError:
     23   import simplejson as json
     24 
     25 
__version__ = '1.0'
# Directory containing this script; the default config path is relative to it.
EXPECTATIONS_DIR = os.path.dirname(os.path.abspath(__file__))
DEFAULT_CONFIG_FILE = os.path.join(EXPECTATIONS_DIR,
                                   'chromium_perf_expectations.cfg')
# Fractional slack applied to improve/regress bounds when an expectation
# does not specify its own 'tolerance' value.
DEFAULT_TOLERANCE = 0.05
USAGE = ''
     32 
     33 
     34 def ReadFile(filename):
     35   try:
     36     file = open(filename, 'rb')
     37   except IOError, e:
     38     print >> sys.stderr, ('I/O Error reading file %s(%s): %s' %
     39                           (filename, e.errno, e.strerror))
     40     raise e
     41   contents = file.read()
     42   file.close()
     43   return contents
     44 
     45 
     46 def ConvertJsonIntoDict(string):
     47   """Read a JSON string and convert its contents into a Python datatype."""
     48   if len(string) == 0:
     49     print >> sys.stderr, ('Error could not parse empty string')
     50     raise Exception('JSON data missing')
     51 
     52   try:
     53     jsondata = json.loads(string)
     54   except ValueError, e:
     55     print >> sys.stderr, ('Error parsing string: "%s"' % string)
     56     raise e
     57   return jsondata
     58 
     59 
     60 # Floating point representation of last time we fetched a URL.
     61 last_fetched_at = None
     62 def FetchUrlContents(url):
     63   global last_fetched_at
     64   if last_fetched_at and ((time.time() - last_fetched_at) <= 0.5):
     65     # Sleep for half a second to avoid overloading the server.
     66     time.sleep(0.5)
     67   try:
     68     last_fetched_at = time.time()
     69     connection = urllib2.urlopen(url)
     70   except urllib2.HTTPError, e:
     71     if e.code == 404:
     72       return None
     73     raise e
     74   text = connection.read().strip()
     75   connection.close()
     76   return text
     77 
     78 
     79 def GetRowData(data, key):
     80   rowdata = []
     81   # reva and revb always come first.
     82   for subkey in ['reva', 'revb']:
     83     if subkey in data[key]:
     84       rowdata.append('"%s": %s' % (subkey, data[key][subkey]))
     85   # Strings, like type, come next.
     86   for subkey in ['type', 'better']:
     87     if subkey in data[key]:
     88       rowdata.append('"%s": "%s"' % (subkey, data[key][subkey]))
     89   # Finally the main numbers come last.
     90   for subkey in ['improve', 'regress', 'tolerance']:
     91     if subkey in data[key]:
     92       rowdata.append('"%s": %s' % (subkey, data[key][subkey]))
     93   return rowdata
     94 
     95 
     96 def GetRowDigest(rowdata, key):
     97   sha1 = hashlib.sha1()
     98   rowdata = [str(possibly_unicode_string).encode('ascii')
     99              for possibly_unicode_string in rowdata]
    100   sha1.update(str(rowdata) + key)
    101   return sha1.hexdigest()[0:8]
    102 
    103 
    104 def WriteJson(filename, data, keys, calculate_sha1=True):
    105   """Write a list of |keys| in |data| to the file specified in |filename|."""
    106   try:
    107     file = open(filename, 'wb')
    108   except IOError, e:
    109     print >> sys.stderr, ('I/O Error writing file %s(%s): %s' %
    110                           (filename, e.errno, e.strerror))
    111     return False
    112   jsondata = []
    113   for key in keys:
    114     rowdata = GetRowData(data, key)
    115     if calculate_sha1:
    116       # Include an updated checksum.
    117       rowdata.append('"sha1": "%s"' % GetRowDigest(rowdata, key))
    118     else:
    119       if 'sha1' in data[key]:
    120         rowdata.append('"sha1": "%s"' % (data[key]['sha1']))
    121     jsondata.append('"%s": {%s}' % (key, ', '.join(rowdata)))
    122   jsondata.append('"load": true')
    123   jsontext = '{%s\n}' % ',\n '.join(jsondata)
    124   file.write(jsontext + '\n')
    125   file.close()
    126   return True
    127 
    128 
    129 def FloatIsInt(f):
    130   epsilon = 1.0e-10
    131   return abs(f - int(f)) <= epsilon
    132 
    133 
# Last expectation key whose heading OutputMessage printed, so repeated
# messages for the same key are grouped under one heading.
last_key_printed = None
def Main(args):
  """Regenerate perf expectation ranges from live perf summary data.

  Reads the config file given by --config, fetches the *-summary.dat for
  each expectation key whose checksum is stale, recomputes the
  improve/regress bounds (with tolerance applied), and rewrites the
  expectations file if anything changed.  With --checksum, only reports
  (via the return code) whether any row's sha1 is stale.  Returns a
  process exit code: 0 on success, 1 on error or pending changes.
  """
  def OutputMessage(message, verbose_message=True):
    """Print |message| indented under a heading for the current |key|.

    Messages flagged verbose_message are suppressed unless --verbose is
    set.  |options| and |key| are read from the enclosing scope.
    """
    global last_key_printed
    if not options.verbose and verbose_message:
      return

    if key != last_key_printed:
      last_key_printed = key
      print '\n' + key + ':'
    print '  %s' % message

  parser = optparse.OptionParser(usage=USAGE, version=__version__)
  parser.add_option('-v', '--verbose', action='store_true', default=False,
                    help='enable verbose output')
  parser.add_option('-s', '--checksum', action='store_true',
                    help='test if any changes are pending')
  parser.add_option('-c', '--config', dest='config_file',
                    default=DEFAULT_CONFIG_FILE,
                    help='set the config file to FILE', metavar='FILE')
  # NOTE(review): the full argv (including the script name) is parsed here,
  # so argv[0] ends up in the leftover positional args, which are unused.
  options, args = parser.parse_args(args)

  if options.verbose:
    print 'Verbose output enabled.'

  config = ConvertJsonIntoDict(ReadFile(options.config_file))

  # Get the list of summaries for a test.
  base_url = config['base_url']
  # Make the perf expectations file relative to the path of the config file.
  perf_file = os.path.join(
    os.path.dirname(options.config_file), config['perf_file'])
  perf = ConvertJsonIntoDict(ReadFile(perf_file))

  # Fetch graphs.dat for this combination.
  perfkeys = perf.keys()
  # In perf_expectations.json, ignore the 'load' key.
  perfkeys.remove('load')
  perfkeys.sort()

  write_new_expectations = False
  found_checksum_mismatch = False
  for key in perfkeys:
    value = perf[key]
    tolerance = value.get('tolerance', DEFAULT_TOLERANCE)
    better = value.get('better', None)

    # Verify the checksum.  'sha1' is removed before hashing because the
    # stored digest covers only the other row fields.
    original_checksum = value.get('sha1', '')
    if 'sha1' in value:
      del value['sha1']
    rowdata = GetRowData(perf, key)
    computed_checksum = GetRowDigest(rowdata, key)
    if original_checksum == computed_checksum:
      OutputMessage('checksum matches, skipping')
      continue
    elif options.checksum:
      found_checksum_mismatch = True
      continue

    # Skip expectations that are missing a reva or revb.  We can't generate
    # expectations for those.
    if not(value.has_key('reva') and value.has_key('revb')):
      OutputMessage('missing revision range, skipping')
      continue
    revb = int(value['revb'])
    reva = int(value['reva'])

    # Ensure that reva is less than revb.
    if reva > revb:
      temp = reva
      reva = revb
      revb = temp

    # Get the system/test/graph/tracename and reftracename for the current key.
    matchData = re.match(r'^([^/]+)\/([^/]+)\/([^/]+)\/([^/]+)$', key)
    if not matchData:
      OutputMessage('cannot parse key, skipping')
      continue
    system = matchData.group(1)
    test = matchData.group(2)
    graph = matchData.group(3)
    tracename = matchData.group(4)
    reftracename = tracename + '_ref'

    # Create the summary_url and get the json data for that URL.
    # FetchUrlContents() may sleep to avoid overloading the server with
    # requests.
    summary_url = '%s/%s/%s/%s-summary.dat' % (base_url, system, test, graph)
    summaryjson = FetchUrlContents(summary_url)
    if not summaryjson:
      OutputMessage('ERROR: cannot find json data, please verify',
                    verbose_message=False)
      return 0

    # Set value's type to 'relative' by default.
    # NOTE(review): a type other than 'relative' or 'absolute' would leave
    # regress/improve unbound below and raise NameError — confirm the
    # expectations file only ever uses these two types.
    value_type = value.get('type', 'relative')

    summarylist = summaryjson.split('\n')
    trace_values = {}
    traces = [tracename]
    if value_type == 'relative':
      traces += [reftracename]
    for trace in traces:
      trace_values.setdefault(trace, {})

    # Find the high and low values for each of the traces.
    # NOTE(review): this scan assumes summary rows are ordered newest
    # revision first — scanning starts at revb and stops below reva.
    scanning = False
    for line in summarylist:
      jsondata = ConvertJsonIntoDict(line)

      # TODO(iannucci): Remove this once http://crbug.com/336471 is resolved.
      if 'Force the Chro' in jsondata['rev']:
        continue

      if int(jsondata['rev']) <= revb:
        scanning = True
      if int(jsondata['rev']) < reva:
        break

      # We found the upper revision in the range.  Scan for trace data until we
      # find the lower revision in the range.
      if scanning:
        for trace in traces:
          if trace not in jsondata['traces']:
            OutputMessage('trace %s missing' % trace)
            continue
          if type(jsondata['traces'][trace]) != type([]):
            OutputMessage('trace %s format not recognized' % trace)
            continue
          try:
            tracevalue = float(jsondata['traces'][trace][0])
          except ValueError:
            OutputMessage('trace %s value error: %s' % (
                trace, str(jsondata['traces'][trace][0])))
            continue

          # Seed high/low with the first value seen, then widen the range.
          for bound in ['high', 'low']:
            trace_values[trace].setdefault(bound, tracevalue)

          trace_values[trace]['high'] = max(trace_values[trace]['high'],
                                            tracevalue)
          trace_values[trace]['low'] = min(trace_values[trace]['low'],
                                           tracevalue)

    if 'high' not in trace_values[tracename]:
      OutputMessage('no suitable traces matched, skipping')
      continue

    if value_type == 'relative':
      # Calculate assuming high deltas are regressions and low deltas are
      # improvements.
      regress = (float(trace_values[tracename]['high']) -
                 float(trace_values[reftracename]['low']))
      improve = (float(trace_values[tracename]['low']) -
                 float(trace_values[reftracename]['high']))
    elif value_type == 'absolute':
      # Calculate assuming high absolutes are regressions and low absolutes are
      # improvements.
      regress = float(trace_values[tracename]['high'])
      improve = float(trace_values[tracename]['low'])

    # So far we've assumed better is lower (regress > improve).  If the actual
    # values for regress and improve are equal, though, and better was not
    # specified, alert the user so we don't let them create a new file with
    # ambiguous rules.
    if better == None and regress == improve:
      OutputMessage('regress (%s) is equal to improve (%s), and "better" is '
                    'unspecified, please fix by setting "better": "lower" or '
                    '"better": "higher" in this perf trace\'s expectation' % (
                    regress, improve), verbose_message=False)
      return 1

    # If the existing values assume regressions are low deltas relative to
    # improvements, swap our regress and improve.  This value must be a
    # scores-like result.
    if 'regress' in perf[key] and 'improve' in perf[key]:
      if perf[key]['regress'] < perf[key]['improve']:
        assert(better != 'lower')
        better = 'higher'
        temp = regress
        regress = improve
        improve = temp
      else:
        # Sometimes values are equal, e.g., when they are both 0,
        # 'better' may still be set to 'higher'.
        assert(better != 'higher' or
               perf[key]['regress'] == perf[key]['improve'])
        better = 'lower'

    # If both were ints keep as int, otherwise use the float version.
    originally_ints = False
    if FloatIsInt(regress) and FloatIsInt(improve):
      originally_ints = True

    # Widen both bounds outward by the tolerance fraction, rounding away
    # from the measured range when the original values were integral.
    if better == 'higher':
      if originally_ints:
        regress = int(math.floor(regress - abs(regress*tolerance)))
        improve = int(math.ceil(improve + abs(improve*tolerance)))
      else:
        regress = regress - abs(regress*tolerance)
        improve = improve + abs(improve*tolerance)
    else:
      if originally_ints:
        improve = int(math.floor(improve - abs(improve*tolerance)))
        regress = int(math.ceil(regress + abs(regress*tolerance)))
      else:
        improve = improve - abs(improve*tolerance)
        regress = regress + abs(regress*tolerance)

    # Calculate the new checksum to test if this is the only thing that may have
    # changed.
    checksum_rowdata = GetRowData(perf, key)
    new_checksum = GetRowDigest(checksum_rowdata, key)

    if ('regress' in perf[key] and 'improve' in perf[key] and
        perf[key]['regress'] == regress and perf[key]['improve'] == improve and
        original_checksum == new_checksum):
      OutputMessage('no change')
      continue

    write_new_expectations = True
    OutputMessage('traces: %s' % trace_values, verbose_message=False)
    OutputMessage('before: %s' % perf[key], verbose_message=False)
    perf[key]['regress'] = regress
    perf[key]['improve'] = improve
    OutputMessage('after: %s' % perf[key], verbose_message=False)

  if options.checksum:
    if found_checksum_mismatch:
      return 1
    else:
      return 0

  if write_new_expectations:
    print '\nWriting expectations... ',
    WriteJson(perf_file, perf, perfkeys)
    print 'done'
  else:
    if options.verbose:
      print ''
    print 'No changes.'
  return 0
    377 
    378 
if __name__ == '__main__':
  # Main() receives the full argv; optparse leaves argv[0] in the unused
  # leftover positional args, so this is harmless.
  sys.exit(Main(sys.argv))
    381