Home | History | Annotate | Download | only in benchtools
      1 #!/usr/bin/env python
      2 # Copyright (c) 2014 The Chromium Authors. All rights reserved.
      3 # Use of this source code is governed by a BSD-style license that can be
      4 # found in the LICENSE file.
      5 
      6 
      7 """rebase.py: standalone script to batch update bench expectations.
      8 
      9     Requires gsutil to access gs://chromium-skia-gm and Rietveld credentials.
     10 
     11     Usage:
     12       Copy script to a separate dir outside Skia repo. The script will create a
     13           skia dir on the first run to host the repo, and will create/delete
     14           temp dirs as needed.
     15       ./rebase.py --githash <githash prefix to use for getting bench data>
     16 """
     17 
     18 
     19 import argparse
     20 import filecmp
     21 import os
     22 import re
     23 import shutil
     24 import subprocess
     25 import time
     26 import urllib2
     27 
     28 
     29 # googlesource url that has most recent Skia git hash info.
     30 SKIA_GIT_HEAD_URL = 'https://skia.googlesource.com/skia/+log/HEAD'
     31 
     32 # Google Storage bench file prefix.
     33 GS_PREFIX = 'gs://chromium-skia-gm/perfdata'
     34 
     35 # Regular expression for matching githash data.
     36 HA_RE = '<a href="/skia/\+/([0-9a-f]+)">'
     37 HA_RE_COMPILED = re.compile(HA_RE)
     38 
     39 
     40 def get_git_hashes():
     41   print 'Getting recent git hashes...'
     42   hashes = HA_RE_COMPILED.findall(
     43       urllib2.urlopen(SKIA_GIT_HEAD_URL).read())
     44 
     45   return hashes
     46 
     47 def filter_file(f):
     48   if f.find('_msaa') > 0 or f.find('_record') > 0:
     49     return True
     50 
     51   return False
     52 
     53 def clean_dir(d):
     54   if os.path.exists(d):
     55     shutil.rmtree(d)
     56   os.makedirs(d)
     57 
     58 def get_gs_filelist(p, h):
     59   print 'Looking up for the closest bench files in Google Storage...'
     60   proc = subprocess.Popen(['gsutil', 'ls',
     61       '/'.join([GS_PREFIX, p, 'bench_' + h + '_data_skp_*'])],
     62           stdout=subprocess.PIPE)
     63   out, err = proc.communicate()
     64   if err or not out:
     65     return []
     66   return [i for i in out.strip().split('\n') if not filter_file(i)]
     67 
     68 def download_gs_files(p, h, gs_dir):
     69   print 'Downloading raw bench files from Google Storage...'
     70   proc = subprocess.Popen(['gsutil', 'cp',
     71       '/'.join([GS_PREFIX, p, 'bench_' + h + '_data_skp_*']),
     72           '%s/%s' % (gs_dir, p)],
     73           stdout=subprocess.PIPE)
     74   out, err = proc.communicate()
     75   if err:
     76     clean_dir(gs_dir)
     77     return False
     78   files = 0
     79   for f in os.listdir(os.path.join(gs_dir, p)):
     80     if filter_file(f):
     81       os.remove(os.path.join(gs_dir, p, f))
     82     else:
     83       files += 1
     84   if files:
     85     return True
     86   return False
     87 
     88 def get_expectations_dict(f):
     89   """Given an expectations file f, returns a dictionary of data."""
     90   # maps row_key to (expected, lower_bound, upper_bound) float tuple.
     91   dic = {}
     92   for l in open(f).readlines():
     93     line_parts = l.strip().split(',')
     94     if line_parts[0].startswith('#') or len(line_parts) != 5:
     95       continue
     96     dic[','.join(line_parts[:2])] = (float(line_parts[2]), float(line_parts[3]),
     97                                      float(line_parts[4]))
     98 
     99   return dic
    100 
    101 def calc_expectations(p, h, gs_dir, exp_dir, repo_dir, extra_dir, extra_hash):
    102   exp_filename = 'bench_expectations_%s.txt' % p
    103   exp_fullname = os.path.join(exp_dir, exp_filename)
    104   proc = subprocess.Popen(['python', 'skia/bench/gen_bench_expectations.py',
    105       '-r', h, '-b', p, '-d', os.path.join(gs_dir, p), '-o', exp_fullname],
    106               stdout=subprocess.PIPE)
    107   out, err = proc.communicate()
    108   if err:
    109     print 'ERR_CALCULATING_EXPECTATIONS: ' + err
    110     return False
    111   print 'CALCULATED_EXPECTATIONS: ' + out
    112   if extra_dir:  # Adjust data with the ones in extra_dir
    113     print 'USE_EXTRA_DATA_FOR_ADJUSTMENT.'
    114     proc = subprocess.Popen(['python', 'skia/bench/gen_bench_expectations.py',
    115         '-r', extra_hash, '-b', p, '-d', os.path.join(extra_dir, p), '-o',
    116             os.path.join(extra_dir, exp_filename)],
    117                 stdout=subprocess.PIPE)
    118     out, err = proc.communicate()
    119     if err:
    120       print 'ERR_CALCULATING_EXTRA_EXPECTATIONS: ' + err
    121       return False
    122     extra_dic = get_expectations_dict(os.path.join(extra_dir, exp_filename))
    123     output_lines = []
    124     for l in open(exp_fullname).readlines():
    125       parts = l.strip().split(',')
    126       if parts[0].startswith('#') or len(parts) != 5:
    127         output_lines.append(l.strip())
    128         continue
    129       key = ','.join(parts[:2])
    130       if key in extra_dic:
    131         exp, lb, ub = (float(parts[2]), float(parts[3]), float(parts[4]))
    132         alt, _, _ = extra_dic[key]
    133         avg = (exp + alt) / 2
    134         # Keeps the extra range in lower/upper bounds from two actual values.
    135         new_lb = min(exp, alt) - (exp - lb)
    136         new_ub = max(exp, alt) + (ub - exp)
    137         output_lines.append('%s,%.2f,%.2f,%.2f' % (key, avg, new_lb, new_ub))
    138       else:
    139         output_lines.append(l.strip())
    140     with open(exp_fullname, 'w') as f:
    141       f.write('\n'.join(output_lines))
    142 
    143   repo_file = os.path.join(repo_dir, 'expectations', 'bench', exp_filename)
    144   if (os.path.isfile(repo_file) and
    145       filecmp.cmp(repo_file, os.path.join(exp_dir, exp_filename))):
    146       print 'NO CHANGE ON %s' % repo_file
    147       return False
    148   return True
    149 
    150 def checkout_or_update_skia(repo_dir):
    151   status = True
    152   old_cwd = os.getcwd()
    153   os.chdir(repo_dir)
    154   print 'CHECK SKIA REPO...'
    155   if subprocess.call(['git', 'pull'],
    156                      stderr=subprocess.PIPE):
    157     print 'Checking out Skia from git, please be patient...'
    158     os.chdir(old_cwd)
    159     clean_dir(repo_dir)
    160     os.chdir(repo_dir)
    161     if subprocess.call(['git', 'clone', '-q', '--depth=50', '--single-branch',
    162                         'https://skia.googlesource.com/skia.git', '.']):
    163       status = False
    164   subprocess.call(['git', 'checkout', 'master'])
    165   subprocess.call(['git', 'pull'])
    166   os.chdir(old_cwd)
    167   return status
    168 
    169 def git_commit_expectations(repo_dir, exp_dir, update_li, h, commit,
    170                             extra_hash):
    171   if extra_hash:
    172     extra_hash = ', adjusted with ' + extra_hash
    173   commit_msg = """manual bench rebase after %s%s
    174 
    175 TBR=robertphillips (at] google.com
    176 
    177 Bypassing trybots:
    178 NOTRY=true""" % (h, extra_hash)
    179   old_cwd = os.getcwd()
    180   os.chdir(repo_dir)
    181   upload = ['git', 'cl', 'upload', '-f', '--bypass-hooks',
    182             '--bypass-watchlists', '-m', commit_msg]
    183   branch = exp_dir.split('/')[-1]
    184   if commit:
    185     upload.append('--use-commit-queue')
    186   cmds = ([['git', 'checkout', 'master'],
    187            ['git', 'pull'],
    188            ['git', 'checkout', '-b', branch, '-t', 'origin/master']] +
    189           [['cp', '%s/%s' % (exp_dir, f), 'expectations/bench'] for f in
    190            update_li] +
    191           [['git', 'add'] + ['expectations/bench/%s' % i for i in update_li],
    192            ['git', 'commit', '-m', commit_msg],
    193            upload,
    194            ['git', 'checkout', 'master'],
    195            ['git', 'branch', '-D', branch],
    196           ])
    197   status = True
    198   for cmd in cmds:
    199     print 'Running ' + ' '.join(cmd)
    200     if subprocess.call(cmd):
    201       print 'FAILED. Please check if skia git repo is present.'
    202       subprocess.call(['git', 'checkout', 'master'])
    203       status = False
    204       break
    205   os.chdir(old_cwd)
    206   return status
    207 
    208 def delete_dirs(li):
    209   for d in li:
    210     print 'Deleting directory %s' % d
    211     shutil.rmtree(d)
    212 
    213 
    214 def main():
    215   d = os.path.dirname(os.path.abspath(__file__))
    216   os.chdir(d)
    217   if not subprocess.call(['git', 'rev-parse'], stderr=subprocess.PIPE):
    218     print 'Please copy script to a separate dir outside git repos to use.'
    219     return
    220   parser = argparse.ArgumentParser()
    221   parser.add_argument('--githash',
    222                       help=('Githash prefix (7+ chars) to rebaseline to. If '
    223                             'a second one is supplied after comma, and it has '
    224                             'corresponding bench data, will shift the range '
    225                             'center to the average of two expected values.'))
    226   parser.add_argument('--bots',
    227                       help=('Comma-separated list of bots to work on. If no '
    228                             'matching bots are found in the list, will default '
    229                             'to processing all bots.'))
    230   parser.add_argument('--commit', action='store_true',
    231                       help='Whether to commit changes automatically.')
    232   args = parser.parse_args()
    233 
    234   repo_dir = os.path.join(d, 'skia')
    235   if not os.path.exists(repo_dir):
    236     os.makedirs(repo_dir)
    237   if not checkout_or_update_skia(repo_dir):
    238     print 'ERROR setting up Skia repo at %s' % repo_dir
    239     return 1
    240 
    241   file_in_repo = os.path.join(d, 'skia/experimental/benchtools/rebase.py')
    242   if not filecmp.cmp(__file__, file_in_repo):
    243     shutil.copy(file_in_repo, __file__)
    244     print 'Updated this script from repo; please run again.'
    245     return
    246 
    247   all_platforms = []  # Find existing list of platforms with expectations.
    248   for item in os.listdir(os.path.join(d, 'skia/expectations/bench')):
    249     all_platforms.append(
    250         item.replace('bench_expectations_', '').replace('.txt', ''))
    251 
    252   platforms = []
    253   # If at least one given bot is in all_platforms, use list of valid args.bots.
    254   if args.bots:
    255     bots = args.bots.strip().split(',')
    256     for bot in bots:
    257       if bot in all_platforms:  # Filters platforms with given bot list.
    258         platforms.append(bot)
    259   if not platforms:  # Include all existing platforms with expectations.
    260     platforms = all_platforms
    261 
    262   if not args.githash or len(args.githash) < 7:
    263     raise Exception('Please provide --githash with a longer prefix (7+).')
    264   githashes = args.githash.strip().split(',')
    265   if len(githashes[0]) < 7:
    266     raise Exception('Please provide --githash with longer prefixes (7+).')
    267   commit = False
    268   if args.commit:
    269     commit = True
    270   rebase_hash = githashes[0][:7]
    271   extra_hash = ''
    272   if len(githashes) == 2:
    273     extra_hash = githashes[1][:7]
    274   hashes = get_git_hashes()
    275   short_hashes = [h[:7] for h in hashes]
    276   if (rebase_hash not in short_hashes or
    277       (extra_hash and extra_hash not in short_hashes) or
    278       rebase_hash == extra_hash):
    279     raise Exception('Provided --githashes not found, or identical!')
    280   if extra_hash:
    281     extra_hash = hashes[short_hashes.index(extra_hash)]
    282   hashes = hashes[:short_hashes.index(rebase_hash) + 1]
    283   update_li = []
    284 
    285   ts_str = '%s' % time.time()
    286   gs_dir = os.path.join(d, 'gs' + ts_str)
    287   exp_dir = os.path.join(d, 'exp' + ts_str)
    288   extra_dir = os.path.join(d, 'extra' + ts_str)
    289   clean_dir(gs_dir)
    290   clean_dir(exp_dir)
    291   clean_dir(extra_dir)
    292   for p in platforms:
    293     clean_dir(os.path.join(gs_dir, p))
    294     clean_dir(os.path.join(extra_dir, p))
    295     hash_to_use = ''
    296     for h in reversed(hashes):
    297       li = get_gs_filelist(p, h)
    298       if not len(li):  # no data
    299         continue
    300       if download_gs_files(p, h, gs_dir):
    301         print 'Copied %s/%s' % (p, h)
    302         hash_to_use = h
    303         break
    304       else:
    305         print 'DOWNLOAD BENCH FAILED %s/%s' % (p, h)
    306         break
    307     if hash_to_use:
    308       if extra_hash and download_gs_files(p, extra_hash, extra_dir):
    309         print 'Copied extra data %s/%s' % (p, extra_hash)
    310         if calc_expectations(p, h, gs_dir, exp_dir, repo_dir, extra_dir,
    311                              extra_hash):
    312           update_li.append('bench_expectations_%s.txt' % p)
    313       elif calc_expectations(p, h, gs_dir, exp_dir, repo_dir, '', ''):
    314         update_li.append('bench_expectations_%s.txt' % p)
    315   if not update_li:
    316     print 'No bench data to update after %s!' % args.githash
    317   elif not git_commit_expectations(
    318       repo_dir, exp_dir, update_li, rebase_hash, commit, extra_hash):
    319     print 'ERROR uploading expectations using git.'
    320   elif not commit:
    321     print 'CL created. Please take a look at the link above.'
    322   else:
    323     print 'New bench baselines should be in CQ now.'
    324   delete_dirs([gs_dir, exp_dir, extra_dir])
    325 
    326 
    327 if __name__ == "__main__":
    328   main()
    329