#!/usr/bin/python
# Copyright 2016 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
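"""Downloads trace files and their JSON metadata from WebPageTest job results."""
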
import argparse
import json
import os
import urllib
import urlparse
import zlib


WPT_TEST_URL = 'http://www.webpagetest.org/jsonResult.php?test={wpt_job}'
ERROR_MISSING_WPT_JOBS = """WPT job IDs not specified!

Use --wpt_jobs to specify a comma-separated list of IDs.

e.g. python download_from_wpt.py --wpt_jobs 1,2,3 output_dir/
"""

def WriteMetadataAndTraceToFile(
    output_path, file_name, metadata, trace_contents):
  """Writes a trace and a .meta JSON file describing it into output_path."""
  file_path = os.path.join(output_path, file_name)

  with open(file_path, 'wb') as f:
    f.write(trace_contents)

  with open('%s.meta' % file_path, 'w') as f:
    json.dump(metadata, f)


def DownloadFromWPT(wpt_job, output_path):
  """Downloads every trace of a WebPageTest job and writes job metadata alongside."""
  url = WPT_TEST_URL.format(wpt_job=wpt_job)
  job_response = urllib.urlopen(url)
  job_data = json.load(job_response)

  # Keep the top-level job fields as metadata, but drop the bulky per-run data.
  blacklist = ['runs', 'median', 'average', 'standardDeviation']
  metadata = dict(
      [(k, v) for k, v in job_data['data'].iteritems() if k not in blacklist])

  for run_id, run in job_data['data']['runs'].iteritems():
    for view_name, view in run.iteritems():
      if 'trace' not in view['rawData']:
        continue
      trace_url = view['rawData']['trace']
      parsed_url = urlparse.urlparse(trace_url)
      query = urlparse.parse_qsl(parsed_url.query)
      # The trace file name is taken from the value of the third query
      # parameter of the trace URL.
      file_name = '%s_%s' % (wpt_job, query[2][1])
      print 'Downloading %s to %s ...' % (trace_url, file_name)

      file_response = urllib.urlopen(trace_url)

      WriteMetadataAndTraceToFile(
          output_path, file_name, metadata, file_response.read())


def Main():
  parser = argparse.ArgumentParser(
      description='Download traces from WebPageTest jobs')
  parser.add_argument('output_path', help='Output path')
  parser.add_argument('--wpt_jobs', help='WebPageTest job IDs, comma-separated')
  args = parser.parse_args()

  output_path = os.path.abspath(args.output_path)

  if not args.wpt_jobs:
    parser.exit(1, ERROR_MISSING_WPT_JOBS)

  wpt_jobs = args.wpt_jobs.split(',')

  for wpt_job in wpt_jobs:
    DownloadFromWPT(wpt_job, output_path)


if __name__ == '__main__':
  Main()