Home | History | Annotate | Download | only in page
      1 # Copyright 2012 The Chromium Authors. All rights reserved.
      2 # Use of this source code is governed by a BSD-style license that can be
      3 # found in the LICENSE file.
      4 
      5 import csv
      6 import inspect
      7 import os
      8 
      9 from telemetry.page import page as page_module
     10 from telemetry.page import page_set_archive_info
     11 from telemetry.util import cloud_storage
     12 
# Module-level aliases for the cloud_storage bucket constants. These three
# values are the only non-empty bucket names PageSet accepts as its privacy
# bucket (see PageSet._IsValidPrivacyBucket).
PUBLIC_BUCKET = cloud_storage.PUBLIC_BUCKET
PARTNER_BUCKET = cloud_storage.PARTNER_BUCKET
INTERNAL_BUCKET = cloud_storage.INTERNAL_BUCKET
     16 
     17 
     18 class PageSetError(Exception):
     19   pass
     20 
     21 
     22 class PageSet(object):
     23   def __init__(self, file_path=None, archive_data_file='',
     24                credentials_path=None, user_agent_type=None,
     25                make_javascript_deterministic=True, startup_url='',
     26                serving_dirs=None, bucket=None):
     27     # The default value of file_path is location of the file that define this
     28     # page set instance's class.
     29     if file_path is None:
     30       file_path = inspect.getfile(self.__class__)
     31       # Turn pyc file into py files if we can
     32       if file_path.endswith('.pyc') and os.path.exists(file_path[:-1]):
     33         file_path = file_path[:-1]
     34 
     35     self.file_path = file_path
     36     # These attributes can be set dynamically by the page set.
     37     self.archive_data_file = archive_data_file
     38     self.credentials_path = credentials_path
     39     self.user_agent_type = user_agent_type
     40     self.make_javascript_deterministic = make_javascript_deterministic
     41     self._wpr_archive_info = None
     42     self.startup_url = startup_url
     43     self.pages = []
     44     self.serving_dirs = set()
     45     serving_dirs = [] if serving_dirs is None else serving_dirs
     46     # Makes sure that page_set's serving_dirs are absolute paths
     47     for sd in serving_dirs:
     48       if os.path.isabs(sd):
     49         self.serving_dirs.add(os.path.realpath(sd))
     50       else:
     51         self.serving_dirs.add(os.path.realpath(os.path.join(self.base_dir, sd)))
     52     if self._IsValidPrivacyBucket(bucket):
     53       self._bucket = bucket
     54     else:
     55       raise ValueError("Pageset privacy bucket %s is invalid" % bucket)
     56 
     57   @classmethod
     58   def Name(cls):
     59     return cls.__module__.split('.')[-1]
     60 
     61   @classmethod
     62   def Description(cls):
     63     if cls.__doc__:
     64       return cls.__doc__.splitlines()[0]
     65     else:
     66       return ''
     67 
     68   def AddPage(self, page):
     69     assert page.page_set is self
     70     self.pages.append(page)
     71 
     72   def AddPageWithDefaultRunNavigate(self, page_url):
     73     """ Add a simple page with url equals to page_url that contains only default
     74     RunNavigateSteps.
     75     """
     76     self.AddPage(page_module.Page(
     77       page_url, self, self.base_dir))
     78 
     79   @staticmethod
     80   def _IsValidPrivacyBucket(bucket_name):
     81     if not bucket_name:
     82       return True
     83     if (bucket_name in [PUBLIC_BUCKET, PARTNER_BUCKET, INTERNAL_BUCKET]):
     84       return True
     85     return False
     86 
     87   @property
     88   def base_dir(self):
     89     if os.path.isfile(self.file_path):
     90       return os.path.dirname(self.file_path)
     91     else:
     92       return self.file_path
     93 
     94   @property
     95   def wpr_archive_info(self):  # pylint: disable=E0202
     96     """Lazily constructs wpr_archive_info if it's not set and returns it."""
     97     if self.archive_data_file and not self._wpr_archive_info:
     98       self._wpr_archive_info = (
     99           page_set_archive_info.PageSetArchiveInfo.FromFile(
    100             os.path.join(self.base_dir, self.archive_data_file)))
    101     return self._wpr_archive_info
    102 
    103   @property
    104   def bucket(self):
    105     return self._bucket
    106 
    107   @wpr_archive_info.setter
    108   def wpr_archive_info(self, value):  # pylint: disable=E0202
    109     self._wpr_archive_info = value
    110 
    111   def ContainsOnlyFileURLs(self):
    112     for page in self.pages:
    113       if not page.is_file:
    114         return False
    115     return True
    116 
    117   def ReorderPageSet(self, results_file):
    118     """Reorders this page set based on the results of a past run."""
    119     page_set_dict = {}
    120     for page in self.pages:
    121       page_set_dict[page.url] = page
    122 
    123     pages = []
    124     with open(results_file, 'rb') as csv_file:
    125       csv_reader = csv.reader(csv_file)
    126       csv_header = csv_reader.next()
    127 
    128       if 'url' not in csv_header:
    129         raise Exception('Unusable results_file.')
    130 
    131       url_index = csv_header.index('url')
    132 
    133       for csv_row in csv_reader:
    134         if csv_row[url_index] in page_set_dict:
    135           self.AddPage(page_set_dict[csv_row[url_index]])
    136         else:
    137           raise Exception('Unusable results_file.')
    138 
    139     return pages
    140 
    141   def WprFilePathForPage(self, page):
    142     if not self.wpr_archive_info:
    143       return None
    144     return self.wpr_archive_info.WprFilePathForPage(page)
    145 
    146   def __iter__(self):
    147     return self.pages.__iter__()
    148 
    149   def __len__(self):
    150     return len(self.pages)
    151 
    152   def __getitem__(self, key):
    153     return self.pages[key]
    154 
    155   def __setitem__(self, key, value):
    156     self.pages[key] = value
    157