Home | History | Annotate | Download | only in page
      1 # Copyright (c) 2013 The Chromium Authors. All rights reserved.
      2 # Use of this source code is governed by a BSD-style license that can be
      3 # found in the LICENSE file.
      4 import json
      5 import logging
      6 import os
      7 import re
      8 import shutil
      9 
     10 from telemetry.page import cloud_storage
     11 
     12 
     13 def _UpdateHashFile(file_path):
     14   with open(file_path + '.sha1', 'wb') as f:
     15     f.write(cloud_storage.GetHash(file_path))
     16     f.flush()
     17 
     18 
     19 class PageSetArchiveInfo(object):
     20   def __init__(self, archive_data_file_path, page_set_file_path, data):
     21     self._archive_data_file_path = archive_data_file_path
     22     self._archive_data_file_dir = os.path.dirname(archive_data_file_path)
     23     # Back pointer to the page set file.
     24     self._page_set_file_path = page_set_file_path
     25 
     26     for archive_path in data['archives']:
     27       cloud_storage.GetIfChanged(cloud_storage.DEFAULT_BUCKET, archive_path)
     28 
     29     # Map from the relative path (as it appears in the metadata file) of the
     30     # .wpr file to a list of urls it supports.
     31     self._wpr_file_to_urls = data['archives']
     32 
     33     # Map from the page url to a relative path (as it appears in the metadata
     34     # file) of the .wpr file.
     35     self._url_to_wpr_file = dict()
     36     # Find out the wpr file names for each page.
     37     for wpr_file in data['archives']:
     38       page_urls = data['archives'][wpr_file]
     39       for url in page_urls:
     40         self._url_to_wpr_file[url] = wpr_file
     41     self.temp_target_wpr_file_path = None
     42 
     43   @classmethod
     44   def FromFile(cls, file_path, page_set_file_path):
     45     cloud_storage.GetIfChanged(cloud_storage.DEFAULT_BUCKET, file_path)
     46 
     47     if os.path.exists(file_path):
     48       with open(file_path, 'r') as f:
     49         data = json.load(f)
     50         return cls(file_path, page_set_file_path, data)
     51     return cls(file_path, page_set_file_path, {'archives': {}})
     52 
     53   def WprFilePathForPage(self, page):
     54     if self.temp_target_wpr_file_path:
     55       return self.temp_target_wpr_file_path
     56     wpr_file = self._url_to_wpr_file.get(page.url, None)
     57     if wpr_file:
     58       return self._WprFileNameToPath(wpr_file)
     59     return None
     60 
     61   def AddNewTemporaryRecording(self, temp_target_wpr_file_path):
     62     self.temp_target_wpr_file_path = temp_target_wpr_file_path
     63 
     64   def AddRecordedPages(self, urls):
     65     (target_wpr_file, target_wpr_file_path) = self._NextWprFileName()
     66     for url in urls:
     67       self._SetWprFileForPage(url, target_wpr_file)
     68     shutil.move(self.temp_target_wpr_file_path, target_wpr_file_path)
     69     _UpdateHashFile(target_wpr_file_path)
     70     self._WriteToFile()
     71     self._DeleteAbandonedWprFiles()
     72 
     73   def _DeleteAbandonedWprFiles(self):
     74     # Update the metadata so that the abandoned wpr files don't have empty url
     75     # arrays.
     76     abandoned_wpr_files = self._AbandonedWprFiles()
     77     for wpr_file in abandoned_wpr_files:
     78       del self._wpr_file_to_urls[wpr_file]
     79       # Don't fail if we're unable to delete some of the files.
     80       wpr_file_path = self._WprFileNameToPath(wpr_file)
     81       try:
     82         os.remove(wpr_file_path)
     83       except Exception:
     84         logging.warning('Failed to delete file: %s' % wpr_file_path)
     85 
     86   def _AbandonedWprFiles(self):
     87     abandoned_wpr_files = []
     88     for wpr_file, urls in self._wpr_file_to_urls.iteritems():
     89       if not urls:
     90         abandoned_wpr_files.append(wpr_file)
     91     return abandoned_wpr_files
     92 
     93   def _WriteToFile(self):
     94     """Writes the metadata into the file passed as constructor parameter."""
     95     metadata = dict()
     96     metadata['description'] = (
     97         'Describes the Web Page Replay archives for a page set. Don\'t edit by '
     98         'hand! Use record_wpr for updating.')
     99     # Pointer from the metadata to the page set .json file.
    100     metadata['page_set'] = os.path.relpath(self._page_set_file_path,
    101                                            self._archive_data_file_dir)
    102     metadata['archives'] = self._wpr_file_to_urls.copy()
    103     # Don't write data for abandoned archives.
    104     abandoned_wpr_files = self._AbandonedWprFiles()
    105     for wpr_file in abandoned_wpr_files:
    106       del metadata['archives'][wpr_file]
    107 
    108     with open(self._archive_data_file_path, 'w') as f:
    109       json.dump(metadata, f, indent=4)
    110       f.flush()
    111     _UpdateHashFile(self._archive_data_file_path)
    112 
    113   def _WprFileNameToPath(self, wpr_file):
    114     return os.path.abspath(os.path.join(self._archive_data_file_dir, wpr_file))
    115 
    116   def _NextWprFileName(self):
    117     """Creates a new file name for a wpr archive file."""
    118     # The names are of the format "some_thing_number.wpr". Read the numbers.
    119     highest_number = -1
    120     base = None
    121     for wpr_file in self._wpr_file_to_urls:
    122       match = re.match(r'(?P<BASE>.*)_(?P<NUMBER>[0-9]+)\.wpr', wpr_file)
    123       if not match:
    124         raise Exception('Illegal wpr file name ' + wpr_file)
    125       highest_number = max(int(match.groupdict()['NUMBER']), highest_number)
    126       if base and match.groupdict()['BASE'] != base:
    127         raise Exception('Illegal wpr file name ' + wpr_file +
    128                         ', doesn\'t begin with ' + base)
    129       base = match.groupdict()['BASE']
    130     if not base:
    131       # If we're creating a completely new info file, use the base name of the
    132       # page set file.
    133       base = os.path.splitext(os.path.basename(self._page_set_file_path))[0]
    134     new_filename = '%s_%03d.wpr' % (base, highest_number + 1)
    135     return new_filename, self._WprFileNameToPath(new_filename)
    136 
    137   def _SetWprFileForPage(self, url, wpr_file):
    138     """For modifying the metadata when we're going to record a new archive."""
    139     old_wpr_file = self._url_to_wpr_file.get(url, None)
    140     if old_wpr_file:
    141       self._wpr_file_to_urls[old_wpr_file].remove(url)
    142     self._url_to_wpr_file[url] = wpr_file
    143     if wpr_file not in self._wpr_file_to_urls:
    144       self._wpr_file_to_urls[wpr_file] = []
    145     self._wpr_file_to_urls[wpr_file].append(url)
    146