Home | History | Annotate | Download | only in server2
      1 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 # Use of this source code is governed by a BSD-style license that can be
      3 # found in the LICENSE file.
      4 
      5 import posixpath
      6 import traceback
      7 import xml.dom.minidom as xml
      8 from xml.parsers.expat import ExpatError
      9 
     10 from appengine_url_fetcher import AppEngineUrlFetcher
     11 from appengine_wrappers import IsDownloadError
     12 from docs_server_utils import StringIdentity
     13 from file_system import (
     14     FileNotFoundError, FileSystem, FileSystemError, StatInfo)
     15 from future import Future
     16 import url_constants
     17 
     18 
     19 def _ParseHTML(html):
     20   '''Unfortunately, the viewvc page has a stray </div> tag, so this takes care
     21   of all mismatched tags.
     22   '''
     23   try:
     24     return xml.parseString(html)
     25   except ExpatError as e:
     26     return _ParseHTML('\n'.join(
     27         line for (i, line) in enumerate(html.split('\n'))
     28         if e.lineno != i + 1))
     29 
     30 def _InnerText(node):
     31   '''Like node.innerText in JS DOM, but strips surrounding whitespace.
     32   '''
     33   text = []
     34   if node.nodeValue:
     35     text.append(node.nodeValue)
     36   if hasattr(node, 'childNodes'):
     37     for child_node in node.childNodes:
     38       text.append(_InnerText(child_node))
     39   return ''.join(text).strip()
     40 
     41 def _CreateStatInfo(html):
     42   parent_version = None
     43   child_versions = {}
     44 
     45   # Try all of the tables until we find the ones that contain the data (the
     46   # directory and file versions are in different tables).
     47   for table in _ParseHTML(html).getElementsByTagName('table'):
     48     # Within the table there is a list of files. However, there may be some
     49     # things beforehand; a header, "parent directory" list, etc. We will deal
     50     # with that below by being generous and just ignoring such rows.
     51     rows = table.getElementsByTagName('tr')
     52 
     53     for row in rows:
     54       cells = row.getElementsByTagName('td')
     55 
     56       # The version of the directory will eventually appear in the soup of
     57       # table rows, like this:
     58       #
     59       # <tr>
     60       #   <td>Directory revision:</td>
     61       #   <td><a href=... title="Revision 214692">214692</a> (of...)</td>
     62       # </tr>
     63       #
     64       # So look out for that.
     65       if len(cells) == 2 and _InnerText(cells[0]) == 'Directory revision:':
     66         links = cells[1].getElementsByTagName('a')
     67         if len(links) != 2:
     68           raise FileSystemError('ViewVC assumption invalid: directory ' +
     69                                 'revision content did not have 2 <a> ' +
     70                                 ' elements, instead %s' % _InnerText(cells[1]))
     71         this_parent_version = _InnerText(links[0])
     72         int(this_parent_version)  # sanity check
     73         if parent_version is not None:
     74           raise FileSystemError('There was already a parent version %s, and ' +
     75                                 ' we just found a second at %s' %
     76                                 (parent_version, this_parent_version))
     77         parent_version = this_parent_version
     78 
     79       # The version of each file is a list of rows with 5 cells: name, version,
     80       # age, author, and last log entry. Maybe the columns will change; we're
     81       # at the mercy viewvc, but this constant can be easily updated.
     82       if len(cells) != 5:
     83         continue
     84       name_element, version_element, _, __, ___ = cells
     85 
     86       name = _InnerText(name_element)  # note: will end in / for directories
     87       try:
     88         version = int(_InnerText(version_element))
     89       except StandardError:
     90         continue
     91       child_versions[name] = str(version)
     92 
     93     if parent_version and child_versions:
     94       break
     95 
     96   return StatInfo(parent_version, child_versions)
     97 
     98 
     99 class SubversionFileSystem(FileSystem):
    100   '''Class to fetch resources from src.chromium.org.
    101   '''
    102   @staticmethod
    103   def Create(branch='trunk', revision=None):
    104     if branch == 'trunk':
    105       svn_path = 'trunk/src'
    106     else:
    107       svn_path = 'branches/%s/src' % branch
    108     return SubversionFileSystem(
    109         AppEngineUrlFetcher('%s/%s' % (url_constants.SVN_URL, svn_path)),
    110         AppEngineUrlFetcher('%s/%s' % (url_constants.VIEWVC_URL, svn_path)),
    111         svn_path,
    112         revision=revision)
    113 
    114   def __init__(self, file_fetcher, stat_fetcher, svn_path, revision=None):
    115     self._file_fetcher = file_fetcher
    116     self._stat_fetcher = stat_fetcher
    117     self._svn_path = svn_path
    118     self._revision = revision
    119 
    120   def Read(self, paths, skip_not_found=False):
    121     args = None
    122     if self._revision is not None:
    123       # |fetcher| gets from svn.chromium.org which uses p= for version.
    124       args = 'p=%s' % self._revision
    125 
    126     def apply_args(path):
    127       return path if args is None else '%s?%s' % (path, args)
    128 
    129     def list_dir(directory):
    130       dom = xml.parseString(directory)
    131       files = [elem.childNodes[0].data
    132                for elem in dom.getElementsByTagName('a')]
    133       if '..' in files:
    134         files.remove('..')
    135       return files
    136 
    137     # A list of tuples of the form (path, Future).
    138     fetches = [(path, self._file_fetcher.FetchAsync(apply_args(path)))
    139                for path in paths]
    140 
    141     def resolve():
    142       value = {}
    143       for path, future in fetches:
    144         try:
    145           result = future.Get()
    146         except Exception as e:
    147           if skip_not_found and IsDownloadError(e): continue
    148           exc_type = (FileNotFoundError if IsDownloadError(e)
    149                                        else FileSystemError)
    150           raise exc_type('%s fetching %s for Get: %s' %
    151                          (type(e).__name__, path, traceback.format_exc()))
    152         if result.status_code == 404:
    153           if skip_not_found: continue
    154           raise FileNotFoundError(
    155               'Got 404 when fetching %s for Get, content %s' %
    156               (path, result.content))
    157         if result.status_code != 200:
    158           raise FileSystemError('Got %s when fetching %s for Get, content %s' %
    159               (result.status_code, path, result.content))
    160         if path.endswith('/'):
    161           value[path] = list_dir(result.content)
    162         else:
    163           value[path] = result.content
    164       return value
    165     return Future(callback=resolve)
    166 
    167   def Refresh(self):
    168     return Future(value=())
    169 
    170   def StatAsync(self, path):
    171     directory, filename = posixpath.split(path)
    172     if self._revision is not None:
    173       # |stat_fetch| uses viewvc which uses pathrev= for version.
    174       directory += '?pathrev=%s' % self._revision
    175 
    176     result_future = self._stat_fetcher.FetchAsync(directory)
    177     def resolve():
    178       try:
    179         result = result_future.Get()
    180       except Exception as e:
    181         exc_type = FileNotFoundError if IsDownloadError(e) else FileSystemError
    182         raise exc_type('%s fetching %s for Stat: %s' %
    183                        (type(e).__name__, path, traceback.format_exc()))
    184 
    185       if result.status_code == 404:
    186         raise FileNotFoundError('Got 404 when fetching %s for Stat, '
    187                                 'content %s' % (path, result.content))
    188       if result.status_code != 200:
    189         raise FileNotFoundError('Got %s when fetching %s for Stat, content %s' %
    190                                 (result.status_code, path, result.content))
    191 
    192       stat_info = _CreateStatInfo(result.content)
    193       if stat_info.version is None:
    194         raise FileSystemError('Failed to find version of dir %s' % directory)
    195       if path == '' or path.endswith('/'):
    196         return stat_info
    197       if filename not in stat_info.child_versions:
    198         raise FileNotFoundError(
    199             '%s from %s was not in child versions for Stat' % (filename, path))
    200       return StatInfo(stat_info.child_versions[filename])
    201 
    202     return Future(callback=resolve)
    203 
    204   def GetIdentity(self):
    205     # NOTE: no revision here, since it would mess up the caching of reads. It
    206     # probably doesn't matter since all the caching classes will use the result
    207     # of Stat to decide whether to re-read - and Stat has a ceiling of the
    208     # revision - so when the revision changes, so might Stat. That is enough.
    209     return '@'.join((self.__class__.__name__, StringIdentity(self._svn_path)))
    210