Home | History | Annotate | Download | only in server2
      1 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 # Use of this source code is governed by a BSD-style license that can be
      3 # found in the LICENSE file.
      4 
      5 import json
      6 import logging
      7 from StringIO import StringIO
      8 import posixpath
      9 
     10 from appengine_blobstore import AppEngineBlobstore, BLOBSTORE_GITHUB
     11 from appengine_url_fetcher import AppEngineUrlFetcher
     12 from appengine_wrappers import urlfetch, blobstore
     13 from docs_server_utils import StringIdentity
     14 from file_system import FileSystem, StatInfo
     15 from future import Future
     16 from path_util import IsDirectory
     17 import url_constants
     18 from zipfile import ZipFile, BadZipfile
     19 
     20 ZIP_KEY = 'zipball'
     21 USERNAME = None
     22 PASSWORD = None
     23 
     24 
     25 def _MakeBlobstoreKey(version):
     26   return ZIP_KEY + '.' + str(version)
     27 
     28 
     29 def _GetAsyncFetchCallback(fetcher,
     30                            username,
     31                            password,
     32                            blobstore,
     33                            key_to_set,
     34                            key_to_delete=None):
     35   fetch = fetcher.FetchAsync(ZIP_KEY, username=username, password=password)
     36 
     37   def resolve():
     38     try:
     39       result = fetch.Get()
     40       # Check if Github authentication failed.
     41       if result.status_code == 401:
     42         logging.error('Github authentication failed for %s, falling back to '
     43                       'unauthenticated.' % USERNAME)
     44         blob = fetcher.Fetch(ZIP_KEY).content
     45       else:
     46         blob = result.content
     47     except urlfetch.DownloadError as e:
     48       logging.error('Bad github zip file: %s' % e)
     49       return None
     50     if key_to_delete is not None:
     51       blobstore.Delete(_MakeBlobstoreKey(key_to_delete, BLOBSTORE_GITHUB))
     52     try:
     53       return_zip = ZipFile(StringIO(blob))
     54     except BadZipfile as e:
     55       logging.error('Bad github zip file: %s' % e)
     56       return None
     57 
     58     blobstore.Set(_MakeBlobstoreKey(key_to_set), blob, BLOBSTORE_GITHUB)
     59     return return_zip
     60 
     61   return resolve
     62 
     63 
     64 class GithubFileSystem(FileSystem):
     65   @staticmethod
     66   def CreateChromeAppsSamples(object_store_creator):
     67     return GithubFileSystem(
     68         '%s/GoogleChrome/chrome-app-samples' % url_constants.GITHUB_REPOS,
     69         AppEngineBlobstore(),
     70         object_store_creator)
     71 
     72   def __init__(self, url, blobstore, object_store_creator):
     73     # If we key the password store on the app version then the whole advantage
     74     # of having it in the first place is greatly lessened (likewise it should
     75     # always start populated).
     76     password_store = object_store_creator.Create(
     77         GithubFileSystem,
     78         app_version=None,
     79         category='password',
     80         start_empty=False)
     81     if USERNAME is None:
     82       password_data = password_store.GetMulti(('username', 'password')).Get()
     83       self._username, self._password = (password_data.get('username'),
     84                                         password_data.get('password'))
     85     else:
     86       password_store.SetMulti({'username': USERNAME, 'password': PASSWORD})
     87       self._username, self._password = (USERNAME, PASSWORD)
     88 
     89     self._url = url
     90     self._fetcher = AppEngineUrlFetcher(url)
     91     self._blobstore = blobstore
     92     self._stat_object_store = object_store_creator.Create(GithubFileSystem)
     93     self._version = None
     94     self._GetZip(self.Stat(ZIP_KEY).version)
     95 
     96   def _GetZip(self, version):
     97     try:
     98       blob = self._blobstore.Get(_MakeBlobstoreKey(version), BLOBSTORE_GITHUB)
     99     except blobstore.BlobNotFoundError:
    100       self._zip_file = Future(value=None)
    101       return
    102     if blob is not None:
    103       try:
    104         self._zip_file = Future(value=ZipFile(StringIO(blob)))
    105       except BadZipfile as e:
    106         self._blobstore.Delete(_MakeBlobstoreKey(version), BLOBSTORE_GITHUB)
    107         logging.error('Bad github zip file: %s' % e)
    108         self._zip_file = Future(value=None)
    109     else:
    110       self._zip_file = Future(
    111           callback=_GetAsyncFetchCallback(self._fetcher,
    112                                           self._username,
    113                                           self._password,
    114                                           self._blobstore,
    115                                           version,
    116                                           key_to_delete=self._version))
    117     self._version = version
    118 
    119   def _ReadFile(self, path):
    120     try:
    121       zip_file = self._zip_file.Get()
    122     except Exception as e:
    123       logging.error('Github ReadFile error: %s' % e)
    124       return ''
    125     if zip_file is None:
    126       logging.error('Bad github zip file.')
    127       return ''
    128     prefix = zip_file.namelist()[0]
    129     return zip_file.read(prefix + path)
    130 
    131   def _ListDir(self, path):
    132     try:
    133       zip_file = self._zip_file.Get()
    134     except Exception as e:
    135       logging.error('Github ListDir error: %s' % e)
    136       return []
    137     if zip_file is None:
    138       logging.error('Bad github zip file.')
    139       return []
    140     filenames = zip_file.namelist()
    141     # Take out parent directory name (GoogleChrome-chrome-app-samples-c78a30f)
    142     filenames = [f[len(filenames[0]):] for f in filenames]
    143     # Remove the path of the directory we're listing from the filenames.
    144     filenames = [f[len(path):] for f in filenames
    145                  if f != path and f.startswith(path)]
    146     # Remove all files not directly in this directory.
    147     return [f for f in filenames if f[:-1].count('/') == 0]
    148 
    149   def Read(self, paths, skip_not_found=False):
    150     version = self.Stat(ZIP_KEY).version
    151     if version != self._version:
    152       self._GetZip(version)
    153     result = {}
    154     for path in paths:
    155       if IsDirectory(path):
    156         result[path] = self._ListDir(path)
    157       else:
    158         result[path] = self._ReadFile(path)
    159     return Future(value=result)
    160 
    161   def _DefaultStat(self, path):
    162     version = 0
    163     # TODO(kalman): we should replace all of this by wrapping the
    164     # GithubFileSystem in a CachingFileSystem. A lot of work has been put into
    165     # CFS to be robust, and GFS is missing out.
    166     # For example: the following line is wrong, but it could be moot.
    167     self._stat_object_store.Set(path, version)
    168     return StatInfo(version)
    169 
    170   def Stat(self, path):
    171     version = self._stat_object_store.Get(path).Get()
    172     if version is not None:
    173       return StatInfo(version)
    174     try:
    175       result = self._fetcher.Fetch('commits/HEAD',
    176                                    username=USERNAME,
    177                                    password=PASSWORD)
    178     except urlfetch.DownloadError as e:
    179       logging.warning('GithubFileSystem Stat: %s' % e)
    180       return self._DefaultStat(path)
    181 
    182     # Check if Github authentication failed.
    183     if result.status_code == 401:
    184       logging.warning('Github authentication failed for %s, falling back to '
    185                       'unauthenticated.' % USERNAME)
    186       try:
    187         result = self._fetcher.Fetch('commits/HEAD')
    188       except urlfetch.DownloadError as e:
    189         logging.warning('GithubFileSystem Stat: %s' % e)
    190         return self._DefaultStat(path)
    191 
    192     # Parse response JSON - but sometimes github gives us invalid JSON.
    193     try:
    194       version = json.loads(result.content)['sha']
    195       self._stat_object_store.Set(path, version)
    196       return StatInfo(version)
    197     except StandardError as e:
    198       logging.warning(
    199           ('%s: got invalid or unexpected JSON from github. Response status ' +
    200            'was %s, content %s') % (e, result.status_code, result.content))
    201       return self._DefaultStat(path)
    202 
    203   def GetIdentity(self):
    204     return '%s@%s' % (self.__class__.__name__, StringIdentity(self._url))
    205