# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import logging
import traceback

from third_party.cloudstorage import cloudstorage_api
from third_party.cloudstorage import common
from third_party.cloudstorage import errors

from docs_server_utils import StringIdentity
from file_system import FileSystem, FileNotFoundError, StatInfo
from future import Future
from path_util import (
    AssertIsDirectory, AssertIsFile, AssertIsValid, IsDirectory, Join)


# See gcs_file_system_provider.py for documentation on using Google Cloud
# Storage as a filesystem.
#
# Note that GCS path requirements differ from the docserver's: GCS requires
# that paths start with a '/', while the docserver requires that they don't.
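#
# For example, the docserver path 'bucket/dir/file.txt' corresponds to the
# GCS object name '/bucket/dir/file.txt'; the helpers below add the
# leading '/' before calling into the cloudstorage library.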


# Name of the file containing the Git hash of the latest commit synced to
# Cloud Storage. This file is generated by the GitHub->GCS sync script.
LAST_COMMIT_HASH_FILENAME = '.__lastcommit.txt'

def _ReadFile(filename):
  '''Returns the contents of the file |filename|, which must not start with
  a '/'. Raises FileNotFoundError if the read fails for any reason.
  '''
  AssertIsFile(filename)
  try:
    with cloudstorage_api.open('/' + filename, 'r') as f:
      return f.read()
  except errors.Error:
    raise FileNotFoundError('Read failed for %s: %s' % (filename,
                                                        traceback.format_exc()))

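# A hypothetical illustration of _ListDir below: given the objects
# '/bucket/dir/a.txt' and '/bucket/dir/sub/b.txt', _ListDir('bucket/dir/')
# should yield something like ['a.txt', 'sub/'], since listbucket with a
# delimiter reports subdirectory prefixes as entries, whereas
# _ListDir('bucket/dir/', recursive=True) yields ['a.txt', 'sub/b.txt'].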
def _ListDir(dir_name, recursive=False):
  '''Returns the paths of the files within |dir_name|, relative to |dir_name|.
  '''
  AssertIsDirectory(dir_name)
  try:
    # The listbucket method uses a prefix approach to simulate hierarchy.
    # Calling it with the 'delimiter' argument set to '/' returns only the
    # files directly inside the directory, not all of its recursive content.
    delimiter = None if recursive else '/'
    files = cloudstorage_api.listbucket('/' + dir_name, delimiter=delimiter)
    # Strip the leading '/' and the |dir_name| prefix so the returned paths
    # are relative to |dir_name|.
    return [os_path.filename.lstrip('/')[len(dir_name):] for os_path in files]
  except errors.Error:
    raise FileNotFoundError('cloudstorage.listbucket failed for %s: %s' %
                            (dir_name, traceback.format_exc()))

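# A note on versions: the sync script records only the hash of the last
# commit synced to GCS (see LAST_COMMIT_HASH_FILENAME above), so the version
# that _CreateStatInfo reports is that single repo-wide hash, both for the
# path itself and for each of a directory's children.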
def _CreateStatInfo(bucket, path):
  '''Returns a StatInfo for |path| within |bucket|, using the hash of the
  last synced commit as the version of |path| and of any children.
  '''
  full_path = Join(bucket, path)
  last_commit_file = Join(bucket, LAST_COMMIT_HASH_FILENAME)
  try:
    last_commit = _ReadFile(last_commit_file)
    if IsDirectory(full_path):
      child_versions = dict((filename, last_commit)
                            for filename in _ListDir(full_path))
    else:
      child_versions = None
    return StatInfo(last_commit, child_versions)
  except (TypeError, errors.Error):
    raise FileNotFoundError('cloudstorage.stat failed for %s: %s' % (path,
                            traceback.format_exc()))

class CloudStorageFileSystem(FileSystem):
  '''FileSystem implementation which fetches resources from Google Cloud
  Storage.
  '''
  def __init__(self, bucket, debug_access_token=None, debug_bucket_prefix=None):
    self._bucket = bucket
    if debug_access_token:
      logging.debug('gcs: using debug access token: %s' % debug_access_token)
      common.set_access_token(debug_access_token)
    if debug_bucket_prefix:
      logging.debug('gcs: prefixing all bucket names with %s' %
                    debug_bucket_prefix)
      self._bucket = debug_bucket_prefix + self._bucket
    AssertIsValid(self._bucket)

  def Read(self, paths, skip_not_found=False):
    def resolve():
      try:
        result = {}
        for path in paths:
          full_path = Join(self._bucket, path)
          logging.debug('gcs: requested path "%s", reading "%s"' %
                        (path, full_path))
          try:
            if IsDirectory(path):
              result[path] = _ListDir(full_path)
            else:
              result[path] = _ReadFile(full_path)
          except FileNotFoundError:
            # Honor |skip_not_found| by omitting missing paths from the
            # result rather than failing the whole read.
            if not skip_not_found:
              raise
        return result
      except errors.AuthorizationError:
        self._warnAboutAuthError()
        raise

    # The read is lazy: nothing is fetched from GCS until Get() is called
    # on the returned Future.
    return Future(callback=resolve)

  def Refresh(self):
    # Content is pushed to GCS by the external sync script, so there is
    # nothing to refresh here.
    return Future(value=())

  def Stat(self, path):
    AssertIsValid(path)
    try:
      return _CreateStatInfo(self._bucket, path)
    except errors.AuthorizationError:
      self._warnAboutAuthError()
      raise

  def GetIdentity(self):
    return '@'.join((self.__class__.__name__, StringIdentity(self._bucket)))

  def __repr__(self):
    return 'CloudStorageFileSystem(%s)' % self._bucket

  def _warnAboutAuthError(self):
    logging.warning('Authentication error on Cloud Storage. Check that your'
                    ' App Engine project has permission to read the GCS'
                    ' buckets. If you are running a local App Engine server,'
                    ' you need to set an access_token in'
                    ' local_debug/gcs_debug.conf.'
                    ' Remember that this token expires in less than 10'
                    ' minutes, so keep it updated. See'
                    ' gcs_file_system_provider.py for instructions.')
    logging.debug(traceback.format_exc())
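

# A minimal usage sketch (the bucket name below is hypothetical):
#
#   file_system = CloudStorageFileSystem('chrome-docs-bucket')
#   stat_info = file_system.Stat('docs/')  # version of 'docs/' and children
#   data = file_system.Read(['docs/', 'docs/index.html']).Get()
#
# Directory paths (trailing '/') yield listings and file paths yield
# contents; the GCS requests only happen once Get() is called on the Future.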
    128