Home | History | Annotate | Download | only in server2
      1 # Copyright 2014 The Chromium Authors. All rights reserved.
      2 # Use of this source code is governed by a BSD-style license that can be
      3 # found in the LICENSE file.
      4 
      5 
      6 from base64 import b64decode
      7 from itertools import izip
      8 import json
      9 import logging
     10 import posixpath
     11 import time
     12 import traceback
     13 
     14 from appengine_url_fetcher import AppEngineUrlFetcher
     15 from appengine_wrappers import IsDownloadError, app_identity
     16 from docs_server_utils import StringIdentity
     17 from file_system import (FileNotFoundError,
     18                          FileSystem,
     19                          FileSystemError,
     20                          FileSystemThrottledError,
     21                          StatInfo)
     22 from future import All, Future
     23 from path_util import AssertIsValid, IsDirectory, ToDirectory
     24 from third_party.json_schema_compiler.memoize import memoize
     25 from url_constants import (GITILES_BASE,
     26                            GITILES_SRC_ROOT,
     27                            GITILES_BRANCHES_PATH,
     28                            GITILES_OAUTH2_SCOPE)
     29 
     30 
     31 _JSON_FORMAT = '?format=JSON'
     32 _TEXT_FORMAT = '?format=TEXT'
     33 _AUTH_PATH_PREFIX = '/a'
     34 
     35 
     36 def _ParseGitilesJson(json_data):
     37   '''json.loads with fix-up for non-executable JSON. Use this to parse any JSON
     38   data coming from Gitiles views.
     39   '''
     40   return json.loads(json_data[json_data.find('{'):])
     41 
     42 
     43 def _CreateStatInfo(json_data):
     44   '''Returns a StatInfo object comprised of the tree ID for |json_data|,
     45   as well as the tree IDs for the entries in |json_data|.
     46   '''
     47   tree = _ParseGitilesJson(json_data)
     48   return StatInfo(tree['id'],
     49                   dict((e['name'], e['id']) for e in tree['entries']))
     50 
     51 
     52 class GitilesFileSystem(FileSystem):
     53   '''Class to fetch filesystem data from the Chromium project's gitiles
     54   service.
     55   '''
     56   @staticmethod
     57   def Create(branch='master', commit=None):
     58     token, _ = app_identity.get_access_token(GITILES_OAUTH2_SCOPE)
     59     path_prefix = '' if token is None else _AUTH_PATH_PREFIX
     60     if commit:
     61       base_url = '%s%s/%s/%s' % (
     62           GITILES_BASE, path_prefix, GITILES_SRC_ROOT, commit)
     63     elif branch is 'master':
     64       base_url = '%s%s/%s/master' % (
     65           GITILES_BASE, path_prefix, GITILES_SRC_ROOT)
     66     else:
     67       base_url = '%s%s/%s/%s/%s' % (
     68           GITILES_BASE, path_prefix, GITILES_SRC_ROOT,
     69           GITILES_BRANCHES_PATH, branch)
     70     return GitilesFileSystem(AppEngineUrlFetcher(), base_url, branch, commit)
     71 
     72   def __init__(self, fetcher, base_url, branch, commit):
     73     self._fetcher = fetcher
     74     self._base_url = base_url
     75     self._branch = branch
     76     self._commit = commit
     77 
     78   def _FetchAsync(self, url):
     79     '''Convenience wrapper for fetcher.FetchAsync, so callers don't
     80     need to use posixpath.join.
     81     '''
     82     AssertIsValid(url)
     83     access_token, _ = app_identity.get_access_token(GITILES_OAUTH2_SCOPE)
     84     return self._fetcher.FetchAsync('%s/%s' % (self._base_url, url),
     85                                     access_token=access_token)
     86 
     87   def _ResolveFetchContent(self, path, fetch_future, skip_not_found=False):
     88     '''Returns a future to cleanly resolve |fetch_future|.
     89     '''
     90     def handle(e):
     91       if skip_not_found and IsDownloadError(e):
     92         return None
     93       exc_type = FileNotFoundError if IsDownloadError(e) else FileSystemError
     94       raise exc_type('%s fetching %s for Get from %s: %s' %
     95           (type(e).__name__, path, self._base_url, traceback.format_exc()))
     96 
     97     def get_content(result):
     98       if result.status_code == 404:
     99         if skip_not_found:
    100           return None
    101         raise FileNotFoundError('Got 404 when fetching %s for Get from %s' %
    102                                 (path, self._base_url))
    103       if result.status_code == 429:
    104         logging.warning('Access throttled when fetching %s for Get from %s' %
    105             (path, self._base_url))
    106         raise FileSystemThrottledError(
    107             'Access throttled when fetching %s for Get from %s' %
    108             (path, self._base_url))
    109       if result.status_code != 200:
    110         raise FileSystemError(
    111             'Got %s when fetching %s for Get from %s, content %s' %
    112             (result.status_code, path, self._base_url, result.content))
    113       return result.content
    114 
    115     return fetch_future.Then(get_content, handle)
    116 
    117   def Read(self, paths, skip_not_found=False):
    118     # Directory content is formatted in JSON in Gitiles as follows:
    119     #
    120     #   {
    121     #     "id": "12a5464de48d2c46bc0b2dc78fafed75aab554fa", # The tree ID.
    122     #     "entries": [
    123     #       {
    124     #         "mode": 33188,
    125     #         "type": "blob",
    126     #           "id": "ab971ca447bc4bce415ed4498369e00164d91cb6", # File ID.
    127     #         "name": ".gitignore"
    128     #       },
    129     #       ...
    130     #     ]
    131     #   }
    132     def list_dir(json_data):
    133       entries = _ParseGitilesJson(json_data).get('entries', [])
    134       return [e['name'] + ('/' if e['type'] == 'tree' else '') for e in entries]
    135 
    136     def fixup_url_format(path):
    137       # By default, Gitiles URLs display resources in HTML. To get resources
    138       # suitable for our consumption, a '?format=' string must be appended to
    139       # the URL. The format may be one of 'JSON' or 'TEXT' for directory or
    140       # text resources, respectively.
    141       return path + (_JSON_FORMAT if IsDirectory(path) else _TEXT_FORMAT)
    142 
    143     # A list of tuples of the form (path, Future).
    144     fetches = [(path, self._FetchAsync(fixup_url_format(path)))
    145                for path in paths]
    146 
    147     def parse_contents(results):
    148       value = {}
    149       for path, content in izip(paths, results):
    150         if content is None:
    151           continue
    152         # Gitiles encodes text content in base64 (see
    153         # http://tools.ietf.org/html/rfc4648 for info about base64).
    154         value[path] = (list_dir if IsDirectory(path) else b64decode)(content)
    155       return value
    156 
    157     return All(self._ResolveFetchContent(path, future, skip_not_found)
    158                for path, future in fetches).Then(parse_contents)
    159 
    160   def Refresh(self):
    161     return Future(value=())
    162 
    163   @memoize
    164   def _GetCommitInfo(self, key):
    165     '''Gets the commit information specified by |key|.
    166 
    167     The JSON view for commit info looks like:
    168       {
    169         "commit": "8fd578e1a7b142cd10a4387861f05fb9459b69e2", # Commit ID.
    170         "tree": "3ade65d8a91eadd009a6c9feea8f87db2c528a53",   # Tree ID.
    171         "parents": [
    172           "a477c787fe847ae0482329f69b39ce0fde047359" # Previous commit ID.
    173         ],
    174         "author": {
    175           "name": "...",
    176           "email": "...",
    177           "time": "Tue Aug 12 17:17:21 2014"
    178         },
    179         "committer": {
    180           "name": "...",
    181           "email": "...",
    182           "time": "Tue Aug 12 17:18:28 2014"
    183         },
    184         "message": "...",
    185         "tree_diff": [...]
    186       }
    187     '''
    188     # Commit information for a branch is obtained by appending '?format=JSON'
    189     # to the branch URL. Note that '<gitiles_url>/<branch>?format=JSON' is
    190     # different from '<gitiles_url>/<branch>/?format=JSON': the latter serves
    191     # the root directory JSON content, whereas the former serves the branch
    192     # commit info JSON content.
    193 
    194     access_token, _ = app_identity.get_access_token(GITILES_OAUTH2_SCOPE)
    195     fetch_future = self._fetcher.FetchAsync(self._base_url + _JSON_FORMAT,
    196                                             access_token=access_token)
    197     content_future = self._ResolveFetchContent(self._base_url, fetch_future)
    198     return content_future.Then(lambda json: _ParseGitilesJson(json)[key])
    199 
    200   def GetCommitID(self):
    201     '''Returns a future that resolves to the commit ID for this branch.
    202     '''
    203     return self._GetCommitInfo('commit')
    204 
    205   def GetPreviousCommitID(self):
    206     '''Returns a future that resolves to the previous commit ID for this branch.
    207     '''
    208     return self._GetCommitInfo('parents').Then(lambda parents: parents[0])
    209 
    210   def StatAsync(self, path):
    211     dir_, filename = posixpath.split(path)
    212     def stat(content):
    213       stat_info = _CreateStatInfo(content)
    214       if stat_info.version is None:
    215         raise FileSystemError('Failed to find version of dir %s' % dir_)
    216       if IsDirectory(path):
    217         return stat_info
    218       if filename not in stat_info.child_versions:
    219         raise FileNotFoundError(
    220             '%s from %s was not in child versions for Stat' % (filename, path))
    221       return StatInfo(stat_info.child_versions[filename])
    222 
    223     fetch_future = self._FetchAsync(ToDirectory(dir_) + _JSON_FORMAT)
    224     return self._ResolveFetchContent(path, fetch_future).Then(stat)
    225 
    226   def GetIdentity(self):
    227     # NOTE: Do not use commit information to create the string identity.
    228     # Doing so will mess up caching.
    229     if self._commit is None and self._branch != 'master':
    230       str_id = '%s/%s/%s/%s' % (
    231           GITILES_BASE, GITILES_SRC_ROOT, GITILES_BRANCHES_PATH, self._branch)
    232     else:
    233       str_id = '%s/%s' % (GITILES_BASE, GITILES_SRC_ROOT)
    234     return '@'.join((self.__class__.__name__, StringIdentity(str_id)))
    235