# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.


from base64 import b64decode
from itertools import izip
import json
import logging
import posixpath
import time
import traceback

from appengine_url_fetcher import AppEngineUrlFetcher
from appengine_wrappers import IsDownloadError, app_identity
from docs_server_utils import StringIdentity
from file_system import (FileNotFoundError,
                         FileSystem,
                         FileSystemError,
                         FileSystemThrottledError,
                         StatInfo)
from future import All, Future
from path_util import AssertIsValid, IsDirectory, ToDirectory
from third_party.json_schema_compiler.memoize import memoize
from url_constants import (GITILES_BASE,
                           GITILES_SRC_ROOT,
                           GITILES_BRANCHES_PATH,
                           GITILES_OAUTH2_SCOPE)


# Query strings appended to Gitiles URLs to select the response format.
_JSON_FORMAT = '?format=JSON'
_TEXT_FORMAT = '?format=TEXT'
# Path component inserted after GITILES_BASE when an access token is available.
_AUTH_PATH_PREFIX = '/a'


def _ParseGitilesJson(json_data):
  '''json.loads with fix-up for non-executable JSON. Use this to parse any JSON
  data coming from Gitiles views.

  Everything preceding the first '{' in |json_data| is discarded before
  parsing. Raises ValueError if |json_data| contains no '{' at all, or if the
  remainder is not valid JSON.
  '''
  start = json_data.find('{')
  if start < 0:
    # str.find returns -1 when there is no match; slicing with that value
    # would silently parse only the last character of the payload.
    raise ValueError('No JSON object found in Gitiles response')
  return json.loads(json_data[start:])


def _CreateStatInfo(json_data):
  '''Returns a StatInfo object comprised of the tree ID for |json_data|,
  as well as the tree IDs for the entries in |json_data|.

  |json_data| is the raw JSON text of a Gitiles directory view; it must
  contain the keys 'id' and 'entries', and every entry must have 'name' and
  'id'.
  '''
  tree = _ParseGitilesJson(json_data)
  return StatInfo(tree['id'],
                  dict((e['name'], e['id']) for e in tree['entries']))


class GitilesFileSystem(FileSystem):
  '''Class to fetch filesystem data from the Chromium project's gitiles
  service.
  '''
  @staticmethod
  def Create(branch='master', commit=None):
    '''Creates a GitilesFileSystem rooted at |commit| if given, otherwise at
    |branch|.

    The authenticated URL prefix is used only when an access token could be
    obtained for GITILES_OAUTH2_SCOPE.
    '''
    token, _ = app_identity.get_access_token(GITILES_OAUTH2_SCOPE)
    path_prefix = '' if token is None else _AUTH_PATH_PREFIX
    if commit:
      base_url = '%s%s/%s/%s' % (
          GITILES_BASE, path_prefix, GITILES_SRC_ROOT, commit)
    elif branch == 'master':
      # Compare by value: an identity check against a string literal only
      # works when both strings happen to be interned.
      base_url = '%s%s/%s/master' % (
          GITILES_BASE, path_prefix, GITILES_SRC_ROOT)
    else:
      base_url = '%s%s/%s/%s/%s' % (
          GITILES_BASE, path_prefix, GITILES_SRC_ROOT,
          GITILES_BRANCHES_PATH, branch)
    return GitilesFileSystem(AppEngineUrlFetcher(), base_url, branch, commit)

  def __init__(self, fetcher, base_url, branch, commit):
    '''Stores the |fetcher| used for all requests, the |base_url| every path
    is resolved against, and the |branch| and |commit| this instance was
    created for.
    '''
    self._fetcher = fetcher
    self._base_url = base_url
    self._branch = branch
    self._commit = commit

  def _FetchAsync(self, url):
    '''Convenience wrapper for fetcher.FetchAsync, so callers don't
    need to use posixpath.join.

    |url| is validated with AssertIsValid and then appended to the base URL.
    A fresh access token is requested for each call and passed to the fetcher.
    '''
    AssertIsValid(url)
    access_token, _ = app_identity.get_access_token(GITILES_OAUTH2_SCOPE)
    return self._fetcher.FetchAsync('%s/%s' % (self._base_url, url),
                                    access_token=access_token)

  def _ResolveFetchContent(self, path, fetch_future, skip_not_found=False):
    '''Returns a future to cleanly resolve |fetch_future|.

    The returned future resolves to the response content on HTTP 200. When
    |skip_not_found| is set, a 404 or a download error resolves to None
    instead of raising. Otherwise:
      - 404 or a download error raises FileNotFoundError,
      - 429 raises FileSystemThrottledError,
      - any other status or error raises FileSystemError.
    |path| is only used in the error messages.
    '''
    def handle(e):
      # Invoked with the exception raised while resolving |fetch_future|.
      if skip_not_found and IsDownloadError(e):
        return None
      exc_type = FileNotFoundError if IsDownloadError(e) else FileSystemError
      raise exc_type('%s fetching %s for Get from %s: %s' %
          (type(e).__name__, path, self._base_url, traceback.format_exc()))

    def get_content(result):
      # Invoked with the fetch result; maps HTTP status codes to exceptions.
      if result.status_code == 404:
        if skip_not_found:
          return None
        raise FileNotFoundError('Got 404 when fetching %s for Get from %s' %
                                (path, self._base_url))
      if result.status_code == 429:
        # Build the message once; it is both logged and raised.
        throttled_message = (
            'Access throttled when fetching %s for Get from %s' %
            (path, self._base_url))
        logging.warning(throttled_message)
        raise FileSystemThrottledError(throttled_message)
      if result.status_code != 200:
        raise FileSystemError(
            'Got %s when fetching %s for Get from %s, content %s' %
            (result.status_code, path, self._base_url, result.content))
      return result.content

    return fetch_future.Then(get_content, handle)

  def Read(self, paths, skip_not_found=False):
    '''Returns a future resolving to a dict mapping each of |paths| to its
    content: a list of entry names for directories (sub-directories carry a
    trailing '/'), or the base64-decoded content for files.

    Paths whose content resolved to None (only possible when |skip_not_found|
    is set) are omitted from the dict.
    '''
    # Directory content is formatted in JSON in Gitiles as follows:
    #
    #   {
    #     "id": "12a5464de48d2c46bc0b2dc78fafed75aab554fa", # The tree ID.
    #     "entries": [
    #       {
    #         "mode": 33188,
    #         "type": "blob",
    #         "id": "ab971ca447bc4bce415ed4498369e00164d91cb6", # File ID.
    #         "name": ".gitignore"
    #       },
    #       ...
    #     ]
    #   }
    def list_dir(json_data):
      entries = _ParseGitilesJson(json_data).get('entries', [])
      return [e['name'] + ('/' if e['type'] == 'tree' else '') for e in entries]

    def fixup_url_format(path):
      # By default, Gitiles URLs display resources in HTML. To get resources
      # suitable for our consumption, a '?format=' string must be appended to
      # the URL. The format may be one of 'JSON' or 'TEXT' for directory or
      # text resources, respectively.
      return path + (_JSON_FORMAT if IsDirectory(path) else _TEXT_FORMAT)

    # |paths| is walked twice (once to start the fetches, once to pair the
    # results), so materialize it in case the caller passed a one-shot
    # iterable.
    paths = list(paths)

    # A list of tuples of the form (path, Future).
    fetches = [(path, self._FetchAsync(fixup_url_format(path)))
               for path in paths]

    def parse_contents(results):
      value = {}
      for path, content in izip(paths, results):
        if content is None:
          continue
        # Gitiles encodes text content in base64 (see
        # http://tools.ietf.org/html/rfc4648 for info about base64).
        value[path] = (list_dir if IsDirectory(path) else b64decode)(content)
      return value

    return All(self._ResolveFetchContent(path, future, skip_not_found)
               for path, future in fetches).Then(parse_contents)

  def Refresh(self):
    '''Returns an already-resolved future; no refresh work is performed.
    '''
    return Future(value=())

  @memoize
  def _GetCommitInfo(self, key):
    '''Gets the commit information specified by |key|.

    Returns a future resolving to the value stored under |key| in the commit
    info JSON.

    The JSON view for commit info looks like:
      {
        "commit": "8fd578e1a7b142cd10a4387861f05fb9459b69e2", # Commit ID.
        "tree": "3ade65d8a91eadd009a6c9feea8f87db2c528a53", # Tree ID.
        "parents": [
          "a477c787fe847ae0482329f69b39ce0fde047359" # Previous commit ID.
        ],
        "author": {
          "name": "...",
          "email": "...",
          "time": "Tue Aug 12 17:17:21 2014"
        },
        "committer": {
          "name": "...",
          "email": "...",
          "time": "Tue Aug 12 17:18:28 2014"
        },
        "message": "...",
        "tree_diff": [...]
      }
    '''
    # Commit information for a branch is obtained by appending '?format=JSON'
    # to the branch URL. Note that '<gitiles_url>/<branch>?format=JSON' is
    # different from '<gitiles_url>/<branch>/?format=JSON': the latter serves
    # the root directory JSON content, whereas the former serves the branch
    # commit info JSON content.

    access_token, _ = app_identity.get_access_token(GITILES_OAUTH2_SCOPE)
    fetch_future = self._fetcher.FetchAsync(self._base_url + _JSON_FORMAT,
                                            access_token=access_token)
    content_future = self._ResolveFetchContent(self._base_url, fetch_future)
    # The parameter is deliberately not named |json| so that it does not
    # shadow the json module.
    return content_future.Then(
        lambda json_data: _ParseGitilesJson(json_data)[key])

  def GetCommitID(self):
    '''Returns a future that resolves to the commit ID for this branch.
    '''
    return self._GetCommitInfo('commit')

  def GetPreviousCommitID(self):
    '''Returns a future that resolves to the previous commit ID for this branch.

    This is the first entry of the commit's "parents" list.
    '''
    return self._GetCommitInfo('parents').Then(lambda parents: parents[0])

  def StatAsync(self, path):
    '''Returns a future resolving to the StatInfo for |path|.

    The listing of the directory part of |path| is fetched. For a directory
    path the StatInfo of that listing (tree ID plus child versions) is
    returned; for a file path a StatInfo holding only that file's ID is
    returned.

    Raises FileSystemError if the listing carries no tree ID, and
    FileNotFoundError if a file path is missing from the listing.
    '''
    dir_, filename = posixpath.split(path)
    def stat(content):
      stat_info = _CreateStatInfo(content)
      if stat_info.version is None:
        raise FileSystemError('Failed to find version of dir %s' % dir_)
      if IsDirectory(path):
        return stat_info
      if filename not in stat_info.child_versions:
        raise FileNotFoundError(
            '%s from %s was not in child versions for Stat' % (filename, path))
      return StatInfo(stat_info.child_versions[filename])

    fetch_future = self._FetchAsync(ToDirectory(dir_) + _JSON_FORMAT)
    return self._ResolveFetchContent(path, fetch_future).Then(stat)

  def GetIdentity(self):
    '''Returns a string identifying this file system, built from the class
    name and the StringIdentity of the repository (and branch, for non-master
    branches without a pinned commit).
    '''
    # NOTE: Do not use commit information to create the string identity.
    # Doing so will mess up caching.
    if self._commit is None and self._branch != 'master':
      str_id = '%s/%s/%s/%s' % (
          GITILES_BASE, GITILES_SRC_ROOT, GITILES_BRANCHES_PATH, self._branch)
    else:
      str_id = '%s/%s' % (GITILES_BASE, GITILES_SRC_ROOT)
    return '@'.join((self.__class__.__name__, StringIdentity(str_id)))