#!/usr/bin/env python
# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import json
from cStringIO import StringIO
from functools import partial
from hashlib import sha1
from random import random
import unittest
from zipfile import ZipFile

from caching_file_system import CachingFileSystem
from file_system import FileNotFoundError, StatInfo
from fake_url_fetcher import FakeURLFSFetcher, MockURLFetcher
from local_file_system import LocalFileSystem
from new_github_file_system import GithubFileSystem
from object_store_creator import ObjectStoreCreator
from test_file_system import TestFileSystem


class _TestBundle(object):
  '''Bundles test file data with a GithubFileSystem and test utilities. Create
  GithubFileSystems via |CreateGfsAndFetcher()|, randomly mutate the repo's
  contents via |Mutate()|, and access the underlying zip data via |files|.
  '''

  def __init__(self):
    # The files served inside the fake repository's zipball. Keys are paths
    # within the zip archive, values are file contents.
    self.files = {
      'zipfile/': '',
      'zipfile/hello.txt': 'world',
      'zipfile/readme': 'test zip',
      'zipfile/dir/file1': 'contents',
      'zipfile/dir/file2': 'more contents'
    }
    # Fake GitHub API layout: owner -> repo -> {commits, zipball}.
    self._test_files = {
      'test_owner': {
        'changing-repo': {
          'commits': {
            'HEAD': self._MakeShaJson(self._GenerateHash())
          },
          'zipball': self._ZipFromFiles(self.files)
        }
      }
    }

  def CreateGfsAndFetcher(self):
    '''Creates a GithubFileSystem backed by |_test_files| and returns it
    together with the single MockURLFetcher it uses.
    '''
    fetchers = []
    def create_mock_url_fetcher(base_path):
      # The GithubFileSystem is expected to create exactly one fetcher.
      assert not fetchers
      fetchers.append(MockURLFetcher(
          FakeURLFSFetcher(TestFileSystem(self._test_files), base_path)))
      return fetchers[-1]

    # Constructing |gfs| will create a fetcher.
    gfs = GithubFileSystem.ForTest(
        'changing-repo', create_mock_url_fetcher, path='')
    assert len(fetchers) == 1
    return gfs, fetchers[0]

  def Mutate(self):
    '''Changes some repository files and advances HEAD to a new fake SHA.
    Returns the (version, data) that was written.
    '''
    fake_version = self._GenerateHash()
    fake_data = self._GenerateHash()
    self.files['zipfile/hello.txt'] = fake_data
    self.files['zipfile/new-file'] = fake_data
    self.files['zipfile/dir/file1'] = fake_data
    self._test_files['test_owner']['changing-repo']['zipball'] = (
        self._ZipFromFiles(self.files))
    self._test_files['test_owner']['changing-repo']['commits']['HEAD'] = (
        self._MakeShaJson(fake_version))
    return fake_version, fake_data

  def _GenerateHash(self):
    '''Generates an arbitrary SHA1 hash.
    '''
    return sha1(str(random())).hexdigest()

  def _MakeShaJson(self, hash_value):
    '''Returns the canned commit JSON from the test data with its 'sha' field
    replaced by |hash_value|.
    '''
    # json.loads builds a fresh dict, so mutating |commit_json| cannot affect
    # the data on disk (the previous deepcopy of the string was a no-op).
    commit_json = json.loads(LocalFileSystem('').ReadSingle(
        'test_data/github_file_system/test_owner/repo/commits/HEAD').Get())
    commit_json['sha'] = hash_value
    return json.dumps(commit_json)

  def _ZipFromFiles(self, file_dict):
    '''Zips up |file_dict| ({path in archive: contents}) and returns the raw
    bytes of the resulting archive.
    '''
    string = StringIO()
    # Close via 'with' so the archive's central directory is always written,
    # even if writestr raises.
    with ZipFile(string, 'w') as zip_file:
      for filename, contents in file_dict.iteritems():
        zip_file.writestr(filename, contents)
    return string.getvalue()


class TestGithubFileSystem(unittest.TestCase):
  def setUp(self):
    self._gfs = GithubFileSystem.ForTest(
        'repo', partial(FakeURLFSFetcher, LocalFileSystem('')))
    # Start and finish the repository load.
    self._cgfs = CachingFileSystem(self._gfs, ObjectStoreCreator.ForTest())

  def testReadDirectory(self):
    self._gfs.Refresh().Get()
    self.assertEqual(
        sorted(['requirements.txt', '.gitignore', 'README.md', 'src/']),
        sorted(self._gfs.ReadSingle('').Get()))
    self.assertEqual(
        sorted(['__init__.notpy', 'hello.notpy']),
        sorted(self._gfs.ReadSingle('src/').Get()))

  def testReadFile(self):
    self._gfs.Refresh().Get()
    expected = (
        '# Compiled Python files\n'
        '*.pyc\n'
    )
    self.assertEqual(expected, self._gfs.ReadSingle('.gitignore').Get())

  def testMultipleReads(self):
    self._gfs.Refresh().Get()
    self.assertEqual(
        self._gfs.ReadSingle('requirements.txt').Get(),
        self._gfs.ReadSingle('requirements.txt').Get())

  def testReads(self):
    self._gfs.Refresh().Get()
    expected = {
      'src/': sorted(['hello.notpy', '__init__.notpy']),
      '': sorted(['requirements.txt', '.gitignore', 'README.md', 'src/'])
    }

    read = self._gfs.Read(['', 'src/']).Get()
    self.assertEqual(expected['src/'], sorted(read['src/']))
    self.assertEqual(expected[''], sorted(read['']))

  def testStat(self):
    # This is the hash value from the zip on disk.
    real_hash = 'c36fc23688a9ec9e264d3182905dc0151bfff7d7'

    self._gfs.Refresh().Get()
    dir_stat = StatInfo(real_hash, {
      'hello.notpy': StatInfo(real_hash),
      '__init__.notpy': StatInfo(real_hash)
    })

    self.assertEqual(StatInfo(real_hash), self._gfs.Stat('README.md'))
    self.assertEqual(StatInfo(real_hash), self._gfs.Stat('src/hello.notpy'))
    self.assertEqual(dir_stat, self._gfs.Stat('src/'))

  def testBadReads(self):
    self._gfs.Refresh().Get()
    self.assertRaises(FileNotFoundError, self._gfs.Stat, 'DONT_README.md')
    self.assertRaises(FileNotFoundError,
                      self._gfs.ReadSingle('DONT_README.md').Get)

  def testCachingFileSystem(self):
    self._cgfs.Refresh().Get()
    initial_cgfs_read_one = self._cgfs.ReadSingle('src/hello.notpy').Get()

    self.assertEqual(initial_cgfs_read_one,
                     self._gfs.ReadSingle('src/hello.notpy').Get())
    self.assertEqual(initial_cgfs_read_one,
                     self._cgfs.ReadSingle('src/hello.notpy').Get())

    initial_cgfs_read_two = self._cgfs.Read(
        ['README.md', 'requirements.txt']).Get()

    self.assertEqual(
        initial_cgfs_read_two,
        self._gfs.Read(['README.md', 'requirements.txt']).Get())
    self.assertEqual(
        initial_cgfs_read_two,
        self._cgfs.Read(['README.md', 'requirements.txt']).Get())

  def testWithoutRefresh(self):
    # Without refreshing it will still read the content from blobstore, and it
    # does this via the magic of the FakeURLFSFetcher.
    self.assertEqual(['__init__.notpy', 'hello.notpy'],
                     sorted(self._gfs.ReadSingle('src/').Get()))

  def testRefresh(self):
    test_bundle = _TestBundle()
    gfs, fetcher = test_bundle.CreateGfsAndFetcher()

    # It shouldn't fetch until Refresh does so; then it will do 2, one for the
    # stat, and another for the read.
    self.assertTrue(*fetcher.CheckAndReset())
    gfs.Refresh().Get()
    self.assertTrue(*fetcher.CheckAndReset(fetch_count=1,
                                           fetch_async_count=1,
                                           fetch_resolve_count=1))

    # Refresh is just an alias for Read('').
    gfs.Refresh().Get()
    self.assertTrue(*fetcher.CheckAndReset())

    initial_dir_read = sorted(gfs.ReadSingle('').Get())
    initial_file_read = gfs.ReadSingle('dir/file1').Get()

    version, data = test_bundle.Mutate()

    # Check that changes have not affected the file system yet.
    self.assertEqual(initial_dir_read, sorted(gfs.ReadSingle('').Get()))
    self.assertEqual(initial_file_read, gfs.ReadSingle('dir/file1').Get())
    self.assertNotEqual(StatInfo(version), gfs.Stat(''))

    gfs, fetcher = test_bundle.CreateGfsAndFetcher()
    gfs.Refresh().Get()
    self.assertTrue(*fetcher.CheckAndReset(fetch_count=1,
                                           fetch_async_count=1,
                                           fetch_resolve_count=1))

    # Check that the changes have affected the file system.
    self.assertEqual(data, gfs.ReadSingle('new-file').Get())
    self.assertEqual(test_bundle.files['zipfile/dir/file1'],
                     gfs.ReadSingle('dir/file1').Get())
    self.assertEqual(StatInfo(version), gfs.Stat('new-file'))

    # Regression test: ensure that reading the data after it's been mutated,
    # but before Refresh() has been realised, still returns the correct data.
    gfs, fetcher = test_bundle.CreateGfsAndFetcher()
    version, data = test_bundle.Mutate()

    refresh_future = gfs.Refresh()
    self.assertTrue(*fetcher.CheckAndReset(fetch_count=1, fetch_async_count=1))

    self.assertEqual(data, gfs.ReadSingle('new-file').Get())
    self.assertEqual(test_bundle.files['zipfile/dir/file1'],
                     gfs.ReadSingle('dir/file1').Get())
    self.assertEqual(StatInfo(version), gfs.Stat('new-file'))

    refresh_future.Get()
    self.assertTrue(*fetcher.CheckAndReset(fetch_resolve_count=1))

  def testGetThenRefreshOnStartup(self):
    # Regression test: Test that calling Get() but never resolving the future,
    # then Refresh()ing the data, causes the data to be refreshed.
    test_bundle = _TestBundle()
    gfs, fetcher = test_bundle.CreateGfsAndFetcher()
    self.assertTrue(*fetcher.CheckAndReset())

    # Get a predictable version.
    version, data = test_bundle.Mutate()

    read_future = gfs.ReadSingle('hello.txt')
    # Fetch for the Stat(), async-fetch for the Read().
    self.assertTrue(*fetcher.CheckAndReset(fetch_count=1, fetch_async_count=1))

    refresh_future = gfs.Refresh()
    self.assertTrue(*fetcher.CheckAndReset())

    self.assertEqual(data, read_future.Get())
    self.assertTrue(*fetcher.CheckAndReset(fetch_resolve_count=1))
    self.assertEqual(StatInfo(version), gfs.Stat('hello.txt'))
    self.assertTrue(*fetcher.CheckAndReset())

    # The fetch will already have been resolved, so resolving the Refresh won't
    # affect anything.
    refresh_future.Get()
    self.assertTrue(*fetcher.CheckAndReset())

    # Read data should not have changed.
    self.assertEqual(data, gfs.ReadSingle('hello.txt').Get())
    self.assertEqual(StatInfo(version), gfs.Stat('hello.txt'))
    self.assertTrue(*fetcher.CheckAndReset())


if __name__ == '__main__':
  unittest.main()