1 # -*- coding: utf-8 -*- 2 # Copyright 2013 Google Inc. All Rights Reserved. 3 # 4 # Licensed under the Apache License, Version 2.0 (the "License"); 5 # you may not use this file except in compliance with the License. 6 # You may obtain a copy of the License at 7 # 8 # http://www.apache.org/licenses/LICENSE-2.0 9 # 10 # Unless required by applicable law or agreed to in writing, software 11 # distributed under the License is distributed on an "AS IS" BASIS, 12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 # See the License for the specific language governing permissions and 14 # limitations under the License. 15 """Tests for ls command.""" 16 17 from __future__ import absolute_import 18 19 import posixpath 20 import re 21 import subprocess 22 import sys 23 24 import gslib 25 from gslib.cs_api_map import ApiSelector 26 import gslib.tests.testcase as testcase 27 from gslib.tests.testcase.integration_testcase import SkipForS3 28 from gslib.tests.util import ObjectToURI as suri 29 from gslib.tests.util import unittest 30 from gslib.util import IS_WINDOWS 31 from gslib.util import Retry 32 from gslib.util import UTF8 33 34 35 class TestLs(testcase.GsUtilIntegrationTestCase): 36 """Integration tests for ls command.""" 37 38 def test_blank_ls(self): 39 self.RunGsUtil(['ls']) 40 41 def test_empty_bucket(self): 42 bucket_uri = self.CreateBucket() 43 self.AssertNObjectsInBucket(bucket_uri, 0) 44 45 def test_empty_bucket_with_b(self): 46 bucket_uri = self.CreateBucket() 47 # Use @Retry as hedge against bucket listing eventual consistency. 48 @Retry(AssertionError, tries=3, timeout_secs=1) 49 def _Check1(): 50 stdout = self.RunGsUtil(['ls', '-b', suri(bucket_uri)], 51 return_stdout=True) 52 self.assertEqual('%s/\n' % suri(bucket_uri), stdout) 53 _Check1() 54 55 def test_bucket_with_Lb(self): 56 """Tests ls -Lb.""" 57 bucket_uri = self.CreateBucket() 58 # Use @Retry as hedge against bucket listing eventual consistency. 59 @Retry(AssertionError, tries=3, timeout_secs=1) 60 def _Check1(): 61 stdout = self.RunGsUtil(['ls', '-Lb', suri(bucket_uri)], 62 return_stdout=True) 63 self.assertIn(suri(bucket_uri), stdout) 64 self.assertNotIn('TOTAL:', stdout) 65 _Check1() 66 67 def test_bucket_with_lb(self): 68 """Tests ls -lb.""" 69 bucket_uri = self.CreateBucket() 70 # Use @Retry as hedge against bucket listing eventual consistency. 71 @Retry(AssertionError, tries=3, timeout_secs=1) 72 def _Check1(): 73 stdout = self.RunGsUtil(['ls', '-lb', suri(bucket_uri)], 74 return_stdout=True) 75 self.assertIn(suri(bucket_uri), stdout) 76 self.assertNotIn('TOTAL:', stdout) 77 _Check1() 78 79 def test_bucket_list_wildcard(self): 80 """Tests listing multiple buckets with a wildcard.""" 81 random_prefix = self.MakeRandomTestString() 82 bucket1_name = self.MakeTempName('bucket', prefix=random_prefix) 83 bucket2_name = self.MakeTempName('bucket', prefix=random_prefix) 84 bucket1_uri = self.CreateBucket(bucket_name=bucket1_name) 85 bucket2_uri = self.CreateBucket(bucket_name=bucket2_name) 86 # This just double checks that the common prefix of the two buckets is what 87 # we think it should be (based on implementation detail of CreateBucket). 88 # We want to be careful when setting a wildcard on buckets to make sure we 89 # don't step outside the test buckets to affect other buckets. 90 common_prefix = posixpath.commonprefix([suri(bucket1_uri), 91 suri(bucket2_uri)]) 92 self.assertTrue(common_prefix.startswith( 93 '%s://%sgsutil-test-test_bucket_list_wildcard-bucket-' % 94 (self.default_provider, random_prefix))) 95 wildcard = '%s*' % common_prefix 96 97 # Use @Retry as hedge against bucket listing eventual consistency. 98 @Retry(AssertionError, tries=3, timeout_secs=1) 99 def _Check1(): 100 stdout = self.RunGsUtil(['ls', '-b', wildcard], return_stdout=True) 101 expected = set([suri(bucket1_uri) + '/', suri(bucket2_uri) + '/']) 102 actual = set(stdout.split()) 103 self.assertEqual(expected, actual) 104 _Check1() 105 106 def test_nonexistent_bucket_with_ls(self): 107 """Tests a bucket that is known not to exist.""" 108 stderr = self.RunGsUtil( 109 ['ls', '-lb', 'gs://%s' % self.nonexistent_bucket_name], 110 return_stderr=True, expected_status=1) 111 self.assertIn('404', stderr) 112 113 stderr = self.RunGsUtil( 114 ['ls', '-Lb', 'gs://%s' % self.nonexistent_bucket_name], 115 return_stderr=True, expected_status=1) 116 self.assertIn('404', stderr) 117 118 stderr = self.RunGsUtil( 119 ['ls', '-b', 'gs://%s' % self.nonexistent_bucket_name], 120 return_stderr=True, expected_status=1) 121 self.assertIn('404', stderr) 122 123 def test_list_missing_object(self): 124 """Tests listing a non-existent object.""" 125 bucket_uri = self.CreateBucket() 126 stderr = self.RunGsUtil(['ls', suri(bucket_uri, 'missing')], 127 return_stderr=True, expected_status=1) 128 self.assertIn('matched no objects', stderr) 129 130 def test_with_one_object(self): 131 bucket_uri = self.CreateBucket() 132 obj_uri = self.CreateObject(bucket_uri=bucket_uri, contents='foo') 133 # Use @Retry as hedge against bucket listing eventual consistency. 134 @Retry(AssertionError, tries=3, timeout_secs=1) 135 def _Check1(): 136 stdout = self.RunGsUtil(['ls', suri(bucket_uri)], return_stdout=True) 137 self.assertEqual('%s\n' % obj_uri, stdout) 138 _Check1() 139 140 def test_subdir(self): 141 """Tests listing a bucket subdirectory.""" 142 bucket_uri = self.CreateBucket(test_objects=1) 143 k1_uri = bucket_uri.clone_replace_name('foo') 144 k1_uri.set_contents_from_string('baz') 145 k2_uri = bucket_uri.clone_replace_name('dir/foo') 146 k2_uri.set_contents_from_string('bar') 147 # Use @Retry as hedge against bucket listing eventual consistency. 148 @Retry(AssertionError, tries=3, timeout_secs=1) 149 def _Check1(): 150 stdout = self.RunGsUtil(['ls', '%s/dir' % suri(bucket_uri)], 151 return_stdout=True) 152 self.assertEqual('%s\n' % suri(k2_uri), stdout) 153 stdout = self.RunGsUtil(['ls', suri(k1_uri)], return_stdout=True) 154 self.assertEqual('%s\n' % suri(k1_uri), stdout) 155 _Check1() 156 157 def test_versioning(self): 158 """Tests listing a versioned bucket.""" 159 bucket1_uri = self.CreateBucket(test_objects=1) 160 bucket2_uri = self.CreateVersionedBucket(test_objects=1) 161 self.AssertNObjectsInBucket(bucket1_uri, 1, versioned=True) 162 bucket_list = list(bucket1_uri.list_bucket()) 163 164 objuri = [bucket1_uri.clone_replace_key(key).versionless_uri 165 for key in bucket_list][0] 166 self.RunGsUtil(['cp', objuri, suri(bucket2_uri)]) 167 self.RunGsUtil(['cp', objuri, suri(bucket2_uri)]) 168 # Use @Retry as hedge against bucket listing eventual consistency. 169 @Retry(AssertionError, tries=3, timeout_secs=1) 170 def _Check2(): 171 stdout = self.RunGsUtil(['ls', '-a', suri(bucket2_uri)], 172 return_stdout=True) 173 self.assertNumLines(stdout, 3) 174 stdout = self.RunGsUtil(['ls', '-la', suri(bucket2_uri)], 175 return_stdout=True) 176 self.assertIn('%s#' % bucket2_uri.clone_replace_name(bucket_list[0].name), 177 stdout) 178 self.assertIn('metageneration=', stdout) 179 _Check2() 180 181 def test_etag(self): 182 """Tests that listing an object with an etag.""" 183 bucket_uri = self.CreateBucket() 184 obj_uri = self.CreateObject(bucket_uri=bucket_uri, contents='foo') 185 # TODO: When testcase setup can use JSON, match against the exact JSON 186 # etag. 187 etag = obj_uri.get_key().etag.strip('"\'') 188 # Use @Retry as hedge against bucket listing eventual consistency. 189 @Retry(AssertionError, tries=3, timeout_secs=1) 190 def _Check1(): 191 stdout = self.RunGsUtil(['ls', '-l', suri(bucket_uri)], 192 return_stdout=True) 193 if self.test_api == ApiSelector.XML: 194 self.assertNotIn(etag, stdout) 195 else: 196 self.assertNotIn('etag=', stdout) 197 _Check1() 198 199 def _Check2(): 200 stdout = self.RunGsUtil(['ls', '-le', suri(bucket_uri)], 201 return_stdout=True) 202 if self.test_api == ApiSelector.XML: 203 self.assertIn(etag, stdout) 204 else: 205 self.assertIn('etag=', stdout) 206 _Check2() 207 208 def _Check3(): 209 stdout = self.RunGsUtil(['ls', '-ale', suri(bucket_uri)], 210 return_stdout=True) 211 if self.test_api == ApiSelector.XML: 212 self.assertIn(etag, stdout) 213 else: 214 self.assertIn('etag=', stdout) 215 _Check3() 216 217 @SkipForS3('S3 bucket configuration values are not supported via ls.') 218 def test_location(self): 219 """Tests listing a bucket with location constraint.""" 220 bucket_uri = self.CreateBucket() 221 bucket_suri = suri(bucket_uri) 222 223 # No location info 224 stdout = self.RunGsUtil(['ls', '-lb', bucket_suri], 225 return_stdout=True) 226 self.assertNotIn('Location constraint', stdout) 227 228 # Default location constraint is US 229 stdout = self.RunGsUtil(['ls', '-Lb', bucket_suri], 230 return_stdout=True) 231 self.assertIn('Location constraint:\t\tUS', stdout) 232 233 @SkipForS3('S3 bucket configuration values are not supported via ls.') 234 def test_logging(self): 235 """Tests listing a bucket with logging config.""" 236 bucket_uri = self.CreateBucket() 237 bucket_suri = suri(bucket_uri) 238 239 # No logging info 240 stdout = self.RunGsUtil(['ls', '-lb', bucket_suri], 241 return_stdout=True) 242 self.assertNotIn('Logging configuration', stdout) 243 244 # Logging configuration is absent by default 245 stdout = self.RunGsUtil(['ls', '-Lb', bucket_suri], 246 return_stdout=True) 247 self.assertIn('Logging configuration:\t\tNone', stdout) 248 249 # Enable and check 250 self.RunGsUtil(['logging', 'set', 'on', '-b', bucket_suri, 251 bucket_suri]) 252 stdout = self.RunGsUtil(['ls', '-Lb', bucket_suri], 253 return_stdout=True) 254 self.assertIn('Logging configuration:\t\tPresent', stdout) 255 256 # Disable and check 257 self.RunGsUtil(['logging', 'set', 'off', bucket_suri]) 258 stdout = self.RunGsUtil(['ls', '-Lb', bucket_suri], 259 return_stdout=True) 260 self.assertIn('Logging configuration:\t\tNone', stdout) 261 262 @SkipForS3('S3 bucket configuration values are not supported via ls.') 263 def test_web(self): 264 """Tests listing a bucket with website config.""" 265 bucket_uri = self.CreateBucket() 266 bucket_suri = suri(bucket_uri) 267 268 # No website configuration 269 stdout = self.RunGsUtil(['ls', '-lb', bucket_suri], 270 return_stdout=True) 271 self.assertNotIn('Website configuration', stdout) 272 273 # Website configuration is absent by default 274 stdout = self.RunGsUtil(['ls', '-Lb', bucket_suri], 275 return_stdout=True) 276 self.assertIn('Website configuration:\t\tNone', stdout) 277 278 # Initialize and check 279 self.RunGsUtil(['web', 'set', '-m', 'google.com', bucket_suri]) 280 stdout = self.RunGsUtil(['ls', '-Lb', bucket_suri], 281 return_stdout=True) 282 self.assertIn('Website configuration:\t\tPresent', stdout) 283 284 # Clear and check 285 self.RunGsUtil(['web', 'set', bucket_suri]) 286 stdout = self.RunGsUtil(['ls', '-Lb', bucket_suri], 287 return_stdout=True) 288 self.assertIn('Website configuration:\t\tNone', stdout) 289 290 def test_list_sizes(self): 291 """Tests various size listing options.""" 292 bucket_uri = self.CreateBucket() 293 self.CreateObject(bucket_uri=bucket_uri, contents='x' * 2048) 294 295 # Use @Retry as hedge against bucket listing eventual consistency. 296 @Retry(AssertionError, tries=3, timeout_secs=1) 297 def _Check1(): 298 stdout = self.RunGsUtil(['ls', '-l', suri(bucket_uri)], 299 return_stdout=True) 300 self.assertIn('2048', stdout) 301 _Check1() 302 303 # Use @Retry as hedge against bucket listing eventual consistency. 304 @Retry(AssertionError, tries=3, timeout_secs=1) 305 def _Check2(): 306 stdout = self.RunGsUtil(['ls', '-L', suri(bucket_uri)], 307 return_stdout=True) 308 self.assertIn('2048', stdout) 309 _Check2() 310 311 # Use @Retry as hedge against bucket listing eventual consistency. 312 @Retry(AssertionError, tries=3, timeout_secs=1) 313 def _Check3(): 314 stdout = self.RunGsUtil(['ls', '-al', suri(bucket_uri)], 315 return_stdout=True) 316 self.assertIn('2048', stdout) 317 _Check3() 318 319 # Use @Retry as hedge against bucket listing eventual consistency. 320 @Retry(AssertionError, tries=3, timeout_secs=1) 321 def _Check4(): 322 stdout = self.RunGsUtil(['ls', '-lh', suri(bucket_uri)], 323 return_stdout=True) 324 self.assertIn('2 KiB', stdout) 325 _Check4() 326 327 # Use @Retry as hedge against bucket listing eventual consistency. 328 @Retry(AssertionError, tries=3, timeout_secs=1) 329 def _Check5(): 330 stdout = self.RunGsUtil(['ls', '-alh', suri(bucket_uri)], 331 return_stdout=True) 332 self.assertIn('2 KiB', stdout) 333 _Check5() 334 335 @unittest.skipIf(IS_WINDOWS, 336 'Unicode handling on Windows requires mods to site-packages') 337 def test_list_unicode_filename(self): 338 """Tests listing an object with a unicode filename.""" 339 # Note: This test fails on Windows (command.exe). I was able to get ls to 340 # output Unicode filenames correctly by hacking the UniStream class code 341 # shown at 342 # http://stackoverflow.com/questions/878972/windows-cmd-encoding-change-causes-python-crash/3259271 343 # into the start of gslib/commands/ls.py, along with no-op flush and 344 # isastream functions (as an experiment). However, even with that change, 345 # the current test still fails, since it also needs to run that 346 # stdout/stderr-replacement code. That UniStream class replacement really 347 # needs to be added to the site-packages on Windows python. 348 object_name = u'' 349 object_name_bytes = object_name.encode(UTF8) 350 bucket_uri = self.CreateVersionedBucket() 351 key_uri = self.CreateObject(bucket_uri=bucket_uri, contents='foo', 352 object_name=object_name) 353 self.AssertNObjectsInBucket(bucket_uri, 1, versioned=True) 354 stdout = self.RunGsUtil(['ls', '-ael', suri(key_uri)], 355 return_stdout=True) 356 self.assertIn(object_name_bytes, stdout) 357 if self.default_provider == 'gs': 358 self.assertIn(str(key_uri.generation), stdout) 359 self.assertIn( 360 'metageneration=%s' % key_uri.get_key().metageneration, stdout) 361 if self.test_api == ApiSelector.XML: 362 self.assertIn(key_uri.get_key().etag.strip('"\''), stdout) 363 else: 364 # TODO: When testcase setup can use JSON, match against the exact JSON 365 # etag. 366 self.assertIn('etag=', stdout) 367 elif self.default_provider == 's3': 368 self.assertIn(key_uri.version_id, stdout) 369 self.assertIn(key_uri.get_key().etag.strip('"\''), stdout) 370 371 def test_list_gzip_content_length(self): 372 """Tests listing a gzipped object.""" 373 file_size = 10000 374 file_contents = 'x' * file_size 375 fpath = self.CreateTempFile(contents=file_contents, file_name='foo.txt') 376 key_uri = self.CreateObject() 377 self.RunGsUtil(['cp', '-z', 'txt', suri(fpath), suri(key_uri)]) 378 379 # Use @Retry as hedge against bucket listing eventual consistency. 380 @Retry(AssertionError, tries=3, timeout_secs=1) 381 def _Check1(): 382 stdout = self.RunGsUtil(['ls', '-L', suri(key_uri)], return_stdout=True) 383 self.assertRegexpMatches(stdout, r'Content-Encoding:\s+gzip') 384 find_content_length_re = r'Content-Length:\s+(?P<num>\d)' 385 self.assertRegexpMatches(stdout, find_content_length_re) 386 m = re.search(find_content_length_re, stdout) 387 content_length = int(m.group('num')) 388 self.assertGreater(content_length, 0) 389 self.assertLess(content_length, file_size) 390 _Check1() 391 392 def test_output_chopped(self): 393 """Tests that gsutil still succeeds with a truncated stdout.""" 394 bucket_uri = self.CreateBucket(test_objects=2) 395 396 # Run Python with the -u flag so output is not buffered. 397 gsutil_cmd = [ 398 sys.executable, '-u', gslib.GSUTIL_PATH, 'ls', suri(bucket_uri)] 399 # Set bufsize to 0 to make sure output is not buffered. 400 p = subprocess.Popen(gsutil_cmd, stdout=subprocess.PIPE, bufsize=0) 401 # Immediately close the stdout pipe so that gsutil gets a broken pipe error. 402 p.stdout.close() 403 p.wait() 404 # Make sure it still exited cleanly. 405 self.assertEqual(p.returncode, 0) 406 407 def test_recursive_list_trailing_slash(self): 408 """Tests listing an object with a trailing slash.""" 409 bucket_uri = self.CreateBucket() 410 self.CreateObject(bucket_uri=bucket_uri, object_name='/', contents='foo') 411 self.AssertNObjectsInBucket(bucket_uri, 1) 412 stdout = self.RunGsUtil(['ls', '-R', suri(bucket_uri)], return_stdout=True) 413 # Note: The suri function normalizes the URI, so the double slash gets 414 # removed. 415 self.assertIn(suri(bucket_uri) + '/', stdout) 416 417 def test_recursive_list_trailing_two_slash(self): 418 """Tests listing an object with two trailing slashes.""" 419 bucket_uri = self.CreateBucket() 420 self.CreateObject(bucket_uri=bucket_uri, object_name='//', contents='foo') 421 self.AssertNObjectsInBucket(bucket_uri, 1) 422 stdout = self.RunGsUtil(['ls', '-R', suri(bucket_uri)], return_stdout=True) 423 # Note: The suri function normalizes the URI, so the double slash gets 424 # removed. 425 self.assertIn(suri(bucket_uri) + '//', stdout) 426 427 @SkipForS3('S3 anonymous access is not supported.') 428 def test_get_object_without_list_bucket_permission(self): 429 # Bucket is not publicly readable by default. 430 bucket_uri = self.CreateBucket() 431 object_uri = self.CreateObject(bucket_uri=bucket_uri, 432 object_name='permitted', contents='foo') 433 # Set this object to be publicly readable. 434 self.RunGsUtil(['acl', 'set', 'public-read', suri(object_uri)]) 435 # Drop credentials. 436 with self.SetAnonymousBotoCreds(): 437 stdout = self.RunGsUtil(['ls', '-L', suri(object_uri)], 438 return_stdout=True) 439 self.assertIn(suri(object_uri), stdout) 440