1 # Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 # 3 # Licensed under the Apache License, Version 2.0 (the "License"); 4 # you may not use this file except in compliance with the License. 5 # You may obtain a copy of the License at 6 # 7 # http://www.apache.org/licenses/LICENSE-2.0 8 # 9 # Unless required by applicable law or agreed to in writing, software 10 # distributed under the License is distributed on an "AS IS" BASIS, 11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 # See the License for the specific language governing permissions and 13 # limitations under the License. 14 # ============================================================================== 15 """File IO methods that wrap the C++ FileSystem API. 16 17 The C++ FileSystem API is SWIG wrapped in file_io.i. These functions call those 18 to accomplish basic File IO operations. 19 """ 20 from __future__ import absolute_import 21 from __future__ import division 22 from __future__ import print_function 23 24 import os 25 import uuid 26 27 import six 28 29 from tensorflow.python import pywrap_tensorflow 30 from tensorflow.python.framework import c_api_util 31 from tensorflow.python.framework import errors 32 from tensorflow.python.util import compat 33 from tensorflow.python.util import deprecation 34 from tensorflow.python.util.tf_export import tf_export 35 36 37 class FileIO(object): 38 """FileIO class that exposes methods to read / write to / from files. 39 40 The constructor takes the following arguments: 41 name: name of the file 42 mode: one of 'r', 'w', 'a', 'r+', 'w+', 'a+'. Append 'b' for bytes mode. 43 44 Can be used as an iterator to iterate over lines in the file. 45 46 The default buffer size used for the BufferedInputStream used for reading 47 the file line by line is 1024 * 512 bytes. 48 """ 49 50 def __init__(self, name, mode): 51 self.__name = name 52 self.__mode = mode 53 self._read_buf = None 54 self._writable_file = None 55 self._binary_mode = "b" in mode 56 mode = mode.replace("b", "") 57 if mode not in ("r", "w", "a", "r+", "w+", "a+"): 58 raise errors.InvalidArgumentError( 59 None, None, "mode is not 'r' or 'w' or 'a' or 'r+' or 'w+' or 'a+'") 60 self._read_check_passed = mode in ("r", "r+", "a+", "w+") 61 self._write_check_passed = mode in ("a", "w", "r+", "a+", "w+") 62 63 @property 64 def name(self): 65 """Returns the file name.""" 66 return self.__name 67 68 @property 69 def mode(self): 70 """Returns the mode in which the file was opened.""" 71 return self.__mode 72 73 def _preread_check(self): 74 if not self._read_buf: 75 if not self._read_check_passed: 76 raise errors.PermissionDeniedError(None, None, 77 "File isn't open for reading") 78 with errors.raise_exception_on_not_ok_status() as status: 79 self._read_buf = pywrap_tensorflow.CreateBufferedInputStream( 80 compat.as_bytes(self.__name), 1024 * 512, status) 81 82 def _prewrite_check(self): 83 if not self._writable_file: 84 if not self._write_check_passed: 85 raise errors.PermissionDeniedError(None, None, 86 "File isn't open for writing") 87 with errors.raise_exception_on_not_ok_status() as status: 88 self._writable_file = pywrap_tensorflow.CreateWritableFile( 89 compat.as_bytes(self.__name), compat.as_bytes(self.__mode), status) 90 91 def _prepare_value(self, val): 92 if self._binary_mode: 93 return compat.as_bytes(val) 94 else: 95 return compat.as_str_any(val) 96 97 def size(self): 98 """Returns the size of the file.""" 99 return stat(self.__name).length 100 101 def write(self, file_content): 102 """Writes file_content to the file. Appends to the end of the file.""" 103 self._prewrite_check() 104 with errors.raise_exception_on_not_ok_status() as status: 105 pywrap_tensorflow.AppendToFile( 106 compat.as_bytes(file_content), self._writable_file, status) 107 108 def read(self, n=-1): 109 """Returns the contents of a file as a string. 110 111 Starts reading from current position in file. 112 113 Args: 114 n: Read 'n' bytes if n != -1. If n = -1, reads to end of file. 115 116 Returns: 117 'n' bytes of the file (or whole file) in bytes mode or 'n' bytes of the 118 string if in string (regular) mode. 119 """ 120 self._preread_check() 121 with errors.raise_exception_on_not_ok_status() as status: 122 if n == -1: 123 length = self.size() - self.tell() 124 else: 125 length = n 126 return self._prepare_value( 127 pywrap_tensorflow.ReadFromStream(self._read_buf, length, status)) 128 129 @deprecation.deprecated_args( 130 None, 131 "position is deprecated in favor of the offset argument.", 132 "position") 133 def seek(self, offset=None, whence=0, position=None): 134 # TODO(jhseu): Delete later. Used to omit `position` from docs. 135 # pylint: disable=g-doc-args 136 """Seeks to the offset in the file. 137 138 Args: 139 offset: The byte count relative to the whence argument. 140 whence: Valid values for whence are: 141 0: start of the file (default) 142 1: relative to the current position of the file 143 2: relative to the end of file. offset is usually negative. 144 """ 145 # pylint: enable=g-doc-args 146 self._preread_check() 147 # We needed to make offset a keyword argument for backwards-compatibility. 148 # This check exists so that we can convert back to having offset be a 149 # positional argument. 150 # TODO(jhseu): Make `offset` a positional argument after `position` is 151 # deleted. 152 if offset is None and position is None: 153 raise TypeError("seek(): offset argument required") 154 if offset is not None and position is not None: 155 raise TypeError("seek(): offset and position may not be set " 156 "simultaneously.") 157 158 if position is not None: 159 offset = position 160 161 with errors.raise_exception_on_not_ok_status() as status: 162 if whence == 0: 163 pass 164 elif whence == 1: 165 offset += self.tell() 166 elif whence == 2: 167 offset += self.size() 168 else: 169 raise errors.InvalidArgumentError( 170 None, None, 171 "Invalid whence argument: {}. Valid values are 0, 1, or 2." 172 .format(whence)) 173 ret_status = self._read_buf.Seek(offset) 174 pywrap_tensorflow.Set_TF_Status_from_Status(status, ret_status) 175 176 def readline(self): 177 r"""Reads the next line from the file. Leaves the '\n' at the end.""" 178 self._preread_check() 179 return self._prepare_value(self._read_buf.ReadLineAsString()) 180 181 def readlines(self): 182 """Returns all lines from the file in a list.""" 183 self._preread_check() 184 lines = [] 185 while True: 186 s = self.readline() 187 if not s: 188 break 189 lines.append(s) 190 return lines 191 192 def tell(self): 193 """Returns the current position in the file.""" 194 self._preread_check() 195 return self._read_buf.Tell() 196 197 def __enter__(self): 198 """Make usable with "with" statement.""" 199 return self 200 201 def __exit__(self, unused_type, unused_value, unused_traceback): 202 """Make usable with "with" statement.""" 203 self.close() 204 205 def __iter__(self): 206 return self 207 208 def next(self): 209 retval = self.readline() 210 if not retval: 211 raise StopIteration() 212 return retval 213 214 def __next__(self): 215 return self.next() 216 217 def flush(self): 218 """Flushes the Writable file. 219 220 This only ensures that the data has made its way out of the process without 221 any guarantees on whether it's written to disk. This means that the 222 data would survive an application crash but not necessarily an OS crash. 223 """ 224 if self._writable_file: 225 with errors.raise_exception_on_not_ok_status() as status: 226 ret_status = self._writable_file.Flush() 227 pywrap_tensorflow.Set_TF_Status_from_Status(status, ret_status) 228 229 def close(self): 230 """Closes FileIO. Should be called for the WritableFile to be flushed.""" 231 self._read_buf = None 232 if self._writable_file: 233 with errors.raise_exception_on_not_ok_status() as status: 234 ret_status = self._writable_file.Close() 235 pywrap_tensorflow.Set_TF_Status_from_Status(status, ret_status) 236 self._writable_file = None 237 238 239 @tf_export("gfile.Exists") 240 def file_exists(filename): 241 """Determines whether a path exists or not. 242 243 Args: 244 filename: string, a path 245 246 Returns: 247 True if the path exists, whether its a file or a directory. 248 False if the path does not exist and there are no filesystem errors. 249 250 Raises: 251 errors.OpError: Propagates any errors reported by the FileSystem API. 252 """ 253 try: 254 with errors.raise_exception_on_not_ok_status() as status: 255 pywrap_tensorflow.FileExists(compat.as_bytes(filename), status) 256 except errors.NotFoundError: 257 return False 258 return True 259 260 261 @tf_export("gfile.Remove") 262 def delete_file(filename): 263 """Deletes the file located at 'filename'. 264 265 Args: 266 filename: string, a filename 267 268 Raises: 269 errors.OpError: Propagates any errors reported by the FileSystem API. E.g., 270 NotFoundError if the file does not exist. 271 """ 272 with errors.raise_exception_on_not_ok_status() as status: 273 pywrap_tensorflow.DeleteFile(compat.as_bytes(filename), status) 274 275 276 def read_file_to_string(filename, binary_mode=False): 277 """Reads the entire contents of a file to a string. 278 279 Args: 280 filename: string, path to a file 281 binary_mode: whether to open the file in binary mode or not. This changes 282 the type of the object returned. 283 284 Returns: 285 contents of the file as a string or bytes. 286 287 Raises: 288 errors.OpError: Raises variety of errors that are subtypes e.g. 289 NotFoundError etc. 290 """ 291 if binary_mode: 292 f = FileIO(filename, mode="rb") 293 else: 294 f = FileIO(filename, mode="r") 295 return f.read() 296 297 298 def write_string_to_file(filename, file_content): 299 """Writes a string to a given file. 300 301 Args: 302 filename: string, path to a file 303 file_content: string, contents that need to be written to the file 304 305 Raises: 306 errors.OpError: If there are errors during the operation. 307 """ 308 with FileIO(filename, mode="w") as f: 309 f.write(file_content) 310 311 312 @tf_export("gfile.Glob") 313 def get_matching_files(filename): 314 """Returns a list of files that match the given pattern(s). 315 316 Args: 317 filename: string or iterable of strings. The glob pattern(s). 318 319 Returns: 320 A list of strings containing filenames that match the given pattern(s). 321 322 Raises: 323 errors.OpError: If there are filesystem / directory listing errors. 324 """ 325 with errors.raise_exception_on_not_ok_status() as status: 326 if isinstance(filename, six.string_types): 327 return [ 328 # Convert the filenames to string from bytes. 329 compat.as_str_any(matching_filename) 330 for matching_filename in pywrap_tensorflow.GetMatchingFiles( 331 compat.as_bytes(filename), status) 332 ] 333 else: 334 return [ 335 # Convert the filenames to string from bytes. 336 compat.as_str_any(matching_filename) 337 for single_filename in filename 338 for matching_filename in pywrap_tensorflow.GetMatchingFiles( 339 compat.as_bytes(single_filename), status) 340 ] 341 342 343 @tf_export("gfile.MkDir") 344 def create_dir(dirname): 345 """Creates a directory with the name 'dirname'. 346 347 Args: 348 dirname: string, name of the directory to be created 349 350 Notes: 351 The parent directories need to exist. Use recursive_create_dir instead if 352 there is the possibility that the parent dirs don't exist. 353 354 Raises: 355 errors.OpError: If the operation fails. 356 """ 357 with errors.raise_exception_on_not_ok_status() as status: 358 pywrap_tensorflow.CreateDir(compat.as_bytes(dirname), status) 359 360 361 @tf_export("gfile.MakeDirs") 362 def recursive_create_dir(dirname): 363 """Creates a directory and all parent/intermediate directories. 364 365 It succeeds if dirname already exists and is writable. 366 367 Args: 368 dirname: string, name of the directory to be created 369 370 Raises: 371 errors.OpError: If the operation fails. 372 """ 373 with errors.raise_exception_on_not_ok_status() as status: 374 pywrap_tensorflow.RecursivelyCreateDir(compat.as_bytes(dirname), status) 375 376 377 @tf_export("gfile.Copy") 378 def copy(oldpath, newpath, overwrite=False): 379 """Copies data from oldpath to newpath. 380 381 Args: 382 oldpath: string, name of the file who's contents need to be copied 383 newpath: string, name of the file to which to copy to 384 overwrite: boolean, if false its an error for newpath to be occupied by an 385 existing file. 386 387 Raises: 388 errors.OpError: If the operation fails. 389 """ 390 with errors.raise_exception_on_not_ok_status() as status: 391 pywrap_tensorflow.CopyFile( 392 compat.as_bytes(oldpath), compat.as_bytes(newpath), overwrite, status) 393 394 395 @tf_export("gfile.Rename") 396 def rename(oldname, newname, overwrite=False): 397 """Rename or move a file / directory. 398 399 Args: 400 oldname: string, pathname for a file 401 newname: string, pathname to which the file needs to be moved 402 overwrite: boolean, if false it's an error for `newname` to be occupied by 403 an existing file. 404 405 Raises: 406 errors.OpError: If the operation fails. 407 """ 408 with errors.raise_exception_on_not_ok_status() as status: 409 pywrap_tensorflow.RenameFile( 410 compat.as_bytes(oldname), compat.as_bytes(newname), overwrite, status) 411 412 413 def atomic_write_string_to_file(filename, contents, overwrite=True): 414 """Writes to `filename` atomically. 415 416 This means that when `filename` appears in the filesystem, it will contain 417 all of `contents`. With write_string_to_file, it is possible for the file 418 to appear in the filesystem with `contents` only partially written. 419 420 Accomplished by writing to a temp file and then renaming it. 421 422 Args: 423 filename: string, pathname for a file 424 contents: string, contents that need to be written to the file 425 overwrite: boolean, if false it's an error for `filename` to be occupied by 426 an existing file. 427 """ 428 temp_pathname = filename + ".tmp" + uuid.uuid4().hex 429 write_string_to_file(temp_pathname, contents) 430 try: 431 rename(temp_pathname, filename, overwrite) 432 except errors.OpError: 433 delete_file(temp_pathname) 434 raise 435 436 437 @tf_export("gfile.DeleteRecursively") 438 def delete_recursively(dirname): 439 """Deletes everything under dirname recursively. 440 441 Args: 442 dirname: string, a path to a directory 443 444 Raises: 445 errors.OpError: If the operation fails. 446 """ 447 with errors.raise_exception_on_not_ok_status() as status: 448 pywrap_tensorflow.DeleteRecursively(compat.as_bytes(dirname), status) 449 450 451 @tf_export("gfile.IsDirectory") 452 def is_directory(dirname): 453 """Returns whether the path is a directory or not. 454 455 Args: 456 dirname: string, path to a potential directory 457 458 Returns: 459 True, if the path is a directory; False otherwise 460 """ 461 status = c_api_util.ScopedTFStatus() 462 return pywrap_tensorflow.IsDirectory(compat.as_bytes(dirname), status) 463 464 465 @tf_export("gfile.ListDirectory") 466 def list_directory(dirname): 467 """Returns a list of entries contained within a directory. 468 469 The list is in arbitrary order. It does not contain the special entries "." 470 and "..". 471 472 Args: 473 dirname: string, path to a directory 474 475 Returns: 476 [filename1, filename2, ... filenameN] as strings 477 478 Raises: 479 errors.NotFoundError if directory doesn't exist 480 """ 481 if not is_directory(dirname): 482 raise errors.NotFoundError(None, None, "Could not find directory") 483 with errors.raise_exception_on_not_ok_status() as status: 484 # Convert each element to string, since the return values of the 485 # vector of string should be interpreted as strings, not bytes. 486 return [ 487 compat.as_str_any(filename) 488 for filename in pywrap_tensorflow.GetChildren( 489 compat.as_bytes(dirname), status) 490 ] 491 492 493 @tf_export("gfile.Walk") 494 def walk(top, in_order=True): 495 """Recursive directory tree generator for directories. 496 497 Args: 498 top: string, a Directory name 499 in_order: bool, Traverse in order if True, post order if False. 500 501 Errors that happen while listing directories are ignored. 502 503 Yields: 504 Each yield is a 3-tuple: the pathname of a directory, followed by lists of 505 all its subdirectories and leaf files. 506 (dirname, [subdirname, subdirname, ...], [filename, filename, ...]) 507 as strings 508 """ 509 top = compat.as_str_any(top) 510 try: 511 listing = list_directory(top) 512 except errors.NotFoundError: 513 return 514 515 files = [] 516 subdirs = [] 517 for item in listing: 518 full_path = os.path.join(top, item) 519 if is_directory(full_path): 520 subdirs.append(item) 521 else: 522 files.append(item) 523 524 here = (top, subdirs, files) 525 526 if in_order: 527 yield here 528 529 for subdir in subdirs: 530 for subitem in walk(os.path.join(top, subdir), in_order): 531 yield subitem 532 533 if not in_order: 534 yield here 535 536 537 @tf_export("gfile.Stat") 538 def stat(filename): 539 """Returns file statistics for a given path. 540 541 Args: 542 filename: string, path to a file 543 544 Returns: 545 FileStatistics struct that contains information about the path 546 547 Raises: 548 errors.OpError: If the operation fails. 549 """ 550 file_statistics = pywrap_tensorflow.FileStatistics() 551 with errors.raise_exception_on_not_ok_status() as status: 552 pywrap_tensorflow.Stat(compat.as_bytes(filename), file_statistics, status) 553 return file_statistics 554