Home | History | Annotate | Download | only in io
      1 # Copyright 2015 The TensorFlow Authors. All Rights Reserved.
      2 #
      3 # Licensed under the Apache License, Version 2.0 (the "License");
      4 # you may not use this file except in compliance with the License.
      5 # You may obtain a copy of the License at
      6 #
      7 #     http://www.apache.org/licenses/LICENSE-2.0
      8 #
      9 # Unless required by applicable law or agreed to in writing, software
     10 # distributed under the License is distributed on an "AS IS" BASIS,
     11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 # See the License for the specific language governing permissions and
     13 # limitations under the License.
     14 # ==============================================================================
     15 """File IO methods that wrap the C++ FileSystem API.
     16 
     17 The C++ FileSystem API is SWIG wrapped in file_io.i. These functions call those
     18 to accomplish basic File IO operations.
     19 """
     20 from __future__ import absolute_import
     21 from __future__ import division
     22 from __future__ import print_function
     23 
     24 import os
     25 import uuid
     26 
     27 import six
     28 
     29 from tensorflow.python import pywrap_tensorflow
     30 from tensorflow.python.framework import c_api_util
     31 from tensorflow.python.framework import errors
     32 from tensorflow.python.util import compat
     33 from tensorflow.python.util import deprecation
     34 from tensorflow.python.util.tf_export import tf_export
     35 
     36 
     37 class FileIO(object):
     38   """FileIO class that exposes methods to read / write to / from files.
     39 
     40   The constructor takes the following arguments:
     41   name: name of the file
     42   mode: one of 'r', 'w', 'a', 'r+', 'w+', 'a+'. Append 'b' for bytes mode.
     43 
     44   Can be used as an iterator to iterate over lines in the file.
     45 
     46   The default buffer size used for the BufferedInputStream used for reading
     47   the file line by line is 1024 * 512 bytes.
     48   """
     49 
     50   def __init__(self, name, mode):
     51     self.__name = name
     52     self.__mode = mode
     53     self._read_buf = None
     54     self._writable_file = None
     55     self._binary_mode = "b" in mode
     56     mode = mode.replace("b", "")
     57     if mode not in ("r", "w", "a", "r+", "w+", "a+"):
     58       raise errors.InvalidArgumentError(
     59           None, None, "mode is not 'r' or 'w' or 'a' or 'r+' or 'w+' or 'a+'")
     60     self._read_check_passed = mode in ("r", "r+", "a+", "w+")
     61     self._write_check_passed = mode in ("a", "w", "r+", "a+", "w+")
     62 
     63   @property
     64   def name(self):
     65     """Returns the file name."""
     66     return self.__name
     67 
     68   @property
     69   def mode(self):
     70     """Returns the mode in which the file was opened."""
     71     return self.__mode
     72 
     73   def _preread_check(self):
     74     if not self._read_buf:
     75       if not self._read_check_passed:
     76         raise errors.PermissionDeniedError(None, None,
     77                                            "File isn't open for reading")
     78       with errors.raise_exception_on_not_ok_status() as status:
     79         self._read_buf = pywrap_tensorflow.CreateBufferedInputStream(
     80             compat.as_bytes(self.__name), 1024 * 512, status)
     81 
     82   def _prewrite_check(self):
     83     if not self._writable_file:
     84       if not self._write_check_passed:
     85         raise errors.PermissionDeniedError(None, None,
     86                                            "File isn't open for writing")
     87       with errors.raise_exception_on_not_ok_status() as status:
     88         self._writable_file = pywrap_tensorflow.CreateWritableFile(
     89             compat.as_bytes(self.__name), compat.as_bytes(self.__mode), status)
     90 
     91   def _prepare_value(self, val):
     92     if self._binary_mode:
     93       return compat.as_bytes(val)
     94     else:
     95       return compat.as_str_any(val)
     96 
     97   def size(self):
     98     """Returns the size of the file."""
     99     return stat(self.__name).length
    100 
    101   def write(self, file_content):
    102     """Writes file_content to the file. Appends to the end of the file."""
    103     self._prewrite_check()
    104     with errors.raise_exception_on_not_ok_status() as status:
    105       pywrap_tensorflow.AppendToFile(
    106           compat.as_bytes(file_content), self._writable_file, status)
    107 
    108   def read(self, n=-1):
    109     """Returns the contents of a file as a string.
    110 
    111     Starts reading from current position in file.
    112 
    113     Args:
    114       n: Read 'n' bytes if n != -1. If n = -1, reads to end of file.
    115 
    116     Returns:
    117       'n' bytes of the file (or whole file) in bytes mode or 'n' bytes of the
    118       string if in string (regular) mode.
    119     """
    120     self._preread_check()
    121     with errors.raise_exception_on_not_ok_status() as status:
    122       if n == -1:
    123         length = self.size() - self.tell()
    124       else:
    125         length = n
    126       return self._prepare_value(
    127           pywrap_tensorflow.ReadFromStream(self._read_buf, length, status))
    128 
    129   @deprecation.deprecated_args(
    130       None,
    131       "position is deprecated in favor of the offset argument.",
    132       "position")
    133   def seek(self, offset=None, whence=0, position=None):
    134     # TODO(jhseu): Delete later. Used to omit `position` from docs.
    135     # pylint: disable=g-doc-args
    136     """Seeks to the offset in the file.
    137 
    138     Args:
    139       offset: The byte count relative to the whence argument.
    140       whence: Valid values for whence are:
    141         0: start of the file (default)
    142         1: relative to the current position of the file
    143         2: relative to the end of file. offset is usually negative.
    144     """
    145     # pylint: enable=g-doc-args
    146     self._preread_check()
    147     # We needed to make offset a keyword argument for backwards-compatibility.
    148     # This check exists so that we can convert back to having offset be a
    149     # positional argument.
    150     # TODO(jhseu): Make `offset` a positional argument after `position` is
    151     # deleted.
    152     if offset is None and position is None:
    153       raise TypeError("seek(): offset argument required")
    154     if offset is not None and position is not None:
    155       raise TypeError("seek(): offset and position may not be set "
    156                       "simultaneously.")
    157 
    158     if position is not None:
    159       offset = position
    160 
    161     with errors.raise_exception_on_not_ok_status() as status:
    162       if whence == 0:
    163         pass
    164       elif whence == 1:
    165         offset += self.tell()
    166       elif whence == 2:
    167         offset += self.size()
    168       else:
    169         raise errors.InvalidArgumentError(
    170             None, None,
    171             "Invalid whence argument: {}. Valid values are 0, 1, or 2."
    172             .format(whence))
    173       ret_status = self._read_buf.Seek(offset)
    174       pywrap_tensorflow.Set_TF_Status_from_Status(status, ret_status)
    175 
    176   def readline(self):
    177     r"""Reads the next line from the file. Leaves the '\n' at the end."""
    178     self._preread_check()
    179     return self._prepare_value(self._read_buf.ReadLineAsString())
    180 
    181   def readlines(self):
    182     """Returns all lines from the file in a list."""
    183     self._preread_check()
    184     lines = []
    185     while True:
    186       s = self.readline()
    187       if not s:
    188         break
    189       lines.append(s)
    190     return lines
    191 
    192   def tell(self):
    193     """Returns the current position in the file."""
    194     self._preread_check()
    195     return self._read_buf.Tell()
    196 
    197   def __enter__(self):
    198     """Make usable with "with" statement."""
    199     return self
    200 
    201   def __exit__(self, unused_type, unused_value, unused_traceback):
    202     """Make usable with "with" statement."""
    203     self.close()
    204 
    205   def __iter__(self):
    206     return self
    207 
    208   def next(self):
    209     retval = self.readline()
    210     if not retval:
    211       raise StopIteration()
    212     return retval
    213 
    214   def __next__(self):
    215     return self.next()
    216 
    217   def flush(self):
    218     """Flushes the Writable file.
    219 
    220     This only ensures that the data has made its way out of the process without
    221     any guarantees on whether it's written to disk. This means that the
    222     data would survive an application crash but not necessarily an OS crash.
    223     """
    224     if self._writable_file:
    225       with errors.raise_exception_on_not_ok_status() as status:
    226         ret_status = self._writable_file.Flush()
    227         pywrap_tensorflow.Set_TF_Status_from_Status(status, ret_status)
    228 
    229   def close(self):
    230     """Closes FileIO. Should be called for the WritableFile to be flushed."""
    231     self._read_buf = None
    232     if self._writable_file:
    233       with errors.raise_exception_on_not_ok_status() as status:
    234         ret_status = self._writable_file.Close()
    235         pywrap_tensorflow.Set_TF_Status_from_Status(status, ret_status)
    236     self._writable_file = None
    237 
    238 
    239 @tf_export("gfile.Exists")
    240 def file_exists(filename):
    241   """Determines whether a path exists or not.
    242 
    243   Args:
    244     filename: string, a path
    245 
    246   Returns:
    247     True if the path exists, whether its a file or a directory.
    248     False if the path does not exist and there are no filesystem errors.
    249 
    250   Raises:
    251     errors.OpError: Propagates any errors reported by the FileSystem API.
    252   """
    253   try:
    254     with errors.raise_exception_on_not_ok_status() as status:
    255       pywrap_tensorflow.FileExists(compat.as_bytes(filename), status)
    256   except errors.NotFoundError:
    257     return False
    258   return True
    259 
    260 
    261 @tf_export("gfile.Remove")
    262 def delete_file(filename):
    263   """Deletes the file located at 'filename'.
    264 
    265   Args:
    266     filename: string, a filename
    267 
    268   Raises:
    269     errors.OpError: Propagates any errors reported by the FileSystem API.  E.g.,
    270     NotFoundError if the file does not exist.
    271   """
    272   with errors.raise_exception_on_not_ok_status() as status:
    273     pywrap_tensorflow.DeleteFile(compat.as_bytes(filename), status)
    274 
    275 
    276 def read_file_to_string(filename, binary_mode=False):
    277   """Reads the entire contents of a file to a string.
    278 
    279   Args:
    280     filename: string, path to a file
    281     binary_mode: whether to open the file in binary mode or not. This changes
    282         the type of the object returned.
    283 
    284   Returns:
    285     contents of the file as a string or bytes.
    286 
    287   Raises:
    288     errors.OpError: Raises variety of errors that are subtypes e.g.
    289     NotFoundError etc.
    290   """
    291   if binary_mode:
    292     f = FileIO(filename, mode="rb")
    293   else:
    294     f = FileIO(filename, mode="r")
    295   return f.read()
    296 
    297 
    298 def write_string_to_file(filename, file_content):
    299   """Writes a string to a given file.
    300 
    301   Args:
    302     filename: string, path to a file
    303     file_content: string, contents that need to be written to the file
    304 
    305   Raises:
    306     errors.OpError: If there are errors during the operation.
    307   """
    308   with FileIO(filename, mode="w") as f:
    309     f.write(file_content)
    310 
    311 
    312 @tf_export("gfile.Glob")
    313 def get_matching_files(filename):
    314   """Returns a list of files that match the given pattern(s).
    315 
    316   Args:
    317     filename: string or iterable of strings. The glob pattern(s).
    318 
    319   Returns:
    320     A list of strings containing filenames that match the given pattern(s).
    321 
    322   Raises:
    323     errors.OpError: If there are filesystem / directory listing errors.
    324   """
    325   with errors.raise_exception_on_not_ok_status() as status:
    326     if isinstance(filename, six.string_types):
    327       return [
    328           # Convert the filenames to string from bytes.
    329           compat.as_str_any(matching_filename)
    330           for matching_filename in pywrap_tensorflow.GetMatchingFiles(
    331               compat.as_bytes(filename), status)
    332       ]
    333     else:
    334       return [
    335           # Convert the filenames to string from bytes.
    336           compat.as_str_any(matching_filename)
    337           for single_filename in filename
    338           for matching_filename in pywrap_tensorflow.GetMatchingFiles(
    339               compat.as_bytes(single_filename), status)
    340       ]
    341 
    342 
    343 @tf_export("gfile.MkDir")
    344 def create_dir(dirname):
    345   """Creates a directory with the name 'dirname'.
    346 
    347   Args:
    348     dirname: string, name of the directory to be created
    349 
    350   Notes:
    351     The parent directories need to exist. Use recursive_create_dir instead if
    352     there is the possibility that the parent dirs don't exist.
    353 
    354   Raises:
    355     errors.OpError: If the operation fails.
    356   """
    357   with errors.raise_exception_on_not_ok_status() as status:
    358     pywrap_tensorflow.CreateDir(compat.as_bytes(dirname), status)
    359 
    360 
    361 @tf_export("gfile.MakeDirs")
    362 def recursive_create_dir(dirname):
    363   """Creates a directory and all parent/intermediate directories.
    364 
    365   It succeeds if dirname already exists and is writable.
    366 
    367   Args:
    368     dirname: string, name of the directory to be created
    369 
    370   Raises:
    371     errors.OpError: If the operation fails.
    372   """
    373   with errors.raise_exception_on_not_ok_status() as status:
    374     pywrap_tensorflow.RecursivelyCreateDir(compat.as_bytes(dirname), status)
    375 
    376 
    377 @tf_export("gfile.Copy")
    378 def copy(oldpath, newpath, overwrite=False):
    379   """Copies data from oldpath to newpath.
    380 
    381   Args:
    382     oldpath: string, name of the file who's contents need to be copied
    383     newpath: string, name of the file to which to copy to
    384     overwrite: boolean, if false its an error for newpath to be occupied by an
    385         existing file.
    386 
    387   Raises:
    388     errors.OpError: If the operation fails.
    389   """
    390   with errors.raise_exception_on_not_ok_status() as status:
    391     pywrap_tensorflow.CopyFile(
    392         compat.as_bytes(oldpath), compat.as_bytes(newpath), overwrite, status)
    393 
    394 
    395 @tf_export("gfile.Rename")
    396 def rename(oldname, newname, overwrite=False):
    397   """Rename or move a file / directory.
    398 
    399   Args:
    400     oldname: string, pathname for a file
    401     newname: string, pathname to which the file needs to be moved
    402     overwrite: boolean, if false it's an error for `newname` to be occupied by
    403         an existing file.
    404 
    405   Raises:
    406     errors.OpError: If the operation fails.
    407   """
    408   with errors.raise_exception_on_not_ok_status() as status:
    409     pywrap_tensorflow.RenameFile(
    410         compat.as_bytes(oldname), compat.as_bytes(newname), overwrite, status)
    411 
    412 
    413 def atomic_write_string_to_file(filename, contents, overwrite=True):
    414   """Writes to `filename` atomically.
    415 
    416   This means that when `filename` appears in the filesystem, it will contain
    417   all of `contents`. With write_string_to_file, it is possible for the file
    418   to appear in the filesystem with `contents` only partially written.
    419 
    420   Accomplished by writing to a temp file and then renaming it.
    421 
    422   Args:
    423     filename: string, pathname for a file
    424     contents: string, contents that need to be written to the file
    425     overwrite: boolean, if false it's an error for `filename` to be occupied by
    426         an existing file.
    427   """
    428   temp_pathname = filename + ".tmp" + uuid.uuid4().hex
    429   write_string_to_file(temp_pathname, contents)
    430   try:
    431     rename(temp_pathname, filename, overwrite)
    432   except errors.OpError:
    433     delete_file(temp_pathname)
    434     raise
    435 
    436 
    437 @tf_export("gfile.DeleteRecursively")
    438 def delete_recursively(dirname):
    439   """Deletes everything under dirname recursively.
    440 
    441   Args:
    442     dirname: string, a path to a directory
    443 
    444   Raises:
    445     errors.OpError: If the operation fails.
    446   """
    447   with errors.raise_exception_on_not_ok_status() as status:
    448     pywrap_tensorflow.DeleteRecursively(compat.as_bytes(dirname), status)
    449 
    450 
    451 @tf_export("gfile.IsDirectory")
    452 def is_directory(dirname):
    453   """Returns whether the path is a directory or not.
    454 
    455   Args:
    456     dirname: string, path to a potential directory
    457 
    458   Returns:
    459     True, if the path is a directory; False otherwise
    460   """
    461   status = c_api_util.ScopedTFStatus()
    462   return pywrap_tensorflow.IsDirectory(compat.as_bytes(dirname), status)
    463 
    464 
    465 @tf_export("gfile.ListDirectory")
    466 def list_directory(dirname):
    467   """Returns a list of entries contained within a directory.
    468 
    469   The list is in arbitrary order. It does not contain the special entries "."
    470   and "..".
    471 
    472   Args:
    473     dirname: string, path to a directory
    474 
    475   Returns:
    476     [filename1, filename2, ... filenameN] as strings
    477 
    478   Raises:
    479     errors.NotFoundError if directory doesn't exist
    480   """
    481   if not is_directory(dirname):
    482     raise errors.NotFoundError(None, None, "Could not find directory")
    483   with errors.raise_exception_on_not_ok_status() as status:
    484     # Convert each element to string, since the return values of the
    485     # vector of string should be interpreted as strings, not bytes.
    486     return [
    487         compat.as_str_any(filename)
    488         for filename in pywrap_tensorflow.GetChildren(
    489             compat.as_bytes(dirname), status)
    490     ]
    491 
    492 
    493 @tf_export("gfile.Walk")
    494 def walk(top, in_order=True):
    495   """Recursive directory tree generator for directories.
    496 
    497   Args:
    498     top: string, a Directory name
    499     in_order: bool, Traverse in order if True, post order if False.
    500 
    501   Errors that happen while listing directories are ignored.
    502 
    503   Yields:
    504     Each yield is a 3-tuple:  the pathname of a directory, followed by lists of
    505     all its subdirectories and leaf files.
    506     (dirname, [subdirname, subdirname, ...], [filename, filename, ...])
    507     as strings
    508   """
    509   top = compat.as_str_any(top)
    510   try:
    511     listing = list_directory(top)
    512   except errors.NotFoundError:
    513     return
    514 
    515   files = []
    516   subdirs = []
    517   for item in listing:
    518     full_path = os.path.join(top, item)
    519     if is_directory(full_path):
    520       subdirs.append(item)
    521     else:
    522       files.append(item)
    523 
    524   here = (top, subdirs, files)
    525 
    526   if in_order:
    527     yield here
    528 
    529   for subdir in subdirs:
    530     for subitem in walk(os.path.join(top, subdir), in_order):
    531       yield subitem
    532 
    533   if not in_order:
    534     yield here
    535 
    536 
    537 @tf_export("gfile.Stat")
    538 def stat(filename):
    539   """Returns file statistics for a given path.
    540 
    541   Args:
    542     filename: string, path to a file
    543 
    544   Returns:
    545     FileStatistics struct that contains information about the path
    546 
    547   Raises:
    548     errors.OpError: If the operation fails.
    549   """
    550   file_statistics = pywrap_tensorflow.FileStatistics()
    551   with errors.raise_exception_on_not_ok_status() as status:
    552     pywrap_tensorflow.Stat(compat.as_bytes(filename), file_statistics, status)
    553     return file_statistics
    554