Home | History | Annotate | Download | only in python2.7
      1 """
      2 Python implementation of the io module.
      3 """
      4 
      5 from __future__ import (print_function, unicode_literals)
      6 
      7 import os
      8 import abc
      9 import codecs
     10 import warnings
     11 import errno
     12 # Import thread instead of threading to reduce startup cost
     13 try:
     14     from thread import allocate_lock as Lock
     15 except ImportError:
     16     from dummy_thread import allocate_lock as Lock
     17 
     18 import io
     19 from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END)
     20 from errno import EINTR
     21 
     22 __metaclass__ = type
     23 
     24 # open() uses st_blksize whenever we can
     25 DEFAULT_BUFFER_SIZE = 8 * 1024  # bytes
     26 
     27 # NOTE: Base classes defined here are registered with the "official" ABCs
     28 # defined in io.py. We don't use real inheritance though, because we don't
     29 # want to inherit the C implementations.
     30 
     31 
     32 class BlockingIOError(IOError):
     33 
     34     """Exception raised when I/O would block on a non-blocking I/O stream."""
     35 
     36     def __init__(self, errno, strerror, characters_written=0):
     37         super(IOError, self).__init__(errno, strerror)
     38         if not isinstance(characters_written, (int, long)):
     39             raise TypeError("characters_written must be a integer")
     40         self.characters_written = characters_written
     41 
     42 
     43 def open(file, mode="r", buffering=-1,
     44          encoding=None, errors=None,
     45          newline=None, closefd=True):
     46 
     47     r"""Open file and return a stream.  Raise IOError upon failure.
     48 
     49     file is either a text or byte string giving the name (and the path
     50     if the file isn't in the current working directory) of the file to
     51     be opened or an integer file descriptor of the file to be
     52     wrapped. (If a file descriptor is given, it is closed when the
     53     returned I/O object is closed, unless closefd is set to False.)
     54 
     55     mode is an optional string that specifies the mode in which the file
     56     is opened. It defaults to 'r' which means open for reading in text
     57     mode.  Other common values are 'w' for writing (truncating the file if
     58     it already exists), and 'a' for appending (which on some Unix systems,
     59     means that all writes append to the end of the file regardless of the
     60     current seek position). In text mode, if encoding is not specified the
     61     encoding used is platform dependent. (For reading and writing raw
     62     bytes use binary mode and leave encoding unspecified.) The available
     63     modes are:
     64 
     65     ========= ===============================================================
     66     Character Meaning
     67     --------- ---------------------------------------------------------------
     68     'r'       open for reading (default)
     69     'w'       open for writing, truncating the file first
     70     'a'       open for writing, appending to the end of the file if it exists
     71     'b'       binary mode
     72     't'       text mode (default)
     73     '+'       open a disk file for updating (reading and writing)
     74     'U'       universal newline mode (for backwards compatibility; unneeded
     75               for new code)
     76     ========= ===============================================================
     77 
     78     The default mode is 'rt' (open for reading text). For binary random
     79     access, the mode 'w+b' opens and truncates the file to 0 bytes, while
     80     'r+b' opens the file without truncation.
     81 
     82     Python distinguishes between files opened in binary and text modes,
     83     even when the underlying operating system doesn't. Files opened in
     84     binary mode (appending 'b' to the mode argument) return contents as
     85     bytes objects without any decoding. In text mode (the default, or when
     86     't' is appended to the mode argument), the contents of the file are
     87     returned as strings, the bytes having been first decoded using a
     88     platform-dependent encoding or using the specified encoding if given.
     89 
     90     buffering is an optional integer used to set the buffering policy.
     91     Pass 0 to switch buffering off (only allowed in binary mode), 1 to select
     92     line buffering (only usable in text mode), and an integer > 1 to indicate
     93     the size of a fixed-size chunk buffer.  When no buffering argument is
     94     given, the default buffering policy works as follows:
     95 
     96     * Binary files are buffered in fixed-size chunks; the size of the buffer
     97       is chosen using a heuristic trying to determine the underlying device's
     98       "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
     99       On many systems, the buffer will typically be 4096 or 8192 bytes long.
    100 
    101     * "Interactive" text files (files for which isatty() returns True)
    102       use line buffering.  Other text files use the policy described above
    103       for binary files.
    104 
    105     encoding is the name of the encoding used to decode or encode the
    106     file. This should only be used in text mode. The default encoding is
    107     platform dependent, but any encoding supported by Python can be
    108     passed.  See the codecs module for the list of supported encodings.
    109 
    110     errors is an optional string that specifies how encoding errors are to
    111     be handled---this argument should not be used in binary mode. Pass
    112     'strict' to raise a ValueError exception if there is an encoding error
    113     (the default of None has the same effect), or pass 'ignore' to ignore
    114     errors. (Note that ignoring encoding errors can lead to data loss.)
    115     See the documentation for codecs.register for a list of the permitted
    116     encoding error strings.
    117 
    118     newline controls how universal newlines works (it only applies to text
    119     mode). It can be None, '', '\n', '\r', and '\r\n'.  It works as
    120     follows:
    121 
    122     * On input, if newline is None, universal newlines mode is
    123       enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
    124       these are translated into '\n' before being returned to the
    125       caller. If it is '', universal newline mode is enabled, but line
    126       endings are returned to the caller untranslated. If it has any of
    127       the other legal values, input lines are only terminated by the given
    128       string, and the line ending is returned to the caller untranslated.
    129 
    130     * On output, if newline is None, any '\n' characters written are
    131       translated to the system default line separator, os.linesep. If
    132       newline is '', no translation takes place. If newline is any of the
    133       other legal values, any '\n' characters written are translated to
    134       the given string.
    135 
    136     If closefd is False, the underlying file descriptor will be kept open
    137     when the file is closed. This does not work when a file name is given
    138     and must be True in that case.
    139 
    140     open() returns a file object whose type depends on the mode, and
    141     through which the standard file operations such as reading and writing
    142     are performed. When open() is used to open a file in a text mode ('w',
    143     'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
    144     a file in a binary mode, the returned class varies: in read binary
    145     mode, it returns a BufferedReader; in write binary and append binary
    146     modes, it returns a BufferedWriter, and in read/write mode, it returns
    147     a BufferedRandom.
    148 
    149     It is also possible to use a string or bytearray as a file for both
    150     reading and writing. For strings StringIO can be used like a file
    151     opened in a text mode, and for bytes a BytesIO can be used like a file
    152     opened in a binary mode.
    153     """
    154     if not isinstance(file, (basestring, int, long)):
    155         raise TypeError("invalid file: %r" % file)
    156     if not isinstance(mode, basestring):
    157         raise TypeError("invalid mode: %r" % mode)
    158     if not isinstance(buffering, (int, long)):
    159         raise TypeError("invalid buffering: %r" % buffering)
    160     if encoding is not None and not isinstance(encoding, basestring):
    161         raise TypeError("invalid encoding: %r" % encoding)
    162     if errors is not None and not isinstance(errors, basestring):
    163         raise TypeError("invalid errors: %r" % errors)
    164     modes = set(mode)
    165     if modes - set("arwb+tU") or len(mode) > len(modes):
    166         raise ValueError("invalid mode: %r" % mode)
    167     reading = "r" in modes
    168     writing = "w" in modes
    169     appending = "a" in modes
    170     updating = "+" in modes
    171     text = "t" in modes
    172     binary = "b" in modes
    173     if "U" in modes:
    174         if writing or appending:
    175             raise ValueError("can't use U and writing mode at once")
    176         reading = True
    177     if text and binary:
    178         raise ValueError("can't have text and binary mode at once")
    179     if reading + writing + appending > 1:
    180         raise ValueError("can't have read/write/append mode at once")
    181     if not (reading or writing or appending):
    182         raise ValueError("must have exactly one of read/write/append mode")
    183     if binary and encoding is not None:
    184         raise ValueError("binary mode doesn't take an encoding argument")
    185     if binary and errors is not None:
    186         raise ValueError("binary mode doesn't take an errors argument")
    187     if binary and newline is not None:
    188         raise ValueError("binary mode doesn't take a newline argument")
    189     raw = FileIO(file,
    190                  (reading and "r" or "") +
    191                  (writing and "w" or "") +
    192                  (appending and "a" or "") +
    193                  (updating and "+" or ""),
    194                  closefd)
    195     line_buffering = False
    196     if buffering == 1 or buffering < 0 and raw.isatty():
    197         buffering = -1
    198         line_buffering = True
    199     if buffering < 0:
    200         buffering = DEFAULT_BUFFER_SIZE
    201         try:
    202             bs = os.fstat(raw.fileno()).st_blksize
    203         except (os.error, AttributeError):
    204             pass
    205         else:
    206             if bs > 1:
    207                 buffering = bs
    208     if buffering < 0:
    209         raise ValueError("invalid buffering size")
    210     if buffering == 0:
    211         if binary:
    212             return raw
    213         raise ValueError("can't have unbuffered text I/O")
    214     if updating:
    215         buffer = BufferedRandom(raw, buffering)
    216     elif writing or appending:
    217         buffer = BufferedWriter(raw, buffering)
    218     elif reading:
    219         buffer = BufferedReader(raw, buffering)
    220     else:
    221         raise ValueError("unknown mode: %r" % mode)
    222     if binary:
    223         return buffer
    224     text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering)
    225     text.mode = mode
    226     return text
    227 
    228 
    229 class DocDescriptor:
    230     """Helper for builtins.open.__doc__
    231     """
    232     def __get__(self, obj, typ):
    233         return (
    234             "open(file, mode='r', buffering=-1, encoding=None, "
    235                  "errors=None, newline=None, closefd=True)\n\n" +
    236             open.__doc__)
    237 
    238 class OpenWrapper:
    239     """Wrapper for builtins.open
    240 
    241     Trick so that open won't become a bound method when stored
    242     as a class variable (as dbm.dumb does).
    243 
    244     See initstdio() in Python/pythonrun.c.
    245     """
    246     __doc__ = DocDescriptor()
    247 
    248     def __new__(cls, *args, **kwargs):
    249         return open(*args, **kwargs)
    250 
    251 
    252 class UnsupportedOperation(ValueError, IOError):
    253     pass
    254 
    255 
    256 class IOBase:
    257     __metaclass__ = abc.ABCMeta
    258 
    259     """The abstract base class for all I/O classes, acting on streams of
    260     bytes. There is no public constructor.
    261 
    262     This class provides dummy implementations for many methods that
    263     derived classes can override selectively; the default implementations
    264     represent a file that cannot be read, written or seeked.
    265 
    266     Even though IOBase does not declare read, readinto, or write because
    267     their signatures will vary, implementations and clients should
    268     consider those methods part of the interface. Also, implementations
    269     may raise a IOError when operations they do not support are called.
    270 
    271     The basic type used for binary data read from or written to a file is
    272     bytes. bytearrays are accepted too, and in some cases (such as
    273     readinto) needed. Text I/O classes work with str data.
    274 
    275     Note that calling any method (even inquiries) on a closed stream is
    276     undefined. Implementations may raise IOError in this case.
    277 
    278     IOBase (and its subclasses) support the iterator protocol, meaning
    279     that an IOBase object can be iterated over yielding the lines in a
    280     stream.
    281 
    282     IOBase also supports the :keyword:`with` statement. In this example,
    283     fp is closed after the suite of the with statement is complete:
    284 
    285     with open('spam.txt', 'r') as fp:
    286         fp.write('Spam and eggs!')
    287     """
    288 
    289     ### Internal ###
    290 
    291     def _unsupported(self, name):
    292         """Internal: raise an exception for unsupported operations."""
    293         raise UnsupportedOperation("%s.%s() not supported" %
    294                                    (self.__class__.__name__, name))
    295 
    296     ### Positioning ###
    297 
    298     def seek(self, pos, whence=0):
    299         """Change stream position.
    300 
    301         Change the stream position to byte offset pos. Argument pos is
    302         interpreted relative to the position indicated by whence.  Values
    303         for whence are:
    304 
    305         * 0 -- start of stream (the default); offset should be zero or positive
    306         * 1 -- current stream position; offset may be negative
    307         * 2 -- end of stream; offset is usually negative
    308 
    309         Return the new absolute position.
    310         """
    311         self._unsupported("seek")
    312 
    313     def tell(self):
    314         """Return current stream position."""
    315         return self.seek(0, 1)
    316 
    317     def truncate(self, pos=None):
    318         """Truncate file to size bytes.
    319 
    320         Size defaults to the current IO position as reported by tell().  Return
    321         the new size.
    322         """
    323         self._unsupported("truncate")
    324 
    325     ### Flush and close ###
    326 
    327     def flush(self):
    328         """Flush write buffers, if applicable.
    329 
    330         This is not implemented for read-only and non-blocking streams.
    331         """
    332         self._checkClosed()
    333         # XXX Should this return the number of bytes written???
    334 
    335     __closed = False
    336 
    337     def close(self):
    338         """Flush and close the IO object.
    339 
    340         This method has no effect if the file is already closed.
    341         """
    342         if not self.__closed:
    343             try:
    344                 self.flush()
    345             finally:
    346                 self.__closed = True
    347 
    348     def __del__(self):
    349         """Destructor.  Calls close()."""
    350         # The try/except block is in case this is called at program
    351         # exit time, when it's possible that globals have already been
    352         # deleted, and then the close() call might fail.  Since
    353         # there's nothing we can do about such failures and they annoy
    354         # the end users, we suppress the traceback.
    355         try:
    356             self.close()
    357         except:
    358             pass
    359 
    360     ### Inquiries ###
    361 
    362     def seekable(self):
    363         """Return whether object supports random access.
    364 
    365         If False, seek(), tell() and truncate() will raise IOError.
    366         This method may need to do a test seek().
    367         """
    368         return False
    369 
    370     def _checkSeekable(self, msg=None):
    371         """Internal: raise an IOError if file is not seekable
    372         """
    373         if not self.seekable():
    374             raise IOError("File or stream is not seekable."
    375                           if msg is None else msg)
    376 
    377 
    378     def readable(self):
    379         """Return whether object was opened for reading.
    380 
    381         If False, read() will raise IOError.
    382         """
    383         return False
    384 
    385     def _checkReadable(self, msg=None):
    386         """Internal: raise an IOError if file is not readable
    387         """
    388         if not self.readable():
    389             raise IOError("File or stream is not readable."
    390                           if msg is None else msg)
    391 
    392     def writable(self):
    393         """Return whether object was opened for writing.
    394 
    395         If False, write() and truncate() will raise IOError.
    396         """
    397         return False
    398 
    399     def _checkWritable(self, msg=None):
    400         """Internal: raise an IOError if file is not writable
    401         """
    402         if not self.writable():
    403             raise IOError("File or stream is not writable."
    404                           if msg is None else msg)
    405 
    406     @property
    407     def closed(self):
    408         """closed: bool.  True iff the file has been closed.
    409 
    410         For backwards compatibility, this is a property, not a predicate.
    411         """
    412         return self.__closed
    413 
    414     def _checkClosed(self, msg=None):
    415         """Internal: raise an ValueError if file is closed
    416         """
    417         if self.closed:
    418             raise ValueError("I/O operation on closed file."
    419                              if msg is None else msg)
    420 
    421     ### Context manager ###
    422 
    423     def __enter__(self):
    424         """Context management protocol.  Returns self."""
    425         self._checkClosed()
    426         return self
    427 
    428     def __exit__(self, *args):
    429         """Context management protocol.  Calls close()"""
    430         self.close()
    431 
    432     ### Lower-level APIs ###
    433 
    434     # XXX Should these be present even if unimplemented?
    435 
    436     def fileno(self):
    437         """Returns underlying file descriptor if one exists.
    438 
    439         An IOError is raised if the IO object does not use a file descriptor.
    440         """
    441         self._unsupported("fileno")
    442 
    443     def isatty(self):
    444         """Return whether this is an 'interactive' stream.
    445 
    446         Return False if it can't be determined.
    447         """
    448         self._checkClosed()
    449         return False
    450 
    451     ### Readline[s] and writelines ###
    452 
    453     def readline(self, limit=-1):
    454         r"""Read and return a line from the stream.
    455 
    456         If limit is specified, at most limit bytes will be read.
    457 
    458         The line terminator is always b'\n' for binary files; for text
    459         files, the newlines argument to open can be used to select the line
    460         terminator(s) recognized.
    461         """
    462         # For backwards compatibility, a (slowish) readline().
    463         if hasattr(self, "peek"):
    464             def nreadahead():
    465                 readahead = self.peek(1)
    466                 if not readahead:
    467                     return 1
    468                 n = (readahead.find(b"\n") + 1) or len(readahead)
    469                 if limit >= 0:
    470                     n = min(n, limit)
    471                 return n
    472         else:
    473             def nreadahead():
    474                 return 1
    475         if limit is None:
    476             limit = -1
    477         elif not isinstance(limit, (int, long)):
    478             raise TypeError("limit must be an integer")
    479         res = bytearray()
    480         while limit < 0 or len(res) < limit:
    481             b = self.read(nreadahead())
    482             if not b:
    483                 break
    484             res += b
    485             if res.endswith(b"\n"):
    486                 break
    487         return bytes(res)
    488 
    489     def __iter__(self):
    490         self._checkClosed()
    491         return self
    492 
    493     def next(self):
    494         line = self.readline()
    495         if not line:
    496             raise StopIteration
    497         return line
    498 
    499     def readlines(self, hint=None):
    500         """Return a list of lines from the stream.
    501 
    502         hint can be specified to control the number of lines read: no more
    503         lines will be read if the total size (in bytes/characters) of all
    504         lines so far exceeds hint.
    505         """
    506         if hint is not None and not isinstance(hint, (int, long)):
    507             raise TypeError("integer or None expected")
    508         if hint is None or hint <= 0:
    509             return list(self)
    510         n = 0
    511         lines = []
    512         for line in self:
    513             lines.append(line)
    514             n += len(line)
    515             if n >= hint:
    516                 break
    517         return lines
    518 
    519     def writelines(self, lines):
    520         self._checkClosed()
    521         for line in lines:
    522             self.write(line)
    523 
    524 io.IOBase.register(IOBase)
    525 
    526 
    527 class RawIOBase(IOBase):
    528 
    529     """Base class for raw binary I/O."""
    530 
    531     # The read() method is implemented by calling readinto(); derived
    532     # classes that want to support read() only need to implement
    533     # readinto() as a primitive operation.  In general, readinto() can be
    534     # more efficient than read().
    535 
    536     # (It would be tempting to also provide an implementation of
    537     # readinto() in terms of read(), in case the latter is a more suitable
    538     # primitive operation, but that would lead to nasty recursion in case
    539     # a subclass doesn't implement either.)
    540 
    541     def read(self, n=-1):
    542         """Read and return up to n bytes.
    543 
    544         Returns an empty bytes object on EOF, or None if the object is
    545         set not to block and has no data to read.
    546         """
    547         if n is None:
    548             n = -1
    549         if n < 0:
    550             return self.readall()
    551         b = bytearray(n.__index__())
    552         n = self.readinto(b)
    553         if n is None:
    554             return None
    555         del b[n:]
    556         return bytes(b)
    557 
    558     def readall(self):
    559         """Read until EOF, using multiple read() call."""
    560         res = bytearray()
    561         while True:
    562             data = self.read(DEFAULT_BUFFER_SIZE)
    563             if not data:
    564                 break
    565             res += data
    566         if res:
    567             return bytes(res)
    568         else:
    569             # b'' or None
    570             return data
    571 
    572     def readinto(self, b):
    573         """Read up to len(b) bytes into b.
    574 
    575         Returns number of bytes read (0 for EOF), or None if the object
    576         is set not to block and has no data to read.
    577         """
    578         self._unsupported("readinto")
    579 
    580     def write(self, b):
    581         """Write the given buffer to the IO stream.
    582 
    583         Returns the number of bytes written, which may be less than len(b).
    584         """
    585         self._unsupported("write")
    586 
    587 io.RawIOBase.register(RawIOBase)
    588 from _io import FileIO
    589 RawIOBase.register(FileIO)
    590 
    591 
    592 class BufferedIOBase(IOBase):
    593 
    594     """Base class for buffered IO objects.
    595 
    596     The main difference with RawIOBase is that the read() method
    597     supports omitting the size argument, and does not have a default
    598     implementation that defers to readinto().
    599 
    600     In addition, read(), readinto() and write() may raise
    601     BlockingIOError if the underlying raw stream is in non-blocking
    602     mode and not ready; unlike their raw counterparts, they will never
    603     return None.
    604 
    605     A typical implementation should not inherit from a RawIOBase
    606     implementation, but wrap one.
    607     """
    608 
    609     def read(self, n=None):
    610         """Read and return up to n bytes.
    611 
    612         If the argument is omitted, None, or negative, reads and
    613         returns all data until EOF.
    614 
    615         If the argument is positive, and the underlying raw stream is
    616         not 'interactive', multiple raw reads may be issued to satisfy
    617         the byte count (unless EOF is reached first).  But for
    618         interactive raw streams (XXX and for pipes?), at most one raw
    619         read will be issued, and a short result does not imply that
    620         EOF is imminent.
    621 
    622         Returns an empty bytes array on EOF.
    623 
    624         Raises BlockingIOError if the underlying raw stream has no
    625         data at the moment.
    626         """
    627         self._unsupported("read")
    628 
    629     def read1(self, n=None):
    630         """Read up to n bytes with at most one read() system call."""
    631         self._unsupported("read1")
    632 
    633     def readinto(self, b):
    634         """Read up to len(b) bytes into b.
    635 
    636         Like read(), this may issue multiple reads to the underlying raw
    637         stream, unless the latter is 'interactive'.
    638 
    639         Returns the number of bytes read (0 for EOF).
    640 
    641         Raises BlockingIOError if the underlying raw stream has no
    642         data at the moment.
    643         """
    644         # XXX This ought to work with anything that supports the buffer API
    645         data = self.read(len(b))
    646         n = len(data)
    647         try:
    648             b[:n] = data
    649         except TypeError as err:
    650             import array
    651             if not isinstance(b, array.array):
    652                 raise err
    653             b[:n] = array.array(b'b', data)
    654         return n
    655 
    656     def write(self, b):
    657         """Write the given buffer to the IO stream.
    658 
    659         Return the number of bytes written, which is never less than
    660         len(b).
    661 
    662         Raises BlockingIOError if the buffer is full and the
    663         underlying raw stream cannot accept more data at the moment.
    664         """
    665         self._unsupported("write")
    666 
    667     def detach(self):
    668         """
    669         Separate the underlying raw stream from the buffer and return it.
    670 
    671         After the raw stream has been detached, the buffer is in an unusable
    672         state.
    673         """
    674         self._unsupported("detach")
    675 
    676 io.BufferedIOBase.register(BufferedIOBase)
    677 
    678 
    679 class _BufferedIOMixin(BufferedIOBase):
    680 
    681     """A mixin implementation of BufferedIOBase with an underlying raw stream.
    682 
    683     This passes most requests on to the underlying raw stream.  It
    684     does *not* provide implementations of read(), readinto() or
    685     write().
    686     """
    687 
    688     def __init__(self, raw):
    689         self._raw = raw
    690 
    691     ### Positioning ###
    692 
    693     def seek(self, pos, whence=0):
    694         new_position = self.raw.seek(pos, whence)
    695         if new_position < 0:
    696             raise IOError("seek() returned an invalid position")
    697         return new_position
    698 
    699     def tell(self):
    700         pos = self.raw.tell()
    701         if pos < 0:
    702             raise IOError("tell() returned an invalid position")
    703         return pos
    704 
    705     def truncate(self, pos=None):
    706         # Flush the stream.  We're mixing buffered I/O with lower-level I/O,
    707         # and a flush may be necessary to synch both views of the current
    708         # file state.
    709         self.flush()
    710 
    711         if pos is None:
    712             pos = self.tell()
    713         # XXX: Should seek() be used, instead of passing the position
    714         # XXX  directly to truncate?
    715         return self.raw.truncate(pos)
    716 
    717     ### Flush and close ###
    718 
    719     def flush(self):
    720         if self.closed:
    721             raise ValueError("flush of closed file")
    722         self.raw.flush()
    723 
    724     def close(self):
    725         if self.raw is not None and not self.closed:
    726             try:
    727                 # may raise BlockingIOError or BrokenPipeError etc
    728                 self.flush()
    729             finally:
    730                 self.raw.close()
    731 
    732     def detach(self):
    733         if self.raw is None:
    734             raise ValueError("raw stream already detached")
    735         self.flush()
    736         raw = self._raw
    737         self._raw = None
    738         return raw
    739 
    740     ### Inquiries ###
    741 
    742     def seekable(self):
    743         return self.raw.seekable()
    744 
    745     def readable(self):
    746         return self.raw.readable()
    747 
    748     def writable(self):
    749         return self.raw.writable()
    750 
    751     @property
    752     def raw(self):
    753         return self._raw
    754 
    755     @property
    756     def closed(self):
    757         return self.raw.closed
    758 
    759     @property
    760     def name(self):
    761         return self.raw.name
    762 
    763     @property
    764     def mode(self):
    765         return self.raw.mode
    766 
    767     def __repr__(self):
    768         clsname = self.__class__.__name__
    769         try:
    770             name = self.name
    771         except AttributeError:
    772             return "<_pyio.{0}>".format(clsname)
    773         else:
    774             return "<_pyio.{0} name={1!r}>".format(clsname, name)
    775 
    776     ### Lower-level APIs ###
    777 
    778     def fileno(self):
    779         return self.raw.fileno()
    780 
    781     def isatty(self):
    782         return self.raw.isatty()
    783 
    784 
    785 class BytesIO(BufferedIOBase):
    786 
    787     """Buffered I/O implementation using an in-memory bytes buffer."""
    788 
    789     def __init__(self, initial_bytes=None):
    790         buf = bytearray()
    791         if initial_bytes is not None:
    792             buf.extend(initial_bytes)
    793         self._buffer = buf
    794         self._pos = 0
    795 
    796     def __getstate__(self):
    797         if self.closed:
    798             raise ValueError("__getstate__ on closed file")
    799         return self.__dict__.copy()
    800 
    801     def getvalue(self):
    802         """Return the bytes value (contents) of the buffer
    803         """
    804         if self.closed:
    805             raise ValueError("getvalue on closed file")
    806         return bytes(self._buffer)
    807 
    808     def read(self, n=None):
    809         if self.closed:
    810             raise ValueError("read from closed file")
    811         if n is None:
    812             n = -1
    813         if not isinstance(n, (int, long)):
    814             raise TypeError("integer argument expected, got {0!r}".format(
    815                 type(n)))
    816         if n < 0:
    817             n = len(self._buffer)
    818         if len(self._buffer) <= self._pos:
    819             return b""
    820         newpos = min(len(self._buffer), self._pos + n)
    821         b = self._buffer[self._pos : newpos]
    822         self._pos = newpos
    823         return bytes(b)
    824 
    825     def read1(self, n):
    826         """This is the same as read.
    827         """
    828         return self.read(n)
    829 
    830     def write(self, b):
    831         if self.closed:
    832             raise ValueError("write to closed file")
    833         if isinstance(b, unicode):
    834             raise TypeError("can't write unicode to binary stream")
    835         n = len(b)
    836         if n == 0:
    837             return 0
    838         pos = self._pos
    839         if pos > len(self._buffer):
    840             # Inserts null bytes between the current end of the file
    841             # and the new write position.
    842             padding = b'\x00' * (pos - len(self._buffer))
    843             self._buffer += padding
    844         self._buffer[pos:pos + n] = b
    845         self._pos += n
    846         return n
    847 
    848     def seek(self, pos, whence=0):
    849         if self.closed:
    850             raise ValueError("seek on closed file")
    851         try:
    852             pos.__index__
    853         except AttributeError:
    854             raise TypeError("an integer is required")
    855         if whence == 0:
    856             if pos < 0:
    857                 raise ValueError("negative seek position %r" % (pos,))
    858             self._pos = pos
    859         elif whence == 1:
    860             self._pos = max(0, self._pos + pos)
    861         elif whence == 2:
    862             self._pos = max(0, len(self._buffer) + pos)
    863         else:
    864             raise ValueError("invalid whence value")
    865         return self._pos
    866 
    867     def tell(self):
    868         if self.closed:
    869             raise ValueError("tell on closed file")
    870         return self._pos
    871 
    872     def truncate(self, pos=None):
    873         if self.closed:
    874             raise ValueError("truncate on closed file")
    875         if pos is None:
    876             pos = self._pos
    877         else:
    878             try:
    879                 pos.__index__
    880             except AttributeError:
    881                 raise TypeError("an integer is required")
    882             if pos < 0:
    883                 raise ValueError("negative truncate position %r" % (pos,))
    884         del self._buffer[pos:]
    885         return pos
    886 
    887     def readable(self):
    888         if self.closed:
    889             raise ValueError("I/O operation on closed file.")
    890         return True
    891 
    892     def writable(self):
    893         if self.closed:
    894             raise ValueError("I/O operation on closed file.")
    895         return True
    896 
    897     def seekable(self):
    898         if self.closed:
    899             raise ValueError("I/O operation on closed file.")
    900         return True
    901 
    902 
    903 class BufferedReader(_BufferedIOMixin):
    904 
    905     """BufferedReader(raw[, buffer_size])
    906 
    907     A buffer for a readable, sequential BaseRawIO object.
    908 
    909     The constructor creates a BufferedReader for the given readable raw
    910     stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
    911     is used.
    912     """
    913 
    914     def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
    915         """Create a new buffered reader using the given readable raw IO object.
    916         """
    917         if not raw.readable():
    918             raise IOError('"raw" argument must be readable.')
    919 
    920         _BufferedIOMixin.__init__(self, raw)
    921         if buffer_size <= 0:
    922             raise ValueError("invalid buffer size")
    923         self.buffer_size = buffer_size
    924         self._reset_read_buf()
    925         self._read_lock = Lock()
    926 
    927     def _reset_read_buf(self):
    928         self._read_buf = b""
    929         self._read_pos = 0
    930 
    931     def read(self, n=None):
    932         """Read n bytes.
    933 
    934         Returns exactly n bytes of data unless the underlying raw IO
    935         stream reaches EOF or if the call would block in non-blocking
    936         mode. If n is negative, read until EOF or until read() would
    937         block.
    938         """
    939         if n is not None and n < -1:
    940             raise ValueError("invalid number of bytes to read")
    941         with self._read_lock:
    942             return self._read_unlocked(n)
    943 
    944     def _read_unlocked(self, n=None):
    945         nodata_val = b""
    946         empty_values = (b"", None)
    947         buf = self._read_buf
    948         pos = self._read_pos
    949 
    950         # Special case for when the number of bytes to read is unspecified.
    951         if n is None or n == -1:
    952             self._reset_read_buf()
    953             chunks = [buf[pos:]]  # Strip the consumed bytes.
    954             current_size = 0
    955             while True:
    956                 # Read until EOF or until read() would block.
    957                 try:
    958                     chunk = self.raw.read()
    959                 except IOError as e:
    960                     if e.errno != EINTR:
    961                         raise
    962                     continue
    963                 if chunk in empty_values:
    964                     nodata_val = chunk
    965                     break
    966                 current_size += len(chunk)
    967                 chunks.append(chunk)
    968             return b"".join(chunks) or nodata_val
    969 
    970         # The number of bytes to read is specified, return at most n bytes.
    971         avail = len(buf) - pos  # Length of the available buffered data.
    972         if n <= avail:
    973             # Fast path: the data to read is fully buffered.
    974             self._read_pos += n
    975             return buf[pos:pos+n]
    976         # Slow path: read from the stream until enough bytes are read,
    977         # or until an EOF occurs or until read() would block.
    978         chunks = [buf[pos:]]
    979         wanted = max(self.buffer_size, n)
    980         while avail < n:
    981             try:
    982                 chunk = self.raw.read(wanted)
    983             except IOError as e:
    984                 if e.errno != EINTR:
    985                     raise
    986                 continue
    987             if chunk in empty_values:
    988                 nodata_val = chunk
    989                 break
    990             avail += len(chunk)
    991             chunks.append(chunk)
    992         # n is more then avail only when an EOF occurred or when
    993         # read() would have blocked.
    994         n = min(n, avail)
    995         out = b"".join(chunks)
    996         self._read_buf = out[n:]  # Save the extra data in the buffer.
    997         self._read_pos = 0
    998         return out[:n] if out else nodata_val
    999 
   1000     def peek(self, n=0):
   1001         """Returns buffered bytes without advancing the position.
   1002 
   1003         The argument indicates a desired minimal number of bytes; we
   1004         do at most one raw read to satisfy it.  We never return more
   1005         than self.buffer_size.
   1006         """
   1007         with self._read_lock:
   1008             return self._peek_unlocked(n)
   1009 
   1010     def _peek_unlocked(self, n=0):
   1011         want = min(n, self.buffer_size)
   1012         have = len(self._read_buf) - self._read_pos
   1013         if have < want or have <= 0:
   1014             to_read = self.buffer_size - have
   1015             while True:
   1016                 try:
   1017                     current = self.raw.read(to_read)
   1018                 except IOError as e:
   1019                     if e.errno != EINTR:
   1020                         raise
   1021                     continue
   1022                 break
   1023             if current:
   1024                 self._read_buf = self._read_buf[self._read_pos:] + current
   1025                 self._read_pos = 0
   1026         return self._read_buf[self._read_pos:]
   1027 
   1028     def read1(self, n):
   1029         """Reads up to n bytes, with at most one read() system call."""
   1030         # Returns up to n bytes.  If at least one byte is buffered, we
   1031         # only return buffered bytes.  Otherwise, we do one raw read.
   1032         if n < 0:
   1033             raise ValueError("number of bytes to read must be positive")
   1034         if n == 0:
   1035             return b""
   1036         with self._read_lock:
   1037             self._peek_unlocked(1)
   1038             return self._read_unlocked(
   1039                 min(n, len(self._read_buf) - self._read_pos))
   1040 
   1041     def tell(self):
   1042         return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos
   1043 
   1044     def seek(self, pos, whence=0):
   1045         if not (0 <= whence <= 2):
   1046             raise ValueError("invalid whence value")
   1047         with self._read_lock:
   1048             if whence == 1:
   1049                 pos -= len(self._read_buf) - self._read_pos
   1050             pos = _BufferedIOMixin.seek(self, pos, whence)
   1051             self._reset_read_buf()
   1052             return pos
   1053 
   1054 class BufferedWriter(_BufferedIOMixin):
   1055 
   1056     """A buffer for a writeable sequential RawIO object.
   1057 
   1058     The constructor creates a BufferedWriter for the given writeable raw
   1059     stream. If the buffer_size is not given, it defaults to
   1060     DEFAULT_BUFFER_SIZE.
   1061     """
   1062 
   1063     _warning_stack_offset = 2
   1064 
   1065     def __init__(self, raw,
   1066                  buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
   1067         if not raw.writable():
   1068             raise IOError('"raw" argument must be writable.')
   1069 
   1070         _BufferedIOMixin.__init__(self, raw)
   1071         if buffer_size <= 0:
   1072             raise ValueError("invalid buffer size")
   1073         if max_buffer_size is not None:
   1074             warnings.warn("max_buffer_size is deprecated", DeprecationWarning,
   1075                           self._warning_stack_offset)
   1076         self.buffer_size = buffer_size
   1077         self._write_buf = bytearray()
   1078         self._write_lock = Lock()
   1079 
   1080     def write(self, b):
   1081         if self.closed:
   1082             raise ValueError("write to closed file")
   1083         if isinstance(b, unicode):
   1084             raise TypeError("can't write unicode to binary stream")
   1085         with self._write_lock:
   1086             # XXX we can implement some more tricks to try and avoid
   1087             # partial writes
   1088             if len(self._write_buf) > self.buffer_size:
   1089                 # We're full, so let's pre-flush the buffer.  (This may
   1090                 # raise BlockingIOError with characters_written == 0.)
   1091                 self._flush_unlocked()
   1092             before = len(self._write_buf)
   1093             self._write_buf.extend(b)
   1094             written = len(self._write_buf) - before
   1095             if len(self._write_buf) > self.buffer_size:
   1096                 try:
   1097                     self._flush_unlocked()
   1098                 except BlockingIOError as e:
   1099                     if len(self._write_buf) > self.buffer_size:
   1100                         # We've hit the buffer_size. We have to accept a partial
   1101                         # write and cut back our buffer.
   1102                         overage = len(self._write_buf) - self.buffer_size
   1103                         written -= overage
   1104                         self._write_buf = self._write_buf[:self.buffer_size]
   1105                         raise BlockingIOError(e.errno, e.strerror, written)
   1106             return written
   1107 
   1108     def truncate(self, pos=None):
   1109         with self._write_lock:
   1110             self._flush_unlocked()
   1111             if pos is None:
   1112                 pos = self.raw.tell()
   1113             return self.raw.truncate(pos)
   1114 
   1115     def flush(self):
   1116         with self._write_lock:
   1117             self._flush_unlocked()
   1118 
   1119     def _flush_unlocked(self):
   1120         if self.closed:
   1121             raise ValueError("flush of closed file")
   1122         while self._write_buf:
   1123             try:
   1124                 n = self.raw.write(self._write_buf)
   1125             except BlockingIOError:
   1126                 raise RuntimeError("self.raw should implement RawIOBase: it "
   1127                                    "should not raise BlockingIOError")
   1128             except IOError as e:
   1129                 if e.errno != EINTR:
   1130                     raise
   1131                 continue
   1132             if n is None:
   1133                 raise BlockingIOError(
   1134                     errno.EAGAIN,
   1135                     "write could not complete without blocking", 0)
   1136             if n > len(self._write_buf) or n < 0:
   1137                 raise IOError("write() returned incorrect number of bytes")
   1138             del self._write_buf[:n]
   1139 
   1140     def tell(self):
   1141         return _BufferedIOMixin.tell(self) + len(self._write_buf)
   1142 
   1143     def seek(self, pos, whence=0):
   1144         if not (0 <= whence <= 2):
   1145             raise ValueError("invalid whence")
   1146         with self._write_lock:
   1147             self._flush_unlocked()
   1148             return _BufferedIOMixin.seek(self, pos, whence)
   1149 
   1150 
   1151 class BufferedRWPair(BufferedIOBase):
   1152 
   1153     """A buffered reader and writer object together.
   1154 
   1155     A buffered reader object and buffered writer object put together to
   1156     form a sequential IO object that can read and write. This is typically
   1157     used with a socket or two-way pipe.
   1158 
   1159     reader and writer are RawIOBase objects that are readable and
   1160     writeable respectively. If the buffer_size is omitted it defaults to
   1161     DEFAULT_BUFFER_SIZE.
   1162     """
   1163 
   1164     # XXX The usefulness of this (compared to having two separate IO
   1165     # objects) is questionable.
   1166 
   1167     def __init__(self, reader, writer,
   1168                  buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
   1169         """Constructor.
   1170 
   1171         The arguments are two RawIO instances.
   1172         """
   1173         if max_buffer_size is not None:
   1174             warnings.warn("max_buffer_size is deprecated", DeprecationWarning, 2)
   1175 
   1176         if not reader.readable():
   1177             raise IOError('"reader" argument must be readable.')
   1178 
   1179         if not writer.writable():
   1180             raise IOError('"writer" argument must be writable.')
   1181 
   1182         self.reader = BufferedReader(reader, buffer_size)
   1183         self.writer = BufferedWriter(writer, buffer_size)
   1184 
   1185     def read(self, n=None):
   1186         if n is None:
   1187             n = -1
   1188         return self.reader.read(n)
   1189 
   1190     def readinto(self, b):
   1191         return self.reader.readinto(b)
   1192 
   1193     def write(self, b):
   1194         return self.writer.write(b)
   1195 
   1196     def peek(self, n=0):
   1197         return self.reader.peek(n)
   1198 
   1199     def read1(self, n):
   1200         return self.reader.read1(n)
   1201 
   1202     def readable(self):
   1203         return self.reader.readable()
   1204 
   1205     def writable(self):
   1206         return self.writer.writable()
   1207 
   1208     def flush(self):
   1209         return self.writer.flush()
   1210 
   1211     def close(self):
   1212         self.writer.close()
   1213         self.reader.close()
   1214 
   1215     def isatty(self):
   1216         return self.reader.isatty() or self.writer.isatty()
   1217 
   1218     @property
   1219     def closed(self):
   1220         return self.writer.closed
   1221 
   1222 
   1223 class BufferedRandom(BufferedWriter, BufferedReader):
   1224 
   1225     """A buffered interface to random access streams.
   1226 
   1227     The constructor creates a reader and writer for a seekable stream,
   1228     raw, given in the first argument. If the buffer_size is omitted it
   1229     defaults to DEFAULT_BUFFER_SIZE.
   1230     """
   1231 
   1232     _warning_stack_offset = 3
   1233 
   1234     def __init__(self, raw,
   1235                  buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
   1236         raw._checkSeekable()
   1237         BufferedReader.__init__(self, raw, buffer_size)
   1238         BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)
   1239 
   1240     def seek(self, pos, whence=0):
   1241         if not (0 <= whence <= 2):
   1242             raise ValueError("invalid whence")
   1243         self.flush()
   1244         if self._read_buf:
   1245             # Undo read ahead.
   1246             with self._read_lock:
   1247                 self.raw.seek(self._read_pos - len(self._read_buf), 1)
   1248         # First do the raw seek, then empty the read buffer, so that
   1249         # if the raw seek fails, we don't lose buffered data forever.
   1250         pos = self.raw.seek(pos, whence)
   1251         with self._read_lock:
   1252             self._reset_read_buf()
   1253         if pos < 0:
   1254             raise IOError("seek() returned invalid position")
   1255         return pos
   1256 
   1257     def tell(self):
   1258         if self._write_buf:
   1259             return BufferedWriter.tell(self)
   1260         else:
   1261             return BufferedReader.tell(self)
   1262 
   1263     def truncate(self, pos=None):
   1264         if pos is None:
   1265             pos = self.tell()
   1266         # Use seek to flush the read buffer.
   1267         return BufferedWriter.truncate(self, pos)
   1268 
   1269     def read(self, n=None):
   1270         if n is None:
   1271             n = -1
   1272         self.flush()
   1273         return BufferedReader.read(self, n)
   1274 
   1275     def readinto(self, b):
   1276         self.flush()
   1277         return BufferedReader.readinto(self, b)
   1278 
   1279     def peek(self, n=0):
   1280         self.flush()
   1281         return BufferedReader.peek(self, n)
   1282 
   1283     def read1(self, n):
   1284         self.flush()
   1285         return BufferedReader.read1(self, n)
   1286 
   1287     def write(self, b):
   1288         if self._read_buf:
   1289             # Undo readahead
   1290             with self._read_lock:
   1291                 self.raw.seek(self._read_pos - len(self._read_buf), 1)
   1292                 self._reset_read_buf()
   1293         return BufferedWriter.write(self, b)
   1294 
   1295 
   1296 class TextIOBase(IOBase):
   1297 
   1298     """Base class for text I/O.
   1299 
   1300     This class provides a character and line based interface to stream
   1301     I/O. There is no readinto method because Python's character strings
   1302     are immutable. There is no public constructor.
   1303     """
   1304 
   1305     def read(self, n=-1):
   1306         """Read at most n characters from stream.
   1307 
   1308         Read from underlying buffer until we have n characters or we hit EOF.
   1309         If n is negative or omitted, read until EOF.
   1310         """
   1311         self._unsupported("read")
   1312 
   1313     def write(self, s):
   1314         """Write string s to stream."""
   1315         self._unsupported("write")
   1316 
   1317     def truncate(self, pos=None):
   1318         """Truncate size to pos."""
   1319         self._unsupported("truncate")
   1320 
   1321     def readline(self):
   1322         """Read until newline or EOF.
   1323 
   1324         Returns an empty string if EOF is hit immediately.
   1325         """
   1326         self._unsupported("readline")
   1327 
   1328     def detach(self):
   1329         """
   1330         Separate the underlying buffer from the TextIOBase and return it.
   1331 
   1332         After the underlying buffer has been detached, the TextIO is in an
   1333         unusable state.
   1334         """
   1335         self._unsupported("detach")
   1336 
   1337     @property
   1338     def encoding(self):
   1339         """Subclasses should override."""
   1340         return None
   1341 
   1342     @property
   1343     def newlines(self):
   1344         """Line endings translated so far.
   1345 
   1346         Only line endings translated during reading are considered.
   1347 
   1348         Subclasses should override.
   1349         """
   1350         return None
   1351 
   1352     @property
   1353     def errors(self):
   1354         """Error setting of the decoder or encoder.
   1355 
   1356         Subclasses should override."""
   1357         return None
   1358 
   1359 io.TextIOBase.register(TextIOBase)
   1360 
   1361 
   1362 class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
   1363     r"""Codec used when reading a file in universal newlines mode.  It wraps
   1364     another incremental decoder, translating \r\n and \r into \n.  It also
   1365     records the types of newlines encountered.  When used with
   1366     translate=False, it ensures that the newline sequence is returned in
   1367     one piece.
   1368     """
   1369     def __init__(self, decoder, translate, errors='strict'):
   1370         codecs.IncrementalDecoder.__init__(self, errors=errors)
   1371         self.translate = translate
   1372         self.decoder = decoder
   1373         self.seennl = 0
   1374         self.pendingcr = False
   1375 
   1376     def decode(self, input, final=False):
   1377         # decode input (with the eventual \r from a previous pass)
   1378         if self.decoder is None:
   1379             output = input
   1380         else:
   1381             output = self.decoder.decode(input, final=final)
   1382         if self.pendingcr and (output or final):
   1383             output = "\r" + output
   1384             self.pendingcr = False
   1385 
   1386         # retain last \r even when not translating data:
   1387         # then readline() is sure to get \r\n in one pass
   1388         if output.endswith("\r") and not final:
   1389             output = output[:-1]
   1390             self.pendingcr = True
   1391 
   1392         # Record which newlines are read
   1393         crlf = output.count('\r\n')
   1394         cr = output.count('\r') - crlf
   1395         lf = output.count('\n') - crlf
   1396         self.seennl |= (lf and self._LF) | (cr and self._CR) \
   1397                     | (crlf and self._CRLF)
   1398 
   1399         if self.translate:
   1400             if crlf:
   1401                 output = output.replace("\r\n", "\n")
   1402             if cr:
   1403                 output = output.replace("\r", "\n")
   1404 
   1405         return output
   1406 
   1407     def getstate(self):
   1408         if self.decoder is None:
   1409             buf = b""
   1410             flag = 0
   1411         else:
   1412             buf, flag = self.decoder.getstate()
   1413         flag <<= 1
   1414         if self.pendingcr:
   1415             flag |= 1
   1416         return buf, flag
   1417 
   1418     def setstate(self, state):
   1419         buf, flag = state
   1420         self.pendingcr = bool(flag & 1)
   1421         if self.decoder is not None:
   1422             self.decoder.setstate((buf, flag >> 1))
   1423 
   1424     def reset(self):
   1425         self.seennl = 0
   1426         self.pendingcr = False
   1427         if self.decoder is not None:
   1428             self.decoder.reset()
   1429 
   1430     _LF = 1
   1431     _CR = 2
   1432     _CRLF = 4
   1433 
   1434     @property
   1435     def newlines(self):
   1436         return (None,
   1437                 "\n",
   1438                 "\r",
   1439                 ("\r", "\n"),
   1440                 "\r\n",
   1441                 ("\n", "\r\n"),
   1442                 ("\r", "\r\n"),
   1443                 ("\r", "\n", "\r\n")
   1444                )[self.seennl]
   1445 
   1446 
   1447 class TextIOWrapper(TextIOBase):
   1448 
   1449     r"""Character and line based layer over a BufferedIOBase object, buffer.
   1450 
   1451     encoding gives the name of the encoding that the stream will be
   1452     decoded or encoded with. It defaults to locale.getpreferredencoding.
   1453 
   1454     errors determines the strictness of encoding and decoding (see the
   1455     codecs.register) and defaults to "strict".
   1456 
   1457     newline can be None, '', '\n', '\r', or '\r\n'.  It controls the
   1458     handling of line endings. If it is None, universal newlines is
   1459     enabled.  With this enabled, on input, the lines endings '\n', '\r',
   1460     or '\r\n' are translated to '\n' before being returned to the
   1461     caller. Conversely, on output, '\n' is translated to the system
   1462     default line separator, os.linesep. If newline is any other of its
   1463     legal values, that newline becomes the newline when the file is read
   1464     and it is returned untranslated. On output, '\n' is converted to the
   1465     newline.
   1466 
   1467     If line_buffering is True, a call to flush is implied when a call to
   1468     write contains a newline character.
   1469     """
   1470 
   1471     _CHUNK_SIZE = 2048
   1472 
   1473     def __init__(self, buffer, encoding=None, errors=None, newline=None,
   1474                  line_buffering=False):
   1475         if newline is not None and not isinstance(newline, basestring):
   1476             raise TypeError("illegal newline type: %r" % (type(newline),))
   1477         if newline not in (None, "", "\n", "\r", "\r\n"):
   1478             raise ValueError("illegal newline value: %r" % (newline,))
   1479         if encoding is None:
   1480             try:
   1481                 import locale
   1482             except ImportError:
   1483                 # Importing locale may fail if Python is being built
   1484                 encoding = "ascii"
   1485             else:
   1486                 encoding = locale.getpreferredencoding()
   1487 
   1488         if not isinstance(encoding, basestring):
   1489             raise ValueError("invalid encoding: %r" % encoding)
   1490 
   1491         if errors is None:
   1492             errors = "strict"
   1493         else:
   1494             if not isinstance(errors, basestring):
   1495                 raise ValueError("invalid errors: %r" % errors)
   1496 
   1497         self._buffer = buffer
   1498         self._line_buffering = line_buffering
   1499         self._encoding = encoding
   1500         self._errors = errors
   1501         self._readuniversal = not newline
   1502         self._readtranslate = newline is None
   1503         self._readnl = newline
   1504         self._writetranslate = newline != ''
   1505         self._writenl = newline or os.linesep
   1506         self._encoder = None
   1507         self._decoder = None
   1508         self._decoded_chars = ''  # buffer for text returned from decoder
   1509         self._decoded_chars_used = 0  # offset into _decoded_chars for read()
   1510         self._snapshot = None  # info for reconstructing decoder state
   1511         self._seekable = self._telling = self.buffer.seekable()
   1512 
   1513         if self._seekable and self.writable():
   1514             position = self.buffer.tell()
   1515             if position != 0:
   1516                 try:
   1517                     self._get_encoder().setstate(0)
   1518                 except LookupError:
   1519                     # Sometimes the encoder doesn't exist
   1520                     pass
   1521 
   1522     # self._snapshot is either None, or a tuple (dec_flags, next_input)
   1523     # where dec_flags is the second (integer) item of the decoder state
   1524     # and next_input is the chunk of input bytes that comes next after the
   1525     # snapshot point.  We use this to reconstruct decoder states in tell().
   1526 
   1527     # Naming convention:
   1528     #   - "bytes_..." for integer variables that count input bytes
   1529     #   - "chars_..." for integer variables that count decoded characters
   1530 
   1531     def __repr__(self):
   1532         try:
   1533             name = self.name
   1534         except AttributeError:
   1535             return "<_pyio.TextIOWrapper encoding='{0}'>".format(self.encoding)
   1536         else:
   1537             return "<_pyio.TextIOWrapper name={0!r} encoding='{1}'>".format(
   1538                 name, self.encoding)
   1539 
   1540     @property
   1541     def encoding(self):
   1542         return self._encoding
   1543 
   1544     @property
   1545     def errors(self):
   1546         return self._errors
   1547 
   1548     @property
   1549     def line_buffering(self):
   1550         return self._line_buffering
   1551 
   1552     @property
   1553     def buffer(self):
   1554         return self._buffer
   1555 
   1556     def seekable(self):
   1557         if self.closed:
   1558             raise ValueError("I/O operation on closed file.")
   1559         return self._seekable
   1560 
   1561     def readable(self):
   1562         return self.buffer.readable()
   1563 
   1564     def writable(self):
   1565         return self.buffer.writable()
   1566 
   1567     def flush(self):
   1568         self.buffer.flush()
   1569         self._telling = self._seekable
   1570 
   1571     def close(self):
   1572         if self.buffer is not None and not self.closed:
   1573             try:
   1574                 self.flush()
   1575             finally:
   1576                 self.buffer.close()
   1577 
   1578     @property
   1579     def closed(self):
   1580         return self.buffer.closed
   1581 
   1582     @property
   1583     def name(self):
   1584         return self.buffer.name
   1585 
   1586     def fileno(self):
   1587         return self.buffer.fileno()
   1588 
   1589     def isatty(self):
   1590         return self.buffer.isatty()
   1591 
   1592     def write(self, s):
   1593         if self.closed:
   1594             raise ValueError("write to closed file")
   1595         if not isinstance(s, unicode):
   1596             raise TypeError("can't write %s to text stream" %
   1597                             s.__class__.__name__)
   1598         length = len(s)
   1599         haslf = (self._writetranslate or self._line_buffering) and "\n" in s
   1600         if haslf and self._writetranslate and self._writenl != "\n":
   1601             s = s.replace("\n", self._writenl)
   1602         encoder = self._encoder or self._get_encoder()
   1603         # XXX What if we were just reading?
   1604         b = encoder.encode(s)
   1605         self.buffer.write(b)
   1606         if self._line_buffering and (haslf or "\r" in s):
   1607             self.flush()
   1608         self._snapshot = None
   1609         if self._decoder:
   1610             self._decoder.reset()
   1611         return length
   1612 
   1613     def _get_encoder(self):
   1614         make_encoder = codecs.getincrementalencoder(self._encoding)
   1615         self._encoder = make_encoder(self._errors)
   1616         return self._encoder
   1617 
   1618     def _get_decoder(self):
   1619         make_decoder = codecs.getincrementaldecoder(self._encoding)
   1620         decoder = make_decoder(self._errors)
   1621         if self._readuniversal:
   1622             decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
   1623         self._decoder = decoder
   1624         return decoder
   1625 
   1626     # The following three methods implement an ADT for _decoded_chars.
   1627     # Text returned from the decoder is buffered here until the client
   1628     # requests it by calling our read() or readline() method.
   1629     def _set_decoded_chars(self, chars):
   1630         """Set the _decoded_chars buffer."""
   1631         self._decoded_chars = chars
   1632         self._decoded_chars_used = 0
   1633 
   1634     def _get_decoded_chars(self, n=None):
   1635         """Advance into the _decoded_chars buffer."""
   1636         offset = self._decoded_chars_used
   1637         if n is None:
   1638             chars = self._decoded_chars[offset:]
   1639         else:
   1640             chars = self._decoded_chars[offset:offset + n]
   1641         self._decoded_chars_used += len(chars)
   1642         return chars
   1643 
   1644     def _rewind_decoded_chars(self, n):
   1645         """Rewind the _decoded_chars buffer."""
   1646         if self._decoded_chars_used < n:
   1647             raise AssertionError("rewind decoded_chars out of bounds")
   1648         self._decoded_chars_used -= n
   1649 
   1650     def _read_chunk(self):
   1651         """
   1652         Read and decode the next chunk of data from the BufferedReader.
   1653         """
   1654 
   1655         # The return value is True unless EOF was reached.  The decoded
   1656         # string is placed in self._decoded_chars (replacing its previous
   1657         # value).  The entire input chunk is sent to the decoder, though
   1658         # some of it may remain buffered in the decoder, yet to be
   1659         # converted.
   1660 
   1661         if self._decoder is None:
   1662             raise ValueError("no decoder")
   1663 
   1664         if self._telling:
   1665             # To prepare for tell(), we need to snapshot a point in the
   1666             # file where the decoder's input buffer is empty.
   1667 
   1668             dec_buffer, dec_flags = self._decoder.getstate()
   1669             # Given this, we know there was a valid snapshot point
   1670             # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
   1671 
   1672         # Read a chunk, decode it, and put the result in self._decoded_chars.
   1673         input_chunk = self.buffer.read1(self._CHUNK_SIZE)
   1674         eof = not input_chunk
   1675         self._set_decoded_chars(self._decoder.decode(input_chunk, eof))
   1676 
   1677         if self._telling:
   1678             # At the snapshot point, len(dec_buffer) bytes before the read,
   1679             # the next input to be decoded is dec_buffer + input_chunk.
   1680             self._snapshot = (dec_flags, dec_buffer + input_chunk)
   1681 
   1682         return not eof
   1683 
   1684     def _pack_cookie(self, position, dec_flags=0,
   1685                            bytes_to_feed=0, need_eof=0, chars_to_skip=0):
   1686         # The meaning of a tell() cookie is: seek to position, set the
   1687         # decoder flags to dec_flags, read bytes_to_feed bytes, feed them
   1688         # into the decoder with need_eof as the EOF flag, then skip
   1689         # chars_to_skip characters of the decoded result.  For most simple
   1690         # decoders, tell() will often just give a byte offset in the file.
   1691         return (position | (dec_flags<<64) | (bytes_to_feed<<128) |
   1692                (chars_to_skip<<192) | bool(need_eof)<<256)
   1693 
   1694     def _unpack_cookie(self, bigint):
   1695         rest, position = divmod(bigint, 1<<64)
   1696         rest, dec_flags = divmod(rest, 1<<64)
   1697         rest, bytes_to_feed = divmod(rest, 1<<64)
   1698         need_eof, chars_to_skip = divmod(rest, 1<<64)
   1699         return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip
   1700 
   1701     def tell(self):
   1702         if not self._seekable:
   1703             raise IOError("underlying stream is not seekable")
   1704         if not self._telling:
   1705             raise IOError("telling position disabled by next() call")
   1706         self.flush()
   1707         position = self.buffer.tell()
   1708         decoder = self._decoder
   1709         if decoder is None or self._snapshot is None:
   1710             if self._decoded_chars:
   1711                 # This should never happen.
   1712                 raise AssertionError("pending decoded text")
   1713             return position
   1714 
   1715         # Skip backward to the snapshot point (see _read_chunk).
   1716         dec_flags, next_input = self._snapshot
   1717         position -= len(next_input)
   1718 
   1719         # How many decoded characters have been used up since the snapshot?
   1720         chars_to_skip = self._decoded_chars_used
   1721         if chars_to_skip == 0:
   1722             # We haven't moved from the snapshot point.
   1723             return self._pack_cookie(position, dec_flags)
   1724 
   1725         # Starting from the snapshot position, we will walk the decoder
   1726         # forward until it gives us enough decoded characters.
   1727         saved_state = decoder.getstate()
   1728         try:
   1729             # Note our initial start point.
   1730             decoder.setstate((b'', dec_flags))
   1731             start_pos = position
   1732             start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
   1733             need_eof = 0
   1734 
   1735             # Feed the decoder one byte at a time.  As we go, note the
   1736             # nearest "safe start point" before the current location
   1737             # (a point where the decoder has nothing buffered, so seek()
   1738             # can safely start from there and advance to this location).
   1739             for next_byte in next_input:
   1740                 bytes_fed += 1
   1741                 chars_decoded += len(decoder.decode(next_byte))
   1742                 dec_buffer, dec_flags = decoder.getstate()
   1743                 if not dec_buffer and chars_decoded <= chars_to_skip:
   1744                     # Decoder buffer is empty, so this is a safe start point.
   1745                     start_pos += bytes_fed
   1746                     chars_to_skip -= chars_decoded
   1747                     start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
   1748                 if chars_decoded >= chars_to_skip:
   1749                     break
   1750             else:
   1751                 # We didn't get enough decoded data; signal EOF to get more.
   1752                 chars_decoded += len(decoder.decode(b'', final=True))
   1753                 need_eof = 1
   1754                 if chars_decoded < chars_to_skip:
   1755                     raise IOError("can't reconstruct logical file position")
   1756 
   1757             # The returned cookie corresponds to the last safe start point.
   1758             return self._pack_cookie(
   1759                 start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
   1760         finally:
   1761             decoder.setstate(saved_state)
   1762 
   1763     def truncate(self, pos=None):
   1764         self.flush()
   1765         if pos is None:
   1766             pos = self.tell()
   1767         return self.buffer.truncate(pos)
   1768 
   1769     def detach(self):
   1770         if self.buffer is None:
   1771             raise ValueError("buffer is already detached")
   1772         self.flush()
   1773         buffer = self._buffer
   1774         self._buffer = None
   1775         return buffer
   1776 
   1777     def seek(self, cookie, whence=0):
   1778         if self.closed:
   1779             raise ValueError("tell on closed file")
   1780         if not self._seekable:
   1781             raise IOError("underlying stream is not seekable")
   1782         if whence == 1: # seek relative to current position
   1783             if cookie != 0:
   1784                 raise IOError("can't do nonzero cur-relative seeks")
   1785             # Seeking to the current position should attempt to
   1786             # sync the underlying buffer with the current position.
   1787             whence = 0
   1788             cookie = self.tell()
   1789         if whence == 2: # seek relative to end of file
   1790             if cookie != 0:
   1791                 raise IOError("can't do nonzero end-relative seeks")
   1792             self.flush()
   1793             position = self.buffer.seek(0, 2)
   1794             self._set_decoded_chars('')
   1795             self._snapshot = None
   1796             if self._decoder:
   1797                 self._decoder.reset()
   1798             return position
   1799         if whence != 0:
   1800             raise ValueError("invalid whence (%r, should be 0, 1 or 2)" %
   1801                              (whence,))
   1802         if cookie < 0:
   1803             raise ValueError("negative seek position %r" % (cookie,))
   1804         self.flush()
   1805 
   1806         # The strategy of seek() is to go back to the safe start point
   1807         # and replay the effect of read(chars_to_skip) from there.
   1808         start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
   1809             self._unpack_cookie(cookie)
   1810 
   1811         # Seek back to the safe start point.
   1812         self.buffer.seek(start_pos)
   1813         self._set_decoded_chars('')
   1814         self._snapshot = None
   1815 
   1816         # Restore the decoder to its state from the safe start point.
   1817         if cookie == 0 and self._decoder:
   1818             self._decoder.reset()
   1819         elif self._decoder or dec_flags or chars_to_skip:
   1820             self._decoder = self._decoder or self._get_decoder()
   1821             self._decoder.setstate((b'', dec_flags))
   1822             self._snapshot = (dec_flags, b'')
   1823 
   1824         if chars_to_skip:
   1825             # Just like _read_chunk, feed the decoder and save a snapshot.
   1826             input_chunk = self.buffer.read(bytes_to_feed)
   1827             self._set_decoded_chars(
   1828                 self._decoder.decode(input_chunk, need_eof))
   1829             self._snapshot = (dec_flags, input_chunk)
   1830 
   1831             # Skip chars_to_skip of the decoded characters.
   1832             if len(self._decoded_chars) < chars_to_skip:
   1833                 raise IOError("can't restore logical file position")
   1834             self._decoded_chars_used = chars_to_skip
   1835 
   1836         # Finally, reset the encoder (merely useful for proper BOM handling)
   1837         try:
   1838             encoder = self._encoder or self._get_encoder()
   1839         except LookupError:
   1840             # Sometimes the encoder doesn't exist
   1841             pass
   1842         else:
   1843             if cookie != 0:
   1844                 encoder.setstate(0)
   1845             else:
   1846                 encoder.reset()
   1847         return cookie
   1848 
   1849     def read(self, n=None):
   1850         self._checkReadable()
   1851         if n is None:
   1852             n = -1
   1853         decoder = self._decoder or self._get_decoder()
   1854         try:
   1855             n.__index__
   1856         except AttributeError:
   1857             raise TypeError("an integer is required")
   1858         if n < 0:
   1859             # Read everything.
   1860             result = (self._get_decoded_chars() +
   1861                       decoder.decode(self.buffer.read(), final=True))
   1862             self._set_decoded_chars('')
   1863             self._snapshot = None
   1864             return result
   1865         else:
   1866             # Keep reading chunks until we have n characters to return.
   1867             eof = False
   1868             result = self._get_decoded_chars(n)
   1869             while len(result) < n and not eof:
   1870                 eof = not self._read_chunk()
   1871                 result += self._get_decoded_chars(n - len(result))
   1872             return result
   1873 
   1874     def next(self):
   1875         self._telling = False
   1876         line = self.readline()
   1877         if not line:
   1878             self._snapshot = None
   1879             self._telling = self._seekable
   1880             raise StopIteration
   1881         return line
   1882 
   1883     def readline(self, limit=None):
   1884         if self.closed:
   1885             raise ValueError("read from closed file")
   1886         if limit is None:
   1887             limit = -1
   1888         elif not isinstance(limit, (int, long)):
   1889             raise TypeError("limit must be an integer")
   1890 
   1891         # Grab all the decoded text (we will rewind any extra bits later).
   1892         line = self._get_decoded_chars()
   1893 
   1894         start = 0
   1895         # Make the decoder if it doesn't already exist.
   1896         if not self._decoder:
   1897             self._get_decoder()
   1898 
   1899         pos = endpos = None
   1900         while True:
   1901             if self._readtranslate:
   1902                 # Newlines are already translated, only search for \n
   1903                 pos = line.find('\n', start)
   1904                 if pos >= 0:
   1905                     endpos = pos + 1
   1906                     break
   1907                 else:
   1908                     start = len(line)
   1909 
   1910             elif self._readuniversal:
   1911                 # Universal newline search. Find any of \r, \r\n, \n
   1912                 # The decoder ensures that \r\n are not split in two pieces
   1913 
   1914                 # In C we'd look for these in parallel of course.
   1915                 nlpos = line.find("\n", start)
   1916                 crpos = line.find("\r", start)
   1917                 if crpos == -1:
   1918                     if nlpos == -1:
   1919                         # Nothing found
   1920                         start = len(line)
   1921                     else:
   1922                         # Found \n
   1923                         endpos = nlpos + 1
   1924                         break
   1925                 elif nlpos == -1:
   1926                     # Found lone \r
   1927                     endpos = crpos + 1
   1928                     break
   1929                 elif nlpos < crpos:
   1930                     # Found \n
   1931                     endpos = nlpos + 1
   1932                     break
   1933                 elif nlpos == crpos + 1:
   1934                     # Found \r\n
   1935                     endpos = crpos + 2
   1936                     break
   1937                 else:
   1938                     # Found \r
   1939                     endpos = crpos + 1
   1940                     break
   1941             else:
   1942                 # non-universal
   1943                 pos = line.find(self._readnl)
   1944                 if pos >= 0:
   1945                     endpos = pos + len(self._readnl)
   1946                     break
   1947 
   1948             if limit >= 0 and len(line) >= limit:
   1949                 endpos = limit  # reached length limit
   1950                 break
   1951 
   1952             # No line ending seen yet - get more data'
   1953             while self._read_chunk():
   1954                 if self._decoded_chars:
   1955                     break
   1956             if self._decoded_chars:
   1957                 line += self._get_decoded_chars()
   1958             else:
   1959                 # end of file
   1960                 self._set_decoded_chars('')
   1961                 self._snapshot = None
   1962                 return line
   1963 
   1964         if limit >= 0 and endpos > limit:
   1965             endpos = limit  # don't exceed limit
   1966 
   1967         # Rewind _decoded_chars to just after the line ending we found.
   1968         self._rewind_decoded_chars(len(line) - endpos)
   1969         return line[:endpos]
   1970 
   1971     @property
   1972     def newlines(self):
   1973         return self._decoder.newlines if self._decoder else None
   1974 
   1975 
   1976 class StringIO(TextIOWrapper):
   1977     """Text I/O implementation using an in-memory buffer.
   1978 
   1979     The initial_value argument sets the value of object.  The newline
   1980     argument is like the one of TextIOWrapper's constructor.
   1981     """
   1982 
   1983     def __init__(self, initial_value="", newline="\n"):
   1984         super(StringIO, self).__init__(BytesIO(),
   1985                                        encoding="utf-8",
   1986                                        errors="strict",
   1987                                        newline=newline)
   1988         # Issue #5645: make universal newlines semantics the same as in the
   1989         # C version, even under Windows.
   1990         if newline is None:
   1991             self._writetranslate = False
   1992         if initial_value:
   1993             if not isinstance(initial_value, unicode):
   1994                 initial_value = unicode(initial_value)
   1995             self.write(initial_value)
   1996             self.seek(0)
   1997 
   1998     def getvalue(self):
   1999         self.flush()
   2000         return self.buffer.getvalue().decode(self._encoding, self._errors)
   2001 
   2002     def __repr__(self):
   2003         # TextIOWrapper tells the encoding in its repr. In StringIO,
   2004         # that's a implementation detail.
   2005         return object.__repr__(self)
   2006 
   2007     @property
   2008     def errors(self):
   2009         return None
   2010 
   2011     @property
   2012     def encoding(self):
   2013         return None
   2014 
   2015     def detach(self):
   2016         # This doesn't make sense on StringIO.
   2017         self._unsupported("detach")
   2018