Home | History | Annotate | Download | only in Lib
      1 """Helper class to quickly write a loop over all standard input files.
      2 
      3 Typical use is:
      4 
      5     import fileinput
      6     for line in fileinput.input():
      7         process(line)
      8 
      9 This iterates over the lines of all files listed in sys.argv[1:],
     10 defaulting to sys.stdin if the list is empty.  If a filename is '-' it
     11 is also replaced by sys.stdin.  To specify an alternative list of
     12 filenames, pass it as the argument to input().  A single file name is
     13 also allowed.
     14 
     15 Functions filename(), lineno() return the filename and cumulative line
     16 number of the line that has just been read; filelineno() returns its
     17 line number in the current file; isfirstline() returns true iff the
     18 line just read is the first line of its file; isstdin() returns true
     19 iff the line was read from sys.stdin.  Function nextfile() closes the
     20 current file so that the next iteration will read the first line from
     21 the next file (if any); lines not read from the file will not count
     22 towards the cumulative line count; the filename is not changed until
     23 after the first line of the next file has been read.  Function close()
     24 closes the sequence.
     25 
     26 Before any lines have been read, filename() returns None and both line
     27 numbers are zero; nextfile() has no effect.  After all lines have been
     28 read, filename() and the line number functions return the values
     29 pertaining to the last line read; nextfile() has no effect.
     30 
     31 All files are opened in text mode by default; you can override this by
     32 setting the mode parameter to input() or FileInput.__init__().
     33 If an I/O error occurs during opening or reading a file, the OSError
     34 exception is raised.
     35 
     36 If sys.stdin is used more than once, the second and further use will
     37 return no lines, except perhaps for interactive use, or if it has been
     38 explicitly reset (e.g. using sys.stdin.seek(0)).
     39 
     40 Empty files are opened and immediately closed; the only time their
     41 presence in the list of filenames is noticeable at all is when the
     42 last file opened is empty.
     43 
     44 It is possible that the last line of a file doesn't end in a newline
     45 character; otherwise lines are returned including the trailing
     46 newline.
     47 
     48 Class FileInput is the implementation; its methods filename(),
     49 lineno(), filelineno(), isfirstline(), isstdin(), nextfile() and close()
     50 correspond to the functions in the module.  In addition it has a
     51 readline() method which returns the next input line, and a
     52 __getitem__() method which implements the sequence behavior.  The
     53 sequence must be accessed in strictly sequential order; sequence
     54 access and readline() cannot be mixed.
     55 
     56 Optional in-place filtering: if the keyword argument inplace=1 is
     57 passed to input() or to the FileInput constructor, the file is moved
     58 to a backup file and standard output is directed to the input file.
     59 This makes it possible to write a filter that rewrites its input file
     60 in place.  If the keyword argument backup=".<some extension>" is also
     61 given, it specifies the extension for the backup file, and the backup
     62 file remains around; by default, the extension is ".bak" and it is
     63 deleted when the output file is closed.  In-place filtering is
     64 disabled when standard input is read.  XXX The current implementation
     65 does not work for MS-DOS 8+3 filesystems.
     66 
     67 XXX Possible additions:
     68 
     69 - optional getopt argument processing
     70 - isatty()
     71 - read(), read(size), even readlines()
     72 
     73 """
     74 
     75 import sys, os
     76 
# Public names of this module (what ``from fileinput import *`` exports).
__all__ = ["input", "close", "nextfile", "filename", "lineno", "filelineno",
           "fileno", "isfirstline", "isstdin", "FileInput", "hook_compressed",
           "hook_encoded"]

# Module-wide FileInput instance: set by input(), reset to None by close(),
# and consulted by the other module-level helper functions.
_state = None
     82 
     83 def input(files=None, inplace=False, backup="", bufsize=0,
     84           mode="r", openhook=None):
     85     """Return an instance of the FileInput class, which can be iterated.
     86 
     87     The parameters are passed to the constructor of the FileInput class.
     88     The returned instance, in addition to being an iterator,
     89     keeps global state for the functions of this module,.
     90     """
     91     global _state
     92     if _state and _state._file:
     93         raise RuntimeError("input() already active")
     94     _state = FileInput(files, inplace, backup, bufsize, mode, openhook)
     95     return _state
     96 
     97 def close():
     98     """Close the sequence."""
     99     global _state
    100     state = _state
    101     _state = None
    102     if state:
    103         state.close()
    104 
    105 def nextfile():
    106     """
    107     Close the current file so that the next iteration will read the first
    108     line from the next file (if any); lines not read from the file will
    109     not count towards the cumulative line count. The filename is not
    110     changed until after the first line of the next file has been read.
    111     Before the first line has been read, this function has no effect;
    112     it cannot be used to skip the first file. After the last line of the
    113     last file has been read, this function has no effect.
    114     """
    115     if not _state:
    116         raise RuntimeError("no active input()")
    117     return _state.nextfile()
    118 
    119 def filename():
    120     """
    121     Return the name of the file currently being read.
    122     Before the first line has been read, returns None.
    123     """
    124     if not _state:
    125         raise RuntimeError("no active input()")
    126     return _state.filename()
    127 
    128 def lineno():
    129     """
    130     Return the cumulative line number of the line that has just been read.
    131     Before the first line has been read, returns 0. After the last line
    132     of the last file has been read, returns the line number of that line.
    133     """
    134     if not _state:
    135         raise RuntimeError("no active input()")
    136     return _state.lineno()
    137 
    138 def filelineno():
    139     """
    140     Return the line number in the current file. Before the first line
    141     has been read, returns 0. After the last line of the last file has
    142     been read, returns the line number of that line within the file.
    143     """
    144     if not _state:
    145         raise RuntimeError("no active input()")
    146     return _state.filelineno()
    147 
    148 def fileno():
    149     """
    150     Return the file number of the current file. When no file is currently
    151     opened, returns -1.
    152     """
    153     if not _state:
    154         raise RuntimeError("no active input()")
    155     return _state.fileno()
    156 
    157 def isfirstline():
    158     """
    159     Returns true the line just read is the first line of its file,
    160     otherwise returns false.
    161     """
    162     if not _state:
    163         raise RuntimeError("no active input()")
    164     return _state.isfirstline()
    165 
    166 def isstdin():
    167     """
    168     Returns true if the last line was read from sys.stdin,
    169     otherwise returns false.
    170     """
    171     if not _state:
    172         raise RuntimeError("no active input()")
    173     return _state.isstdin()
    174 
    175 class FileInput:
    176     """FileInput([files[, inplace[, backup[, bufsize, [, mode[, openhook]]]]]])
    177 
    178     Class FileInput is the implementation of the module; its methods
    179     filename(), lineno(), fileline(), isfirstline(), isstdin(), fileno(),
    180     nextfile() and close() correspond to the functions of the same name
    181     in the module.
    182     In addition it has a readline() method which returns the next
    183     input line, and a __getitem__() method which implements the
    184     sequence behavior. The sequence must be accessed in strictly
    185     sequential order; random access and readline() cannot be mixed.
    186     """
    187 
    188     def __init__(self, files=None, inplace=False, backup="", bufsize=0,
    189                  mode="r", openhook=None):
    190         if isinstance(files, str):
    191             files = (files,)
    192         else:
    193             if files is None:
    194                 files = sys.argv[1:]
    195             if not files:
    196                 files = ('-',)
    197             else:
    198                 files = tuple(files)
    199         self._files = files
    200         self._inplace = inplace
    201         self._backup = backup
    202         if bufsize:
    203             import warnings
    204             warnings.warn('bufsize is deprecated and ignored',
    205                           DeprecationWarning, stacklevel=2)
    206         self._savestdout = None
    207         self._output = None
    208         self._filename = None
    209         self._startlineno = 0
    210         self._filelineno = 0
    211         self._file = None
    212         self._isstdin = False
    213         self._backupfilename = None
    214         # restrict mode argument to reading modes
    215         if mode not in ('r', 'rU', 'U', 'rb'):
    216             raise ValueError("FileInput opening mode must be one of "
    217                              "'r', 'rU', 'U' and 'rb'")
    218         if 'U' in mode:
    219             import warnings
    220             warnings.warn("'U' mode is deprecated",
    221                           DeprecationWarning, 2)
    222         self._mode = mode
    223         if openhook:
    224             if inplace:
    225                 raise ValueError("FileInput cannot use an opening hook in inplace mode")
    226             if not callable(openhook):
    227                 raise ValueError("FileInput openhook must be callable")
    228         self._openhook = openhook
    229 
    230     def __del__(self):
    231         self.close()
    232 
    233     def close(self):
    234         try:
    235             self.nextfile()
    236         finally:
    237             self._files = ()
    238 
    239     def __enter__(self):
    240         return self
    241 
    242     def __exit__(self, type, value, traceback):
    243         self.close()
    244 
    245     def __iter__(self):
    246         return self
    247 
    248     def __next__(self):
    249         while True:
    250             line = self._readline()
    251             if line:
    252                 self._filelineno += 1
    253                 return line
    254             if not self._file:
    255                 raise StopIteration
    256             self.nextfile()
    257             # repeat with next file
    258 
    259     def __getitem__(self, i):
    260         if i != self.lineno():
    261             raise RuntimeError("accessing lines out of order")
    262         try:
    263             return self.__next__()
    264         except StopIteration:
    265             raise IndexError("end of input reached")
    266 
    267     def nextfile(self):
    268         savestdout = self._savestdout
    269         self._savestdout = None
    270         if savestdout:
    271             sys.stdout = savestdout
    272 
    273         output = self._output
    274         self._output = None
    275         try:
    276             if output:
    277                 output.close()
    278         finally:
    279             file = self._file
    280             self._file = None
    281             try:
    282                 del self._readline  # restore FileInput._readline
    283             except AttributeError:
    284                 pass
    285             try:
    286                 if file and not self._isstdin:
    287                     file.close()
    288             finally:
    289                 backupfilename = self._backupfilename
    290                 self._backupfilename = None
    291                 if backupfilename and not self._backup:
    292                     try: os.unlink(backupfilename)
    293                     except OSError: pass
    294 
    295                 self._isstdin = False
    296 
    297     def readline(self):
    298         while True:
    299             line = self._readline()
    300             if line:
    301                 self._filelineno += 1
    302                 return line
    303             if not self._file:
    304                 return line
    305             self.nextfile()
    306             # repeat with next file
    307 
    308     def _readline(self):
    309         if not self._files:
    310             if 'b' in self._mode:
    311                 return b''
    312             else:
    313                 return ''
    314         self._filename = self._files[0]
    315         self._files = self._files[1:]
    316         self._startlineno = self.lineno()
    317         self._filelineno = 0
    318         self._file = None
    319         self._isstdin = False
    320         self._backupfilename = 0
    321         if self._filename == '-':
    322             self._filename = '<stdin>'
    323             if 'b' in self._mode:
    324                 self._file = getattr(sys.stdin, 'buffer', sys.stdin)
    325             else:
    326                 self._file = sys.stdin
    327             self._isstdin = True
    328         else:
    329             if self._inplace:
    330                 self._backupfilename = (
    331                     self._filename + (self._backup or ".bak"))
    332                 try:
    333                     os.unlink(self._backupfilename)
    334                 except OSError:
    335                     pass
    336                 # The next few lines may raise OSError
    337                 os.rename(self._filename, self._backupfilename)
    338                 self._file = open(self._backupfilename, self._mode)
    339                 try:
    340                     perm = os.fstat(self._file.fileno()).st_mode
    341                 except OSError:
    342                     self._output = open(self._filename, "w")
    343                 else:
    344                     mode = os.O_CREAT | os.O_WRONLY | os.O_TRUNC
    345                     if hasattr(os, 'O_BINARY'):
    346                         mode |= os.O_BINARY
    347 
    348                     fd = os.open(self._filename, mode, perm)
    349                     self._output = os.fdopen(fd, "w")
    350                     try:
    351                         if hasattr(os, 'chmod'):
    352                             os.chmod(self._filename, perm)
    353                     except OSError:
    354                         pass
    355                 self._savestdout = sys.stdout
    356                 sys.stdout = self._output
    357             else:
    358                 # This may raise OSError
    359                 if self._openhook:
    360                     self._file = self._openhook(self._filename, self._mode)
    361                 else:
    362                     self._file = open(self._filename, self._mode)
    363         self._readline = self._file.readline  # hide FileInput._readline
    364         return self._readline()
    365 
    366     def filename(self):
    367         return self._filename
    368 
    369     def lineno(self):
    370         return self._startlineno + self._filelineno
    371 
    372     def filelineno(self):
    373         return self._filelineno
    374 
    375     def fileno(self):
    376         if self._file:
    377             try:
    378                 return self._file.fileno()
    379             except ValueError:
    380                 return -1
    381         else:
    382             return -1
    383 
    384     def isfirstline(self):
    385         return self._filelineno == 1
    386 
    387     def isstdin(self):
    388         return self._isstdin
    389 
    390 
    391 def hook_compressed(filename, mode):
    392     ext = os.path.splitext(filename)[1]
    393     if ext == '.gz':
    394         import gzip
    395         return gzip.open(filename, mode)
    396     elif ext == '.bz2':
    397         import bz2
    398         return bz2.BZ2File(filename, mode)
    399     else:
    400         return open(filename, mode)
    401 
    402 
    403 def hook_encoded(encoding, errors=None):
    404     def openhook(filename, mode):
    405         return open(filename, mode, encoding=encoding, errors=errors)
    406     return openhook
    407 
    408 
    409 def _test():
    410     import getopt
    411     inplace = False
    412     backup = False
    413     opts, args = getopt.getopt(sys.argv[1:], "ib:")
    414     for o, a in opts:
    415         if o == '-i': inplace = True
    416         if o == '-b': backup = a
    417     for line in input(args, inplace=inplace, backup=backup):
    418         if line[-1:] == '\n': line = line[:-1]
    419         if line[-1:] == '\r': line = line[:-1]
    420         print("%d: %s[%d]%s %s" % (lineno(), filename(), filelineno(),
    421                                    isfirstline() and "*" or "", line))
    422     print("%d: %s[%d]" % (lineno(), filename(), filelineno()))
    423 
    424 if __name__ == '__main__':
    425     _test()
    426