Home | History | Annotate | Download | only in Lib
      1 """Helper class to quickly write a loop over all standard input files.
      2 
      3 Typical use is:
      4 
      5     import fileinput
      6     for line in fileinput.input():
      7         process(line)
      8 
      9 This iterates over the lines of all files listed in sys.argv[1:],
     10 defaulting to sys.stdin if the list is empty.  If a filename is '-' it
     11 is also replaced by sys.stdin.  To specify an alternative list of
     12 filenames, pass it as the argument to input().  A single file name is
     13 also allowed.
     14 
     15 Functions filename(), lineno() return the filename and cumulative line
     16 number of the line that has just been read; filelineno() returns its
     17 line number in the current file; isfirstline() returns true iff the
     18 line just read is the first line of its file; isstdin() returns true
     19 iff the line was read from sys.stdin.  Function nextfile() closes the
     20 current file so that the next iteration will read the first line from
     21 the next file (if any); lines not read from the file will not count
     22 towards the cumulative line count; the filename is not changed until
     23 after the first line of the next file has been read.  Function close()
     24 closes the sequence.
     25 
     26 Before any lines have been read, filename() returns None and both line
     27 numbers are zero; nextfile() has no effect.  After all lines have been
     28 read, filename() and the line number functions return the values
     29 pertaining to the last line read; nextfile() has no effect.
     30 
     31 All files are opened in text mode by default, you can override this by
     32 setting the mode parameter to input() or FileInput.__init__().
     33 If an I/O error occurs during opening or reading a file, the IOError
     34 exception is raised.
     35 
     36 If sys.stdin is used more than once, the second and further use will
     37 return no lines, except perhaps for interactive use, or if it has been
     38 explicitly reset (e.g. using sys.stdin.seek(0)).
     39 
     40 Empty files are opened and immediately closed; the only time their
     41 presence in the list of filenames is noticeable at all is when the
     42 last file opened is empty.
     43 
     44 It is possible that the last line of a file doesn't end in a newline
     45 character; otherwise lines are returned including the trailing
     46 newline.
     47 
     48 Class FileInput is the implementation; its methods filename(),
     49 lineno(), filelineno(), isfirstline(), isstdin(), nextfile() and close()
     50 correspond to the functions in the module.  In addition it has a
     51 readline() method which returns the next input line, and a
     52 __getitem__() method which implements the sequence behavior.  The
     53 sequence must be accessed in strictly sequential order; sequence
     54 access and readline() cannot be mixed.
     55 
     56 Optional in-place filtering: if the keyword argument inplace=1 is
     57 passed to input() or to the FileInput constructor, the file is moved
     58 to a backup file and standard output is directed to the input file.
     59 This makes it possible to write a filter that rewrites its input file
     60 in place.  If the keyword argument backup=".<some extension>" is also
     61 given, it specifies the extension for the backup file, and the backup
     62 file remains around; by default, the extension is ".bak" and it is
     63 deleted when the output file is closed.  In-place filtering is
     64 disabled when standard input is read.  XXX The current implementation
     65 does not work for MS-DOS 8+3 filesystems.
     66 
     67 Performance: this module is unfortunately one of the slower ways of
     68 processing large numbers of input lines.  Nevertheless, a significant
     69 speed-up has been obtained by using readlines(bufsize) instead of
     70 readline().  A new keyword argument, bufsize=N, is present on the
     71 input() function and the FileInput() class to override the default
     72 buffer size.
     73 
     74 XXX Possible additions:
     75 
     76 - optional getopt argument processing
     77 - isatty()
     78 - read(), read(size), even readlines()
     79 
     80 """
     81 
     82 import sys, os
     83 
     84 __all__ = ["input","close","nextfile","filename","lineno","filelineno",
     85            "isfirstline","isstdin","FileInput"]
     86 
     87 _state = None
     88 
     89 DEFAULT_BUFSIZE = 8*1024
     90 
     91 def input(files=None, inplace=0, backup="", bufsize=0,
     92           mode="r", openhook=None):
     93     """Return an instance of the FileInput class, which can be iterated.
     94 
     95     The parameters are passed to the constructor of the FileInput class.
     96     The returned instance, in addition to being an iterator,
     97     keeps global state for the functions of this module,.
     98     """
     99     global _state
    100     if _state and _state._file:
    101         raise RuntimeError, "input() already active"
    102     _state = FileInput(files, inplace, backup, bufsize, mode, openhook)
    103     return _state
    104 
    105 def close():
    106     """Close the sequence."""
    107     global _state
    108     state = _state
    109     _state = None
    110     if state:
    111         state.close()
    112 
    113 def nextfile():
    114     """
    115     Close the current file so that the next iteration will read the first
    116     line from the next file (if any); lines not read from the file will
    117     not count towards the cumulative line count. The filename is not
    118     changed until after the first line of the next file has been read.
    119     Before the first line has been read, this function has no effect;
    120     it cannot be used to skip the first file. After the last line of the
    121     last file has been read, this function has no effect.
    122     """
    123     if not _state:
    124         raise RuntimeError, "no active input()"
    125     return _state.nextfile()
    126 
    127 def filename():
    128     """
    129     Return the name of the file currently being read.
    130     Before the first line has been read, returns None.
    131     """
    132     if not _state:
    133         raise RuntimeError, "no active input()"
    134     return _state.filename()
    135 
    136 def lineno():
    137     """
    138     Return the cumulative line number of the line that has just been read.
    139     Before the first line has been read, returns 0. After the last line
    140     of the last file has been read, returns the line number of that line.
    141     """
    142     if not _state:
    143         raise RuntimeError, "no active input()"
    144     return _state.lineno()
    145 
    146 def filelineno():
    147     """
    148     Return the line number in the current file. Before the first line
    149     has been read, returns 0. After the last line of the last file has
    150     been read, returns the line number of that line within the file.
    151     """
    152     if not _state:
    153         raise RuntimeError, "no active input()"
    154     return _state.filelineno()
    155 
    156 def fileno():
    157     """
    158     Return the file number of the current file. When no file is currently
    159     opened, returns -1.
    160     """
    161     if not _state:
    162         raise RuntimeError, "no active input()"
    163     return _state.fileno()
    164 
    165 def isfirstline():
    166     """
    167     Returns true the line just read is the first line of its file,
    168     otherwise returns false.
    169     """
    170     if not _state:
    171         raise RuntimeError, "no active input()"
    172     return _state.isfirstline()
    173 
    174 def isstdin():
    175     """
    176     Returns true if the last line was read from sys.stdin,
    177     otherwise returns false.
    178     """
    179     if not _state:
    180         raise RuntimeError, "no active input()"
    181     return _state.isstdin()
    182 
    183 class FileInput:
    184     """FileInput([files[, inplace[, backup[, bufsize[, mode[, openhook]]]]]])
    185 
    186     Class FileInput is the implementation of the module; its methods
    187     filename(), lineno(), fileline(), isfirstline(), isstdin(), fileno(),
    188     nextfile() and close() correspond to the functions of the same name
    189     in the module.
    190     In addition it has a readline() method which returns the next
    191     input line, and a __getitem__() method which implements the
    192     sequence behavior. The sequence must be accessed in strictly
    193     sequential order; random access and readline() cannot be mixed.
    194     """
    195 
    196     def __init__(self, files=None, inplace=0, backup="", bufsize=0,
    197                  mode="r", openhook=None):
    198         if isinstance(files, basestring):
    199             files = (files,)
    200         else:
    201             if files is None:
    202                 files = sys.argv[1:]
    203             if not files:
    204                 files = ('-',)
    205             else:
    206                 files = tuple(files)
    207         self._files = files
    208         self._inplace = inplace
    209         self._backup = backup
    210         self._bufsize = bufsize or DEFAULT_BUFSIZE
    211         self._savestdout = None
    212         self._output = None
    213         self._filename = None
    214         self._lineno = 0
    215         self._filelineno = 0
    216         self._file = None
    217         self._isstdin = False
    218         self._backupfilename = None
    219         self._buffer = []
    220         self._bufindex = 0
    221         # restrict mode argument to reading modes

    222         if mode not in ('r', 'rU', 'U', 'rb'):
    223             raise ValueError("FileInput opening mode must be one of "
    224                              "'r', 'rU', 'U' and 'rb'")
    225         self._mode = mode
    226         if inplace and openhook:
    227             raise ValueError("FileInput cannot use an opening hook in inplace mode")
    228         elif openhook and not hasattr(openhook, '__call__'):
    229             raise ValueError("FileInput openhook must be callable")
    230         self._openhook = openhook
    231 
    232     def __del__(self):
    233         self.close()
    234 
    235     def close(self):
    236         try:
    237             self.nextfile()
    238         finally:
    239             self._files = ()
    240 
    241     def __iter__(self):
    242         return self
    243 
    244     def next(self):
    245         try:
    246             line = self._buffer[self._bufindex]
    247         except IndexError:
    248             pass
    249         else:
    250             self._bufindex += 1
    251             self._lineno += 1
    252             self._filelineno += 1
    253             return line
    254         line = self.readline()
    255         if not line:
    256             raise StopIteration
    257         return line
    258 
    259     def __getitem__(self, i):
    260         if i != self._lineno:
    261             raise RuntimeError, "accessing lines out of order"
    262         try:
    263             return self.next()
    264         except StopIteration:
    265             raise IndexError, "end of input reached"
    266 
    267     def nextfile(self):
    268         savestdout = self._savestdout
    269         self._savestdout = 0
    270         if savestdout:
    271             sys.stdout = savestdout
    272 
    273         output = self._output
    274         self._output = 0
    275         try:
    276             if output:
    277                 output.close()
    278         finally:
    279             file = self._file
    280             self._file = 0
    281             try:
    282                 if file and not self._isstdin:
    283                     file.close()
    284             finally:
    285                 backupfilename = self._backupfilename
    286                 self._backupfilename = 0
    287                 if backupfilename and not self._backup:
    288                     try: os.unlink(backupfilename)
    289                     except OSError: pass
    290 
    291                 self._isstdin = False
    292                 self._buffer = []
    293                 self._bufindex = 0
    294 
    295     def readline(self):
    296         try:
    297             line = self._buffer[self._bufindex]
    298         except IndexError:
    299             pass
    300         else:
    301             self._bufindex += 1
    302             self._lineno += 1
    303             self._filelineno += 1
    304             return line
    305         if not self._file:
    306             if not self._files:
    307                 return ""
    308             self._filename = self._files[0]
    309             self._files = self._files[1:]
    310             self._filelineno = 0
    311             self._file = None
    312             self._isstdin = False
    313             self._backupfilename = 0
    314             if self._filename == '-':
    315                 self._filename = '<stdin>'
    316                 self._file = sys.stdin
    317                 self._isstdin = True
    318             else:
    319                 if self._inplace:
    320                     self._backupfilename = (
    321                         self._filename + (self._backup or os.extsep+"bak"))
    322                     try: os.unlink(self._backupfilename)
    323                     except os.error: pass
    324                     # The next few lines may raise IOError

    325                     os.rename(self._filename, self._backupfilename)
    326                     self._file = open(self._backupfilename, self._mode)
    327                     try:
    328                         perm = os.fstat(self._file.fileno()).st_mode
    329                     except OSError:
    330                         self._output = open(self._filename, "w")
    331                     else:
    332                         fd = os.open(self._filename,
    333                                      os.O_CREAT | os.O_WRONLY | os.O_TRUNC,
    334                                      perm)
    335                         self._output = os.fdopen(fd, "w")
    336                         try:
    337                             if hasattr(os, 'chmod'):
    338                                 os.chmod(self._filename, perm)
    339                         except OSError:
    340                             pass
    341                     self._savestdout = sys.stdout
    342                     sys.stdout = self._output
    343                 else:
    344                     # This may raise IOError

    345                     if self._openhook:
    346                         self._file = self._openhook(self._filename, self._mode)
    347                     else:
    348                         self._file = open(self._filename, self._mode)
    349         self._buffer = self._file.readlines(self._bufsize)
    350         self._bufindex = 0
    351         if not self._buffer:
    352             self.nextfile()
    353         # Recursive call

    354         return self.readline()
    355 
    356     def filename(self):
    357         return self._filename
    358 
    359     def lineno(self):
    360         return self._lineno
    361 
    362     def filelineno(self):
    363         return self._filelineno
    364 
    365     def fileno(self):
    366         if self._file:
    367             try:
    368                 return self._file.fileno()
    369             except ValueError:
    370                 return -1
    371         else:
    372             return -1
    373 
    374     def isfirstline(self):
    375         return self._filelineno == 1
    376 
    377     def isstdin(self):
    378         return self._isstdin
    379 
    380 
    381 def hook_compressed(filename, mode):
    382     ext = os.path.splitext(filename)[1]
    383     if ext == '.gz':
    384         import gzip
    385         return gzip.open(filename, mode)
    386     elif ext == '.bz2':
    387         import bz2
    388         return bz2.BZ2File(filename, mode)
    389     else:
    390         return open(filename, mode)
    391 
    392 
    393 def hook_encoded(encoding):
    394     import io
    395     def openhook(filename, mode):
    396         mode = mode.replace('U', '').replace('b', '') or 'r'
    397         return io.open(filename, mode, encoding=encoding, newline='')
    398     return openhook
    399 
    400 
    401 def _test():
    402     import getopt
    403     inplace = 0
    404     backup = 0
    405     opts, args = getopt.getopt(sys.argv[1:], "ib:")
    406     for o, a in opts:
    407         if o == '-i': inplace = 1
    408         if o == '-b': backup = a
    409     for line in input(args, inplace=inplace, backup=backup):
    410         if line[-1:] == '\n': line = line[:-1]
    411         if line[-1:] == '\r': line = line[:-1]
    412         print "%d: %s[%d]%s %s" % (lineno(), filename(), filelineno(),
    413                                    isfirstline() and "*" or "", line)
    414     print "%d: %s[%d]" % (lineno(), filename(), filelineno())
    415 
# Run the command-line demo when this module is executed as a script.
if __name__ == '__main__':
    _test()
    418