Home | History | Annotate | Download | only in python2.7
      1 """Helper class to quickly write a loop over all standard input files.
      2 
      3 Typical use is:
      4 
      5     import fileinput
      6     for line in fileinput.input():
      7         process(line)
      8 
      9 This iterates over the lines of all files listed in sys.argv[1:],
     10 defaulting to sys.stdin if the list is empty.  If a filename is '-' it
     11 is also replaced by sys.stdin.  To specify an alternative list of
     12 filenames, pass it as the argument to input().  A single file name is
     13 also allowed.
     14 
     15 Functions filename(), lineno() return the filename and cumulative line
     16 number of the line that has just been read; filelineno() returns its
     17 line number in the current file; isfirstline() returns true iff the
     18 line just read is the first line of its file; isstdin() returns true
     19 iff the line was read from sys.stdin.  Function nextfile() closes the
     20 current file so that the next iteration will read the first line from
     21 the next file (if any); lines not read from the file will not count
     22 towards the cumulative line count; the filename is not changed until
     23 after the first line of the next file has been read.  Function close()
     24 closes the sequence.
     25 
     26 Before any lines have been read, filename() returns None and both line
     27 numbers are zero; nextfile() has no effect.  After all lines have been
     28 read, filename() and the line number functions return the values
     29 pertaining to the last line read; nextfile() has no effect.
     30 
All files are opened in text mode by default; you can override this by
setting the mode parameter of input() or FileInput.__init__().
If an I/O error occurs while opening or reading a file, an IOError
exception is raised.
     35 
     36 If sys.stdin is used more than once, the second and further use will
     37 return no lines, except perhaps for interactive use, or if it has been
     38 explicitly reset (e.g. using sys.stdin.seek(0)).
     39 
     40 Empty files are opened and immediately closed; the only time their
     41 presence in the list of filenames is noticeable at all is when the
     42 last file opened is empty.
     43 
     44 It is possible that the last line of a file doesn't end in a newline
     45 character; otherwise lines are returned including the trailing
     46 newline.
     47 
Class FileInput is the implementation; its methods filename(),
lineno(), filelineno(), fileno(), isfirstline(), isstdin(), nextfile()
and close() correspond to the functions in the module.  In addition it
has a readline() method which returns the next input line, and a
__getitem__() method which implements the sequence behavior.  The
sequence must be accessed in strictly sequential order; sequence
access and readline() cannot be mixed.
     55 
     56 Optional in-place filtering: if the keyword argument inplace=1 is
     57 passed to input() or to the FileInput constructor, the file is moved
     58 to a backup file and standard output is directed to the input file.
     59 This makes it possible to write a filter that rewrites its input file
     60 in place.  If the keyword argument backup=".<some extension>" is also
     61 given, it specifies the extension for the backup file, and the backup
     62 file remains around; by default, the extension is ".bak" and it is
     63 deleted when the output file is closed.  In-place filtering is
     64 disabled when standard input is read.  XXX The current implementation
     65 does not work for MS-DOS 8+3 filesystems.
     66 
     67 Performance: this module is unfortunately one of the slower ways of
     68 processing large numbers of input lines.  Nevertheless, a significant
     69 speed-up has been obtained by using readlines(bufsize) instead of
     70 readline().  A new keyword argument, bufsize=N, is present on the
     71 input() function and the FileInput() class to override the default
     72 buffer size.
     73 
     74 XXX Possible additions:
     75 
     76 - optional getopt argument processing
     77 - isatty()
     78 - read(), read(size), even readlines()
     79 
     80 """
     81 
     82 import sys, os
     83 
     84 __all__ = ["input","close","nextfile","filename","lineno","filelineno",
     85            "isfirstline","isstdin","FileInput"]
     86 
     87 _state = None
     88 
     89 DEFAULT_BUFSIZE = 8*1024
     90 
     91 def input(files=None, inplace=0, backup="", bufsize=0,
     92           mode="r", openhook=None):
     93     """input([files[, inplace[, backup[, mode[, openhook]]]]])
     94 
     95     Create an instance of the FileInput class. The instance will be used
     96     as global state for the functions of this module, and is also returned
     97     to use during iteration. The parameters to this function will be passed
     98     along to the constructor of the FileInput class.
     99     """
    100     global _state
    101     if _state and _state._file:
    102         raise RuntimeError, "input() already active"
    103     _state = FileInput(files, inplace, backup, bufsize, mode, openhook)
    104     return _state
    105 
    106 def close():
    107     """Close the sequence."""
    108     global _state
    109     state = _state
    110     _state = None
    111     if state:
    112         state.close()
    113 
    114 def nextfile():
    115     """
    116     Close the current file so that the next iteration will read the first
    117     line from the next file (if any); lines not read from the file will
    118     not count towards the cumulative line count. The filename is not
    119     changed until after the first line of the next file has been read.
    120     Before the first line has been read, this function has no effect;
    121     it cannot be used to skip the first file. After the last line of the
    122     last file has been read, this function has no effect.
    123     """
    124     if not _state:
    125         raise RuntimeError, "no active input()"
    126     return _state.nextfile()
    127 
    128 def filename():
    129     """
    130     Return the name of the file currently being read.
    131     Before the first line has been read, returns None.
    132     """
    133     if not _state:
    134         raise RuntimeError, "no active input()"
    135     return _state.filename()
    136 
    137 def lineno():
    138     """
    139     Return the cumulative line number of the line that has just been read.
    140     Before the first line has been read, returns 0. After the last line
    141     of the last file has been read, returns the line number of that line.
    142     """
    143     if not _state:
    144         raise RuntimeError, "no active input()"
    145     return _state.lineno()
    146 
    147 def filelineno():
    148     """
    149     Return the line number in the current file. Before the first line
    150     has been read, returns 0. After the last line of the last file has
    151     been read, returns the line number of that line within the file.
    152     """
    153     if not _state:
    154         raise RuntimeError, "no active input()"
    155     return _state.filelineno()
    156 
    157 def fileno():
    158     """
    159     Return the file number of the current file. When no file is currently
    160     opened, returns -1.
    161     """
    162     if not _state:
    163         raise RuntimeError, "no active input()"
    164     return _state.fileno()
    165 
    166 def isfirstline():
    167     """
    168     Returns true the line just read is the first line of its file,
    169     otherwise returns false.
    170     """
    171     if not _state:
    172         raise RuntimeError, "no active input()"
    173     return _state.isfirstline()
    174 
    175 def isstdin():
    176     """
    177     Returns true if the last line was read from sys.stdin,
    178     otherwise returns false.
    179     """
    180     if not _state:
    181         raise RuntimeError, "no active input()"
    182     return _state.isstdin()
    183 
    184 class FileInput:
    185     """class FileInput([files[, inplace[, backup[, mode[, openhook]]]]])
    186 
    187     Class FileInput is the implementation of the module; its methods
    188     filename(), lineno(), fileline(), isfirstline(), isstdin(), fileno(),
    189     nextfile() and close() correspond to the functions of the same name
    190     in the module.
    191     In addition it has a readline() method which returns the next
    192     input line, and a __getitem__() method which implements the
    193     sequence behavior. The sequence must be accessed in strictly
    194     sequential order; random access and readline() cannot be mixed.
    195     """
    196 
    197     def __init__(self, files=None, inplace=0, backup="", bufsize=0,
    198                  mode="r", openhook=None):
    199         if isinstance(files, basestring):
    200             files = (files,)
    201         else:
    202             if files is None:
    203                 files = sys.argv[1:]
    204             if not files:
    205                 files = ('-',)
    206             else:
    207                 files = tuple(files)
    208         self._files = files
    209         self._inplace = inplace
    210         self._backup = backup
    211         self._bufsize = bufsize or DEFAULT_BUFSIZE
    212         self._savestdout = None
    213         self._output = None
    214         self._filename = None
    215         self._lineno = 0
    216         self._filelineno = 0
    217         self._file = None
    218         self._isstdin = False
    219         self._backupfilename = None
    220         self._buffer = []
    221         self._bufindex = 0
    222         # restrict mode argument to reading modes
    223         if mode not in ('r', 'rU', 'U', 'rb'):
    224             raise ValueError("FileInput opening mode must be one of "
    225                              "'r', 'rU', 'U' and 'rb'")
    226         self._mode = mode
    227         if inplace and openhook:
    228             raise ValueError("FileInput cannot use an opening hook in inplace mode")
    229         elif openhook and not hasattr(openhook, '__call__'):
    230             raise ValueError("FileInput openhook must be callable")
    231         self._openhook = openhook
    232 
    233     def __del__(self):
    234         self.close()
    235 
    236     def close(self):
    237         self.nextfile()
    238         self._files = ()
    239 
    240     def __iter__(self):
    241         return self
    242 
    243     def next(self):
    244         try:
    245             line = self._buffer[self._bufindex]
    246         except IndexError:
    247             pass
    248         else:
    249             self._bufindex += 1
    250             self._lineno += 1
    251             self._filelineno += 1
    252             return line
    253         line = self.readline()
    254         if not line:
    255             raise StopIteration
    256         return line
    257 
    258     def __getitem__(self, i):
    259         if i != self._lineno:
    260             raise RuntimeError, "accessing lines out of order"
    261         try:
    262             return self.next()
    263         except StopIteration:
    264             raise IndexError, "end of input reached"
    265 
    266     def nextfile(self):
    267         savestdout = self._savestdout
    268         self._savestdout = 0
    269         if savestdout:
    270             sys.stdout = savestdout
    271 
    272         output = self._output
    273         self._output = 0
    274         if output:
    275             output.close()
    276 
    277         file = self._file
    278         self._file = 0
    279         if file and not self._isstdin:
    280             file.close()
    281 
    282         backupfilename = self._backupfilename
    283         self._backupfilename = 0
    284         if backupfilename and not self._backup:
    285             try: os.unlink(backupfilename)
    286             except OSError: pass
    287 
    288         self._isstdin = False
    289         self._buffer = []
    290         self._bufindex = 0
    291 
    292     def readline(self):
    293         try:
    294             line = self._buffer[self._bufindex]
    295         except IndexError:
    296             pass
    297         else:
    298             self._bufindex += 1
    299             self._lineno += 1
    300             self._filelineno += 1
    301             return line
    302         if not self._file:
    303             if not self._files:
    304                 return ""
    305             self._filename = self._files[0]
    306             self._files = self._files[1:]
    307             self._filelineno = 0
    308             self._file = None
    309             self._isstdin = False
    310             self._backupfilename = 0
    311             if self._filename == '-':
    312                 self._filename = '<stdin>'
    313                 self._file = sys.stdin
    314                 self._isstdin = True
    315             else:
    316                 if self._inplace:
    317                     self._backupfilename = (
    318                         self._filename + (self._backup or os.extsep+"bak"))
    319                     try: os.unlink(self._backupfilename)
    320                     except os.error: pass
    321                     # The next few lines may raise IOError
    322                     os.rename(self._filename, self._backupfilename)
    323                     self._file = open(self._backupfilename, self._mode)
    324                     try:
    325                         perm = os.fstat(self._file.fileno()).st_mode
    326                     except OSError:
    327                         self._output = open(self._filename, "w")
    328                     else:
    329                         fd = os.open(self._filename,
    330                                      os.O_CREAT | os.O_WRONLY | os.O_TRUNC,
    331                                      perm)
    332                         self._output = os.fdopen(fd, "w")
    333                         try:
    334                             if hasattr(os, 'chmod'):
    335                                 os.chmod(self._filename, perm)
    336                         except OSError:
    337                             pass
    338                     self._savestdout = sys.stdout
    339                     sys.stdout = self._output
    340                 else:
    341                     # This may raise IOError
    342                     if self._openhook:
    343                         self._file = self._openhook(self._filename, self._mode)
    344                     else:
    345                         self._file = open(self._filename, self._mode)
    346         self._buffer = self._file.readlines(self._bufsize)
    347         self._bufindex = 0
    348         if not self._buffer:
    349             self.nextfile()
    350         # Recursive call
    351         return self.readline()
    352 
    353     def filename(self):
    354         return self._filename
    355 
    356     def lineno(self):
    357         return self._lineno
    358 
    359     def filelineno(self):
    360         return self._filelineno
    361 
    362     def fileno(self):
    363         if self._file:
    364             try:
    365                 return self._file.fileno()
    366             except ValueError:
    367                 return -1
    368         else:
    369             return -1
    370 
    371     def isfirstline(self):
    372         return self._filelineno == 1
    373 
    374     def isstdin(self):
    375         return self._isstdin
    376 
    377 
    378 def hook_compressed(filename, mode):
    379     ext = os.path.splitext(filename)[1]
    380     if ext == '.gz':
    381         import gzip
    382         return gzip.open(filename, mode)
    383     elif ext == '.bz2':
    384         import bz2
    385         return bz2.BZ2File(filename, mode)
    386     else:
    387         return open(filename, mode)
    388 
    389 
    390 def hook_encoded(encoding):
    391     import codecs
    392     def openhook(filename, mode):
    393         return codecs.open(filename, mode, encoding)
    394     return openhook
    395 
    396 
    397 def _test():
    398     import getopt
    399     inplace = 0
    400     backup = 0
    401     opts, args = getopt.getopt(sys.argv[1:], "ib:")
    402     for o, a in opts:
    403         if o == '-i': inplace = 1
    404         if o == '-b': backup = a
    405     for line in input(args, inplace=inplace, backup=backup):
    406         if line[-1:] == '\n': line = line[:-1]
    407         if line[-1:] == '\r': line = line[:-1]
    408         print "%d: %s[%d]%s %s" % (lineno(), filename(), filelineno(),
    409                                    isfirstline() and "*" or "", line)
    410     print "%d: %s[%d]" % (lineno(), filename(), filelineno())
    411 
    412 if __name__ == '__main__':
    413     _test()
    414