Home | History | Annotate | Download | only in Lib
      1 """Helper class to quickly write a loop over all standard input files.
      2 
      3 Typical use is:
      4 
      5     import fileinput
      6     for line in fileinput.input():
      7         process(line)
      8 
      9 This iterates over the lines of all files listed in sys.argv[1:],
     10 defaulting to sys.stdin if the list is empty.  If a filename is '-' it
     11 is also replaced by sys.stdin.  To specify an alternative list of
     12 filenames, pass it as the argument to input().  A single file name is
     13 also allowed.
     14 
     15 Functions filename(), lineno() return the filename and cumulative line
     16 number of the line that has just been read; filelineno() returns its
     17 line number in the current file; isfirstline() returns true iff the
     18 line just read is the first line of its file; isstdin() returns true
     19 iff the line was read from sys.stdin.  Function nextfile() closes the
     20 current file so that the next iteration will read the first line from
     21 the next file (if any); lines not read from the file will not count
     22 towards the cumulative line count; the filename is not changed until
     23 after the first line of the next file has been read.  Function close()
     24 closes the sequence.
     25 
     26 Before any lines have been read, filename() returns None and both line
     27 numbers are zero; nextfile() has no effect.  After all lines have been
     28 read, filename() and the line number functions return the values
     29 pertaining to the last line read; nextfile() has no effect.
     30 
     31 All files are opened in text mode by default; you can override this by
     32 setting the mode parameter to input() or FileInput.__init__().
     33 If an I/O error occurs during opening or reading a file, the IOError
     34 exception is raised.
     35 
     36 If sys.stdin is used more than once, the second and further use will
     37 return no lines, except perhaps for interactive use, or if it has been
     38 explicitly reset (e.g. using sys.stdin.seek(0)).
     39 
     40 Empty files are opened and immediately closed; the only time their
     41 presence in the list of filenames is noticeable at all is when the
     42 last file opened is empty.
     43 
     44 It is possible that the last line of a file doesn't end in a newline
     45 character; otherwise lines are returned including the trailing
     46 newline.
     47 
     48 Class FileInput is the implementation; its methods filename(),
     49 lineno(), filelineno(), isfirstline(), isstdin(), nextfile() and close()
     50 correspond to the functions in the module.  In addition it has a
     51 readline() method which returns the next input line, and a
     52 __getitem__() method which implements the sequence behavior.  The
     53 sequence must be accessed in strictly sequential order; sequence
     54 access and readline() cannot be mixed.
     55 
     56 Optional in-place filtering: if the keyword argument inplace=1 is
     57 passed to input() or to the FileInput constructor, the file is moved
     58 to a backup file and standard output is directed to the input file.
     59 This makes it possible to write a filter that rewrites its input file
     60 in place.  If the keyword argument backup=".<some extension>" is also
     61 given, it specifies the extension for the backup file, and the backup
     62 file remains around; by default, the extension is ".bak" and it is
     63 deleted when the output file is closed.  In-place filtering is
     64 disabled when standard input is read.  XXX The current implementation
     65 does not work for MS-DOS 8+3 filesystems.
     66 
     67 XXX Possible additions:
     68 
     69 - optional getopt argument processing
     70 - isatty()
     71 - read(), read(size), even readlines()
     72 
     73 """
     74 
     75 import sys, os
     76 
     77 __all__ = ["input","close","nextfile","filename","lineno","filelineno",
     78            "isfirstline","isstdin","FileInput"]
     79 
     80 _state = None
     81 
     82 # No longer used
     83 DEFAULT_BUFSIZE = 8*1024
     84 
     85 def input(files=None, inplace=0, backup="", bufsize=0,
     86           mode="r", openhook=None):
     87     """Return an instance of the FileInput class, which can be iterated.
     88 
     89     The parameters are passed to the constructor of the FileInput class.
     90     The returned instance, in addition to being an iterator,
     91     keeps global state for the functions of this module,.
     92     """
     93     global _state
     94     if _state and _state._file:
     95         raise RuntimeError, "input() already active"
     96     _state = FileInput(files, inplace, backup, bufsize, mode, openhook)
     97     return _state
     98 
     99 def close():
    100     """Close the sequence."""
    101     global _state
    102     state = _state
    103     _state = None
    104     if state:
    105         state.close()
    106 
    107 def nextfile():
    108     """
    109     Close the current file so that the next iteration will read the first
    110     line from the next file (if any); lines not read from the file will
    111     not count towards the cumulative line count. The filename is not
    112     changed until after the first line of the next file has been read.
    113     Before the first line has been read, this function has no effect;
    114     it cannot be used to skip the first file. After the last line of the
    115     last file has been read, this function has no effect.
    116     """
    117     if not _state:
    118         raise RuntimeError, "no active input()"
    119     return _state.nextfile()
    120 
    121 def filename():
    122     """
    123     Return the name of the file currently being read.
    124     Before the first line has been read, returns None.
    125     """
    126     if not _state:
    127         raise RuntimeError, "no active input()"
    128     return _state.filename()
    129 
    130 def lineno():
    131     """
    132     Return the cumulative line number of the line that has just been read.
    133     Before the first line has been read, returns 0. After the last line
    134     of the last file has been read, returns the line number of that line.
    135     """
    136     if not _state:
    137         raise RuntimeError, "no active input()"
    138     return _state.lineno()
    139 
    140 def filelineno():
    141     """
    142     Return the line number in the current file. Before the first line
    143     has been read, returns 0. After the last line of the last file has
    144     been read, returns the line number of that line within the file.
    145     """
    146     if not _state:
    147         raise RuntimeError, "no active input()"
    148     return _state.filelineno()
    149 
    150 def fileno():
    151     """
    152     Return the file number of the current file. When no file is currently
    153     opened, returns -1.
    154     """
    155     if not _state:
    156         raise RuntimeError, "no active input()"
    157     return _state.fileno()
    158 
    159 def isfirstline():
    160     """
    161     Returns true the line just read is the first line of its file,
    162     otherwise returns false.
    163     """
    164     if not _state:
    165         raise RuntimeError, "no active input()"
    166     return _state.isfirstline()
    167 
    168 def isstdin():
    169     """
    170     Returns true if the last line was read from sys.stdin,
    171     otherwise returns false.
    172     """
    173     if not _state:
    174         raise RuntimeError, "no active input()"
    175     return _state.isstdin()
    176 
    177 class FileInput:
    178     """FileInput([files[, inplace[, backup[, bufsize[, mode[, openhook]]]]]])
    179 
    180     Class FileInput is the implementation of the module; its methods
    181     filename(), lineno(), fileline(), isfirstline(), isstdin(), fileno(),
    182     nextfile() and close() correspond to the functions of the same name
    183     in the module.
    184     In addition it has a readline() method which returns the next
    185     input line, and a __getitem__() method which implements the
    186     sequence behavior. The sequence must be accessed in strictly
    187     sequential order; random access and readline() cannot be mixed.
    188     """
    189 
    190     def __init__(self, files=None, inplace=0, backup="", bufsize=0,
    191                  mode="r", openhook=None):
    192         if isinstance(files, basestring):
    193             files = (files,)
    194         else:
    195             if files is None:
    196                 files = sys.argv[1:]
    197             if not files:
    198                 files = ('-',)
    199             else:
    200                 files = tuple(files)
    201         self._files = files
    202         self._inplace = inplace
    203         self._backup = backup
    204         self._savestdout = None
    205         self._output = None
    206         self._filename = None
    207         self._startlineno = 0
    208         self._filelineno = 0
    209         self._file = None
    210         self._isstdin = False
    211         self._backupfilename = None
    212         # restrict mode argument to reading modes
    213         if mode not in ('r', 'rU', 'U', 'rb'):
    214             raise ValueError("FileInput opening mode must be one of "
    215                              "'r', 'rU', 'U' and 'rb'")
    216         self._mode = mode
    217         if inplace and openhook:
    218             raise ValueError("FileInput cannot use an opening hook in inplace mode")
    219         elif openhook and not hasattr(openhook, '__call__'):
    220             raise ValueError("FileInput openhook must be callable")
    221         self._openhook = openhook
    222 
    223     def __del__(self):
    224         self.close()
    225 
    226     def close(self):
    227         try:
    228             self.nextfile()
    229         finally:
    230             self._files = ()
    231 
    232     def __iter__(self):
    233         return self
    234 
    235     def next(self):
    236         while 1:
    237             line = self._readline()
    238             if line:
    239                 self._filelineno += 1
    240                 return line
    241             if not self._file:
    242                 raise StopIteration
    243             self.nextfile()
    244             # repeat with next file
    245 
    246     def __getitem__(self, i):
    247         if i != self.lineno():
    248             raise RuntimeError, "accessing lines out of order"
    249         try:
    250             return self.next()
    251         except StopIteration:
    252             raise IndexError, "end of input reached"
    253 
    254     def nextfile(self):
    255         savestdout = self._savestdout
    256         self._savestdout = 0
    257         if savestdout:
    258             sys.stdout = savestdout
    259 
    260         output = self._output
    261         self._output = 0
    262         try:
    263             if output:
    264                 output.close()
    265         finally:
    266             file = self._file
    267             self._file = None
    268             try:
    269                 del self._readline  # restore FileInput._readline
    270             except AttributeError:
    271                 pass
    272             try:
    273                 if file and not self._isstdin:
    274                     file.close()
    275             finally:
    276                 backupfilename = self._backupfilename
    277                 self._backupfilename = 0
    278                 if backupfilename and not self._backup:
    279                     try: os.unlink(backupfilename)
    280                     except OSError: pass
    281 
    282                 self._isstdin = False
    283 
    284     def readline(self):
    285         while 1:
    286             line = self._readline()
    287             if line:
    288                 self._filelineno += 1
    289                 return line
    290             if not self._file:
    291                 return line
    292             self.nextfile()
    293             # repeat with next file
    294 
    295     def _readline(self):
    296         if not self._files:
    297             return ""
    298         self._filename = self._files[0]
    299         self._files = self._files[1:]
    300         self._startlineno = self.lineno()
    301         self._filelineno = 0
    302         self._file = None
    303         self._isstdin = False
    304         self._backupfilename = 0
    305         if self._filename == '-':
    306             self._filename = '<stdin>'
    307             self._file = sys.stdin
    308             self._isstdin = True
    309         else:
    310             if self._inplace:
    311                 self._backupfilename = (
    312                     self._filename + (self._backup or os.extsep+"bak"))
    313                 try: os.unlink(self._backupfilename)
    314                 except os.error: pass
    315                 # The next few lines may raise IOError
    316                 os.rename(self._filename, self._backupfilename)
    317                 self._file = open(self._backupfilename, self._mode)
    318                 try:
    319                     perm = os.fstat(self._file.fileno()).st_mode
    320                 except OSError:
    321                     self._output = open(self._filename, "w")
    322                 else:
    323                     fd = os.open(self._filename,
    324                                     os.O_CREAT | os.O_WRONLY | os.O_TRUNC,
    325                                     perm)
    326                     self._output = os.fdopen(fd, "w")
    327                     try:
    328                         if hasattr(os, 'chmod'):
    329                             os.chmod(self._filename, perm)
    330                     except OSError:
    331                         pass
    332                 self._savestdout = sys.stdout
    333                 sys.stdout = self._output
    334             else:
    335                 # This may raise IOError
    336                 if self._openhook:
    337                     self._file = self._openhook(self._filename, self._mode)
    338                 else:
    339                     self._file = open(self._filename, self._mode)
    340 
    341         self._readline = self._file.readline  # hide FileInput._readline
    342         return self._readline()
    343 
    344     def filename(self):
    345         return self._filename
    346 
    347     def lineno(self):
    348         return self._startlineno + self._filelineno
    349 
    350     def filelineno(self):
    351         return self._filelineno
    352 
    353     def fileno(self):
    354         if self._file:
    355             try:
    356                 return self._file.fileno()
    357             except ValueError:
    358                 return -1
    359         else:
    360             return -1
    361 
    362     def isfirstline(self):
    363         return self._filelineno == 1
    364 
    365     def isstdin(self):
    366         return self._isstdin
    367 
    368 
    369 def hook_compressed(filename, mode):
    370     ext = os.path.splitext(filename)[1]
    371     if ext == '.gz':
    372         import gzip
    373         return gzip.open(filename, mode)
    374     elif ext == '.bz2':
    375         import bz2
    376         return bz2.BZ2File(filename, mode)
    377     else:
    378         return open(filename, mode)
    379 
    380 
    381 def hook_encoded(encoding):
    382     import io
    383     def openhook(filename, mode):
    384         mode = mode.replace('U', '').replace('b', '') or 'r'
    385         return io.open(filename, mode, encoding=encoding, newline='')
    386     return openhook
    387 
    388 
    389 def _test():
    390     import getopt
    391     inplace = 0
    392     backup = 0
    393     opts, args = getopt.getopt(sys.argv[1:], "ib:")
    394     for o, a in opts:
    395         if o == '-i': inplace = 1
    396         if o == '-b': backup = a
    397     for line in input(args, inplace=inplace, backup=backup):
    398         if line[-1:] == '\n': line = line[:-1]
    399         if line[-1:] == '\r': line = line[:-1]
    400         print "%d: %s[%d]%s %s" % (lineno(), filename(), filelineno(),
    401                                    isfirstline() and "*" or "", line)
    402     print "%d: %s[%d]" % (lineno(), filename(), filelineno())
    403 
# Run the self-test only when this module is executed as the main program.
if __name__ == '__main__':
    _test()
    406