"""Helper class to quickly write a loop over all standard input files.

Typical use is:

    import fileinput
    for line in fileinput.input():
        process(line)

This iterates over the lines of all files listed in sys.argv[1:],
defaulting to sys.stdin if the list is empty.  If a filename is '-' it
is also replaced by sys.stdin.  To specify an alternative list of
filenames, pass it as the argument to input().  A single file name
(a string or an os.PathLike object) is also allowed.

Functions filename(), lineno() return the filename and cumulative line
number of the line that has just been read; filelineno() returns its
line number in the current file; isfirstline() returns true iff the
line just read is the first line of its file; isstdin() returns true
iff the line was read from sys.stdin.  Function nextfile() closes the
current file so that the next iteration will read the first line from
the next file (if any); lines not read from the file will not count
towards the cumulative line count; the filename is not changed until
after the first line of the next file has been read.  Function close()
closes the sequence.

Before any lines have been read, filename() returns None and both line
numbers are zero; nextfile() has no effect.  After all lines have been
read, filename() and the line number functions return the values
pertaining to the last line read; nextfile() has no effect.

All files are opened in text mode by default; you can override this by
setting the mode parameter to input() or FileInput.__init__().
If an I/O error occurs during opening or reading a file, the OSError
exception is raised.

If sys.stdin is used more than once, the second and further use will
return no lines, except perhaps for interactive use, or if it has been
explicitly reset (e.g. using sys.stdin.seek(0)).

Empty files are opened and immediately closed; the only time their
presence in the list of filenames is noticeable at all is when the
last file opened is empty.

It is possible that the last line of a file doesn't end in a newline
character; otherwise lines are returned including the trailing
newline.

Class FileInput is the implementation; its methods filename(),
lineno(), filelineno(), fileno(), isfirstline(), isstdin(), nextfile()
and close() correspond to the functions in the module.  In addition it
has a readline() method which returns the next input line, and a
__getitem__() method which implements the sequence behavior.  The
sequence must be accessed in strictly sequential order; sequence
access and readline() cannot be mixed.

Optional in-place filtering: if the keyword argument inplace=1 is
passed to input() or to the FileInput constructor, the file is moved
to a backup file and standard output is directed to the input file.
This makes it possible to write a filter that rewrites its input file
in place.  If the keyword argument backup=".<some extension>" is also
given, it specifies the extension for the backup file, and the backup
file remains around; by default, the extension is ".bak" and it is
deleted when the output file is closed.  In-place filtering is
disabled when standard input is read.  XXX The current implementation
does not work for MS-DOS 8+3 filesystems.

XXX Possible additions:

- optional getopt argument processing
- isatty()
- read(), read(size), even readlines()

"""

import os
import sys

__all__ = ["input", "close", "nextfile", "filename", "lineno", "filelineno",
           "fileno", "isfirstline", "isstdin", "FileInput", "hook_compressed",
           "hook_encoded"]

# The FileInput instance driven by the module-level convenience functions.
_state = None


def input(files=None, inplace=False, backup="", bufsize=0,
          mode="r", openhook=None):
    """Return an instance of the FileInput class, which can be iterated.

    The parameters are passed to the constructor of the FileInput class.
    The returned instance, in addition to being an iterator,
    keeps global state for the functions of this module.

    Raises RuntimeError if a previous input() still has a file open.
    """
    global _state
    if _state and _state._file:
        raise RuntimeError("input() already active")
    _state = FileInput(files, inplace, backup, bufsize, mode, openhook)
    return _state


def close():
    """Close the sequence."""
    global _state
    # Drop the global reference first so the module is reusable even if
    # closing the underlying files raises.
    state = _state
    _state = None
    if state:
        state.close()


def nextfile():
    """
    Close the current file so that the next iteration will read the first
    line from the next file (if any); lines not read from the file will
    not count towards the cumulative line count. The filename is not
    changed until after the first line of the next file has been read.
    Before the first line has been read, this function has no effect;
    it cannot be used to skip the first file. After the last line of the
    last file has been read, this function has no effect.
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.nextfile()


def filename():
    """
    Return the name of the file currently being read.
    Before the first line has been read, returns None.
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.filename()


def lineno():
    """
    Return the cumulative line number of the line that has just been read.
    Before the first line has been read, returns 0. After the last line
    of the last file has been read, returns the line number of that line.
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.lineno()


def filelineno():
    """
    Return the line number in the current file. Before the first line
    has been read, returns 0. After the last line of the last file has
    been read, returns the line number of that line within the file.
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.filelineno()


def fileno():
    """
    Return the file number of the current file. When no file is currently
    opened, returns -1.
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.fileno()


def isfirstline():
    """
    Returns true if the line just read is the first line of its file,
    otherwise returns false.
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.isfirstline()


def isstdin():
    """
    Returns true if the last line was read from sys.stdin,
    otherwise returns false.
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.isstdin()


class FileInput:
    """FileInput([files[, inplace[, backup[, bufsize[, mode[, openhook]]]]]])

    Class FileInput is the implementation of the module; its methods
    filename(), lineno(), filelineno(), isfirstline(), isstdin(), fileno(),
    nextfile() and close() correspond to the functions of the same name
    in the module.
    In addition it has a readline() method which returns the next
    input line, and a __getitem__() method which implements the
    sequence behavior. The sequence must be accessed in strictly
    sequential order; random access and readline() cannot be mixed.
    """

    def __init__(self, files=None, inplace=False, backup="", bufsize=0,
                 mode="r", openhook=None):
        """Set up the sequence of files to read; no file is opened yet.

        files    -- a single name (str or os.PathLike), an iterable of
                    names, or None to use sys.argv[1:] (falling back to
                    '-', i.e. sys.stdin, when that list is empty).
        inplace  -- if true, redirect sys.stdout into each input file
                    while it is being read.
        backup   -- extension for the backup file kept in inplace mode.
        bufsize  -- ignored; a DeprecationWarning is issued if true.
        mode     -- one of 'r', 'rU', 'U' or 'rb'.
        openhook -- optional callable(filename, mode) used to open files;
                    cannot be combined with inplace.

        Raises ValueError for an invalid mode or openhook.
        """
        if isinstance(files, str):
            files = (files,)
        elif isinstance(files, os.PathLike):
            # A lone path-like object is a single file name, not an iterable.
            files = (os.fspath(files),)
        else:
            if files is None:
                files = sys.argv[1:]
            if not files:
                files = ('-',)
            else:
                files = tuple(files)
        self._files = files
        self._inplace = inplace
        self._backup = backup
        if bufsize:
            import warnings
            warnings.warn('bufsize is deprecated and ignored',
                          DeprecationWarning, stacklevel=2)
        self._savestdout = None
        self._output = None
        self._filename = None
        self._startlineno = 0
        self._filelineno = 0
        self._file = None
        self._isstdin = False
        self._backupfilename = None
        # restrict mode argument to reading modes
        if mode not in ('r', 'rU', 'U', 'rb'):
            raise ValueError("FileInput opening mode must be one of "
                             "'r', 'rU', 'U' and 'rb'")
        if 'U' in mode:
            import warnings
            warnings.warn("'U' mode is deprecated",
                          DeprecationWarning, 2)
        self._mode = mode
        if openhook:
            if inplace:
                raise ValueError("FileInput cannot use an opening hook in inplace mode")
            if not callable(openhook):
                raise ValueError("FileInput openhook must be callable")
        self._openhook = openhook

    def __del__(self):
        self.close()

    def close(self):
        """Close the current file and discard any files not yet read."""
        try:
            self.nextfile()
        finally:
            self._files = ()

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        self.close()

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next line, advancing to the next file as needed."""
        while True:
            line = self._readline()
            if line:
                self._filelineno += 1
                return line
            if not self._file:
                # Empty line with no open file: the file list is exhausted.
                raise StopIteration
            self.nextfile()
            # repeat with next file

    def __getitem__(self, i):
        """Return line number i; only strictly sequential access works."""
        if i != self.lineno():
            raise RuntimeError("accessing lines out of order")
        try:
            return self.__next__()
        except StopIteration:
            raise IndexError("end of input reached")

    def nextfile(self):
        """Close the current file (and inplace output), restoring sys.stdout.

        Each cleanup step runs in a finally clause so that a failure in
        one step does not prevent the following ones.
        """
        savestdout = self._savestdout
        self._savestdout = None
        if savestdout:
            sys.stdout = savestdout

        output = self._output
        self._output = None
        try:
            if output:
                output.close()
        finally:
            file = self._file
            self._file = None
            try:
                del self._readline  # restore FileInput._readline
            except AttributeError:
                pass
            try:
                if file and not self._isstdin:
                    file.close()
            finally:
                backupfilename = self._backupfilename
                self._backupfilename = None
                # Without an explicit backup extension the backup file
                # is only temporary; removal is best-effort.
                if backupfilename and not self._backup:
                    try:
                        os.unlink(backupfilename)
                    except OSError:
                        pass

                self._isstdin = False

    def readline(self):
        """Return the next line, or an empty str/bytes at end of input."""
        while True:
            line = self._readline()
            if line:
                self._filelineno += 1
                return line
            if not self._file:
                return line
            self.nextfile()
            # repeat with next file

    def _readline(self):
        """Open the next file in the list and return its first line.

        Once a file is open, this method is shadowed on the instance by
        the file's own readline(); nextfile() removes the shadow again.
        """
        if not self._files:
            if 'b' in self._mode:
                return b''
            else:
                return ''
        self._filename = self._files[0]
        self._files = self._files[1:]
        self._startlineno = self.lineno()
        self._filelineno = 0
        self._file = None
        self._isstdin = False
        self._backupfilename = None
        if self._filename == '-':
            self._filename = '<stdin>'
            if 'b' in self._mode:
                self._file = getattr(sys.stdin, 'buffer', sys.stdin)
            else:
                self._file = sys.stdin
            self._isstdin = True
        else:
            if self._inplace:
                # os.fspath() lets path-like entries be concatenated
                # with the backup extension.
                self._backupfilename = (
                    os.fspath(self._filename) + (self._backup or ".bak"))
                try:
                    os.unlink(self._backupfilename)
                except OSError:
                    pass
                # The next few lines may raise OSError
                os.rename(self._filename, self._backupfilename)
                self._file = open(self._backupfilename, self._mode)
                try:
                    perm = os.fstat(self._file.fileno()).st_mode
                except OSError:
                    self._output = open(self._filename, "w")
                else:
                    # Recreate the file with the permission bits of the
                    # original.
                    mode = os.O_CREAT | os.O_WRONLY | os.O_TRUNC
                    if hasattr(os, 'O_BINARY'):
                        mode |= os.O_BINARY

                    fd = os.open(self._filename, mode, perm)
                    self._output = os.fdopen(fd, "w")
                    try:
                        if hasattr(os, 'chmod'):
                            os.chmod(self._filename, perm)
                    except OSError:
                        pass
                self._savestdout = sys.stdout
                sys.stdout = self._output
            else:
                # This may raise OSError
                if self._openhook:
                    self._file = self._openhook(self._filename, self._mode)
                else:
                    self._file = open(self._filename, self._mode)
        self._readline = self._file.readline  # hide FileInput._readline
        return self._readline()

    def filename(self):
        """Return the name of the file most recently opened, or None."""
        return self._filename

    def lineno(self):
        """Return the cumulative line number of the line just read."""
        return self._startlineno + self._filelineno

    def filelineno(self):
        """Return the line number within the current file."""
        return self._filelineno

    def fileno(self):
        """Return the current file's descriptor, or -1 if unavailable."""
        if self._file:
            try:
                return self._file.fileno()
            except ValueError:
                return -1
        else:
            return -1

    def isfirstline(self):
        """Return true if the line just read is the first of its file."""
        return self._filelineno == 1

    def isstdin(self):
        """Return true if the current file is sys.stdin."""
        return self._isstdin


def hook_compressed(filename, mode):
    """Open hook that picks gzip, bz2 or plain open() by file extension.

    Files ending in '.gz' are opened with gzip.open(), files ending in
    '.bz2' with bz2.BZ2File(), and everything else with open().
    """
    ext = os.path.splitext(filename)[1]
    if ext == '.gz':
        import gzip
        return gzip.open(filename, mode)
    elif ext == '.bz2':
        import bz2
        return bz2.BZ2File(filename, mode)
    else:
        return open(filename, mode)


def hook_encoded(encoding, errors=None):
    """Return an open hook that opens files with the given encoding/errors."""
    def openhook(filename, mode):
        return open(filename, mode, encoding=encoding, errors=errors)
    return openhook


def _test():
    """Command-line demo: number the lines of the files given as arguments.

    Options: -i enables in-place mode, -b EXT sets the backup extension.
    """
    import getopt
    inplace = False
    backup = ""
    opts, args = getopt.getopt(sys.argv[1:], "ib:")
    for o, a in opts:
        if o == '-i':
            inplace = True
        if o == '-b':
            backup = a
    for line in input(args, inplace=inplace, backup=backup):
        if line[-1:] == '\n':
            line = line[:-1]
        if line[-1:] == '\r':
            line = line[:-1]
        print("%d: %s[%d]%s %s" % (lineno(), filename(), filelineno(),
                                   "*" if isfirstline() else "", line))
    print("%d: %s[%d]" % (lineno(), filename(), filelineno()))


if __name__ == '__main__':
    _test()