Home | History | Annotate | Download | only in test
      1 '''
      2 Tests for fileinput module.
      3 Nick Mathewson
      4 '''
      5 
      6 import unittest
      7 from test.test_support import verbose, TESTFN, run_unittest
      8 from test.test_support import unlink as safe_unlink, check_warnings
      9 import sys, re
     10 from StringIO import StringIO
     11 from fileinput import FileInput, hook_encoded
     12 
     13 # The fileinput module has 2 interfaces: the FileInput class which does
     14 # all the work, and a few functions (input, etc.) that use a global _state
     15 # variable.  We only test the FileInput class, since the other functions
     16 # only provide a thin facade over FileInput.
     17 
     18 # Write lines (a list of lines) to temp file number i, and return the
     19 # temp file's name.
     def writeTmp(i, lines, mode='w'):  # opening in text mode is the default
         """Write *lines* to temp file number *i* and return the file's name.

         The name is TESTFN with str(i) appended.  *mode* is handed to open()
         unchanged.  The file is closed even when writing raises.
         """
         name = TESTFN + str(i)
         # ``with`` guarantees the handle is released if writelines() fails.
         with open(name, mode) as f:
             f.writelines(lines)
         return name
     26 
     def remove_tempfiles(*names):
         """Remove each temp file named in *names* using safe_unlink.

         Falsy entries (None, '') are skipped, so a caller may pass placeholders
         for files that were never created without triggering an error.
         """
         for name in names:
             if name:
                 safe_unlink(name)
     30 
     class LineReader:
         """File-like stand-in that logs every line it hands out.

         ``openhook`` has the signature FileInput expects from an open hook, but
         it treats the *filename* argument as the text to serve: the string is
         split into lines (line endings kept) and the reader returns itself as
         the "opened file".
         """

         def __init__(self):
             self._linesread = []

         @property
         def linesread(self):
             # Report what was read since the previous access and start a
             # fresh log, so each check only sees the most recent reads.
             seen, self._linesread = self._linesread[:], []
             return seen

         def openhook(self, filename, mode):
             # *mode* is accepted for signature compatibility and ignored.
             pieces = filename.splitlines(True)
             self.it = iter(pieces)
             return self

         def readline(self, size=None):
             # *size* is ignored; an exhausted source yields '' like a file.
             try:
                 line = next(self.it)
             except StopIteration:
                 line = ''
             self._linesread.append(line)
             return line

         def readlines(self, hint=-1):
             # Stops at end of input or once the accumulated length reaches
             # *hint*.  With the default hint of -1 that size check is already
             # satisfied after the first line, so one line is returned per call.
             collected = []
             total = 0
             for line in iter(self.readline, ''):
                 collected.append(line)
                 total += len(line)
                 if total >= hint:
                     break
             return collected

         def close(self):
             pass
     66 
     class BufferSizesTests(unittest.TestCase):
         """Run one FileInput scenario over four temp files per bufsize value."""

         def test_buffer_sizes(self):
             """Create the four temp files for each round and run the scenario."""
             # Number of lines written to file 1, 2, 3 and 4 (31 in total).
             line_counts = (15, 10, 5, 1)
             # First, run the tests with default and teeny buffer size.
             for round_no, bs in (0, 0), (1, 30):
                 # Names are collected as the files are created, so ``finally``
                 # only ever removes files from this round and never touches
                 # an unbound or stale name when writeTmp fails part-way.
                 names = []
                 try:
                     for number, count in enumerate(line_counts, 1):
                         names.append(writeTmp(
                             number,
                             ["Line %s of file %s\n" % (i + 1, number)
                              for i in range(count)]))
                     t1, t2, t3, t4 = names
                     self.buffer_size_test(t1, t2, t3, t4, bs, round_no)
                 finally:
                     remove_tempfiles(*names)

         def buffer_size_test(self, t1, t2, t3, t4, bs=0, round=0):
             """Check iteration, status methods, nextfile, stdin and inplace mode.

             t1..t4 are the names of the temp files to read, *bs* is passed to
             FileInput as ``bufsize`` and *round* only offsets the step numbers
             printed in verbose mode.
             """
             pat = re.compile(r'LINE (\d+) OF FILE (\d+)')

             # A single parenthesised argument prints exactly like the bare
             # print statement, so the verbose output is unchanged.
             start = 1 + round*6
             if verbose:
                 print('%s. Simple iteration (bs=%s)' % (start+0, bs))
             fi = FileInput(files=(t1, t2, t3, t4), bufsize=bs)
             try:
                 lines = list(fi)
             finally:
                 fi.close()
             self.assertEqual(len(lines), 31)
             self.assertEqual(lines[4], 'Line 5 of file 1\n')
             self.assertEqual(lines[30], 'Line 1 of file 4\n')
             self.assertEqual(fi.lineno(), 31)
             self.assertEqual(fi.filename(), t4)

             if verbose:
                 print('%s. Status variables (bs=%s)' % (start+1, bs))
             fi = FileInput(files=(t1, t2, t3, t4), bufsize=bs)
             try:
                 s = "x"
                 while s and s != 'Line 6 of file 2\n':
                     s = fi.readline()
                 self.assertEqual(fi.filename(), t2)
                 self.assertEqual(fi.lineno(), 21)
                 self.assertEqual(fi.filelineno(), 6)
                 self.assertFalse(fi.isfirstline())
                 self.assertFalse(fi.isstdin())

                 if verbose:
                     print('%s. Nextfile (bs=%s)' % (start+2, bs))
                 fi.nextfile()
                 self.assertEqual(fi.readline(), 'Line 1 of file 3\n')
                 self.assertEqual(fi.lineno(), 22)
             finally:
                 # Close even when an assertion fails so the temp files can be
                 # removed afterwards.
                 fi.close()

             if verbose:
                 print('%s. Stdin (bs=%s)' % (start+3, bs))
             fi = FileInput(files=(t1, t2, t3, t4, '-'), bufsize=bs)
             savestdin = sys.stdin
             try:
                 sys.stdin = StringIO("Line 1 of stdin\nLine 2 of stdin\n")
                 lines = list(fi)
                 self.assertEqual(len(lines), 33)
                 self.assertEqual(lines[32], 'Line 2 of stdin\n')
                 self.assertEqual(fi.filename(), '<stdin>')
                 fi.nextfile()
             finally:
                 sys.stdin = savestdin
                 fi.close()

             if verbose:
                 print('%s. Boundary conditions (bs=%s)' % (start+4, bs))
             # Nothing has been read yet, so no file is current.
             fi = FileInput(files=(t1, t2, t3, t4), bufsize=bs)
             try:
                 self.assertEqual(fi.lineno(), 0)
                 self.assertIsNone(fi.filename())
                 fi.nextfile()
                 self.assertEqual(fi.lineno(), 0)
                 self.assertIsNone(fi.filename())
             finally:
                 fi.close()

             if verbose:
                 print('%s. Inplace (bs=%s)' % (start+5, bs))
             # Rewrite every file in upper case; print goes to the file being
             # processed while inplace mode is active.
             savestdout = sys.stdout
             fi = FileInput(files=(t1, t2, t3, t4), inplace=1, bufsize=bs)
             try:
                 for line in fi:
                     print(line[:-1].upper())
             finally:
                 try:
                     fi.close()
                 finally:
                     sys.stdout = savestdout

             # Re-read the files and check the upper-cased content.
             fi = FileInput(files=(t1, t2, t3, t4), bufsize=bs)
             try:
                 for line in fi:
                     self.assertEqual(line[-1], '\n')
                     m = pat.match(line[:-1])
                     self.assertIsNotNone(m)
                     self.assertEqual(int(m.group(1)), fi.filelineno())
             finally:
                 fi.close()
    156 
    class FileInputTests(unittest.TestCase):
        """Tests of FileInput behaviour: empty files, modes, hooks, buffering."""

        def _make_tmp(self, i, lines, mode='w'):
            """Create temp file number *i* with writeTmp and schedule its removal.

            Registering the removal as a cleanup means it runs whether or not
            the test body succeeds, and only for files that were created.
            """
            name = writeTmp(i, lines, mode)
            self.addCleanup(safe_unlink, name)
            return name

        def _open_input(self, **kwargs):
            """Return FileInput(**kwargs), closed automatically after the test.

            Cleanups run last-registered-first, so an input opened after its
            temp files is closed before those files are unlinked.
            """
            fi = FileInput(**kwargs)
            self.addCleanup(fi.close)
            return fi

        def test_zero_byte_files(self):
            t1 = self._make_tmp(1, [""])
            t2 = self._make_tmp(2, [""])
            t3 = self._make_tmp(3, ["The only line there is.\n"])
            t4 = self._make_tmp(4, [""])
            fi = self._open_input(files=(t1, t2, t3, t4))

            line = fi.readline()
            self.assertEqual(line, 'The only line there is.\n')
            self.assertEqual(fi.lineno(), 1)
            self.assertEqual(fi.filelineno(), 1)
            self.assertEqual(fi.filename(), t3)

            line = fi.readline()
            self.assertFalse(line)
            self.assertEqual(fi.lineno(), 1)
            self.assertEqual(fi.filelineno(), 0)
            self.assertEqual(fi.filename(), t4)

        def test_files_that_dont_end_with_newline(self):
            t1 = self._make_tmp(1, ["A\nB\nC"])
            t2 = self._make_tmp(2, ["D\nE\nF"])
            fi = self._open_input(files=(t1, t2))
            lines = list(fi)
            self.assertEqual(lines, ["A\n", "B\n", "C", "D\n", "E\n", "F"])
            self.assertEqual(fi.filelineno(), 3)
            self.assertEqual(fi.lineno(), 6)

        def test_unicode_filenames(self):
            t1 = self._make_tmp(1, ["A\nB"])
            encoding = sys.getfilesystemencoding()
            if encoding is None:
                encoding = 'ascii'
            fi = self._open_input(files=unicode(t1, encoding))
            lines = list(fi)
            self.assertEqual(lines, ["A\n", "B"])

        def test_fileno(self):
            t1 = self._make_tmp(1, ["A\nB"])
            t2 = self._make_tmp(2, ["C\nD"])
            fi = self._open_input(files=(t1, t2))
            # No file is open before the first read.
            self.assertEqual(fi.fileno(), -1)
            next(fi)
            self.assertNotEqual(fi.fileno(), -1)
            fi.nextfile()
            self.assertEqual(fi.fileno(), -1)
            # Drain the remaining input; afterwards no file is open again.
            list(fi)
            self.assertEqual(fi.fileno(), -1)

        def test_opening_mode(self):
            # invalid mode, should raise ValueError
            with self.assertRaises(ValueError):
                FileInput(mode="w")
            # try opening in universal newline mode
            t1 = self._make_tmp(1, ["A\nB\r\nC\rD"], mode="wb")
            fi = self._open_input(files=t1, mode="U")
            lines = list(fi)
            self.assertEqual(lines, ["A\n", "B\n", "C\n", "D"])

        def test_file_opening_hook(self):
            # cannot use openhook and inplace mode together
            with self.assertRaises(ValueError):
                FileInput(inplace=1, openhook=lambda f, m: None)
            # openhook must be callable
            with self.assertRaises(ValueError):
                FileInput(openhook=1)
            # UTF-7 is a convenient, seldom used encoding
            t1 = self._make_tmp(1, ['+AEE-\n+AEI-'], mode="wb")
            fi = self._open_input(files=t1, openhook=hook_encoded("utf-7"))
            lines = list(fi)
            self.assertEqual(lines, [u'A\n', u'B'])

        def test_readline(self):
            # Register the removal first so the file is cleaned up even if
            # writing it fails half-way.
            self.addCleanup(safe_unlink, TESTFN)
            with open(TESTFN, 'wb') as f:
                f.write('A\nB\r\nC\r')
                # Fill TextIOWrapper buffer.
                f.write('123456789\n' * 1000)
                # Issue #20501: readline() shouldn't read whole file.
                f.write('\x80')

            fi = self._open_input(files=TESTFN, openhook=hook_encoded('ascii'))
            # The most likely failure is a UnicodeDecodeError due to the entire
            # file being read when it shouldn't have been.
            self.assertEqual(fi.readline(), u'A\n')
            self.assertEqual(fi.readline(), u'B\r\n')
            self.assertEqual(fi.readline(), u'C\r')
            with self.assertRaises(UnicodeDecodeError):
                # Read to the end of file.
                list(fi)

        def test_readline_buffering(self):
            # The "filenames" are the contents served by LineReader; after each
            # readline() the reader must have been asked for no more lines
            # than were needed.
            src = LineReader()
            fi = FileInput(files=['line1\nline2', 'line3\n'], openhook=src.openhook)
            self.assertEqual(src.linesread, [])
            self.assertEqual(fi.readline(), 'line1\n')
            self.assertEqual(src.linesread, ['line1\n'])
            self.assertEqual(fi.readline(), 'line2')
            self.assertEqual(src.linesread, ['line2'])
            self.assertEqual(fi.readline(), 'line3\n')
            self.assertEqual(src.linesread, ['', 'line3\n'])
            self.assertEqual(fi.readline(), '')
            self.assertEqual(src.linesread, [''])
            self.assertEqual(fi.readline(), '')
            self.assertEqual(src.linesread, [])
            fi.close()

        def test_iteration_buffering(self):
            # Same check as test_readline_buffering, driven through next().
            src = LineReader()
            fi = FileInput(files=['line1\nline2', 'line3\n'], openhook=src.openhook)
            self.assertEqual(src.linesread, [])
            self.assertEqual(next(fi), 'line1\n')
            self.assertEqual(src.linesread, ['line1\n'])
            self.assertEqual(next(fi), 'line2')
            self.assertEqual(src.linesread, ['line2'])
            self.assertEqual(next(fi), 'line3\n')
            self.assertEqual(src.linesread, ['', 'line3\n'])
            self.assertRaises(StopIteration, next, fi)
            self.assertEqual(src.linesread, [''])
            self.assertRaises(StopIteration, next, fi)
            self.assertEqual(src.linesread, [])
            fi.close()
    309 
    class Test_hook_encoded(unittest.TestCase):
        """Unit tests for fileinput.hook_encoded()"""

        def test_modes(self):
            """Every read mode must yield the same decoded lines."""
            # UTF-7 is a convenient, seldom used encoding
            with open(TESTFN, 'wb') as f:
                f.write('A\nB\r\nC\rD+IKw-')
            self.addCleanup(safe_unlink, TESTFN)

            # Line endings are expected to come back untouched in all modes.
            expected_lines = [u'A\n', u'B\r\n', u'C\r', u'D\u20ac']
            for mode in ('r', 'rU', 'U', 'rb'):
                fi = FileInput(files=TESTFN, mode=mode,
                               openhook=hook_encoded('utf-7'))
                lines = list(fi)
                fi.close()
                self.assertEqual(lines, expected_lines)
    330 
    def test_main():
        """Run every test class of this module through run_unittest."""
        test_classes = (BufferSizesTests, FileInputTests, Test_hook_encoded)
        run_unittest(*test_classes)

    if __name__ == "__main__":
        test_main()
    336