# Unit tests for the standard-library difflib module.
      1 import difflib
      2 from test.support import run_unittest, findfile
      3 import unittest
      4 import doctest
      5 import sys
      6 
      7 
      8 class TestWithAscii(unittest.TestCase):
      9     def test_one_insert(self):
     10         sm = difflib.SequenceMatcher(None, 'b' * 100, 'a' + 'b' * 100)
     11         self.assertAlmostEqual(sm.ratio(), 0.995, places=3)
     12         self.assertEqual(list(sm.get_opcodes()),
     13             [   ('insert', 0, 0, 0, 1),
     14                 ('equal', 0, 100, 1, 101)])
     15         self.assertEqual(sm.bpopular, set())
     16         sm = difflib.SequenceMatcher(None, 'b' * 100, 'b' * 50 + 'a' + 'b' * 50)
     17         self.assertAlmostEqual(sm.ratio(), 0.995, places=3)
     18         self.assertEqual(list(sm.get_opcodes()),
     19             [   ('equal', 0, 50, 0, 50),
     20                 ('insert', 50, 50, 50, 51),
     21                 ('equal', 50, 100, 51, 101)])
     22         self.assertEqual(sm.bpopular, set())
     23 
     24     def test_one_delete(self):
     25         sm = difflib.SequenceMatcher(None, 'a' * 40 + 'c' + 'b' * 40, 'a' * 40 + 'b' * 40)
     26         self.assertAlmostEqual(sm.ratio(), 0.994, places=3)
     27         self.assertEqual(list(sm.get_opcodes()),
     28             [   ('equal', 0, 40, 0, 40),
     29                 ('delete', 40, 41, 40, 40),
     30                 ('equal', 41, 81, 40, 80)])
     31 
     32     def test_bjunk(self):
     33         sm = difflib.SequenceMatcher(isjunk=lambda x: x == ' ',
     34                 a='a' * 40 + 'b' * 40, b='a' * 44 + 'b' * 40)
     35         self.assertEqual(sm.bjunk, set())
     36 
     37         sm = difflib.SequenceMatcher(isjunk=lambda x: x == ' ',
     38                 a='a' * 40 + 'b' * 40, b='a' * 44 + 'b' * 40 + ' ' * 20)
     39         self.assertEqual(sm.bjunk, {' '})
     40 
     41         sm = difflib.SequenceMatcher(isjunk=lambda x: x in [' ', 'b'],
     42                 a='a' * 40 + 'b' * 40, b='a' * 44 + 'b' * 40 + ' ' * 20)
     43         self.assertEqual(sm.bjunk, {' ', 'b'})
     44 
     45 
     46 class TestAutojunk(unittest.TestCase):
     47     """Tests for the autojunk parameter added in 2.7"""
     48     def test_one_insert_homogenous_sequence(self):
     49         # By default autojunk=True and the heuristic kicks in for a sequence
     50         # of length 200+
     51         seq1 = 'b' * 200
     52         seq2 = 'a' + 'b' * 200
     53 
     54         sm = difflib.SequenceMatcher(None, seq1, seq2)
     55         self.assertAlmostEqual(sm.ratio(), 0, places=3)
     56         self.assertEqual(sm.bpopular, {'b'})
     57 
     58         # Now turn the heuristic off
     59         sm = difflib.SequenceMatcher(None, seq1, seq2, autojunk=False)
     60         self.assertAlmostEqual(sm.ratio(), 0.9975, places=3)
     61         self.assertEqual(sm.bpopular, set())
     62 
     63 
     64 class TestSFbugs(unittest.TestCase):
     65     def test_ratio_for_null_seqn(self):
     66         # Check clearing of SF bug 763023
     67         s = difflib.SequenceMatcher(None, [], [])
     68         self.assertEqual(s.ratio(), 1)
     69         self.assertEqual(s.quick_ratio(), 1)
     70         self.assertEqual(s.real_quick_ratio(), 1)
     71 
     72     def test_comparing_empty_lists(self):
     73         # Check fix for bug #979794
     74         group_gen = difflib.SequenceMatcher(None, [], []).get_grouped_opcodes()
     75         self.assertRaises(StopIteration, next, group_gen)
     76         diff_gen = difflib.unified_diff([], [])
     77         self.assertRaises(StopIteration, next, diff_gen)
     78 
     79     def test_matching_blocks_cache(self):
     80         # Issue #21635
     81         s = difflib.SequenceMatcher(None, "abxcd", "abcd")
     82         first = s.get_matching_blocks()
     83         second = s.get_matching_blocks()
     84         self.assertEqual(second[0].size, 2)
     85         self.assertEqual(second[1].size, 2)
     86         self.assertEqual(second[2].size, 0)
     87 
     88     def test_added_tab_hint(self):
     89         # Check fix for bug #1488943
     90         diff = list(difflib.Differ().compare(["\tI am a buggy"],["\t\tI am a bug"]))
     91         self.assertEqual("- \tI am a buggy", diff[0])
     92         self.assertEqual("?            --\n", diff[1])
     93         self.assertEqual("+ \t\tI am a bug", diff[2])
     94         self.assertEqual("? +\n", diff[3])
     95 
     96     def test_mdiff_catch_stop_iteration(self):
     97         # Issue #33224
     98         self.assertEqual(
     99             list(difflib._mdiff(["2"], ["3"], 1)),
    100             [((1, '\x00-2\x01'), (1, '\x00+3\x01'), True)],
    101         )
    102 
    103 
# "From" side of the first SF patch 914575 fixture pair.  test_html_diff
# repeats it (with filler lines) to build the inputs for HtmlDiff.
patch914575_from1 = """
   1. Beautiful is beTTer than ugly.
   2. Explicit is better than implicit.
   3. Simple is better than complex.
   4. Complex is better than complicated.
"""

# "To" side of the first fixture pair: relative to the text above, item 1
# changes case, item 2 is absent, items 3 and 4 are edited and item 5 is new.
patch914575_to1 = """
   1. Beautiful is better than ugly.
   3.   Simple is better than complex.
   4. Complicated is better than complex.
   5. Flat is better than nested.
"""
    117 
    118 patch914575_nonascii_from1 = """
    119    1. Beautiful is beTTer than ugly.
    120    2. Explicit is better than mplct.
    121    3. Simple is better than complex.
    122    4. Complex is better than complicated.
    123 """
    124 
    125 patch914575_nonascii_to1 = """
    126    1. Beautiful is better than gly.
    127    3.   Smple is better than complex.
    128    4. Complicated is better than cmplex.
    129    5. Flat is better than nested.
    130 """
    131 
# "From" side of the second fixture pair.  The lines mix tabs (written as \t
# escapes) and spaces; test_html_diff feeds this pair to HtmlDiff with
# tabsize=2 and with the default tab size.  Whitespace here is significant.
patch914575_from2 = """
\t\tLine 1: preceded by from:[tt] to:[ssss]
  \t\tLine 2: preceded by from:[sstt] to:[sssst]
  \t \tLine 3: preceded by from:[sstst] to:[ssssss]
Line 4:  \thas from:[sst] to:[sss] after :
Line 5: has from:[t] to:[ss] at end\t
"""

# "To" side of the second fixture pair: the same five lines with different
# tab/space combinations.  Whitespace here is significant.
patch914575_to2 = """
    Line 1: preceded by from:[tt] to:[ssss]
    \tLine 2: preceded by from:[sstt] to:[sssst]
      Line 3: preceded by from:[sstst] to:[ssssss]
Line 4:   has from:[sst] to:[sss] after :
Line 5: has from:[t] to:[ss] at end
"""
    147 
# "From" side of the third fixture pair, kept as a single string (no leading
# or trailing newline).  test_html_diff splits it itself, with and without
# keepends, and diffs it with HtmlDiff(wrapcolumn=14).
patch914575_from3 = """line 0
1234567890123456789012345689012345
line 1
line 2
line 3
line 4   changed
line 5   changed
line 6   changed
line 7
line 8  subtracted
line 9
1234567890123456789012345689012345
short line
just fits in!!
just fits in two lines yup!!
the end"""

# "To" side of the third fixture pair: same layout as the text above, with
# added, changed and shortened lines.
patch914575_to3 = """line 0
1234567890123456789012345689012345
line 1
line 2    added
line 3
line 4   chanGEd
line 5a  chanGed
line 6a  changEd
line 7
line 8
line 9
1234567890
another long line that needs to be wrapped
just fitS in!!
just fits in two lineS yup!!
the end"""
    181 
    182 class TestSFpatches(unittest.TestCase):
    183 
    184     def test_html_diff(self):
    185         # Check SF patch 914575 for generating HTML differences
    186         f1a = ((patch914575_from1 + '123\n'*10)*3)
    187         t1a = (patch914575_to1 + '123\n'*10)*3
    188         f1b = '456\n'*10 + f1a
    189         t1b = '456\n'*10 + t1a
    190         f1a = f1a.splitlines()
    191         t1a = t1a.splitlines()
    192         f1b = f1b.splitlines()
    193         t1b = t1b.splitlines()
    194         f2 = patch914575_from2.splitlines()
    195         t2 = patch914575_to2.splitlines()
    196         f3 = patch914575_from3
    197         t3 = patch914575_to3
    198         i = difflib.HtmlDiff()
    199         j = difflib.HtmlDiff(tabsize=2)
    200         k = difflib.HtmlDiff(wrapcolumn=14)
    201 
    202         full = i.make_file(f1a,t1a,'from','to',context=False,numlines=5)
    203         tables = '\n'.join(
    204             [
    205              '<h2>Context (first diff within numlines=5(default))</h2>',
    206              i.make_table(f1a,t1a,'from','to',context=True),
    207              '<h2>Context (first diff after numlines=5(default))</h2>',
    208              i.make_table(f1b,t1b,'from','to',context=True),
    209              '<h2>Context (numlines=6)</h2>',
    210              i.make_table(f1a,t1a,'from','to',context=True,numlines=6),
    211              '<h2>Context (numlines=0)</h2>',
    212              i.make_table(f1a,t1a,'from','to',context=True,numlines=0),
    213              '<h2>Same Context</h2>',
    214              i.make_table(f1a,f1a,'from','to',context=True),
    215              '<h2>Same Full</h2>',
    216              i.make_table(f1a,f1a,'from','to',context=False),
    217              '<h2>Empty Context</h2>',
    218              i.make_table([],[],'from','to',context=True),
    219              '<h2>Empty Full</h2>',
    220              i.make_table([],[],'from','to',context=False),
    221              '<h2>tabsize=2</h2>',
    222              j.make_table(f2,t2),
    223              '<h2>tabsize=default</h2>',
    224              i.make_table(f2,t2),
    225              '<h2>Context (wrapcolumn=14,numlines=0)</h2>',
    226              k.make_table(f3.splitlines(),t3.splitlines(),context=True,numlines=0),
    227              '<h2>wrapcolumn=14,splitlines()</h2>',
    228              k.make_table(f3.splitlines(),t3.splitlines()),
    229              '<h2>wrapcolumn=14,splitlines(True)</h2>',
    230              k.make_table(f3.splitlines(True),t3.splitlines(True)),
    231              ])
    232         actual = full.replace('</body>','\n%s\n</body>' % tables)
    233 
    234         # temporarily uncomment next two lines to baseline this test
    235         #with open('test_difflib_expect.html','w') as fp:
    236         #    fp.write(actual)
    237 
    238         with open(findfile('test_difflib_expect.html')) as fp:
    239             self.assertEqual(actual, fp.read())
    240 
    241     def test_recursion_limit(self):
    242         # Check if the problem described in patch #1413711 exists.
    243         limit = sys.getrecursionlimit()
    244         old = [(i%2 and "K:%d" or "V:A:%d") % i for i in range(limit*2)]
    245         new = [(i%2 and "K:%d" or "V:B:%d") % i for i in range(limit*2)]
    246         difflib.SequenceMatcher(None, old, new).get_opcodes()
    247 
    248     def test_make_file_default_charset(self):
    249         html_diff = difflib.HtmlDiff()
    250         output = html_diff.make_file(patch914575_from1.splitlines(),
    251                                      patch914575_to1.splitlines())
    252         self.assertIn('content="text/html; charset=utf-8"', output)
    253 
    254     def test_make_file_iso88591_charset(self):
    255         html_diff = difflib.HtmlDiff()
    256         output = html_diff.make_file(patch914575_from1.splitlines(),
    257                                      patch914575_to1.splitlines(),
    258                                      charset='iso-8859-1')
    259         self.assertIn('content="text/html; charset=iso-8859-1"', output)
    260 
    261     def test_make_file_usascii_charset_with_nonascii_input(self):
    262         html_diff = difflib.HtmlDiff()
    263         output = html_diff.make_file(patch914575_nonascii_from1.splitlines(),
    264                                      patch914575_nonascii_to1.splitlines(),
    265                                      charset='us-ascii')
    266         self.assertIn('content="text/html; charset=us-ascii"', output)
    267         self.assertIn('&#305;mpl&#305;c&#305;t', output)
    268 
    269 
    270 class TestOutputFormat(unittest.TestCase):
    271     def test_tab_delimiter(self):
    272         args = ['one', 'two', 'Original', 'Current',
    273             '2005-01-26 23:30:50', '2010-04-02 10:20:52']
    274         ud = difflib.unified_diff(*args, lineterm='')
    275         self.assertEqual(list(ud)[0:2], [
    276                            "--- Original\t2005-01-26 23:30:50",
    277                            "+++ Current\t2010-04-02 10:20:52"])
    278         cd = difflib.context_diff(*args, lineterm='')
    279         self.assertEqual(list(cd)[0:2], [
    280                            "*** Original\t2005-01-26 23:30:50",
    281                            "--- Current\t2010-04-02 10:20:52"])
    282 
    283     def test_no_trailing_tab_on_empty_filedate(self):
    284         args = ['one', 'two', 'Original', 'Current']
    285         ud = difflib.unified_diff(*args, lineterm='')
    286         self.assertEqual(list(ud)[0:2], ["--- Original", "+++ Current"])
    287 
    288         cd = difflib.context_diff(*args, lineterm='')
    289         self.assertEqual(list(cd)[0:2], ["*** Original", "--- Current"])
    290 
    291     def test_range_format_unified(self):
    292         # Per the diff spec at http://www.unix.org/single_unix_specification/
    293         spec = '''\
    294            Each <range> field shall be of the form:
    295              %1d", <beginning line number>  if the range contains exactly one line,
    296            and:
    297             "%1d,%1d", <beginning line number>, <number of lines> otherwise.
    298            If a range is empty, its beginning line number shall be the number of
    299            the line just before the range, or 0 if the empty range starts the file.
    300         '''
    301         fmt = difflib._format_range_unified
    302         self.assertEqual(fmt(3,3), '3,0')
    303         self.assertEqual(fmt(3,4), '4')
    304         self.assertEqual(fmt(3,5), '4,2')
    305         self.assertEqual(fmt(3,6), '4,3')
    306         self.assertEqual(fmt(0,0), '0,0')
    307 
    308     def test_range_format_context(self):
    309         # Per the diff spec at http://www.unix.org/single_unix_specification/
    310         spec = '''\
    311            The range of lines in file1 shall be written in the following format
    312            if the range contains two or more lines:
    313                "*** %d,%d ****\n", <beginning line number>, <ending line number>
    314            and the following format otherwise:
    315                "*** %d ****\n", <ending line number>
    316            The ending line number of an empty range shall be the number of the preceding line,
    317            or 0 if the range is at the start of the file.
    318 
    319            Next, the range of lines in file2 shall be written in the following format
    320            if the range contains two or more lines:
    321                "--- %d,%d ----\n", <beginning line number>, <ending line number>
    322            and the following format otherwise:
    323                "--- %d ----\n", <ending line number>
    324         '''
    325         fmt = difflib._format_range_context
    326         self.assertEqual(fmt(3,3), '3')
    327         self.assertEqual(fmt(3,4), '4')
    328         self.assertEqual(fmt(3,5), '4,5')
    329         self.assertEqual(fmt(3,6), '4,6')
    330         self.assertEqual(fmt(0,0), '0')
    331 
    332 
    333 class TestBytes(unittest.TestCase):
    334     # don't really care about the content of the output, just the fact
    335     # that it's bytes and we don't crash
    336     def check(self, diff):
    337         diff = list(diff)   # trigger exceptions first
    338         for line in diff:
    339             self.assertIsInstance(
    340                 line, bytes,
    341                 "all lines of diff should be bytes, but got: %r" % line)
    342 
    343     def test_byte_content(self):
    344         # if we receive byte strings, we return byte strings
    345         a = [b'hello', b'andr\xe9']     # iso-8859-1 bytes
    346         b = [b'hello', b'andr\xc3\xa9'] # utf-8 bytes
    347 
    348         unified = difflib.unified_diff
    349         context = difflib.context_diff
    350 
    351         check = self.check
    352         check(difflib.diff_bytes(unified, a, a))
    353         check(difflib.diff_bytes(unified, a, b))
    354 
    355         # now with filenames (content and filenames are all bytes!)
    356         check(difflib.diff_bytes(unified, a, a, b'a', b'a'))
    357         check(difflib.diff_bytes(unified, a, b, b'a', b'b'))
    358 
    359         # and with filenames and dates
    360         check(difflib.diff_bytes(unified, a, a, b'a', b'a', b'2005', b'2013'))
    361         check(difflib.diff_bytes(unified, a, b, b'a', b'b', b'2005', b'2013'))
    362 
    363         # same all over again, with context diff
    364         check(difflib.diff_bytes(context, a, a))
    365         check(difflib.diff_bytes(context, a, b))
    366         check(difflib.diff_bytes(context, a, a, b'a', b'a'))
    367         check(difflib.diff_bytes(context, a, b, b'a', b'b'))
    368         check(difflib.diff_bytes(context, a, a, b'a', b'a', b'2005', b'2013'))
    369         check(difflib.diff_bytes(context, a, b, b'a', b'b', b'2005', b'2013'))
    370 
    371     def test_byte_filenames(self):
    372         # somebody renamed a file from ISO-8859-2 to UTF-8
    373         fna = b'\xb3odz.txt'    # "odz.txt"
    374         fnb = b'\xc5\x82odz.txt'
    375 
    376         # they transcoded the content at the same time
    377         a = [b'\xa3odz is a city in Poland.']
    378         b = [b'\xc5\x81odz is a city in Poland.']
    379 
    380         check = self.check
    381         unified = difflib.unified_diff
    382         context = difflib.context_diff
    383         check(difflib.diff_bytes(unified, a, b, fna, fnb))
    384         check(difflib.diff_bytes(context, a, b, fna, fnb))
    385 
    386         def assertDiff(expect, actual):
    387             # do not compare expect and equal as lists, because unittest
    388             # uses difflib to report difference between lists
    389             actual = list(actual)
    390             self.assertEqual(len(expect), len(actual))
    391             for e, a in zip(expect, actual):
    392                 self.assertEqual(e, a)
    393 
    394         expect = [
    395             b'--- \xb3odz.txt',
    396             b'+++ \xc5\x82odz.txt',
    397             b'@@ -1 +1 @@',
    398             b'-\xa3odz is a city in Poland.',
    399             b'+\xc5\x81odz is a city in Poland.',
    400         ]
    401         actual = difflib.diff_bytes(unified, a, b, fna, fnb, lineterm=b'')
    402         assertDiff(expect, actual)
    403 
    404         # with dates (plain ASCII)
    405         datea = b'2005-03-18'
    406         dateb = b'2005-03-19'
    407         check(difflib.diff_bytes(unified, a, b, fna, fnb, datea, dateb))
    408         check(difflib.diff_bytes(context, a, b, fna, fnb, datea, dateb))
    409 
    410         expect = [
    411             # note the mixed encodings here: this is deeply wrong by every
    412             # tenet of Unicode, but it doesn't crash, it's parseable by
    413             # patch, and it's how UNIX(tm) diff behaves
    414             b'--- \xb3odz.txt\t2005-03-18',
    415             b'+++ \xc5\x82odz.txt\t2005-03-19',
    416             b'@@ -1 +1 @@',
    417             b'-\xa3odz is a city in Poland.',
    418             b'+\xc5\x81odz is a city in Poland.',
    419         ]
    420         actual = difflib.diff_bytes(unified, a, b, fna, fnb, datea, dateb,
    421                                     lineterm=b'')
    422         assertDiff(expect, actual)
    423 
    424     def test_mixed_types_content(self):
    425         # type of input content must be consistent: all str or all bytes
    426         a = [b'hello']
    427         b = ['hello']
    428 
    429         unified = difflib.unified_diff
    430         context = difflib.context_diff
    431 
    432         expect = "lines to compare must be str, not bytes (b'hello')"
    433         self._assert_type_error(expect, unified, a, b)
    434         self._assert_type_error(expect, unified, b, a)
    435         self._assert_type_error(expect, context, a, b)
    436         self._assert_type_error(expect, context, b, a)
    437 
    438         expect = "all arguments must be bytes, not str ('hello')"
    439         self._assert_type_error(expect, difflib.diff_bytes, unified, a, b)
    440         self._assert_type_error(expect, difflib.diff_bytes, unified, b, a)
    441         self._assert_type_error(expect, difflib.diff_bytes, context, a, b)
    442         self._assert_type_error(expect, difflib.diff_bytes, context, b, a)
    443 
    444     def test_mixed_types_filenames(self):
    445         # cannot pass filenames as bytes if content is str (this may not be
    446         # the right behaviour, but at least the test demonstrates how
    447         # things work)
    448         a = ['hello\n']
    449         b = ['ohell\n']
    450         fna = b'ol\xe9.txt'     # filename transcoded from ISO-8859-1
    451         fnb = b'ol\xc3a9.txt'   # to UTF-8
    452         self._assert_type_error(
    453             "all arguments must be str, not: b'ol\\xe9.txt'",
    454             difflib.unified_diff, a, b, fna, fnb)
    455 
    456     def test_mixed_types_dates(self):
    457         # type of dates must be consistent with type of contents
    458         a = [b'foo\n']
    459         b = [b'bar\n']
    460         datea = '1 fv'
    461         dateb = '3 fv'
    462         self._assert_type_error(
    463             "all arguments must be bytes, not str ('1 fv')",
    464             difflib.diff_bytes, difflib.unified_diff,
    465             a, b, b'a', b'b', datea, dateb)
    466 
    467         # if input is str, non-ASCII dates are fine
    468         a = ['foo\n']
    469         b = ['bar\n']
    470         list(difflib.unified_diff(a, b, 'a', 'b', datea, dateb))
    471 
    472     def _assert_type_error(self, msg, generator, *args):
    473         with self.assertRaises(TypeError) as ctx:
    474             list(generator(*args))
    475         self.assertEqual(msg, str(ctx.exception))
    476 
    477 class TestJunkAPIs(unittest.TestCase):
    478     def test_is_line_junk_true(self):
    479         for line in ['#', '  ', ' #', '# ', ' # ', '']:
    480             self.assertTrue(difflib.IS_LINE_JUNK(line), repr(line))
    481 
    482     def test_is_line_junk_false(self):
    483         for line in ['##', ' ##', '## ', 'abc ', 'abc #', 'Mr. Moose is up!']:
    484             self.assertFalse(difflib.IS_LINE_JUNK(line), repr(line))
    485 
    486     def test_is_line_junk_REDOS(self):
    487         evil_input = ('\t' * 1000000) + '##'
    488         self.assertFalse(difflib.IS_LINE_JUNK(evil_input))
    489 
    490     def test_is_character_junk_true(self):
    491         for char in [' ', '\t']:
    492             self.assertTrue(difflib.IS_CHARACTER_JUNK(char), repr(char))
    493 
    494     def test_is_character_junk_false(self):
    495         for char in ['a', '#', '\n', '\f', '\r', '\v']:
    496             self.assertFalse(difflib.IS_CHARACTER_JUNK(char), repr(char))
    497 
    498 def test_main():
    499     difflib.HtmlDiff._default_prefix = 0
    500     Doctests = doctest.DocTestSuite(difflib)
    501     run_unittest(
    502         TestWithAscii, TestAutojunk, TestSFpatches, TestSFbugs,
    503         TestOutputFormat, TestBytes, TestJunkAPIs, Doctests)
    504 
# Run the whole suite when this file is executed as a script.
if __name__ == '__main__':
    test_main()
    507