Home | History | Annotate | Download | only in test
      1 import difflib
      2 from test.support import run_unittest, findfile
      3 import unittest
      4 import doctest
      5 import sys
      6 
      7 
      8 class TestWithAscii(unittest.TestCase):
      9     def test_one_insert(self):
     10         sm = difflib.SequenceMatcher(None, 'b' * 100, 'a' + 'b' * 100)
     11         self.assertAlmostEqual(sm.ratio(), 0.995, places=3)
     12         self.assertEqual(list(sm.get_opcodes()),
     13             [   ('insert', 0, 0, 0, 1),
     14                 ('equal', 0, 100, 1, 101)])
     15         self.assertEqual(sm.bpopular, set())
     16         sm = difflib.SequenceMatcher(None, 'b' * 100, 'b' * 50 + 'a' + 'b' * 50)
     17         self.assertAlmostEqual(sm.ratio(), 0.995, places=3)
     18         self.assertEqual(list(sm.get_opcodes()),
     19             [   ('equal', 0, 50, 0, 50),
     20                 ('insert', 50, 50, 50, 51),
     21                 ('equal', 50, 100, 51, 101)])
     22         self.assertEqual(sm.bpopular, set())
     23 
     24     def test_one_delete(self):
     25         sm = difflib.SequenceMatcher(None, 'a' * 40 + 'c' + 'b' * 40, 'a' * 40 + 'b' * 40)
     26         self.assertAlmostEqual(sm.ratio(), 0.994, places=3)
     27         self.assertEqual(list(sm.get_opcodes()),
     28             [   ('equal', 0, 40, 0, 40),
     29                 ('delete', 40, 41, 40, 40),
     30                 ('equal', 41, 81, 40, 80)])
     31 
     32     def test_bjunk(self):
     33         sm = difflib.SequenceMatcher(isjunk=lambda x: x == ' ',
     34                 a='a' * 40 + 'b' * 40, b='a' * 44 + 'b' * 40)
     35         self.assertEqual(sm.bjunk, set())
     36 
     37         sm = difflib.SequenceMatcher(isjunk=lambda x: x == ' ',
     38                 a='a' * 40 + 'b' * 40, b='a' * 44 + 'b' * 40 + ' ' * 20)
     39         self.assertEqual(sm.bjunk, {' '})
     40 
     41         sm = difflib.SequenceMatcher(isjunk=lambda x: x in [' ', 'b'],
     42                 a='a' * 40 + 'b' * 40, b='a' * 44 + 'b' * 40 + ' ' * 20)
     43         self.assertEqual(sm.bjunk, {' ', 'b'})
     44 
     45 
     46 class TestAutojunk(unittest.TestCase):
     47     """Tests for the autojunk parameter added in 2.7"""
     48     def test_one_insert_homogenous_sequence(self):
     49         # By default autojunk=True and the heuristic kicks in for a sequence
     50         # of length 200+
     51         seq1 = 'b' * 200
     52         seq2 = 'a' + 'b' * 200
     53 
     54         sm = difflib.SequenceMatcher(None, seq1, seq2)
     55         self.assertAlmostEqual(sm.ratio(), 0, places=3)
     56         self.assertEqual(sm.bpopular, {'b'})
     57 
     58         # Now turn the heuristic off
     59         sm = difflib.SequenceMatcher(None, seq1, seq2, autojunk=False)
     60         self.assertAlmostEqual(sm.ratio(), 0.9975, places=3)
     61         self.assertEqual(sm.bpopular, set())
     62 
     63 
     64 class TestSFbugs(unittest.TestCase):
     65     def test_ratio_for_null_seqn(self):
     66         # Check clearing of SF bug 763023
     67         s = difflib.SequenceMatcher(None, [], [])
     68         self.assertEqual(s.ratio(), 1)
     69         self.assertEqual(s.quick_ratio(), 1)
     70         self.assertEqual(s.real_quick_ratio(), 1)
     71 
     72     def test_comparing_empty_lists(self):
     73         # Check fix for bug #979794
     74         group_gen = difflib.SequenceMatcher(None, [], []).get_grouped_opcodes()
     75         self.assertRaises(StopIteration, next, group_gen)
     76         diff_gen = difflib.unified_diff([], [])
     77         self.assertRaises(StopIteration, next, diff_gen)
     78 
     79     def test_matching_blocks_cache(self):
     80         # Issue #21635
     81         s = difflib.SequenceMatcher(None, "abxcd", "abcd")
     82         first = s.get_matching_blocks()
     83         second = s.get_matching_blocks()
     84         self.assertEqual(second[0].size, 2)
     85         self.assertEqual(second[1].size, 2)
     86         self.assertEqual(second[2].size, 0)
     87 
     88     def test_added_tab_hint(self):
     89         # Check fix for bug #1488943
     90         diff = list(difflib.Differ().compare(["\tI am a buggy"],["\t\tI am a bug"]))
     91         self.assertEqual("- \tI am a buggy", diff[0])
     92         self.assertEqual("?            --\n", diff[1])
     93         self.assertEqual("+ \t\tI am a bug", diff[2])
     94         self.assertEqual("? +\n", diff[3])
     95 
# "From"/"to" input texts for the HtmlDiff tests in TestSFpatches (SF patch
# 914575).  Between the two versions line 1 changes case ("beTTer" ->
# "better"), line 2 is dropped, line 3 gains extra spaces after its number,
# line 4 is reworded and a line 5 is added.
patch914575_from1 = """
   1. Beautiful is beTTer than ugly.
   2. Explicit is better than implicit.
   3. Simple is better than complex.
   4. Complex is better than complicated.
"""

patch914575_to1 = """
   1. Beautiful is better than ugly.
   3.   Simple is better than complex.
   4. Complicated is better than complex.
   5. Flat is better than nested.
"""
    109 
    110 patch914575_nonascii_from1 = """
    111    1. Beautiful is beTTer than ugly.
    112    2. Explicit is better than mplct.
    113    3. Simple is better than complex.
    114    4. Complex is better than complicated.
    115 """
    116 
    117 patch914575_nonascii_to1 = """
    118    1. Beautiful is better than gly.
    119    3.   Smple is better than complex.
    120    4. Complicated is better than cmplex.
    121    5. Flat is better than nested.
    122 """
    123 
# Whitespace-only differences, used by TestSFpatches.test_html_diff with both
# tabsize=2 and the default tab size.  Each line's bracketed markers describe
# its own whitespace: "t" stands for a tab and "s" for a space, before
# ("from") and after ("to") the change.
# NOTE(review): the "[ss] at end" marker on line 5 implies two trailing
# spaces in the "to" text -- confirm they have not been stripped by an editor.
patch914575_from2 = """
\t\tLine 1: preceded by from:[tt] to:[ssss]
  \t\tLine 2: preceded by from:[sstt] to:[sssst]
  \t \tLine 3: preceded by from:[sstst] to:[ssssss]
Line 4:  \thas from:[sst] to:[sss] after :
Line 5: has from:[t] to:[ss] at end\t
"""

patch914575_to2 = """
    Line 1: preceded by from:[tt] to:[ssss]
    \tLine 2: preceded by from:[sstt] to:[sssst]
      Line 3: preceded by from:[sstst] to:[ssssss]
Line 4:   has from:[sst] to:[sss] after :
Line 5: has from:[t] to:[ss] at end
"""
    139 
# Texts mixing short and long lines, used by TestSFpatches.test_html_diff
# with HtmlDiff(wrapcolumn=14).  Neither string ends with a newline, and the
# test splits them both with and without keeping line endings.
patch914575_from3 = """line 0
1234567890123456789012345689012345
line 1
line 2
line 3
line 4   changed
line 5   changed
line 6   changed
line 7
line 8  subtracted
line 9
1234567890123456789012345689012345
short line
just fits in!!
just fits in two lines yup!!
the end"""

patch914575_to3 = """line 0
1234567890123456789012345689012345
line 1
line 2    added
line 3
line 4   chanGEd
line 5a  chanGed
line 6a  changEd
line 7
line 8
line 9
1234567890
another long line that needs to be wrapped
just fitS in!!
just fits in two lineS yup!!
the end"""
    173 
    174 class TestSFpatches(unittest.TestCase):
    175 
    176     def test_html_diff(self):
    177         # Check SF patch 914575 for generating HTML differences
    178         f1a = ((patch914575_from1 + '123\n'*10)*3)
    179         t1a = (patch914575_to1 + '123\n'*10)*3
    180         f1b = '456\n'*10 + f1a
    181         t1b = '456\n'*10 + t1a
    182         f1a = f1a.splitlines()
    183         t1a = t1a.splitlines()
    184         f1b = f1b.splitlines()
    185         t1b = t1b.splitlines()
    186         f2 = patch914575_from2.splitlines()
    187         t2 = patch914575_to2.splitlines()
    188         f3 = patch914575_from3
    189         t3 = patch914575_to3
    190         i = difflib.HtmlDiff()
    191         j = difflib.HtmlDiff(tabsize=2)
    192         k = difflib.HtmlDiff(wrapcolumn=14)
    193 
    194         full = i.make_file(f1a,t1a,'from','to',context=False,numlines=5)
    195         tables = '\n'.join(
    196             [
    197              '<h2>Context (first diff within numlines=5(default))</h2>',
    198              i.make_table(f1a,t1a,'from','to',context=True),
    199              '<h2>Context (first diff after numlines=5(default))</h2>',
    200              i.make_table(f1b,t1b,'from','to',context=True),
    201              '<h2>Context (numlines=6)</h2>',
    202              i.make_table(f1a,t1a,'from','to',context=True,numlines=6),
    203              '<h2>Context (numlines=0)</h2>',
    204              i.make_table(f1a,t1a,'from','to',context=True,numlines=0),
    205              '<h2>Same Context</h2>',
    206              i.make_table(f1a,f1a,'from','to',context=True),
    207              '<h2>Same Full</h2>',
    208              i.make_table(f1a,f1a,'from','to',context=False),
    209              '<h2>Empty Context</h2>',
    210              i.make_table([],[],'from','to',context=True),
    211              '<h2>Empty Full</h2>',
    212              i.make_table([],[],'from','to',context=False),
    213              '<h2>tabsize=2</h2>',
    214              j.make_table(f2,t2),
    215              '<h2>tabsize=default</h2>',
    216              i.make_table(f2,t2),
    217              '<h2>Context (wrapcolumn=14,numlines=0)</h2>',
    218              k.make_table(f3.splitlines(),t3.splitlines(),context=True,numlines=0),
    219              '<h2>wrapcolumn=14,splitlines()</h2>',
    220              k.make_table(f3.splitlines(),t3.splitlines()),
    221              '<h2>wrapcolumn=14,splitlines(True)</h2>',
    222              k.make_table(f3.splitlines(True),t3.splitlines(True)),
    223              ])
    224         actual = full.replace('</body>','\n%s\n</body>' % tables)
    225 
    226         # temporarily uncomment next two lines to baseline this test
    227         #with open('test_difflib_expect.html','w') as fp:
    228         #    fp.write(actual)
    229 
    230         with open(findfile('test_difflib_expect.html')) as fp:
    231             self.assertEqual(actual, fp.read())
    232 
    233     def test_recursion_limit(self):
    234         # Check if the problem described in patch #1413711 exists.
    235         limit = sys.getrecursionlimit()
    236         old = [(i%2 and "K:%d" or "V:A:%d") % i for i in range(limit*2)]
    237         new = [(i%2 and "K:%d" or "V:B:%d") % i for i in range(limit*2)]
    238         difflib.SequenceMatcher(None, old, new).get_opcodes()
    239 
    240     def test_make_file_default_charset(self):
    241         html_diff = difflib.HtmlDiff()
    242         output = html_diff.make_file(patch914575_from1.splitlines(),
    243                                      patch914575_to1.splitlines())
    244         self.assertIn('content="text/html; charset=utf-8"', output)
    245 
    246     def test_make_file_iso88591_charset(self):
    247         html_diff = difflib.HtmlDiff()
    248         output = html_diff.make_file(patch914575_from1.splitlines(),
    249                                      patch914575_to1.splitlines(),
    250                                      charset='iso-8859-1')
    251         self.assertIn('content="text/html; charset=iso-8859-1"', output)
    252 
    253     def test_make_file_usascii_charset_with_nonascii_input(self):
    254         html_diff = difflib.HtmlDiff()
    255         output = html_diff.make_file(patch914575_nonascii_from1.splitlines(),
    256                                      patch914575_nonascii_to1.splitlines(),
    257                                      charset='us-ascii')
    258         self.assertIn('content="text/html; charset=us-ascii"', output)
    259         self.assertIn('&#305;mpl&#305;c&#305;t', output)
    260 
    261 
    262 class TestOutputFormat(unittest.TestCase):
    263     def test_tab_delimiter(self):
    264         args = ['one', 'two', 'Original', 'Current',
    265             '2005-01-26 23:30:50', '2010-04-02 10:20:52']
    266         ud = difflib.unified_diff(*args, lineterm='')
    267         self.assertEqual(list(ud)[0:2], [
    268                            "--- Original\t2005-01-26 23:30:50",
    269                            "+++ Current\t2010-04-02 10:20:52"])
    270         cd = difflib.context_diff(*args, lineterm='')
    271         self.assertEqual(list(cd)[0:2], [
    272                            "*** Original\t2005-01-26 23:30:50",
    273                            "--- Current\t2010-04-02 10:20:52"])
    274 
    275     def test_no_trailing_tab_on_empty_filedate(self):
    276         args = ['one', 'two', 'Original', 'Current']
    277         ud = difflib.unified_diff(*args, lineterm='')
    278         self.assertEqual(list(ud)[0:2], ["--- Original", "+++ Current"])
    279 
    280         cd = difflib.context_diff(*args, lineterm='')
    281         self.assertEqual(list(cd)[0:2], ["*** Original", "--- Current"])
    282 
    283     def test_range_format_unified(self):
    284         # Per the diff spec at http://www.unix.org/single_unix_specification/
    285         spec = '''\
    286            Each <range> field shall be of the form:
    287              %1d", <beginning line number>  if the range contains exactly one line,
    288            and:
    289             "%1d,%1d", <beginning line number>, <number of lines> otherwise.
    290            If a range is empty, its beginning line number shall be the number of
    291            the line just before the range, or 0 if the empty range starts the file.
    292         '''
    293         fmt = difflib._format_range_unified
    294         self.assertEqual(fmt(3,3), '3,0')
    295         self.assertEqual(fmt(3,4), '4')
    296         self.assertEqual(fmt(3,5), '4,2')
    297         self.assertEqual(fmt(3,6), '4,3')
    298         self.assertEqual(fmt(0,0), '0,0')
    299 
    300     def test_range_format_context(self):
    301         # Per the diff spec at http://www.unix.org/single_unix_specification/
    302         spec = '''\
    303            The range of lines in file1 shall be written in the following format
    304            if the range contains two or more lines:
    305                "*** %d,%d ****\n", <beginning line number>, <ending line number>
    306            and the following format otherwise:
    307                "*** %d ****\n", <ending line number>
    308            The ending line number of an empty range shall be the number of the preceding line,
    309            or 0 if the range is at the start of the file.
    310 
    311            Next, the range of lines in file2 shall be written in the following format
    312            if the range contains two or more lines:
    313                "--- %d,%d ----\n", <beginning line number>, <ending line number>
    314            and the following format otherwise:
    315                "--- %d ----\n", <ending line number>
    316         '''
    317         fmt = difflib._format_range_context
    318         self.assertEqual(fmt(3,3), '3')
    319         self.assertEqual(fmt(3,4), '4')
    320         self.assertEqual(fmt(3,5), '4,5')
    321         self.assertEqual(fmt(3,6), '4,6')
    322         self.assertEqual(fmt(0,0), '0')
    323 
    324 
    325 class TestBytes(unittest.TestCase):
    326     # don't really care about the content of the output, just the fact
    327     # that it's bytes and we don't crash
    328     def check(self, diff):
    329         diff = list(diff)   # trigger exceptions first
    330         for line in diff:
    331             self.assertIsInstance(
    332                 line, bytes,
    333                 "all lines of diff should be bytes, but got: %r" % line)
    334 
    335     def test_byte_content(self):
    336         # if we receive byte strings, we return byte strings
    337         a = [b'hello', b'andr\xe9']     # iso-8859-1 bytes
    338         b = [b'hello', b'andr\xc3\xa9'] # utf-8 bytes
    339 
    340         unified = difflib.unified_diff
    341         context = difflib.context_diff
    342 
    343         check = self.check
    344         check(difflib.diff_bytes(unified, a, a))
    345         check(difflib.diff_bytes(unified, a, b))
    346 
    347         # now with filenames (content and filenames are all bytes!)
    348         check(difflib.diff_bytes(unified, a, a, b'a', b'a'))
    349         check(difflib.diff_bytes(unified, a, b, b'a', b'b'))
    350 
    351         # and with filenames and dates
    352         check(difflib.diff_bytes(unified, a, a, b'a', b'a', b'2005', b'2013'))
    353         check(difflib.diff_bytes(unified, a, b, b'a', b'b', b'2005', b'2013'))
    354 
    355         # same all over again, with context diff
    356         check(difflib.diff_bytes(context, a, a))
    357         check(difflib.diff_bytes(context, a, b))
    358         check(difflib.diff_bytes(context, a, a, b'a', b'a'))
    359         check(difflib.diff_bytes(context, a, b, b'a', b'b'))
    360         check(difflib.diff_bytes(context, a, a, b'a', b'a', b'2005', b'2013'))
    361         check(difflib.diff_bytes(context, a, b, b'a', b'b', b'2005', b'2013'))
    362 
    363     def test_byte_filenames(self):
    364         # somebody renamed a file from ISO-8859-2 to UTF-8
    365         fna = b'\xb3odz.txt'    # "odz.txt"
    366         fnb = b'\xc5\x82odz.txt'
    367 
    368         # they transcoded the content at the same time
    369         a = [b'\xa3odz is a city in Poland.']
    370         b = [b'\xc5\x81odz is a city in Poland.']
    371 
    372         check = self.check
    373         unified = difflib.unified_diff
    374         context = difflib.context_diff
    375         check(difflib.diff_bytes(unified, a, b, fna, fnb))
    376         check(difflib.diff_bytes(context, a, b, fna, fnb))
    377 
    378         def assertDiff(expect, actual):
    379             # do not compare expect and equal as lists, because unittest
    380             # uses difflib to report difference between lists
    381             actual = list(actual)
    382             self.assertEqual(len(expect), len(actual))
    383             for e, a in zip(expect, actual):
    384                 self.assertEqual(e, a)
    385 
    386         expect = [
    387             b'--- \xb3odz.txt',
    388             b'+++ \xc5\x82odz.txt',
    389             b'@@ -1 +1 @@',
    390             b'-\xa3odz is a city in Poland.',
    391             b'+\xc5\x81odz is a city in Poland.',
    392         ]
    393         actual = difflib.diff_bytes(unified, a, b, fna, fnb, lineterm=b'')
    394         assertDiff(expect, actual)
    395 
    396         # with dates (plain ASCII)
    397         datea = b'2005-03-18'
    398         dateb = b'2005-03-19'
    399         check(difflib.diff_bytes(unified, a, b, fna, fnb, datea, dateb))
    400         check(difflib.diff_bytes(context, a, b, fna, fnb, datea, dateb))
    401 
    402         expect = [
    403             # note the mixed encodings here: this is deeply wrong by every
    404             # tenet of Unicode, but it doesn't crash, it's parseable by
    405             # patch, and it's how UNIX(tm) diff behaves
    406             b'--- \xb3odz.txt\t2005-03-18',
    407             b'+++ \xc5\x82odz.txt\t2005-03-19',
    408             b'@@ -1 +1 @@',
    409             b'-\xa3odz is a city in Poland.',
    410             b'+\xc5\x81odz is a city in Poland.',
    411         ]
    412         actual = difflib.diff_bytes(unified, a, b, fna, fnb, datea, dateb,
    413                                     lineterm=b'')
    414         assertDiff(expect, actual)
    415 
    416     def test_mixed_types_content(self):
    417         # type of input content must be consistent: all str or all bytes
    418         a = [b'hello']
    419         b = ['hello']
    420 
    421         unified = difflib.unified_diff
    422         context = difflib.context_diff
    423 
    424         expect = "lines to compare must be str, not bytes (b'hello')"
    425         self._assert_type_error(expect, unified, a, b)
    426         self._assert_type_error(expect, unified, b, a)
    427         self._assert_type_error(expect, context, a, b)
    428         self._assert_type_error(expect, context, b, a)
    429 
    430         expect = "all arguments must be bytes, not str ('hello')"
    431         self._assert_type_error(expect, difflib.diff_bytes, unified, a, b)
    432         self._assert_type_error(expect, difflib.diff_bytes, unified, b, a)
    433         self._assert_type_error(expect, difflib.diff_bytes, context, a, b)
    434         self._assert_type_error(expect, difflib.diff_bytes, context, b, a)
    435 
    436     def test_mixed_types_filenames(self):
    437         # cannot pass filenames as bytes if content is str (this may not be
    438         # the right behaviour, but at least the test demonstrates how
    439         # things work)
    440         a = ['hello\n']
    441         b = ['ohell\n']
    442         fna = b'ol\xe9.txt'     # filename transcoded from ISO-8859-1
    443         fnb = b'ol\xc3a9.txt'   # to UTF-8
    444         self._assert_type_error(
    445             "all arguments must be str, not: b'ol\\xe9.txt'",
    446             difflib.unified_diff, a, b, fna, fnb)
    447 
    448     def test_mixed_types_dates(self):
    449         # type of dates must be consistent with type of contents
    450         a = [b'foo\n']
    451         b = [b'bar\n']
    452         datea = '1 fv'
    453         dateb = '3 fv'
    454         self._assert_type_error(
    455             "all arguments must be bytes, not str ('1 fv')",
    456             difflib.diff_bytes, difflib.unified_diff,
    457             a, b, b'a', b'b', datea, dateb)
    458 
    459         # if input is str, non-ASCII dates are fine
    460         a = ['foo\n']
    461         b = ['bar\n']
    462         list(difflib.unified_diff(a, b, 'a', 'b', datea, dateb))
    463 
    464     def _assert_type_error(self, msg, generator, *args):
    465         with self.assertRaises(TypeError) as ctx:
    466             list(generator(*args))
    467         self.assertEqual(msg, str(ctx.exception))
    468 
    469 
    470 def test_main():
    471     difflib.HtmlDiff._default_prefix = 0
    472     Doctests = doctest.DocTestSuite(difflib)
    473     run_unittest(
    474         TestWithAscii, TestAutojunk, TestSFpatches, TestSFbugs,
    475         TestOutputFormat, TestBytes, Doctests)
    476 
    477 if __name__ == '__main__':
    478     test_main()
    479