1 import difflib 2 from test.support import run_unittest, findfile 3 import unittest 4 import doctest 5 import sys 6 7 8 class TestWithAscii(unittest.TestCase): 9 def test_one_insert(self): 10 sm = difflib.SequenceMatcher(None, 'b' * 100, 'a' + 'b' * 100) 11 self.assertAlmostEqual(sm.ratio(), 0.995, places=3) 12 self.assertEqual(list(sm.get_opcodes()), 13 [ ('insert', 0, 0, 0, 1), 14 ('equal', 0, 100, 1, 101)]) 15 self.assertEqual(sm.bpopular, set()) 16 sm = difflib.SequenceMatcher(None, 'b' * 100, 'b' * 50 + 'a' + 'b' * 50) 17 self.assertAlmostEqual(sm.ratio(), 0.995, places=3) 18 self.assertEqual(list(sm.get_opcodes()), 19 [ ('equal', 0, 50, 0, 50), 20 ('insert', 50, 50, 50, 51), 21 ('equal', 50, 100, 51, 101)]) 22 self.assertEqual(sm.bpopular, set()) 23 24 def test_one_delete(self): 25 sm = difflib.SequenceMatcher(None, 'a' * 40 + 'c' + 'b' * 40, 'a' * 40 + 'b' * 40) 26 self.assertAlmostEqual(sm.ratio(), 0.994, places=3) 27 self.assertEqual(list(sm.get_opcodes()), 28 [ ('equal', 0, 40, 0, 40), 29 ('delete', 40, 41, 40, 40), 30 ('equal', 41, 81, 40, 80)]) 31 32 def test_bjunk(self): 33 sm = difflib.SequenceMatcher(isjunk=lambda x: x == ' ', 34 a='a' * 40 + 'b' * 40, b='a' * 44 + 'b' * 40) 35 self.assertEqual(sm.bjunk, set()) 36 37 sm = difflib.SequenceMatcher(isjunk=lambda x: x == ' ', 38 a='a' * 40 + 'b' * 40, b='a' * 44 + 'b' * 40 + ' ' * 20) 39 self.assertEqual(sm.bjunk, {' '}) 40 41 sm = difflib.SequenceMatcher(isjunk=lambda x: x in [' ', 'b'], 42 a='a' * 40 + 'b' * 40, b='a' * 44 + 'b' * 40 + ' ' * 20) 43 self.assertEqual(sm.bjunk, {' ', 'b'}) 44 45 46 class TestAutojunk(unittest.TestCase): 47 """Tests for the autojunk parameter added in 2.7""" 48 def test_one_insert_homogenous_sequence(self): 49 # By default autojunk=True and the heuristic kicks in for a sequence 50 # of length 200+ 51 seq1 = 'b' * 200 52 seq2 = 'a' + 'b' * 200 53 54 sm = difflib.SequenceMatcher(None, seq1, seq2) 55 self.assertAlmostEqual(sm.ratio(), 0, places=3) 56 self.assertEqual(sm.bpopular, {'b'}) 57 58 # Now turn the heuristic off 59 sm = difflib.SequenceMatcher(None, seq1, seq2, autojunk=False) 60 self.assertAlmostEqual(sm.ratio(), 0.9975, places=3) 61 self.assertEqual(sm.bpopular, set()) 62 63 64 class TestSFbugs(unittest.TestCase): 65 def test_ratio_for_null_seqn(self): 66 # Check clearing of SF bug 763023 67 s = difflib.SequenceMatcher(None, [], []) 68 self.assertEqual(s.ratio(), 1) 69 self.assertEqual(s.quick_ratio(), 1) 70 self.assertEqual(s.real_quick_ratio(), 1) 71 72 def test_comparing_empty_lists(self): 73 # Check fix for bug #979794 74 group_gen = difflib.SequenceMatcher(None, [], []).get_grouped_opcodes() 75 self.assertRaises(StopIteration, next, group_gen) 76 diff_gen = difflib.unified_diff([], []) 77 self.assertRaises(StopIteration, next, diff_gen) 78 79 def test_matching_blocks_cache(self): 80 # Issue #21635 81 s = difflib.SequenceMatcher(None, "abxcd", "abcd") 82 first = s.get_matching_blocks() 83 second = s.get_matching_blocks() 84 self.assertEqual(second[0].size, 2) 85 self.assertEqual(second[1].size, 2) 86 self.assertEqual(second[2].size, 0) 87 88 def test_added_tab_hint(self): 89 # Check fix for bug #1488943 90 diff = list(difflib.Differ().compare(["\tI am a buggy"],["\t\tI am a bug"])) 91 self.assertEqual("- \tI am a buggy", diff[0]) 92 self.assertEqual("? --\n", diff[1]) 93 self.assertEqual("+ \t\tI am a bug", diff[2]) 94 self.assertEqual("? +\n", diff[3]) 95 96 patch914575_from1 = """ 97 1. Beautiful is beTTer than ugly. 98 2. Explicit is better than implicit. 99 3. Simple is better than complex. 100 4. Complex is better than complicated. 101 """ 102 103 patch914575_to1 = """ 104 1. Beautiful is better than ugly. 105 3. Simple is better than complex. 106 4. Complicated is better than complex. 107 5. Flat is better than nested. 108 """ 109 110 patch914575_nonascii_from1 = """ 111 1. Beautiful is beTTer than ugly. 112 2. Explicit is better than mplct. 113 3. Simple is better than complex. 114 4. Complex is better than complicated. 115 """ 116 117 patch914575_nonascii_to1 = """ 118 1. Beautiful is better than gly. 119 3. Smple is better than complex. 120 4. Complicated is better than cmplex. 121 5. Flat is better than nested. 122 """ 123 124 patch914575_from2 = """ 125 \t\tLine 1: preceded by from:[tt] to:[ssss] 126 \t\tLine 2: preceded by from:[sstt] to:[sssst] 127 \t \tLine 3: preceded by from:[sstst] to:[ssssss] 128 Line 4: \thas from:[sst] to:[sss] after : 129 Line 5: has from:[t] to:[ss] at end\t 130 """ 131 132 patch914575_to2 = """ 133 Line 1: preceded by from:[tt] to:[ssss] 134 \tLine 2: preceded by from:[sstt] to:[sssst] 135 Line 3: preceded by from:[sstst] to:[ssssss] 136 Line 4: has from:[sst] to:[sss] after : 137 Line 5: has from:[t] to:[ss] at end 138 """ 139 140 patch914575_from3 = """line 0 141 1234567890123456789012345689012345 142 line 1 143 line 2 144 line 3 145 line 4 changed 146 line 5 changed 147 line 6 changed 148 line 7 149 line 8 subtracted 150 line 9 151 1234567890123456789012345689012345 152 short line 153 just fits in!! 154 just fits in two lines yup!! 155 the end""" 156 157 patch914575_to3 = """line 0 158 1234567890123456789012345689012345 159 line 1 160 line 2 added 161 line 3 162 line 4 chanGEd 163 line 5a chanGed 164 line 6a changEd 165 line 7 166 line 8 167 line 9 168 1234567890 169 another long line that needs to be wrapped 170 just fitS in!! 171 just fits in two lineS yup!! 172 the end""" 173 174 class TestSFpatches(unittest.TestCase): 175 176 def test_html_diff(self): 177 # Check SF patch 914575 for generating HTML differences 178 f1a = ((patch914575_from1 + '123\n'*10)*3) 179 t1a = (patch914575_to1 + '123\n'*10)*3 180 f1b = '456\n'*10 + f1a 181 t1b = '456\n'*10 + t1a 182 f1a = f1a.splitlines() 183 t1a = t1a.splitlines() 184 f1b = f1b.splitlines() 185 t1b = t1b.splitlines() 186 f2 = patch914575_from2.splitlines() 187 t2 = patch914575_to2.splitlines() 188 f3 = patch914575_from3 189 t3 = patch914575_to3 190 i = difflib.HtmlDiff() 191 j = difflib.HtmlDiff(tabsize=2) 192 k = difflib.HtmlDiff(wrapcolumn=14) 193 194 full = i.make_file(f1a,t1a,'from','to',context=False,numlines=5) 195 tables = '\n'.join( 196 [ 197 '<h2>Context (first diff within numlines=5(default))</h2>', 198 i.make_table(f1a,t1a,'from','to',context=True), 199 '<h2>Context (first diff after numlines=5(default))</h2>', 200 i.make_table(f1b,t1b,'from','to',context=True), 201 '<h2>Context (numlines=6)</h2>', 202 i.make_table(f1a,t1a,'from','to',context=True,numlines=6), 203 '<h2>Context (numlines=0)</h2>', 204 i.make_table(f1a,t1a,'from','to',context=True,numlines=0), 205 '<h2>Same Context</h2>', 206 i.make_table(f1a,f1a,'from','to',context=True), 207 '<h2>Same Full</h2>', 208 i.make_table(f1a,f1a,'from','to',context=False), 209 '<h2>Empty Context</h2>', 210 i.make_table([],[],'from','to',context=True), 211 '<h2>Empty Full</h2>', 212 i.make_table([],[],'from','to',context=False), 213 '<h2>tabsize=2</h2>', 214 j.make_table(f2,t2), 215 '<h2>tabsize=default</h2>', 216 i.make_table(f2,t2), 217 '<h2>Context (wrapcolumn=14,numlines=0)</h2>', 218 k.make_table(f3.splitlines(),t3.splitlines(),context=True,numlines=0), 219 '<h2>wrapcolumn=14,splitlines()</h2>', 220 k.make_table(f3.splitlines(),t3.splitlines()), 221 '<h2>wrapcolumn=14,splitlines(True)</h2>', 222 k.make_table(f3.splitlines(True),t3.splitlines(True)), 223 ]) 224 actual = full.replace('</body>','\n%s\n</body>' % tables) 225 226 # temporarily uncomment next two lines to baseline this test 227 #with open('test_difflib_expect.html','w') as fp: 228 # fp.write(actual) 229 230 with open(findfile('test_difflib_expect.html')) as fp: 231 self.assertEqual(actual, fp.read()) 232 233 def test_recursion_limit(self): 234 # Check if the problem described in patch #1413711 exists. 235 limit = sys.getrecursionlimit() 236 old = [(i%2 and "K:%d" or "V:A:%d") % i for i in range(limit*2)] 237 new = [(i%2 and "K:%d" or "V:B:%d") % i for i in range(limit*2)] 238 difflib.SequenceMatcher(None, old, new).get_opcodes() 239 240 def test_make_file_default_charset(self): 241 html_diff = difflib.HtmlDiff() 242 output = html_diff.make_file(patch914575_from1.splitlines(), 243 patch914575_to1.splitlines()) 244 self.assertIn('content="text/html; charset=utf-8"', output) 245 246 def test_make_file_iso88591_charset(self): 247 html_diff = difflib.HtmlDiff() 248 output = html_diff.make_file(patch914575_from1.splitlines(), 249 patch914575_to1.splitlines(), 250 charset='iso-8859-1') 251 self.assertIn('content="text/html; charset=iso-8859-1"', output) 252 253 def test_make_file_usascii_charset_with_nonascii_input(self): 254 html_diff = difflib.HtmlDiff() 255 output = html_diff.make_file(patch914575_nonascii_from1.splitlines(), 256 patch914575_nonascii_to1.splitlines(), 257 charset='us-ascii') 258 self.assertIn('content="text/html; charset=us-ascii"', output) 259 self.assertIn('ımplıcıt', output) 260 261 262 class TestOutputFormat(unittest.TestCase): 263 def test_tab_delimiter(self): 264 args = ['one', 'two', 'Original', 'Current', 265 '2005-01-26 23:30:50', '2010-04-02 10:20:52'] 266 ud = difflib.unified_diff(*args, lineterm='') 267 self.assertEqual(list(ud)[0:2], [ 268 "--- Original\t2005-01-26 23:30:50", 269 "+++ Current\t2010-04-02 10:20:52"]) 270 cd = difflib.context_diff(*args, lineterm='') 271 self.assertEqual(list(cd)[0:2], [ 272 "*** Original\t2005-01-26 23:30:50", 273 "--- Current\t2010-04-02 10:20:52"]) 274 275 def test_no_trailing_tab_on_empty_filedate(self): 276 args = ['one', 'two', 'Original', 'Current'] 277 ud = difflib.unified_diff(*args, lineterm='') 278 self.assertEqual(list(ud)[0:2], ["--- Original", "+++ Current"]) 279 280 cd = difflib.context_diff(*args, lineterm='') 281 self.assertEqual(list(cd)[0:2], ["*** Original", "--- Current"]) 282 283 def test_range_format_unified(self): 284 # Per the diff spec at http://www.unix.org/single_unix_specification/ 285 spec = '''\ 286 Each <range> field shall be of the form: 287 %1d", <beginning line number> if the range contains exactly one line, 288 and: 289 "%1d,%1d", <beginning line number>, <number of lines> otherwise. 290 If a range is empty, its beginning line number shall be the number of 291 the line just before the range, or 0 if the empty range starts the file. 292 ''' 293 fmt = difflib._format_range_unified 294 self.assertEqual(fmt(3,3), '3,0') 295 self.assertEqual(fmt(3,4), '4') 296 self.assertEqual(fmt(3,5), '4,2') 297 self.assertEqual(fmt(3,6), '4,3') 298 self.assertEqual(fmt(0,0), '0,0') 299 300 def test_range_format_context(self): 301 # Per the diff spec at http://www.unix.org/single_unix_specification/ 302 spec = '''\ 303 The range of lines in file1 shall be written in the following format 304 if the range contains two or more lines: 305 "*** %d,%d ****\n", <beginning line number>, <ending line number> 306 and the following format otherwise: 307 "*** %d ****\n", <ending line number> 308 The ending line number of an empty range shall be the number of the preceding line, 309 or 0 if the range is at the start of the file. 310 311 Next, the range of lines in file2 shall be written in the following format 312 if the range contains two or more lines: 313 "--- %d,%d ----\n", <beginning line number>, <ending line number> 314 and the following format otherwise: 315 "--- %d ----\n", <ending line number> 316 ''' 317 fmt = difflib._format_range_context 318 self.assertEqual(fmt(3,3), '3') 319 self.assertEqual(fmt(3,4), '4') 320 self.assertEqual(fmt(3,5), '4,5') 321 self.assertEqual(fmt(3,6), '4,6') 322 self.assertEqual(fmt(0,0), '0') 323 324 325 class TestBytes(unittest.TestCase): 326 # don't really care about the content of the output, just the fact 327 # that it's bytes and we don't crash 328 def check(self, diff): 329 diff = list(diff) # trigger exceptions first 330 for line in diff: 331 self.assertIsInstance( 332 line, bytes, 333 "all lines of diff should be bytes, but got: %r" % line) 334 335 def test_byte_content(self): 336 # if we receive byte strings, we return byte strings 337 a = [b'hello', b'andr\xe9'] # iso-8859-1 bytes 338 b = [b'hello', b'andr\xc3\xa9'] # utf-8 bytes 339 340 unified = difflib.unified_diff 341 context = difflib.context_diff 342 343 check = self.check 344 check(difflib.diff_bytes(unified, a, a)) 345 check(difflib.diff_bytes(unified, a, b)) 346 347 # now with filenames (content and filenames are all bytes!) 348 check(difflib.diff_bytes(unified, a, a, b'a', b'a')) 349 check(difflib.diff_bytes(unified, a, b, b'a', b'b')) 350 351 # and with filenames and dates 352 check(difflib.diff_bytes(unified, a, a, b'a', b'a', b'2005', b'2013')) 353 check(difflib.diff_bytes(unified, a, b, b'a', b'b', b'2005', b'2013')) 354 355 # same all over again, with context diff 356 check(difflib.diff_bytes(context, a, a)) 357 check(difflib.diff_bytes(context, a, b)) 358 check(difflib.diff_bytes(context, a, a, b'a', b'a')) 359 check(difflib.diff_bytes(context, a, b, b'a', b'b')) 360 check(difflib.diff_bytes(context, a, a, b'a', b'a', b'2005', b'2013')) 361 check(difflib.diff_bytes(context, a, b, b'a', b'b', b'2005', b'2013')) 362 363 def test_byte_filenames(self): 364 # somebody renamed a file from ISO-8859-2 to UTF-8 365 fna = b'\xb3odz.txt' # "odz.txt" 366 fnb = b'\xc5\x82odz.txt' 367 368 # they transcoded the content at the same time 369 a = [b'\xa3odz is a city in Poland.'] 370 b = [b'\xc5\x81odz is a city in Poland.'] 371 372 check = self.check 373 unified = difflib.unified_diff 374 context = difflib.context_diff 375 check(difflib.diff_bytes(unified, a, b, fna, fnb)) 376 check(difflib.diff_bytes(context, a, b, fna, fnb)) 377 378 def assertDiff(expect, actual): 379 # do not compare expect and equal as lists, because unittest 380 # uses difflib to report difference between lists 381 actual = list(actual) 382 self.assertEqual(len(expect), len(actual)) 383 for e, a in zip(expect, actual): 384 self.assertEqual(e, a) 385 386 expect = [ 387 b'--- \xb3odz.txt', 388 b'+++ \xc5\x82odz.txt', 389 b'@@ -1 +1 @@', 390 b'-\xa3odz is a city in Poland.', 391 b'+\xc5\x81odz is a city in Poland.', 392 ] 393 actual = difflib.diff_bytes(unified, a, b, fna, fnb, lineterm=b'') 394 assertDiff(expect, actual) 395 396 # with dates (plain ASCII) 397 datea = b'2005-03-18' 398 dateb = b'2005-03-19' 399 check(difflib.diff_bytes(unified, a, b, fna, fnb, datea, dateb)) 400 check(difflib.diff_bytes(context, a, b, fna, fnb, datea, dateb)) 401 402 expect = [ 403 # note the mixed encodings here: this is deeply wrong by every 404 # tenet of Unicode, but it doesn't crash, it's parseable by 405 # patch, and it's how UNIX(tm) diff behaves 406 b'--- \xb3odz.txt\t2005-03-18', 407 b'+++ \xc5\x82odz.txt\t2005-03-19', 408 b'@@ -1 +1 @@', 409 b'-\xa3odz is a city in Poland.', 410 b'+\xc5\x81odz is a city in Poland.', 411 ] 412 actual = difflib.diff_bytes(unified, a, b, fna, fnb, datea, dateb, 413 lineterm=b'') 414 assertDiff(expect, actual) 415 416 def test_mixed_types_content(self): 417 # type of input content must be consistent: all str or all bytes 418 a = [b'hello'] 419 b = ['hello'] 420 421 unified = difflib.unified_diff 422 context = difflib.context_diff 423 424 expect = "lines to compare must be str, not bytes (b'hello')" 425 self._assert_type_error(expect, unified, a, b) 426 self._assert_type_error(expect, unified, b, a) 427 self._assert_type_error(expect, context, a, b) 428 self._assert_type_error(expect, context, b, a) 429 430 expect = "all arguments must be bytes, not str ('hello')" 431 self._assert_type_error(expect, difflib.diff_bytes, unified, a, b) 432 self._assert_type_error(expect, difflib.diff_bytes, unified, b, a) 433 self._assert_type_error(expect, difflib.diff_bytes, context, a, b) 434 self._assert_type_error(expect, difflib.diff_bytes, context, b, a) 435 436 def test_mixed_types_filenames(self): 437 # cannot pass filenames as bytes if content is str (this may not be 438 # the right behaviour, but at least the test demonstrates how 439 # things work) 440 a = ['hello\n'] 441 b = ['ohell\n'] 442 fna = b'ol\xe9.txt' # filename transcoded from ISO-8859-1 443 fnb = b'ol\xc3a9.txt' # to UTF-8 444 self._assert_type_error( 445 "all arguments must be str, not: b'ol\\xe9.txt'", 446 difflib.unified_diff, a, b, fna, fnb) 447 448 def test_mixed_types_dates(self): 449 # type of dates must be consistent with type of contents 450 a = [b'foo\n'] 451 b = [b'bar\n'] 452 datea = '1 fv' 453 dateb = '3 fv' 454 self._assert_type_error( 455 "all arguments must be bytes, not str ('1 fv')", 456 difflib.diff_bytes, difflib.unified_diff, 457 a, b, b'a', b'b', datea, dateb) 458 459 # if input is str, non-ASCII dates are fine 460 a = ['foo\n'] 461 b = ['bar\n'] 462 list(difflib.unified_diff(a, b, 'a', 'b', datea, dateb)) 463 464 def _assert_type_error(self, msg, generator, *args): 465 with self.assertRaises(TypeError) as ctx: 466 list(generator(*args)) 467 self.assertEqual(msg, str(ctx.exception)) 468 469 470 def test_main(): 471 difflib.HtmlDiff._default_prefix = 0 472 Doctests = doctest.DocTestSuite(difflib) 473 run_unittest( 474 TestWithAscii, TestAutojunk, TestSFpatches, TestSFbugs, 475 TestOutputFormat, TestBytes, Doctests) 476 477 if __name__ == '__main__': 478 test_main() 479