# XXX TypeErrors on calling handlers, or on bad return values from a
# handler, are obscure and unhelpful.
"""Unit tests for the xml.parsers.expat parser bindings."""

import StringIO
import sys
import unittest

from xml.parsers import expat

from test import test_support
from test.test_support import sortdict, run_unittest


class SetAttributeTest(unittest.TestCase):
    """Check that the boolean-like parser attributes can be set and read."""

    def setUp(self):
        self.parser = expat.ParserCreate(namespace_separator='!')
        # Each pair is [value assigned, value expected when read back].
        self.set_get_pairs = [
            [0, 0],
            [1, 1],
            [2, 1],
            [0, 0],
        ]

    def test_returns_unicode(self):
        for x, y in self.set_get_pairs:
            self.parser.returns_unicode = x
            self.assertEqual(self.parser.returns_unicode, y)

    def test_ordered_attributes(self):
        for x, y in self.set_get_pairs:
            self.parser.ordered_attributes = x
            self.assertEqual(self.parser.ordered_attributes, y)

    def test_specified_attributes(self):
        for x, y in self.set_get_pairs:
            self.parser.specified_attributes = x
            self.assertEqual(self.parser.specified_attributes, y)


# Sample document shared by the ParseTest cases.  Non-ASCII characters are
# written as numeric character references so that this source file stays
# pure ASCII and the values do not depend on the declared iso-8859-1 encoding.
data = '''\
<?xml version="1.0" encoding="iso-8859-1" standalone="no"?>
<?xml-stylesheet href="stylesheet.css"?>
<!-- comment data -->
<!DOCTYPE quotations SYSTEM "quotations.dtd" [
<!ELEMENT root ANY>
<!NOTATION notation SYSTEM "notation.jpeg">
<!ENTITY acirc "&#226;">
<!ENTITY external_entity SYSTEM "entity.file">
<!ENTITY unparsed_entity SYSTEM "entity.file" NDATA notation>
%unparsed_entity;
]>

<root attr1="value1" attr2="value2&#8000;">
<myns:subelement xmlns:myns="http://www.python.org/namespace">
     Contents of subelements
</myns:subelement>
<sub2><![CDATA[contents of CDATA section]]></sub2>
&external_entity;
</root>
'''


# Produce UTF-8 output
class ParseTest(unittest.TestCase):
    """Parse the sample document and compare the recorded handler events."""

    class Outputter:
        """Handler collection that records every event as a string in .out."""

        def __init__(self):
            self.out = []

        def StartElementHandler(self, name, attrs):
            self.out.append('Start element: ' + repr(name) + ' ' +
                            sortdict(attrs))

        def EndElementHandler(self, name):
            self.out.append('End element: ' + repr(name))

        def CharacterDataHandler(self, data):
            # Whitespace-only chunks are dropped so that the recorded events
            # do not depend on how the text between elements is split up.
            data = data.strip()
            if data:
                self.out.append('Character data: ' + repr(data))

        def ProcessingInstructionHandler(self, target, data):
            self.out.append('PI: ' + repr(target) + ' ' + repr(data))

        def StartNamespaceDeclHandler(self, prefix, uri):
            self.out.append('NS decl: ' + repr(prefix) + ' ' + repr(uri))

        def EndNamespaceDeclHandler(self, prefix):
            self.out.append('End of NS decl: ' + repr(prefix))

        def StartCdataSectionHandler(self):
            self.out.append('Start of CDATA section')

        def EndCdataSectionHandler(self):
            self.out.append('End of CDATA section')

        def CommentHandler(self, text):
            self.out.append('Comment: ' + repr(text))

        def NotationDeclHandler(self, *args):
            # The unpacking fails unless exactly four arguments are passed.
            name, base, sysid, pubid = args
            self.out.append('Notation declared: %s' %(args,))

        def UnparsedEntityDeclHandler(self, *args):
            # The unpacking fails unless exactly five arguments are passed.
            entityName, base, systemId, publicId, notationName = args
            self.out.append('Unparsed entity decl: %s' %(args,))

        def NotStandaloneHandler(self, userData):
            self.out.append('Not standalone')
            return 1

        def ExternalEntityRefHandler(self, *args):
            # The unpacking fails unless exactly four arguments are passed;
            # the context (first argument) is left out of the recorded event.
            context, base, sysId, pubId = args
            self.out.append('External entity ref: %s' %(args[1:],))
            return 1

        def DefaultHandler(self, userData):
            pass

        def DefaultHandlerExpand(self, userData):
            pass

    handler_names = [
        'StartElementHandler', 'EndElementHandler',
        'CharacterDataHandler', 'ProcessingInstructionHandler',
        'UnparsedEntityDeclHandler', 'NotationDeclHandler',
        'StartNamespaceDeclHandler', 'EndNamespaceDeclHandler',
        'CommentHandler', 'StartCdataSectionHandler',
        'EndCdataSectionHandler',
        'DefaultHandler', 'DefaultHandlerExpand',
        #'NotStandaloneHandler',
        'ExternalEntityRefHandler'
        ]

    # Events expected from the sample document with returns_unicode = 0.
    _expected_utf8 = [
        'PI: \'xml-stylesheet\' \'href="stylesheet.css"\'',
        "Comment: ' comment data '",
        "Notation declared: ('notation', None, 'notation.jpeg', None)",
        "Unparsed entity decl: ('unparsed_entity', None, 'entity.file', None, 'notation')",
        "Start element: 'root' {'attr1': 'value1', 'attr2': 'value2\\xe1\\xbd\\x80'}",
        "NS decl: 'myns' 'http://www.python.org/namespace'",
        "Start element: 'http://www.python.org/namespace!subelement' {}",
        "Character data: 'Contents of subelements'",
        "End element: 'http://www.python.org/namespace!subelement'",
        "End of NS decl: 'myns'",
        "Start element: 'sub2' {}",
        'Start of CDATA section',
        "Character data: 'contents of CDATA section'",
        'End of CDATA section',
        "End element: 'sub2'",
        "External entity ref: (None, 'entity.file', None)",
        "End element: 'root'",
        ]

    # Events expected from the sample document with returns_unicode = 1.
    _expected_unicode = [
        'PI: u\'xml-stylesheet\' u\'href="stylesheet.css"\'',
        "Comment: u' comment data '",
        "Notation declared: (u'notation', None, u'notation.jpeg', None)",
        "Unparsed entity decl: (u'unparsed_entity', None, u'entity.file', None, u'notation')",
        "Start element: u'root' {u'attr1': u'value1', u'attr2': u'value2\\u1f40'}",
        "NS decl: u'myns' u'http://www.python.org/namespace'",
        "Start element: u'http://www.python.org/namespace!subelement' {}",
        "Character data: u'Contents of subelements'",
        "End element: u'http://www.python.org/namespace!subelement'",
        "End of NS decl: u'myns'",
        "Start element: u'sub2' {}",
        'Start of CDATA section',
        "Character data: u'contents of CDATA section'",
        'End of CDATA section',
        "End element: u'sub2'",
        "External entity ref: (None, u'entity.file', None)",
        "End element: u'root'",
        ]

    def _check_events(self, events, expected):
        """Assert that events[i] equals expected[i] for every expected entry."""
        for index, wanted in enumerate(expected):
            self.assertEqual(events[index], wanted)

    def test_utf8(self):
        out = self.Outputter()
        parser = expat.ParserCreate(namespace_separator='!')
        for name in self.handler_names:
            setattr(parser, name, getattr(out, name))
        parser.returns_unicode = 0
        parser.Parse(data, 1)

        # Verify output
        self._check_events(out.out, self._expected_utf8)

    def test_unicode(self):
        # Try the parse again, this time producing Unicode output
        out = self.Outputter()
        parser = expat.ParserCreate(namespace_separator='!')
        parser.returns_unicode = 1
        for name in self.handler_names:
            setattr(parser, name, getattr(out, name))

        parser.Parse(data, 1)

        self._check_events(out.out, self._expected_unicode)

    def test_parse_file(self):
        # Try parsing a file
        out = self.Outputter()
        parser = expat.ParserCreate(namespace_separator='!')
        parser.returns_unicode = 1
        for name in self.handler_names:
            setattr(parser, name, getattr(out, name))
        source = StringIO.StringIO(data)

        parser.ParseFile(source)

        self._check_events(out.out, self._expected_unicode)

        # Issue 4877: expat.ParseFile causes segfault on a closed file.
        fp = open(test_support.TESTFN, 'wb')
        try:
            fp.close()
            parser = expat.ParserCreate()
            with self.assertRaises(ValueError):
                parser.ParseFile(fp)
        finally:
            test_support.unlink(test_support.TESTFN)


class NamespaceSeparatorTest(unittest.TestCase):
    """Check validation of ParserCreate()'s namespace_separator argument."""

    def test_legal(self):
        # Tests that make sure we get errors when the namespace_separator value
        # is illegal, and that we don't for good values:
        expat.ParserCreate()
        expat.ParserCreate(namespace_separator=None)
        expat.ParserCreate(namespace_separator=' ')

    def test_illegal(self):
        with self.assertRaises(TypeError) as cm:
            expat.ParserCreate(namespace_separator=42)
        self.assertEqual(str(cm.exception),
            'ParserCreate() argument 2 must be string or None, not int')

        with self.assertRaises(ValueError) as cm:
            expat.ParserCreate(namespace_separator='too long')
        self.assertEqual(str(cm.exception),
            'namespace_separator must be at most one character, omitted, or None')

    def test_zero_length(self):
        # ParserCreate() needs to accept a namespace_separator of zero length
        # to satisfy the requirements of RDF applications that are required
        # to simply glue together the namespace URI and the localname.  Though
        # considered a wart of the RDF specifications, it needs to be supported.
        #
        # See XML-SIG mailing list thread starting with
        # http://mail.python.org/pipermail/xml-sig/2001-April/005202.html
        #
        expat.ParserCreate(namespace_separator='') # too short


class InterningTest(unittest.TestCase):
    def test(self):
        # Test the interning machinery.
        p = expat.ParserCreate()
        L = []
        def collector(name, *args):
            L.append(name)
        p.StartElementHandler = collector
        p.EndElementHandler = collector
        p.Parse("<e> <e/> <e></e> </e>", 1)
        tag = L[0]
        self.assertEqual(len(L), 6)
        for entry in L:
            # L should have the same string repeated over and over.
            self.assertIs(tag, entry)


class BufferTextTest(unittest.TestCase):
    """Check how buffer_text groups character data between other events."""

    def setUp(self):
        # Every handler below appends to self.stuff; the tests compare it
        # against the expected event sequence.
        self.stuff = []
        self.parser = expat.ParserCreate()
        self.parser.buffer_text = 1
        self.parser.CharacterDataHandler = self.CharacterDataHandler

    def check(self, expected, label):
        self.assertEqual(self.stuff, expected,
                "%s\nstuff = %r\nexpected = %r"
                % (label, self.stuff, map(unicode, expected)))

    def CharacterDataHandler(self, text):
        self.stuff.append(text)

    def StartElementHandler(self, name, attrs):
        self.stuff.append("<%s>" % name)
        # A "buffer-text" attribute on an element switches buffering on or
        # off from inside the callback.
        bt = attrs.get("buffer-text")
        if bt == "yes":
            self.parser.buffer_text = 1
        elif bt == "no":
            self.parser.buffer_text = 0

    def EndElementHandler(self, name):
        self.stuff.append("</%s>" % name)

    def CommentHandler(self, data):
        self.stuff.append("<!--%s-->" % data)

    def setHandlers(self, handlers=()):
        """Install the named methods of this test case as parser handlers."""
        for name in handlers:
            setattr(self.parser, name, getattr(self, name))

    def test_default_to_disabled(self):
        parser = expat.ParserCreate()
        self.assertFalse(parser.buffer_text)

    def test_buffering_enabled(self):
        # Make sure buffering is turned on
        self.assertTrue(self.parser.buffer_text)
        self.parser.Parse("<a>1<b/>2<c/>3</a>", 1)
        self.assertEqual(self.stuff, ['123'],
                         "buffered text not properly collapsed")

    def test1(self):
        # XXX This test exposes more detail of Expat's text chunking than we
        # XXX like, but it tests what we need to concisely.
        self.setHandlers(["StartElementHandler"])
        self.parser.Parse("<a>1<b buffer-text='no'/>2\n3<c buffer-text='yes'/>4\n5</a>", 1)
        self.assertEqual(self.stuff,
                         ["<a>", "1", "<b>", "2", "\n", "3", "<c>", "4\n5"],
                         "buffering control not reacting as expected")

    def test2(self):
        # '<' and '>' must be written as entity references to keep the
        # document well-formed; the handler receives them as plain text.
        self.parser.Parse("<a>1<b/>&lt;2&gt;<c/> \n 3</a>", 1)
        self.assertEqual(self.stuff, ["1<2> \n 3"],
                         "buffered text not properly collapsed")

    def test3(self):
        self.setHandlers(["StartElementHandler"])
        self.parser.Parse("<a>1<b/>2<c/>3</a>", 1)
        self.assertEqual(self.stuff, ["<a>", "1", "<b>", "2", "<c>", "3"],
                         "buffered text not properly split")

    def test4(self):
        self.setHandlers(["StartElementHandler", "EndElementHandler"])
        self.parser.CharacterDataHandler = None
        self.parser.Parse("<a>1<b/>2<c/>3</a>", 1)
        self.assertEqual(self.stuff,
                         ["<a>", "<b>", "</b>", "<c>", "</c>", "</a>"])

    def test5(self):
        self.setHandlers(["StartElementHandler", "EndElementHandler"])
        self.parser.Parse("<a>1<b></b>2<c/>3</a>", 1)
        self.assertEqual(self.stuff,
            ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3", "</a>"])

    def test6(self):
        self.setHandlers(["CommentHandler", "EndElementHandler",
                    "StartElementHandler"])
        self.parser.Parse("<a>1<b/>2<c></c>345</a> ", 1)
        self.assertEqual(self.stuff,
            ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "345", "</a>"],
            "buffered text not properly split")

    def test7(self):
        self.setHandlers(["CommentHandler", "EndElementHandler",
                    "StartElementHandler"])
        self.parser.Parse("<a>1<b/>2<c></c>3<!--abc-->4<!--def-->5</a> ", 1)
        self.assertEqual(self.stuff,
                         ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3",
                          "<!--abc-->", "4", "<!--def-->", "5", "</a>"],
                         "buffered text not properly split")


# Test handling of exception from callback:
class HandlerExceptionTest(unittest.TestCase):
    def StartElementHandler(self, name, attrs):
        raise RuntimeError(name)

    def test(self):
        parser = expat.ParserCreate()
        parser.StartElementHandler = self.StartElementHandler
        with self.assertRaises(RuntimeError) as cm:
            parser.Parse("<a><b><c/></b></a>", 1)
        # The exception must come from the first start tag, i.e. 'a'.
        raised = cm.exception
        self.assertEqual(raised.args[0], 'a',
                         "Expected RuntimeError for element 'a', but" + \
                         " found %r" % raised.args[0])


# Test Current* members:
class PositionTest(unittest.TestCase):
    def StartElementHandler(self, name, attrs):
        self.check_pos('s')

    def EndElementHandler(self, name):
        self.check_pos('e')

    def check_pos(self, event):
        """Compare the parser's current position with the next expected one.

        event is 's' for a start tag and 'e' for an end tag; each expected
        entry is (event, byte index, line number, column number).
        """
        pos = (event,
               self.parser.CurrentByteIndex,
               self.parser.CurrentLineNumber,
               self.parser.CurrentColumnNumber)
        self.assertLess(self.upto, len(self.expected_list),
                        'too many parser events')
        expected = self.expected_list[self.upto]
        self.assertEqual(pos, expected,
                'Expected position %s, got position %s' %(expected, pos))
        self.upto += 1

    def test(self):
        self.parser = expat.ParserCreate()
        self.parser.StartElementHandler = self.StartElementHandler
        self.parser.EndElementHandler = self.EndElementHandler
        self.upto = 0
        self.expected_list = [('s', 0, 1, 0), ('s', 5, 2, 1), ('s', 11, 3, 2),
                              ('e', 15, 3, 6), ('e', 17, 4, 1), ('e', 22, 5, 0)]

        # The indentation inside this string is significant: it determines
        # the byte indexes and column numbers listed above.
        xml = '<a>\n <b>\n  <c/>\n </b>\n</a>'
        self.parser.Parse(xml, 1)


class sf1296433Test(unittest.TestCase):
    def test_parse_only_xml_data(self):
        # http://python.org/sf/1296433
        #
        xml = "<?xml version='1.0' encoding='iso8859'?><s>%s</s>" % ('a' * 1025)
        # this one doesn't crash
        #xml = "<?xml version='1.0'?><s>%s</s>" % ('a' * 10000)

        class SpecificException(Exception):
            pass

        def handler(text):
            raise SpecificException

        parser = expat.ParserCreate()
        parser.CharacterDataHandler = handler

        self.assertRaises(Exception, parser.Parse, xml)


class ChardataBufferTest(unittest.TestCase):
    """
    test setting of chardata buffer size
    """

    def test_1025_bytes(self):
        self.assertEqual(self.small_buffer_test(1025), 2)

    def test_1000_bytes(self):
        self.assertEqual(self.small_buffer_test(1000), 1)

    def test_wrong_size(self):
        parser = expat.ParserCreate()
        parser.buffer_text = 1
        def f(size):
            parser.buffer_size = size

        self.assertRaises(TypeError, f, sys.maxint+1)
        self.assertRaises(ValueError, f, -1)
        self.assertRaises(ValueError, f, 0)

    def test_unchanged_size(self):
        xml1 = ("<?xml version='1.0' encoding='iso8859'?><s>%s" % ('a' * 512))
        xml2 = 'a'*512 + '</s>'
        parser = expat.ParserCreate()
        parser.CharacterDataHandler = self.counting_handler
        parser.buffer_size = 512
        parser.buffer_text = 1

        # Feed 512 bytes of character data: the handler should be called
        # once.
        self.n = 0
        parser.Parse(xml1)
        self.assertEqual(self.n, 1)

        # Reassign to buffer_size, but assign the same size.
        parser.buffer_size = parser.buffer_size
        self.assertEqual(self.n, 1)

        # Try parsing rest of the document
        parser.Parse(xml2)
        self.assertEqual(self.n, 2)

    def test_disabling_buffer(self):
        xml1 = "<?xml version='1.0' encoding='iso8859'?><a>%s" % ('a' * 512)
        xml2 = ('b' * 1024)
        xml3 = "%s</a>" % ('c' * 1024)
        parser = expat.ParserCreate()
        parser.CharacterDataHandler = self.counting_handler
        parser.buffer_text = 1
        parser.buffer_size = 1024
        self.assertEqual(parser.buffer_size, 1024)

        # Parse one chunk of XML
        self.n = 0
        parser.Parse(xml1, 0)
        self.assertEqual(parser.buffer_size, 1024)
        self.assertEqual(self.n, 1)

        # Turn off buffering and parse the next chunk.
        parser.buffer_text = 0
        self.assertFalse(parser.buffer_text)
        self.assertEqual(parser.buffer_size, 1024)
        for i in range(10):
            parser.Parse(xml2, 0)
        self.assertEqual(self.n, 11)

        parser.buffer_text = 1
        self.assertTrue(parser.buffer_text)
        self.assertEqual(parser.buffer_size, 1024)
        parser.Parse(xml3, 1)
        self.assertEqual(self.n, 12)

    def make_document(self, bytes):
        return ("<?xml version='1.0'?><tag>" + bytes * 'a' + '</tag>')

    def counting_handler(self, text):
        # Character data handler that only counts how often it is called.
        self.n += 1

    def small_buffer_test(self, buffer_len):
        """Return the number of character data callbacks for buffer_len
        bytes of text parsed with a 1024-byte buffer."""
        xml = "<?xml version='1.0' encoding='iso8859'?><s>%s</s>" % ('a' * buffer_len)
        parser = expat.ParserCreate()
        parser.CharacterDataHandler = self.counting_handler
        parser.buffer_size = 1024
        parser.buffer_text = 1

        self.n = 0
        parser.Parse(xml)
        return self.n

    def test_change_size_1(self):
        xml1 = "<?xml version='1.0' encoding='iso8859'?><a><s>%s" % ('a' * 1024)
        xml2 = "aaa</s><s>%s</s></a>" % ('a' * 1025)
        parser = expat.ParserCreate()
        parser.CharacterDataHandler = self.counting_handler
        parser.buffer_text = 1
        parser.buffer_size = 1024
        self.assertEqual(parser.buffer_size, 1024)

        self.n = 0
        parser.Parse(xml1, 0)
        parser.buffer_size *= 2
        self.assertEqual(parser.buffer_size, 2048)
        parser.Parse(xml2, 1)
        self.assertEqual(self.n, 2)

    def test_change_size_2(self):
        xml1 = "<?xml version='1.0' encoding='iso8859'?><a>a<s>%s" % ('a' * 1023)
        xml2 = "aaa</s><s>%s</s></a>" % ('a' * 1025)
        parser = expat.ParserCreate()
        parser.CharacterDataHandler = self.counting_handler
        parser.buffer_text = 1
        parser.buffer_size = 2048
        self.assertEqual(parser.buffer_size, 2048)

        self.n = 0
        parser.Parse(xml1, 0)
        parser.buffer_size //= 2
        self.assertEqual(parser.buffer_size, 1024)
        parser.Parse(xml2, 1)
        self.assertEqual(self.n, 4)


class MalformedInputText(unittest.TestCase):
    """Check the error messages reported for malformed input."""

    def test1(self):
        xml = "\0\r\n"
        parser = expat.ParserCreate()
        with self.assertRaises(expat.ExpatError) as cm:
            parser.Parse(xml, True)
        self.assertEqual(str(cm.exception),
                         'unclosed token: line 2, column 0')

    def test2(self):
        xml = "<?xml version\xc2\x85='1.0'?>\r\n"
        parser = expat.ParserCreate()
        with self.assertRaises(expat.ExpatError) as cm:
            parser.Parse(xml, True)
        self.assertEqual(str(cm.exception),
                         'XML declaration not well-formed: line 1, column 14')


class ForeignDTDTests(unittest.TestCase):
    """
    Tests for the UseForeignDTD method of expat parser objects.
    """
    def test_use_foreign_dtd(self):
        """
        If UseForeignDTD is passed True and a document without an external
        entity reference is parsed, ExternalEntityRefHandler is first called
        with None for the public and system ids.
        """
        handler_call_args = []
        def resolve_entity(context, base, system_id, public_id):
            handler_call_args.append((public_id, system_id))
            return 1

        parser = expat.ParserCreate()
        parser.UseForeignDTD(True)
        parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS)
        parser.ExternalEntityRefHandler = resolve_entity
        parser.Parse("<?xml version='1.0'?><element/>")
        self.assertEqual(handler_call_args, [(None, None)])

        # test UseForeignDTD() is equal to UseForeignDTD(True)
        handler_call_args[:] = []

        parser = expat.ParserCreate()
        parser.UseForeignDTD()
        parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS)
        parser.ExternalEntityRefHandler = resolve_entity
        parser.Parse("<?xml version='1.0'?><element/>")
        self.assertEqual(handler_call_args, [(None, None)])

    def test_ignore_use_foreign_dtd(self):
        """
        If UseForeignDTD is passed True and a document with an external
        entity reference is parsed, ExternalEntityRefHandler is called with
        the public and system ids from the document.
        """
        handler_call_args = []
        def resolve_entity(context, base, system_id, public_id):
            handler_call_args.append((public_id, system_id))
            return 1

        parser = expat.ParserCreate()
        parser.UseForeignDTD(True)
        parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS)
        parser.ExternalEntityRefHandler = resolve_entity
        parser.Parse(
            "<?xml version='1.0'?><!DOCTYPE foo PUBLIC 'bar' 'baz'><element/>")
        self.assertEqual(handler_call_args, [("bar", "baz")])


def test_main():
    """Run every test case class defined in this module."""
    run_unittest(SetAttributeTest,
                 ParseTest,
                 NamespaceSeparatorTest,
                 InterningTest,
                 BufferTextTest,
                 HandlerExceptionTest,
                 PositionTest,
                 sf1296433Test,
                 ChardataBufferTest,
                 MalformedInputText,
                 ForeignDTDTests)

if __name__ == "__main__":
    test_main()