# regression test for SAX 2.0            -*- coding: utf-8 -*-
# $Id$
"""Regression tests for the xml.sax package.

Covers xml.sax.parse/parseString, make_parser, the saxutils helpers
(escape, unescape, quoteattr, prepare_input_source, XMLGenerator,
XMLFilterBase), the expat-based reader, error reporting and the
Attributes implementations in xml.sax.xmlreader.
"""

from xml.sax import make_parser, ContentHandler, \
                    SAXException, SAXReaderNotAvailable, SAXParseException
try:
    make_parser()
except SAXReaderNotAvailable:
    # don't try to test this module if we cannot create a parser
    raise ImportError("no XML parsers available")
from xml.sax.saxutils import XMLGenerator, escape, unescape, quoteattr, \
                             XMLFilterBase, prepare_input_source
from xml.sax.expatreader import create_parser
from xml.sax.handler import feature_namespaces
from xml.sax.xmlreader import InputSource, AttributesImpl, AttributesNSImpl
from cStringIO import StringIO
import io
import gc
import os.path
import shutil
import test.test_support as support
from test.test_support import findfile, run_unittest, TESTFN
import unittest

TEST_XMLFILE = findfile("test.xml", subdir="xmltestdata")
TEST_XMLFILE_OUT = findfile("test.xml.out", subdir="xmltestdata")

supports_unicode_filenames = True
if not os.path.supports_unicode_filenames:
    try:
        support.TESTFN_UNICODE.encode(support.TESTFN_ENCODING)
    except (AttributeError, UnicodeError, TypeError):
        # Either the file system encoding is None, or the file name
        # cannot be encoded in the file system encoding.
        supports_unicode_filenames = False
requires_unicode_filenames = unittest.skipUnless(
    supports_unicode_filenames,
    'Requires unicode filenames support')

ns_uri = "http://www.python.org/xml-ns/saxtest/"


class XmlTestBase(unittest.TestCase):
    """Shared assertions about Attributes-style objects.

    The helpers are used both for attribute objects built directly and
    for those delivered by the expat reader.
    """

    def verify_empty_attrs(self, attrs):
        """Check that *attrs* behaves as an empty non-namespace Attributes."""
        self.assertRaises(KeyError, attrs.getValue, "attr")
        self.assertRaises(KeyError, attrs.getValueByQName, "attr")
        self.assertRaises(KeyError, attrs.getNameByQName, "attr")
        self.assertRaises(KeyError, attrs.getQNameByName, "attr")
        self.assertRaises(KeyError, attrs.__getitem__, "attr")
        self.assertEqual(attrs.getLength(), 0)
        self.assertEqual(attrs.getNames(), [])
        self.assertEqual(attrs.getQNames(), [])
        self.assertEqual(len(attrs), 0)
        self.assertFalse(attrs.has_key("attr"))
        self.assertEqual(attrs.keys(), [])
        self.assertEqual(attrs.get("attrs"), None)
        self.assertEqual(attrs.get("attrs", 25), 25)
        self.assertEqual(attrs.items(), [])
        self.assertEqual(attrs.values(), [])

    def verify_empty_nsattrs(self, attrs):
        """Check that *attrs* behaves as an empty namespace-aware Attributes."""
        self.assertRaises(KeyError, attrs.getValue, (ns_uri, "attr"))
        self.assertRaises(KeyError, attrs.getValueByQName, "ns:attr")
        self.assertRaises(KeyError, attrs.getNameByQName, "ns:attr")
        self.assertRaises(KeyError, attrs.getQNameByName, (ns_uri, "attr"))
        self.assertRaises(KeyError, attrs.__getitem__, (ns_uri, "attr"))
        self.assertEqual(attrs.getLength(), 0)
        self.assertEqual(attrs.getNames(), [])
        self.assertEqual(attrs.getQNames(), [])
        self.assertEqual(len(attrs), 0)
        self.assertFalse(attrs.has_key((ns_uri, "attr")))
        self.assertEqual(attrs.keys(), [])
        self.assertEqual(attrs.get((ns_uri, "attr")), None)
        self.assertEqual(attrs.get((ns_uri, "attr"), 25), 25)
        self.assertEqual(attrs.items(), [])
        self.assertEqual(attrs.values(), [])

    def verify_attrs_wattr(self, attrs):
        """Check that *attrs* holds exactly one attribute, attr="val"."""
        self.assertEqual(attrs.getLength(), 1)
        self.assertEqual(attrs.getNames(), ["attr"])
        self.assertEqual(attrs.getQNames(), ["attr"])
        self.assertEqual(len(attrs), 1)
        self.assertTrue(attrs.has_key("attr"))
        self.assertEqual(attrs.keys(), ["attr"])
        self.assertEqual(attrs.get("attr"), "val")
        self.assertEqual(attrs.get("attr", 25), "val")
        self.assertEqual(attrs.items(), [("attr", "val")])
        self.assertEqual(attrs.values(), ["val"])
        self.assertEqual(attrs.getValue("attr"), "val")
        self.assertEqual(attrs.getValueByQName("attr"), "val")
        self.assertEqual(attrs.getNameByQName("attr"), "attr")
        self.assertEqual(attrs["attr"], "val")
        self.assertEqual(attrs.getQNameByName("attr"), "attr")


def xml_unicode(doc, encoding=None):
    """Return *doc*, prefixed with an XML declaration naming *encoding*.

    When *encoding* is None the document is returned unchanged, i.e.
    without any XML declaration.
    """
    if encoding is None:
        return doc
    return u'<?xml version="1.0" encoding="%s"?>\n%s' % (encoding, doc)

def xml_bytes(doc, encoding, decl_encoding=Ellipsis):
    """Return *doc* encoded to bytes with *encoding*.

    *decl_encoding* is the encoding named in the XML declaration; it
    defaults to *encoding* (Ellipsis is the "not given" sentinel because
    None means "emit no declaration").  Unencodable characters are
    written as character references.
    """
    if decl_encoding is Ellipsis:
        decl_encoding = encoding
    return xml_unicode(doc, decl_encoding).encode(encoding, 'xmlcharrefreplace')

def make_xml_file(doc, encoding, decl_encoding=Ellipsis):
    """Write *doc* to TESTFN using *encoding*.

    *decl_encoding* has the same meaning as in xml_bytes().
    """
    if decl_encoding is Ellipsis:
        decl_encoding = encoding
    with io.open(TESTFN, 'w', encoding=encoding, errors='xmlcharrefreplace') as f:
        f.write(xml_unicode(doc, decl_encoding))


class ParseTest(unittest.TestCase):
    """Tests for xml.sax.parse() and xml.sax.parseString() on byte input."""

    data = support.u(r'<money value="$\xa3\u20ac\U0001017b">'
                     r'$\xa3\u20ac\U0001017b</money>')

    def tearDown(self):
        support.unlink(TESTFN)

    def check_parse(self, f):
        """Parse *f* and compare the regenerated UTF-8 output with self.data."""
        from xml.sax import parse
        result = StringIO()
        parse(f, XMLGenerator(result, 'utf-8'))
        self.assertEqual(result.getvalue(), xml_bytes(self.data, 'utf-8'))

    def test_parse_bytes(self):
        # UTF-8 is default encoding, US-ASCII is compatible with UTF-8,
        # UTF-16 is autodetected
        encodings = ('us-ascii', 'utf-8', 'utf-16', 'utf-16le', 'utf-16be')
        for encoding in encodings:
            self.check_parse(io.BytesIO(xml_bytes(self.data, encoding)))
            make_xml_file(self.data, encoding)
            self.check_parse(TESTFN)
            with io.open(TESTFN, 'rb') as f:
                self.check_parse(f)
            self.check_parse(io.BytesIO(xml_bytes(self.data, encoding, None)))
            make_xml_file(self.data, encoding, None)
            self.check_parse(TESTFN)
            with io.open(TESTFN, 'rb') as f:
                self.check_parse(f)
        # accept UTF-8 with BOM
        self.check_parse(io.BytesIO(xml_bytes(self.data, 'utf-8-sig', 'utf-8')))
        make_xml_file(self.data, 'utf-8-sig', 'utf-8')
        self.check_parse(TESTFN)
        with io.open(TESTFN, 'rb') as f:
            self.check_parse(f)
        self.check_parse(io.BytesIO(xml_bytes(self.data, 'utf-8-sig', None)))
        make_xml_file(self.data, 'utf-8-sig', None)
        self.check_parse(TESTFN)
        with io.open(TESTFN, 'rb') as f:
            self.check_parse(f)
        # accept data with declared encoding
        self.check_parse(io.BytesIO(xml_bytes(self.data, 'iso-8859-1')))
        make_xml_file(self.data, 'iso-8859-1')
        self.check_parse(TESTFN)
        with io.open(TESTFN, 'rb') as f:
            self.check_parse(f)
        # fail on non-UTF-8 incompatible data without declared encoding
        with self.assertRaises(SAXException):
            self.check_parse(io.BytesIO(xml_bytes(self.data, 'iso-8859-1', None)))
        make_xml_file(self.data, 'iso-8859-1', None)
        with self.assertRaises(SAXException):
            self.check_parse(TESTFN)
        with io.open(TESTFN, 'rb') as f:
            with self.assertRaises(SAXException):
                self.check_parse(f)

    def test_parse_InputSource(self):
        # accept data without declared but with explicitly specified encoding
        make_xml_file(self.data, 'iso-8859-1', None)
        with io.open(TESTFN, 'rb') as f:
            # named "source" rather than "input" to avoid shadowing the builtin
            source = InputSource()
            source.setByteStream(f)
            source.setEncoding('iso-8859-1')
            self.check_parse(source)

    def check_parseString(self, s):
        """Parse the byte string *s* and compare the output with self.data."""
        from xml.sax import parseString
        result = StringIO()
        parseString(s, XMLGenerator(result, 'utf-8'))
        self.assertEqual(result.getvalue(), xml_bytes(self.data, 'utf-8'))

    def test_parseString_bytes(self):
        # UTF-8 is default encoding, US-ASCII is compatible with UTF-8,
        # UTF-16 is autodetected
        encodings = ('us-ascii', 'utf-8', 'utf-16', 'utf-16le', 'utf-16be')
        for encoding in encodings:
            self.check_parseString(xml_bytes(self.data, encoding))
            self.check_parseString(xml_bytes(self.data, encoding, None))
        # accept UTF-8 with BOM
        self.check_parseString(xml_bytes(self.data, 'utf-8-sig', 'utf-8'))
        self.check_parseString(xml_bytes(self.data, 'utf-8-sig', None))
        # accept data with declared encoding
        self.check_parseString(xml_bytes(self.data, 'iso-8859-1'))
        # fail on non-UTF-8 incompatible data without declared encoding
        with self.assertRaises(SAXException):
            self.check_parseString(xml_bytes(self.data, 'iso-8859-1', None))


class MakeParserTest(unittest.TestCase):
    """Tests for repeated use of xml.sax.make_parser()."""

    def test_make_parser2(self):
        # Creating parsers several times in a row should succeed.
        # Testing this because there have been failures of this kind
        # before.
        # The import is deliberately repeated before every call, as in
        # the historical form of this test.
        for _ in range(6):
            from xml.sax import make_parser
            p = make_parser()


# ===========================================================================
#
#   saxutils tests
#
# ===========================================================================

class SaxutilsTest(unittest.TestCase):
    """Tests for escape(), unescape(), quoteattr() and parser fallback."""

    # ===== escape
    def test_escape_basic(self):
        self.assertEqual(escape("Donald Duck & Co"), "Donald Duck &amp; Co")

    def test_escape_all(self):
        self.assertEqual(escape("<Donald Duck & Co>"),
                         "&lt;Donald Duck &amp; Co&gt;")

    def test_escape_extra(self):
        self.assertEqual(escape("Hei på deg", {"å" : "&aring;"}),
                         "Hei p&aring; deg")

    # ===== unescape
    def test_unescape_basic(self):
        self.assertEqual(unescape("Donald Duck &amp; Co"), "Donald Duck & Co")

    def test_unescape_all(self):
        self.assertEqual(unescape("&lt;Donald Duck &amp; Co&gt;"),
                         "<Donald Duck & Co>")

    def test_unescape_extra(self):
        self.assertEqual(unescape("Hei på deg", {"å" : "&aring;"}),
                         "Hei p&aring; deg")

    def test_unescape_amp_extra(self):
        # "&amp;" must be unescaped last, so the resulting "&foo;" is not
        # run through the extra-entities mapping a second time.
        self.assertEqual(unescape("&amp;foo;", {"&foo;": "splat"}), "&foo;")

    # ===== quoteattr
    def test_quoteattr_basic(self):
        self.assertEqual(quoteattr("Donald Duck & Co"),
                         '"Donald Duck &amp; Co"')

    def test_single_quoteattr(self):
        self.assertEqual(quoteattr('Includes "double" quotes'),
                         '\'Includes "double" quotes\'')

    def test_double_quoteattr(self):
        self.assertEqual(quoteattr("Includes 'single' quotes"),
                         "\"Includes 'single' quotes\"")

    def test_single_double_quoteattr(self):
        self.assertEqual(quoteattr("Includes 'single' and \"double\" quotes"),
                    "\"Includes 'single' and &quot;double&quot; quotes\"")

    # ===== make_parser
    def test_make_parser(self):
        # Creating a parser should succeed - it should fall back
        # to the expatreader
        p = make_parser(['xml.parsers.no_such_parser'])


class PrepareInputSourceTest(unittest.TestCase):
    """Tests for saxutils.prepare_input_source()."""

    def setUp(self):
        self.file = support.TESTFN
        with open(self.file, "w") as tmp:
            tmp.write("This was read from a file.")

    def tearDown(self):
        support.unlink(self.file)

    def make_byte_stream(self):
        return io.BytesIO(b"This is a byte stream.")

    def checkContent(self, stream, content):
        """Assert that *stream* exists and yields *content*, then close it."""
        self.assertIsNotNone(stream)
        self.assertEqual(stream.read(), content)
        stream.close()


    def test_byte_stream(self):
        # If the source is an InputSource that does not have a character
        # stream but does have a byte stream, use the byte stream.
        src = InputSource(self.file)
        src.setByteStream(self.make_byte_stream())
        prep = prepare_input_source(src)
        self.assertIsNone(prep.getCharacterStream())
        self.checkContent(prep.getByteStream(),
                          b"This is a byte stream.")

    def test_system_id(self):
        # If the source is an InputSource that has neither a character
        # stream nor a byte stream, open the system ID.
        src = InputSource(self.file)
        prep = prepare_input_source(src)
        self.assertIsNone(prep.getCharacterStream())
        self.checkContent(prep.getByteStream(),
                          b"This was read from a file.")

    def test_string(self):
        # If the source is a string, use it as a system ID and open it.
        prep = prepare_input_source(self.file)
        self.assertIsNone(prep.getCharacterStream())
        self.checkContent(prep.getByteStream(),
                          b"This was read from a file.")

    def test_binary_file(self):
        # If the source is a binary file-like object, use it as a byte
        # stream.
        prep = prepare_input_source(self.make_byte_stream())
        self.assertIsNone(prep.getCharacterStream())
        self.checkContent(prep.getByteStream(),
                          b"This is a byte stream.")


# ===== XMLGenerator

start = '<?xml version="1.0" encoding="iso-8859-1"?>\n'

class XmlgenTest:
    # Mixin holding the XMLGenerator tests.  Concrete subclasses combine
    # it with unittest.TestCase and supply "ioclass", a factory for the
    # output object (it must offer write(), getvalue() and "closed").

    def test_xmlgen_basic(self):
        result = self.ioclass()
        gen = XMLGenerator(result)
        gen.startDocument()
        gen.startElement("doc", {})
        gen.endElement("doc")
        gen.endDocument()

        self.assertEqual(result.getvalue(), start + "<doc></doc>")

    def test_xmlgen_content(self):
        result = self.ioclass()
        gen = XMLGenerator(result)

        gen.startDocument()
        gen.startElement("doc", {})
        gen.characters("huhei")
        gen.endElement("doc")
        gen.endDocument()

        self.assertEqual(result.getvalue(), start + "<doc>huhei</doc>")

    def test_xmlgen_pi(self):
        result = self.ioclass()
        gen = XMLGenerator(result)

        gen.startDocument()
        gen.processingInstruction("test", "data")
        gen.startElement("doc", {})
        gen.endElement("doc")
        gen.endDocument()

        self.assertEqual(result.getvalue(), start + "<?test data?><doc></doc>")

    def test_xmlgen_content_escape(self):
        result = self.ioclass()
        gen = XMLGenerator(result)

        gen.startDocument()
        gen.startElement("doc", {})
        gen.characters("<huhei&")
        gen.endElement("doc")
        gen.endDocument()

        self.assertEqual(result.getvalue(),
            start + "<doc>&lt;huhei&amp;</doc>")

    def test_xmlgen_attr_escape(self):
        result = self.ioclass()
        gen = XMLGenerator(result)

        gen.startDocument()
        gen.startElement("doc", {"a": '"'})
        gen.startElement("e", {"a": "'"})
        gen.endElement("e")
        gen.startElement("e", {"a": "'\""})
        gen.endElement("e")
        gen.startElement("e", {"a": "\n\r\t"})
        gen.endElement("e")
        gen.endElement("doc")
        gen.endDocument()

        self.assertEqual(result.getvalue(), start +
            ("<doc a='\"'><e a=\"'\"></e>"
             "<e a=\"'&quot;\"></e>"
             "<e a=\"&#10;&#13;&#9;\"></e></doc>"))

    def test_xmlgen_encoding(self):
        encodings = ('iso-8859-15', 'utf-8',
                     'utf-16be', 'utf-16le',
                     'utf-32be', 'utf-32le')
        for encoding in encodings:
            result = self.ioclass()
            gen = XMLGenerator(result, encoding=encoding)

            gen.startDocument()
            gen.startElement("doc", {"a": u'\u20ac'})
            gen.characters(u"\u20ac")
            gen.endElement("doc")
            gen.endDocument()

            self.assertEqual(result.getvalue(), (
                u'<?xml version="1.0" encoding="%s"?>\n'
                u'<doc a="\u20ac">\u20ac</doc>' % encoding
                ).encode(encoding, 'xmlcharrefreplace'))

    def test_xmlgen_unencodable(self):
        result = self.ioclass()
        gen = XMLGenerator(result, encoding='ascii')

        gen.startDocument()
        gen.startElement("doc", {"a": u'\u20ac'})
        gen.characters(u"\u20ac")
        gen.endElement("doc")
        gen.endDocument()

        # U+20AC cannot be encoded as ASCII, so a character reference
        # is expected in both the attribute and the text.
        self.assertEqual(result.getvalue(),
                '<?xml version="1.0" encoding="ascii"?>\n'
                '<doc a="&#8364;">&#8364;</doc>')

    def test_xmlgen_ignorable(self):
        result = self.ioclass()
        gen = XMLGenerator(result)

        gen.startDocument()
        gen.startElement("doc", {})
        gen.ignorableWhitespace(" ")
        gen.endElement("doc")
        gen.endDocument()

        self.assertEqual(result.getvalue(), start + "<doc> </doc>")

    def test_xmlgen_encoding_bytes(self):
        encodings = ('iso-8859-15', 'utf-8',
                     'utf-16be', 'utf-16le',
                     'utf-32be', 'utf-32le')
        for encoding in encodings:
            result = self.ioclass()
            gen = XMLGenerator(result, encoding=encoding)

            gen.startDocument()
            gen.startElement("doc", {"a": u'\u20ac'})
            gen.characters(u"\u20ac".encode(encoding))
            gen.ignorableWhitespace(" ".encode(encoding))
            gen.endElement("doc")
            gen.endDocument()

            self.assertEqual(result.getvalue(), (
                u'<?xml version="1.0" encoding="%s"?>\n'
                u'<doc a="\u20ac">\u20ac </doc>' % encoding
                ).encode(encoding, 'xmlcharrefreplace'))

    def test_xmlgen_ns(self):
        result = self.ioclass()
        gen = XMLGenerator(result)

        gen.startDocument()
        gen.startPrefixMapping("ns1", ns_uri)
        gen.startElementNS((ns_uri, "doc"), "ns1:doc", {})
        # add an unqualified name
        gen.startElementNS((None, "udoc"), None, {})
        gen.endElementNS((None, "udoc"), None)
        gen.endElementNS((ns_uri, "doc"), "ns1:doc")
        gen.endPrefixMapping("ns1")
        gen.endDocument()

        self.assertEqual(result.getvalue(), start + \
           ('<ns1:doc xmlns:ns1="%s"><udoc></udoc></ns1:doc>' %
                                         ns_uri))

    def test_1463026_1(self):
        result = self.ioclass()
        gen = XMLGenerator(result)

        gen.startDocument()
        gen.startElementNS((None, 'a'), 'a', {(None, 'b'):'c'})
        gen.endElementNS((None, 'a'), 'a')
        gen.endDocument()

        self.assertEqual(result.getvalue(), start+'<a b="c"></a>')

    def test_1463026_2(self):
        result = self.ioclass()
        gen = XMLGenerator(result)

        gen.startDocument()
        gen.startPrefixMapping(None, 'qux')
        gen.startElementNS(('qux', 'a'), 'a', {})
        gen.endElementNS(('qux', 'a'), 'a')
        gen.endPrefixMapping(None)
        gen.endDocument()

        self.assertEqual(result.getvalue(), start+'<a xmlns="qux"></a>')

    def test_1463026_3(self):
        result = self.ioclass()
        gen = XMLGenerator(result)

        gen.startDocument()
        gen.startPrefixMapping('my', 'qux')
        gen.startElementNS(('qux', 'a'), 'a', {(None, 'b'):'c'})
        gen.endElementNS(('qux', 'a'), 'a')
        gen.endPrefixMapping('my')
        gen.endDocument()

        self.assertEqual(result.getvalue(),
            start+'<my:a xmlns:my="qux" b="c"></my:a>')

    def test_5027_1(self):
        # The xml prefix (as in xml:lang below) is reserved and bound by
        # definition to http://www.w3.org/XML/1998/namespace.  XMLGenerator had
        # a bug whereby a KeyError is raised because this namespace is missing
        # from a dictionary.
        #
        # This test demonstrates the bug by parsing a document.
        test_xml = StringIO(
            '<?xml version="1.0"?>'
            '<a:g1 xmlns:a="http://example.com/ns">'
             '<a:g2 xml:lang="en">Hello</a:g2>'
            '</a:g1>')

        parser = make_parser()
        parser.setFeature(feature_namespaces, True)
        result = self.ioclass()
        gen = XMLGenerator(result)
        parser.setContentHandler(gen)
        parser.parse(test_xml)

        self.assertEqual(result.getvalue(),
                         start + (
                         '<a:g1 xmlns:a="http://example.com/ns">'
                          '<a:g2 xml:lang="en">Hello</a:g2>'
                         '</a:g1>'))

    def test_5027_2(self):
        # The xml prefix (as in xml:lang below) is reserved and bound by
        # definition to http://www.w3.org/XML/1998/namespace.  XMLGenerator had
        # a bug whereby a KeyError is raised because this namespace is missing
        # from a dictionary.
        #
        # This test demonstrates the bug by direct manipulation of the
        # XMLGenerator.
        result = self.ioclass()
        gen = XMLGenerator(result)

        gen.startDocument()
        gen.startPrefixMapping('a', 'http://example.com/ns')
        gen.startElementNS(('http://example.com/ns', 'g1'), 'g1', {})
        lang_attr = {('http://www.w3.org/XML/1998/namespace', 'lang'): 'en'}
        gen.startElementNS(('http://example.com/ns', 'g2'), 'g2', lang_attr)
        gen.characters('Hello')
        gen.endElementNS(('http://example.com/ns', 'g2'), 'g2')
        gen.endElementNS(('http://example.com/ns', 'g1'), 'g1')
        gen.endPrefixMapping('a')
        gen.endDocument()

        self.assertEqual(result.getvalue(),
                         start + (
                         '<a:g1 xmlns:a="http://example.com/ns">'
                          '<a:g2 xml:lang="en">Hello</a:g2>'
                         '</a:g1>'))

    def test_no_close_file(self):
        result = self.ioclass()
        def func(out):
            gen = XMLGenerator(out)
            gen.startDocument()
            gen.startElement("doc", {})
        # The generator goes out of scope when func() returns; the output
        # object must still be open afterwards.
        func(result)
        self.assertFalse(result.closed)

    def test_xmlgen_fragment(self):
        result = self.ioclass()
        gen = XMLGenerator(result)

        # Don't call gen.startDocument()
        gen.startElement("foo", {"a": "1.0"})
        gen.characters("Hello")
        gen.endElement("foo")
        gen.startElement("bar", {"b": "2.0"})
        gen.endElement("bar")
        # Don't call gen.endDocument()

        self.assertEqual(result.getvalue(),
                         '<foo a="1.0">Hello</foo><bar b="2.0"></bar>')

class StringXmlgenTest(XmlgenTest, unittest.TestCase):
    """Run the XMLGenerator tests against cStringIO.StringIO."""
    ioclass = StringIO

class BytesIOXmlgenTest(XmlgenTest, unittest.TestCase):
    """Run the XMLGenerator tests against io.BytesIO."""
    ioclass = io.BytesIO

class WriterXmlgenTest(XmlgenTest, unittest.TestCase):
    """Run the XMLGenerator tests against a minimal list-based writer."""

    class ioclass(list):
        # Each write() call appends one chunk; getvalue() joins them.
        write = list.append
        closed = False

        def getvalue(self):
            return b''.join(self)


class XMLFilterBaseTest(unittest.TestCase):
    """Tests for saxutils.XMLFilterBase event forwarding."""

    def test_filter_basic(self):
        result = StringIO()
        gen = XMLGenerator(result)
        # named "xml_filter" rather than "filter" to avoid shadowing
        # the builtin
        xml_filter = XMLFilterBase()
        xml_filter.setContentHandler(gen)

        xml_filter.startDocument()
        xml_filter.startElement("doc", {})
        xml_filter.characters("content")
        xml_filter.ignorableWhitespace(" ")
        xml_filter.endElement("doc")
        xml_filter.endDocument()

        self.assertEqual(result.getvalue(), start + "<doc>content </doc>")

# ===========================================================================
#
#   expatreader tests
#
# ===========================================================================

# Expected XMLGenerator output for TEST_XMLFILE; read once at import time.
with open(TEST_XMLFILE_OUT) as f:
    xml_test_out = f.read()

class ExpatReaderTest(XmlTestBase):
    """Tests for the expat-based reader from xml.sax.expatreader."""

    # ===== XMLReader support

    def test_expat_binary_file(self):
        parser = create_parser()
        result = StringIO()
        xmlgen = XMLGenerator(result)

        parser.setContentHandler(xmlgen)
        with open(TEST_XMLFILE) as f:
            parser.parse(f)

        self.assertEqual(result.getvalue(), xml_test_out)

    @requires_unicode_filenames
    def test_expat_file_unicode(self):
        fname = support.TESTFN_UNICODE
        shutil.copyfile(TEST_XMLFILE, fname)
        self.addCleanup(support.unlink, fname)

        parser = create_parser()
        result = StringIO()
        xmlgen = XMLGenerator(result)

        parser.setContentHandler(xmlgen)
        with open(fname) as f:
            parser.parse(f)

        self.assertEqual(result.getvalue(), xml_test_out)

    # ===== DTDHandler support

    class TestDTDHandler:
        # Records every notation and unparsed-entity declaration it receives.

        def __init__(self):
            self._notations = []
            self._entities  = []

        def notationDecl(self, name, publicId, systemId):
            self._notations.append((name, publicId, systemId))

        def unparsedEntityDecl(self, name, publicId, systemId, ndata):
            self._entities.append((name, publicId, systemId, ndata))

    def test_expat_dtdhandler(self):
        parser = create_parser()
        handler = self.TestDTDHandler()
        parser.setDTDHandler(handler)

        parser.feed('<!DOCTYPE doc [\n')
        parser.feed('  <!ENTITY img SYSTEM "expat.gif" NDATA GIF>\n')
        parser.feed('  <!NOTATION GIF PUBLIC "-//CompuServe//NOTATION Graphics Interchange Format 89a//EN">\n')
        parser.feed(']>\n')
        parser.feed('<doc></doc>')
        parser.close()

        self.assertEqual(handler._notations,
            [("GIF", "-//CompuServe//NOTATION Graphics Interchange Format 89a//EN", None)])
        self.assertEqual(handler._entities, [("img", None, "expat.gif", "GIF")])

    # ===== EntityResolver support

    class TestEntityResolver:
        # Resolves every external entity to the fixed document "<entity/>".

        def resolveEntity(self, publicId, systemId):
            inpsrc = InputSource()
            inpsrc.setByteStream(StringIO("<entity/>"))
            return inpsrc

    def test_expat_entityresolver(self):
        parser = create_parser()
        parser.setEntityResolver(self.TestEntityResolver())
        result = StringIO()
        parser.setContentHandler(XMLGenerator(result))

        parser.feed('<!DOCTYPE doc [\n')
        parser.feed('  <!ENTITY test SYSTEM "whatever">\n')
        parser.feed(']>\n')
        parser.feed('<doc>&test;</doc>')
        parser.close()

        self.assertEqual(result.getvalue(), start +
                         "<doc><entity></entity></doc>")

    # ===== Attributes support

    class AttrGatherer(ContentHandler):
        # Remembers the attributes object of the most recent start tag.

        def startElement(self, name, attrs):
            self._attrs = attrs

        def startElementNS(self, name, qname, attrs):
            self._attrs = attrs

    def test_expat_attrs_empty(self):
        parser = create_parser()
        gather = self.AttrGatherer()
        parser.setContentHandler(gather)

        parser.feed("<doc/>")
        parser.close()

        self.verify_empty_attrs(gather._attrs)

    def test_expat_attrs_wattr(self):
        parser = create_parser()
        gather = self.AttrGatherer()
        parser.setContentHandler(gather)

        parser.feed("<doc attr='val'/>")
        parser.close()

        self.verify_attrs_wattr(gather._attrs)

    def test_expat_nsattrs_empty(self):
        parser = create_parser(1)
        gather = self.AttrGatherer()
        parser.setContentHandler(gather)

        parser.feed("<doc/>")
        parser.close()

        self.verify_empty_nsattrs(gather._attrs)

    def test_expat_nsattrs_wattr(self):
        parser = create_parser(1)
        gather = self.AttrGatherer()
        parser.setContentHandler(gather)

        parser.feed("<doc xmlns:ns='%s' ns:attr='val'/>" % ns_uri)
        parser.close()

        attrs = gather._attrs

        self.assertEqual(attrs.getLength(), 1)
        self.assertEqual(attrs.getNames(), [(ns_uri, "attr")])
        self.assertTrue((attrs.getQNames() == [] or
                         attrs.getQNames() == ["ns:attr"]))
        self.assertEqual(len(attrs), 1)
        self.assertTrue(attrs.has_key((ns_uri, "attr")))
        self.assertEqual(attrs.get((ns_uri, "attr")), "val")
        self.assertEqual(attrs.get((ns_uri, "attr"), 25), "val")
        self.assertEqual(attrs.items(), [((ns_uri, "attr"), "val")])
        self.assertEqual(attrs.values(), ["val"])
        self.assertEqual(attrs.getValue((ns_uri, "attr")), "val")
        self.assertEqual(attrs[(ns_uri, "attr")], "val")

    # ===== InputSource support

    def test_expat_inpsource_filename(self):
        parser = create_parser()
        result = StringIO()
        xmlgen = XMLGenerator(result)

        parser.setContentHandler(xmlgen)
        parser.parse(TEST_XMLFILE)

        self.assertEqual(result.getvalue(), xml_test_out)

    def test_expat_inpsource_sysid(self):
        parser = create_parser()
        result = StringIO()
        xmlgen = XMLGenerator(result)

        parser.setContentHandler(xmlgen)
        parser.parse(InputSource(TEST_XMLFILE))

        self.assertEqual(result.getvalue(), xml_test_out)

    @requires_unicode_filenames
    def test_expat_inpsource_sysid_unicode(self):
        fname = support.TESTFN_UNICODE
        shutil.copyfile(TEST_XMLFILE, fname)
        self.addCleanup(support.unlink, fname)

        parser = create_parser()
        result = StringIO()
        xmlgen = XMLGenerator(result)

        parser.setContentHandler(xmlgen)
        parser.parse(InputSource(fname))

        self.assertEqual(result.getvalue(), xml_test_out)

    def test_expat_inpsource_byte_stream(self):
        parser = create_parser()
        result = StringIO()
        xmlgen = XMLGenerator(result)

        parser.setContentHandler(xmlgen)
        inpsrc = InputSource()
        with open(TEST_XMLFILE) as f:
            inpsrc.setByteStream(f)
            parser.parse(inpsrc)

        self.assertEqual(result.getvalue(), xml_test_out)

    # ===== IncrementalParser support

    def test_expat_incremental(self):
        result = StringIO()
        xmlgen = XMLGenerator(result)
        parser = create_parser()
        parser.setContentHandler(xmlgen)

        parser.feed("<doc>")
        parser.feed("</doc>")
        parser.close()

        self.assertEqual(result.getvalue(), start + "<doc></doc>")

    def test_expat_incremental_reset(self):
        result = StringIO()
        xmlgen = XMLGenerator(result)
        parser = create_parser()
        parser.setContentHandler(xmlgen)

        parser.feed("<doc>")
        parser.feed("text")

        # Abandon the unfinished document and start over with a fresh
        # output buffer and handler.
        result = StringIO()
        xmlgen = XMLGenerator(result)
        parser.setContentHandler(xmlgen)
        parser.reset()

        parser.feed("<doc>")
        parser.feed("text")
        parser.feed("</doc>")
        parser.close()

        self.assertEqual(result.getvalue(), start + "<doc>text</doc>")

    # ===== Locator support

    def test_expat_locator_noinfo(self):
        result = StringIO()
        xmlgen = XMLGenerator(result)
        parser = create_parser()
        parser.setContentHandler(xmlgen)

        parser.feed("<doc>")
        parser.feed("</doc>")
        parser.close()

        self.assertEqual(parser.getSystemId(), None)
        self.assertEqual(parser.getPublicId(), None)
        self.assertEqual(parser.getLineNumber(), 1)

    def test_expat_locator_withinfo(self):
        result = StringIO()
        xmlgen = XMLGenerator(result)
        parser = create_parser()
        parser.setContentHandler(xmlgen)
        parser.parse(TEST_XMLFILE)

        self.assertEqual(parser.getSystemId(), TEST_XMLFILE)
        self.assertEqual(parser.getPublicId(), None)

    @requires_unicode_filenames
    def test_expat_locator_withinfo_unicode(self):
        fname = support.TESTFN_UNICODE
        shutil.copyfile(TEST_XMLFILE, fname)
        self.addCleanup(support.unlink, fname)

        result = StringIO()
        xmlgen = XMLGenerator(result)
        parser = create_parser()
        parser.setContentHandler(xmlgen)
        parser.parse(fname)

        self.assertEqual(parser.getSystemId(), fname)
        self.assertEqual(parser.getPublicId(), None)


# ===========================================================================
#
#   error reporting
#
# ===========================================================================

class ErrorReportingTest(unittest.TestCase):
    """Tests for the location details carried by SAX exceptions."""

    def test_expat_inpsource_location(self):
        parser = create_parser()
        parser.setContentHandler(ContentHandler()) # do nothing
        source = InputSource()
        source.setByteStream(StringIO("<foo bar foobar>"))   #ill-formed
        name = "a file name"
        source.setSystemId(name)
        with self.assertRaises(SAXException) as cm:
            parser.parse(source)
        self.assertEqual(cm.exception.getSystemId(), name)

    def test_expat_incomplete(self):
        parser = create_parser()
        parser.setContentHandler(ContentHandler()) # do nothing
        self.assertRaises(SAXParseException, parser.parse, StringIO("<foo>"))
        self.assertEqual(parser.getColumnNumber(), 5)
        self.assertEqual(parser.getLineNumber(), 1)

    def test_sax_parse_exception_str(self):
        # pass various values from a locator to the SAXParseException to
        # make sure that the __str__() doesn't fall apart when None is
        # passed instead of an integer line and column number
        #
        # use "normal" values for the locator:
        str(SAXParseException("message", None,
                              self.DummyLocator(1, 1)))
        # use None for the line number:
        str(SAXParseException("message", None,
                              self.DummyLocator(None, 1)))
        # use None for the column number:
        str(SAXParseException("message", None,
                              self.DummyLocator(1, None)))
        # use None for both:
        str(SAXParseException("message", None,
                              self.DummyLocator(None, None)))

    class DummyLocator:
        # Locator stand-in returning fixed ids and the given line/column.

        def __init__(self, lineno, colno):
            self._lineno = lineno
            self._colno = colno

        def getPublicId(self):
            return "pubid"

        def getSystemId(self):
            return "sysid"

        def getLineNumber(self):
            return self._lineno

        def getColumnNumber(self):
            return self._colno

# ===========================================================================
#
#   xmlreader tests
#
# ===========================================================================

class XmlReaderTest(XmlTestBase):
    """Tests for AttributesImpl/AttributesNSImpl and xml package imports."""

    # ===== AttributesImpl
    def test_attrs_empty(self):
        self.verify_empty_attrs(AttributesImpl({}))

    def test_attrs_wattr(self):
        self.verify_attrs_wattr(AttributesImpl({"attr" : "val"}))

    def test_nsattrs_empty(self):
        self.verify_empty_nsattrs(AttributesNSImpl({}, {}))

    def test_nsattrs_wattr(self):
        attrs = AttributesNSImpl({(ns_uri, "attr") : "val"},
                                 {(ns_uri, "attr") : "ns:attr"})

        self.assertEqual(attrs.getLength(), 1)
        self.assertEqual(attrs.getNames(), [(ns_uri, "attr")])
        self.assertEqual(attrs.getQNames(), ["ns:attr"])
        self.assertEqual(len(attrs), 1)
        self.assertTrue(attrs.has_key((ns_uri, "attr")))
        self.assertEqual(attrs.keys(), [(ns_uri, "attr")])
        self.assertEqual(attrs.get((ns_uri, "attr")), "val")
        self.assertEqual(attrs.get((ns_uri, "attr"), 25), "val")
        self.assertEqual(attrs.items(), [((ns_uri, "attr"), "val")])
        self.assertEqual(attrs.values(), ["val"])
        self.assertEqual(attrs.getValue((ns_uri, "attr")), "val")
        self.assertEqual(attrs.getValueByQName("ns:attr"), "val")
        self.assertEqual(attrs.getNameByQName("ns:attr"), (ns_uri, "attr"))
        self.assertEqual(attrs[(ns_uri, "attr")], "val")
        self.assertEqual(attrs.getQNameByName((ns_uri, "attr")), "ns:attr")


    # During the development of Python 2.5, an attempt to move the "xml"
    # package implementation to a new package ("xmlcore") proved painful.
    # The goal of this change was to allow applications to be able to
    # obtain and rely on behavior in the standard library implementation
    # of the XML support without needing to be concerned about the
    # availability of the PyXML implementation.
    #
    # While the existing import hackery in Lib/xml/__init__.py can cause
    # PyXML's _xmlplus package to supplant the "xml" package, that only
    # works because either implementation uses the "xml" package name for
    # imports.
    #
    # The move resulted in a number of problems related to the fact that
    # the import machinery's "package context" is based on the name that's
    # being imported rather than the __name__ of the actual package
    # containment; it wasn't possible for the "xml" package to be replaced
    # by a simple module that indirected imports to the "xmlcore" package.
    #
    # The following two tests exercised bugs that were introduced in that
    # attempt.  Keeping these tests around will help detect problems with
    # other attempts to provide reliable access to the standard library's
    # implementation of the XML support.

    def test_sf_1511497(self):
        # Bug report: http://www.python.org/sf/1511497
        import sys
        old_modules = sys.modules.copy()
        # Iterate over a snapshot: entries are deleted inside the loop.
        for modname in list(sys.modules):
            if modname.startswith("xml."):
                del sys.modules[modname]
        try:
            import xml.sax.expatreader
            module = xml.sax.expatreader
            self.assertEqual(module.__name__, "xml.sax.expatreader")
        finally:
            sys.modules.update(old_modules)

    def test_sf_1513611(self):
        # Bug report: http://www.python.org/sf/1513611
        sio = StringIO("invalid")
        parser = make_parser()
        from xml.sax import SAXParseException
        self.assertRaises(SAXParseException, parser.parse, sio)


def test_main():
    """Run every test case class defined in this module."""
    run_unittest(MakeParserTest,
                 ParseTest,
                 SaxutilsTest,
                 PrepareInputSourceTest,
                 StringXmlgenTest,
                 BytesIOXmlgenTest,
                 WriterXmlgenTest,
                 ExpatReaderTest,
                 ErrorReportingTest,
                 XmlReaderTest)

if __name__ == "__main__":
    test_main()