1 # regression test for SAX 2.0 2 # $Id$ 3 4 from xml.sax import make_parser, ContentHandler, \ 5 SAXException, SAXReaderNotAvailable, SAXParseException 6 import unittest 7 try: 8 make_parser() 9 except SAXReaderNotAvailable: 10 # don't try to test this module if we cannot create a parser 11 raise unittest.SkipTest("no XML parsers available") 12 from xml.sax.saxutils import XMLGenerator, escape, unescape, quoteattr, \ 13 XMLFilterBase, prepare_input_source 14 from xml.sax.expatreader import create_parser 15 from xml.sax.handler import feature_namespaces 16 from xml.sax.xmlreader import InputSource, AttributesImpl, AttributesNSImpl 17 from io import BytesIO, StringIO 18 import codecs 19 import gc 20 import os.path 21 import shutil 22 from test import support 23 from test.support import findfile, run_unittest, TESTFN 24 25 TEST_XMLFILE = findfile("test.xml", subdir="xmltestdata") 26 TEST_XMLFILE_OUT = findfile("test.xml.out", subdir="xmltestdata") 27 try: 28 TEST_XMLFILE.encode("utf-8") 29 TEST_XMLFILE_OUT.encode("utf-8") 30 except UnicodeEncodeError: 31 raise unittest.SkipTest("filename is not encodable to utf8") 32 33 supports_nonascii_filenames = True 34 if not os.path.supports_unicode_filenames: 35 try: 36 support.TESTFN_UNICODE.encode(support.TESTFN_ENCODING) 37 except (UnicodeError, TypeError): 38 # Either the file system encoding is None, or the file name 39 # cannot be encoded in the file system encoding. 40 supports_nonascii_filenames = False 41 requires_nonascii_filenames = unittest.skipUnless( 42 supports_nonascii_filenames, 43 'Requires non-ascii filenames support') 44 45 ns_uri = "http://www.python.org/xml-ns/saxtest/" 46 47 class XmlTestBase(unittest.TestCase): 48 def verify_empty_attrs(self, attrs): 49 self.assertRaises(KeyError, attrs.getValue, "attr") 50 self.assertRaises(KeyError, attrs.getValueByQName, "attr") 51 self.assertRaises(KeyError, attrs.getNameByQName, "attr") 52 self.assertRaises(KeyError, attrs.getQNameByName, "attr") 53 self.assertRaises(KeyError, attrs.__getitem__, "attr") 54 self.assertEqual(attrs.getLength(), 0) 55 self.assertEqual(attrs.getNames(), []) 56 self.assertEqual(attrs.getQNames(), []) 57 self.assertEqual(len(attrs), 0) 58 self.assertNotIn("attr", attrs) 59 self.assertEqual(list(attrs.keys()), []) 60 self.assertEqual(attrs.get("attrs"), None) 61 self.assertEqual(attrs.get("attrs", 25), 25) 62 self.assertEqual(list(attrs.items()), []) 63 self.assertEqual(list(attrs.values()), []) 64 65 def verify_empty_nsattrs(self, attrs): 66 self.assertRaises(KeyError, attrs.getValue, (ns_uri, "attr")) 67 self.assertRaises(KeyError, attrs.getValueByQName, "ns:attr") 68 self.assertRaises(KeyError, attrs.getNameByQName, "ns:attr") 69 self.assertRaises(KeyError, attrs.getQNameByName, (ns_uri, "attr")) 70 self.assertRaises(KeyError, attrs.__getitem__, (ns_uri, "attr")) 71 self.assertEqual(attrs.getLength(), 0) 72 self.assertEqual(attrs.getNames(), []) 73 self.assertEqual(attrs.getQNames(), []) 74 self.assertEqual(len(attrs), 0) 75 self.assertNotIn((ns_uri, "attr"), attrs) 76 self.assertEqual(list(attrs.keys()), []) 77 self.assertEqual(attrs.get((ns_uri, "attr")), None) 78 self.assertEqual(attrs.get((ns_uri, "attr"), 25), 25) 79 self.assertEqual(list(attrs.items()), []) 80 self.assertEqual(list(attrs.values()), []) 81 82 def verify_attrs_wattr(self, attrs): 83 self.assertEqual(attrs.getLength(), 1) 84 self.assertEqual(attrs.getNames(), ["attr"]) 85 self.assertEqual(attrs.getQNames(), ["attr"]) 86 self.assertEqual(len(attrs), 1) 87 self.assertIn("attr", attrs) 88 self.assertEqual(list(attrs.keys()), ["attr"]) 89 self.assertEqual(attrs.get("attr"), "val") 90 self.assertEqual(attrs.get("attr", 25), "val") 91 self.assertEqual(list(attrs.items()), [("attr", "val")]) 92 self.assertEqual(list(attrs.values()), ["val"]) 93 self.assertEqual(attrs.getValue("attr"), "val") 94 self.assertEqual(attrs.getValueByQName("attr"), "val") 95 self.assertEqual(attrs.getNameByQName("attr"), "attr") 96 self.assertEqual(attrs["attr"], "val") 97 self.assertEqual(attrs.getQNameByName("attr"), "attr") 98 99 100 def xml_str(doc, encoding=None): 101 if encoding is None: 102 return doc 103 return '<?xml version="1.0" encoding="%s"?>\n%s' % (encoding, doc) 104 105 def xml_bytes(doc, encoding, decl_encoding=...): 106 if decl_encoding is ...: 107 decl_encoding = encoding 108 return xml_str(doc, decl_encoding).encode(encoding, 'xmlcharrefreplace') 109 110 def make_xml_file(doc, encoding, decl_encoding=...): 111 if decl_encoding is ...: 112 decl_encoding = encoding 113 with open(TESTFN, 'w', encoding=encoding, errors='xmlcharrefreplace') as f: 114 f.write(xml_str(doc, decl_encoding)) 115 116 117 class ParseTest(unittest.TestCase): 118 data = '<money value="$\xa3\u20ac\U0001017b">$\xa3\u20ac\U0001017b</money>' 119 120 def tearDown(self): 121 support.unlink(TESTFN) 122 123 def check_parse(self, f): 124 from xml.sax import parse 125 result = StringIO() 126 parse(f, XMLGenerator(result, 'utf-8')) 127 self.assertEqual(result.getvalue(), xml_str(self.data, 'utf-8')) 128 129 def test_parse_text(self): 130 encodings = ('us-ascii', 'iso-8859-1', 'utf-8', 131 'utf-16', 'utf-16le', 'utf-16be') 132 for encoding in encodings: 133 self.check_parse(StringIO(xml_str(self.data, encoding))) 134 make_xml_file(self.data, encoding) 135 with open(TESTFN, 'r', encoding=encoding) as f: 136 self.check_parse(f) 137 self.check_parse(StringIO(self.data)) 138 make_xml_file(self.data, encoding, None) 139 with open(TESTFN, 'r', encoding=encoding) as f: 140 self.check_parse(f) 141 142 def test_parse_bytes(self): 143 # UTF-8 is default encoding, US-ASCII is compatible with UTF-8, 144 # UTF-16 is autodetected 145 encodings = ('us-ascii', 'utf-8', 'utf-16', 'utf-16le', 'utf-16be') 146 for encoding in encodings: 147 self.check_parse(BytesIO(xml_bytes(self.data, encoding))) 148 make_xml_file(self.data, encoding) 149 self.check_parse(TESTFN) 150 with open(TESTFN, 'rb') as f: 151 self.check_parse(f) 152 self.check_parse(BytesIO(xml_bytes(self.data, encoding, None))) 153 make_xml_file(self.data, encoding, None) 154 self.check_parse(TESTFN) 155 with open(TESTFN, 'rb') as f: 156 self.check_parse(f) 157 # accept UTF-8 with BOM 158 self.check_parse(BytesIO(xml_bytes(self.data, 'utf-8-sig', 'utf-8'))) 159 make_xml_file(self.data, 'utf-8-sig', 'utf-8') 160 self.check_parse(TESTFN) 161 with open(TESTFN, 'rb') as f: 162 self.check_parse(f) 163 self.check_parse(BytesIO(xml_bytes(self.data, 'utf-8-sig', None))) 164 make_xml_file(self.data, 'utf-8-sig', None) 165 self.check_parse(TESTFN) 166 with open(TESTFN, 'rb') as f: 167 self.check_parse(f) 168 # accept data with declared encoding 169 self.check_parse(BytesIO(xml_bytes(self.data, 'iso-8859-1'))) 170 make_xml_file(self.data, 'iso-8859-1') 171 self.check_parse(TESTFN) 172 with open(TESTFN, 'rb') as f: 173 self.check_parse(f) 174 # fail on non-UTF-8 incompatible data without declared encoding 175 with self.assertRaises(SAXException): 176 self.check_parse(BytesIO(xml_bytes(self.data, 'iso-8859-1', None))) 177 make_xml_file(self.data, 'iso-8859-1', None) 178 with support.check_warnings(('unclosed file', ResourceWarning)): 179 # XXX Failed parser leaks an opened file. 180 with self.assertRaises(SAXException): 181 self.check_parse(TESTFN) 182 # Collect leaked file. 183 gc.collect() 184 with open(TESTFN, 'rb') as f: 185 with self.assertRaises(SAXException): 186 self.check_parse(f) 187 188 def test_parse_InputSource(self): 189 # accept data without declared but with explicitly specified encoding 190 make_xml_file(self.data, 'iso-8859-1', None) 191 with open(TESTFN, 'rb') as f: 192 input = InputSource() 193 input.setByteStream(f) 194 input.setEncoding('iso-8859-1') 195 self.check_parse(input) 196 197 def check_parseString(self, s): 198 from xml.sax import parseString 199 result = StringIO() 200 parseString(s, XMLGenerator(result, 'utf-8')) 201 self.assertEqual(result.getvalue(), xml_str(self.data, 'utf-8')) 202 203 def test_parseString_text(self): 204 encodings = ('us-ascii', 'iso-8859-1', 'utf-8', 205 'utf-16', 'utf-16le', 'utf-16be') 206 for encoding in encodings: 207 self.check_parseString(xml_str(self.data, encoding)) 208 self.check_parseString(self.data) 209 210 def test_parseString_bytes(self): 211 # UTF-8 is default encoding, US-ASCII is compatible with UTF-8, 212 # UTF-16 is autodetected 213 encodings = ('us-ascii', 'utf-8', 'utf-16', 'utf-16le', 'utf-16be') 214 for encoding in encodings: 215 self.check_parseString(xml_bytes(self.data, encoding)) 216 self.check_parseString(xml_bytes(self.data, encoding, None)) 217 # accept UTF-8 with BOM 218 self.check_parseString(xml_bytes(self.data, 'utf-8-sig', 'utf-8')) 219 self.check_parseString(xml_bytes(self.data, 'utf-8-sig', None)) 220 # accept data with declared encoding 221 self.check_parseString(xml_bytes(self.data, 'iso-8859-1')) 222 # fail on non-UTF-8 incompatible data without declared encoding 223 with self.assertRaises(SAXException): 224 self.check_parseString(xml_bytes(self.data, 'iso-8859-1', None)) 225 226 class MakeParserTest(unittest.TestCase): 227 def test_make_parser2(self): 228 # Creating parsers several times in a row should succeed. 229 # Testing this because there have been failures of this kind 230 # before. 231 from xml.sax import make_parser 232 p = make_parser() 233 from xml.sax import make_parser 234 p = make_parser() 235 from xml.sax import make_parser 236 p = make_parser() 237 from xml.sax import make_parser 238 p = make_parser() 239 from xml.sax import make_parser 240 p = make_parser() 241 from xml.sax import make_parser 242 p = make_parser() 243 244 245 # =========================================================================== 246 # 247 # saxutils tests 248 # 249 # =========================================================================== 250 251 class SaxutilsTest(unittest.TestCase): 252 # ===== escape 253 def test_escape_basic(self): 254 self.assertEqual(escape("Donald Duck & Co"), "Donald Duck & Co") 255 256 def test_escape_all(self): 257 self.assertEqual(escape("<Donald Duck & Co>"), 258 "<Donald Duck & Co>") 259 260 def test_escape_extra(self): 261 self.assertEqual(escape("Hei p deg", {"" : "å"}), 262 "Hei på deg") 263 264 # ===== unescape 265 def test_unescape_basic(self): 266 self.assertEqual(unescape("Donald Duck & Co"), "Donald Duck & Co") 267 268 def test_unescape_all(self): 269 self.assertEqual(unescape("<Donald Duck & Co>"), 270 "<Donald Duck & Co>") 271 272 def test_unescape_extra(self): 273 self.assertEqual(unescape("Hei p deg", {"" : "å"}), 274 "Hei på deg") 275 276 def test_unescape_amp_extra(self): 277 self.assertEqual(unescape("&foo;", {"&foo;": "splat"}), "&foo;") 278 279 # ===== quoteattr 280 def test_quoteattr_basic(self): 281 self.assertEqual(quoteattr("Donald Duck & Co"), 282 '"Donald Duck & Co"') 283 284 def test_single_quoteattr(self): 285 self.assertEqual(quoteattr('Includes "double" quotes'), 286 '\'Includes "double" quotes\'') 287 288 def test_double_quoteattr(self): 289 self.assertEqual(quoteattr("Includes 'single' quotes"), 290 "\"Includes 'single' quotes\"") 291 292 def test_single_double_quoteattr(self): 293 self.assertEqual(quoteattr("Includes 'single' and \"double\" quotes"), 294 "\"Includes 'single' and "double" quotes\"") 295 296 # ===== make_parser 297 def test_make_parser(self): 298 # Creating a parser should succeed - it should fall back 299 # to the expatreader 300 p = make_parser(['xml.parsers.no_such_parser']) 301 302 303 class PrepareInputSourceTest(unittest.TestCase): 304 305 def setUp(self): 306 self.file = support.TESTFN 307 with open(self.file, "w") as tmp: 308 tmp.write("This was read from a file.") 309 310 def tearDown(self): 311 support.unlink(self.file) 312 313 def make_byte_stream(self): 314 return BytesIO(b"This is a byte stream.") 315 316 def make_character_stream(self): 317 return StringIO("This is a character stream.") 318 319 def checkContent(self, stream, content): 320 self.assertIsNotNone(stream) 321 self.assertEqual(stream.read(), content) 322 stream.close() 323 324 325 def test_character_stream(self): 326 # If the source is an InputSource with a character stream, use it. 327 src = InputSource(self.file) 328 src.setCharacterStream(self.make_character_stream()) 329 prep = prepare_input_source(src) 330 self.assertIsNone(prep.getByteStream()) 331 self.checkContent(prep.getCharacterStream(), 332 "This is a character stream.") 333 334 def test_byte_stream(self): 335 # If the source is an InputSource that does not have a character 336 # stream but does have a byte stream, use the byte stream. 337 src = InputSource(self.file) 338 src.setByteStream(self.make_byte_stream()) 339 prep = prepare_input_source(src) 340 self.assertIsNone(prep.getCharacterStream()) 341 self.checkContent(prep.getByteStream(), 342 b"This is a byte stream.") 343 344 def test_system_id(self): 345 # If the source is an InputSource that has neither a character 346 # stream nor a byte stream, open the system ID. 347 src = InputSource(self.file) 348 prep = prepare_input_source(src) 349 self.assertIsNone(prep.getCharacterStream()) 350 self.checkContent(prep.getByteStream(), 351 b"This was read from a file.") 352 353 def test_string(self): 354 # If the source is a string, use it as a system ID and open it. 355 prep = prepare_input_source(self.file) 356 self.assertIsNone(prep.getCharacterStream()) 357 self.checkContent(prep.getByteStream(), 358 b"This was read from a file.") 359 360 def test_binary_file(self): 361 # If the source is a binary file-like object, use it as a byte 362 # stream. 363 prep = prepare_input_source(self.make_byte_stream()) 364 self.assertIsNone(prep.getCharacterStream()) 365 self.checkContent(prep.getByteStream(), 366 b"This is a byte stream.") 367 368 def test_text_file(self): 369 # If the source is a text file-like object, use it as a character 370 # stream. 371 prep = prepare_input_source(self.make_character_stream()) 372 self.assertIsNone(prep.getByteStream()) 373 self.checkContent(prep.getCharacterStream(), 374 "This is a character stream.") 375 376 377 # ===== XMLGenerator 378 379 class XmlgenTest: 380 def test_xmlgen_basic(self): 381 result = self.ioclass() 382 gen = XMLGenerator(result) 383 gen.startDocument() 384 gen.startElement("doc", {}) 385 gen.endElement("doc") 386 gen.endDocument() 387 388 self.assertEqual(result.getvalue(), self.xml("<doc></doc>")) 389 390 def test_xmlgen_basic_empty(self): 391 result = self.ioclass() 392 gen = XMLGenerator(result, short_empty_elements=True) 393 gen.startDocument() 394 gen.startElement("doc", {}) 395 gen.endElement("doc") 396 gen.endDocument() 397 398 self.assertEqual(result.getvalue(), self.xml("<doc/>")) 399 400 def test_xmlgen_content(self): 401 result = self.ioclass() 402 gen = XMLGenerator(result) 403 404 gen.startDocument() 405 gen.startElement("doc", {}) 406 gen.characters("huhei") 407 gen.endElement("doc") 408 gen.endDocument() 409 410 self.assertEqual(result.getvalue(), self.xml("<doc>huhei</doc>")) 411 412 def test_xmlgen_content_empty(self): 413 result = self.ioclass() 414 gen = XMLGenerator(result, short_empty_elements=True) 415 416 gen.startDocument() 417 gen.startElement("doc", {}) 418 gen.characters("huhei") 419 gen.endElement("doc") 420 gen.endDocument() 421 422 self.assertEqual(result.getvalue(), self.xml("<doc>huhei</doc>")) 423 424 def test_xmlgen_pi(self): 425 result = self.ioclass() 426 gen = XMLGenerator(result) 427 428 gen.startDocument() 429 gen.processingInstruction("test", "data") 430 gen.startElement("doc", {}) 431 gen.endElement("doc") 432 gen.endDocument() 433 434 self.assertEqual(result.getvalue(), 435 self.xml("<?test data?><doc></doc>")) 436 437 def test_xmlgen_content_escape(self): 438 result = self.ioclass() 439 gen = XMLGenerator(result) 440 441 gen.startDocument() 442 gen.startElement("doc", {}) 443 gen.characters("<huhei&") 444 gen.endElement("doc") 445 gen.endDocument() 446 447 self.assertEqual(result.getvalue(), 448 self.xml("<doc><huhei&</doc>")) 449 450 def test_xmlgen_attr_escape(self): 451 result = self.ioclass() 452 gen = XMLGenerator(result) 453 454 gen.startDocument() 455 gen.startElement("doc", {"a": '"'}) 456 gen.startElement("e", {"a": "'"}) 457 gen.endElement("e") 458 gen.startElement("e", {"a": "'\""}) 459 gen.endElement("e") 460 gen.startElement("e", {"a": "\n\r\t"}) 461 gen.endElement("e") 462 gen.endElement("doc") 463 gen.endDocument() 464 465 self.assertEqual(result.getvalue(), self.xml( 466 "<doc a='\"'><e a=\"'\"></e>" 467 "<e a=\"'"\"></e>" 468 "<e a=\" 	\"></e></doc>")) 469 470 def test_xmlgen_encoding(self): 471 encodings = ('iso-8859-15', 'utf-8', 'utf-8-sig', 472 'utf-16', 'utf-16be', 'utf-16le', 473 'utf-32', 'utf-32be', 'utf-32le') 474 for encoding in encodings: 475 result = self.ioclass() 476 gen = XMLGenerator(result, encoding=encoding) 477 478 gen.startDocument() 479 gen.startElement("doc", {"a": '\u20ac'}) 480 gen.characters("\u20ac") 481 gen.endElement("doc") 482 gen.endDocument() 483 484 self.assertEqual(result.getvalue(), 485 self.xml('<doc a="\u20ac">\u20ac</doc>', encoding=encoding)) 486 487 def test_xmlgen_unencodable(self): 488 result = self.ioclass() 489 gen = XMLGenerator(result, encoding='ascii') 490 491 gen.startDocument() 492 gen.startElement("doc", {"a": '\u20ac'}) 493 gen.characters("\u20ac") 494 gen.endElement("doc") 495 gen.endDocument() 496 497 self.assertEqual(result.getvalue(), 498 self.xml('<doc a="€">€</doc>', encoding='ascii')) 499 500 def test_xmlgen_ignorable(self): 501 result = self.ioclass() 502 gen = XMLGenerator(result) 503 504 gen.startDocument() 505 gen.startElement("doc", {}) 506 gen.ignorableWhitespace(" ") 507 gen.endElement("doc") 508 gen.endDocument() 509 510 self.assertEqual(result.getvalue(), self.xml("<doc> </doc>")) 511 512 def test_xmlgen_ignorable_empty(self): 513 result = self.ioclass() 514 gen = XMLGenerator(result, short_empty_elements=True) 515 516 gen.startDocument() 517 gen.startElement("doc", {}) 518 gen.ignorableWhitespace(" ") 519 gen.endElement("doc") 520 gen.endDocument() 521 522 self.assertEqual(result.getvalue(), self.xml("<doc> </doc>")) 523 524 def test_xmlgen_encoding_bytes(self): 525 encodings = ('iso-8859-15', 'utf-8', 'utf-8-sig', 526 'utf-16', 'utf-16be', 'utf-16le', 527 'utf-32', 'utf-32be', 'utf-32le') 528 for encoding in encodings: 529 result = self.ioclass() 530 gen = XMLGenerator(result, encoding=encoding) 531 532 gen.startDocument() 533 gen.startElement("doc", {"a": '\u20ac'}) 534 gen.characters("\u20ac".encode(encoding)) 535 gen.ignorableWhitespace(" ".encode(encoding)) 536 gen.endElement("doc") 537 gen.endDocument() 538 539 self.assertEqual(result.getvalue(), 540 self.xml('<doc a="\u20ac">\u20ac </doc>', encoding=encoding)) 541 542 def test_xmlgen_ns(self): 543 result = self.ioclass() 544 gen = XMLGenerator(result) 545 546 gen.startDocument() 547 gen.startPrefixMapping("ns1", ns_uri) 548 gen.startElementNS((ns_uri, "doc"), "ns1:doc", {}) 549 # add an unqualified name 550 gen.startElementNS((None, "udoc"), None, {}) 551 gen.endElementNS((None, "udoc"), None) 552 gen.endElementNS((ns_uri, "doc"), "ns1:doc") 553 gen.endPrefixMapping("ns1") 554 gen.endDocument() 555 556 self.assertEqual(result.getvalue(), self.xml( 557 '<ns1:doc xmlns:ns1="%s"><udoc></udoc></ns1:doc>' % 558 ns_uri)) 559 560 def test_xmlgen_ns_empty(self): 561 result = self.ioclass() 562 gen = XMLGenerator(result, short_empty_elements=True) 563 564 gen.startDocument() 565 gen.startPrefixMapping("ns1", ns_uri) 566 gen.startElementNS((ns_uri, "doc"), "ns1:doc", {}) 567 # add an unqualified name 568 gen.startElementNS((None, "udoc"), None, {}) 569 gen.endElementNS((None, "udoc"), None) 570 gen.endElementNS((ns_uri, "doc"), "ns1:doc") 571 gen.endPrefixMapping("ns1") 572 gen.endDocument() 573 574 self.assertEqual(result.getvalue(), self.xml( 575 '<ns1:doc xmlns:ns1="%s"><udoc/></ns1:doc>' % 576 ns_uri)) 577 578 def test_1463026_1(self): 579 result = self.ioclass() 580 gen = XMLGenerator(result) 581 582 gen.startDocument() 583 gen.startElementNS((None, 'a'), 'a', {(None, 'b'):'c'}) 584 gen.endElementNS((None, 'a'), 'a') 585 gen.endDocument() 586 587 self.assertEqual(result.getvalue(), self.xml('<a b="c"></a>')) 588 589 def test_1463026_1_empty(self): 590 result = self.ioclass() 591 gen = XMLGenerator(result, short_empty_elements=True) 592 593 gen.startDocument() 594 gen.startElementNS((None, 'a'), 'a', {(None, 'b'):'c'}) 595 gen.endElementNS((None, 'a'), 'a') 596 gen.endDocument() 597 598 self.assertEqual(result.getvalue(), self.xml('<a b="c"/>')) 599 600 def test_1463026_2(self): 601 result = self.ioclass() 602 gen = XMLGenerator(result) 603 604 gen.startDocument() 605 gen.startPrefixMapping(None, 'qux') 606 gen.startElementNS(('qux', 'a'), 'a', {}) 607 gen.endElementNS(('qux', 'a'), 'a') 608 gen.endPrefixMapping(None) 609 gen.endDocument() 610 611 self.assertEqual(result.getvalue(), self.xml('<a xmlns="qux"></a>')) 612 613 def test_1463026_2_empty(self): 614 result = self.ioclass() 615 gen = XMLGenerator(result, short_empty_elements=True) 616 617 gen.startDocument() 618 gen.startPrefixMapping(None, 'qux') 619 gen.startElementNS(('qux', 'a'), 'a', {}) 620 gen.endElementNS(('qux', 'a'), 'a') 621 gen.endPrefixMapping(None) 622 gen.endDocument() 623 624 self.assertEqual(result.getvalue(), self.xml('<a xmlns="qux"/>')) 625 626 def test_1463026_3(self): 627 result = self.ioclass() 628 gen = XMLGenerator(result) 629 630 gen.startDocument() 631 gen.startPrefixMapping('my', 'qux') 632 gen.startElementNS(('qux', 'a'), 'a', {(None, 'b'):'c'}) 633 gen.endElementNS(('qux', 'a'), 'a') 634 gen.endPrefixMapping('my') 635 gen.endDocument() 636 637 self.assertEqual(result.getvalue(), 638 self.xml('<my:a xmlns:my="qux" b="c"></my:a>')) 639 640 def test_1463026_3_empty(self): 641 result = self.ioclass() 642 gen = XMLGenerator(result, short_empty_elements=True) 643 644 gen.startDocument() 645 gen.startPrefixMapping('my', 'qux') 646 gen.startElementNS(('qux', 'a'), 'a', {(None, 'b'):'c'}) 647 gen.endElementNS(('qux', 'a'), 'a') 648 gen.endPrefixMapping('my') 649 gen.endDocument() 650 651 self.assertEqual(result.getvalue(), 652 self.xml('<my:a xmlns:my="qux" b="c"/>')) 653 654 def test_5027_1(self): 655 # The xml prefix (as in xml:lang below) is reserved and bound by 656 # definition to http://www.w3.org/XML/1998/namespace. XMLGenerator had 657 # a bug whereby a KeyError is raised because this namespace is missing 658 # from a dictionary. 659 # 660 # This test demonstrates the bug by parsing a document. 661 test_xml = StringIO( 662 '<?xml version="1.0"?>' 663 '<a:g1 xmlns:a="http://example.com/ns">' 664 '<a:g2 xml:lang="en">Hello</a:g2>' 665 '</a:g1>') 666 667 parser = make_parser() 668 parser.setFeature(feature_namespaces, True) 669 result = self.ioclass() 670 gen = XMLGenerator(result) 671 parser.setContentHandler(gen) 672 parser.parse(test_xml) 673 674 self.assertEqual(result.getvalue(), 675 self.xml( 676 '<a:g1 xmlns:a="http://example.com/ns">' 677 '<a:g2 xml:lang="en">Hello</a:g2>' 678 '</a:g1>')) 679 680 def test_5027_2(self): 681 # The xml prefix (as in xml:lang below) is reserved and bound by 682 # definition to http://www.w3.org/XML/1998/namespace. XMLGenerator had 683 # a bug whereby a KeyError is raised because this namespace is missing 684 # from a dictionary. 685 # 686 # This test demonstrates the bug by direct manipulation of the 687 # XMLGenerator. 688 result = self.ioclass() 689 gen = XMLGenerator(result) 690 691 gen.startDocument() 692 gen.startPrefixMapping('a', 'http://example.com/ns') 693 gen.startElementNS(('http://example.com/ns', 'g1'), 'g1', {}) 694 lang_attr = {('http://www.w3.org/XML/1998/namespace', 'lang'): 'en'} 695 gen.startElementNS(('http://example.com/ns', 'g2'), 'g2', lang_attr) 696 gen.characters('Hello') 697 gen.endElementNS(('http://example.com/ns', 'g2'), 'g2') 698 gen.endElementNS(('http://example.com/ns', 'g1'), 'g1') 699 gen.endPrefixMapping('a') 700 gen.endDocument() 701 702 self.assertEqual(result.getvalue(), 703 self.xml( 704 '<a:g1 xmlns:a="http://example.com/ns">' 705 '<a:g2 xml:lang="en">Hello</a:g2>' 706 '</a:g1>')) 707 708 def test_no_close_file(self): 709 result = self.ioclass() 710 def func(out): 711 gen = XMLGenerator(out) 712 gen.startDocument() 713 gen.startElement("doc", {}) 714 func(result) 715 self.assertFalse(result.closed) 716 717 def test_xmlgen_fragment(self): 718 result = self.ioclass() 719 gen = XMLGenerator(result) 720 721 # Don't call gen.startDocument() 722 gen.startElement("foo", {"a": "1.0"}) 723 gen.characters("Hello") 724 gen.endElement("foo") 725 gen.startElement("bar", {"b": "2.0"}) 726 gen.endElement("bar") 727 # Don't call gen.endDocument() 728 729 self.assertEqual(result.getvalue(), 730 self.xml('<foo a="1.0">Hello</foo><bar b="2.0"></bar>')[len(self.xml('')):]) 731 732 class StringXmlgenTest(XmlgenTest, unittest.TestCase): 733 ioclass = StringIO 734 735 def xml(self, doc, encoding='iso-8859-1'): 736 return '<?xml version="1.0" encoding="%s"?>\n%s' % (encoding, doc) 737 738 test_xmlgen_unencodable = None 739 740 class BytesXmlgenTest(XmlgenTest, unittest.TestCase): 741 ioclass = BytesIO 742 743 def xml(self, doc, encoding='iso-8859-1'): 744 return ('<?xml version="1.0" encoding="%s"?>\n%s' % 745 (encoding, doc)).encode(encoding, 'xmlcharrefreplace') 746 747 class WriterXmlgenTest(BytesXmlgenTest): 748 class ioclass(list): 749 write = list.append 750 closed = False 751 752 def seekable(self): 753 return True 754 755 def tell(self): 756 # return 0 at start and not 0 after start 757 return len(self) 758 759 def getvalue(self): 760 return b''.join(self) 761 762 class StreamWriterXmlgenTest(XmlgenTest, unittest.TestCase): 763 def ioclass(self): 764 raw = BytesIO() 765 writer = codecs.getwriter('ascii')(raw, 'xmlcharrefreplace') 766 writer.getvalue = raw.getvalue 767 return writer 768 769 def xml(self, doc, encoding='iso-8859-1'): 770 return ('<?xml version="1.0" encoding="%s"?>\n%s' % 771 (encoding, doc)).encode('ascii', 'xmlcharrefreplace') 772 773 class StreamReaderWriterXmlgenTest(XmlgenTest, unittest.TestCase): 774 fname = support.TESTFN + '-codecs' 775 776 def ioclass(self): 777 writer = codecs.open(self.fname, 'w', encoding='ascii', 778 errors='xmlcharrefreplace', buffering=0) 779 def cleanup(): 780 writer.close() 781 support.unlink(self.fname) 782 self.addCleanup(cleanup) 783 def getvalue(): 784 # Windows will not let use reopen without first closing 785 writer.close() 786 with open(writer.name, 'rb') as f: 787 return f.read() 788 writer.getvalue = getvalue 789 return writer 790 791 def xml(self, doc, encoding='iso-8859-1'): 792 return ('<?xml version="1.0" encoding="%s"?>\n%s' % 793 (encoding, doc)).encode('ascii', 'xmlcharrefreplace') 794 795 start = b'<?xml version="1.0" encoding="iso-8859-1"?>\n' 796 797 798 class XMLFilterBaseTest(unittest.TestCase): 799 def test_filter_basic(self): 800 result = BytesIO() 801 gen = XMLGenerator(result) 802 filter = XMLFilterBase() 803 filter.setContentHandler(gen) 804 805 filter.startDocument() 806 filter.startElement("doc", {}) 807 filter.characters("content") 808 filter.ignorableWhitespace(" ") 809 filter.endElement("doc") 810 filter.endDocument() 811 812 self.assertEqual(result.getvalue(), start + b"<doc>content </doc>") 813 814 # =========================================================================== 815 # 816 # expatreader tests 817 # 818 # =========================================================================== 819 820 with open(TEST_XMLFILE_OUT, 'rb') as f: 821 xml_test_out = f.read() 822 823 class ExpatReaderTest(XmlTestBase): 824 825 # ===== XMLReader support 826 827 def test_expat_binary_file(self): 828 parser = create_parser() 829 result = BytesIO() 830 xmlgen = XMLGenerator(result) 831 832 parser.setContentHandler(xmlgen) 833 with open(TEST_XMLFILE, 'rb') as f: 834 parser.parse(f) 835 836 self.assertEqual(result.getvalue(), xml_test_out) 837 838 def test_expat_text_file(self): 839 parser = create_parser() 840 result = BytesIO() 841 xmlgen = XMLGenerator(result) 842 843 parser.setContentHandler(xmlgen) 844 with open(TEST_XMLFILE, 'rt', encoding='iso-8859-1') as f: 845 parser.parse(f) 846 847 self.assertEqual(result.getvalue(), xml_test_out) 848 849 @requires_nonascii_filenames 850 def test_expat_binary_file_nonascii(self): 851 fname = support.TESTFN_UNICODE 852 shutil.copyfile(TEST_XMLFILE, fname) 853 self.addCleanup(support.unlink, fname) 854 855 parser = create_parser() 856 result = BytesIO() 857 xmlgen = XMLGenerator(result) 858 859 parser.setContentHandler(xmlgen) 860 parser.parse(open(fname, 'rb')) 861 862 self.assertEqual(result.getvalue(), xml_test_out) 863 864 def test_expat_binary_file_bytes_name(self): 865 fname = os.fsencode(TEST_XMLFILE) 866 parser = create_parser() 867 result = BytesIO() 868 xmlgen = XMLGenerator(result) 869 870 parser.setContentHandler(xmlgen) 871 with open(fname, 'rb') as f: 872 parser.parse(f) 873 874 self.assertEqual(result.getvalue(), xml_test_out) 875 876 def test_expat_binary_file_int_name(self): 877 parser = create_parser() 878 result = BytesIO() 879 xmlgen = XMLGenerator(result) 880 881 parser.setContentHandler(xmlgen) 882 with open(TEST_XMLFILE, 'rb') as f: 883 with open(f.fileno(), 'rb', closefd=False) as f2: 884 parser.parse(f2) 885 886 self.assertEqual(result.getvalue(), xml_test_out) 887 888 # ===== DTDHandler support 889 890 class TestDTDHandler: 891 892 def __init__(self): 893 self._notations = [] 894 self._entities = [] 895 896 def notationDecl(self, name, publicId, systemId): 897 self._notations.append((name, publicId, systemId)) 898 899 def unparsedEntityDecl(self, name, publicId, systemId, ndata): 900 self._entities.append((name, publicId, systemId, ndata)) 901 902 def test_expat_dtdhandler(self): 903 parser = create_parser() 904 handler = self.TestDTDHandler() 905 parser.setDTDHandler(handler) 906 907 parser.feed('<!DOCTYPE doc [\n') 908 parser.feed(' <!ENTITY img SYSTEM "expat.gif" NDATA GIF>\n') 909 parser.feed(' <!NOTATION GIF PUBLIC "-//CompuServe//NOTATION Graphics Interchange Format 89a//EN">\n') 910 parser.feed(']>\n') 911 parser.feed('<doc></doc>') 912 parser.close() 913 914 self.assertEqual(handler._notations, 915 [("GIF", "-//CompuServe//NOTATION Graphics Interchange Format 89a//EN", None)]) 916 self.assertEqual(handler._entities, [("img", None, "expat.gif", "GIF")]) 917 918 # ===== EntityResolver support 919 920 class TestEntityResolver: 921 922 def resolveEntity(self, publicId, systemId): 923 inpsrc = InputSource() 924 inpsrc.setByteStream(BytesIO(b"<entity/>")) 925 return inpsrc 926 927 def test_expat_entityresolver(self): 928 parser = create_parser() 929 parser.setEntityResolver(self.TestEntityResolver()) 930 result = BytesIO() 931 parser.setContentHandler(XMLGenerator(result)) 932 933 parser.feed('<!DOCTYPE doc [\n') 934 parser.feed(' <!ENTITY test SYSTEM "whatever">\n') 935 parser.feed(']>\n') 936 parser.feed('<doc>&test;</doc>') 937 parser.close() 938 939 self.assertEqual(result.getvalue(), start + 940 b"<doc><entity></entity></doc>") 941 942 # ===== Attributes support 943 944 class AttrGatherer(ContentHandler): 945 946 def startElement(self, name, attrs): 947 self._attrs = attrs 948 949 def startElementNS(self, name, qname, attrs): 950 self._attrs = attrs 951 952 def test_expat_attrs_empty(self): 953 parser = create_parser() 954 gather = self.AttrGatherer() 955 parser.setContentHandler(gather) 956 957 parser.feed("<doc/>") 958 parser.close() 959 960 self.verify_empty_attrs(gather._attrs) 961 962 def test_expat_attrs_wattr(self): 963 parser = create_parser() 964 gather = self.AttrGatherer() 965 parser.setContentHandler(gather) 966 967 parser.feed("<doc attr='val'/>") 968 parser.close() 969 970 self.verify_attrs_wattr(gather._attrs) 971 972 def test_expat_nsattrs_empty(self): 973 parser = create_parser(1) 974 gather = self.AttrGatherer() 975 parser.setContentHandler(gather) 976 977 parser.feed("<doc/>") 978 parser.close() 979 980 self.verify_empty_nsattrs(gather._attrs) 981 982 def test_expat_nsattrs_wattr(self): 983 parser = create_parser(1) 984 gather = self.AttrGatherer() 985 parser.setContentHandler(gather) 986 987 parser.feed("<doc xmlns:ns='%s' ns:attr='val'/>" % ns_uri) 988 parser.close() 989 990 attrs = gather._attrs 991 992 self.assertEqual(attrs.getLength(), 1) 993 self.assertEqual(attrs.getNames(), [(ns_uri, "attr")]) 994 self.assertTrue((attrs.getQNames() == [] or 995 attrs.getQNames() == ["ns:attr"])) 996 self.assertEqual(len(attrs), 1) 997 self.assertIn((ns_uri, "attr"), attrs) 998 self.assertEqual(attrs.get((ns_uri, "attr")), "val") 999 self.assertEqual(attrs.get((ns_uri, "attr"), 25), "val") 1000 self.assertEqual(list(attrs.items()), [((ns_uri, "attr"), "val")]) 1001 self.assertEqual(list(attrs.values()), ["val"]) 1002 self.assertEqual(attrs.getValue((ns_uri, "attr")), "val") 1003 self.assertEqual(attrs[(ns_uri, "attr")], "val") 1004 1005 # ===== InputSource support 1006 1007 def test_expat_inpsource_filename(self): 1008 parser = create_parser() 1009 result = BytesIO() 1010 xmlgen = XMLGenerator(result) 1011 1012 parser.setContentHandler(xmlgen) 1013 parser.parse(TEST_XMLFILE) 1014 1015 self.assertEqual(result.getvalue(), xml_test_out) 1016 1017 def test_expat_inpsource_sysid(self): 1018 parser = create_parser() 1019 result = BytesIO() 1020 xmlgen = XMLGenerator(result) 1021 1022 parser.setContentHandler(xmlgen) 1023 parser.parse(InputSource(TEST_XMLFILE)) 1024 1025 self.assertEqual(result.getvalue(), xml_test_out) 1026 1027 @requires_nonascii_filenames 1028 def test_expat_inpsource_sysid_nonascii(self): 1029 fname = support.TESTFN_UNICODE 1030 shutil.copyfile(TEST_XMLFILE, fname) 1031 self.addCleanup(support.unlink, fname) 1032 1033 parser = create_parser() 1034 result = BytesIO() 1035 xmlgen = XMLGenerator(result) 1036 1037 parser.setContentHandler(xmlgen) 1038 parser.parse(InputSource(fname)) 1039 1040 self.assertEqual(result.getvalue(), xml_test_out) 1041 1042 def test_expat_inpsource_byte_stream(self): 1043 parser = create_parser() 1044 result = BytesIO() 1045 xmlgen = XMLGenerator(result) 1046 1047 parser.setContentHandler(xmlgen) 1048 inpsrc = InputSource() 1049 with open(TEST_XMLFILE, 'rb') as f: 1050 inpsrc.setByteStream(f) 1051 parser.parse(inpsrc) 1052 1053 self.assertEqual(result.getvalue(), xml_test_out) 1054 1055 def test_expat_inpsource_character_stream(self): 1056 parser = create_parser() 1057 result = BytesIO() 1058 xmlgen = XMLGenerator(result) 1059 1060 parser.setContentHandler(xmlgen) 1061 inpsrc = InputSource() 1062 with open(TEST_XMLFILE, 'rt', encoding='iso-8859-1') as f: 1063 inpsrc.setCharacterStream(f) 1064 parser.parse(inpsrc) 1065 1066 self.assertEqual(result.getvalue(), xml_test_out) 1067 1068 # ===== IncrementalParser support 1069 1070 def test_expat_incremental(self): 1071 result = BytesIO() 1072 xmlgen = XMLGenerator(result) 1073 parser = create_parser() 1074 parser.setContentHandler(xmlgen) 1075 1076 parser.feed("<doc>") 1077 parser.feed("</doc>") 1078 parser.close() 1079 1080 self.assertEqual(result.getvalue(), start + b"<doc></doc>") 1081 1082 def test_expat_incremental_reset(self): 1083 result = BytesIO() 1084 xmlgen = XMLGenerator(result) 1085 parser = create_parser() 1086 parser.setContentHandler(xmlgen) 1087 1088 parser.feed("<doc>") 1089 parser.feed("text") 1090 1091 result = BytesIO() 1092 xmlgen = XMLGenerator(result) 1093 parser.setContentHandler(xmlgen) 1094 parser.reset() 1095 1096 parser.feed("<doc>") 1097 parser.feed("text") 1098 parser.feed("</doc>") 1099 parser.close() 1100 1101 self.assertEqual(result.getvalue(), start + b"<doc>text</doc>") 1102 1103 # ===== Locator support 1104 1105 def test_expat_locator_noinfo(self): 1106 result = BytesIO() 1107 xmlgen = XMLGenerator(result) 1108 parser = create_parser() 1109 parser.setContentHandler(xmlgen) 1110 1111 parser.feed("<doc>") 1112 parser.feed("</doc>") 1113 parser.close() 1114 1115 self.assertEqual(parser.getSystemId(), None) 1116 self.assertEqual(parser.getPublicId(), None) 1117 self.assertEqual(parser.getLineNumber(), 1) 1118 1119 def test_expat_locator_withinfo(self): 1120 result = BytesIO() 1121 xmlgen = XMLGenerator(result) 1122 parser = create_parser() 1123 parser.setContentHandler(xmlgen) 1124 parser.parse(TEST_XMLFILE) 1125 1126 self.assertEqual(parser.getSystemId(), TEST_XMLFILE) 1127 self.assertEqual(parser.getPublicId(), None) 1128 1129 @requires_nonascii_filenames 1130 def test_expat_locator_withinfo_nonascii(self): 1131 fname = support.TESTFN_UNICODE 1132 shutil.copyfile(TEST_XMLFILE, fname) 1133 self.addCleanup(support.unlink, fname) 1134 1135 result = BytesIO() 1136 xmlgen = XMLGenerator(result) 1137 parser = create_parser() 1138 parser.setContentHandler(xmlgen) 1139 parser.parse(fname) 1140 1141 self.assertEqual(parser.getSystemId(), fname) 1142 self.assertEqual(parser.getPublicId(), None) 1143 1144 1145 # =========================================================================== 1146 # 1147 # error reporting 1148 # 1149 # =========================================================================== 1150 1151 class ErrorReportingTest(unittest.TestCase): 1152 def test_expat_inpsource_location(self): 1153 parser = create_parser() 1154 parser.setContentHandler(ContentHandler()) # do nothing 1155 source = InputSource() 1156 source.setByteStream(BytesIO(b"<foo bar foobar>")) #ill-formed 1157 name = "a file name" 1158 source.setSystemId(name) 1159 try: 1160 parser.parse(source) 1161 self.fail() 1162 except SAXException as e: 1163 self.assertEqual(e.getSystemId(), name) 1164 1165 def test_expat_incomplete(self): 1166 parser = create_parser() 1167 parser.setContentHandler(ContentHandler()) # do nothing 1168 self.assertRaises(SAXParseException, parser.parse, StringIO("<foo>")) 1169 self.assertEqual(parser.getColumnNumber(), 5) 1170 self.assertEqual(parser.getLineNumber(), 1) 1171 1172 def test_sax_parse_exception_str(self): 1173 # pass various values from a locator to the SAXParseException to 1174 # make sure that the __str__() doesn't fall apart when None is 1175 # passed instead of an integer line and column number 1176 # 1177 # use "normal" values for the locator: 1178 str(SAXParseException("message", None, 1179 self.DummyLocator(1, 1))) 1180 # use None for the line number: 1181 str(SAXParseException("message", None, 1182 self.DummyLocator(None, 1))) 1183 # use None for the column number: 1184 str(SAXParseException("message", None, 1185 self.DummyLocator(1, None))) 1186 # use None for both: 1187 str(SAXParseException("message", None, 1188 self.DummyLocator(None, None))) 1189 1190 class DummyLocator: 1191 def __init__(self, lineno, colno): 1192 self._lineno = lineno 1193 self._colno = colno 1194 1195 def getPublicId(self): 1196 return "pubid" 1197 1198 def getSystemId(self): 1199 return "sysid" 1200 1201 def getLineNumber(self): 1202 return self._lineno 1203 1204 def getColumnNumber(self): 1205 return self._colno 1206 1207 # =========================================================================== 1208 # 1209 # xmlreader tests 1210 # 1211 # =========================================================================== 1212 1213 class XmlReaderTest(XmlTestBase): 1214 1215 # ===== AttributesImpl 1216 def test_attrs_empty(self): 1217 self.verify_empty_attrs(AttributesImpl({})) 1218 1219 def test_attrs_wattr(self): 1220 self.verify_attrs_wattr(AttributesImpl({"attr" : "val"})) 1221 1222 def test_nsattrs_empty(self): 1223 self.verify_empty_nsattrs(AttributesNSImpl({}, {})) 1224 1225 def test_nsattrs_wattr(self): 1226 attrs = AttributesNSImpl({(ns_uri, "attr") : "val"}, 1227 {(ns_uri, "attr") : "ns:attr"}) 1228 1229 self.assertEqual(attrs.getLength(), 1) 1230 self.assertEqual(attrs.getNames(), [(ns_uri, "attr")]) 1231 self.assertEqual(attrs.getQNames(), ["ns:attr"]) 1232 self.assertEqual(len(attrs), 1) 1233 self.assertIn((ns_uri, "attr"), attrs) 1234 self.assertEqual(list(attrs.keys()), [(ns_uri, "attr")]) 1235 self.assertEqual(attrs.get((ns_uri, "attr")), "val") 1236 self.assertEqual(attrs.get((ns_uri, "attr"), 25), "val") 1237 self.assertEqual(list(attrs.items()), [((ns_uri, "attr"), "val")]) 1238 self.assertEqual(list(attrs.values()), ["val"]) 1239 self.assertEqual(attrs.getValue((ns_uri, "attr")), "val") 1240 self.assertEqual(attrs.getValueByQName("ns:attr"), "val") 1241 self.assertEqual(attrs.getNameByQName("ns:attr"), (ns_uri, "attr")) 1242 self.assertEqual(attrs[(ns_uri, "attr")], "val") 1243 self.assertEqual(attrs.getQNameByName((ns_uri, "attr")), "ns:attr") 1244 1245 1246 def test_main(): 1247 run_unittest(MakeParserTest, 1248 ParseTest, 1249 SaxutilsTest, 1250 PrepareInputSourceTest, 1251 StringXmlgenTest, 1252 BytesXmlgenTest, 1253 WriterXmlgenTest, 1254 StreamWriterXmlgenTest, 1255 StreamReaderWriterXmlgenTest, 1256 ExpatReaderTest, 1257 ErrorReportingTest, 1258 XmlReaderTest) 1259 1260 if __name__ == "__main__": 1261 test_main() 1262