1 # IMPORTANT: the same tests are run from "test_xml_etree_c" in order 2 # to ensure consistency between the C implementation and the Python 3 # implementation. 4 # 5 # For this purpose, the module-level "ET" symbol is temporarily 6 # monkey-patched when running the "test_xml_etree_c" test suite. 7 8 import copy 9 import html 10 import io 11 import operator 12 import pickle 13 import sys 14 import types 15 import unittest 16 import warnings 17 import weakref 18 19 from itertools import product 20 from test import support 21 from test.support import TESTFN, findfile, import_fresh_module, gc_collect, swap_attr 22 23 # pyET is the pure-Python implementation. 24 # 25 # ET is pyET in test_xml_etree and is the C accelerated version in 26 # test_xml_etree_c. 27 pyET = None 28 ET = None 29 30 SIMPLE_XMLFILE = findfile("simple.xml", subdir="xmltestdata") 31 try: 32 SIMPLE_XMLFILE.encode("utf-8") 33 except UnicodeEncodeError: 34 raise unittest.SkipTest("filename is not encodable to utf8") 35 SIMPLE_NS_XMLFILE = findfile("simple-ns.xml", subdir="xmltestdata") 36 37 SAMPLE_XML = """\ 38 <body> 39 <tag class='a'>text</tag> 40 <tag class='b' /> 41 <section> 42 <tag class='b' id='inner'>subtext</tag> 43 </section> 44 </body> 45 """ 46 47 SAMPLE_SECTION = """\ 48 <section> 49 <tag class='b' id='inner'>subtext</tag> 50 <nexttag /> 51 <nextsection> 52 <tag /> 53 </nextsection> 54 </section> 55 """ 56 57 SAMPLE_XML_NS = """ 58 <body xmlns="http://effbot.org/ns"> 59 <tag>text</tag> 60 <tag /> 61 <section> 62 <tag>subtext</tag> 63 </section> 64 </body> 65 """ 66 67 SAMPLE_XML_NS_ELEMS = """ 68 <root> 69 <h:table xmlns:h="hello"> 70 <h:tr> 71 <h:td>Apples</h:td> 72 <h:td>Bananas</h:td> 73 </h:tr> 74 </h:table> 75 76 <f:table xmlns:f="foo"> 77 <f:name>African Coffee Table</f:name> 78 <f:width>80</f:width> 79 <f:length>120</f:length> 80 </f:table> 81 </root> 82 """ 83 84 ENTITY_XML = """\ 85 <!DOCTYPE points [ 86 <!ENTITY % user-entities SYSTEM 'user-entities.xml'> 87 %user-entities; 88 ]> 89 <document>&entity;</document> 90 """ 91 92 93 class ModuleTest(unittest.TestCase): 94 def test_sanity(self): 95 # Import sanity. 96 97 from xml.etree import ElementTree 98 from xml.etree import ElementInclude 99 from xml.etree import ElementPath 100 101 def test_all(self): 102 names = ("xml.etree.ElementTree", "_elementtree") 103 support.check__all__(self, ET, names, blacklist=("HTML_EMPTY",)) 104 105 106 def serialize(elem, to_string=True, encoding='unicode', **options): 107 if encoding != 'unicode': 108 file = io.BytesIO() 109 else: 110 file = io.StringIO() 111 tree = ET.ElementTree(elem) 112 tree.write(file, encoding=encoding, **options) 113 if to_string: 114 return file.getvalue() 115 else: 116 file.seek(0) 117 return file 118 119 def summarize_list(seq): 120 return [elem.tag for elem in seq] 121 122 123 class ElementTestCase: 124 @classmethod 125 def setUpClass(cls): 126 cls.modules = {pyET, ET} 127 128 def pickleRoundTrip(self, obj, name, dumper, loader, proto): 129 save_m = sys.modules[name] 130 try: 131 sys.modules[name] = dumper 132 temp = pickle.dumps(obj, proto) 133 sys.modules[name] = loader 134 result = pickle.loads(temp) 135 except pickle.PicklingError as pe: 136 # pyET must be second, because pyET may be (equal to) ET. 137 human = dict([(ET, "cET"), (pyET, "pyET")]) 138 raise support.TestFailed("Failed to round-trip %r from %r to %r" 139 % (obj, 140 human.get(dumper, dumper), 141 human.get(loader, loader))) from pe 142 finally: 143 sys.modules[name] = save_m 144 return result 145 146 def assertEqualElements(self, alice, bob): 147 self.assertIsInstance(alice, (ET.Element, pyET.Element)) 148 self.assertIsInstance(bob, (ET.Element, pyET.Element)) 149 self.assertEqual(len(list(alice)), len(list(bob))) 150 for x, y in zip(alice, bob): 151 self.assertEqualElements(x, y) 152 properties = operator.attrgetter('tag', 'tail', 'text', 'attrib') 153 self.assertEqual(properties(alice), properties(bob)) 154 155 # -------------------------------------------------------------------- 156 # element tree tests 157 158 class ElementTreeTest(unittest.TestCase): 159 160 def serialize_check(self, elem, expected): 161 self.assertEqual(serialize(elem), expected) 162 163 def test_interface(self): 164 # Test element tree interface. 165 166 def check_string(string): 167 len(string) 168 for char in string: 169 self.assertEqual(len(char), 1, 170 msg="expected one-character string, got %r" % char) 171 new_string = string + "" 172 new_string = string + " " 173 string[:0] 174 175 def check_mapping(mapping): 176 len(mapping) 177 keys = mapping.keys() 178 items = mapping.items() 179 for key in keys: 180 item = mapping[key] 181 mapping["key"] = "value" 182 self.assertEqual(mapping["key"], "value", 183 msg="expected value string, got %r" % mapping["key"]) 184 185 def check_element(element): 186 self.assertTrue(ET.iselement(element), msg="not an element") 187 direlem = dir(element) 188 for attr in 'tag', 'attrib', 'text', 'tail': 189 self.assertTrue(hasattr(element, attr), 190 msg='no %s member' % attr) 191 self.assertIn(attr, direlem, 192 msg='no %s visible by dir' % attr) 193 194 check_string(element.tag) 195 check_mapping(element.attrib) 196 if element.text is not None: 197 check_string(element.text) 198 if element.tail is not None: 199 check_string(element.tail) 200 for elem in element: 201 check_element(elem) 202 203 element = ET.Element("tag") 204 check_element(element) 205 tree = ET.ElementTree(element) 206 check_element(tree.getroot()) 207 element = ET.Element("t\xe4g", key="value") 208 tree = ET.ElementTree(element) 209 self.assertRegex(repr(element), r"^<Element 't\xe4g' at 0x.*>$") 210 element = ET.Element("tag", key="value") 211 212 # Make sure all standard element methods exist. 213 214 def check_method(method): 215 self.assertTrue(hasattr(method, '__call__'), 216 msg="%s not callable" % method) 217 218 check_method(element.append) 219 check_method(element.extend) 220 check_method(element.insert) 221 check_method(element.remove) 222 check_method(element.getchildren) 223 check_method(element.find) 224 check_method(element.iterfind) 225 check_method(element.findall) 226 check_method(element.findtext) 227 check_method(element.clear) 228 check_method(element.get) 229 check_method(element.set) 230 check_method(element.keys) 231 check_method(element.items) 232 check_method(element.iter) 233 check_method(element.itertext) 234 check_method(element.getiterator) 235 236 # These methods return an iterable. See bug 6472. 237 238 def check_iter(it): 239 check_method(it.__next__) 240 241 check_iter(element.iterfind("tag")) 242 check_iter(element.iterfind("*")) 243 check_iter(tree.iterfind("tag")) 244 check_iter(tree.iterfind("*")) 245 246 # These aliases are provided: 247 248 self.assertEqual(ET.XML, ET.fromstring) 249 self.assertEqual(ET.PI, ET.ProcessingInstruction) 250 251 def test_set_attribute(self): 252 element = ET.Element('tag') 253 254 self.assertEqual(element.tag, 'tag') 255 element.tag = 'Tag' 256 self.assertEqual(element.tag, 'Tag') 257 element.tag = 'TAG' 258 self.assertEqual(element.tag, 'TAG') 259 260 self.assertIsNone(element.text) 261 element.text = 'Text' 262 self.assertEqual(element.text, 'Text') 263 element.text = 'TEXT' 264 self.assertEqual(element.text, 'TEXT') 265 266 self.assertIsNone(element.tail) 267 element.tail = 'Tail' 268 self.assertEqual(element.tail, 'Tail') 269 element.tail = 'TAIL' 270 self.assertEqual(element.tail, 'TAIL') 271 272 self.assertEqual(element.attrib, {}) 273 element.attrib = {'a': 'b', 'c': 'd'} 274 self.assertEqual(element.attrib, {'a': 'b', 'c': 'd'}) 275 element.attrib = {'A': 'B', 'C': 'D'} 276 self.assertEqual(element.attrib, {'A': 'B', 'C': 'D'}) 277 278 def test_simpleops(self): 279 # Basic method sanity checks. 280 281 elem = ET.XML("<body><tag/></body>") 282 self.serialize_check(elem, '<body><tag /></body>') 283 e = ET.Element("tag2") 284 elem.append(e) 285 self.serialize_check(elem, '<body><tag /><tag2 /></body>') 286 elem.remove(e) 287 self.serialize_check(elem, '<body><tag /></body>') 288 elem.insert(0, e) 289 self.serialize_check(elem, '<body><tag2 /><tag /></body>') 290 elem.remove(e) 291 elem.extend([e]) 292 self.serialize_check(elem, '<body><tag /><tag2 /></body>') 293 elem.remove(e) 294 295 element = ET.Element("tag", key="value") 296 self.serialize_check(element, '<tag key="value" />') # 1 297 subelement = ET.Element("subtag") 298 element.append(subelement) 299 self.serialize_check(element, '<tag key="value"><subtag /></tag>') # 2 300 element.insert(0, subelement) 301 self.serialize_check(element, 302 '<tag key="value"><subtag /><subtag /></tag>') # 3 303 element.remove(subelement) 304 self.serialize_check(element, '<tag key="value"><subtag /></tag>') # 4 305 element.remove(subelement) 306 self.serialize_check(element, '<tag key="value" />') # 5 307 with self.assertRaises(ValueError) as cm: 308 element.remove(subelement) 309 self.assertEqual(str(cm.exception), 'list.remove(x): x not in list') 310 self.serialize_check(element, '<tag key="value" />') # 6 311 element[0:0] = [subelement, subelement, subelement] 312 self.serialize_check(element[1], '<subtag />') 313 self.assertEqual(element[1:9], [element[1], element[2]]) 314 self.assertEqual(element[:9:2], [element[0], element[2]]) 315 del element[1:2] 316 self.serialize_check(element, 317 '<tag key="value"><subtag /><subtag /></tag>') 318 319 def test_cdata(self): 320 # Test CDATA handling (etc). 321 322 self.serialize_check(ET.XML("<tag>hello</tag>"), 323 '<tag>hello</tag>') 324 self.serialize_check(ET.XML("<tag>hello</tag>"), 325 '<tag>hello</tag>') 326 self.serialize_check(ET.XML("<tag><![CDATA[hello]]></tag>"), 327 '<tag>hello</tag>') 328 329 def test_file_init(self): 330 stringfile = io.BytesIO(SAMPLE_XML.encode("utf-8")) 331 tree = ET.ElementTree(file=stringfile) 332 self.assertEqual(tree.find("tag").tag, 'tag') 333 self.assertEqual(tree.find("section/tag").tag, 'tag') 334 335 tree = ET.ElementTree(file=SIMPLE_XMLFILE) 336 self.assertEqual(tree.find("element").tag, 'element') 337 self.assertEqual(tree.find("element/../empty-element").tag, 338 'empty-element') 339 340 def test_path_cache(self): 341 # Check that the path cache behaves sanely. 342 343 from xml.etree import ElementPath 344 345 elem = ET.XML(SAMPLE_XML) 346 for i in range(10): ET.ElementTree(elem).find('./'+str(i)) 347 cache_len_10 = len(ElementPath._cache) 348 for i in range(10): ET.ElementTree(elem).find('./'+str(i)) 349 self.assertEqual(len(ElementPath._cache), cache_len_10) 350 for i in range(20): ET.ElementTree(elem).find('./'+str(i)) 351 self.assertGreater(len(ElementPath._cache), cache_len_10) 352 for i in range(600): ET.ElementTree(elem).find('./'+str(i)) 353 self.assertLess(len(ElementPath._cache), 500) 354 355 def test_copy(self): 356 # Test copy handling (etc). 357 358 import copy 359 e1 = ET.XML("<tag>hello<foo/></tag>") 360 e2 = copy.copy(e1) 361 e3 = copy.deepcopy(e1) 362 e1.find("foo").tag = "bar" 363 self.serialize_check(e1, '<tag>hello<bar /></tag>') 364 self.serialize_check(e2, '<tag>hello<bar /></tag>') 365 self.serialize_check(e3, '<tag>hello<foo /></tag>') 366 367 def test_attrib(self): 368 # Test attribute handling. 369 370 elem = ET.Element("tag") 371 elem.get("key") # 1.1 372 self.assertEqual(elem.get("key", "default"), 'default') # 1.2 373 374 elem.set("key", "value") 375 self.assertEqual(elem.get("key"), 'value') # 1.3 376 377 elem = ET.Element("tag", key="value") 378 self.assertEqual(elem.get("key"), 'value') # 2.1 379 self.assertEqual(elem.attrib, {'key': 'value'}) # 2.2 380 381 attrib = {"key": "value"} 382 elem = ET.Element("tag", attrib) 383 attrib.clear() # check for aliasing issues 384 self.assertEqual(elem.get("key"), 'value') # 3.1 385 self.assertEqual(elem.attrib, {'key': 'value'}) # 3.2 386 387 attrib = {"key": "value"} 388 elem = ET.Element("tag", **attrib) 389 attrib.clear() # check for aliasing issues 390 self.assertEqual(elem.get("key"), 'value') # 4.1 391 self.assertEqual(elem.attrib, {'key': 'value'}) # 4.2 392 393 elem = ET.Element("tag", {"key": "other"}, key="value") 394 self.assertEqual(elem.get("key"), 'value') # 5.1 395 self.assertEqual(elem.attrib, {'key': 'value'}) # 5.2 396 397 elem = ET.Element('test') 398 elem.text = "aa" 399 elem.set('testa', 'testval') 400 elem.set('testb', 'test2') 401 self.assertEqual(ET.tostring(elem), 402 b'<test testa="testval" testb="test2">aa</test>') 403 self.assertEqual(sorted(elem.keys()), ['testa', 'testb']) 404 self.assertEqual(sorted(elem.items()), 405 [('testa', 'testval'), ('testb', 'test2')]) 406 self.assertEqual(elem.attrib['testb'], 'test2') 407 elem.attrib['testb'] = 'test1' 408 elem.attrib['testc'] = 'test2' 409 self.assertEqual(ET.tostring(elem), 410 b'<test testa="testval" testb="test1" testc="test2">aa</test>') 411 412 elem = ET.Element('test') 413 elem.set('a', '\r') 414 elem.set('b', '\r\n') 415 elem.set('c', '\t\n\r ') 416 elem.set('d', '\n\n') 417 self.assertEqual(ET.tostring(elem), 418 b'<test a=" " b=" " c="	 " d=" " />') 419 420 def test_makeelement(self): 421 # Test makeelement handling. 422 423 elem = ET.Element("tag") 424 attrib = {"key": "value"} 425 subelem = elem.makeelement("subtag", attrib) 426 self.assertIsNot(subelem.attrib, attrib, msg="attrib aliasing") 427 elem.append(subelem) 428 self.serialize_check(elem, '<tag><subtag key="value" /></tag>') 429 430 elem.clear() 431 self.serialize_check(elem, '<tag />') 432 elem.append(subelem) 433 self.serialize_check(elem, '<tag><subtag key="value" /></tag>') 434 elem.extend([subelem, subelem]) 435 self.serialize_check(elem, 436 '<tag><subtag key="value" /><subtag key="value" /><subtag key="value" /></tag>') 437 elem[:] = [subelem] 438 self.serialize_check(elem, '<tag><subtag key="value" /></tag>') 439 elem[:] = tuple([subelem]) 440 self.serialize_check(elem, '<tag><subtag key="value" /></tag>') 441 442 def test_parsefile(self): 443 # Test parsing from file. 444 445 tree = ET.parse(SIMPLE_XMLFILE) 446 stream = io.StringIO() 447 tree.write(stream, encoding='unicode') 448 self.assertEqual(stream.getvalue(), 449 '<root>\n' 450 ' <element key="value">text</element>\n' 451 ' <element>text</element>tail\n' 452 ' <empty-element />\n' 453 '</root>') 454 tree = ET.parse(SIMPLE_NS_XMLFILE) 455 stream = io.StringIO() 456 tree.write(stream, encoding='unicode') 457 self.assertEqual(stream.getvalue(), 458 '<ns0:root xmlns:ns0="namespace">\n' 459 ' <ns0:element key="value">text</ns0:element>\n' 460 ' <ns0:element>text</ns0:element>tail\n' 461 ' <ns0:empty-element />\n' 462 '</ns0:root>') 463 464 with open(SIMPLE_XMLFILE) as f: 465 data = f.read() 466 467 parser = ET.XMLParser() 468 self.assertRegex(parser.version, r'^Expat ') 469 parser.feed(data) 470 self.serialize_check(parser.close(), 471 '<root>\n' 472 ' <element key="value">text</element>\n' 473 ' <element>text</element>tail\n' 474 ' <empty-element />\n' 475 '</root>') 476 477 target = ET.TreeBuilder() 478 parser = ET.XMLParser(target=target) 479 parser.feed(data) 480 self.serialize_check(parser.close(), 481 '<root>\n' 482 ' <element key="value">text</element>\n' 483 ' <element>text</element>tail\n' 484 ' <empty-element />\n' 485 '</root>') 486 487 def test_parseliteral(self): 488 element = ET.XML("<html><body>text</body></html>") 489 self.assertEqual(ET.tostring(element, encoding='unicode'), 490 '<html><body>text</body></html>') 491 element = ET.fromstring("<html><body>text</body></html>") 492 self.assertEqual(ET.tostring(element, encoding='unicode'), 493 '<html><body>text</body></html>') 494 sequence = ["<html><body>", "text</bo", "dy></html>"] 495 element = ET.fromstringlist(sequence) 496 self.assertEqual(ET.tostring(element), 497 b'<html><body>text</body></html>') 498 self.assertEqual(b"".join(ET.tostringlist(element)), 499 b'<html><body>text</body></html>') 500 self.assertEqual(ET.tostring(element, "ascii"), 501 b"<?xml version='1.0' encoding='ascii'?>\n" 502 b"<html><body>text</body></html>") 503 _, ids = ET.XMLID("<html><body>text</body></html>") 504 self.assertEqual(len(ids), 0) 505 _, ids = ET.XMLID("<html><body id='body'>text</body></html>") 506 self.assertEqual(len(ids), 1) 507 self.assertEqual(ids["body"].tag, 'body') 508 509 def test_iterparse(self): 510 # Test iterparse interface. 511 512 iterparse = ET.iterparse 513 514 context = iterparse(SIMPLE_XMLFILE) 515 action, elem = next(context) 516 self.assertEqual((action, elem.tag), ('end', 'element')) 517 self.assertEqual([(action, elem.tag) for action, elem in context], [ 518 ('end', 'element'), 519 ('end', 'empty-element'), 520 ('end', 'root'), 521 ]) 522 self.assertEqual(context.root.tag, 'root') 523 524 context = iterparse(SIMPLE_NS_XMLFILE) 525 self.assertEqual([(action, elem.tag) for action, elem in context], [ 526 ('end', '{namespace}element'), 527 ('end', '{namespace}element'), 528 ('end', '{namespace}empty-element'), 529 ('end', '{namespace}root'), 530 ]) 531 532 events = () 533 context = iterparse(SIMPLE_XMLFILE, events) 534 self.assertEqual([(action, elem.tag) for action, elem in context], []) 535 536 events = () 537 context = iterparse(SIMPLE_XMLFILE, events=events) 538 self.assertEqual([(action, elem.tag) for action, elem in context], []) 539 540 events = ("start", "end") 541 context = iterparse(SIMPLE_XMLFILE, events) 542 self.assertEqual([(action, elem.tag) for action, elem in context], [ 543 ('start', 'root'), 544 ('start', 'element'), 545 ('end', 'element'), 546 ('start', 'element'), 547 ('end', 'element'), 548 ('start', 'empty-element'), 549 ('end', 'empty-element'), 550 ('end', 'root'), 551 ]) 552 553 events = ("start", "end", "start-ns", "end-ns") 554 context = iterparse(SIMPLE_NS_XMLFILE, events) 555 self.assertEqual([(action, elem.tag) if action in ("start", "end") 556 else (action, elem) 557 for action, elem in context], [ 558 ('start-ns', ('', 'namespace')), 559 ('start', '{namespace}root'), 560 ('start', '{namespace}element'), 561 ('end', '{namespace}element'), 562 ('start', '{namespace}element'), 563 ('end', '{namespace}element'), 564 ('start', '{namespace}empty-element'), 565 ('end', '{namespace}empty-element'), 566 ('end', '{namespace}root'), 567 ('end-ns', None), 568 ]) 569 570 events = ('start-ns', 'end-ns') 571 context = iterparse(io.StringIO(r"<root xmlns=''/>"), events) 572 res = [action for action, elem in context] 573 self.assertEqual(res, ['start-ns', 'end-ns']) 574 575 events = ("start", "end", "bogus") 576 with open(SIMPLE_XMLFILE, "rb") as f: 577 with self.assertRaises(ValueError) as cm: 578 iterparse(f, events) 579 self.assertFalse(f.closed) 580 self.assertEqual(str(cm.exception), "unknown event 'bogus'") 581 582 with support.check_no_resource_warning(self): 583 with self.assertRaises(ValueError) as cm: 584 iterparse(SIMPLE_XMLFILE, events) 585 self.assertEqual(str(cm.exception), "unknown event 'bogus'") 586 del cm 587 588 source = io.BytesIO( 589 b"<?xml version='1.0' encoding='iso-8859-1'?>\n" 590 b"<body xmlns='http://éffbot.org/ns'\n" 591 b" xmlns:cl\xe9='http://effbot.org/ns'>text</body>\n") 592 events = ("start-ns",) 593 context = iterparse(source, events) 594 self.assertEqual([(action, elem) for action, elem in context], [ 595 ('start-ns', ('', 'http://\xe9ffbot.org/ns')), 596 ('start-ns', ('cl\xe9', 'http://effbot.org/ns')), 597 ]) 598 599 source = io.StringIO("<document />junk") 600 it = iterparse(source) 601 action, elem = next(it) 602 self.assertEqual((action, elem.tag), ('end', 'document')) 603 with self.assertRaises(ET.ParseError) as cm: 604 next(it) 605 self.assertEqual(str(cm.exception), 606 'junk after document element: line 1, column 12') 607 608 with open(TESTFN, "wb") as f: 609 f.write(b"<document />junk") 610 it = iterparse(TESTFN) 611 action, elem = next(it) 612 self.assertEqual((action, elem.tag), ('end', 'document')) 613 with support.check_no_resource_warning(self): 614 with self.assertRaises(ET.ParseError) as cm: 615 next(it) 616 self.assertEqual(str(cm.exception), 617 'junk after document element: line 1, column 12') 618 del cm, it 619 620 def test_writefile(self): 621 elem = ET.Element("tag") 622 elem.text = "text" 623 self.serialize_check(elem, '<tag>text</tag>') 624 ET.SubElement(elem, "subtag").text = "subtext" 625 self.serialize_check(elem, '<tag>text<subtag>subtext</subtag></tag>') 626 627 # Test tag suppression 628 elem.tag = None 629 self.serialize_check(elem, 'text<subtag>subtext</subtag>') 630 elem.insert(0, ET.Comment("comment")) 631 self.serialize_check(elem, 632 'text<!--comment--><subtag>subtext</subtag>') # assumes 1.3 633 634 elem[0] = ET.PI("key", "value") 635 self.serialize_check(elem, 'text<?key value?><subtag>subtext</subtag>') 636 637 def test_custom_builder(self): 638 # Test parser w. custom builder. 639 640 with open(SIMPLE_XMLFILE) as f: 641 data = f.read() 642 class Builder(list): 643 def start(self, tag, attrib): 644 self.append(("start", tag)) 645 def end(self, tag): 646 self.append(("end", tag)) 647 def data(self, text): 648 pass 649 builder = Builder() 650 parser = ET.XMLParser(target=builder) 651 parser.feed(data) 652 self.assertEqual(builder, [ 653 ('start', 'root'), 654 ('start', 'element'), 655 ('end', 'element'), 656 ('start', 'element'), 657 ('end', 'element'), 658 ('start', 'empty-element'), 659 ('end', 'empty-element'), 660 ('end', 'root'), 661 ]) 662 663 with open(SIMPLE_NS_XMLFILE) as f: 664 data = f.read() 665 class Builder(list): 666 def start(self, tag, attrib): 667 self.append(("start", tag)) 668 def end(self, tag): 669 self.append(("end", tag)) 670 def data(self, text): 671 pass 672 def pi(self, target, data): 673 self.append(("pi", target, data)) 674 def comment(self, data): 675 self.append(("comment", data)) 676 builder = Builder() 677 parser = ET.XMLParser(target=builder) 678 parser.feed(data) 679 self.assertEqual(builder, [ 680 ('pi', 'pi', 'data'), 681 ('comment', ' comment '), 682 ('start', '{namespace}root'), 683 ('start', '{namespace}element'), 684 ('end', '{namespace}element'), 685 ('start', '{namespace}element'), 686 ('end', '{namespace}element'), 687 ('start', '{namespace}empty-element'), 688 ('end', '{namespace}empty-element'), 689 ('end', '{namespace}root'), 690 ]) 691 692 693 def test_getchildren(self): 694 # Test Element.getchildren() 695 696 with open(SIMPLE_XMLFILE, "rb") as f: 697 tree = ET.parse(f) 698 self.assertEqual([summarize_list(elem.getchildren()) 699 for elem in tree.getroot().iter()], [ 700 ['element', 'element', 'empty-element'], 701 [], 702 [], 703 [], 704 ]) 705 self.assertEqual([summarize_list(elem.getchildren()) 706 for elem in tree.getiterator()], [ 707 ['element', 'element', 'empty-element'], 708 [], 709 [], 710 [], 711 ]) 712 713 elem = ET.XML(SAMPLE_XML) 714 self.assertEqual(len(elem.getchildren()), 3) 715 self.assertEqual(len(elem[2].getchildren()), 1) 716 self.assertEqual(elem[:], elem.getchildren()) 717 child1 = elem[0] 718 child2 = elem[2] 719 del elem[1:2] 720 self.assertEqual(len(elem.getchildren()), 2) 721 self.assertEqual(child1, elem[0]) 722 self.assertEqual(child2, elem[1]) 723 elem[0:2] = [child2, child1] 724 self.assertEqual(child2, elem[0]) 725 self.assertEqual(child1, elem[1]) 726 self.assertNotEqual(child1, elem[0]) 727 elem.clear() 728 self.assertEqual(elem.getchildren(), []) 729 730 def test_writestring(self): 731 elem = ET.XML("<html><body>text</body></html>") 732 self.assertEqual(ET.tostring(elem), b'<html><body>text</body></html>') 733 elem = ET.fromstring("<html><body>text</body></html>") 734 self.assertEqual(ET.tostring(elem), b'<html><body>text</body></html>') 735 736 def test_encoding(self): 737 def check(encoding, body=''): 738 xml = ("<?xml version='1.0' encoding='%s'?><xml>%s</xml>" % 739 (encoding, body)) 740 self.assertEqual(ET.XML(xml.encode(encoding)).text, body) 741 self.assertEqual(ET.XML(xml).text, body) 742 check("ascii", 'a') 743 check("us-ascii", 'a') 744 check("iso-8859-1", '\xbd') 745 check("iso-8859-15", '\u20ac') 746 check("cp437", '\u221a') 747 check("mac-roman", '\u02da') 748 749 def xml(encoding): 750 return "<?xml version='1.0' encoding='%s'?><xml />" % encoding 751 def bxml(encoding): 752 return xml(encoding).encode(encoding) 753 supported_encodings = [ 754 'ascii', 'utf-8', 'utf-8-sig', 'utf-16', 'utf-16be', 'utf-16le', 755 'iso8859-1', 'iso8859-2', 'iso8859-3', 'iso8859-4', 'iso8859-5', 756 'iso8859-6', 'iso8859-7', 'iso8859-8', 'iso8859-9', 'iso8859-10', 757 'iso8859-13', 'iso8859-14', 'iso8859-15', 'iso8859-16', 758 'cp437', 'cp720', 'cp737', 'cp775', 'cp850', 'cp852', 759 'cp855', 'cp856', 'cp857', 'cp858', 'cp860', 'cp861', 'cp862', 760 'cp863', 'cp865', 'cp866', 'cp869', 'cp874', 'cp1006', 'cp1125', 761 'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255', 762 'cp1256', 'cp1257', 'cp1258', 763 'mac-cyrillic', 'mac-greek', 'mac-iceland', 'mac-latin2', 764 'mac-roman', 'mac-turkish', 765 'iso2022-jp', 'iso2022-jp-1', 'iso2022-jp-2', 'iso2022-jp-2004', 766 'iso2022-jp-3', 'iso2022-jp-ext', 767 'koi8-r', 'koi8-t', 'koi8-u', 'kz1048', 768 'hz', 'ptcp154', 769 ] 770 for encoding in supported_encodings: 771 self.assertEqual(ET.tostring(ET.XML(bxml(encoding))), b'<xml />') 772 773 unsupported_ascii_compatible_encodings = [ 774 'big5', 'big5hkscs', 775 'cp932', 'cp949', 'cp950', 776 'euc-jp', 'euc-jis-2004', 'euc-jisx0213', 'euc-kr', 777 'gb2312', 'gbk', 'gb18030', 778 'iso2022-kr', 'johab', 779 'shift-jis', 'shift-jis-2004', 'shift-jisx0213', 780 'utf-7', 781 ] 782 for encoding in unsupported_ascii_compatible_encodings: 783 self.assertRaises(ValueError, ET.XML, bxml(encoding)) 784 785 unsupported_ascii_incompatible_encodings = [ 786 'cp037', 'cp424', 'cp500', 'cp864', 'cp875', 'cp1026', 'cp1140', 787 'utf_32', 'utf_32_be', 'utf_32_le', 788 ] 789 for encoding in unsupported_ascii_incompatible_encodings: 790 self.assertRaises(ET.ParseError, ET.XML, bxml(encoding)) 791 792 self.assertRaises(ValueError, ET.XML, xml('undefined').encode('ascii')) 793 self.assertRaises(LookupError, ET.XML, xml('xxx').encode('ascii')) 794 795 def test_methods(self): 796 # Test serialization methods. 797 798 e = ET.XML("<html><link/><script>1 < 2</script></html>") 799 e.tail = "\n" 800 self.assertEqual(serialize(e), 801 '<html><link /><script>1 < 2</script></html>\n') 802 self.assertEqual(serialize(e, method=None), 803 '<html><link /><script>1 < 2</script></html>\n') 804 self.assertEqual(serialize(e, method="xml"), 805 '<html><link /><script>1 < 2</script></html>\n') 806 self.assertEqual(serialize(e, method="html"), 807 '<html><link><script>1 < 2</script></html>\n') 808 self.assertEqual(serialize(e, method="text"), '1 < 2\n') 809 810 def test_issue18347(self): 811 e = ET.XML('<html><CamelCase>text</CamelCase></html>') 812 self.assertEqual(serialize(e), 813 '<html><CamelCase>text</CamelCase></html>') 814 self.assertEqual(serialize(e, method="html"), 815 '<html><CamelCase>text</CamelCase></html>') 816 817 def test_entity(self): 818 # Test entity handling. 819 820 # 1) good entities 821 822 e = ET.XML("<document title='舰'>test</document>") 823 self.assertEqual(serialize(e, encoding="us-ascii"), 824 b'<document title="舰">test</document>') 825 self.serialize_check(e, '<document title="\u8230">test</document>') 826 827 # 2) bad entities 828 829 with self.assertRaises(ET.ParseError) as cm: 830 ET.XML("<document>&entity;</document>") 831 self.assertEqual(str(cm.exception), 832 'undefined entity: line 1, column 10') 833 834 with self.assertRaises(ET.ParseError) as cm: 835 ET.XML(ENTITY_XML) 836 self.assertEqual(str(cm.exception), 837 'undefined entity &entity;: line 5, column 10') 838 839 # 3) custom entity 840 841 parser = ET.XMLParser() 842 parser.entity["entity"] = "text" 843 parser.feed(ENTITY_XML) 844 root = parser.close() 845 self.serialize_check(root, '<document>text</document>') 846 847 def test_namespace(self): 848 # Test namespace issues. 849 850 # 1) xml namespace 851 852 elem = ET.XML("<tag xml:lang='en' />") 853 self.serialize_check(elem, '<tag xml:lang="en" />') # 1.1 854 855 # 2) other "well-known" namespaces 856 857 elem = ET.XML("<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#' />") 858 self.serialize_check(elem, 859 '<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" />') # 2.1 860 861 elem = ET.XML("<html:html xmlns:html='http://www.w3.org/1999/xhtml' />") 862 self.serialize_check(elem, 863 '<html:html xmlns:html="http://www.w3.org/1999/xhtml" />') # 2.2 864 865 elem = ET.XML("<soap:Envelope xmlns:soap='http://schemas.xmlsoap.org/soap/envelope' />") 866 self.serialize_check(elem, 867 '<ns0:Envelope xmlns:ns0="http://schemas.xmlsoap.org/soap/envelope" />') # 2.3 868 869 # 3) unknown namespaces 870 elem = ET.XML(SAMPLE_XML_NS) 871 self.serialize_check(elem, 872 '<ns0:body xmlns:ns0="http://effbot.org/ns">\n' 873 ' <ns0:tag>text</ns0:tag>\n' 874 ' <ns0:tag />\n' 875 ' <ns0:section>\n' 876 ' <ns0:tag>subtext</ns0:tag>\n' 877 ' </ns0:section>\n' 878 '</ns0:body>') 879 880 def test_qname(self): 881 # Test QName handling. 882 883 # 1) decorated tags 884 885 elem = ET.Element("{uri}tag") 886 self.serialize_check(elem, '<ns0:tag xmlns:ns0="uri" />') # 1.1 887 elem = ET.Element(ET.QName("{uri}tag")) 888 self.serialize_check(elem, '<ns0:tag xmlns:ns0="uri" />') # 1.2 889 elem = ET.Element(ET.QName("uri", "tag")) 890 self.serialize_check(elem, '<ns0:tag xmlns:ns0="uri" />') # 1.3 891 elem = ET.Element(ET.QName("uri", "tag")) 892 subelem = ET.SubElement(elem, ET.QName("uri", "tag1")) 893 subelem = ET.SubElement(elem, ET.QName("uri", "tag2")) 894 self.serialize_check(elem, 895 '<ns0:tag xmlns:ns0="uri"><ns0:tag1 /><ns0:tag2 /></ns0:tag>') # 1.4 896 897 # 2) decorated attributes 898 899 elem.clear() 900 elem.attrib["{uri}key"] = "value" 901 self.serialize_check(elem, 902 '<ns0:tag xmlns:ns0="uri" ns0:key="value" />') # 2.1 903 904 elem.clear() 905 elem.attrib[ET.QName("{uri}key")] = "value" 906 self.serialize_check(elem, 907 '<ns0:tag xmlns:ns0="uri" ns0:key="value" />') # 2.2 908 909 # 3) decorated values are not converted by default, but the 910 # QName wrapper can be used for values 911 912 elem.clear() 913 elem.attrib["{uri}key"] = "{uri}value" 914 self.serialize_check(elem, 915 '<ns0:tag xmlns:ns0="uri" ns0:key="{uri}value" />') # 3.1 916 917 elem.clear() 918 elem.attrib["{uri}key"] = ET.QName("{uri}value") 919 self.serialize_check(elem, 920 '<ns0:tag xmlns:ns0="uri" ns0:key="ns0:value" />') # 3.2 921 922 elem.clear() 923 subelem = ET.Element("tag") 924 subelem.attrib["{uri1}key"] = ET.QName("{uri2}value") 925 elem.append(subelem) 926 elem.append(subelem) 927 self.serialize_check(elem, 928 '<ns0:tag xmlns:ns0="uri" xmlns:ns1="uri1" xmlns:ns2="uri2">' 929 '<tag ns1:key="ns2:value" />' 930 '<tag ns1:key="ns2:value" />' 931 '</ns0:tag>') # 3.3 932 933 # 4) Direct QName tests 934 935 self.assertEqual(str(ET.QName('ns', 'tag')), '{ns}tag') 936 self.assertEqual(str(ET.QName('{ns}tag')), '{ns}tag') 937 q1 = ET.QName('ns', 'tag') 938 q2 = ET.QName('ns', 'tag') 939 self.assertEqual(q1, q2) 940 q2 = ET.QName('ns', 'other-tag') 941 self.assertNotEqual(q1, q2) 942 self.assertNotEqual(q1, 'ns:tag') 943 self.assertEqual(q1, '{ns}tag') 944 945 def test_doctype_public(self): 946 # Test PUBLIC doctype. 947 948 elem = ET.XML('<!DOCTYPE html PUBLIC' 949 ' "-//W3C//DTD XHTML 1.0 Transitional//EN"' 950 ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">' 951 '<html>text</html>') 952 953 def test_xpath_tokenizer(self): 954 # Test the XPath tokenizer. 955 from xml.etree import ElementPath 956 def check(p, expected): 957 self.assertEqual([op or tag 958 for op, tag in ElementPath.xpath_tokenizer(p)], 959 expected) 960 961 # tests from the xml specification 962 check("*", ['*']) 963 check("text()", ['text', '()']) 964 check("@name", ['@', 'name']) 965 check("@*", ['@', '*']) 966 check("para[1]", ['para', '[', '1', ']']) 967 check("para[last()]", ['para', '[', 'last', '()', ']']) 968 check("*/para", ['*', '/', 'para']) 969 check("/doc/chapter[5]/section[2]", 970 ['/', 'doc', '/', 'chapter', '[', '5', ']', 971 '/', 'section', '[', '2', ']']) 972 check("chapter//para", ['chapter', '//', 'para']) 973 check("//para", ['//', 'para']) 974 check("//olist/item", ['//', 'olist', '/', 'item']) 975 check(".", ['.']) 976 check(".//para", ['.', '//', 'para']) 977 check("..", ['..']) 978 check("../@lang", ['..', '/', '@', 'lang']) 979 check("chapter[title]", ['chapter', '[', 'title', ']']) 980 check("employee[@secretary and @assistant]", ['employee', 981 '[', '@', 'secretary', '', 'and', '', '@', 'assistant', ']']) 982 983 # additional tests 984 check("{http://spam}egg", ['{http://spam}egg']) 985 check("./spam.egg", ['.', '/', 'spam.egg']) 986 check(".//{http://spam}egg", ['.', '//', '{http://spam}egg']) 987 988 def test_processinginstruction(self): 989 # Test ProcessingInstruction directly 990 991 self.assertEqual(ET.tostring(ET.ProcessingInstruction('test', 'instruction')), 992 b'<?test instruction?>') 993 self.assertEqual(ET.tostring(ET.PI('test', 'instruction')), 994 b'<?test instruction?>') 995 996 # Issue #2746 997 998 self.assertEqual(ET.tostring(ET.PI('test', '<testing&>')), 999 b'<?test <testing&>?>') 1000 self.assertEqual(ET.tostring(ET.PI('test', '<testing&>\xe3'), 'latin-1'), 1001 b"<?xml version='1.0' encoding='latin-1'?>\n" 1002 b"<?test <testing&>\xe3?>") 1003 1004 def test_html_empty_elems_serialization(self): 1005 # issue 15970 1006 # from http://www.w3.org/TR/html401/index/elements.html 1007 for element in ['AREA', 'BASE', 'BASEFONT', 'BR', 'COL', 'FRAME', 'HR', 1008 'IMG', 'INPUT', 'ISINDEX', 'LINK', 'META', 'PARAM']: 1009 for elem in [element, element.lower()]: 1010 expected = '<%s>' % elem 1011 serialized = serialize(ET.XML('<%s />' % elem), method='html') 1012 self.assertEqual(serialized, expected) 1013 serialized = serialize(ET.XML('<%s></%s>' % (elem,elem)), 1014 method='html') 1015 self.assertEqual(serialized, expected) 1016 1017 1018 class XMLPullParserTest(unittest.TestCase): 1019 1020 def _feed(self, parser, data, chunk_size=None): 1021 if chunk_size is None: 1022 parser.feed(data) 1023 else: 1024 for i in range(0, len(data), chunk_size): 1025 parser.feed(data[i:i+chunk_size]) 1026 1027 def assert_event_tags(self, parser, expected): 1028 events = parser.read_events() 1029 self.assertEqual([(action, elem.tag) for action, elem in events], 1030 expected) 1031 1032 def test_simple_xml(self): 1033 for chunk_size in (None, 1, 5): 1034 with self.subTest(chunk_size=chunk_size): 1035 parser = ET.XMLPullParser() 1036 self.assert_event_tags(parser, []) 1037 self._feed(parser, "<!-- comment -->\n", chunk_size) 1038 self.assert_event_tags(parser, []) 1039 self._feed(parser, 1040 "<root>\n <element key='value'>text</element", 1041 chunk_size) 1042 self.assert_event_tags(parser, []) 1043 self._feed(parser, ">\n", chunk_size) 1044 self.assert_event_tags(parser, [('end', 'element')]) 1045 self._feed(parser, "<element>text</element>tail\n", chunk_size) 1046 self._feed(parser, "<empty-element/>\n", chunk_size) 1047 self.assert_event_tags(parser, [ 1048 ('end', 'element'), 1049 ('end', 'empty-element'), 1050 ]) 1051 self._feed(parser, "</root>\n", chunk_size) 1052 self.assert_event_tags(parser, [('end', 'root')]) 1053 self.assertIsNone(parser.close()) 1054 1055 def test_feed_while_iterating(self): 1056 parser = ET.XMLPullParser() 1057 it = parser.read_events() 1058 self._feed(parser, "<root>\n <element key='value'>text</element>\n") 1059 action, elem = next(it) 1060 self.assertEqual((action, elem.tag), ('end', 'element')) 1061 self._feed(parser, "</root>\n") 1062 action, elem = next(it) 1063 self.assertEqual((action, elem.tag), ('end', 'root')) 1064 with self.assertRaises(StopIteration): 1065 next(it) 1066 1067 def test_simple_xml_with_ns(self): 1068 parser = ET.XMLPullParser() 1069 self.assert_event_tags(parser, []) 1070 self._feed(parser, "<!-- comment -->\n") 1071 self.assert_event_tags(parser, []) 1072 self._feed(parser, "<root xmlns='namespace'>\n") 1073 self.assert_event_tags(parser, []) 1074 self._feed(parser, "<element key='value'>text</element") 1075 self.assert_event_tags(parser, []) 1076 self._feed(parser, ">\n") 1077 self.assert_event_tags(parser, [('end', '{namespace}element')]) 1078 self._feed(parser, "<element>text</element>tail\n") 1079 self._feed(parser, "<empty-element/>\n") 1080 self.assert_event_tags(parser, [ 1081 ('end', '{namespace}element'), 1082 ('end', '{namespace}empty-element'), 1083 ]) 1084 self._feed(parser, "</root>\n") 1085 self.assert_event_tags(parser, [('end', '{namespace}root')]) 1086 self.assertIsNone(parser.close()) 1087 1088 def test_ns_events(self): 1089 parser = ET.XMLPullParser(events=('start-ns', 'end-ns')) 1090 self._feed(parser, "<!-- comment -->\n") 1091 self._feed(parser, "<root xmlns='namespace'>\n") 1092 self.assertEqual( 1093 list(parser.read_events()), 1094 [('start-ns', ('', 'namespace'))]) 1095 self._feed(parser, "<element key='value'>text</element") 1096 self._feed(parser, ">\n") 1097 self._feed(parser, "<element>text</element>tail\n") 1098 self._feed(parser, "<empty-element/>\n") 1099 self._feed(parser, "</root>\n") 1100 self.assertEqual(list(parser.read_events()), [('end-ns', None)]) 1101 self.assertIsNone(parser.close()) 1102 1103 def test_events(self): 1104 parser = ET.XMLPullParser(events=()) 1105 self._feed(parser, "<root/>\n") 1106 self.assert_event_tags(parser, []) 1107 1108 parser = ET.XMLPullParser(events=('start', 'end')) 1109 self._feed(parser, "<!-- comment -->\n") 1110 self.assert_event_tags(parser, []) 1111 self._feed(parser, "<root>\n") 1112 self.assert_event_tags(parser, [('start', 'root')]) 1113 self._feed(parser, "<element key='value'>text</element") 1114 self.assert_event_tags(parser, [('start', 'element')]) 1115 self._feed(parser, ">\n") 1116 self.assert_event_tags(parser, [('end', 'element')]) 1117 self._feed(parser, 1118 "<element xmlns='foo'>text<empty-element/></element>tail\n") 1119 self.assert_event_tags(parser, [ 1120 ('start', '{foo}element'), 1121 ('start', '{foo}empty-element'), 1122 ('end', '{foo}empty-element'), 1123 ('end', '{foo}element'), 1124 ]) 1125 self._feed(parser, "</root>") 1126 self.assertIsNone(parser.close()) 1127 self.assert_event_tags(parser, [('end', 'root')]) 1128 1129 parser = ET.XMLPullParser(events=('start',)) 1130 self._feed(parser, "<!-- comment -->\n") 1131 self.assert_event_tags(parser, []) 1132 self._feed(parser, "<root>\n") 1133 self.assert_event_tags(parser, [('start', 'root')]) 1134 self._feed(parser, "<element key='value'>text</element") 1135 self.assert_event_tags(parser, [('start', 'element')]) 1136 self._feed(parser, ">\n") 1137 self.assert_event_tags(parser, []) 1138 self._feed(parser, 1139 "<element xmlns='foo'>text<empty-element/></element>tail\n") 1140 self.assert_event_tags(parser, [ 1141 ('start', '{foo}element'), 1142 ('start', '{foo}empty-element'), 1143 ]) 1144 self._feed(parser, "</root>") 1145 self.assertIsNone(parser.close()) 1146 1147 def test_events_sequence(self): 1148 # Test that events can be some sequence that's not just a tuple or list 1149 eventset = {'end', 'start'} 1150 parser = ET.XMLPullParser(events=eventset) 1151 self._feed(parser, "<foo>bar</foo>") 1152 self.assert_event_tags(parser, [('start', 'foo'), ('end', 'foo')]) 1153 1154 class DummyIter: 1155 def __init__(self): 1156 self.events = iter(['start', 'end', 'start-ns']) 1157 def __iter__(self): 1158 return self 1159 def __next__(self): 1160 return next(self.events) 1161 1162 parser = ET.XMLPullParser(events=DummyIter()) 1163 self._feed(parser, "<foo>bar</foo>") 1164 self.assert_event_tags(parser, [('start', 'foo'), ('end', 'foo')]) 1165 1166 1167 def test_unknown_event(self): 1168 with self.assertRaises(ValueError): 1169 ET.XMLPullParser(events=('start', 'end', 'bogus')) 1170 1171 1172 # 1173 # xinclude tests (samples from appendix C of the xinclude specification) 1174 1175 XINCLUDE = {} 1176 1177 XINCLUDE["C1.xml"] = """\ 1178 <?xml version='1.0'?> 1179 <document xmlns:xi="http://www.w3.org/2001/XInclude"> 1180 <p>120 Mz is adequate for an average home user.</p> 1181 <xi:include href="disclaimer.xml"/> 1182 </document> 1183 """ 1184 1185 XINCLUDE["disclaimer.xml"] = """\ 1186 <?xml version='1.0'?> 1187 <disclaimer> 1188 <p>The opinions represented herein represent those of the individual 1189 and should not be interpreted as official policy endorsed by this 1190 organization.</p> 1191 </disclaimer> 1192 """ 1193 1194 XINCLUDE["C2.xml"] = """\ 1195 <?xml version='1.0'?> 1196 <document xmlns:xi="http://www.w3.org/2001/XInclude"> 1197 <p>This document has been accessed 1198 <xi:include href="count.txt" parse="text"/> times.</p> 1199 </document> 1200 """ 1201 1202 XINCLUDE["count.txt"] = "324387" 1203 1204 XINCLUDE["C2b.xml"] = """\ 1205 <?xml version='1.0'?> 1206 <document xmlns:xi="http://www.w3.org/2001/XInclude"> 1207 <p>This document has been <em>accessed</em> 1208 <xi:include href="count.txt" parse="text"/> times.</p> 1209 </document> 1210 """ 1211 1212 XINCLUDE["C3.xml"] = """\ 1213 <?xml version='1.0'?> 1214 <document xmlns:xi="http://www.w3.org/2001/XInclude"> 1215 <p>The following is the source of the "data.xml" resource:</p> 1216 <example><xi:include href="data.xml" parse="text"/></example> 1217 </document> 1218 """ 1219 1220 XINCLUDE["data.xml"] = """\ 1221 <?xml version='1.0'?> 1222 <data> 1223 <item><![CDATA[Brooks & Shields]]></item> 1224 </data> 1225 """ 1226 1227 XINCLUDE["C5.xml"] = """\ 1228 <?xml version='1.0'?> 1229 <div xmlns:xi="http://www.w3.org/2001/XInclude"> 1230 <xi:include href="example.txt" parse="text"> 1231 <xi:fallback> 1232 <xi:include href="fallback-example.txt" parse="text"> 1233 <xi:fallback><a href="mailto:bob@example.org">Report error</a></xi:fallback> 1234 </xi:include> 1235 </xi:fallback> 1236 </xi:include> 1237 </div> 1238 """ 1239 1240 XINCLUDE["default.xml"] = """\ 1241 <?xml version='1.0'?> 1242 <document xmlns:xi="http://www.w3.org/2001/XInclude"> 1243 <p>Example.</p> 1244 <xi:include href="{}"/> 1245 </document> 1246 """.format(html.escape(SIMPLE_XMLFILE, True)) 1247 1248 # 1249 # badly formatted xi:include tags 1250 1251 XINCLUDE_BAD = {} 1252 1253 XINCLUDE_BAD["B1.xml"] = """\ 1254 <?xml version='1.0'?> 1255 <document xmlns:xi="http://www.w3.org/2001/XInclude"> 1256 <p>120 Mz is adequate for an average home user.</p> 1257 <xi:include href="disclaimer.xml" parse="BAD_TYPE"/> 1258 </document> 1259 """ 1260 1261 XINCLUDE_BAD["B2.xml"] = """\ 1262 <?xml version='1.0'?> 1263 <div xmlns:xi="http://www.w3.org/2001/XInclude"> 1264 <xi:fallback></xi:fallback> 1265 </div> 1266 """ 1267 1268 class XIncludeTest(unittest.TestCase): 1269 1270 def xinclude_loader(self, href, parse="xml", encoding=None): 1271 try: 1272 data = XINCLUDE[href] 1273 except KeyError: 1274 raise OSError("resource not found") 1275 if parse == "xml": 1276 data = ET.XML(data) 1277 return data 1278 1279 def none_loader(self, href, parser, encoding=None): 1280 return None 1281 1282 def _my_loader(self, href, parse): 1283 # Used to avoid a test-dependency problem where the default loader 1284 # of ElementInclude uses the pyET parser for cET tests. 1285 if parse == 'xml': 1286 with open(href, 'rb') as f: 1287 return ET.parse(f).getroot() 1288 else: 1289 return None 1290 1291 def test_xinclude_default(self): 1292 from xml.etree import ElementInclude 1293 doc = self.xinclude_loader('default.xml') 1294 ElementInclude.include(doc, self._my_loader) 1295 self.assertEqual(serialize(doc), 1296 '<document>\n' 1297 ' <p>Example.</p>\n' 1298 ' <root>\n' 1299 ' <element key="value">text</element>\n' 1300 ' <element>text</element>tail\n' 1301 ' <empty-element />\n' 1302 '</root>\n' 1303 '</document>') 1304 1305 def test_xinclude(self): 1306 from xml.etree import ElementInclude 1307 1308 # Basic inclusion example (XInclude C.1) 1309 document = self.xinclude_loader("C1.xml") 1310 ElementInclude.include(document, self.xinclude_loader) 1311 self.assertEqual(serialize(document), 1312 '<document>\n' 1313 ' <p>120 Mz is adequate for an average home user.</p>\n' 1314 ' <disclaimer>\n' 1315 ' <p>The opinions represented herein represent those of the individual\n' 1316 ' and should not be interpreted as official policy endorsed by this\n' 1317 ' organization.</p>\n' 1318 '</disclaimer>\n' 1319 '</document>') # C1 1320 1321 # Textual inclusion example (XInclude C.2) 1322 document = self.xinclude_loader("C2.xml") 1323 ElementInclude.include(document, self.xinclude_loader) 1324 self.assertEqual(serialize(document), 1325 '<document>\n' 1326 ' <p>This document has been accessed\n' 1327 ' 324387 times.</p>\n' 1328 '</document>') # C2 1329 1330 # Textual inclusion after sibling element (based on modified XInclude C.2) 1331 document = self.xinclude_loader("C2b.xml") 1332 ElementInclude.include(document, self.xinclude_loader) 1333 self.assertEqual(serialize(document), 1334 '<document>\n' 1335 ' <p>This document has been <em>accessed</em>\n' 1336 ' 324387 times.</p>\n' 1337 '</document>') # C2b 1338 1339 # Textual inclusion of XML example (XInclude C.3) 1340 document = self.xinclude_loader("C3.xml") 1341 ElementInclude.include(document, self.xinclude_loader) 1342 self.assertEqual(serialize(document), 1343 '<document>\n' 1344 ' <p>The following is the source of the "data.xml" resource:</p>\n' 1345 " <example><?xml version='1.0'?>\n" 1346 '<data>\n' 1347 ' <item><![CDATA[Brooks & Shields]]></item>\n' 1348 '</data>\n' 1349 '</example>\n' 1350 '</document>') # C3 1351 1352 # Fallback example (XInclude C.5) 1353 # Note! Fallback support is not yet implemented 1354 document = self.xinclude_loader("C5.xml") 1355 with self.assertRaises(OSError) as cm: 1356 ElementInclude.include(document, self.xinclude_loader) 1357 self.assertEqual(str(cm.exception), 'resource not found') 1358 self.assertEqual(serialize(document), 1359 '<div xmlns:ns0="http://www.w3.org/2001/XInclude">\n' 1360 ' <ns0:include href="example.txt" parse="text">\n' 1361 ' <ns0:fallback>\n' 1362 ' <ns0:include href="fallback-example.txt" parse="text">\n' 1363 ' <ns0:fallback><a href="mailto:bob (at] example.org">Report error</a></ns0:fallback>\n' 1364 ' </ns0:include>\n' 1365 ' </ns0:fallback>\n' 1366 ' </ns0:include>\n' 1367 '</div>') # C5 1368 1369 def test_xinclude_failures(self): 1370 from xml.etree import ElementInclude 1371 1372 # Test failure to locate included XML file. 1373 document = ET.XML(XINCLUDE["C1.xml"]) 1374 with self.assertRaises(ElementInclude.FatalIncludeError) as cm: 1375 ElementInclude.include(document, loader=self.none_loader) 1376 self.assertEqual(str(cm.exception), 1377 "cannot load 'disclaimer.xml' as 'xml'") 1378 1379 # Test failure to locate included text file. 1380 document = ET.XML(XINCLUDE["C2.xml"]) 1381 with self.assertRaises(ElementInclude.FatalIncludeError) as cm: 1382 ElementInclude.include(document, loader=self.none_loader) 1383 self.assertEqual(str(cm.exception), 1384 "cannot load 'count.txt' as 'text'") 1385 1386 # Test bad parse type. 1387 document = ET.XML(XINCLUDE_BAD["B1.xml"]) 1388 with self.assertRaises(ElementInclude.FatalIncludeError) as cm: 1389 ElementInclude.include(document, loader=self.none_loader) 1390 self.assertEqual(str(cm.exception), 1391 "unknown parse type in xi:include tag ('BAD_TYPE')") 1392 1393 # Test xi:fallback outside xi:include. 1394 document = ET.XML(XINCLUDE_BAD["B2.xml"]) 1395 with self.assertRaises(ElementInclude.FatalIncludeError) as cm: 1396 ElementInclude.include(document, loader=self.none_loader) 1397 self.assertEqual(str(cm.exception), 1398 "xi:fallback tag must be child of xi:include " 1399 "('{http://www.w3.org/2001/XInclude}fallback')") 1400 1401 # -------------------------------------------------------------------- 1402 # reported bugs 1403 1404 class BugsTest(unittest.TestCase): 1405 1406 def test_bug_xmltoolkit21(self): 1407 # marshaller gives obscure errors for non-string values 1408 1409 def check(elem): 1410 with self.assertRaises(TypeError) as cm: 1411 serialize(elem) 1412 self.assertEqual(str(cm.exception), 1413 'cannot serialize 123 (type int)') 1414 1415 elem = ET.Element(123) 1416 check(elem) # tag 1417 1418 elem = ET.Element("elem") 1419 elem.text = 123 1420 check(elem) # text 1421 1422 elem = ET.Element("elem") 1423 elem.tail = 123 1424 check(elem) # tail 1425 1426 elem = ET.Element("elem") 1427 elem.set(123, "123") 1428 check(elem) # attribute key 1429 1430 elem = ET.Element("elem") 1431 elem.set("123", 123) 1432 check(elem) # attribute value 1433 1434 def test_bug_xmltoolkit25(self): 1435 # typo in ElementTree.findtext 1436 1437 elem = ET.XML(SAMPLE_XML) 1438 tree = ET.ElementTree(elem) 1439 self.assertEqual(tree.findtext("tag"), 'text') 1440 self.assertEqual(tree.findtext("section/tag"), 'subtext') 1441 1442 def test_bug_xmltoolkit28(self): 1443 # .//tag causes exceptions 1444 1445 tree = ET.XML("<doc><table><tbody/></table></doc>") 1446 self.assertEqual(summarize_list(tree.findall(".//thead")), []) 1447 self.assertEqual(summarize_list(tree.findall(".//tbody")), ['tbody']) 1448 1449 def test_bug_xmltoolkitX1(self): 1450 # dump() doesn't flush the output buffer 1451 1452 tree = ET.XML("<doc><table><tbody/></table></doc>") 1453 with support.captured_stdout() as stdout: 1454 ET.dump(tree) 1455 self.assertEqual(stdout.getvalue(), '<doc><table><tbody /></table></doc>\n') 1456 1457 def test_bug_xmltoolkit39(self): 1458 # non-ascii element and attribute names doesn't work 1459 1460 tree = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?><t\xe4g />") 1461 self.assertEqual(ET.tostring(tree, "utf-8"), b'<t\xc3\xa4g />') 1462 1463 tree = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?>" 1464 b"<tag \xe4ttr='välue' />") 1465 self.assertEqual(tree.attrib, {'\xe4ttr': 'v\xe4lue'}) 1466 self.assertEqual(ET.tostring(tree, "utf-8"), 1467 b'<tag \xc3\xa4ttr="v\xc3\xa4lue" />') 1468 1469 tree = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?>" 1470 b'<t\xe4g>text</t\xe4g>') 1471 self.assertEqual(ET.tostring(tree, "utf-8"), 1472 b'<t\xc3\xa4g>text</t\xc3\xa4g>') 1473 1474 tree = ET.Element("t\u00e4g") 1475 self.assertEqual(ET.tostring(tree, "utf-8"), b'<t\xc3\xa4g />') 1476 1477 tree = ET.Element("tag") 1478 tree.set("\u00e4ttr", "v\u00e4lue") 1479 self.assertEqual(ET.tostring(tree, "utf-8"), 1480 b'<tag \xc3\xa4ttr="v\xc3\xa4lue" />') 1481 1482 def test_bug_xmltoolkit54(self): 1483 # problems handling internally defined entities 1484 1485 e = ET.XML("<!DOCTYPE doc [<!ENTITY ldots '舰'>]>" 1486 '<doc>&ldots;</doc>') 1487 self.assertEqual(serialize(e, encoding="us-ascii"), 1488 b'<doc>舰</doc>') 1489 self.assertEqual(serialize(e), '<doc>\u8230</doc>') 1490 1491 def test_bug_xmltoolkit55(self): 1492 # make sure we're reporting the first error, not the last 1493 1494 with self.assertRaises(ET.ParseError) as cm: 1495 ET.XML(b"<!DOCTYPE doc SYSTEM 'doc.dtd'>" 1496 b'<doc>&ldots;&ndots;&rdots;</doc>') 1497 self.assertEqual(str(cm.exception), 1498 'undefined entity &ldots;: line 1, column 36') 1499 1500 def test_bug_xmltoolkit60(self): 1501 # Handle crash in stream source. 1502 1503 class ExceptionFile: 1504 def read(self, x): 1505 raise OSError 1506 1507 self.assertRaises(OSError, ET.parse, ExceptionFile()) 1508 1509 def test_bug_xmltoolkit62(self): 1510 # Don't crash when using custom entities. 1511 1512 ENTITIES = {'rsquo': '\u2019', 'lsquo': '\u2018'} 1513 parser = ET.XMLParser() 1514 parser.entity.update(ENTITIES) 1515 parser.feed("""<?xml version="1.0" encoding="UTF-8"?> 1516 <!DOCTYPE patent-application-publication SYSTEM "pap-v15-2001-01-31.dtd" []> 1517 <patent-application-publication> 1518 <subdoc-abstract> 1519 <paragraph id="A-0001" lvl="0">A new cultivar of Begonia plant named ‘BCT9801BEG’.</paragraph> 1520 </subdoc-abstract> 1521 </patent-application-publication>""") 1522 t = parser.close() 1523 self.assertEqual(t.find('.//paragraph').text, 1524 'A new cultivar of Begonia plant named \u2018BCT9801BEG\u2019.') 1525 1526 def test_bug_xmltoolkit63(self): 1527 # Check reference leak. 1528 def xmltoolkit63(): 1529 tree = ET.TreeBuilder() 1530 tree.start("tag", {}) 1531 tree.data("text") 1532 tree.end("tag") 1533 1534 xmltoolkit63() 1535 count = sys.getrefcount(None) 1536 for i in range(1000): 1537 xmltoolkit63() 1538 self.assertEqual(sys.getrefcount(None), count) 1539 1540 def test_bug_200708_newline(self): 1541 # Preserve newlines in attributes. 1542 1543 e = ET.Element('SomeTag', text="def _f():\n return 3\n") 1544 self.assertEqual(ET.tostring(e), 1545 b'<SomeTag text="def _f(): return 3 " />') 1546 self.assertEqual(ET.XML(ET.tostring(e)).get("text"), 1547 'def _f():\n return 3\n') 1548 self.assertEqual(ET.tostring(ET.XML(ET.tostring(e))), 1549 b'<SomeTag text="def _f(): return 3 " />') 1550 1551 def test_bug_200708_close(self): 1552 # Test default builder. 1553 parser = ET.XMLParser() # default 1554 parser.feed("<element>some text</element>") 1555 self.assertEqual(parser.close().tag, 'element') 1556 1557 # Test custom builder. 1558 class EchoTarget: 1559 def close(self): 1560 return ET.Element("element") # simulate root 1561 parser = ET.XMLParser(EchoTarget()) 1562 parser.feed("<element>some text</element>") 1563 self.assertEqual(parser.close().tag, 'element') 1564 1565 def test_bug_200709_default_namespace(self): 1566 e = ET.Element("{default}elem") 1567 s = ET.SubElement(e, "{default}elem") 1568 self.assertEqual(serialize(e, default_namespace="default"), # 1 1569 '<elem xmlns="default"><elem /></elem>') 1570 1571 e = ET.Element("{default}elem") 1572 s = ET.SubElement(e, "{default}elem") 1573 s = ET.SubElement(e, "{not-default}elem") 1574 self.assertEqual(serialize(e, default_namespace="default"), # 2 1575 '<elem xmlns="default" xmlns:ns1="not-default">' 1576 '<elem />' 1577 '<ns1:elem />' 1578 '</elem>') 1579 1580 e = ET.Element("{default}elem") 1581 s = ET.SubElement(e, "{default}elem") 1582 s = ET.SubElement(e, "elem") # unprefixed name 1583 with self.assertRaises(ValueError) as cm: 1584 serialize(e, default_namespace="default") # 3 1585 self.assertEqual(str(cm.exception), 1586 'cannot use non-qualified names with default_namespace option') 1587 1588 def test_bug_200709_register_namespace(self): 1589 e = ET.Element("{http://namespace.invalid/does/not/exist/}title") 1590 self.assertEqual(ET.tostring(e), 1591 b'<ns0:title xmlns:ns0="http://namespace.invalid/does/not/exist/" />') 1592 ET.register_namespace("foo", "http://namespace.invalid/does/not/exist/") 1593 e = ET.Element("{http://namespace.invalid/does/not/exist/}title") 1594 self.assertEqual(ET.tostring(e), 1595 b'<foo:title xmlns:foo="http://namespace.invalid/does/not/exist/" />') 1596 1597 # And the Dublin Core namespace is in the default list: 1598 1599 e = ET.Element("{http://purl.org/dc/elements/1.1/}title") 1600 self.assertEqual(ET.tostring(e), 1601 b'<dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" />') 1602 1603 def test_bug_200709_element_comment(self): 1604 # Not sure if this can be fixed, really (since the serializer needs 1605 # ET.Comment, not cET.comment). 1606 1607 a = ET.Element('a') 1608 a.append(ET.Comment('foo')) 1609 self.assertEqual(a[0].tag, ET.Comment) 1610 1611 a = ET.Element('a') 1612 a.append(ET.PI('foo')) 1613 self.assertEqual(a[0].tag, ET.PI) 1614 1615 def test_bug_200709_element_insert(self): 1616 a = ET.Element('a') 1617 b = ET.SubElement(a, 'b') 1618 c = ET.SubElement(a, 'c') 1619 d = ET.Element('d') 1620 a.insert(0, d) 1621 self.assertEqual(summarize_list(a), ['d', 'b', 'c']) 1622 a.insert(-1, d) 1623 self.assertEqual(summarize_list(a), ['d', 'b', 'd', 'c']) 1624 1625 def test_bug_200709_iter_comment(self): 1626 a = ET.Element('a') 1627 b = ET.SubElement(a, 'b') 1628 comment_b = ET.Comment("TEST-b") 1629 b.append(comment_b) 1630 self.assertEqual(summarize_list(a.iter(ET.Comment)), [ET.Comment]) 1631 1632 # -------------------------------------------------------------------- 1633 # reported on bugs.python.org 1634 1635 def test_bug_1534630(self): 1636 bob = ET.TreeBuilder() 1637 e = bob.data("data") 1638 e = bob.start("tag", {}) 1639 e = bob.end("tag") 1640 e = bob.close() 1641 self.assertEqual(serialize(e), '<tag />') 1642 1643 def test_issue6233(self): 1644 e = ET.XML(b"<?xml version='1.0' encoding='utf-8'?>" 1645 b'<body>t\xc3\xa3g</body>') 1646 self.assertEqual(ET.tostring(e, 'ascii'), 1647 b"<?xml version='1.0' encoding='ascii'?>\n" 1648 b'<body>tãg</body>') 1649 e = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?>" 1650 b'<body>t\xe3g</body>') 1651 self.assertEqual(ET.tostring(e, 'ascii'), 1652 b"<?xml version='1.0' encoding='ascii'?>\n" 1653 b'<body>tãg</body>') 1654 1655 def test_issue3151(self): 1656 e = ET.XML('<prefix:localname xmlns:prefix="${stuff}"/>') 1657 self.assertEqual(e.tag, '{${stuff}}localname') 1658 t = ET.ElementTree(e) 1659 self.assertEqual(ET.tostring(e), b'<ns0:localname xmlns:ns0="${stuff}" />') 1660 1661 def test_issue6565(self): 1662 elem = ET.XML("<body><tag/></body>") 1663 self.assertEqual(summarize_list(elem), ['tag']) 1664 newelem = ET.XML(SAMPLE_XML) 1665 elem[:] = newelem[:] 1666 self.assertEqual(summarize_list(elem), ['tag', 'tag', 'section']) 1667 1668 def test_issue10777(self): 1669 # Registering a namespace twice caused a "dictionary changed size during 1670 # iteration" bug. 1671 1672 ET.register_namespace('test10777', 'http://myuri/') 1673 ET.register_namespace('test10777', 'http://myuri/') 1674 1675 def test_lost_text(self): 1676 # Issue #25902: Borrowed text can disappear 1677 class Text: 1678 def __bool__(self): 1679 e.text = 'changed' 1680 return True 1681 1682 e = ET.Element('tag') 1683 e.text = Text() 1684 i = e.itertext() 1685 t = next(i) 1686 self.assertIsInstance(t, Text) 1687 self.assertIsInstance(e.text, str) 1688 self.assertEqual(e.text, 'changed') 1689 1690 def test_lost_tail(self): 1691 # Issue #25902: Borrowed tail can disappear 1692 class Text: 1693 def __bool__(self): 1694 e[0].tail = 'changed' 1695 return True 1696 1697 e = ET.Element('root') 1698 e.append(ET.Element('tag')) 1699 e[0].tail = Text() 1700 i = e.itertext() 1701 t = next(i) 1702 self.assertIsInstance(t, Text) 1703 self.assertIsInstance(e[0].tail, str) 1704 self.assertEqual(e[0].tail, 'changed') 1705 1706 def test_lost_elem(self): 1707 # Issue #25902: Borrowed element can disappear 1708 class Tag: 1709 def __eq__(self, other): 1710 e[0] = ET.Element('changed') 1711 next(i) 1712 return True 1713 1714 e = ET.Element('root') 1715 e.append(ET.Element(Tag())) 1716 e.append(ET.Element('tag')) 1717 i = e.iter('tag') 1718 try: 1719 t = next(i) 1720 except ValueError: 1721 self.skipTest('generators are not reentrant') 1722 self.assertIsInstance(t.tag, Tag) 1723 self.assertIsInstance(e[0].tag, str) 1724 self.assertEqual(e[0].tag, 'changed') 1725 1726 1727 # -------------------------------------------------------------------- 1728 1729 1730 class BasicElementTest(ElementTestCase, unittest.TestCase): 1731 def test_augmentation_type_errors(self): 1732 e = ET.Element('joe') 1733 self.assertRaises(TypeError, e.append, 'b') 1734 self.assertRaises(TypeError, e.extend, [ET.Element('bar'), 'foo']) 1735 self.assertRaises(TypeError, e.insert, 0, 'foo') 1736 1737 def test_cyclic_gc(self): 1738 class Dummy: 1739 pass 1740 1741 # Test the shortest cycle: d->element->d 1742 d = Dummy() 1743 d.dummyref = ET.Element('joe', attr=d) 1744 wref = weakref.ref(d) 1745 del d 1746 gc_collect() 1747 self.assertIsNone(wref()) 1748 1749 # A longer cycle: d->e->e2->d 1750 e = ET.Element('joe') 1751 d = Dummy() 1752 d.dummyref = e 1753 wref = weakref.ref(d) 1754 e2 = ET.SubElement(e, 'foo', attr=d) 1755 del d, e, e2 1756 gc_collect() 1757 self.assertIsNone(wref()) 1758 1759 # A cycle between Element objects as children of one another 1760 # e1->e2->e3->e1 1761 e1 = ET.Element('e1') 1762 e2 = ET.Element('e2') 1763 e3 = ET.Element('e3') 1764 e1.append(e2) 1765 e2.append(e2) 1766 e3.append(e1) 1767 wref = weakref.ref(e1) 1768 del e1, e2, e3 1769 gc_collect() 1770 self.assertIsNone(wref()) 1771 1772 def test_weakref(self): 1773 flag = False 1774 def wref_cb(w): 1775 nonlocal flag 1776 flag = True 1777 e = ET.Element('e') 1778 wref = weakref.ref(e, wref_cb) 1779 self.assertEqual(wref().tag, 'e') 1780 del e 1781 self.assertEqual(flag, True) 1782 self.assertEqual(wref(), None) 1783 1784 def test_get_keyword_args(self): 1785 e1 = ET.Element('foo' , x=1, y=2, z=3) 1786 self.assertEqual(e1.get('x', default=7), 1) 1787 self.assertEqual(e1.get('w', default=7), 7) 1788 1789 def test_pickle(self): 1790 # issue #16076: the C implementation wasn't pickleable. 1791 for proto in range(2, pickle.HIGHEST_PROTOCOL + 1): 1792 for dumper, loader in product(self.modules, repeat=2): 1793 e = dumper.Element('foo', bar=42) 1794 e.text = "text goes here" 1795 e.tail = "opposite of head" 1796 dumper.SubElement(e, 'child').append(dumper.Element('grandchild')) 1797 e.append(dumper.Element('child')) 1798 e.findall('.//grandchild')[0].set('attr', 'other value') 1799 1800 e2 = self.pickleRoundTrip(e, 'xml.etree.ElementTree', 1801 dumper, loader, proto) 1802 1803 self.assertEqual(e2.tag, 'foo') 1804 self.assertEqual(e2.attrib['bar'], 42) 1805 self.assertEqual(len(e2), 2) 1806 self.assertEqualElements(e, e2) 1807 1808 def test_pickle_issue18997(self): 1809 for proto in range(2, pickle.HIGHEST_PROTOCOL + 1): 1810 for dumper, loader in product(self.modules, repeat=2): 1811 XMLTEXT = """<?xml version="1.0"?> 1812 <group><dogs>4</dogs> 1813 </group>""" 1814 e1 = dumper.fromstring(XMLTEXT) 1815 if hasattr(e1, '__getstate__'): 1816 self.assertEqual(e1.__getstate__()['tag'], 'group') 1817 e2 = self.pickleRoundTrip(e1, 'xml.etree.ElementTree', 1818 dumper, loader, proto) 1819 self.assertEqual(e2.tag, 'group') 1820 self.assertEqual(e2[0].tag, 'dogs') 1821 1822 1823 class BadElementTest(ElementTestCase, unittest.TestCase): 1824 def test_extend_mutable_list(self): 1825 class X: 1826 @property 1827 def __class__(self): 1828 L[:] = [ET.Element('baz')] 1829 return ET.Element 1830 L = [X()] 1831 e = ET.Element('foo') 1832 try: 1833 e.extend(L) 1834 except TypeError: 1835 pass 1836 1837 class Y(X, ET.Element): 1838 pass 1839 L = [Y('x')] 1840 e = ET.Element('foo') 1841 e.extend(L) 1842 1843 def test_extend_mutable_list2(self): 1844 class X: 1845 @property 1846 def __class__(self): 1847 del L[:] 1848 return ET.Element 1849 L = [X(), ET.Element('baz')] 1850 e = ET.Element('foo') 1851 try: 1852 e.extend(L) 1853 except TypeError: 1854 pass 1855 1856 class Y(X, ET.Element): 1857 pass 1858 L = [Y('bar'), ET.Element('baz')] 1859 e = ET.Element('foo') 1860 e.extend(L) 1861 1862 def test_remove_with_mutating(self): 1863 class X(ET.Element): 1864 def __eq__(self, o): 1865 del e[:] 1866 return False 1867 e = ET.Element('foo') 1868 e.extend([X('bar')]) 1869 self.assertRaises(ValueError, e.remove, ET.Element('baz')) 1870 1871 e = ET.Element('foo') 1872 e.extend([ET.Element('bar')]) 1873 self.assertRaises(ValueError, e.remove, X('baz')) 1874 1875 def test_recursive_repr(self): 1876 # Issue #25455 1877 e = ET.Element('foo') 1878 with swap_attr(e, 'tag', e): 1879 with self.assertRaises(RuntimeError): 1880 repr(e) # Should not crash 1881 1882 class MutatingElementPath(str): 1883 def __new__(cls, elem, *args): 1884 self = str.__new__(cls, *args) 1885 self.elem = elem 1886 return self 1887 def __eq__(self, o): 1888 del self.elem[:] 1889 return True 1890 MutatingElementPath.__hash__ = str.__hash__ 1891 1892 class BadElementPath(str): 1893 def __eq__(self, o): 1894 raise 1/0 1895 BadElementPath.__hash__ = str.__hash__ 1896 1897 class BadElementPathTest(ElementTestCase, unittest.TestCase): 1898 def setUp(self): 1899 super().setUp() 1900 from xml.etree import ElementPath 1901 self.path_cache = ElementPath._cache 1902 ElementPath._cache = {} 1903 1904 def tearDown(self): 1905 from xml.etree import ElementPath 1906 ElementPath._cache = self.path_cache 1907 super().tearDown() 1908 1909 def test_find_with_mutating(self): 1910 e = ET.Element('foo') 1911 e.extend([ET.Element('bar')]) 1912 e.find(MutatingElementPath(e, 'x')) 1913 1914 def test_find_with_error(self): 1915 e = ET.Element('foo') 1916 e.extend([ET.Element('bar')]) 1917 try: 1918 e.find(BadElementPath('x')) 1919 except ZeroDivisionError: 1920 pass 1921 1922 def test_findtext_with_mutating(self): 1923 e = ET.Element('foo') 1924 e.extend([ET.Element('bar')]) 1925 e.findtext(MutatingElementPath(e, 'x')) 1926 1927 def test_findtext_with_error(self): 1928 e = ET.Element('foo') 1929 e.extend([ET.Element('bar')]) 1930 try: 1931 e.findtext(BadElementPath('x')) 1932 except ZeroDivisionError: 1933 pass 1934 1935 def test_findall_with_mutating(self): 1936 e = ET.Element('foo') 1937 e.extend([ET.Element('bar')]) 1938 e.findall(MutatingElementPath(e, 'x')) 1939 1940 def test_findall_with_error(self): 1941 e = ET.Element('foo') 1942 e.extend([ET.Element('bar')]) 1943 try: 1944 e.findall(BadElementPath('x')) 1945 except ZeroDivisionError: 1946 pass 1947 1948 1949 class ElementTreeTypeTest(unittest.TestCase): 1950 def test_istype(self): 1951 self.assertIsInstance(ET.ParseError, type) 1952 self.assertIsInstance(ET.QName, type) 1953 self.assertIsInstance(ET.ElementTree, type) 1954 self.assertIsInstance(ET.Element, type) 1955 self.assertIsInstance(ET.TreeBuilder, type) 1956 self.assertIsInstance(ET.XMLParser, type) 1957 1958 def test_Element_subclass_trivial(self): 1959 class MyElement(ET.Element): 1960 pass 1961 1962 mye = MyElement('foo') 1963 self.assertIsInstance(mye, ET.Element) 1964 self.assertIsInstance(mye, MyElement) 1965 self.assertEqual(mye.tag, 'foo') 1966 1967 # test that attribute assignment works (issue 14849) 1968 mye.text = "joe" 1969 self.assertEqual(mye.text, "joe") 1970 1971 def test_Element_subclass_constructor(self): 1972 class MyElement(ET.Element): 1973 def __init__(self, tag, attrib={}, **extra): 1974 super(MyElement, self).__init__(tag + '__', attrib, **extra) 1975 1976 mye = MyElement('foo', {'a': 1, 'b': 2}, c=3, d=4) 1977 self.assertEqual(mye.tag, 'foo__') 1978 self.assertEqual(sorted(mye.items()), 1979 [('a', 1), ('b', 2), ('c', 3), ('d', 4)]) 1980 1981 def test_Element_subclass_new_method(self): 1982 class MyElement(ET.Element): 1983 def newmethod(self): 1984 return self.tag 1985 1986 mye = MyElement('joe') 1987 self.assertEqual(mye.newmethod(), 'joe') 1988 1989 1990 class ElementFindTest(unittest.TestCase): 1991 def test_find_simple(self): 1992 e = ET.XML(SAMPLE_XML) 1993 self.assertEqual(e.find('tag').tag, 'tag') 1994 self.assertEqual(e.find('section/tag').tag, 'tag') 1995 self.assertEqual(e.find('./tag').tag, 'tag') 1996 1997 e[2] = ET.XML(SAMPLE_SECTION) 1998 self.assertEqual(e.find('section/nexttag').tag, 'nexttag') 1999 2000 self.assertEqual(e.findtext('./tag'), 'text') 2001 self.assertEqual(e.findtext('section/tag'), 'subtext') 2002 2003 # section/nexttag is found but has no text 2004 self.assertEqual(e.findtext('section/nexttag'), '') 2005 self.assertEqual(e.findtext('section/nexttag', 'default'), '') 2006 2007 # tog doesn't exist and 'default' kicks in 2008 self.assertIsNone(e.findtext('tog')) 2009 self.assertEqual(e.findtext('tog', 'default'), 'default') 2010 2011 # Issue #16922 2012 self.assertEqual(ET.XML('<tag><empty /></tag>').findtext('empty'), '') 2013 2014 def test_find_xpath(self): 2015 LINEAR_XML = ''' 2016 <body> 2017 <tag class='a'/> 2018 <tag class='b'/> 2019 <tag class='c'/> 2020 <tag class='d'/> 2021 </body>''' 2022 e = ET.XML(LINEAR_XML) 2023 2024 # Test for numeric indexing and last() 2025 self.assertEqual(e.find('./tag[1]').attrib['class'], 'a') 2026 self.assertEqual(e.find('./tag[2]').attrib['class'], 'b') 2027 self.assertEqual(e.find('./tag[last()]').attrib['class'], 'd') 2028 self.assertEqual(e.find('./tag[last()-1]').attrib['class'], 'c') 2029 self.assertEqual(e.find('./tag[last()-2]').attrib['class'], 'b') 2030 2031 self.assertRaisesRegex(SyntaxError, 'XPath', e.find, './tag[0]') 2032 self.assertRaisesRegex(SyntaxError, 'XPath', e.find, './tag[-1]') 2033 self.assertRaisesRegex(SyntaxError, 'XPath', e.find, './tag[last()-0]') 2034 self.assertRaisesRegex(SyntaxError, 'XPath', e.find, './tag[last()+1]') 2035 2036 def test_findall(self): 2037 e = ET.XML(SAMPLE_XML) 2038 e[2] = ET.XML(SAMPLE_SECTION) 2039 self.assertEqual(summarize_list(e.findall('.')), ['body']) 2040 self.assertEqual(summarize_list(e.findall('tag')), ['tag', 'tag']) 2041 self.assertEqual(summarize_list(e.findall('tog')), []) 2042 self.assertEqual(summarize_list(e.findall('tog/foo')), []) 2043 self.assertEqual(summarize_list(e.findall('*')), 2044 ['tag', 'tag', 'section']) 2045 self.assertEqual(summarize_list(e.findall('.//tag')), 2046 ['tag'] * 4) 2047 self.assertEqual(summarize_list(e.findall('section/tag')), ['tag']) 2048 self.assertEqual(summarize_list(e.findall('section//tag')), ['tag'] * 2) 2049 self.assertEqual(summarize_list(e.findall('section/*')), 2050 ['tag', 'nexttag', 'nextsection']) 2051 self.assertEqual(summarize_list(e.findall('section//*')), 2052 ['tag', 'nexttag', 'nextsection', 'tag']) 2053 self.assertEqual(summarize_list(e.findall('section/.//*')), 2054 ['tag', 'nexttag', 'nextsection', 'tag']) 2055 self.assertEqual(summarize_list(e.findall('*/*')), 2056 ['tag', 'nexttag', 'nextsection']) 2057 self.assertEqual(summarize_list(e.findall('*//*')), 2058 ['tag', 'nexttag', 'nextsection', 'tag']) 2059 self.assertEqual(summarize_list(e.findall('*/tag')), ['tag']) 2060 self.assertEqual(summarize_list(e.findall('*/./tag')), ['tag']) 2061 self.assertEqual(summarize_list(e.findall('./tag')), ['tag'] * 2) 2062 self.assertEqual(summarize_list(e.findall('././tag')), ['tag'] * 2) 2063 2064 self.assertEqual(summarize_list(e.findall('.//tag[@class]')), 2065 ['tag'] * 3) 2066 self.assertEqual(summarize_list(e.findall('.//tag[@class="a"]')), 2067 ['tag']) 2068 self.assertEqual(summarize_list(e.findall('.//tag[@class="b"]')), 2069 ['tag'] * 2) 2070 self.assertEqual(summarize_list(e.findall('.//tag[@id]')), 2071 ['tag']) 2072 self.assertEqual(summarize_list(e.findall('.//section[tag]')), 2073 ['section']) 2074 self.assertEqual(summarize_list(e.findall('.//section[element]')), []) 2075 self.assertEqual(summarize_list(e.findall('../tag')), []) 2076 self.assertEqual(summarize_list(e.findall('section/../tag')), 2077 ['tag'] * 2) 2078 self.assertEqual(e.findall('section//'), e.findall('section//*')) 2079 2080 def test_test_find_with_ns(self): 2081 e = ET.XML(SAMPLE_XML_NS) 2082 self.assertEqual(summarize_list(e.findall('tag')), []) 2083 self.assertEqual( 2084 summarize_list(e.findall("{http://effbot.org/ns}tag")), 2085 ['{http://effbot.org/ns}tag'] * 2) 2086 self.assertEqual( 2087 summarize_list(e.findall(".//{http://effbot.org/ns}tag")), 2088 ['{http://effbot.org/ns}tag'] * 3) 2089 2090 def test_findall_different_nsmaps(self): 2091 root = ET.XML(''' 2092 <a xmlns:x="X" xmlns:y="Y"> 2093 <x:b><c/></x:b> 2094 <b/> 2095 <c><x:b/><b/></c><y:b/> 2096 </a>''') 2097 nsmap = {'xx': 'X'} 2098 self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 2) 2099 self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 2) 2100 nsmap = {'xx': 'Y'} 2101 self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 1) 2102 self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 2) 2103 2104 def test_bad_find(self): 2105 e = ET.XML(SAMPLE_XML) 2106 with self.assertRaisesRegex(SyntaxError, 'cannot use absolute path'): 2107 e.findall('/tag') 2108 2109 def test_find_through_ElementTree(self): 2110 e = ET.XML(SAMPLE_XML) 2111 self.assertEqual(ET.ElementTree(e).find('tag').tag, 'tag') 2112 self.assertEqual(ET.ElementTree(e).findtext('tag'), 'text') 2113 self.assertEqual(summarize_list(ET.ElementTree(e).findall('tag')), 2114 ['tag'] * 2) 2115 # this produces a warning 2116 self.assertEqual(summarize_list(ET.ElementTree(e).findall('//tag')), 2117 ['tag'] * 3) 2118 2119 2120 class ElementIterTest(unittest.TestCase): 2121 def _ilist(self, elem, tag=None): 2122 return summarize_list(elem.iter(tag)) 2123 2124 def test_basic(self): 2125 doc = ET.XML("<html><body>this is a <i>paragraph</i>.</body>..</html>") 2126 self.assertEqual(self._ilist(doc), ['html', 'body', 'i']) 2127 self.assertEqual(self._ilist(doc.find('body')), ['body', 'i']) 2128 self.assertEqual(next(doc.iter()).tag, 'html') 2129 self.assertEqual(''.join(doc.itertext()), 'this is a paragraph...') 2130 self.assertEqual(''.join(doc.find('body').itertext()), 2131 'this is a paragraph.') 2132 self.assertEqual(next(doc.itertext()), 'this is a ') 2133 2134 # iterparse should return an iterator 2135 sourcefile = serialize(doc, to_string=False) 2136 self.assertEqual(next(ET.iterparse(sourcefile))[0], 'end') 2137 2138 # With an explitit parser too (issue #9708) 2139 sourcefile = serialize(doc, to_string=False) 2140 parser = ET.XMLParser(target=ET.TreeBuilder()) 2141 self.assertEqual(next(ET.iterparse(sourcefile, parser=parser))[0], 2142 'end') 2143 2144 tree = ET.ElementTree(None) 2145 self.assertRaises(AttributeError, tree.iter) 2146 2147 # Issue #16913 2148 doc = ET.XML("<root>a&<sub>b&</sub>c&</root>") 2149 self.assertEqual(''.join(doc.itertext()), 'a&b&c&') 2150 2151 def test_corners(self): 2152 # single root, no subelements 2153 a = ET.Element('a') 2154 self.assertEqual(self._ilist(a), ['a']) 2155 2156 # one child 2157 b = ET.SubElement(a, 'b') 2158 self.assertEqual(self._ilist(a), ['a', 'b']) 2159 2160 # one child and one grandchild 2161 c = ET.SubElement(b, 'c') 2162 self.assertEqual(self._ilist(a), ['a', 'b', 'c']) 2163 2164 # two children, only first with grandchild 2165 d = ET.SubElement(a, 'd') 2166 self.assertEqual(self._ilist(a), ['a', 'b', 'c', 'd']) 2167 2168 # replace first child by second 2169 a[0] = a[1] 2170 del a[1] 2171 self.assertEqual(self._ilist(a), ['a', 'd']) 2172 2173 def test_iter_by_tag(self): 2174 doc = ET.XML(''' 2175 <document> 2176 <house> 2177 <room>bedroom1</room> 2178 <room>bedroom2</room> 2179 </house> 2180 <shed>nothing here 2181 </shed> 2182 <house> 2183 <room>bedroom8</room> 2184 </house> 2185 </document>''') 2186 2187 self.assertEqual(self._ilist(doc, 'room'), ['room'] * 3) 2188 self.assertEqual(self._ilist(doc, 'house'), ['house'] * 2) 2189 2190 # test that iter also accepts 'tag' as a keyword arg 2191 self.assertEqual( 2192 summarize_list(doc.iter(tag='room')), 2193 ['room'] * 3) 2194 2195 # make sure both tag=None and tag='*' return all tags 2196 all_tags = ['document', 'house', 'room', 'room', 2197 'shed', 'house', 'room'] 2198 self.assertEqual(summarize_list(doc.iter()), all_tags) 2199 self.assertEqual(self._ilist(doc), all_tags) 2200 self.assertEqual(self._ilist(doc, '*'), all_tags) 2201 2202 def test_getiterator(self): 2203 doc = ET.XML(''' 2204 <document> 2205 <house> 2206 <room>bedroom1</room> 2207 <room>bedroom2</room> 2208 </house> 2209 <shed>nothing here 2210 </shed> 2211 <house> 2212 <room>bedroom8</room> 2213 </house> 2214 </document>''') 2215 2216 self.assertEqual(summarize_list(doc.getiterator('room')), 2217 ['room'] * 3) 2218 self.assertEqual(summarize_list(doc.getiterator('house')), 2219 ['house'] * 2) 2220 2221 # test that getiterator also accepts 'tag' as a keyword arg 2222 self.assertEqual( 2223 summarize_list(doc.getiterator(tag='room')), 2224 ['room'] * 3) 2225 2226 # make sure both tag=None and tag='*' return all tags 2227 all_tags = ['document', 'house', 'room', 'room', 2228 'shed', 'house', 'room'] 2229 self.assertEqual(summarize_list(doc.getiterator()), all_tags) 2230 self.assertEqual(summarize_list(doc.getiterator(None)), all_tags) 2231 self.assertEqual(summarize_list(doc.getiterator('*')), all_tags) 2232 2233 def test_copy(self): 2234 a = ET.Element('a') 2235 it = a.iter() 2236 with self.assertRaises(TypeError): 2237 copy.copy(it) 2238 2239 def test_pickle(self): 2240 a = ET.Element('a') 2241 it = a.iter() 2242 for proto in range(pickle.HIGHEST_PROTOCOL + 1): 2243 with self.assertRaises((TypeError, pickle.PicklingError)): 2244 pickle.dumps(it, proto) 2245 2246 2247 class TreeBuilderTest(unittest.TestCase): 2248 sample1 = ('<!DOCTYPE html PUBLIC' 2249 ' "-//W3C//DTD XHTML 1.0 Transitional//EN"' 2250 ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">' 2251 '<html>text<div>subtext</div>tail</html>') 2252 2253 sample2 = '''<toplevel>sometext</toplevel>''' 2254 2255 def _check_sample1_element(self, e): 2256 self.assertEqual(e.tag, 'html') 2257 self.assertEqual(e.text, 'text') 2258 self.assertEqual(e.tail, None) 2259 self.assertEqual(e.attrib, {}) 2260 children = list(e) 2261 self.assertEqual(len(children), 1) 2262 child = children[0] 2263 self.assertEqual(child.tag, 'div') 2264 self.assertEqual(child.text, 'subtext') 2265 self.assertEqual(child.tail, 'tail') 2266 self.assertEqual(child.attrib, {}) 2267 2268 def test_dummy_builder(self): 2269 class BaseDummyBuilder: 2270 def close(self): 2271 return 42 2272 2273 class DummyBuilder(BaseDummyBuilder): 2274 data = start = end = lambda *a: None 2275 2276 parser = ET.XMLParser(target=DummyBuilder()) 2277 parser.feed(self.sample1) 2278 self.assertEqual(parser.close(), 42) 2279 2280 parser = ET.XMLParser(target=BaseDummyBuilder()) 2281 parser.feed(self.sample1) 2282 self.assertEqual(parser.close(), 42) 2283 2284 parser = ET.XMLParser(target=object()) 2285 parser.feed(self.sample1) 2286 self.assertIsNone(parser.close()) 2287 2288 def test_treebuilder_elementfactory_none(self): 2289 parser = ET.XMLParser(target=ET.TreeBuilder(element_factory=None)) 2290 parser.feed(self.sample1) 2291 e = parser.close() 2292 self._check_sample1_element(e) 2293 2294 def test_subclass(self): 2295 class MyTreeBuilder(ET.TreeBuilder): 2296 def foobar(self, x): 2297 return x * 2 2298 2299 tb = MyTreeBuilder() 2300 self.assertEqual(tb.foobar(10), 20) 2301 2302 parser = ET.XMLParser(target=tb) 2303 parser.feed(self.sample1) 2304 2305 e = parser.close() 2306 self._check_sample1_element(e) 2307 2308 def test_element_factory(self): 2309 lst = [] 2310 def myfactory(tag, attrib): 2311 nonlocal lst 2312 lst.append(tag) 2313 return ET.Element(tag, attrib) 2314 2315 tb = ET.TreeBuilder(element_factory=myfactory) 2316 parser = ET.XMLParser(target=tb) 2317 parser.feed(self.sample2) 2318 parser.close() 2319 2320 self.assertEqual(lst, ['toplevel']) 2321 2322 def _check_element_factory_class(self, cls): 2323 tb = ET.TreeBuilder(element_factory=cls) 2324 2325 parser = ET.XMLParser(target=tb) 2326 parser.feed(self.sample1) 2327 e = parser.close() 2328 self.assertIsInstance(e, cls) 2329 self._check_sample1_element(e) 2330 2331 def test_element_factory_subclass(self): 2332 class MyElement(ET.Element): 2333 pass 2334 self._check_element_factory_class(MyElement) 2335 2336 def test_element_factory_pure_python_subclass(self): 2337 # Mimick SimpleTAL's behaviour (issue #16089): both versions of 2338 # TreeBuilder should be able to cope with a subclass of the 2339 # pure Python Element class. 2340 base = ET._Element_Py 2341 # Not from a C extension 2342 self.assertEqual(base.__module__, 'xml.etree.ElementTree') 2343 # Force some multiple inheritance with a C class to make things 2344 # more interesting. 2345 class MyElement(base, ValueError): 2346 pass 2347 self._check_element_factory_class(MyElement) 2348 2349 def test_doctype(self): 2350 class DoctypeParser: 2351 _doctype = None 2352 2353 def doctype(self, name, pubid, system): 2354 self._doctype = (name, pubid, system) 2355 2356 def close(self): 2357 return self._doctype 2358 2359 parser = ET.XMLParser(target=DoctypeParser()) 2360 parser.feed(self.sample1) 2361 2362 self.assertEqual(parser.close(), 2363 ('html', '-//W3C//DTD XHTML 1.0 Transitional//EN', 2364 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd')) 2365 2366 2367 class XMLParserTest(unittest.TestCase): 2368 sample1 = b'<file><line>22</line></file>' 2369 sample2 = (b'<!DOCTYPE html PUBLIC' 2370 b' "-//W3C//DTD XHTML 1.0 Transitional//EN"' 2371 b' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">' 2372 b'<html>text</html>') 2373 sample3 = ('<?xml version="1.0" encoding="iso-8859-1"?>\n' 2374 '<money value="$\xa3\u20ac\U0001017b">$\xa3\u20ac\U0001017b</money>') 2375 2376 def _check_sample_element(self, e): 2377 self.assertEqual(e.tag, 'file') 2378 self.assertEqual(e[0].tag, 'line') 2379 self.assertEqual(e[0].text, '22') 2380 2381 def test_constructor_args(self): 2382 # Positional args. The first (html) is not supported, but should be 2383 # nevertheless correctly accepted. 2384 parser = ET.XMLParser(None, ET.TreeBuilder(), 'utf-8') 2385 parser.feed(self.sample1) 2386 self._check_sample_element(parser.close()) 2387 2388 # Now as keyword args. 2389 parser2 = ET.XMLParser(encoding='utf-8', 2390 html=[{}], 2391 target=ET.TreeBuilder()) 2392 parser2.feed(self.sample1) 2393 self._check_sample_element(parser2.close()) 2394 2395 def test_subclass(self): 2396 class MyParser(ET.XMLParser): 2397 pass 2398 parser = MyParser() 2399 parser.feed(self.sample1) 2400 self._check_sample_element(parser.close()) 2401 2402 def test_doctype_warning(self): 2403 parser = ET.XMLParser() 2404 with self.assertWarns(DeprecationWarning): 2405 parser.doctype('html', '-//W3C//DTD XHTML 1.0 Transitional//EN', 2406 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd') 2407 parser.feed('<html/>') 2408 parser.close() 2409 2410 with warnings.catch_warnings(): 2411 warnings.simplefilter('error', DeprecationWarning) 2412 parser = ET.XMLParser() 2413 parser.feed(self.sample2) 2414 parser.close() 2415 2416 def test_subclass_doctype(self): 2417 _doctype = None 2418 class MyParserWithDoctype(ET.XMLParser): 2419 def doctype(self, name, pubid, system): 2420 nonlocal _doctype 2421 _doctype = (name, pubid, system) 2422 2423 parser = MyParserWithDoctype() 2424 with self.assertWarns(DeprecationWarning): 2425 parser.feed(self.sample2) 2426 parser.close() 2427 self.assertEqual(_doctype, 2428 ('html', '-//W3C//DTD XHTML 1.0 Transitional//EN', 2429 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd')) 2430 2431 _doctype = _doctype2 = None 2432 with warnings.catch_warnings(): 2433 warnings.simplefilter('error', DeprecationWarning) 2434 class DoctypeParser: 2435 def doctype(self, name, pubid, system): 2436 nonlocal _doctype2 2437 _doctype2 = (name, pubid, system) 2438 2439 parser = MyParserWithDoctype(target=DoctypeParser()) 2440 parser.feed(self.sample2) 2441 parser.close() 2442 self.assertIsNone(_doctype) 2443 self.assertEqual(_doctype2, 2444 ('html', '-//W3C//DTD XHTML 1.0 Transitional//EN', 2445 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd')) 2446 2447 def test_inherited_doctype(self): 2448 '''Ensure that ordinary usage is not deprecated (Issue 19176)''' 2449 with warnings.catch_warnings(): 2450 warnings.simplefilter('error', DeprecationWarning) 2451 class MyParserWithoutDoctype(ET.XMLParser): 2452 pass 2453 parser = MyParserWithoutDoctype() 2454 parser.feed(self.sample2) 2455 parser.close() 2456 2457 def test_parse_string(self): 2458 parser = ET.XMLParser(target=ET.TreeBuilder()) 2459 parser.feed(self.sample3) 2460 e = parser.close() 2461 self.assertEqual(e.tag, 'money') 2462 self.assertEqual(e.attrib['value'], '$\xa3\u20ac\U0001017b') 2463 self.assertEqual(e.text, '$\xa3\u20ac\U0001017b') 2464 2465 2466 class NamespaceParseTest(unittest.TestCase): 2467 def test_find_with_namespace(self): 2468 nsmap = {'h': 'hello', 'f': 'foo'} 2469 doc = ET.fromstring(SAMPLE_XML_NS_ELEMS) 2470 2471 self.assertEqual(len(doc.findall('{hello}table', nsmap)), 1) 2472 self.assertEqual(len(doc.findall('.//{hello}td', nsmap)), 2) 2473 self.assertEqual(len(doc.findall('.//{foo}name', nsmap)), 1) 2474 2475 2476 class ElementSlicingTest(unittest.TestCase): 2477 def _elem_tags(self, elemlist): 2478 return [e.tag for e in elemlist] 2479 2480 def _subelem_tags(self, elem): 2481 return self._elem_tags(list(elem)) 2482 2483 def _make_elem_with_children(self, numchildren): 2484 """Create an Element with a tag 'a', with the given amount of children 2485 named 'a0', 'a1' ... and so on. 2486 2487 """ 2488 e = ET.Element('a') 2489 for i in range(numchildren): 2490 ET.SubElement(e, 'a%s' % i) 2491 return e 2492 2493 def test_getslice_single_index(self): 2494 e = self._make_elem_with_children(10) 2495 2496 self.assertEqual(e[1].tag, 'a1') 2497 self.assertEqual(e[-2].tag, 'a8') 2498 2499 self.assertRaises(IndexError, lambda: e[12]) 2500 self.assertRaises(IndexError, lambda: e[-12]) 2501 2502 def test_getslice_range(self): 2503 e = self._make_elem_with_children(6) 2504 2505 self.assertEqual(self._elem_tags(e[3:]), ['a3', 'a4', 'a5']) 2506 self.assertEqual(self._elem_tags(e[3:6]), ['a3', 'a4', 'a5']) 2507 self.assertEqual(self._elem_tags(e[3:16]), ['a3', 'a4', 'a5']) 2508 self.assertEqual(self._elem_tags(e[3:5]), ['a3', 'a4']) 2509 self.assertEqual(self._elem_tags(e[3:-1]), ['a3', 'a4']) 2510 self.assertEqual(self._elem_tags(e[:2]), ['a0', 'a1']) 2511 2512 def test_getslice_steps(self): 2513 e = self._make_elem_with_children(10) 2514 2515 self.assertEqual(self._elem_tags(e[8:10:1]), ['a8', 'a9']) 2516 self.assertEqual(self._elem_tags(e[::3]), ['a0', 'a3', 'a6', 'a9']) 2517 self.assertEqual(self._elem_tags(e[::8]), ['a0', 'a8']) 2518 self.assertEqual(self._elem_tags(e[1::8]), ['a1', 'a9']) 2519 self.assertEqual(self._elem_tags(e[3::sys.maxsize]), ['a3']) 2520 self.assertEqual(self._elem_tags(e[3::sys.maxsize<<64]), ['a3']) 2521 2522 def test_getslice_negative_steps(self): 2523 e = self._make_elem_with_children(4) 2524 2525 self.assertEqual(self._elem_tags(e[::-1]), ['a3', 'a2', 'a1', 'a0']) 2526 self.assertEqual(self._elem_tags(e[::-2]), ['a3', 'a1']) 2527 self.assertEqual(self._elem_tags(e[3::-sys.maxsize]), ['a3']) 2528 self.assertEqual(self._elem_tags(e[3::-sys.maxsize-1]), ['a3']) 2529 self.assertEqual(self._elem_tags(e[3::-sys.maxsize<<64]), ['a3']) 2530 2531 def test_delslice(self): 2532 e = self._make_elem_with_children(4) 2533 del e[0:2] 2534 self.assertEqual(self._subelem_tags(e), ['a2', 'a3']) 2535 2536 e = self._make_elem_with_children(4) 2537 del e[0:] 2538 self.assertEqual(self._subelem_tags(e), []) 2539 2540 e = self._make_elem_with_children(4) 2541 del e[::-1] 2542 self.assertEqual(self._subelem_tags(e), []) 2543 2544 e = self._make_elem_with_children(4) 2545 del e[::-2] 2546 self.assertEqual(self._subelem_tags(e), ['a0', 'a2']) 2547 2548 e = self._make_elem_with_children(4) 2549 del e[1::2] 2550 self.assertEqual(self._subelem_tags(e), ['a0', 'a2']) 2551 2552 e = self._make_elem_with_children(2) 2553 del e[::2] 2554 self.assertEqual(self._subelem_tags(e), ['a1']) 2555 2556 def test_setslice_single_index(self): 2557 e = self._make_elem_with_children(4) 2558 e[1] = ET.Element('b') 2559 self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'a2', 'a3']) 2560 2561 e[-2] = ET.Element('c') 2562 self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'c', 'a3']) 2563 2564 with self.assertRaises(IndexError): 2565 e[5] = ET.Element('d') 2566 with self.assertRaises(IndexError): 2567 e[-5] = ET.Element('d') 2568 self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'c', 'a3']) 2569 2570 def test_setslice_range(self): 2571 e = self._make_elem_with_children(4) 2572 e[1:3] = [ET.Element('b%s' % i) for i in range(2)] 2573 self.assertEqual(self._subelem_tags(e), ['a0', 'b0', 'b1', 'a3']) 2574 2575 e = self._make_elem_with_children(4) 2576 e[1:3] = [ET.Element('b')] 2577 self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'a3']) 2578 2579 e = self._make_elem_with_children(4) 2580 e[1:3] = [ET.Element('b%s' % i) for i in range(3)] 2581 self.assertEqual(self._subelem_tags(e), ['a0', 'b0', 'b1', 'b2', 'a3']) 2582 2583 def test_setslice_steps(self): 2584 e = self._make_elem_with_children(6) 2585 e[1:5:2] = [ET.Element('b%s' % i) for i in range(2)] 2586 self.assertEqual(self._subelem_tags(e), ['a0', 'b0', 'a2', 'b1', 'a4', 'a5']) 2587 2588 e = self._make_elem_with_children(6) 2589 with self.assertRaises(ValueError): 2590 e[1:5:2] = [ET.Element('b')] 2591 with self.assertRaises(ValueError): 2592 e[1:5:2] = [ET.Element('b%s' % i) for i in range(3)] 2593 with self.assertRaises(ValueError): 2594 e[1:5:2] = [] 2595 self.assertEqual(self._subelem_tags(e), ['a0', 'a1', 'a2', 'a3', 'a4', 'a5']) 2596 2597 e = self._make_elem_with_children(4) 2598 e[1::sys.maxsize] = [ET.Element('b')] 2599 self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'a2', 'a3']) 2600 e[1::sys.maxsize<<64] = [ET.Element('c')] 2601 self.assertEqual(self._subelem_tags(e), ['a0', 'c', 'a2', 'a3']) 2602 2603 def test_setslice_negative_steps(self): 2604 e = self._make_elem_with_children(4) 2605 e[2:0:-1] = [ET.Element('b%s' % i) for i in range(2)] 2606 self.assertEqual(self._subelem_tags(e), ['a0', 'b1', 'b0', 'a3']) 2607 2608 e = self._make_elem_with_children(4) 2609 with self.assertRaises(ValueError): 2610 e[2:0:-1] = [ET.Element('b')] 2611 with self.assertRaises(ValueError): 2612 e[2:0:-1] = [ET.Element('b%s' % i) for i in range(3)] 2613 with self.assertRaises(ValueError): 2614 e[2:0:-1] = [] 2615 self.assertEqual(self._subelem_tags(e), ['a0', 'a1', 'a2', 'a3']) 2616 2617 e = self._make_elem_with_children(4) 2618 e[1::-sys.maxsize] = [ET.Element('b')] 2619 self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'a2', 'a3']) 2620 e[1::-sys.maxsize-1] = [ET.Element('c')] 2621 self.assertEqual(self._subelem_tags(e), ['a0', 'c', 'a2', 'a3']) 2622 e[1::-sys.maxsize<<64] = [ET.Element('d')] 2623 self.assertEqual(self._subelem_tags(e), ['a0', 'd', 'a2', 'a3']) 2624 2625 2626 class IOTest(unittest.TestCase): 2627 def tearDown(self): 2628 support.unlink(TESTFN) 2629 2630 def test_encoding(self): 2631 # Test encoding issues. 2632 elem = ET.Element("tag") 2633 elem.text = "abc" 2634 self.assertEqual(serialize(elem), '<tag>abc</tag>') 2635 for enc in ("utf-8", "us-ascii"): 2636 with self.subTest(enc): 2637 self.assertEqual(serialize(elem, encoding=enc), 2638 b'<tag>abc</tag>') 2639 self.assertEqual(serialize(elem, encoding=enc.upper()), 2640 b'<tag>abc</tag>') 2641 for enc in ("iso-8859-1", "utf-16", "utf-32"): 2642 with self.subTest(enc): 2643 self.assertEqual(serialize(elem, encoding=enc), 2644 ("<?xml version='1.0' encoding='%s'?>\n" 2645 "<tag>abc</tag>" % enc).encode(enc)) 2646 upper = enc.upper() 2647 self.assertEqual(serialize(elem, encoding=upper), 2648 ("<?xml version='1.0' encoding='%s'?>\n" 2649 "<tag>abc</tag>" % upper).encode(enc)) 2650 2651 elem = ET.Element("tag") 2652 elem.text = "<&\"\'>" 2653 self.assertEqual(serialize(elem), '<tag><&"\'></tag>') 2654 self.assertEqual(serialize(elem, encoding="utf-8"), 2655 b'<tag><&"\'></tag>') 2656 self.assertEqual(serialize(elem, encoding="us-ascii"), 2657 b'<tag><&"\'></tag>') 2658 for enc in ("iso-8859-1", "utf-16", "utf-32"): 2659 self.assertEqual(serialize(elem, encoding=enc), 2660 ("<?xml version='1.0' encoding='%s'?>\n" 2661 "<tag><&\"'></tag>" % enc).encode(enc)) 2662 2663 elem = ET.Element("tag") 2664 elem.attrib["key"] = "<&\"\'>" 2665 self.assertEqual(serialize(elem), '<tag key="<&"\'>" />') 2666 self.assertEqual(serialize(elem, encoding="utf-8"), 2667 b'<tag key="<&"\'>" />') 2668 self.assertEqual(serialize(elem, encoding="us-ascii"), 2669 b'<tag key="<&"\'>" />') 2670 for enc in ("iso-8859-1", "utf-16", "utf-32"): 2671 self.assertEqual(serialize(elem, encoding=enc), 2672 ("<?xml version='1.0' encoding='%s'?>\n" 2673 "<tag key=\"<&"'>\" />" % enc).encode(enc)) 2674 2675 elem = ET.Element("tag") 2676 elem.text = '\xe5\xf6\xf6<>' 2677 self.assertEqual(serialize(elem), '<tag>\xe5\xf6\xf6<></tag>') 2678 self.assertEqual(serialize(elem, encoding="utf-8"), 2679 b'<tag>\xc3\xa5\xc3\xb6\xc3\xb6<></tag>') 2680 self.assertEqual(serialize(elem, encoding="us-ascii"), 2681 b'<tag>åöö<></tag>') 2682 for enc in ("iso-8859-1", "utf-16", "utf-32"): 2683 self.assertEqual(serialize(elem, encoding=enc), 2684 ("<?xml version='1.0' encoding='%s'?>\n" 2685 "<tag><></tag>" % enc).encode(enc)) 2686 2687 elem = ET.Element("tag") 2688 elem.attrib["key"] = '\xe5\xf6\xf6<>' 2689 self.assertEqual(serialize(elem), '<tag key="\xe5\xf6\xf6<>" />') 2690 self.assertEqual(serialize(elem, encoding="utf-8"), 2691 b'<tag key="\xc3\xa5\xc3\xb6\xc3\xb6<>" />') 2692 self.assertEqual(serialize(elem, encoding="us-ascii"), 2693 b'<tag key="åöö<>" />') 2694 for enc in ("iso-8859-1", "utf-16", "utf-16le", "utf-16be", "utf-32"): 2695 self.assertEqual(serialize(elem, encoding=enc), 2696 ("<?xml version='1.0' encoding='%s'?>\n" 2697 "<tag key=\"<>\" />" % enc).encode(enc)) 2698 2699 def test_write_to_filename(self): 2700 tree = ET.ElementTree(ET.XML('''<site />''')) 2701 tree.write(TESTFN) 2702 with open(TESTFN, 'rb') as f: 2703 self.assertEqual(f.read(), b'''<site />''') 2704 2705 def test_write_to_text_file(self): 2706 tree = ET.ElementTree(ET.XML('''<site />''')) 2707 with open(TESTFN, 'w', encoding='utf-8') as f: 2708 tree.write(f, encoding='unicode') 2709 self.assertFalse(f.closed) 2710 with open(TESTFN, 'rb') as f: 2711 self.assertEqual(f.read(), b'''<site />''') 2712 2713 def test_write_to_binary_file(self): 2714 tree = ET.ElementTree(ET.XML('''<site />''')) 2715 with open(TESTFN, 'wb') as f: 2716 tree.write(f) 2717 self.assertFalse(f.closed) 2718 with open(TESTFN, 'rb') as f: 2719 self.assertEqual(f.read(), b'''<site />''') 2720 2721 def test_write_to_binary_file_with_bom(self): 2722 tree = ET.ElementTree(ET.XML('''<site />''')) 2723 # test BOM writing to buffered file 2724 with open(TESTFN, 'wb') as f: 2725 tree.write(f, encoding='utf-16') 2726 self.assertFalse(f.closed) 2727 with open(TESTFN, 'rb') as f: 2728 self.assertEqual(f.read(), 2729 '''<?xml version='1.0' encoding='utf-16'?>\n''' 2730 '''<site />'''.encode("utf-16")) 2731 # test BOM writing to non-buffered file 2732 with open(TESTFN, 'wb', buffering=0) as f: 2733 tree.write(f, encoding='utf-16') 2734 self.assertFalse(f.closed) 2735 with open(TESTFN, 'rb') as f: 2736 self.assertEqual(f.read(), 2737 '''<?xml version='1.0' encoding='utf-16'?>\n''' 2738 '''<site />'''.encode("utf-16")) 2739 2740 def test_read_from_stringio(self): 2741 tree = ET.ElementTree() 2742 stream = io.StringIO('''<?xml version="1.0"?><site></site>''') 2743 tree.parse(stream) 2744 self.assertEqual(tree.getroot().tag, 'site') 2745 2746 def test_write_to_stringio(self): 2747 tree = ET.ElementTree(ET.XML('''<site />''')) 2748 stream = io.StringIO() 2749 tree.write(stream, encoding='unicode') 2750 self.assertEqual(stream.getvalue(), '''<site />''') 2751 2752 def test_read_from_bytesio(self): 2753 tree = ET.ElementTree() 2754 raw = io.BytesIO(b'''<?xml version="1.0"?><site></site>''') 2755 tree.parse(raw) 2756 self.assertEqual(tree.getroot().tag, 'site') 2757 2758 def test_write_to_bytesio(self): 2759 tree = ET.ElementTree(ET.XML('''<site />''')) 2760 raw = io.BytesIO() 2761 tree.write(raw) 2762 self.assertEqual(raw.getvalue(), b'''<site />''') 2763 2764 class dummy: 2765 pass 2766 2767 def test_read_from_user_text_reader(self): 2768 stream = io.StringIO('''<?xml version="1.0"?><site></site>''') 2769 reader = self.dummy() 2770 reader.read = stream.read 2771 tree = ET.ElementTree() 2772 tree.parse(reader) 2773 self.assertEqual(tree.getroot().tag, 'site') 2774 2775 def test_write_to_user_text_writer(self): 2776 tree = ET.ElementTree(ET.XML('''<site />''')) 2777 stream = io.StringIO() 2778 writer = self.dummy() 2779 writer.write = stream.write 2780 tree.write(writer, encoding='unicode') 2781 self.assertEqual(stream.getvalue(), '''<site />''') 2782 2783 def test_read_from_user_binary_reader(self): 2784 raw = io.BytesIO(b'''<?xml version="1.0"?><site></site>''') 2785 reader = self.dummy() 2786 reader.read = raw.read 2787 tree = ET.ElementTree() 2788 tree.parse(reader) 2789 self.assertEqual(tree.getroot().tag, 'site') 2790 tree = ET.ElementTree() 2791 2792 def test_write_to_user_binary_writer(self): 2793 tree = ET.ElementTree(ET.XML('''<site />''')) 2794 raw = io.BytesIO() 2795 writer = self.dummy() 2796 writer.write = raw.write 2797 tree.write(writer) 2798 self.assertEqual(raw.getvalue(), b'''<site />''') 2799 2800 def test_write_to_user_binary_writer_with_bom(self): 2801 tree = ET.ElementTree(ET.XML('''<site />''')) 2802 raw = io.BytesIO() 2803 writer = self.dummy() 2804 writer.write = raw.write 2805 writer.seekable = lambda: True 2806 writer.tell = raw.tell 2807 tree.write(writer, encoding="utf-16") 2808 self.assertEqual(raw.getvalue(), 2809 '''<?xml version='1.0' encoding='utf-16'?>\n''' 2810 '''<site />'''.encode("utf-16")) 2811 2812 def test_tostringlist_invariant(self): 2813 root = ET.fromstring('<tag>foo</tag>') 2814 self.assertEqual( 2815 ET.tostring(root, 'unicode'), 2816 ''.join(ET.tostringlist(root, 'unicode'))) 2817 self.assertEqual( 2818 ET.tostring(root, 'utf-16'), 2819 b''.join(ET.tostringlist(root, 'utf-16'))) 2820 2821 def test_short_empty_elements(self): 2822 root = ET.fromstring('<tag>a<x />b<y></y>c</tag>') 2823 self.assertEqual( 2824 ET.tostring(root, 'unicode'), 2825 '<tag>a<x />b<y />c</tag>') 2826 self.assertEqual( 2827 ET.tostring(root, 'unicode', short_empty_elements=True), 2828 '<tag>a<x />b<y />c</tag>') 2829 self.assertEqual( 2830 ET.tostring(root, 'unicode', short_empty_elements=False), 2831 '<tag>a<x></x>b<y></y>c</tag>') 2832 2833 2834 class ParseErrorTest(unittest.TestCase): 2835 def test_subclass(self): 2836 self.assertIsInstance(ET.ParseError(), SyntaxError) 2837 2838 def _get_error(self, s): 2839 try: 2840 ET.fromstring(s) 2841 except ET.ParseError as e: 2842 return e 2843 2844 def test_error_position(self): 2845 self.assertEqual(self._get_error('foo').position, (1, 0)) 2846 self.assertEqual(self._get_error('<tag>&foo;</tag>').position, (1, 5)) 2847 self.assertEqual(self._get_error('foobar<').position, (1, 6)) 2848 2849 def test_error_code(self): 2850 import xml.parsers.expat.errors as ERRORS 2851 self.assertEqual(self._get_error('foo').code, 2852 ERRORS.codes[ERRORS.XML_ERROR_SYNTAX]) 2853 2854 2855 class KeywordArgsTest(unittest.TestCase): 2856 # Test various issues with keyword arguments passed to ET.Element 2857 # constructor and methods 2858 def test_issue14818(self): 2859 x = ET.XML("<a>foo</a>") 2860 self.assertEqual(x.find('a', None), 2861 x.find(path='a', namespaces=None)) 2862 self.assertEqual(x.findtext('a', None, None), 2863 x.findtext(path='a', default=None, namespaces=None)) 2864 self.assertEqual(x.findall('a', None), 2865 x.findall(path='a', namespaces=None)) 2866 self.assertEqual(list(x.iterfind('a', None)), 2867 list(x.iterfind(path='a', namespaces=None))) 2868 2869 self.assertEqual(ET.Element('a').attrib, {}) 2870 elements = [ 2871 ET.Element('a', dict(href="#", id="foo")), 2872 ET.Element('a', attrib=dict(href="#", id="foo")), 2873 ET.Element('a', dict(href="#"), id="foo"), 2874 ET.Element('a', href="#", id="foo"), 2875 ET.Element('a', dict(href="#", id="foo"), href="#", id="foo"), 2876 ] 2877 for e in elements: 2878 self.assertEqual(e.tag, 'a') 2879 self.assertEqual(e.attrib, dict(href="#", id="foo")) 2880 2881 e2 = ET.SubElement(elements[0], 'foobar', attrib={'key1': 'value1'}) 2882 self.assertEqual(e2.attrib['key1'], 'value1') 2883 2884 with self.assertRaisesRegex(TypeError, 'must be dict, not str'): 2885 ET.Element('a', "I'm not a dict") 2886 with self.assertRaisesRegex(TypeError, 'must be dict, not str'): 2887 ET.Element('a', attrib="I'm not a dict") 2888 2889 # -------------------------------------------------------------------- 2890 2891 class NoAcceleratorTest(unittest.TestCase): 2892 def setUp(self): 2893 if not pyET: 2894 raise unittest.SkipTest('only for the Python version') 2895 2896 # Test that the C accelerator was not imported for pyET 2897 def test_correct_import_pyET(self): 2898 # The type of methods defined in Python code is types.FunctionType, 2899 # while the type of methods defined inside _elementtree is 2900 # <class 'wrapper_descriptor'> 2901 self.assertIsInstance(pyET.Element.__init__, types.FunctionType) 2902 self.assertIsInstance(pyET.XMLParser.__init__, types.FunctionType) 2903 2904 # -------------------------------------------------------------------- 2905 2906 2907 class CleanContext(object): 2908 """Provide default namespace mapping and path cache.""" 2909 checkwarnings = None 2910 2911 def __init__(self, quiet=False): 2912 if sys.flags.optimize >= 2: 2913 # under -OO, doctests cannot be run and therefore not all warnings 2914 # will be emitted 2915 quiet = True 2916 deprecations = ( 2917 # Search behaviour is broken if search path starts with "/". 2918 ("This search is broken in 1.3 and earlier, and will be fixed " 2919 "in a future version. If you rely on the current behaviour, " 2920 "change it to '.+'", FutureWarning), 2921 # Element.getchildren() and Element.getiterator() are deprecated. 2922 ("This method will be removed in future versions. " 2923 "Use .+ instead.", DeprecationWarning), 2924 ("This method will be removed in future versions. " 2925 "Use .+ instead.", PendingDeprecationWarning)) 2926 self.checkwarnings = support.check_warnings(*deprecations, quiet=quiet) 2927 2928 def __enter__(self): 2929 from xml.etree import ElementPath 2930 self._nsmap = ET.register_namespace._namespace_map 2931 # Copy the default namespace mapping 2932 self._nsmap_copy = self._nsmap.copy() 2933 # Copy the path cache (should be empty) 2934 self._path_cache = ElementPath._cache 2935 ElementPath._cache = self._path_cache.copy() 2936 self.checkwarnings.__enter__() 2937 2938 def __exit__(self, *args): 2939 from xml.etree import ElementPath 2940 # Restore mapping and path cache 2941 self._nsmap.clear() 2942 self._nsmap.update(self._nsmap_copy) 2943 ElementPath._cache = self._path_cache 2944 self.checkwarnings.__exit__(*args) 2945 2946 2947 def test_main(module=None): 2948 # When invoked without a module, runs the Python ET tests by loading pyET. 2949 # Otherwise, uses the given module as the ET. 2950 global pyET 2951 pyET = import_fresh_module('xml.etree.ElementTree', 2952 blocked=['_elementtree']) 2953 if module is None: 2954 module = pyET 2955 2956 global ET 2957 ET = module 2958 2959 test_classes = [ 2960 ModuleTest, 2961 ElementSlicingTest, 2962 BasicElementTest, 2963 BadElementTest, 2964 BadElementPathTest, 2965 ElementTreeTest, 2966 IOTest, 2967 ParseErrorTest, 2968 XIncludeTest, 2969 ElementTreeTypeTest, 2970 ElementFindTest, 2971 ElementIterTest, 2972 TreeBuilderTest, 2973 XMLParserTest, 2974 XMLPullParserTest, 2975 BugsTest, 2976 ] 2977 2978 # These tests will only run for the pure-Python version that doesn't import 2979 # _elementtree. We can't use skipUnless here, because pyET is filled in only 2980 # after the module is loaded. 2981 if pyET is not ET: 2982 test_classes.extend([ 2983 NoAcceleratorTest, 2984 ]) 2985 2986 try: 2987 # XXX the C module should give the same warnings as the Python module 2988 with CleanContext(quiet=(pyET is not ET)): 2989 support.run_unittest(*test_classes) 2990 finally: 2991 # don't interfere with subsequent tests 2992 ET = pyET = None 2993 2994 2995 if __name__ == '__main__': 2996 test_main() 2997