# IMPORTANT: the same tests are run from "test_xml_etree_c" in order
# to ensure consistency between the C implementation and the Python
# implementation.
#
# For this purpose, the module-level "ET" symbol is temporarily
# monkey-patched when running the "test_xml_etree_c" test suite.

import copy
import functools
import html
import io
import operator
import pickle
import sys
import types
import unittest
import warnings
import weakref

from itertools import product
from test import support
from test.support import TESTFN, findfile, import_fresh_module, gc_collect, swap_attr

# pyET is the pure-Python implementation.
#
# ET is pyET in test_xml_etree and is the C accelerated version in
# test_xml_etree_c.
pyET = None
ET = None

SIMPLE_XMLFILE = findfile("simple.xml", subdir="xmltestdata")
try:
    SIMPLE_XMLFILE.encode("utf-8")
except UnicodeEncodeError:
    raise unittest.SkipTest("filename is not encodable to utf8")
SIMPLE_NS_XMLFILE = findfile("simple-ns.xml", subdir="xmltestdata")
UTF8_BUG_XMLFILE = findfile("expat224_utf8_bug.xml", subdir="xmltestdata")

# NOTE(review): the indentation inside the XML literals below was re-created
# (two spaces per nesting level).  Tests that compare serialized output
# against these samples must use the same whitespace -- confirm against the
# upstream file.
SAMPLE_XML = """\
<body>
  <tag class='a'>text</tag>
  <tag class='b' />
  <section>
    <tag class='b' id='inner'>subtext</tag>
  </section>
</body>
"""

SAMPLE_SECTION = """\
<section>
  <tag class='b' id='inner'>subtext</tag>
  <nexttag />
  <nextsection>
    <tag />
  </nextsection>
</section>
"""

SAMPLE_XML_NS = """
<body xmlns="http://effbot.org/ns">
  <tag>text</tag>
  <tag />
  <section>
    <tag>subtext</tag>
  </section>
</body>
"""

SAMPLE_XML_NS_ELEMS = """
<root>
<h:table xmlns:h="hello">
  <h:tr>
    <h:td>Apples</h:td>
    <h:td>Bananas</h:td>
  </h:tr>
</h:table>

<f:table xmlns:f="foo">
  <f:name>African Coffee Table</f:name>
  <f:width>80</f:width>
  <f:length>120</f:length>
</f:table>
</root>
"""

# The line structure of the two documents below matters: the entity tests
# assert the exact "line N, column M" position reported by the parser.
ENTITY_XML = """\
<!DOCTYPE points [
<!ENTITY % user-entities SYSTEM 'user-entities.xml'>
%user-entities;
]>
<document>&entity;</document>
"""

EXTERNAL_ENTITY_XML = """\
<!DOCTYPE points [
<!ENTITY entity SYSTEM "file:///non-existing-file.xml">
]>
<document>&entity;</document>
"""

def checkwarnings(*filters, quiet=False):
    """Return a decorator running the test under support.check_warnings().

    *filters* and *quiet* are passed through unchanged to
    support.check_warnings(); the wrapper keeps the metadata of the
    decorated test (via functools.update_wrapper) and discards its
    return value.
    """
    def decorator(test):
        def newtest(*args, **kwargs):
            with support.check_warnings(*filters, quiet=quiet):
                test(*args, **kwargs)
        functools.update_wrapper(newtest, test)
        return newtest
    return decorator


class ModuleTest(unittest.TestCase):
    def test_sanity(self):
        # Import sanity.

        from xml.etree import ElementTree
        from xml.etree import ElementInclude
        from xml.etree import ElementPath

    def test_all(self):
        names = ("xml.etree.ElementTree", "_elementtree")
        support.check__all__(self, ET, names, blacklist=("HTML_EMPTY",))


def serialize(elem, to_string=True, encoding='unicode', **options):
    """Serialize *elem* by wrapping it in an ElementTree and calling write().

    The output goes to an io.StringIO when *encoding* is 'unicode' and to
    an io.BytesIO otherwise.  Extra keyword *options* are forwarded to
    ElementTree.write().

    Returns the buffer contents when *to_string* is true, otherwise the
    buffer object itself, rewound to position 0.
    """
    if encoding != 'unicode':
        file = io.BytesIO()
    else:
        file = io.StringIO()
    tree = ET.ElementTree(elem)
    tree.write(file, encoding=encoding, **options)
    if to_string:
        return file.getvalue()
    else:
        file.seek(0)
        return file

def summarize_list(seq):
    """Return the list of .tag values of the elements in *seq*."""
    return [elem.tag for elem in seq]


# Helper methods shared by test cases; not a TestCase itself.
class ElementTestCase:
    @classmethod
    def setUpClass(cls):
        # A set: collapses to a single entry when pyET and ET are the
        # same module.
        cls.modules = {pyET, ET}

    def pickleRoundTrip(self, obj, name, dumper, loader, proto):
        """Pickle *obj* with *dumper* and unpickle it with *loader*.

        sys.modules[name] is set to *dumper* while pickling with protocol
        *proto* and to *loader* while unpickling; the original entry is
        always restored.  Returns the unpickled object.  A
        pickle.PicklingError is re-raised as support.TestFailed.
        """
        save_m = sys.modules[name]
        try:
            sys.modules[name] = dumper
            temp = pickle.dumps(obj, proto)
            sys.modules[name] = loader
            result = pickle.loads(temp)
        except pickle.PicklingError as pe:
            # pyET must be second, because pyET may be (equal to) ET.
            human = dict([(ET, "cET"), (pyET, "pyET")])
            raise support.TestFailed("Failed to round-trip %r from %r to %r"
                                     % (obj,
                                        human.get(dumper, dumper),
                                        human.get(loader, loader))) from pe
        finally:
            sys.modules[name] = save_m
        return result

    def assertEqualElements(self, alice, bob):
        """Assert that two elements are recursively equal.

        Both must be Element instances (of either implementation), have
        the same number of children, pairwise equal children, and equal
        tag, tail, text and attrib.
        """
        self.assertIsInstance(alice, (ET.Element, pyET.Element))
        self.assertIsInstance(bob, (ET.Element, pyET.Element))
        self.assertEqual(len(list(alice)), len(list(bob)))
        for x, y in zip(alice, bob):
            self.assertEqualElements(x, y)
        properties = operator.attrgetter('tag', 'tail', 'text', 'attrib')
        self.assertEqual(properties(alice), properties(bob))

# --------------------------------------------------------------------
# element tree tests

class ElementTreeTest(unittest.TestCase):

    def serialize_check(self, elem, expected):
        # Assert that the default (unicode) serialization of elem equals
        # expected.
        self.assertEqual(serialize(elem), expected)

    def test_interface(self):
        # Test element tree interface.

        def check_string(string):
            len(string)
            for char in string:
                self.assertEqual(len(char), 1,
                        msg="expected one-character string, got %r" % char)
            new_string = string + ""
            new_string = string + " "
            string[:0]

        def check_mapping(mapping):
            len(mapping)
            keys = mapping.keys()
            items = mapping.items()
            for key in keys:
                item = mapping[key]
            mapping["key"] = "value"
            self.assertEqual(mapping["key"], "value",
                    msg="expected value string, got %r" % mapping["key"])

        def check_element(element):
            self.assertTrue(ET.iselement(element), msg="not an element")
            direlem = dir(element)
            for attr in 'tag', 'attrib', 'text', 'tail':
                self.assertTrue(hasattr(element, attr),
                        msg='no %s member' % attr)
                self.assertIn(attr, direlem,
                        msg='no %s visible by dir' % attr)

            check_string(element.tag)
            check_mapping(element.attrib)
            if element.text is not None:
                check_string(element.text)
            if element.tail is not None:
                check_string(element.tail)
            for elem in element:
                check_element(elem)

        element = ET.Element("tag")
        check_element(element)
        tree = ET.ElementTree(element)
        check_element(tree.getroot())
        element = ET.Element("t\xe4g", key="value")
        tree = ET.ElementTree(element)
        self.assertRegex(repr(element), r"^<Element 't\xe4g' at 0x.*>$")
        element = ET.Element("tag", key="value")

        # Make sure all standard element methods exist.

        def check_method(method):
            self.assertTrue(hasattr(method, '__call__'),
                    msg="%s not callable" % method)

        check_method(element.append)
        check_method(element.extend)
        check_method(element.insert)
        check_method(element.remove)
        check_method(element.getchildren)
        check_method(element.find)
        check_method(element.iterfind)
        check_method(element.findall)
        check_method(element.findtext)
        check_method(element.clear)
        check_method(element.get)
        check_method(element.set)
        check_method(element.keys)
        check_method(element.items)
        check_method(element.iter)
        check_method(element.itertext)
        check_method(element.getiterator)

        # These methods return an iterable. See bug 6472.

        def check_iter(it):
            check_method(it.__next__)

        check_iter(element.iterfind("tag"))
        check_iter(element.iterfind("*"))
        check_iter(tree.iterfind("tag"))
        check_iter(tree.iterfind("*"))

        # These aliases are provided:

        self.assertEqual(ET.XML, ET.fromstring)
        self.assertEqual(ET.PI, ET.ProcessingInstruction)

    def test_set_attribute(self):
        element = ET.Element('tag')

        self.assertEqual(element.tag, 'tag')
        element.tag = 'Tag'
        self.assertEqual(element.tag, 'Tag')
        element.tag = 'TAG'
        self.assertEqual(element.tag, 'TAG')

        self.assertIsNone(element.text)
        element.text = 'Text'
        self.assertEqual(element.text, 'Text')
        element.text = 'TEXT'
        self.assertEqual(element.text, 'TEXT')

        self.assertIsNone(element.tail)
        element.tail = 'Tail'
        self.assertEqual(element.tail, 'Tail')
        element.tail = 'TAIL'
        self.assertEqual(element.tail, 'TAIL')

        self.assertEqual(element.attrib, {})
        element.attrib = {'a': 'b', 'c': 'd'}
        self.assertEqual(element.attrib, {'a': 'b', 'c': 'd'})
        element.attrib = {'A': 'B', 'C': 'D'}
        self.assertEqual(element.attrib, {'A': 'B', 'C': 'D'})

    def test_simpleops(self):
        # Basic method sanity checks.

        elem = ET.XML("<body><tag/></body>")
        self.serialize_check(elem, '<body><tag /></body>')
        e = ET.Element("tag2")
        elem.append(e)
        self.serialize_check(elem, '<body><tag /><tag2 /></body>')
        elem.remove(e)
        self.serialize_check(elem, '<body><tag /></body>')
        elem.insert(0, e)
        self.serialize_check(elem, '<body><tag2 /><tag /></body>')
        elem.remove(e)
        elem.extend([e])
        self.serialize_check(elem, '<body><tag /><tag2 /></body>')
        elem.remove(e)

        element = ET.Element("tag", key="value")
        self.serialize_check(element, '<tag key="value" />') # 1
        subelement = ET.Element("subtag")
        element.append(subelement)
        self.serialize_check(element, '<tag key="value"><subtag /></tag>') # 2
        element.insert(0, subelement)
        self.serialize_check(element,
                '<tag key="value"><subtag /><subtag /></tag>') # 3
        element.remove(subelement)
        self.serialize_check(element, '<tag key="value"><subtag /></tag>') # 4
        element.remove(subelement)
        self.serialize_check(element, '<tag key="value" />') # 5
        with self.assertRaises(ValueError) as cm:
            element.remove(subelement)
        self.assertEqual(str(cm.exception), 'list.remove(x): x not in list')
        self.serialize_check(element, '<tag key="value" />') # 6
        element[0:0] = [subelement, subelement, subelement]
        self.serialize_check(element[1], '<subtag />')
        self.assertEqual(element[1:9], [element[1], element[2]])
        self.assertEqual(element[:9:2], [element[0], element[2]])
        del element[1:2]
        self.serialize_check(element,
                '<tag key="value"><subtag /><subtag /></tag>')

    def test_cdata(self):
        # Test CDATA handling (etc).

        self.serialize_check(ET.XML("<tag>hello</tag>"),
                '<tag>hello</tag>')
        # Same text spelled with numeric character references; the parser
        # must resolve them to "hello".
        self.serialize_check(ET.XML("<tag>&#104;&#101;&#108;&#108;&#111;</tag>"),
                '<tag>hello</tag>')
        self.serialize_check(ET.XML("<tag><![CDATA[hello]]></tag>"),
                '<tag>hello</tag>')

    def test_file_init(self):
        stringfile = io.BytesIO(SAMPLE_XML.encode("utf-8"))
        tree = ET.ElementTree(file=stringfile)
        self.assertEqual(tree.find("tag").tag, 'tag')
        self.assertEqual(tree.find("section/tag").tag, 'tag')

        tree = ET.ElementTree(file=SIMPLE_XMLFILE)
        self.assertEqual(tree.find("element").tag, 'element')
        self.assertEqual(tree.find("element/../empty-element").tag,
                'empty-element')

    def test_path_cache(self):
        # Check that the path cache behaves sanely.

        from xml.etree import ElementPath

        elem = ET.XML(SAMPLE_XML)
        for i in range(10): ET.ElementTree(elem).find('./'+str(i))
        cache_len_10 = len(ElementPath._cache)
        for i in range(10): ET.ElementTree(elem).find('./'+str(i))
        self.assertEqual(len(ElementPath._cache), cache_len_10)
        for i in range(20): ET.ElementTree(elem).find('./'+str(i))
        self.assertGreater(len(ElementPath._cache), cache_len_10)
        for i in range(600): ET.ElementTree(elem).find('./'+str(i))
        self.assertLess(len(ElementPath._cache), 500)

    def test_copy(self):
        # Test copy handling (etc).

        import copy
        e1 = ET.XML("<tag>hello<foo/></tag>")
        e2 = copy.copy(e1)
        e3 = copy.deepcopy(e1)
        e1.find("foo").tag = "bar"
        self.serialize_check(e1, '<tag>hello<bar /></tag>')
        # The shallow copy shares the child, so it sees the rename; the
        # deep copy does not.
        self.serialize_check(e2, '<tag>hello<bar /></tag>')
        self.serialize_check(e3, '<tag>hello<foo /></tag>')

    def test_attrib(self):
        # Test attribute handling.

        elem = ET.Element("tag")
        elem.get("key") # 1.1
        self.assertEqual(elem.get("key", "default"), 'default') # 1.2

        elem.set("key", "value")
        self.assertEqual(elem.get("key"), 'value') # 1.3

        elem = ET.Element("tag", key="value")
        self.assertEqual(elem.get("key"), 'value') # 2.1
        self.assertEqual(elem.attrib, {'key': 'value'}) # 2.2

        attrib = {"key": "value"}
        elem = ET.Element("tag", attrib)
        attrib.clear() # check for aliasing issues
        self.assertEqual(elem.get("key"), 'value') # 3.1
        self.assertEqual(elem.attrib, {'key': 'value'}) # 3.2

        attrib = {"key": "value"}
        elem = ET.Element("tag", **attrib)
        attrib.clear() # check for aliasing issues
        self.assertEqual(elem.get("key"), 'value') # 4.1
        self.assertEqual(elem.attrib, {'key': 'value'}) # 4.2

        elem = ET.Element("tag", {"key": "other"}, key="value")
        self.assertEqual(elem.get("key"), 'value') # 5.1
        self.assertEqual(elem.attrib, {'key': 'value'}) # 5.2

        elem = ET.Element('test')
        elem.text = "aa"
        elem.set('testa', 'testval')
        elem.set('testb', 'test2')
        self.assertEqual(ET.tostring(elem),
                b'<test testa="testval" testb="test2">aa</test>')
        self.assertEqual(sorted(elem.keys()), ['testa', 'testb'])
        self.assertEqual(sorted(elem.items()),
                [('testa', 'testval'), ('testb', 'test2')])
        self.assertEqual(elem.attrib['testb'], 'test2')
        elem.attrib['testb'] = 'test1'
        elem.attrib['testc'] = 'test2'
        self.assertEqual(ET.tostring(elem),
                b'<test testa="testval" testb="test1" testc="test2">aa</test>')

        # Whitespace control characters in attribute values are expected
        # to be written as character references (line ends as &#10;,
        # tabs as &#09;).
        elem = ET.Element('test')
        elem.set('a', '\r')
        elem.set('b', '\r\n')
        elem.set('c', '\t\n\r ')
        elem.set('d', '\n\n')
        self.assertEqual(ET.tostring(elem),
                b'<test a="&#10;" b="&#10;" c="&#09;&#10;&#10; " d="&#10;&#10;" />')

    def test_makeelement(self):
        # Test makeelement handling.

        elem = ET.Element("tag")
        attrib = {"key": "value"}
        subelem = elem.makeelement("subtag", attrib)
        self.assertIsNot(subelem.attrib, attrib, msg="attrib aliasing")
        elem.append(subelem)
        self.serialize_check(elem, '<tag><subtag key="value" /></tag>')

        elem.clear()
        self.serialize_check(elem, '<tag />')
        elem.append(subelem)
        self.serialize_check(elem, '<tag><subtag key="value" /></tag>')
        elem.extend([subelem, subelem])
        self.serialize_check(elem,
            '<tag><subtag key="value" /><subtag key="value" /><subtag key="value" /></tag>')
        elem[:] = [subelem]
        self.serialize_check(elem, '<tag><subtag key="value" /></tag>')
        elem[:] = tuple([subelem])
        self.serialize_check(elem, '<tag><subtag key="value" /></tag>')

    def test_parsefile(self):
        # Test parsing from file.

        # NOTE(review): the leading whitespace inside the expected strings
        # below has to mirror the indentation used in the xmltestdata
        # fixture files (three spaces upstream) -- confirm against the
        # fixtures.
        tree = ET.parse(SIMPLE_XMLFILE)
        stream = io.StringIO()
        tree.write(stream, encoding='unicode')
        self.assertEqual(stream.getvalue(),
                '<root>\n'
                '   <element key="value">text</element>\n'
                '   <element>text</element>tail\n'
                '   <empty-element />\n'
                '</root>')
        tree = ET.parse(SIMPLE_NS_XMLFILE)
        stream = io.StringIO()
        tree.write(stream, encoding='unicode')
        self.assertEqual(stream.getvalue(),
                '<ns0:root xmlns:ns0="namespace">\n'
                '   <ns0:element key="value">text</ns0:element>\n'
                '   <ns0:element>text</ns0:element>tail\n'
                '   <ns0:empty-element />\n'
                '</ns0:root>')

        with open(SIMPLE_XMLFILE) as f:
            data = f.read()

        parser = ET.XMLParser()
        self.assertRegex(parser.version, r'^Expat ')
        parser.feed(data)
        self.serialize_check(parser.close(),
                '<root>\n'
                '   <element key="value">text</element>\n'
                '   <element>text</element>tail\n'
                '   <empty-element />\n'
                '</root>')

        target = ET.TreeBuilder()
        parser = ET.XMLParser(target=target)
        parser.feed(data)
        self.serialize_check(parser.close(),
                '<root>\n'
                '   <element key="value">text</element>\n'
                '   <element>text</element>tail\n'
                '   <empty-element />\n'
                '</root>')

    def test_parseliteral(self):
        element = ET.XML("<html><body>text</body></html>")
        self.assertEqual(ET.tostring(element, encoding='unicode'),
                '<html><body>text</body></html>')
        element = ET.fromstring("<html><body>text</body></html>")
        self.assertEqual(ET.tostring(element, encoding='unicode'),
                '<html><body>text</body></html>')
        sequence = ["<html><body>", "text</bo", "dy></html>"]
        element = ET.fromstringlist(sequence)
        self.assertEqual(ET.tostring(element),
                b'<html><body>text</body></html>')
        self.assertEqual(b"".join(ET.tostringlist(element)),
                b'<html><body>text</body></html>')
        self.assertEqual(ET.tostring(element, "ascii"),
                b"<?xml version='1.0' encoding='ascii'?>\n"
                b"<html><body>text</body></html>")
        _, ids = ET.XMLID("<html><body>text</body></html>")
        self.assertEqual(len(ids), 0)
        _, ids = ET.XMLID("<html><body id='body'>text</body></html>")
        self.assertEqual(len(ids), 1)
        self.assertEqual(ids["body"].tag, 'body')

    def test_iterparse(self):
        # Test iterparse interface.

        iterparse = ET.iterparse

        context = iterparse(SIMPLE_XMLFILE)
        action, elem = next(context)
        self.assertEqual((action, elem.tag), ('end', 'element'))
        self.assertEqual([(action, elem.tag) for action, elem in context], [
                ('end', 'element'),
                ('end', 'empty-element'),
                ('end', 'root'),
            ])
        self.assertEqual(context.root.tag, 'root')

        context = iterparse(SIMPLE_NS_XMLFILE)
        self.assertEqual([(action, elem.tag) for action, elem in context], [
                ('end', '{namespace}element'),
                ('end', '{namespace}element'),
                ('end', '{namespace}empty-element'),
                ('end', '{namespace}root'),
            ])

        events = ()
        context = iterparse(SIMPLE_XMLFILE, events)
        self.assertEqual([(action, elem.tag) for action, elem in context], [])

        events = ()
        context = iterparse(SIMPLE_XMLFILE, events=events)
        self.assertEqual([(action, elem.tag) for action, elem in context], [])

        events = ("start", "end")
        context = iterparse(SIMPLE_XMLFILE, events)
        self.assertEqual([(action, elem.tag) for action, elem in context], [
                ('start', 'root'),
                ('start', 'element'),
                ('end', 'element'),
                ('start', 'element'),
                ('end', 'element'),
                ('start', 'empty-element'),
                ('end', 'empty-element'),
                ('end', 'root'),
            ])

        events = ("start", "end", "start-ns", "end-ns")
        context = iterparse(SIMPLE_NS_XMLFILE, events)
        self.assertEqual([(action, elem.tag) if action in ("start", "end")
                                             else (action, elem)
                          for action, elem in context], [
                ('start-ns', ('', 'namespace')),
                ('start', '{namespace}root'),
                ('start', '{namespace}element'),
                ('end', '{namespace}element'),
                ('start', '{namespace}element'),
                ('end', '{namespace}element'),
                ('start', '{namespace}empty-element'),
                ('end', '{namespace}empty-element'),
                ('end', '{namespace}root'),
                ('end-ns', None),
            ])

        events = ('start-ns', 'end-ns')
        context = iterparse(io.StringIO(r"<root xmlns=''/>"), events)
        res = [action for action, elem in context]
        self.assertEqual(res, ['start-ns', 'end-ns'])

        events = ("start", "end", "bogus")
        with open(SIMPLE_XMLFILE, "rb") as f:
            with self.assertRaises(ValueError) as cm:
                iterparse(f, events)
            # A caller-supplied file object must be left open on error.
            self.assertFalse(f.closed)
        self.assertEqual(str(cm.exception), "unknown event 'bogus'")

        with support.check_no_resource_warning(self):
            with self.assertRaises(ValueError) as cm:
                iterparse(SIMPLE_XMLFILE, events)
            self.assertEqual(str(cm.exception), "unknown event 'bogus'")
            del cm

        # Bytes literals may only contain ASCII characters, so the non-ASCII
        # namespace URI is written as a character reference (&#233;) and
        # the prefix as a \xe9 escape in the declared iso-8859-1 encoding.
        source = io.BytesIO(
            b"<?xml version='1.0' encoding='iso-8859-1'?>\n"
            b"<body xmlns='http://&#233;ffbot.org/ns'\n"
            b"      xmlns:cl\xe9='http://effbot.org/ns'>text</body>\n")
        events = ("start-ns",)
        context = iterparse(source, events)
        self.assertEqual([(action, elem) for action, elem in context], [
                ('start-ns', ('', 'http://\xe9ffbot.org/ns')),
                ('start-ns', ('cl\xe9', 'http://effbot.org/ns')),
            ])

        source = io.StringIO("<document />junk")
        it = iterparse(source)
        action, elem = next(it)
        self.assertEqual((action, elem.tag), ('end', 'document'))
        with self.assertRaises(ET.ParseError) as cm:
            next(it)
        self.assertEqual(str(cm.exception),
                'junk after document element: line 1, column 12')

        self.addCleanup(support.unlink, TESTFN)
        with open(TESTFN, "wb") as f:
            f.write(b"<document />junk")
        it = iterparse(TESTFN)
        action, elem = next(it)
        self.assertEqual((action, elem.tag), ('end', 'document'))
        with support.check_no_resource_warning(self):
            with self.assertRaises(ET.ParseError) as cm:
                next(it)
            self.assertEqual(str(cm.exception),
                    'junk after document element: line 1, column 12')
            del cm, it

    def test_writefile(self):
        elem = ET.Element("tag")
        elem.text = "text"
        self.serialize_check(elem, '<tag>text</tag>')
        ET.SubElement(elem, "subtag").text = "subtext"
        self.serialize_check(elem, '<tag>text<subtag>subtext</subtag></tag>')

        # Test tag suppression
        elem.tag = None
        self.serialize_check(elem, 'text<subtag>subtext</subtag>')
        elem.insert(0, ET.Comment("comment"))
        self.serialize_check(elem,
                'text<!--comment--><subtag>subtext</subtag>')     # assumes 1.3

        elem[0] = ET.PI("key", "value")
        self.serialize_check(elem, 'text<?key value?><subtag>subtext</subtag>')

    def test_custom_builder(self):
        # Test parser w. custom builder.

        with open(SIMPLE_XMLFILE) as f:
            data = f.read()
        class Builder(list):
            def start(self, tag, attrib):
                self.append(("start", tag))
            def end(self, tag):
                self.append(("end", tag))
            def data(self, text):
                pass
        builder = Builder()
        parser = ET.XMLParser(target=builder)
        parser.feed(data)
        self.assertEqual(builder, [
                ('start', 'root'),
                ('start', 'element'),
                ('end', 'element'),
                ('start', 'element'),
                ('end', 'element'),
                ('start', 'empty-element'),
                ('end', 'empty-element'),
                ('end', 'root'),
            ])

        with open(SIMPLE_NS_XMLFILE) as f:
            data = f.read()
        class Builder(list):
            def start(self, tag, attrib):
                self.append(("start", tag))
            def end(self, tag):
                self.append(("end", tag))
            def data(self, text):
                pass
            def pi(self, target, data):
                self.append(("pi", target, data))
            def comment(self, data):
                self.append(("comment", data))
        builder = Builder()
        parser = ET.XMLParser(target=builder)
        parser.feed(data)
        self.assertEqual(builder, [
                ('pi', 'pi', 'data'),
                ('comment', ' comment '),
                ('start', '{namespace}root'),
                ('start', '{namespace}element'),
                ('end', '{namespace}element'),
                ('start', '{namespace}element'),
                ('end', '{namespace}element'),
                ('start', '{namespace}empty-element'),
                ('end', '{namespace}empty-element'),
                ('end', '{namespace}root'),
            ])


    # Element.getchildren() and ElementTree.getiterator() are deprecated.
713 @checkwarnings(("This method will be removed in future versions. " 714 "Use .+ instead.", 715 (DeprecationWarning, PendingDeprecationWarning))) 716 def test_getchildren(self): 717 # Test Element.getchildren() 718 719 with open(SIMPLE_XMLFILE, "rb") as f: 720 tree = ET.parse(f) 721 self.assertEqual([summarize_list(elem.getchildren()) 722 for elem in tree.getroot().iter()], [ 723 ['element', 'element', 'empty-element'], 724 [], 725 [], 726 [], 727 ]) 728 self.assertEqual([summarize_list(elem.getchildren()) 729 for elem in tree.getiterator()], [ 730 ['element', 'element', 'empty-element'], 731 [], 732 [], 733 [], 734 ]) 735 736 elem = ET.XML(SAMPLE_XML) 737 self.assertEqual(len(elem.getchildren()), 3) 738 self.assertEqual(len(elem[2].getchildren()), 1) 739 self.assertEqual(elem[:], elem.getchildren()) 740 child1 = elem[0] 741 child2 = elem[2] 742 del elem[1:2] 743 self.assertEqual(len(elem.getchildren()), 2) 744 self.assertEqual(child1, elem[0]) 745 self.assertEqual(child2, elem[1]) 746 elem[0:2] = [child2, child1] 747 self.assertEqual(child2, elem[0]) 748 self.assertEqual(child1, elem[1]) 749 self.assertNotEqual(child1, elem[0]) 750 elem.clear() 751 self.assertEqual(elem.getchildren(), []) 752 753 def test_writestring(self): 754 elem = ET.XML("<html><body>text</body></html>") 755 self.assertEqual(ET.tostring(elem), b'<html><body>text</body></html>') 756 elem = ET.fromstring("<html><body>text</body></html>") 757 self.assertEqual(ET.tostring(elem), b'<html><body>text</body></html>') 758 759 def test_encoding(self): 760 def check(encoding, body=''): 761 xml = ("<?xml version='1.0' encoding='%s'?><xml>%s</xml>" % 762 (encoding, body)) 763 self.assertEqual(ET.XML(xml.encode(encoding)).text, body) 764 self.assertEqual(ET.XML(xml).text, body) 765 check("ascii", 'a') 766 check("us-ascii", 'a') 767 check("iso-8859-1", '\xbd') 768 check("iso-8859-15", '\u20ac') 769 check("cp437", '\u221a') 770 check("mac-roman", '\u02da') 771 772 def xml(encoding): 773 return "<?xml 
version='1.0' encoding='%s'?><xml />" % encoding 774 def bxml(encoding): 775 return xml(encoding).encode(encoding) 776 supported_encodings = [ 777 'ascii', 'utf-8', 'utf-8-sig', 'utf-16', 'utf-16be', 'utf-16le', 778 'iso8859-1', 'iso8859-2', 'iso8859-3', 'iso8859-4', 'iso8859-5', 779 'iso8859-6', 'iso8859-7', 'iso8859-8', 'iso8859-9', 'iso8859-10', 780 'iso8859-13', 'iso8859-14', 'iso8859-15', 'iso8859-16', 781 'cp437', 'cp720', 'cp737', 'cp775', 'cp850', 'cp852', 782 'cp855', 'cp856', 'cp857', 'cp858', 'cp860', 'cp861', 'cp862', 783 'cp863', 'cp865', 'cp866', 'cp869', 'cp874', 'cp1006', 'cp1125', 784 'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255', 785 'cp1256', 'cp1257', 'cp1258', 786 'mac-cyrillic', 'mac-greek', 'mac-iceland', 'mac-latin2', 787 'mac-roman', 'mac-turkish', 788 'iso2022-jp', 'iso2022-jp-1', 'iso2022-jp-2', 'iso2022-jp-2004', 789 'iso2022-jp-3', 'iso2022-jp-ext', 790 'koi8-r', 'koi8-t', 'koi8-u', 'kz1048', 791 'hz', 'ptcp154', 792 ] 793 for encoding in supported_encodings: 794 self.assertEqual(ET.tostring(ET.XML(bxml(encoding))), b'<xml />') 795 796 unsupported_ascii_compatible_encodings = [ 797 'big5', 'big5hkscs', 798 'cp932', 'cp949', 'cp950', 799 'euc-jp', 'euc-jis-2004', 'euc-jisx0213', 'euc-kr', 800 'gb2312', 'gbk', 'gb18030', 801 'iso2022-kr', 'johab', 802 'shift-jis', 'shift-jis-2004', 'shift-jisx0213', 803 'utf-7', 804 ] 805 for encoding in unsupported_ascii_compatible_encodings: 806 self.assertRaises(ValueError, ET.XML, bxml(encoding)) 807 808 unsupported_ascii_incompatible_encodings = [ 809 'cp037', 'cp424', 'cp500', 'cp864', 'cp875', 'cp1026', 'cp1140', 810 'utf_32', 'utf_32_be', 'utf_32_le', 811 ] 812 for encoding in unsupported_ascii_incompatible_encodings: 813 self.assertRaises(ET.ParseError, ET.XML, bxml(encoding)) 814 815 self.assertRaises(ValueError, ET.XML, xml('undefined').encode('ascii')) 816 self.assertRaises(LookupError, ET.XML, xml('xxx').encode('ascii')) 817 818 def test_methods(self): 819 # Test serialization 
methods. 820 821 e = ET.XML("<html><link/><script>1 < 2</script></html>") 822 e.tail = "\n" 823 self.assertEqual(serialize(e), 824 '<html><link /><script>1 < 2</script></html>\n') 825 self.assertEqual(serialize(e, method=None), 826 '<html><link /><script>1 < 2</script></html>\n') 827 self.assertEqual(serialize(e, method="xml"), 828 '<html><link /><script>1 < 2</script></html>\n') 829 self.assertEqual(serialize(e, method="html"), 830 '<html><link><script>1 < 2</script></html>\n') 831 self.assertEqual(serialize(e, method="text"), '1 < 2\n') 832 833 def test_issue18347(self): 834 e = ET.XML('<html><CamelCase>text</CamelCase></html>') 835 self.assertEqual(serialize(e), 836 '<html><CamelCase>text</CamelCase></html>') 837 self.assertEqual(serialize(e, method="html"), 838 '<html><CamelCase>text</CamelCase></html>') 839 840 def test_entity(self): 841 # Test entity handling. 842 843 # 1) good entities 844 845 e = ET.XML("<document title='舰'>test</document>") 846 self.assertEqual(serialize(e, encoding="us-ascii"), 847 b'<document title="舰">test</document>') 848 self.serialize_check(e, '<document title="\u8230">test</document>') 849 850 # 2) bad entities 851 852 with self.assertRaises(ET.ParseError) as cm: 853 ET.XML("<document>&entity;</document>") 854 self.assertEqual(str(cm.exception), 855 'undefined entity: line 1, column 10') 856 857 with self.assertRaises(ET.ParseError) as cm: 858 ET.XML(ENTITY_XML) 859 self.assertEqual(str(cm.exception), 860 'undefined entity &entity;: line 5, column 10') 861 862 # 3) custom entity 863 864 parser = ET.XMLParser() 865 parser.entity["entity"] = "text" 866 parser.feed(ENTITY_XML) 867 root = parser.close() 868 self.serialize_check(root, '<document>text</document>') 869 870 # 4) external (SYSTEM) entity 871 872 with self.assertRaises(ET.ParseError) as cm: 873 ET.XML(EXTERNAL_ENTITY_XML) 874 self.assertEqual(str(cm.exception), 875 'undefined entity &entity;: line 4, column 10') 876 877 def test_namespace(self): 878 # Test namespace issues. 
879 880 # 1) xml namespace 881 882 elem = ET.XML("<tag xml:lang='en' />") 883 self.serialize_check(elem, '<tag xml:lang="en" />') # 1.1 884 885 # 2) other "well-known" namespaces 886 887 elem = ET.XML("<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#' />") 888 self.serialize_check(elem, 889 '<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" />') # 2.1 890 891 elem = ET.XML("<html:html xmlns:html='http://www.w3.org/1999/xhtml' />") 892 self.serialize_check(elem, 893 '<html:html xmlns:html="http://www.w3.org/1999/xhtml" />') # 2.2 894 895 elem = ET.XML("<soap:Envelope xmlns:soap='http://schemas.xmlsoap.org/soap/envelope' />") 896 self.serialize_check(elem, 897 '<ns0:Envelope xmlns:ns0="http://schemas.xmlsoap.org/soap/envelope" />') # 2.3 898 899 # 3) unknown namespaces 900 elem = ET.XML(SAMPLE_XML_NS) 901 self.serialize_check(elem, 902 '<ns0:body xmlns:ns0="http://effbot.org/ns">\n' 903 ' <ns0:tag>text</ns0:tag>\n' 904 ' <ns0:tag />\n' 905 ' <ns0:section>\n' 906 ' <ns0:tag>subtext</ns0:tag>\n' 907 ' </ns0:section>\n' 908 '</ns0:body>') 909 910 def test_qname(self): 911 # Test QName handling. 
912 913 # 1) decorated tags 914 915 elem = ET.Element("{uri}tag") 916 self.serialize_check(elem, '<ns0:tag xmlns:ns0="uri" />') # 1.1 917 elem = ET.Element(ET.QName("{uri}tag")) 918 self.serialize_check(elem, '<ns0:tag xmlns:ns0="uri" />') # 1.2 919 elem = ET.Element(ET.QName("uri", "tag")) 920 self.serialize_check(elem, '<ns0:tag xmlns:ns0="uri" />') # 1.3 921 elem = ET.Element(ET.QName("uri", "tag")) 922 subelem = ET.SubElement(elem, ET.QName("uri", "tag1")) 923 subelem = ET.SubElement(elem, ET.QName("uri", "tag2")) 924 self.serialize_check(elem, 925 '<ns0:tag xmlns:ns0="uri"><ns0:tag1 /><ns0:tag2 /></ns0:tag>') # 1.4 926 927 # 2) decorated attributes 928 929 elem.clear() 930 elem.attrib["{uri}key"] = "value" 931 self.serialize_check(elem, 932 '<ns0:tag xmlns:ns0="uri" ns0:key="value" />') # 2.1 933 934 elem.clear() 935 elem.attrib[ET.QName("{uri}key")] = "value" 936 self.serialize_check(elem, 937 '<ns0:tag xmlns:ns0="uri" ns0:key="value" />') # 2.2 938 939 # 3) decorated values are not converted by default, but the 940 # QName wrapper can be used for values 941 942 elem.clear() 943 elem.attrib["{uri}key"] = "{uri}value" 944 self.serialize_check(elem, 945 '<ns0:tag xmlns:ns0="uri" ns0:key="{uri}value" />') # 3.1 946 947 elem.clear() 948 elem.attrib["{uri}key"] = ET.QName("{uri}value") 949 self.serialize_check(elem, 950 '<ns0:tag xmlns:ns0="uri" ns0:key="ns0:value" />') # 3.2 951 952 elem.clear() 953 subelem = ET.Element("tag") 954 subelem.attrib["{uri1}key"] = ET.QName("{uri2}value") 955 elem.append(subelem) 956 elem.append(subelem) 957 self.serialize_check(elem, 958 '<ns0:tag xmlns:ns0="uri" xmlns:ns1="uri1" xmlns:ns2="uri2">' 959 '<tag ns1:key="ns2:value" />' 960 '<tag ns1:key="ns2:value" />' 961 '</ns0:tag>') # 3.3 962 963 # 4) Direct QName tests 964 965 self.assertEqual(str(ET.QName('ns', 'tag')), '{ns}tag') 966 self.assertEqual(str(ET.QName('{ns}tag')), '{ns}tag') 967 q1 = ET.QName('ns', 'tag') 968 q2 = ET.QName('ns', 'tag') 969 self.assertEqual(q1, q2) 970 
q2 = ET.QName('ns', 'other-tag') 971 self.assertNotEqual(q1, q2) 972 self.assertNotEqual(q1, 'ns:tag') 973 self.assertEqual(q1, '{ns}tag') 974 975 def test_doctype_public(self): 976 # Test PUBLIC doctype. 977 978 elem = ET.XML('<!DOCTYPE html PUBLIC' 979 ' "-//W3C//DTD XHTML 1.0 Transitional//EN"' 980 ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">' 981 '<html>text</html>') 982 983 def test_xpath_tokenizer(self): 984 # Test the XPath tokenizer. 985 from xml.etree import ElementPath 986 def check(p, expected): 987 self.assertEqual([op or tag 988 for op, tag in ElementPath.xpath_tokenizer(p)], 989 expected) 990 991 # tests from the xml specification 992 check("*", ['*']) 993 check("text()", ['text', '()']) 994 check("@name", ['@', 'name']) 995 check("@*", ['@', '*']) 996 check("para[1]", ['para', '[', '1', ']']) 997 check("para[last()]", ['para', '[', 'last', '()', ']']) 998 check("*/para", ['*', '/', 'para']) 999 check("/doc/chapter[5]/section[2]", 1000 ['/', 'doc', '/', 'chapter', '[', '5', ']', 1001 '/', 'section', '[', '2', ']']) 1002 check("chapter//para", ['chapter', '//', 'para']) 1003 check("//para", ['//', 'para']) 1004 check("//olist/item", ['//', 'olist', '/', 'item']) 1005 check(".", ['.']) 1006 check(".//para", ['.', '//', 'para']) 1007 check("..", ['..']) 1008 check("../@lang", ['..', '/', '@', 'lang']) 1009 check("chapter[title]", ['chapter', '[', 'title', ']']) 1010 check("employee[@secretary and @assistant]", ['employee', 1011 '[', '@', 'secretary', '', 'and', '', '@', 'assistant', ']']) 1012 1013 # additional tests 1014 check("{http://spam}egg", ['{http://spam}egg']) 1015 check("./spam.egg", ['.', '/', 'spam.egg']) 1016 check(".//{http://spam}egg", ['.', '//', '{http://spam}egg']) 1017 1018 def test_processinginstruction(self): 1019 # Test ProcessingInstruction directly 1020 1021 self.assertEqual(ET.tostring(ET.ProcessingInstruction('test', 'instruction')), 1022 b'<?test instruction?>') 1023 self.assertEqual(ET.tostring(ET.PI('test', 
def assert_event_tags(self, parser, expected):
    """Drain the parser's pending events and compare them with *expected*.

    Each event read from parser.read_events() is an (action, elem) pair; it
    is reduced to (action, elem.tag) before the comparison.
    """
    observed = []
    for action, elem in parser.read_events():
        observed.append((action, elem.tag))
    self.assertEqual(observed, expected)
"<empty-element/>\n", chunk_size) 1077 self.assert_event_tags(parser, [ 1078 ('end', 'element'), 1079 ('end', 'empty-element'), 1080 ]) 1081 self._feed(parser, "</root>\n", chunk_size) 1082 self.assert_event_tags(parser, [('end', 'root')]) 1083 self.assertIsNone(parser.close()) 1084 1085 def test_feed_while_iterating(self): 1086 parser = ET.XMLPullParser() 1087 it = parser.read_events() 1088 self._feed(parser, "<root>\n <element key='value'>text</element>\n") 1089 action, elem = next(it) 1090 self.assertEqual((action, elem.tag), ('end', 'element')) 1091 self._feed(parser, "</root>\n") 1092 action, elem = next(it) 1093 self.assertEqual((action, elem.tag), ('end', 'root')) 1094 with self.assertRaises(StopIteration): 1095 next(it) 1096 1097 def test_simple_xml_with_ns(self): 1098 parser = ET.XMLPullParser() 1099 self.assert_event_tags(parser, []) 1100 self._feed(parser, "<!-- comment -->\n") 1101 self.assert_event_tags(parser, []) 1102 self._feed(parser, "<root xmlns='namespace'>\n") 1103 self.assert_event_tags(parser, []) 1104 self._feed(parser, "<element key='value'>text</element") 1105 self.assert_event_tags(parser, []) 1106 self._feed(parser, ">\n") 1107 self.assert_event_tags(parser, [('end', '{namespace}element')]) 1108 self._feed(parser, "<element>text</element>tail\n") 1109 self._feed(parser, "<empty-element/>\n") 1110 self.assert_event_tags(parser, [ 1111 ('end', '{namespace}element'), 1112 ('end', '{namespace}empty-element'), 1113 ]) 1114 self._feed(parser, "</root>\n") 1115 self.assert_event_tags(parser, [('end', '{namespace}root')]) 1116 self.assertIsNone(parser.close()) 1117 1118 def test_ns_events(self): 1119 parser = ET.XMLPullParser(events=('start-ns', 'end-ns')) 1120 self._feed(parser, "<!-- comment -->\n") 1121 self._feed(parser, "<root xmlns='namespace'>\n") 1122 self.assertEqual( 1123 list(parser.read_events()), 1124 [('start-ns', ('', 'namespace'))]) 1125 self._feed(parser, "<element key='value'>text</element") 1126 self._feed(parser, ">\n") 1127 
self._feed(parser, "<element>text</element>tail\n") 1128 self._feed(parser, "<empty-element/>\n") 1129 self._feed(parser, "</root>\n") 1130 self.assertEqual(list(parser.read_events()), [('end-ns', None)]) 1131 self.assertIsNone(parser.close()) 1132 1133 def test_events(self): 1134 parser = ET.XMLPullParser(events=()) 1135 self._feed(parser, "<root/>\n") 1136 self.assert_event_tags(parser, []) 1137 1138 parser = ET.XMLPullParser(events=('start', 'end')) 1139 self._feed(parser, "<!-- comment -->\n") 1140 self.assert_event_tags(parser, []) 1141 self._feed(parser, "<root>\n") 1142 self.assert_event_tags(parser, [('start', 'root')]) 1143 self._feed(parser, "<element key='value'>text</element") 1144 self.assert_event_tags(parser, [('start', 'element')]) 1145 self._feed(parser, ">\n") 1146 self.assert_event_tags(parser, [('end', 'element')]) 1147 self._feed(parser, 1148 "<element xmlns='foo'>text<empty-element/></element>tail\n") 1149 self.assert_event_tags(parser, [ 1150 ('start', '{foo}element'), 1151 ('start', '{foo}empty-element'), 1152 ('end', '{foo}empty-element'), 1153 ('end', '{foo}element'), 1154 ]) 1155 self._feed(parser, "</root>") 1156 self.assertIsNone(parser.close()) 1157 self.assert_event_tags(parser, [('end', 'root')]) 1158 1159 parser = ET.XMLPullParser(events=('start',)) 1160 self._feed(parser, "<!-- comment -->\n") 1161 self.assert_event_tags(parser, []) 1162 self._feed(parser, "<root>\n") 1163 self.assert_event_tags(parser, [('start', 'root')]) 1164 self._feed(parser, "<element key='value'>text</element") 1165 self.assert_event_tags(parser, [('start', 'element')]) 1166 self._feed(parser, ">\n") 1167 self.assert_event_tags(parser, []) 1168 self._feed(parser, 1169 "<element xmlns='foo'>text<empty-element/></element>tail\n") 1170 self.assert_event_tags(parser, [ 1171 ('start', '{foo}element'), 1172 ('start', '{foo}empty-element'), 1173 ]) 1174 self._feed(parser, "</root>") 1175 self.assertIsNone(parser.close()) 1176 1177 def test_events_sequence(self): 
1178 # Test that events can be some sequence that's not just a tuple or list 1179 eventset = {'end', 'start'} 1180 parser = ET.XMLPullParser(events=eventset) 1181 self._feed(parser, "<foo>bar</foo>") 1182 self.assert_event_tags(parser, [('start', 'foo'), ('end', 'foo')]) 1183 1184 class DummyIter: 1185 def __init__(self): 1186 self.events = iter(['start', 'end', 'start-ns']) 1187 def __iter__(self): 1188 return self 1189 def __next__(self): 1190 return next(self.events) 1191 1192 parser = ET.XMLPullParser(events=DummyIter()) 1193 self._feed(parser, "<foo>bar</foo>") 1194 self.assert_event_tags(parser, [('start', 'foo'), ('end', 'foo')]) 1195 1196 1197 def test_unknown_event(self): 1198 with self.assertRaises(ValueError): 1199 ET.XMLPullParser(events=('start', 'end', 'bogus')) 1200 1201 1202 # 1203 # xinclude tests (samples from appendix C of the xinclude specification) 1204 1205 XINCLUDE = {} 1206 1207 XINCLUDE["C1.xml"] = """\ 1208 <?xml version='1.0'?> 1209 <document xmlns:xi="http://www.w3.org/2001/XInclude"> 1210 <p>120 Mz is adequate for an average home user.</p> 1211 <xi:include href="disclaimer.xml"/> 1212 </document> 1213 """ 1214 1215 XINCLUDE["disclaimer.xml"] = """\ 1216 <?xml version='1.0'?> 1217 <disclaimer> 1218 <p>The opinions represented herein represent those of the individual 1219 and should not be interpreted as official policy endorsed by this 1220 organization.</p> 1221 </disclaimer> 1222 """ 1223 1224 XINCLUDE["C2.xml"] = """\ 1225 <?xml version='1.0'?> 1226 <document xmlns:xi="http://www.w3.org/2001/XInclude"> 1227 <p>This document has been accessed 1228 <xi:include href="count.txt" parse="text"/> times.</p> 1229 </document> 1230 """ 1231 1232 XINCLUDE["count.txt"] = "324387" 1233 1234 XINCLUDE["C2b.xml"] = """\ 1235 <?xml version='1.0'?> 1236 <document xmlns:xi="http://www.w3.org/2001/XInclude"> 1237 <p>This document has been <em>accessed</em> 1238 <xi:include href="count.txt" parse="text"/> times.</p> 1239 </document> 1240 """ 1241 1242 
def xinclude_loader(self, href, parse="xml", encoding=None):
    """Loader callback that serves resources from the XINCLUDE table.

    Raises OSError("resource not found") when *href* is not a key of
    XINCLUDE.  When *parse* is "xml" the stored text is parsed with ET.XML
    and the resulting element is returned; for any other *parse* value the
    stored string is returned unchanged.  *encoding* is accepted but unused.
    """
    try:
        resource = XINCLUDE[href]
    except KeyError:
        raise OSError("resource not found")
    if parse != "xml":
        return resource
    return ET.XML(resource)
parse): 1313 # Used to avoid a test-dependency problem where the default loader 1314 # of ElementInclude uses the pyET parser for cET tests. 1315 if parse == 'xml': 1316 with open(href, 'rb') as f: 1317 return ET.parse(f).getroot() 1318 else: 1319 return None 1320 1321 def test_xinclude_default(self): 1322 from xml.etree import ElementInclude 1323 doc = self.xinclude_loader('default.xml') 1324 ElementInclude.include(doc, self._my_loader) 1325 self.assertEqual(serialize(doc), 1326 '<document>\n' 1327 ' <p>Example.</p>\n' 1328 ' <root>\n' 1329 ' <element key="value">text</element>\n' 1330 ' <element>text</element>tail\n' 1331 ' <empty-element />\n' 1332 '</root>\n' 1333 '</document>') 1334 1335 def test_xinclude(self): 1336 from xml.etree import ElementInclude 1337 1338 # Basic inclusion example (XInclude C.1) 1339 document = self.xinclude_loader("C1.xml") 1340 ElementInclude.include(document, self.xinclude_loader) 1341 self.assertEqual(serialize(document), 1342 '<document>\n' 1343 ' <p>120 Mz is adequate for an average home user.</p>\n' 1344 ' <disclaimer>\n' 1345 ' <p>The opinions represented herein represent those of the individual\n' 1346 ' and should not be interpreted as official policy endorsed by this\n' 1347 ' organization.</p>\n' 1348 '</disclaimer>\n' 1349 '</document>') # C1 1350 1351 # Textual inclusion example (XInclude C.2) 1352 document = self.xinclude_loader("C2.xml") 1353 ElementInclude.include(document, self.xinclude_loader) 1354 self.assertEqual(serialize(document), 1355 '<document>\n' 1356 ' <p>This document has been accessed\n' 1357 ' 324387 times.</p>\n' 1358 '</document>') # C2 1359 1360 # Textual inclusion after sibling element (based on modified XInclude C.2) 1361 document = self.xinclude_loader("C2b.xml") 1362 ElementInclude.include(document, self.xinclude_loader) 1363 self.assertEqual(serialize(document), 1364 '<document>\n' 1365 ' <p>This document has been <em>accessed</em>\n' 1366 ' 324387 times.</p>\n' 1367 '</document>') # C2b 1368 
1369 # Textual inclusion of XML example (XInclude C.3) 1370 document = self.xinclude_loader("C3.xml") 1371 ElementInclude.include(document, self.xinclude_loader) 1372 self.assertEqual(serialize(document), 1373 '<document>\n' 1374 ' <p>The following is the source of the "data.xml" resource:</p>\n' 1375 " <example><?xml version='1.0'?>\n" 1376 '<data>\n' 1377 ' <item><![CDATA[Brooks & Shields]]></item>\n' 1378 '</data>\n' 1379 '</example>\n' 1380 '</document>') # C3 1381 1382 # Fallback example (XInclude C.5) 1383 # Note! Fallback support is not yet implemented 1384 document = self.xinclude_loader("C5.xml") 1385 with self.assertRaises(OSError) as cm: 1386 ElementInclude.include(document, self.xinclude_loader) 1387 self.assertEqual(str(cm.exception), 'resource not found') 1388 self.assertEqual(serialize(document), 1389 '<div xmlns:ns0="http://www.w3.org/2001/XInclude">\n' 1390 ' <ns0:include href="example.txt" parse="text">\n' 1391 ' <ns0:fallback>\n' 1392 ' <ns0:include href="fallback-example.txt" parse="text">\n' 1393 ' <ns0:fallback><a href="mailto:bob (at] example.org">Report error</a></ns0:fallback>\n' 1394 ' </ns0:include>\n' 1395 ' </ns0:fallback>\n' 1396 ' </ns0:include>\n' 1397 '</div>') # C5 1398 1399 def test_xinclude_failures(self): 1400 from xml.etree import ElementInclude 1401 1402 # Test failure to locate included XML file. 1403 document = ET.XML(XINCLUDE["C1.xml"]) 1404 with self.assertRaises(ElementInclude.FatalIncludeError) as cm: 1405 ElementInclude.include(document, loader=self.none_loader) 1406 self.assertEqual(str(cm.exception), 1407 "cannot load 'disclaimer.xml' as 'xml'") 1408 1409 # Test failure to locate included text file. 1410 document = ET.XML(XINCLUDE["C2.xml"]) 1411 with self.assertRaises(ElementInclude.FatalIncludeError) as cm: 1412 ElementInclude.include(document, loader=self.none_loader) 1413 self.assertEqual(str(cm.exception), 1414 "cannot load 'count.txt' as 'text'") 1415 1416 # Test bad parse type. 
def test_bug_xmltoolkit21(self):
    """Serializing a non-string tag, text, tail or attribute gives a clear TypeError."""
    # marshaller gives obscure errors for non-string values

    def assert_unserializable(element):
        with self.assertRaises(TypeError) as caught:
            serialize(element)
        self.assertEqual(str(caught.exception),
                         'cannot serialize 123 (type int)')

    # tag
    assert_unserializable(ET.Element(123))

    # text
    bad_text = ET.Element("elem")
    bad_text.text = 123
    assert_unserializable(bad_text)

    # tail
    bad_tail = ET.Element("elem")
    bad_tail.tail = 123
    assert_unserializable(bad_tail)

    # attribute key
    bad_key = ET.Element("elem")
    bad_key.set(123, "123")
    assert_unserializable(bad_key)

    # attribute value
    bad_value = ET.Element("elem")
    bad_value.set("123", 123)
    assert_unserializable(bad_value)
self.assertEqual(summarize_list(tree.findall(".//tbody")), ['tbody']) 1478 1479 def test_bug_xmltoolkitX1(self): 1480 # dump() doesn't flush the output buffer 1481 1482 tree = ET.XML("<doc><table><tbody/></table></doc>") 1483 with support.captured_stdout() as stdout: 1484 ET.dump(tree) 1485 self.assertEqual(stdout.getvalue(), '<doc><table><tbody /></table></doc>\n') 1486 1487 def test_bug_xmltoolkit39(self): 1488 # non-ascii element and attribute names doesn't work 1489 1490 tree = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?><t\xe4g />") 1491 self.assertEqual(ET.tostring(tree, "utf-8"), b'<t\xc3\xa4g />') 1492 1493 tree = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?>" 1494 b"<tag \xe4ttr='välue' />") 1495 self.assertEqual(tree.attrib, {'\xe4ttr': 'v\xe4lue'}) 1496 self.assertEqual(ET.tostring(tree, "utf-8"), 1497 b'<tag \xc3\xa4ttr="v\xc3\xa4lue" />') 1498 1499 tree = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?>" 1500 b'<t\xe4g>text</t\xe4g>') 1501 self.assertEqual(ET.tostring(tree, "utf-8"), 1502 b'<t\xc3\xa4g>text</t\xc3\xa4g>') 1503 1504 tree = ET.Element("t\u00e4g") 1505 self.assertEqual(ET.tostring(tree, "utf-8"), b'<t\xc3\xa4g />') 1506 1507 tree = ET.Element("tag") 1508 tree.set("\u00e4ttr", "v\u00e4lue") 1509 self.assertEqual(ET.tostring(tree, "utf-8"), 1510 b'<tag \xc3\xa4ttr="v\xc3\xa4lue" />') 1511 1512 def test_bug_xmltoolkit54(self): 1513 # problems handling internally defined entities 1514 1515 e = ET.XML("<!DOCTYPE doc [<!ENTITY ldots '舰'>]>" 1516 '<doc>&ldots;</doc>') 1517 self.assertEqual(serialize(e, encoding="us-ascii"), 1518 b'<doc>舰</doc>') 1519 self.assertEqual(serialize(e), '<doc>\u8230</doc>') 1520 1521 def test_bug_xmltoolkit55(self): 1522 # make sure we're reporting the first error, not the last 1523 1524 with self.assertRaises(ET.ParseError) as cm: 1525 ET.XML(b"<!DOCTYPE doc SYSTEM 'doc.dtd'>" 1526 b'<doc>&ldots;&ndots;&rdots;</doc>') 1527 self.assertEqual(str(cm.exception), 1528 'undefined entity &ldots;: line 1, 
column 36') 1529 1530 def test_bug_xmltoolkit60(self): 1531 # Handle crash in stream source. 1532 1533 class ExceptionFile: 1534 def read(self, x): 1535 raise OSError 1536 1537 self.assertRaises(OSError, ET.parse, ExceptionFile()) 1538 1539 def test_bug_xmltoolkit62(self): 1540 # Don't crash when using custom entities. 1541 1542 ENTITIES = {'rsquo': '\u2019', 'lsquo': '\u2018'} 1543 parser = ET.XMLParser() 1544 parser.entity.update(ENTITIES) 1545 parser.feed("""<?xml version="1.0" encoding="UTF-8"?> 1546 <!DOCTYPE patent-application-publication SYSTEM "pap-v15-2001-01-31.dtd" []> 1547 <patent-application-publication> 1548 <subdoc-abstract> 1549 <paragraph id="A-0001" lvl="0">A new cultivar of Begonia plant named ‘BCT9801BEG’.</paragraph> 1550 </subdoc-abstract> 1551 </patent-application-publication>""") 1552 t = parser.close() 1553 self.assertEqual(t.find('.//paragraph').text, 1554 'A new cultivar of Begonia plant named \u2018BCT9801BEG\u2019.') 1555 1556 @unittest.skipIf(sys.gettrace(), "Skips under coverage.") 1557 def test_bug_xmltoolkit63(self): 1558 # Check reference leak. 1559 def xmltoolkit63(): 1560 tree = ET.TreeBuilder() 1561 tree.start("tag", {}) 1562 tree.data("text") 1563 tree.end("tag") 1564 1565 xmltoolkit63() 1566 count = sys.getrefcount(None) 1567 for i in range(1000): 1568 xmltoolkit63() 1569 self.assertEqual(sys.getrefcount(None), count) 1570 1571 def test_bug_200708_newline(self): 1572 # Preserve newlines in attributes. 1573 1574 e = ET.Element('SomeTag', text="def _f():\n return 3\n") 1575 self.assertEqual(ET.tostring(e), 1576 b'<SomeTag text="def _f(): return 3 " />') 1577 self.assertEqual(ET.XML(ET.tostring(e)).get("text"), 1578 'def _f():\n return 3\n') 1579 self.assertEqual(ET.tostring(ET.XML(ET.tostring(e))), 1580 b'<SomeTag text="def _f(): return 3 " />') 1581 1582 def test_bug_200708_close(self): 1583 # Test default builder. 
def test_bug_200709_default_namespace(self):
    """Exercise serialize() with the default_namespace option."""

    def build(*child_tags):
        # Root in the "default" namespace with one child per given tag.
        root = ET.Element("{default}elem")
        for child_tag in child_tags:
            ET.SubElement(root, child_tag)
        return root

    # 1: everything lives in the default namespace
    only_default = build("{default}elem")
    self.assertEqual(serialize(only_default, default_namespace="default"),
                     '<elem xmlns="default"><elem /></elem>')

    # 2: a child from another namespace gets a generated prefix
    mixed = build("{default}elem", "{not-default}elem")
    self.assertEqual(serialize(mixed, default_namespace="default"),
                     '<elem xmlns="default" xmlns:ns1="not-default">'
                     '<elem />'
                     '<ns1:elem />'
                     '</elem>')

    # 3: an unprefixed name cannot be combined with default_namespace
    unqualified = build("{default}elem", "elem")
    with self.assertRaises(ValueError) as caught:
        serialize(unqualified, default_namespace="default")
    self.assertEqual(str(caught.exception),
                     'cannot use non-qualified names with default_namespace option')
1632 b'<dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" />') 1633 1634 def test_bug_200709_element_comment(self): 1635 # Not sure if this can be fixed, really (since the serializer needs 1636 # ET.Comment, not cET.comment). 1637 1638 a = ET.Element('a') 1639 a.append(ET.Comment('foo')) 1640 self.assertEqual(a[0].tag, ET.Comment) 1641 1642 a = ET.Element('a') 1643 a.append(ET.PI('foo')) 1644 self.assertEqual(a[0].tag, ET.PI) 1645 1646 def test_bug_200709_element_insert(self): 1647 a = ET.Element('a') 1648 b = ET.SubElement(a, 'b') 1649 c = ET.SubElement(a, 'c') 1650 d = ET.Element('d') 1651 a.insert(0, d) 1652 self.assertEqual(summarize_list(a), ['d', 'b', 'c']) 1653 a.insert(-1, d) 1654 self.assertEqual(summarize_list(a), ['d', 'b', 'd', 'c']) 1655 1656 def test_bug_200709_iter_comment(self): 1657 a = ET.Element('a') 1658 b = ET.SubElement(a, 'b') 1659 comment_b = ET.Comment("TEST-b") 1660 b.append(comment_b) 1661 self.assertEqual(summarize_list(a.iter(ET.Comment)), [ET.Comment]) 1662 1663 # -------------------------------------------------------------------- 1664 # reported on bugs.python.org 1665 1666 def test_bug_1534630(self): 1667 bob = ET.TreeBuilder() 1668 e = bob.data("data") 1669 e = bob.start("tag", {}) 1670 e = bob.end("tag") 1671 e = bob.close() 1672 self.assertEqual(serialize(e), '<tag />') 1673 1674 def test_issue6233(self): 1675 e = ET.XML(b"<?xml version='1.0' encoding='utf-8'?>" 1676 b'<body>t\xc3\xa3g</body>') 1677 self.assertEqual(ET.tostring(e, 'ascii'), 1678 b"<?xml version='1.0' encoding='ascii'?>\n" 1679 b'<body>tãg</body>') 1680 e = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?>" 1681 b'<body>t\xe3g</body>') 1682 self.assertEqual(ET.tostring(e, 'ascii'), 1683 b"<?xml version='1.0' encoding='ascii'?>\n" 1684 b'<body>tãg</body>') 1685 1686 def test_issue3151(self): 1687 e = ET.XML('<prefix:localname xmlns:prefix="${stuff}"/>') 1688 self.assertEqual(e.tag, '{${stuff}}localname') 1689 t = ET.ElementTree(e) 1690 
self.assertEqual(ET.tostring(e), b'<ns0:localname xmlns:ns0="${stuff}" />') 1691 1692 def test_issue6565(self): 1693 elem = ET.XML("<body><tag/></body>") 1694 self.assertEqual(summarize_list(elem), ['tag']) 1695 newelem = ET.XML(SAMPLE_XML) 1696 elem[:] = newelem[:] 1697 self.assertEqual(summarize_list(elem), ['tag', 'tag', 'section']) 1698 1699 def test_issue10777(self): 1700 # Registering a namespace twice caused a "dictionary changed size during 1701 # iteration" bug. 1702 1703 ET.register_namespace('test10777', 'http://myuri/') 1704 ET.register_namespace('test10777', 'http://myuri/') 1705 1706 def test_lost_text(self): 1707 # Issue #25902: Borrowed text can disappear 1708 class Text: 1709 def __bool__(self): 1710 e.text = 'changed' 1711 return True 1712 1713 e = ET.Element('tag') 1714 e.text = Text() 1715 i = e.itertext() 1716 t = next(i) 1717 self.assertIsInstance(t, Text) 1718 self.assertIsInstance(e.text, str) 1719 self.assertEqual(e.text, 'changed') 1720 1721 def test_lost_tail(self): 1722 # Issue #25902: Borrowed tail can disappear 1723 class Text: 1724 def __bool__(self): 1725 e[0].tail = 'changed' 1726 return True 1727 1728 e = ET.Element('root') 1729 e.append(ET.Element('tag')) 1730 e[0].tail = Text() 1731 i = e.itertext() 1732 t = next(i) 1733 self.assertIsInstance(t, Text) 1734 self.assertIsInstance(e[0].tail, str) 1735 self.assertEqual(e[0].tail, 'changed') 1736 1737 def test_lost_elem(self): 1738 # Issue #25902: Borrowed element can disappear 1739 class Tag: 1740 def __eq__(self, other): 1741 e[0] = ET.Element('changed') 1742 next(i) 1743 return True 1744 1745 e = ET.Element('root') 1746 e.append(ET.Element(Tag())) 1747 e.append(ET.Element('tag')) 1748 i = e.iter('tag') 1749 try: 1750 t = next(i) 1751 except ValueError: 1752 self.skipTest('generators are not reentrant') 1753 self.assertIsInstance(t.tag, Tag) 1754 self.assertIsInstance(e[0].tag, str) 1755 self.assertEqual(e[0].tag, 'changed') 1756 1757 def check_expat224_utf8_bug(self, text): 1758 
class BasicElementTest(ElementTestCase, unittest.TestCase):
    """Basic Element behaviour: argument checks, GC, weak references, pickling."""

    def test_augmentation_type_errors(self):
        # append/extend/insert must reject children that are not Elements.
        e = ET.Element('joe')
        self.assertRaises(TypeError, e.append, 'b')
        self.assertRaises(TypeError, e.extend, [ET.Element('bar'), 'foo'])
        self.assertRaises(TypeError, e.insert, 0, 'foo')

    def test_cyclic_gc(self):
        class Dummy:
            pass

        # Test the shortest cycle: d->element->d
        d = Dummy()
        d.dummyref = ET.Element('joe', attr=d)
        wref = weakref.ref(d)
        del d
        gc_collect()
        self.assertIsNone(wref())

        # A longer cycle: d->e->e2->d
        e = ET.Element('joe')
        d = Dummy()
        d.dummyref = e
        wref = weakref.ref(d)
        e2 = ET.SubElement(e, 'foo', attr=d)
        del d, e, e2
        gc_collect()
        self.assertIsNone(wref())

        # A cycle between Element objects as children of one another
        # e1->e2->e3->e1
        e1 = ET.Element('e1')
        e2 = ET.Element('e2')
        e3 = ET.Element('e3')
        e1.append(e2)
        # e2 must hold e3 (not itself) for the three-element cycle
        # described above to actually exist.
        e2.append(e3)
        e3.append(e1)
        wref = weakref.ref(e1)
        del e1, e2, e3
        gc_collect()
        self.assertIsNone(wref())

    def test_weakref(self):
        # The weakref callback must fire once the element is gone.
        flag = False

        def wref_cb(w):
            nonlocal flag
            flag = True

        e = ET.Element('e')
        wref = weakref.ref(e, wref_cb)
        self.assertEqual(wref().tag, 'e')
        del e
        # Do not rely on reference counting alone to reclaim the element.
        gc_collect()
        self.assertTrue(flag)
        self.assertIsNone(wref())

    def test_get_keyword_args(self):
        # Element.get() accepts its default as a keyword argument.
        e1 = ET.Element('foo', x=1, y=2, z=3)
        self.assertEqual(e1.get('x', default=7), 1)
        self.assertEqual(e1.get('w', default=7), 7)

    def test_pickle(self):
        # issue #16076: the C implementation wasn't pickleable.
        # NOTE(review): self.modules, self.pickleRoundTrip and
        # self.assertEqualElements are presumably provided by
        # ElementTestCase -- confirm against its definition.
        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
            for dumper, loader in product(self.modules, repeat=2):
                e = dumper.Element('foo', bar=42)
                e.text = "text goes here"
                e.tail = "opposite of head"
                dumper.SubElement(e, 'child').append(dumper.Element('grandchild'))
                e.append(dumper.Element('child'))
                e.findall('.//grandchild')[0].set('attr', 'other value')

                e2 = self.pickleRoundTrip(e, 'xml.etree.ElementTree',
                                          dumper, loader, proto)

                self.assertEqual(e2.tag, 'foo')
                self.assertEqual(e2.attrib['bar'], 42)
                self.assertEqual(len(e2), 2)
                self.assertEqualElements(e, e2)

    def test_pickle_issue18997(self):
        # Round-trip an element obtained from fromstring() through pickle.
        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
            for dumper, loader in product(self.modules, repeat=2):
                XMLTEXT = """<?xml version="1.0"?>
                    <group><dogs>4</dogs>
                    </group>"""
                e1 = dumper.fromstring(XMLTEXT)
                # Only elements exposing __getstate__ can be inspected here.
                if hasattr(e1, '__getstate__'):
                    self.assertEqual(e1.__getstate__()['tag'], 'group')
                e2 = self.pickleRoundTrip(e1, 'xml.etree.ElementTree',
                                          dumper, loader, proto)
                self.assertEqual(e2.tag, 'group')
                self.assertEqual(e2[0].tag, 'dogs')
def test_remove_with_mutating(self):
    # Element.remove() must raise ValueError, not misbehave, when comparing
    # children empties the element being searched.
    class X(ET.Element):
        def __eq__(self, o):
            # Side effect: every comparison clears the element bound to
            # the enclosing name ``e``; the comparison itself never matches.
            del e[:]
            return False
    # Case 1: the mutating object is the child already stored in ``e``.
    e = ET.Element('foo')
    e.extend([X('bar')])
    self.assertRaises(ValueError, e.remove, ET.Element('baz'))

    # Case 2: the mutating object is the argument passed to remove().
    e = ET.Element('foo')
    e.extend([ET.Element('bar')])
    self.assertRaises(ValueError, e.remove, X('baz'))
class MutatingElementPath(str):
    """A str path whose equality test empties an associated element.

    The first constructor argument is stored as ``elem``; any remaining
    arguments are forwarded to str.  Every ``==`` comparison deletes all
    items of ``elem`` and then reports a match.
    """

    # Defining __eq__ would otherwise leave the class unhashable, so the
    # str hash is kept explicitly.
    __hash__ = str.__hash__

    def __new__(cls, elem, *args):
        path = super().__new__(cls, *args)
        path.elem = elem
        return path

    def __eq__(self, other):
        del self.elem[:]
        return True
# A str subclass whose equality test always fails with ZeroDivisionError.
class BadElementPath(str):
    def __eq__(self, o):
        # 1/0 is evaluated first, so ZeroDivisionError is what propagates.
        raise 1/0
# Defining __eq__ clears the inherited __hash__, so restore str's hash.
BadElementPath.__hash__ = str.__hash__

# find()/findtext()/findall() called with path objects that misbehave when
# compared (MutatingElementPath, BadElementPath).
class BadElementPathTest(ElementTestCase, unittest.TestCase):
    def setUp(self):
        super().setUp()
        from xml.etree import ElementPath
        # Swap in an empty path cache for the duration of each test; the
        # original one is put back in tearDown().
        self.path_cache = ElementPath._cache
        ElementPath._cache = {}

    def tearDown(self):
        from xml.etree import ElementPath
        ElementPath._cache = self.path_cache
        super().tearDown()

    def test_find_with_mutating(self):
        e = ET.Element('foo')
        e.extend([ET.Element('bar')])
        e.find(MutatingElementPath(e, 'x'))

    def test_find_with_error(self):
        e = ET.Element('foo')
        e.extend([ET.Element('bar')])
        try:
            e.find(BadElementPath('x'))
        except ZeroDivisionError:
            # The comparison error may surface; it is tolerated here.
            pass

    def test_findtext_with_mutating(self):
        e = ET.Element('foo')
        e.extend([ET.Element('bar')])
        e.findtext(MutatingElementPath(e, 'x'))

    def test_findtext_with_error(self):
        e = ET.Element('foo')
        e.extend([ET.Element('bar')])
        try:
            e.findtext(BadElementPath('x'))
        except ZeroDivisionError:
            pass

    def test_findall_with_mutating(self):
        e = ET.Element('foo')
        e.extend([ET.Element('bar')])
        e.findall(MutatingElementPath(e, 'x'))

    def test_findall_with_error(self):
        e = ET.Element('foo')
        e.extend([ET.Element('bar')])
        try:
            e.findall(BadElementPath('x'))
        except ZeroDivisionError:
            pass


# The public ET names must be real types, and Element must be subclassable.
class ElementTreeTypeTest(unittest.TestCase):
    def test_istype(self):
        self.assertIsInstance(ET.ParseError, type)
        self.assertIsInstance(ET.QName, type)
        self.assertIsInstance(ET.ElementTree, type)
        self.assertIsInstance(ET.Element, type)
        self.assertIsInstance(ET.TreeBuilder, type)
        self.assertIsInstance(ET.XMLParser, type)

    def test_Element_subclass_trivial(self):
        class MyElement(ET.Element):
            pass

        mye = MyElement('foo')
        self.assertIsInstance(mye, ET.Element)
        self.assertIsInstance(mye, MyElement)
        self.assertEqual(mye.tag, 'foo')

        # test that attribute assignment works (issue 14849)
        mye.text = "joe"
        self.assertEqual(mye.text, "joe")

    def test_Element_subclass_constructor(self):
        # A subclass __init__ that rewrites the tag before delegating.
        class MyElement(ET.Element):
            def __init__(self, tag, attrib={}, **extra):
                super(MyElement, self).__init__(tag + '__', attrib, **extra)

        mye = MyElement('foo', {'a': 1, 'b': 2}, c=3, d=4)
        self.assertEqual(mye.tag, 'foo__')
        # Positional attrib dict and keyword extras end up merged.
        self.assertEqual(sorted(mye.items()),
            [('a', 1), ('b', 2), ('c', 3), ('d', 4)])

    def test_Element_subclass_new_method(self):
        class MyElement(ET.Element):
            def newmethod(self):
                return self.tag

        mye = MyElement('joe')
        self.assertEqual(mye.newmethod(), 'joe')

    def test_Element_subclass_find(self):
        # A subclass instance appended to a plain Element must be reachable
        # through findtext(), find() and findall().
        class MyElement(ET.Element):
            pass

        e = ET.Element('foo')
        e.text = 'text'
        sub = MyElement('bar')
        sub.text = 'subtext'
        e.append(sub)
        self.assertEqual(e.findtext('bar'), 'subtext')
        self.assertEqual(e.find('bar').tag, 'bar')
        found = list(e.findall('bar'))
        self.assertEqual(len(found), 1, found)
        self.assertEqual(found[0].tag, 'bar')


# find(), findtext() and findall() with the supported path syntax.
class ElementFindTest(unittest.TestCase):
    def test_find_simple(self):
        e = ET.XML(SAMPLE_XML)
        self.assertEqual(e.find('tag').tag, 'tag')
        self.assertEqual(e.find('section/tag').tag, 'tag')
        self.assertEqual(e.find('./tag').tag, 'tag')

        # Replace the third child with the richer SAMPLE_SECTION tree.
        e[2] = ET.XML(SAMPLE_SECTION)
        self.assertEqual(e.find('section/nexttag').tag, 'nexttag')

        self.assertEqual(e.findtext('./tag'), 'text')
        self.assertEqual(e.findtext('section/tag'), 'subtext')

        # section/nexttag is found but has no text
        self.assertEqual(e.findtext('section/nexttag'), '')
        self.assertEqual(e.findtext('section/nexttag', 'default'), '')

        # tog doesn't exist and 'default' kicks in
        self.assertIsNone(e.findtext('tog'))
        self.assertEqual(e.findtext('tog', 'default'), 'default')

        # Issue #16922
        self.assertEqual(ET.XML('<tag><empty /></tag>').findtext('empty'), '')

    def test_find_xpath(self):
        LINEAR_XML = '''
        <body>
            <tag class='a'/>
            <tag class='b'/>
            <tag class='c'/>
            <tag class='d'/>
        </body>'''
        e = ET.XML(LINEAR_XML)

        # Test for numeric indexing and last()
        self.assertEqual(e.find('./tag[1]').attrib['class'], 'a')
        self.assertEqual(e.find('./tag[2]').attrib['class'], 'b')
        self.assertEqual(e.find('./tag[last()]').attrib['class'], 'd')
        self.assertEqual(e.find('./tag[last()-1]').attrib['class'], 'c')
        self.assertEqual(e.find('./tag[last()-2]').attrib['class'], 'b')

        # Positions are 1-based and offsets from last() must be negative
        # and non-zero; anything else is rejected as a SyntaxError.
        self.assertRaisesRegex(SyntaxError, 'XPath', e.find, './tag[0]')
        self.assertRaisesRegex(SyntaxError, 'XPath', e.find, './tag[-1]')
        self.assertRaisesRegex(SyntaxError, 'XPath', e.find, './tag[last()-0]')
        self.assertRaisesRegex(SyntaxError, 'XPath', e.find, './tag[last()+1]')

    def test_findall(self):
        e = ET.XML(SAMPLE_XML)
        e[2] = ET.XML(SAMPLE_SECTION)
        # Plain paths, wildcards and descendant searches.
        self.assertEqual(summarize_list(e.findall('.')), ['body'])
        self.assertEqual(summarize_list(e.findall('tag')), ['tag', 'tag'])
        self.assertEqual(summarize_list(e.findall('tog')), [])
        self.assertEqual(summarize_list(e.findall('tog/foo')), [])
        self.assertEqual(summarize_list(e.findall('*')),
            ['tag', 'tag', 'section'])
        self.assertEqual(summarize_list(e.findall('.//tag')),
            ['tag'] * 4)
        self.assertEqual(summarize_list(e.findall('section/tag')), ['tag'])
        self.assertEqual(summarize_list(e.findall('section//tag')), ['tag'] * 2)
        self.assertEqual(summarize_list(e.findall('section/*')),
            ['tag', 'nexttag', 'nextsection'])
        self.assertEqual(summarize_list(e.findall('section//*')),
            ['tag', 'nexttag', 'nextsection', 'tag'])
        self.assertEqual(summarize_list(e.findall('section/.//*')),
            ['tag', 'nexttag', 'nextsection', 'tag'])
        self.assertEqual(summarize_list(e.findall('*/*')),
            ['tag', 'nexttag', 'nextsection'])
        self.assertEqual(summarize_list(e.findall('*//*')),
            ['tag', 'nexttag', 'nextsection', 'tag'])
        self.assertEqual(summarize_list(e.findall('*/tag')), ['tag'])
        self.assertEqual(summarize_list(e.findall('*/./tag')), ['tag'])
        self.assertEqual(summarize_list(e.findall('./tag')), ['tag'] * 2)
        self.assertEqual(summarize_list(e.findall('././tag')), ['tag'] * 2)

        # Attribute, child-element and parent ("..") predicates.
        self.assertEqual(summarize_list(e.findall('.//tag[@class]')),
            ['tag'] * 3)
        self.assertEqual(summarize_list(e.findall('.//tag[@class="a"]')),
            ['tag'])
        self.assertEqual(summarize_list(e.findall('.//tag[@class="b"]')),
            ['tag'] * 2)
        self.assertEqual(summarize_list(e.findall('.//tag[@id]')),
            ['tag'])
        self.assertEqual(summarize_list(e.findall('.//section[tag]')),
            ['section'])
        self.assertEqual(summarize_list(e.findall('.//section[element]')), [])
        self.assertEqual(summarize_list(e.findall('../tag')), [])
        self.assertEqual(summarize_list(e.findall('section/../tag')),
            ['tag'] * 2)
        self.assertEqual(e.findall('section//'), e.findall('section//*'))

        # Child-text predicates, with every placement of optional spaces.
        self.assertEqual(summarize_list(e.findall(".//section[tag='subtext']")),
            ['section'])
        self.assertEqual(summarize_list(e.findall(".//section[tag ='subtext']")),
            ['section'])
        self.assertEqual(summarize_list(e.findall(".//section[tag= 'subtext']")),
            ['section'])
        self.assertEqual(summarize_list(e.findall(".//section[tag = 'subtext']")),
            ['section'])
        self.assertEqual(summarize_list(e.findall(".//section[ tag = 'subtext' ]")),
            ['section'])

        # Own-text predicates; whitespace inside the quotes is significant.
        self.assertEqual(summarize_list(e.findall(".//tag[.='subtext']")),
            ['tag'])
        self.assertEqual(summarize_list(e.findall(".//tag[. ='subtext']")),
            ['tag'])
        self.assertEqual(summarize_list(e.findall('.//tag[.= "subtext"]')),
            ['tag'])
        self.assertEqual(summarize_list(e.findall('.//tag[ . = "subtext" ]')),
            ['tag'])
        self.assertEqual(summarize_list(e.findall(".//tag[. = 'subtext']")),
            ['tag'])
        self.assertEqual(summarize_list(e.findall(".//tag[. = 'subtext ']")),
            [])
        self.assertEqual(summarize_list(e.findall(".//tag[.= ' subtext']")),
            [])

        # duplicate section => 2x tag matches
        e[1] = e[2]
        self.assertEqual(summarize_list(e.findall(".//section[tag = 'subtext']")),
            ['section', 'section'])
        self.assertEqual(summarize_list(e.findall(".//tag[. = 'subtext']")),
            ['tag', 'tag'])

    def test_test_find_with_ns(self):
        # Elements in a default namespace only match the {uri}tag form.
        e = ET.XML(SAMPLE_XML_NS)
        self.assertEqual(summarize_list(e.findall('tag')), [])
        self.assertEqual(
            summarize_list(e.findall("{http://effbot.org/ns}tag")),
            ['{http://effbot.org/ns}tag'] * 2)
        self.assertEqual(
            summarize_list(e.findall(".//{http://effbot.org/ns}tag")),
            ['{http://effbot.org/ns}tag'] * 3)

    def test_findall_different_nsmaps(self):
        # The same prefixed path is searched with two different prefix
        # mappings and must give different results each time.
        root = ET.XML('''
            <a xmlns:x="X" xmlns:y="Y">
                <x:b><c/></x:b>
                <b/>
                <c><x:b/><b/></c><y:b/>
            </a>''')
        nsmap = {'xx': 'X'}
        self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 2)
        self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 2)
        nsmap = {'xx': 'Y'}
        self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 1)
        self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 2)

    def test_bad_find(self):
        e = ET.XML(SAMPLE_XML)
        with self.assertRaisesRegex(SyntaxError, 'cannot use absolute path'):
            e.findall('/tag')

    def test_find_through_ElementTree(self):
        e = ET.XML(SAMPLE_XML)
        self.assertEqual(ET.ElementTree(e).find('tag').tag, 'tag')
        self.assertEqual(ET.ElementTree(e).findtext('tag'), 'text')
        self.assertEqual(summarize_list(ET.ElementTree(e).findall('tag')),
            ['tag'] * 2)
        # this produces a warning
        # NOTE: the pattern must keep the two spaces after "version." --
        # that is how the library spells the message, and the regex is
        # matched against it literally.
        msg = ("This search is broken in 1.3 and earlier, and will be fixed "
               "in a future version.  If you rely on the current behaviour, "
               "change it to '.+'")
        with self.assertWarnsRegex(FutureWarning, msg):
            it = ET.ElementTree(e).findall('//tag')
        self.assertEqual(summarize_list(it), ['tag'] * 3)


# Element.iter(), Element.itertext() and the deprecated getiterator().
class ElementIterTest(unittest.TestCase):
    def _ilist(self, elem, tag=None):
        # Summary of elem.iter(tag), as produced by summarize_list().
        return summarize_list(elem.iter(tag))

    def test_basic(self):
        doc = ET.XML("<html><body>this is a <i>paragraph</i>.</body>..</html>")
        self.assertEqual(self._ilist(doc), ['html', 'body', 'i'])
        self.assertEqual(self._ilist(doc.find('body')), ['body', 'i'])
        self.assertEqual(next(doc.iter()).tag, 'html')
        self.assertEqual(''.join(doc.itertext()), 'this is a paragraph...')
        self.assertEqual(''.join(doc.find('body').itertext()),
            'this is a paragraph.')
        self.assertEqual(next(doc.itertext()), 'this is a ')

        # iterparse should return an iterator
        sourcefile = serialize(doc, to_string=False)
        self.assertEqual(next(ET.iterparse(sourcefile))[0], 'end')

        # With an explicit parser too (issue #9708)
        sourcefile = serialize(doc, to_string=False)
        parser = ET.XMLParser(target=ET.TreeBuilder())
        self.assertEqual(next(ET.iterparse(sourcefile, parser=parser))[0],
                         'end')

        tree = ET.ElementTree(None)
        self.assertRaises(AttributeError, tree.iter)

        # Issue #16913
        # The ampersands have to be written as &amp; in the document; a
        # bare "&" is not well-formed XML.  itertext() yields them decoded.
        doc = ET.XML("<root>a&amp;<sub>b&amp;</sub>c&amp;</root>")
        self.assertEqual(''.join(doc.itertext()), 'a&b&c&')

    def test_corners(self):
        # single root, no subelements
        a = ET.Element('a')
        self.assertEqual(self._ilist(a), ['a'])

        # one child
        b = ET.SubElement(a, 'b')
        self.assertEqual(self._ilist(a), ['a', 'b'])

        # one child and one grandchild
        c = ET.SubElement(b, 'c')
        self.assertEqual(self._ilist(a), ['a', 'b', 'c'])

        # two children, only first with grandchild
        d = ET.SubElement(a, 'd')
        self.assertEqual(self._ilist(a), ['a', 'b', 'c', 'd'])

        # replace first child by second
        a[0] = a[1]
        del a[1]
        self.assertEqual(self._ilist(a), ['a', 'd'])

    def test_iter_by_tag(self):
        doc = ET.XML('''
            <document>
                <house>
                    <room>bedroom1</room>
                    <room>bedroom2</room>
                </house>
                <shed>nothing here
                </shed>
                <house>
                    <room>bedroom8</room>
                </house>
            </document>''')

        self.assertEqual(self._ilist(doc, 'room'), ['room'] * 3)
        self.assertEqual(self._ilist(doc, 'house'), ['house'] * 2)

        # test that iter also accepts 'tag' as a keyword arg
        self.assertEqual(
            summarize_list(doc.iter(tag='room')),
            ['room'] * 3)

        # make sure both tag=None and tag='*' return all tags
        all_tags = ['document', 'house', 'room', 'room',
                    'shed', 'house', 'room']
        self.assertEqual(summarize_list(doc.iter()), all_tags)
        self.assertEqual(self._ilist(doc), all_tags)
        self.assertEqual(self._ilist(doc, '*'), all_tags)

    # Element.getiterator() is deprecated.
    # NOTE: two spaces after "versions." -- the pattern has to match the
    # library's warning text, which is spelled that way.
    @checkwarnings(("This method will be removed in future versions.  "
                    "Use .+ instead.", PendingDeprecationWarning))
    def test_getiterator(self):
        doc = ET.XML('''
            <document>
                <house>
                    <room>bedroom1</room>
                    <room>bedroom2</room>
                </house>
                <shed>nothing here
                </shed>
                <house>
                    <room>bedroom8</room>
                </house>
            </document>''')

        self.assertEqual(summarize_list(doc.getiterator('room')),
                         ['room'] * 3)
        self.assertEqual(summarize_list(doc.getiterator('house')),
                         ['house'] * 2)

        # test that getiterator also accepts 'tag' as a keyword arg
        self.assertEqual(
            summarize_list(doc.getiterator(tag='room')),
            ['room'] * 3)

        # make sure both tag=None and tag='*' return all tags
        all_tags = ['document', 'house', 'room', 'room',
                    'shed', 'house', 'room']
        self.assertEqual(summarize_list(doc.getiterator()), all_tags)
        self.assertEqual(summarize_list(doc.getiterator(None)), all_tags)
        self.assertEqual(summarize_list(doc.getiterator('*')), all_tags)

    def test_copy(self):
        # Element iterators cannot be copied.
        a = ET.Element('a')
        it = a.iter()
        with self.assertRaises(TypeError):
            copy.copy(it)

    def test_pickle(self):
        # Element iterators cannot be pickled, under any protocol.
        a = ET.Element('a')
        it = a.iter()
        for proto in range(pickle.HIGHEST_PROTOCOL + 1):
            with self.assertRaises((TypeError, pickle.PicklingError)):
                pickle.dumps(it, proto)


# TreeBuilder, custom parser targets and element factories.
class TreeBuilderTest(unittest.TestCase):
    # A document with a DOCTYPE, text, one child element and a tail.
    sample1 = ('<!DOCTYPE html PUBLIC'
        ' "-//W3C//DTD XHTML 1.0 Transitional//EN"'
        ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
        '<html>text<div>subtext</div>tail</html>')

    sample2 = '''<toplevel>sometext</toplevel>'''

    def _check_sample1_element(self, e):
        # Assert that e is the tree described by sample1.
        self.assertEqual(e.tag, 'html')
        self.assertEqual(e.text, 'text')
        self.assertEqual(e.tail, None)
        self.assertEqual(e.attrib, {})
        children = list(e)
        self.assertEqual(len(children), 1)
        child = children[0]
        self.assertEqual(child.tag, 'div')
        self.assertEqual(child.text, 'subtext')
        self.assertEqual(child.tail, 'tail')
        self.assertEqual(child.attrib, {})

    def test_dummy_builder(self):
        # Targets may implement any subset of the handler methods;
        # parser.close() returns whatever the target's close() returns,
        # or None when the target has no close().
        class BaseDummyBuilder:
            def close(self):
                return 42

        class DummyBuilder(BaseDummyBuilder):
            data = start = end = lambda *a: None

        parser = ET.XMLParser(target=DummyBuilder())
        parser.feed(self.sample1)
        self.assertEqual(parser.close(), 42)

        parser = ET.XMLParser(target=BaseDummyBuilder())
        parser.feed(self.sample1)
        self.assertEqual(parser.close(), 42)

        parser = ET.XMLParser(target=object())
        parser.feed(self.sample1)
        self.assertIsNone(parser.close())

    def test_treebuilder_elementfactory_none(self):
        # element_factory=None must behave like the default factory.
        parser = ET.XMLParser(target=ET.TreeBuilder(element_factory=None))
        parser.feed(self.sample1)
        e = parser.close()
        self._check_sample1_element(e)

    def test_subclass(self):
        class MyTreeBuilder(ET.TreeBuilder):
            def foobar(self, x):
                return x * 2

        tb = MyTreeBuilder()
        self.assertEqual(tb.foobar(10), 20)

        parser = ET.XMLParser(target=tb)
        parser.feed(self.sample1)

        e = parser.close()
        self._check_sample1_element(e)

    def test_element_factory(self):
        # The factory is called once per element and records each tag.
        lst = []
        def myfactory(tag, attrib):
            nonlocal lst
            lst.append(tag)
            return ET.Element(tag, attrib)

        tb = ET.TreeBuilder(element_factory=myfactory)
        parser = ET.XMLParser(target=tb)
        parser.feed(self.sample2)
        parser.close()

        self.assertEqual(lst, ['toplevel'])

    def _check_element_factory_class(self, cls):
        # Parse sample1 with cls as element factory and check both the
        # type of the root and the shape of the resulting tree.
        tb = ET.TreeBuilder(element_factory=cls)

        parser = ET.XMLParser(target=tb)
        parser.feed(self.sample1)
        e = parser.close()
        self.assertIsInstance(e, cls)
        self._check_sample1_element(e)

    def test_element_factory_subclass(self):
        class MyElement(ET.Element):
            pass
        self._check_element_factory_class(MyElement)

    def test_element_factory_pure_python_subclass(self):
        # Mimic SimpleTAL's behaviour (issue #16089): both versions of
        # TreeBuilder should be able to cope with a subclass of the
        # pure Python Element class.
        base = ET._Element_Py
        # Not from a C extension
        self.assertEqual(base.__module__, 'xml.etree.ElementTree')
        # Force some multiple inheritance with a C class to make things
        # more interesting.
        class MyElement(base, ValueError):
            pass
        self._check_element_factory_class(MyElement)

    def test_doctype(self):
        # A target's doctype() handler receives (name, pubid, system).
        class DoctypeParser:
            _doctype = None

            def doctype(self, name, pubid, system):
                self._doctype = (name, pubid, system)

            def close(self):
                return self._doctype

        parser = ET.XMLParser(target=DoctypeParser())
        parser.feed(self.sample1)

        self.assertEqual(parser.close(),
            ('html', '-//W3C//DTD XHTML 1.0 Transitional//EN',
             'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'))

    def test_builder_lookup_errors(self):
        # A target whose attribute lookup raises `what` for one chosen
        # handler name and returns a no-op handler for every other name.
        class RaisingBuilder:
            def __init__(self, raise_in=None, what=ValueError):
                self.raise_in = raise_in
                self.what = what

            def __getattr__(self, name):
                if name == self.raise_in:
                    raise self.what(self.raise_in)
                def handle(*args):
                    pass
                return handle

        ET.XMLParser(target=RaisingBuilder())
        # cET also checks for 'close' and 'doctype', PyET does it only at need
        for event in ('start', 'data', 'end', 'comment', 'pi'):
            with self.assertRaisesRegex(ValueError, event):
                ET.XMLParser(target=RaisingBuilder(event))

        # AttributeError just means "handler not provided": construction
        # and parsing both succeed.
        ET.XMLParser(target=RaisingBuilder(what=AttributeError))
        for event in ('start', 'data', 'end', 'comment', 'pi'):
            parser = ET.XMLParser(target=RaisingBuilder(event, what=AttributeError))
            parser.feed(self.sample1)
            self.assertIsNone(parser.close())


class XMLParserTest(unittest.TestCase):
    # XMLParser construction, subclassing and doctype handling.
    sample1 = b'<file><line>22</line></file>'
    sample2 = (b'<!DOCTYPE html PUBLIC'
        b' "-//W3C//DTD XHTML 1.0 Transitional//EN"'
        b' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
        b'<html>text</html>')
    # A str (not bytes) document that declares an encoding unable to
    # represent some of its own characters.
    sample3 = ('<?xml version="1.0" encoding="iso-8859-1"?>\n'
        '<money value="$\xa3\u20ac\U0001017b">$\xa3\u20ac\U0001017b</money>')

    def _check_sample_element(self, e):
        # Assert that e is the tree described by sample1.
        self.assertEqual(e.tag, 'file')
        self.assertEqual(e[0].tag, 'line')
        self.assertEqual(e[0].text, '22')

    def test_constructor_args(self):
        # Positional args. The first (html) is not supported, but should be
        # nevertheless correctly accepted.
        with self.assertWarnsRegex(DeprecationWarning, r'\bhtml\b'):
            parser = ET.XMLParser(None, ET.TreeBuilder(), 'utf-8')
        parser.feed(self.sample1)
        self._check_sample_element(parser.close())

        # Now as keyword args.
        parser2 = ET.XMLParser(encoding='utf-8',
                               target=ET.TreeBuilder())
        parser2.feed(self.sample1)
        self._check_sample_element(parser2.close())

    def test_subclass(self):
        class MyParser(ET.XMLParser):
            pass
        parser = MyParser()
        parser.feed(self.sample1)
        self._check_sample_element(parser.close())

    def test_doctype_warning(self):
        # Calling doctype() directly is deprecated ...
        parser = ET.XMLParser()
        with self.assertWarns(DeprecationWarning):
            parser.doctype('html', '-//W3C//DTD XHTML 1.0 Transitional//EN',
                'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd')
        parser.feed('<html/>')
        parser.close()

        # ... but merely parsing a document with a DOCTYPE must not warn.
        with warnings.catch_warnings():
            warnings.simplefilter('error', DeprecationWarning)
            parser = ET.XMLParser()
            parser.feed(self.sample2)
            parser.close()

    def test_subclass_doctype(self):
        _doctype = None
        class MyParserWithDoctype(ET.XMLParser):
            def doctype(self, name, pubid, system):
                nonlocal _doctype
                _doctype = (name, pubid, system)

        # Overriding doctype() on the parser still works, with a warning.
        parser = MyParserWithDoctype()
        with self.assertWarns(DeprecationWarning):
            parser.feed(self.sample2)
        parser.close()
        self.assertEqual(_doctype,
            ('html', '-//W3C//DTD XHTML 1.0 Transitional//EN',
             'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'))

        # When the target has its own doctype(), that one is called,
        # the parser's override is not, and nothing warns.
        _doctype = _doctype2 = None
        with warnings.catch_warnings():
            warnings.simplefilter('error', DeprecationWarning)
            class DoctypeParser:
                def doctype(self, name, pubid, system):
                    nonlocal _doctype2
                    _doctype2 = (name, pubid, system)

            parser = MyParserWithDoctype(target=DoctypeParser())
            parser.feed(self.sample2)
            parser.close()
            self.assertIsNone(_doctype)
            self.assertEqual(_doctype2,
                ('html', '-//W3C//DTD XHTML 1.0 Transitional//EN',
                 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'))

    def test_inherited_doctype(self):
        '''Ensure that ordinary usage is not deprecated (Issue 19176)'''
        with warnings.catch_warnings():
            warnings.simplefilter('error', DeprecationWarning)
            class MyParserWithoutDoctype(ET.XMLParser):
                pass
            parser = MyParserWithoutDoctype()
            parser.feed(self.sample2)
            parser.close()

    def test_parse_string(self):
        # Feeding a str: the characters come through unchanged even though
        # the declaration names iso-8859-1.
        parser = ET.XMLParser(target=ET.TreeBuilder())
        parser.feed(self.sample3)
        e = parser.close()
        self.assertEqual(e.tag, 'money')
        self.assertEqual(e.attrib['value'], '$\xa3\u20ac\U0001017b')
        self.assertEqual(e.text, '$\xa3\u20ac\U0001017b')


class NamespaceParseTest(unittest.TestCase):
    def test_find_with_namespace(self):
        # Paths in {uri}tag form, with a prefix map passed alongside.
        nsmap = {'h': 'hello', 'f': 'foo'}
        doc = ET.fromstring(SAMPLE_XML_NS_ELEMS)

        self.assertEqual(len(doc.findall('{hello}table', nsmap)), 1)
        self.assertEqual(len(doc.findall('.//{hello}td', nsmap)), 2)
        self.assertEqual(len(doc.findall('.//{foo}name', nsmap)), 1)


# Indexing, slicing, slice deletion and slice assignment on Element.
class ElementSlicingTest(unittest.TestCase):
    def _elem_tags(self, elemlist):
        # Tags of the given elements, in order.
        return [e.tag for e in elemlist]

    def _subelem_tags(self, elem):
        # Tags of elem's direct children, in order.
        return self._elem_tags(list(elem))

    def _make_elem_with_children(self, numchildren):
        """Create an Element with a tag 'a', with the given amount of children
           named 'a0', 'a1' ... and so on.

        """
        e = ET.Element('a')
        for i in range(numchildren):
            ET.SubElement(e, 'a%s' % i)
        return e

    def test_getslice_single_index(self):
        e = self._make_elem_with_children(10)

        self.assertEqual(e[1].tag, 'a1')
        self.assertEqual(e[-2].tag, 'a8')

        self.assertRaises(IndexError, lambda: e[12])
        self.assertRaises(IndexError, lambda: e[-12])

    def test_getslice_range(self):
        e = self._make_elem_with_children(6)

        self.assertEqual(self._elem_tags(e[3:]), ['a3', 'a4', 'a5'])
        self.assertEqual(self._elem_tags(e[3:6]), ['a3', 'a4', 'a5'])
        self.assertEqual(self._elem_tags(e[3:16]), ['a3', 'a4', 'a5'])
        self.assertEqual(self._elem_tags(e[3:5]), ['a3', 'a4'])
        self.assertEqual(self._elem_tags(e[3:-1]), ['a3', 'a4'])
        self.assertEqual(self._elem_tags(e[:2]), ['a0', 'a1'])

    def test_getslice_steps(self):
        e = self._make_elem_with_children(10)

        self.assertEqual(self._elem_tags(e[8:10:1]), ['a8', 'a9'])
        self.assertEqual(self._elem_tags(e[::3]), ['a0', 'a3', 'a6', 'a9'])
        self.assertEqual(self._elem_tags(e[::8]), ['a0', 'a8'])
        self.assertEqual(self._elem_tags(e[1::8]), ['a1', 'a9'])
        # Steps at and far beyond sys.maxsize select just the first item.
        self.assertEqual(self._elem_tags(e[3::sys.maxsize]), ['a3'])
        self.assertEqual(self._elem_tags(e[3::sys.maxsize<<64]), ['a3'])

    def test_getslice_negative_steps(self):
        e = self._make_elem_with_children(4)

        self.assertEqual(self._elem_tags(e[::-1]), ['a3', 'a2', 'a1', 'a0'])
        self.assertEqual(self._elem_tags(e[::-2]), ['a3', 'a1'])
        self.assertEqual(self._elem_tags(e[3::-sys.maxsize]), ['a3'])
        self.assertEqual(self._elem_tags(e[3::-sys.maxsize-1]), ['a3'])
        self.assertEqual(self._elem_tags(e[3::-sys.maxsize<<64]), ['a3'])

    def test_delslice(self):
        e = self._make_elem_with_children(4)
        del e[0:2]
        self.assertEqual(self._subelem_tags(e), ['a2', 'a3'])

        e = self._make_elem_with_children(4)
        del e[0:]
        self.assertEqual(self._subelem_tags(e), [])

        e = self._make_elem_with_children(4)
        del e[::-1]
        self.assertEqual(self._subelem_tags(e), [])

        e = self._make_elem_with_children(4)
        del e[::-2]
        self.assertEqual(self._subelem_tags(e), ['a0', 'a2'])

        e = self._make_elem_with_children(4)
        del e[1::2]
        self.assertEqual(self._subelem_tags(e), ['a0', 'a2'])

        e = self._make_elem_with_children(2)
        del e[::2]
        self.assertEqual(self._subelem_tags(e), ['a1'])

    def test_setslice_single_index(self):
        e = self._make_elem_with_children(4)
        e[1] = ET.Element('b')
        self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'a2', 'a3'])

        e[-2] = ET.Element('c')
        self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'c', 'a3'])

        # Out-of-range assignments fail and leave the children untouched.
        with self.assertRaises(IndexError):
            e[5] = ET.Element('d')
        with self.assertRaises(IndexError):
            e[-5] = ET.Element('d')
        self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'c', 'a3'])

    def test_setslice_range(self):
        # Plain slices may be replaced by a sequence of any length.
        e = self._make_elem_with_children(4)
        e[1:3] = [ET.Element('b%s' % i) for i in range(2)]
        self.assertEqual(self._subelem_tags(e), ['a0', 'b0', 'b1', 'a3'])

        e = self._make_elem_with_children(4)
        e[1:3] = [ET.Element('b')]
        self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'a3'])

        e = self._make_elem_with_children(4)
        e[1:3] = [ET.Element('b%s' % i) for i in range(3)]
        self.assertEqual(self._subelem_tags(e), ['a0', 'b0', 'b1', 'b2', 'a3'])

    def test_setslice_steps(self):
        e = self._make_elem_with_children(6)
        e[1:5:2] = [ET.Element('b%s' % i) for i in range(2)]
        self.assertEqual(self._subelem_tags(e), ['a0', 'b0', 'a2', 'b1', 'a4', 'a5'])

        # Extended slices require a replacement of exactly matching length;
        # a mismatch raises ValueError and changes nothing.
        e = self._make_elem_with_children(6)
        with self.assertRaises(ValueError):
            e[1:5:2] = [ET.Element('b')]
        with self.assertRaises(ValueError):
            e[1:5:2] = [ET.Element('b%s' % i) for i in range(3)]
        with self.assertRaises(ValueError):
            e[1:5:2] = []
        self.assertEqual(self._subelem_tags(e), ['a0', 'a1', 'a2', 'a3', 'a4', 'a5'])

        e = self._make_elem_with_children(4)
        e[1::sys.maxsize] = [ET.Element('b')]
        self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'a2', 'a3'])
        e[1::sys.maxsize<<64] = [ET.Element('c')]
        self.assertEqual(self._subelem_tags(e), ['a0', 'c', 'a2', 'a3'])

    def test_setslice_negative_steps(self):
        e = self._make_elem_with_children(4)
        e[2:0:-1] = [ET.Element('b%s' % i) for i in range(2)]
        self.assertEqual(self._subelem_tags(e), ['a0', 'b1', 'b0', 'a3'])

        e = self._make_elem_with_children(4)
        with self.assertRaises(ValueError):
            e[2:0:-1] = [ET.Element('b')]
        with self.assertRaises(ValueError):
            e[2:0:-1] = [ET.Element('b%s' % i) for i in range(3)]
        with self.assertRaises(ValueError):
            e[2:0:-1] = []
        self.assertEqual(self._subelem_tags(e), ['a0', 'a1', 'a2', 'a3'])

        e = self._make_elem_with_children(4)
        e[1::-sys.maxsize] = [ET.Element('b')]
        self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'a2', 'a3'])
        e[1::-sys.maxsize-1] = [ET.Element('c')]
        self.assertEqual(self._subelem_tags(e), ['a0', 'c', 'a2', 'a3'])
        e[1::-sys.maxsize<<64] = [ET.Element('d')]
        self.assertEqual(self._subelem_tags(e), ['a0', 'd', 'a2', 'a3'])


# Serialisation and parsing through files, streams and file-like objects.
class IOTest(unittest.TestCase):
    def test_encoding(self):
        # Test encoding issues.
        #
        # Expected output follows the serializer's rules: '<', '>' and '&'
        # are always written as entities, '"' additionally inside attribute
        # values, and characters the target encoding cannot represent
        # become numeric character references.  An XML declaration is
        # emitted for every encoding except utf-8, us-ascii and 'unicode'.
        elem = ET.Element("tag")
        elem.text = "abc"
        self.assertEqual(serialize(elem), '<tag>abc</tag>')
        for enc in ("utf-8", "us-ascii"):
            with self.subTest(enc):
                self.assertEqual(serialize(elem, encoding=enc),
                        b'<tag>abc</tag>')
                self.assertEqual(serialize(elem, encoding=enc.upper()),
                        b'<tag>abc</tag>')
        for enc in ("iso-8859-1", "utf-16", "utf-32"):
            with self.subTest(enc):
                self.assertEqual(serialize(elem, encoding=enc),
                        ("<?xml version='1.0' encoding='%s'?>\n"
                         "<tag>abc</tag>" % enc).encode(enc))
                upper = enc.upper()
                self.assertEqual(serialize(elem, encoding=upper),
                        ("<?xml version='1.0' encoding='%s'?>\n"
                         "<tag>abc</tag>" % upper).encode(enc))

        # Markup characters in text.
        elem = ET.Element("tag")
        elem.text = "<&\"\'>"
        self.assertEqual(serialize(elem), '<tag>&lt;&amp;"\'&gt;</tag>')
        self.assertEqual(serialize(elem, encoding="utf-8"),
                b'<tag>&lt;&amp;"\'&gt;</tag>')
        self.assertEqual(serialize(elem, encoding="us-ascii"),
                b'<tag>&lt;&amp;"\'&gt;</tag>')
        for enc in ("iso-8859-1", "utf-16", "utf-32"):
            self.assertEqual(serialize(elem, encoding=enc),
                    ("<?xml version='1.0' encoding='%s'?>\n"
                     "<tag>&lt;&amp;\"'&gt;</tag>" % enc).encode(enc))

        # Markup characters in an attribute value.
        elem = ET.Element("tag")
        elem.attrib["key"] = "<&\"\'>"
        self.assertEqual(serialize(elem), '<tag key="&lt;&amp;&quot;\'&gt;" />')
        self.assertEqual(serialize(elem, encoding="utf-8"),
                b'<tag key="&lt;&amp;&quot;\'&gt;" />')
        self.assertEqual(serialize(elem, encoding="us-ascii"),
                b'<tag key="&lt;&amp;&quot;\'&gt;" />')
        for enc in ("iso-8859-1", "utf-16", "utf-32"):
            self.assertEqual(serialize(elem, encoding=enc),
                    ("<?xml version='1.0' encoding='%s'?>\n"
                     "<tag key=\"&lt;&amp;&quot;'&gt;\" />" % enc).encode(enc))

        # Non-ASCII characters in text.
        elem = ET.Element("tag")
        elem.text = '\xe5\xf6\xf6<>'
        self.assertEqual(serialize(elem), '<tag>\xe5\xf6\xf6&lt;&gt;</tag>')
        self.assertEqual(serialize(elem, encoding="utf-8"),
                b'<tag>\xc3\xa5\xc3\xb6\xc3\xb6&lt;&gt;</tag>')
        # us-ascii cannot represent them: numeric character references.
        self.assertEqual(serialize(elem, encoding="us-ascii"),
                b'<tag>&#229;&#246;&#246;&lt;&gt;</tag>')
        for enc in ("iso-8859-1", "utf-16", "utf-32"):
            self.assertEqual(serialize(elem, encoding=enc),
                    ("<?xml version='1.0' encoding='%s'?>\n"
                     "<tag>\xe5\xf6\xf6&lt;&gt;</tag>" % enc).encode(enc))

        # Non-ASCII characters in an attribute value.
        elem = ET.Element("tag")
        elem.attrib["key"] = '\xe5\xf6\xf6<>'
        self.assertEqual(serialize(elem), '<tag key="\xe5\xf6\xf6&lt;&gt;" />')
        self.assertEqual(serialize(elem, encoding="utf-8"),
                b'<tag key="\xc3\xa5\xc3\xb6\xc3\xb6&lt;&gt;" />')
        self.assertEqual(serialize(elem, encoding="us-ascii"),
                b'<tag key="&#229;&#246;&#246;&lt;&gt;" />')
        for enc in ("iso-8859-1", "utf-16", "utf-16le", "utf-16be", "utf-32"):
            self.assertEqual(serialize(elem, encoding=enc),
                    ("<?xml version='1.0' encoding='%s'?>\n"
                     "<tag key=\"\xe5\xf6\xf6&lt;&gt;\" />" % enc).encode(enc))

    def test_write_to_filename(self):
        self.addCleanup(support.unlink, TESTFN)
        tree = ET.ElementTree(ET.XML('''<site />'''))
        tree.write(TESTFN)
        with open(TESTFN, 'rb') as f:
            self.assertEqual(f.read(), b'''<site />''')

    def test_write_to_text_file(self):
        self.addCleanup(support.unlink, TESTFN)
        tree = ET.ElementTree(ET.XML('''<site />'''))
        with open(TESTFN, 'w', encoding='utf-8') as f:
            tree.write(f, encoding='unicode')
            # write() must not close a file object it was handed.
            self.assertFalse(f.closed)
        with open(TESTFN, 'rb') as f:
            self.assertEqual(f.read(), b'''<site />''')

    def test_write_to_binary_file(self):
        self.addCleanup(support.unlink, TESTFN)
        tree = ET.ElementTree(ET.XML('''<site />'''))
        with open(TESTFN, 'wb') as f:
            tree.write(f)
            self.assertFalse(f.closed)
        with open(TESTFN, 'rb') as f:
            self.assertEqual(f.read(), b'''<site />''')

    def test_write_to_binary_file_with_bom(self):
        self.addCleanup(support.unlink, TESTFN)
        tree = ET.ElementTree(ET.XML('''<site />'''))
        # test BOM writing to buffered file
        with open(TESTFN, 'wb') as f:
            tree.write(f, encoding='utf-16')
            self.assertFalse(f.closed)
        with open(TESTFN, 'rb') as f:
            self.assertEqual(f.read(),
                    '''<?xml version='1.0' encoding='utf-16'?>\n'''
                    '''<site />'''.encode("utf-16"))
        # test BOM writing to non-buffered file
        with open(TESTFN, 'wb', buffering=0) as f:
            tree.write(f, encoding='utf-16')
            self.assertFalse(f.closed)
        with open(TESTFN, 'rb') as f:
            self.assertEqual(f.read(),
                    '''<?xml version='1.0' encoding='utf-16'?>\n'''
                    '''<site />'''.encode("utf-16"))

    def test_read_from_stringio(self):
        tree = ET.ElementTree()
        stream = io.StringIO('''<?xml version="1.0"?><site></site>''')
        tree.parse(stream)
        self.assertEqual(tree.getroot().tag, 'site')

    def test_write_to_stringio(self):
        tree = ET.ElementTree(ET.XML('''<site />'''))
        stream = io.StringIO()
        tree.write(stream, encoding='unicode')
        self.assertEqual(stream.getvalue(), '''<site />''')

    def test_read_from_bytesio(self):
        tree = ET.ElementTree()
        raw = io.BytesIO(b'''<?xml version="1.0"?><site></site>''')
        tree.parse(raw)
        self.assertEqual(tree.getroot().tag, 'site')

    def test_write_to_bytesio(self):
        tree = ET.ElementTree(ET.XML('''<site />'''))
        raw = io.BytesIO()
        tree.write(raw)
        self.assertEqual(raw.getvalue(), b'''<site />''')

    # Bare object used to build minimal readers/writers that expose only
    # the attributes a test assigns to them.
    class dummy:
        pass

    def test_read_from_user_text_reader(self):
        # An object with nothing but a text read() is a valid source.
        stream = io.StringIO('''<?xml version="1.0"?><site></site>''')
        reader = self.dummy()
        reader.read = stream.read
        tree = ET.ElementTree()
        tree.parse(reader)
        self.assertEqual(tree.getroot().tag, 'site')

    def test_write_to_user_text_writer(self):
        # An object with nothing but a text write() is a valid sink.
        tree = ET.ElementTree(ET.XML('''<site />'''))
        stream = io.StringIO()
        writer = self.dummy()
        writer.write = stream.write
        tree.write(writer, encoding='unicode')
        self.assertEqual(stream.getvalue(), '''<site />''')

    def test_read_from_user_binary_reader(self):
        # An object with nothing but a binary read() is a valid source.
        raw = io.BytesIO(b'''<?xml version="1.0"?><site></site>''')
        reader = self.dummy()
        reader.read = raw.read
        tree = ET.ElementTree()
        tree.parse(reader)
        self.assertEqual(tree.getroot().tag, 'site')

    def test_write_to_user_binary_writer(self):
        # An object with nothing but a binary write() is a valid sink.
        tree = ET.ElementTree(ET.XML('''<site />'''))
        raw = io.BytesIO()
        writer = self.dummy()
        writer.write = raw.write
        tree.write(writer)
        self.assertEqual(raw.getvalue(), b'''<site />''')

    def test_write_to_user_binary_writer_with_bom(self):
        # The writer additionally exposes seekable() and tell(); the
        # utf-16 output is expected to carry exactly one BOM.
        tree = ET.ElementTree(ET.XML('''<site />'''))
        raw = io.BytesIO()
        writer = self.dummy()
        writer.write = raw.write
        writer.seekable = lambda: True
        writer.tell = raw.tell
        tree.write(writer, encoding="utf-16")
        self.assertEqual(raw.getvalue(),
                '''<?xml version='1.0' encoding='utf-16'?>\n'''
                '''<site />'''.encode("utf-16"))

    def test_tostringlist_invariant(self):
        # Joining tostringlist() must give exactly what tostring() gives.
        root = ET.fromstring('<tag>foo</tag>')
        self.assertEqual(
            ET.tostring(root, 'unicode'),
            ''.join(ET.tostringlist(root, 'unicode')))
        self.assertEqual(
            ET.tostring(root, 'utf-16'),
            b''.join(ET.tostringlist(root, 'utf-16')))

    def test_short_empty_elements(self):
        # Empty elements are written as "<x />" by default and as
        # "<x></x>" when short_empty_elements is False.
        root = ET.fromstring('<tag>a<x />b<y></y>c</tag>')
        self.assertEqual(
            ET.tostring(root, 'unicode'),
            '<tag>a<x />b<y />c</tag>')
        self.assertEqual(
            ET.tostring(root, 'unicode', short_empty_elements=True),
            '<tag>a<x />b<y />c</tag>')
        self.assertEqual(
            ET.tostring(root, 'unicode', short_empty_elements=False),
            '<tag>a<x></x>b<y></y>c</tag>')


class ParseErrorTest(unittest.TestCase):
    def test_subclass(self):
        self.assertIsInstance(ET.ParseError(), SyntaxError)

    def _get_error(self, s):
        try:
            ET.fromstring(s)
        except ET.ParseError as e:
            return e

test_error_position(self): 3100 self.assertEqual(self._get_error('foo').position, (1, 0)) 3101 self.assertEqual(self._get_error('<tag>&foo;</tag>').position, (1, 5)) 3102 self.assertEqual(self._get_error('foobar<').position, (1, 6)) 3103 3104 def test_error_code(self): 3105 import xml.parsers.expat.errors as ERRORS 3106 self.assertEqual(self._get_error('foo').code, 3107 ERRORS.codes[ERRORS.XML_ERROR_SYNTAX]) 3108 3109 3110 class KeywordArgsTest(unittest.TestCase): 3111 # Test various issues with keyword arguments passed to ET.Element 3112 # constructor and methods 3113 def test_issue14818(self): 3114 x = ET.XML("<a>foo</a>") 3115 self.assertEqual(x.find('a', None), 3116 x.find(path='a', namespaces=None)) 3117 self.assertEqual(x.findtext('a', None, None), 3118 x.findtext(path='a', default=None, namespaces=None)) 3119 self.assertEqual(x.findall('a', None), 3120 x.findall(path='a', namespaces=None)) 3121 self.assertEqual(list(x.iterfind('a', None)), 3122 list(x.iterfind(path='a', namespaces=None))) 3123 3124 self.assertEqual(ET.Element('a').attrib, {}) 3125 elements = [ 3126 ET.Element('a', dict(href="#", id="foo")), 3127 ET.Element('a', attrib=dict(href="#", id="foo")), 3128 ET.Element('a', dict(href="#"), id="foo"), 3129 ET.Element('a', href="#", id="foo"), 3130 ET.Element('a', dict(href="#", id="foo"), href="#", id="foo"), 3131 ] 3132 for e in elements: 3133 self.assertEqual(e.tag, 'a') 3134 self.assertEqual(e.attrib, dict(href="#", id="foo")) 3135 3136 e2 = ET.SubElement(elements[0], 'foobar', attrib={'key1': 'value1'}) 3137 self.assertEqual(e2.attrib['key1'], 'value1') 3138 3139 with self.assertRaisesRegex(TypeError, 'must be dict, not str'): 3140 ET.Element('a', "I'm not a dict") 3141 with self.assertRaisesRegex(TypeError, 'must be dict, not str'): 3142 ET.Element('a', attrib="I'm not a dict") 3143 3144 # -------------------------------------------------------------------- 3145 3146 class NoAcceleratorTest(unittest.TestCase): 3147 def setUp(self): 3148 if not 
pyET: 3149 raise unittest.SkipTest('only for the Python version') 3150 3151 # Test that the C accelerator was not imported for pyET 3152 def test_correct_import_pyET(self): 3153 # The type of methods defined in Python code is types.FunctionType, 3154 # while the type of methods defined inside _elementtree is 3155 # <class 'wrapper_descriptor'> 3156 self.assertIsInstance(pyET.Element.__init__, types.FunctionType) 3157 self.assertIsInstance(pyET.XMLParser.__init__, types.FunctionType) 3158 3159 # -------------------------------------------------------------------- 3160 3161 3162 def test_main(module=None): 3163 # When invoked without a module, runs the Python ET tests by loading pyET. 3164 # Otherwise, uses the given module as the ET. 3165 global pyET 3166 pyET = import_fresh_module('xml.etree.ElementTree', 3167 blocked=['_elementtree']) 3168 if module is None: 3169 module = pyET 3170 3171 global ET 3172 ET = module 3173 3174 test_classes = [ 3175 ModuleTest, 3176 ElementSlicingTest, 3177 BasicElementTest, 3178 BadElementTest, 3179 BadElementPathTest, 3180 ElementTreeTest, 3181 IOTest, 3182 ParseErrorTest, 3183 XIncludeTest, 3184 ElementTreeTypeTest, 3185 ElementFindTest, 3186 ElementIterTest, 3187 TreeBuilderTest, 3188 XMLParserTest, 3189 XMLPullParserTest, 3190 BugsTest, 3191 ] 3192 3193 # These tests will only run for the pure-Python version that doesn't import 3194 # _elementtree. We can't use skipUnless here, because pyET is filled in only 3195 # after the module is loaded. 3196 if pyET is not ET: 3197 test_classes.extend([ 3198 NoAcceleratorTest, 3199 ]) 3200 3201 # Provide default namespace mapping and path cache. 
3202 from xml.etree import ElementPath 3203 nsmap = ET.register_namespace._namespace_map 3204 # Copy the default namespace mapping 3205 nsmap_copy = nsmap.copy() 3206 # Copy the path cache (should be empty) 3207 path_cache = ElementPath._cache 3208 ElementPath._cache = path_cache.copy() 3209 try: 3210 support.run_unittest(*test_classes) 3211 finally: 3212 from xml.etree import ElementPath 3213 # Restore mapping and path cache 3214 nsmap.clear() 3215 nsmap.update(nsmap_copy) 3216 ElementPath._cache = path_cache 3217 # don't interfere with subsequent tests 3218 ET = pyET = None 3219 3220 3221 if __name__ == '__main__': 3222 test_main() 3223