# xml.etree test.  This file contains enough tests to make sure that
# all included components work as they should.
# Large parts are extracted from the upstream test suite.

# IMPORTANT: the same doctests are run from "test_xml_etree_c" in
# order to ensure consistency between the C implementation and the
# Python implementation.
#
# For this purpose, the module-level "ET" symbol is temporarily
# monkey-patched when running the "test_xml_etree_c" test suite.
# Don't re-import "xml.etree.ElementTree" module in the docstring,
# except if the test is specific to the Python implementation.

import sys
import cgi

from test import test_support
from test.test_support import findfile

from xml.etree import ElementTree as ET

SIMPLE_XMLFILE = findfile("simple.xml", subdir="xmltestdata")
SIMPLE_NS_XMLFILE = findfile("simple-ns.xml", subdir="xmltestdata")

# NOTE(review): the indentation inside the sample documents below was
# reconstructed (two spaces per nesting level); confirm against the
# expected output of the doctests that serialize them.
SAMPLE_XML = """\
<body>
  <tag class='a'>text</tag>
  <tag class='b' />
  <section>
    <tag class='b' id='inner'>subtext</tag>
  </section>
</body>
"""

SAMPLE_SECTION = """\
<section>
  <tag class='b' id='inner'>subtext</tag>
  <nexttag />
  <nextsection>
    <tag />
  </nextsection>
</section>
"""

SAMPLE_XML_NS = """
<body xmlns="http://effbot.org/ns">
  <tag>text</tag>
  <tag />
  <section>
    <tag>subtext</tag>
  </section>
</body>
"""


def sanity():
    """
    Import sanity.

    >>> from xml.etree import ElementTree
    >>> from xml.etree import ElementInclude
    >>> from xml.etree import ElementPath
    """

def check_method(method):
    """Print a diagnostic line if *method* is not callable.

    Nothing is printed (and nothing is returned) on success, so a doctest
    calling this helper expects no output.
    """
    if not callable(method):
        # Single pre-formatted argument: identical output whether "print"
        # is the Python 2 statement or a function.
        print("%s not callable" % (method,))

def serialize(elem, to_string=True, **options):
    """Serialize *elem* through ElementTree.write().

    *options* are passed unchanged to ``ElementTree.write``.  Returns the
    serialized string when *to_string* is true; otherwise returns the
    in-memory file object rewound to its start.
    """
    import StringIO
    stream = StringIO.StringIO()  # was named "file", shadowing the builtin
    ET.ElementTree(elem).write(stream, **options)
    if to_string:
        return stream.getvalue()
    stream.seek(0)
    return stream

def summarize(elem):
    """Return a short label for *elem*: its tag, or "<Comment>"."""
    if elem.tag == ET.Comment:
        return "<Comment>"
    return elem.tag

def summarize_list(seq):
    """Return the list of summarize() labels for the elements in *seq*."""
    return [summarize(elem) for elem in seq]

def normalize_crlf(tree):
    """Replace "\\r\\n" by "\\n" in the text and tail of every element."""
    for elem in tree.iter():
        if elem.text:
            elem.text = elem.text.replace("\r\n", "\n")
        if elem.tail:
            elem.tail = elem.tail.replace("\r\n", "\n")

def check_string(string):
    """Exercise the string interface of *string*.

    Operations that fail raise; the only printed diagnostic is for an
    iteration item whose length is not 1.
    """
    len(string)
    for char in string:
        if len(char) != 1:
            # 1-tuple: formats correctly even if the item is itself a tuple.
            print("expected one-character string, got %r" % (char,))
    # The results are irrelevant; only the operations must be supported.
    string + ""
    string + " "
    string[:0]

def check_mapping(mapping):
    """Exercise the mapping interface of *mapping*.

    Note that this stores the entry "key" -> "value" in *mapping* and
    leaves it there.
    """
    len(mapping)
    keys = mapping.keys()
    mapping.items()
    for key in keys:
        mapping[key]
    mapping["key"] = "value"
    if mapping["key"] != "value":
        print("expected value string, got %r" % (mapping["key"],))

def check_element(element):
    """Check that *element* and all its descendants look like Elements.

    Prints one diagnostic per missing member, then exercises tag, attrib,
    text and tail through check_string() / check_mapping().
    """
    if not ET.iselement(element):
        print("not an element")
    for member in ("tag", "attrib", "text", "tail"):
        if not hasattr(element, member):
            print("no %s member" % member)

    check_string(element.tag)
    check_mapping(element.attrib)
    if element.text is not None:
        check_string(element.text)
    if element.tail is not None:
        check_string(element.tail)
    for elem in element:
        check_element(elem)

#
-------------------------------------------------------------------- 136 # element tree tests 137 138 def interface(): 139 r""" 140 Test element tree interface. 141 142 >>> element = ET.Element("tag") 143 >>> check_element(element) 144 >>> tree = ET.ElementTree(element) 145 >>> check_element(tree.getroot()) 146 147 >>> element = ET.Element("t\xe4g", key="value") 148 >>> tree = ET.ElementTree(element) 149 >>> repr(element) # doctest: +ELLIPSIS 150 "<Element 't\\xe4g' at 0x...>" 151 >>> element = ET.Element("tag", key="value") 152 153 Make sure all standard element methods exist. 154 155 >>> check_method(element.append) 156 >>> check_method(element.extend) 157 >>> check_method(element.insert) 158 >>> check_method(element.remove) 159 >>> check_method(element.getchildren) 160 >>> check_method(element.find) 161 >>> check_method(element.iterfind) 162 >>> check_method(element.findall) 163 >>> check_method(element.findtext) 164 >>> check_method(element.clear) 165 >>> check_method(element.get) 166 >>> check_method(element.set) 167 >>> check_method(element.keys) 168 >>> check_method(element.items) 169 >>> check_method(element.iter) 170 >>> check_method(element.itertext) 171 >>> check_method(element.getiterator) 172 173 These methods return an iterable. See bug 6472. 174 175 >>> check_method(element.iter("tag").next) 176 >>> check_method(element.iterfind("tag").next) 177 >>> check_method(element.iterfind("*").next) 178 >>> check_method(tree.iter("tag").next) 179 >>> check_method(tree.iterfind("tag").next) 180 >>> check_method(tree.iterfind("*").next) 181 182 These aliases are provided: 183 184 >>> assert ET.XML == ET.fromstring 185 >>> assert ET.PI == ET.ProcessingInstruction 186 >>> assert ET.XMLParser == ET.XMLTreeBuilder 187 """ 188 189 def simpleops(): 190 """ 191 Basic method sanity checks. 
192 193 >>> elem = ET.XML("<body><tag/></body>") 194 >>> serialize(elem) 195 '<body><tag /></body>' 196 >>> e = ET.Element("tag2") 197 >>> elem.append(e) 198 >>> serialize(elem) 199 '<body><tag /><tag2 /></body>' 200 >>> elem.remove(e) 201 >>> serialize(elem) 202 '<body><tag /></body>' 203 >>> elem.insert(0, e) 204 >>> serialize(elem) 205 '<body><tag2 /><tag /></body>' 206 >>> elem.remove(e) 207 >>> elem.extend([e]) 208 >>> serialize(elem) 209 '<body><tag /><tag2 /></body>' 210 >>> elem.remove(e) 211 212 >>> element = ET.Element("tag", key="value") 213 >>> serialize(element) # 1 214 '<tag key="value" />' 215 >>> subelement = ET.Element("subtag") 216 >>> element.append(subelement) 217 >>> serialize(element) # 2 218 '<tag key="value"><subtag /></tag>' 219 >>> element.insert(0, subelement) 220 >>> serialize(element) # 3 221 '<tag key="value"><subtag /><subtag /></tag>' 222 >>> element.remove(subelement) 223 >>> serialize(element) # 4 224 '<tag key="value"><subtag /></tag>' 225 >>> element.remove(subelement) 226 >>> serialize(element) # 5 227 '<tag key="value" />' 228 >>> element.remove(subelement) 229 Traceback (most recent call last): 230 ValueError: list.remove(x): x not in list 231 >>> serialize(element) # 6 232 '<tag key="value" />' 233 >>> element[0:0] = [subelement, subelement, subelement] 234 >>> serialize(element[1]) 235 '<subtag />' 236 >>> element[1:9] == [element[1], element[2]] 237 True 238 >>> element[:9:2] == [element[0], element[2]] 239 True 240 >>> del element[1:2] 241 >>> serialize(element) 242 '<tag key="value"><subtag /><subtag /></tag>' 243 """ 244 245 def cdata(): 246 """ 247 Test CDATA handling (etc). 
248 249 >>> serialize(ET.XML("<tag>hello</tag>")) 250 '<tag>hello</tag>' 251 >>> serialize(ET.XML("<tag>hello</tag>")) 252 '<tag>hello</tag>' 253 >>> serialize(ET.XML("<tag><![CDATA[hello]]></tag>")) 254 '<tag>hello</tag>' 255 """ 256 257 # Only with Python implementation 258 def simplefind(): 259 """ 260 Test find methods using the elementpath fallback. 261 262 >>> from xml.etree import ElementTree 263 264 >>> CurrentElementPath = ElementTree.ElementPath 265 >>> ElementTree.ElementPath = ElementTree._SimpleElementPath() 266 >>> elem = ElementTree.XML(SAMPLE_XML) 267 >>> elem.find("tag").tag 268 'tag' 269 >>> ElementTree.ElementTree(elem).find("tag").tag 270 'tag' 271 >>> elem.findtext("tag") 272 'text' 273 >>> elem.findtext("tog") 274 >>> elem.findtext("tog", "default") 275 'default' 276 >>> ElementTree.ElementTree(elem).findtext("tag") 277 'text' 278 >>> summarize_list(elem.findall("tag")) 279 ['tag', 'tag'] 280 >>> summarize_list(elem.findall(".//tag")) 281 ['tag', 'tag', 'tag'] 282 283 Path syntax doesn't work in this case. 284 285 >>> elem.find("section/tag") 286 >>> elem.findtext("section/tag") 287 >>> summarize_list(elem.findall("section/tag")) 288 [] 289 290 >>> ElementTree.ElementPath = CurrentElementPath 291 """ 292 293 def find(): 294 """ 295 Test find methods (including xpath syntax). 
296 297 >>> elem = ET.XML(SAMPLE_XML) 298 >>> elem.find("tag").tag 299 'tag' 300 >>> ET.ElementTree(elem).find("tag").tag 301 'tag' 302 >>> elem.find("section/tag").tag 303 'tag' 304 >>> elem.find("./tag").tag 305 'tag' 306 >>> ET.ElementTree(elem).find("./tag").tag 307 'tag' 308 >>> ET.ElementTree(elem).find("/tag").tag 309 'tag' 310 >>> elem[2] = ET.XML(SAMPLE_SECTION) 311 >>> elem.find("section/nexttag").tag 312 'nexttag' 313 >>> ET.ElementTree(elem).find("section/tag").tag 314 'tag' 315 >>> ET.ElementTree(elem).find("tog") 316 >>> ET.ElementTree(elem).find("tog/foo") 317 >>> elem.findtext("tag") 318 'text' 319 >>> elem.findtext("section/nexttag") 320 '' 321 >>> elem.findtext("section/nexttag", "default") 322 '' 323 >>> elem.findtext("tog") 324 >>> elem.findtext("tog", "default") 325 'default' 326 >>> ET.ElementTree(elem).findtext("tag") 327 'text' 328 >>> ET.ElementTree(elem).findtext("tog/foo") 329 >>> ET.ElementTree(elem).findtext("tog/foo", "default") 330 'default' 331 >>> ET.ElementTree(elem).findtext("./tag") 332 'text' 333 >>> ET.ElementTree(elem).findtext("/tag") 334 'text' 335 >>> elem.findtext("section/tag") 336 'subtext' 337 >>> ET.ElementTree(elem).findtext("section/tag") 338 'subtext' 339 >>> summarize_list(elem.findall(".")) 340 ['body'] 341 >>> summarize_list(elem.findall("tag")) 342 ['tag', 'tag'] 343 >>> summarize_list(elem.findall("tog")) 344 [] 345 >>> summarize_list(elem.findall("tog/foo")) 346 [] 347 >>> summarize_list(elem.findall("*")) 348 ['tag', 'tag', 'section'] 349 >>> summarize_list(elem.findall(".//tag")) 350 ['tag', 'tag', 'tag', 'tag'] 351 >>> summarize_list(elem.findall("section/tag")) 352 ['tag'] 353 >>> summarize_list(elem.findall("section//tag")) 354 ['tag', 'tag'] 355 >>> summarize_list(elem.findall("section/*")) 356 ['tag', 'nexttag', 'nextsection'] 357 >>> summarize_list(elem.findall("section//*")) 358 ['tag', 'nexttag', 'nextsection', 'tag'] 359 >>> summarize_list(elem.findall("section/.//*")) 360 ['tag', 'nexttag', 
'nextsection', 'tag'] 361 >>> summarize_list(elem.findall("*/*")) 362 ['tag', 'nexttag', 'nextsection'] 363 >>> summarize_list(elem.findall("*//*")) 364 ['tag', 'nexttag', 'nextsection', 'tag'] 365 >>> summarize_list(elem.findall("*/tag")) 366 ['tag'] 367 >>> summarize_list(elem.findall("*/./tag")) 368 ['tag'] 369 >>> summarize_list(elem.findall("./tag")) 370 ['tag', 'tag'] 371 >>> summarize_list(elem.findall(".//tag")) 372 ['tag', 'tag', 'tag', 'tag'] 373 >>> summarize_list(elem.findall("././tag")) 374 ['tag', 'tag'] 375 >>> summarize_list(elem.findall(".//tag[@class]")) 376 ['tag', 'tag', 'tag'] 377 >>> summarize_list(elem.findall(".//tag[@class='a']")) 378 ['tag'] 379 >>> summarize_list(elem.findall(".//tag[@class='b']")) 380 ['tag', 'tag'] 381 >>> summarize_list(elem.findall(".//tag[@id]")) 382 ['tag'] 383 >>> summarize_list(elem.findall(".//section[tag]")) 384 ['section'] 385 >>> summarize_list(elem.findall(".//section[element]")) 386 [] 387 >>> summarize_list(elem.findall("../tag")) 388 [] 389 >>> summarize_list(elem.findall("section/../tag")) 390 ['tag', 'tag'] 391 >>> summarize_list(ET.ElementTree(elem).findall("./tag")) 392 ['tag', 'tag'] 393 394 Following example is invalid in 1.2. 395 A leading '*' is assumed in 1.3. 396 397 >>> elem.findall("section//") == elem.findall("section//*") 398 True 399 400 ET's Path module handles this case incorrectly; this gives 401 a warning in 1.3, and the behaviour will be modified in 1.4. 
402 403 >>> summarize_list(ET.ElementTree(elem).findall("/tag")) 404 ['tag', 'tag'] 405 406 >>> elem = ET.XML(SAMPLE_XML_NS) 407 >>> summarize_list(elem.findall("tag")) 408 [] 409 >>> summarize_list(elem.findall("{http://effbot.org/ns}tag")) 410 ['{http://effbot.org/ns}tag', '{http://effbot.org/ns}tag'] 411 >>> summarize_list(elem.findall(".//{http://effbot.org/ns}tag")) 412 ['{http://effbot.org/ns}tag', '{http://effbot.org/ns}tag', '{http://effbot.org/ns}tag'] 413 """ 414 415 def file_init(): 416 """ 417 >>> import StringIO 418 419 >>> stringfile = StringIO.StringIO(SAMPLE_XML) 420 >>> tree = ET.ElementTree(file=stringfile) 421 >>> tree.find("tag").tag 422 'tag' 423 >>> tree.find("section/tag").tag 424 'tag' 425 426 >>> tree = ET.ElementTree(file=SIMPLE_XMLFILE) 427 >>> tree.find("element").tag 428 'element' 429 >>> tree.find("element/../empty-element").tag 430 'empty-element' 431 """ 432 433 def bad_find(): 434 """ 435 Check bad or unsupported path expressions. 436 437 >>> elem = ET.XML(SAMPLE_XML) 438 >>> elem.findall("/tag") 439 Traceback (most recent call last): 440 SyntaxError: cannot use absolute path on element 441 """ 442 443 def path_cache(): 444 """ 445 Check that the path cache behaves sanely. 446 447 >>> elem = ET.XML(SAMPLE_XML) 448 >>> for i in range(10): ET.ElementTree(elem).find('./'+str(i)) 449 >>> cache_len_10 = len(ET.ElementPath._cache) 450 >>> for i in range(10): ET.ElementTree(elem).find('./'+str(i)) 451 >>> len(ET.ElementPath._cache) == cache_len_10 452 True 453 >>> for i in range(20): ET.ElementTree(elem).find('./'+str(i)) 454 >>> len(ET.ElementPath._cache) > cache_len_10 455 True 456 >>> for i in range(600): ET.ElementTree(elem).find('./'+str(i)) 457 >>> len(ET.ElementPath._cache) < 500 458 True 459 """ 460 461 def copy(): 462 """ 463 Test copy handling (etc). 
464 465 >>> import copy 466 >>> e1 = ET.XML("<tag>hello<foo/></tag>") 467 >>> e2 = copy.copy(e1) 468 >>> e3 = copy.deepcopy(e1) 469 >>> e1.find("foo").tag = "bar" 470 >>> serialize(e1) 471 '<tag>hello<bar /></tag>' 472 >>> serialize(e2) 473 '<tag>hello<bar /></tag>' 474 >>> serialize(e3) 475 '<tag>hello<foo /></tag>' 476 477 """ 478 479 def attrib(): 480 """ 481 Test attribute handling. 482 483 >>> elem = ET.Element("tag") 484 >>> elem.get("key") # 1.1 485 >>> elem.get("key", "default") # 1.2 486 'default' 487 >>> elem.set("key", "value") 488 >>> elem.get("key") # 1.3 489 'value' 490 491 >>> elem = ET.Element("tag", key="value") 492 >>> elem.get("key") # 2.1 493 'value' 494 >>> elem.attrib # 2.2 495 {'key': 'value'} 496 497 >>> attrib = {"key": "value"} 498 >>> elem = ET.Element("tag", attrib) 499 >>> attrib.clear() # check for aliasing issues 500 >>> elem.get("key") # 3.1 501 'value' 502 >>> elem.attrib # 3.2 503 {'key': 'value'} 504 505 >>> attrib = {"key": "value"} 506 >>> elem = ET.Element("tag", **attrib) 507 >>> attrib.clear() # check for aliasing issues 508 >>> elem.get("key") # 4.1 509 'value' 510 >>> elem.attrib # 4.2 511 {'key': 'value'} 512 513 >>> elem = ET.Element("tag", {"key": "other"}, key="value") 514 >>> elem.get("key") # 5.1 515 'value' 516 >>> elem.attrib # 5.2 517 {'key': 'value'} 518 519 >>> elem = ET.Element('test') 520 >>> elem.text = "aa" 521 >>> elem.set('testa', 'testval') 522 >>> elem.set('testb', 'test2') 523 >>> ET.tostring(elem) 524 '<test testa="testval" testb="test2">aa</test>' 525 >>> sorted(elem.keys()) 526 ['testa', 'testb'] 527 >>> sorted(elem.items()) 528 [('testa', 'testval'), ('testb', 'test2')] 529 >>> elem.attrib['testb'] 530 'test2' 531 >>> elem.attrib['testb'] = 'test1' 532 >>> elem.attrib['testc'] = 'test2' 533 >>> ET.tostring(elem) 534 '<test testa="testval" testb="test1" testc="test2">aa</test>' 535 """ 536 537 def makeelement(): 538 """ 539 Test makeelement handling. 
540 541 >>> elem = ET.Element("tag") 542 >>> attrib = {"key": "value"} 543 >>> subelem = elem.makeelement("subtag", attrib) 544 >>> if subelem.attrib is attrib: 545 ... print "attrib aliasing" 546 >>> elem.append(subelem) 547 >>> serialize(elem) 548 '<tag><subtag key="value" /></tag>' 549 550 >>> elem.clear() 551 >>> serialize(elem) 552 '<tag />' 553 >>> elem.append(subelem) 554 >>> serialize(elem) 555 '<tag><subtag key="value" /></tag>' 556 >>> elem.extend([subelem, subelem]) 557 >>> serialize(elem) 558 '<tag><subtag key="value" /><subtag key="value" /><subtag key="value" /></tag>' 559 >>> elem[:] = [subelem] 560 >>> serialize(elem) 561 '<tag><subtag key="value" /></tag>' 562 >>> elem[:] = tuple([subelem]) 563 >>> serialize(elem) 564 '<tag><subtag key="value" /></tag>' 565 566 """ 567 568 def parsefile(): 569 """ 570 Test parsing from file. 571 572 >>> tree = ET.parse(SIMPLE_XMLFILE) 573 >>> normalize_crlf(tree) 574 >>> tree.write(sys.stdout) 575 <root> 576 <element key="value">text</element> 577 <element>text</element>tail 578 <empty-element /> 579 </root> 580 >>> tree = ET.parse(SIMPLE_NS_XMLFILE) 581 >>> normalize_crlf(tree) 582 >>> tree.write(sys.stdout) 583 <ns0:root xmlns:ns0="namespace"> 584 <ns0:element key="value">text</ns0:element> 585 <ns0:element>text</ns0:element>tail 586 <ns0:empty-element /> 587 </ns0:root> 588 589 >>> with open(SIMPLE_XMLFILE) as f: 590 ... data = f.read() 591 592 >>> parser = ET.XMLParser() 593 >>> parser.version # doctest: +ELLIPSIS 594 'Expat ...' 
595 >>> parser.feed(data) 596 >>> print serialize(parser.close()) 597 <root> 598 <element key="value">text</element> 599 <element>text</element>tail 600 <empty-element /> 601 </root> 602 603 >>> parser = ET.XMLTreeBuilder() # 1.2 compatibility 604 >>> parser.feed(data) 605 >>> print serialize(parser.close()) 606 <root> 607 <element key="value">text</element> 608 <element>text</element>tail 609 <empty-element /> 610 </root> 611 612 >>> target = ET.TreeBuilder() 613 >>> parser = ET.XMLParser(target=target) 614 >>> parser.feed(data) 615 >>> print serialize(parser.close()) 616 <root> 617 <element key="value">text</element> 618 <element>text</element>tail 619 <empty-element /> 620 </root> 621 """ 622 623 def parseliteral(): 624 """ 625 >>> element = ET.XML("<html><body>text</body></html>") 626 >>> ET.ElementTree(element).write(sys.stdout) 627 <html><body>text</body></html> 628 >>> element = ET.fromstring("<html><body>text</body></html>") 629 >>> ET.ElementTree(element).write(sys.stdout) 630 <html><body>text</body></html> 631 >>> sequence = ["<html><body>", "text</bo", "dy></html>"] 632 >>> element = ET.fromstringlist(sequence) 633 >>> print ET.tostring(element) 634 <html><body>text</body></html> 635 >>> print "".join(ET.tostringlist(element)) 636 <html><body>text</body></html> 637 >>> ET.tostring(element, "ascii") 638 "<?xml version='1.0' encoding='ascii'?>\\n<html><body>text</body></html>" 639 >>> _, ids = ET.XMLID("<html><body>text</body></html>") 640 >>> len(ids) 641 0 642 >>> _, ids = ET.XMLID("<html><body id='body'>text</body></html>") 643 >>> len(ids) 644 1 645 >>> ids["body"].tag 646 'body' 647 """ 648 649 def iterparse(): 650 """ 651 Test iterparse interface. 652 653 >>> iterparse = ET.iterparse 654 655 >>> context = iterparse(SIMPLE_XMLFILE) 656 >>> action, elem = next(context) 657 >>> print action, elem.tag 658 end element 659 >>> for action, elem in context: 660 ... 
print action, elem.tag 661 end element 662 end empty-element 663 end root 664 >>> context.root.tag 665 'root' 666 667 >>> context = iterparse(SIMPLE_NS_XMLFILE) 668 >>> for action, elem in context: 669 ... print action, elem.tag 670 end {namespace}element 671 end {namespace}element 672 end {namespace}empty-element 673 end {namespace}root 674 675 >>> events = () 676 >>> context = iterparse(SIMPLE_XMLFILE, events) 677 >>> for action, elem in context: 678 ... print action, elem.tag 679 680 >>> events = () 681 >>> context = iterparse(SIMPLE_XMLFILE, events=events) 682 >>> for action, elem in context: 683 ... print action, elem.tag 684 685 >>> events = ("start", "end") 686 >>> context = iterparse(SIMPLE_XMLFILE, events) 687 >>> for action, elem in context: 688 ... print action, elem.tag 689 start root 690 start element 691 end element 692 start element 693 end element 694 start empty-element 695 end empty-element 696 end root 697 698 >>> events = ("start", "end", "start-ns", "end-ns") 699 >>> context = iterparse(SIMPLE_NS_XMLFILE, events) 700 >>> for action, elem in context: 701 ... if action in ("start", "end"): 702 ... print action, elem.tag 703 ... else: 704 ... print action, elem 705 start-ns ('', 'namespace') 706 start {namespace}root 707 start {namespace}element 708 end {namespace}element 709 start {namespace}element 710 end {namespace}element 711 start {namespace}empty-element 712 end {namespace}empty-element 713 end {namespace}root 714 end-ns None 715 716 >>> events = ("start", "end", "bogus") 717 >>> with open(SIMPLE_XMLFILE, "rb") as f: 718 ... iterparse(f, events) 719 Traceback (most recent call last): 720 ValueError: unknown event 'bogus' 721 722 >>> import StringIO 723 724 >>> source = StringIO.StringIO( 725 ... "<?xml version='1.0' encoding='iso-8859-1'?>\\n" 726 ... "<body xmlns='http://éffbot.org/ns'\\n" 727 ... 
" xmlns:cl\\xe9='http://effbot.org/ns'>text</body>\\n") 728 >>> events = ("start-ns",) 729 >>> context = iterparse(source, events) 730 >>> for action, elem in context: 731 ... print action, elem 732 start-ns ('', u'http://\\xe9ffbot.org/ns') 733 start-ns (u'cl\\xe9', 'http://effbot.org/ns') 734 735 >>> source = StringIO.StringIO("<document />junk") 736 >>> try: 737 ... for action, elem in iterparse(source): 738 ... print action, elem.tag 739 ... except ET.ParseError, v: 740 ... print v 741 end document 742 junk after document element: line 1, column 12 743 """ 744 745 def writefile(): 746 """ 747 >>> elem = ET.Element("tag") 748 >>> elem.text = "text" 749 >>> serialize(elem) 750 '<tag>text</tag>' 751 >>> ET.SubElement(elem, "subtag").text = "subtext" 752 >>> serialize(elem) 753 '<tag>text<subtag>subtext</subtag></tag>' 754 755 Test tag suppression 756 >>> elem.tag = None 757 >>> serialize(elem) 758 'text<subtag>subtext</subtag>' 759 >>> elem.insert(0, ET.Comment("comment")) 760 >>> serialize(elem) # assumes 1.3 761 'text<!--comment--><subtag>subtext</subtag>' 762 >>> elem[0] = ET.PI("key", "value") 763 >>> serialize(elem) 764 'text<?key value?><subtag>subtext</subtag>' 765 """ 766 767 def custom_builder(): 768 """ 769 Test parser w. custom builder. 770 771 >>> with open(SIMPLE_XMLFILE) as f: 772 ... data = f.read() 773 >>> class Builder: 774 ... def start(self, tag, attrib): 775 ... print "start", tag 776 ... def end(self, tag): 777 ... print "end", tag 778 ... def data(self, text): 779 ... pass 780 >>> builder = Builder() 781 >>> parser = ET.XMLParser(target=builder) 782 >>> parser.feed(data) 783 start root 784 start element 785 end element 786 start element 787 end element 788 start empty-element 789 end empty-element 790 end root 791 792 >>> with open(SIMPLE_NS_XMLFILE) as f: 793 ... data = f.read() 794 >>> class Builder: 795 ... def start(self, tag, attrib): 796 ... print "start", tag 797 ... def end(self, tag): 798 ... print "end", tag 799 ... 
def data(self, text): 800 ... pass 801 ... def pi(self, target, data): 802 ... print "pi", target, repr(data) 803 ... def comment(self, data): 804 ... print "comment", repr(data) 805 >>> builder = Builder() 806 >>> parser = ET.XMLParser(target=builder) 807 >>> parser.feed(data) 808 pi pi 'data' 809 comment ' comment ' 810 start {namespace}root 811 start {namespace}element 812 end {namespace}element 813 start {namespace}element 814 end {namespace}element 815 start {namespace}empty-element 816 end {namespace}empty-element 817 end {namespace}root 818 819 """ 820 821 def getchildren(): 822 """ 823 Test Element.getchildren() 824 825 >>> with open(SIMPLE_XMLFILE, "r") as f: 826 ... tree = ET.parse(f) 827 >>> for elem in tree.getroot().iter(): 828 ... summarize_list(elem.getchildren()) 829 ['element', 'element', 'empty-element'] 830 [] 831 [] 832 [] 833 >>> for elem in tree.getiterator(): 834 ... summarize_list(elem.getchildren()) 835 ['element', 'element', 'empty-element'] 836 [] 837 [] 838 [] 839 840 >>> elem = ET.XML(SAMPLE_XML) 841 >>> len(elem.getchildren()) 842 3 843 >>> len(elem[2].getchildren()) 844 1 845 >>> elem[:] == elem.getchildren() 846 True 847 >>> child1 = elem[0] 848 >>> child2 = elem[2] 849 >>> del elem[1:2] 850 >>> len(elem.getchildren()) 851 2 852 >>> child1 == elem[0] 853 True 854 >>> child2 == elem[1] 855 True 856 >>> elem[0:2] = [child2, child1] 857 >>> child2 == elem[0] 858 True 859 >>> child1 == elem[1] 860 True 861 >>> child1 == elem[0] 862 False 863 >>> elem.clear() 864 >>> elem.getchildren() 865 [] 866 """ 867 868 def writestring(): 869 """ 870 >>> elem = ET.XML("<html><body>text</body></html>") 871 >>> ET.tostring(elem) 872 '<html><body>text</body></html>' 873 >>> elem = ET.fromstring("<html><body>text</body></html>") 874 >>> ET.tostring(elem) 875 '<html><body>text</body></html>' 876 """ 877 878 def check_encoding(encoding): 879 """ 880 >>> check_encoding("ascii") 881 >>> check_encoding("us-ascii") 882 >>> check_encoding("iso-8859-1") 883 >>> 
check_encoding("iso-8859-15") 884 >>> check_encoding("cp437") 885 >>> check_encoding("mac-roman") 886 """ 887 ET.XML("<?xml version='1.0' encoding='%s'?><xml />" % encoding) 888 889 def encoding(): 890 r""" 891 Test encoding issues. 892 893 >>> elem = ET.Element("tag") 894 >>> elem.text = u"abc" 895 >>> serialize(elem) 896 '<tag>abc</tag>' 897 >>> serialize(elem, encoding="utf-8") 898 '<tag>abc</tag>' 899 >>> serialize(elem, encoding="us-ascii") 900 '<tag>abc</tag>' 901 >>> serialize(elem, encoding="iso-8859-1") 902 "<?xml version='1.0' encoding='iso-8859-1'?>\n<tag>abc</tag>" 903 904 >>> elem.text = "<&\"\'>" 905 >>> serialize(elem) 906 '<tag><&"\'></tag>' 907 >>> serialize(elem, encoding="utf-8") 908 '<tag><&"\'></tag>' 909 >>> serialize(elem, encoding="us-ascii") # cdata characters 910 '<tag><&"\'></tag>' 911 >>> serialize(elem, encoding="iso-8859-1") 912 '<?xml version=\'1.0\' encoding=\'iso-8859-1\'?>\n<tag><&"\'></tag>' 913 914 >>> elem.attrib["key"] = "<&\"\'>" 915 >>> elem.text = None 916 >>> serialize(elem) 917 '<tag key="<&"\'>" />' 918 >>> serialize(elem, encoding="utf-8") 919 '<tag key="<&"\'>" />' 920 >>> serialize(elem, encoding="us-ascii") 921 '<tag key="<&"\'>" />' 922 >>> serialize(elem, encoding="iso-8859-1") 923 '<?xml version=\'1.0\' encoding=\'iso-8859-1\'?>\n<tag key="<&"\'>" />' 924 925 >>> elem.text = u'\xe5\xf6\xf6<>' 926 >>> elem.attrib.clear() 927 >>> serialize(elem) 928 '<tag>åöö<></tag>' 929 >>> serialize(elem, encoding="utf-8") 930 '<tag>\xc3\xa5\xc3\xb6\xc3\xb6<></tag>' 931 >>> serialize(elem, encoding="us-ascii") 932 '<tag>åöö<></tag>' 933 >>> serialize(elem, encoding="iso-8859-1") 934 "<?xml version='1.0' encoding='iso-8859-1'?>\n<tag>\xe5\xf6\xf6<></tag>" 935 936 >>> elem.attrib["key"] = u'\xe5\xf6\xf6<>' 937 >>> elem.text = None 938 >>> serialize(elem) 939 '<tag key="åöö<>" />' 940 >>> serialize(elem, encoding="utf-8") 941 '<tag key="\xc3\xa5\xc3\xb6\xc3\xb6<>" />' 942 >>> serialize(elem, encoding="us-ascii") 943 '<tag key="åöö<>" 
/>' 944 >>> serialize(elem, encoding="iso-8859-1") 945 '<?xml version=\'1.0\' encoding=\'iso-8859-1\'?>\n<tag key="\xe5\xf6\xf6<>" />' 946 """ 947 948 def methods(): 949 r""" 950 Test serialization methods. 951 952 >>> e = ET.XML("<html><link/><script>1 < 2</script></html>") 953 >>> e.tail = "\n" 954 >>> serialize(e) 955 '<html><link /><script>1 < 2</script></html>\n' 956 >>> serialize(e, method=None) 957 '<html><link /><script>1 < 2</script></html>\n' 958 >>> serialize(e, method="xml") 959 '<html><link /><script>1 < 2</script></html>\n' 960 >>> serialize(e, method="html") 961 '<html><link><script>1 < 2</script></html>\n' 962 >>> serialize(e, method="text") 963 '1 < 2\n' 964 """ 965 966 def iterators(): 967 """ 968 Test iterators. 969 970 >>> e = ET.XML("<html><body>this is a <i>paragraph</i>.</body>..</html>") 971 >>> summarize_list(e.iter()) 972 ['html', 'body', 'i'] 973 >>> summarize_list(e.find("body").iter()) 974 ['body', 'i'] 975 >>> summarize(next(e.iter())) 976 'html' 977 >>> "".join(e.itertext()) 978 'this is a paragraph...' 979 >>> "".join(e.find("body").itertext()) 980 'this is a paragraph.' 981 >>> next(e.itertext()) 982 'this is a ' 983 984 Method iterparse should return an iterator. See bug 6472. 985 986 >>> sourcefile = serialize(e, to_string=False) 987 >>> next(ET.iterparse(sourcefile)) # doctest: +ELLIPSIS 988 ('end', <Element 'i' at 0x...>) 989 990 >>> tree = ET.ElementTree(None) 991 >>> tree.iter() 992 Traceback (most recent call last): 993 AttributeError: 'NoneType' object has no attribute 'iter' 994 """ 995 996 ENTITY_XML = """\ 997 <!DOCTYPE points [ 998 <!ENTITY % user-entities SYSTEM 'user-entities.xml'> 999 %user-entities; 1000 ]> 1001 <document>&entity;</document> 1002 """ 1003 1004 def entity(): 1005 """ 1006 Test entity handling. 
1007 1008 1) good entities 1009 1010 >>> e = ET.XML("<document title='舰'>test</document>") 1011 >>> serialize(e) 1012 '<document title="舰">test</document>' 1013 1014 2) bad entities 1015 1016 >>> ET.XML("<document>&entity;</document>") 1017 Traceback (most recent call last): 1018 ParseError: undefined entity: line 1, column 10 1019 1020 >>> ET.XML(ENTITY_XML) 1021 Traceback (most recent call last): 1022 ParseError: undefined entity &entity;: line 5, column 10 1023 1024 3) custom entity 1025 1026 >>> parser = ET.XMLParser() 1027 >>> parser.entity["entity"] = "text" 1028 >>> parser.feed(ENTITY_XML) 1029 >>> root = parser.close() 1030 >>> serialize(root) 1031 '<document>text</document>' 1032 """ 1033 1034 def error(xml): 1035 """ 1036 1037 Test error handling. 1038 1039 >>> issubclass(ET.ParseError, SyntaxError) 1040 True 1041 >>> error("foo").position 1042 (1, 0) 1043 >>> error("<tag>&foo;</tag>").position 1044 (1, 5) 1045 >>> error("foobar<").position 1046 (1, 6) 1047 1048 """ 1049 try: 1050 ET.XML(xml) 1051 except ET.ParseError: 1052 return sys.exc_value 1053 1054 def namespace(): 1055 """ 1056 Test namespace issues. 
1057 1058 1) xml namespace 1059 1060 >>> elem = ET.XML("<tag xml:lang='en' />") 1061 >>> serialize(elem) # 1.1 1062 '<tag xml:lang="en" />' 1063 1064 2) other "well-known" namespaces 1065 1066 >>> elem = ET.XML("<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#' />") 1067 >>> serialize(elem) # 2.1 1068 '<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" />' 1069 1070 >>> elem = ET.XML("<html:html xmlns:html='http://www.w3.org/1999/xhtml' />") 1071 >>> serialize(elem) # 2.2 1072 '<html:html xmlns:html="http://www.w3.org/1999/xhtml" />' 1073 1074 >>> elem = ET.XML("<soap:Envelope xmlns:soap='http://schemas.xmlsoap.org/soap/envelope' />") 1075 >>> serialize(elem) # 2.3 1076 '<ns0:Envelope xmlns:ns0="http://schemas.xmlsoap.org/soap/envelope" />' 1077 1078 3) unknown namespaces 1079 >>> elem = ET.XML(SAMPLE_XML_NS) 1080 >>> print serialize(elem) 1081 <ns0:body xmlns:ns0="http://effbot.org/ns"> 1082 <ns0:tag>text</ns0:tag> 1083 <ns0:tag /> 1084 <ns0:section> 1085 <ns0:tag>subtext</ns0:tag> 1086 </ns0:section> 1087 </ns0:body> 1088 """ 1089 1090 def qname(): 1091 """ 1092 Test QName handling. 
1093 1094 1) decorated tags 1095 1096 >>> elem = ET.Element("{uri}tag") 1097 >>> serialize(elem) # 1.1 1098 '<ns0:tag xmlns:ns0="uri" />' 1099 >>> elem = ET.Element(ET.QName("{uri}tag")) 1100 >>> serialize(elem) # 1.2 1101 '<ns0:tag xmlns:ns0="uri" />' 1102 >>> elem = ET.Element(ET.QName("uri", "tag")) 1103 >>> serialize(elem) # 1.3 1104 '<ns0:tag xmlns:ns0="uri" />' 1105 >>> elem = ET.Element(ET.QName("uri", "tag")) 1106 >>> subelem = ET.SubElement(elem, ET.QName("uri", "tag1")) 1107 >>> subelem = ET.SubElement(elem, ET.QName("uri", "tag2")) 1108 >>> serialize(elem) # 1.4 1109 '<ns0:tag xmlns:ns0="uri"><ns0:tag1 /><ns0:tag2 /></ns0:tag>' 1110 1111 2) decorated attributes 1112 1113 >>> elem.clear() 1114 >>> elem.attrib["{uri}key"] = "value" 1115 >>> serialize(elem) # 2.1 1116 '<ns0:tag xmlns:ns0="uri" ns0:key="value" />' 1117 1118 >>> elem.clear() 1119 >>> elem.attrib[ET.QName("{uri}key")] = "value" 1120 >>> serialize(elem) # 2.2 1121 '<ns0:tag xmlns:ns0="uri" ns0:key="value" />' 1122 1123 3) decorated values are not converted by default, but the 1124 QName wrapper can be used for values 1125 1126 >>> elem.clear() 1127 >>> elem.attrib["{uri}key"] = "{uri}value" 1128 >>> serialize(elem) # 3.1 1129 '<ns0:tag xmlns:ns0="uri" ns0:key="{uri}value" />' 1130 1131 >>> elem.clear() 1132 >>> elem.attrib["{uri}key"] = ET.QName("{uri}value") 1133 >>> serialize(elem) # 3.2 1134 '<ns0:tag xmlns:ns0="uri" ns0:key="ns0:value" />' 1135 1136 >>> elem.clear() 1137 >>> subelem = ET.Element("tag") 1138 >>> subelem.attrib["{uri1}key"] = ET.QName("{uri2}value") 1139 >>> elem.append(subelem) 1140 >>> elem.append(subelem) 1141 >>> serialize(elem) # 3.3 1142 '<ns0:tag xmlns:ns0="uri" xmlns:ns1="uri1" xmlns:ns2="uri2"><tag ns1:key="ns2:value" /><tag ns1:key="ns2:value" /></ns0:tag>' 1143 1144 4) Direct QName tests 1145 1146 >>> str(ET.QName('ns', 'tag')) 1147 '{ns}tag' 1148 >>> str(ET.QName('{ns}tag')) 1149 '{ns}tag' 1150 >>> q1 = ET.QName('ns', 'tag') 1151 >>> q2 = ET.QName('ns', 'tag') 
1152 >>> q1 == q2 1153 True 1154 >>> q2 = ET.QName('ns', 'other-tag') 1155 >>> q1 == q2 1156 False 1157 >>> q1 == 'ns:tag' 1158 False 1159 >>> q1 == '{ns}tag' 1160 True 1161 """ 1162 1163 def doctype_public(): 1164 """ 1165 Test PUBLIC doctype. 1166 1167 >>> elem = ET.XML('<!DOCTYPE html PUBLIC' 1168 ... ' "-//W3C//DTD XHTML 1.0 Transitional//EN"' 1169 ... ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">' 1170 ... '<html>text</html>') 1171 1172 """ 1173 1174 def xpath_tokenizer(p): 1175 """ 1176 Test the XPath tokenizer. 1177 1178 >>> # tests from the xml specification 1179 >>> xpath_tokenizer("*") 1180 ['*'] 1181 >>> xpath_tokenizer("text()") 1182 ['text', '()'] 1183 >>> xpath_tokenizer("@name") 1184 ['@', 'name'] 1185 >>> xpath_tokenizer("@*") 1186 ['@', '*'] 1187 >>> xpath_tokenizer("para[1]") 1188 ['para', '[', '1', ']'] 1189 >>> xpath_tokenizer("para[last()]") 1190 ['para', '[', 'last', '()', ']'] 1191 >>> xpath_tokenizer("*/para") 1192 ['*', '/', 'para'] 1193 >>> xpath_tokenizer("/doc/chapter[5]/section[2]") 1194 ['/', 'doc', '/', 'chapter', '[', '5', ']', '/', 'section', '[', '2', ']'] 1195 >>> xpath_tokenizer("chapter//para") 1196 ['chapter', '//', 'para'] 1197 >>> xpath_tokenizer("//para") 1198 ['//', 'para'] 1199 >>> xpath_tokenizer("//olist/item") 1200 ['//', 'olist', '/', 'item'] 1201 >>> xpath_tokenizer(".") 1202 ['.'] 1203 >>> xpath_tokenizer(".//para") 1204 ['.', '//', 'para'] 1205 >>> xpath_tokenizer("..") 1206 ['..'] 1207 >>> xpath_tokenizer("../@lang") 1208 ['..', '/', '@', 'lang'] 1209 >>> xpath_tokenizer("chapter[title]") 1210 ['chapter', '[', 'title', ']'] 1211 >>> xpath_tokenizer("employee[@secretary and @assistant]") 1212 ['employee', '[', '@', 'secretary', '', 'and', '', '@', 'assistant', ']'] 1213 1214 >>> # additional tests 1215 >>> xpath_tokenizer("{http://spam}egg") 1216 ['{http://spam}egg'] 1217 >>> xpath_tokenizer("./spam.egg") 1218 ['.', '/', 'spam.egg'] 1219 >>> xpath_tokenizer(".//{http://spam}egg") 1220 ['.', '//', 
'{http://spam}egg'] 1221 """ 1222 from xml.etree import ElementPath 1223 out = [] 1224 for op, tag in ElementPath.xpath_tokenizer(p): 1225 out.append(op or tag) 1226 return out 1227 1228 def processinginstruction(): 1229 """ 1230 Test ProcessingInstruction directly 1231 1232 >>> ET.tostring(ET.ProcessingInstruction('test', 'instruction')) 1233 '<?test instruction?>' 1234 >>> ET.tostring(ET.PI('test', 'instruction')) 1235 '<?test instruction?>' 1236 1237 Issue #2746 1238 1239 >>> ET.tostring(ET.PI('test', '<testing&>')) 1240 '<?test <testing&>?>' 1241 >>> ET.tostring(ET.PI('test', u'<testing&>\xe3'), 'latin1') 1242 "<?xml version='1.0' encoding='latin1'?>\\n<?test <testing&>\\xe3?>" 1243 """ 1244 1245 # 1246 # xinclude tests (samples from appendix C of the xinclude specification) 1247 1248 XINCLUDE = {} 1249 1250 XINCLUDE["C1.xml"] = """\ 1251 <?xml version='1.0'?> 1252 <document xmlns:xi="http://www.w3.org/2001/XInclude"> 1253 <p>120 Mz is adequate for an average home user.</p> 1254 <xi:include href="disclaimer.xml"/> 1255 </document> 1256 """ 1257 1258 XINCLUDE["disclaimer.xml"] = """\ 1259 <?xml version='1.0'?> 1260 <disclaimer> 1261 <p>The opinions represented herein represent those of the individual 1262 and should not be interpreted as official policy endorsed by this 1263 organization.</p> 1264 </disclaimer> 1265 """ 1266 1267 XINCLUDE["C2.xml"] = """\ 1268 <?xml version='1.0'?> 1269 <document xmlns:xi="http://www.w3.org/2001/XInclude"> 1270 <p>This document has been accessed 1271 <xi:include href="count.txt" parse="text"/> times.</p> 1272 </document> 1273 """ 1274 1275 XINCLUDE["count.txt"] = "324387" 1276 1277 XINCLUDE["C2b.xml"] = """\ 1278 <?xml version='1.0'?> 1279 <document xmlns:xi="http://www.w3.org/2001/XInclude"> 1280 <p>This document has been <em>accessed</em> 1281 <xi:include href="count.txt" parse="text"/> times.</p> 1282 </document> 1283 """ 1284 1285 XINCLUDE["C3.xml"] = """\ 1286 <?xml version='1.0'?> 1287 <document 
xmlns:xi="http://www.w3.org/2001/XInclude"> 1288 <p>The following is the source of the "data.xml" resource:</p> 1289 <example><xi:include href="data.xml" parse="text"/></example> 1290 </document> 1291 """ 1292 1293 XINCLUDE["data.xml"] = """\ 1294 <?xml version='1.0'?> 1295 <data> 1296 <item><![CDATA[Brooks & Shields]]></item> 1297 </data> 1298 """ 1299 1300 XINCLUDE["C5.xml"] = """\ 1301 <?xml version='1.0'?> 1302 <div xmlns:xi="http://www.w3.org/2001/XInclude"> 1303 <xi:include href="example.txt" parse="text"> 1304 <xi:fallback> 1305 <xi:include href="fallback-example.txt" parse="text"> 1306 <xi:fallback><a href="mailto:bob (at] example.org">Report error</a></xi:fallback> 1307 </xi:include> 1308 </xi:fallback> 1309 </xi:include> 1310 </div> 1311 """ 1312 1313 XINCLUDE["default.xml"] = """\ 1314 <?xml version='1.0'?> 1315 <document xmlns:xi="http://www.w3.org/2001/XInclude"> 1316 <p>Example.</p> 1317 <xi:include href="{}"/> 1318 </document> 1319 """.format(cgi.escape(SIMPLE_XMLFILE, True)) 1320 1321 def xinclude_loader(href, parse="xml", encoding=None): 1322 try: 1323 data = XINCLUDE[href] 1324 except KeyError: 1325 raise IOError("resource not found") 1326 if parse == "xml": 1327 from xml.etree.ElementTree import XML 1328 return XML(data) 1329 return data 1330 1331 def xinclude(): 1332 r""" 1333 Basic inclusion example (XInclude C.1) 1334 1335 >>> from xml.etree import ElementTree as ET 1336 >>> from xml.etree import ElementInclude 1337 1338 >>> document = xinclude_loader("C1.xml") 1339 >>> ElementInclude.include(document, xinclude_loader) 1340 >>> print serialize(document) # C1 1341 <document> 1342 <p>120 Mz is adequate for an average home user.</p> 1343 <disclaimer> 1344 <p>The opinions represented herein represent those of the individual 1345 and should not be interpreted as official policy endorsed by this 1346 organization.</p> 1347 </disclaimer> 1348 </document> 1349 1350 Textual inclusion example (XInclude C.2) 1351 1352 >>> document = 
xinclude_loader("C2.xml") 1353 >>> ElementInclude.include(document, xinclude_loader) 1354 >>> print serialize(document) # C2 1355 <document> 1356 <p>This document has been accessed 1357 324387 times.</p> 1358 </document> 1359 1360 Textual inclusion after sibling element (based on modified XInclude C.2) 1361 1362 >>> document = xinclude_loader("C2b.xml") 1363 >>> ElementInclude.include(document, xinclude_loader) 1364 >>> print(serialize(document)) # C2b 1365 <document> 1366 <p>This document has been <em>accessed</em> 1367 324387 times.</p> 1368 </document> 1369 1370 Textual inclusion of XML example (XInclude C.3) 1371 1372 >>> document = xinclude_loader("C3.xml") 1373 >>> ElementInclude.include(document, xinclude_loader) 1374 >>> print serialize(document) # C3 1375 <document> 1376 <p>The following is the source of the "data.xml" resource:</p> 1377 <example><?xml version='1.0'?> 1378 <data> 1379 <item><![CDATA[Brooks & Shields]]></item> 1380 </data> 1381 </example> 1382 </document> 1383 1384 Fallback example (XInclude C.5) 1385 Note! 
Fallback support is not yet implemented 1386 1387 >>> document = xinclude_loader("C5.xml") 1388 >>> ElementInclude.include(document, xinclude_loader) 1389 Traceback (most recent call last): 1390 IOError: resource not found 1391 >>> # print serialize(document) # C5 1392 """ 1393 1394 def xinclude_default(): 1395 """ 1396 >>> from xml.etree import ElementInclude 1397 1398 >>> document = xinclude_loader("default.xml") 1399 >>> ElementInclude.include(document) 1400 >>> print serialize(document) # default 1401 <document> 1402 <p>Example.</p> 1403 <root> 1404 <element key="value">text</element> 1405 <element>text</element>tail 1406 <empty-element /> 1407 </root> 1408 </document> 1409 """ 1410 1411 # 1412 # badly formatted xi:include tags 1413 1414 XINCLUDE_BAD = {} 1415 1416 XINCLUDE_BAD["B1.xml"] = """\ 1417 <?xml version='1.0'?> 1418 <document xmlns:xi="http://www.w3.org/2001/XInclude"> 1419 <p>120 Mz is adequate for an average home user.</p> 1420 <xi:include href="disclaimer.xml" parse="BAD_TYPE"/> 1421 </document> 1422 """ 1423 1424 XINCLUDE_BAD["B2.xml"] = """\ 1425 <?xml version='1.0'?> 1426 <div xmlns:xi="http://www.w3.org/2001/XInclude"> 1427 <xi:fallback></xi:fallback> 1428 </div> 1429 """ 1430 1431 def xinclude_failures(): 1432 r""" 1433 Test failure to locate included XML file. 1434 1435 >>> from xml.etree import ElementInclude 1436 1437 >>> def none_loader(href, parser, encoding=None): 1438 ... return None 1439 1440 >>> document = ET.XML(XINCLUDE["C1.xml"]) 1441 >>> ElementInclude.include(document, loader=none_loader) 1442 Traceback (most recent call last): 1443 FatalIncludeError: cannot load 'disclaimer.xml' as 'xml' 1444 1445 Test failure to locate included text file. 1446 1447 >>> document = ET.XML(XINCLUDE["C2.xml"]) 1448 >>> ElementInclude.include(document, loader=none_loader) 1449 Traceback (most recent call last): 1450 FatalIncludeError: cannot load 'count.txt' as 'text' 1451 1452 Test bad parse type. 
1453 1454 >>> document = ET.XML(XINCLUDE_BAD["B1.xml"]) 1455 >>> ElementInclude.include(document, loader=none_loader) 1456 Traceback (most recent call last): 1457 FatalIncludeError: unknown parse type in xi:include tag ('BAD_TYPE') 1458 1459 Test xi:fallback outside xi:include. 1460 1461 >>> document = ET.XML(XINCLUDE_BAD["B2.xml"]) 1462 >>> ElementInclude.include(document, loader=none_loader) 1463 Traceback (most recent call last): 1464 FatalIncludeError: xi:fallback tag must be child of xi:include ('{http://www.w3.org/2001/XInclude}fallback') 1465 """ 1466 1467 # -------------------------------------------------------------------- 1468 # reported bugs 1469 1470 def bug_xmltoolkit21(): 1471 """ 1472 1473 marshaller gives obscure errors for non-string values 1474 1475 >>> elem = ET.Element(123) 1476 >>> serialize(elem) # tag 1477 Traceback (most recent call last): 1478 TypeError: cannot serialize 123 (type int) 1479 >>> elem = ET.Element("elem") 1480 >>> elem.text = 123 1481 >>> serialize(elem) # text 1482 Traceback (most recent call last): 1483 TypeError: cannot serialize 123 (type int) 1484 >>> elem = ET.Element("elem") 1485 >>> elem.tail = 123 1486 >>> serialize(elem) # tail 1487 Traceback (most recent call last): 1488 TypeError: cannot serialize 123 (type int) 1489 >>> elem = ET.Element("elem") 1490 >>> elem.set(123, "123") 1491 >>> serialize(elem) # attribute key 1492 Traceback (most recent call last): 1493 TypeError: cannot serialize 123 (type int) 1494 >>> elem = ET.Element("elem") 1495 >>> elem.set("123", 123) 1496 >>> serialize(elem) # attribute value 1497 Traceback (most recent call last): 1498 TypeError: cannot serialize 123 (type int) 1499 1500 """ 1501 1502 def bug_xmltoolkit25(): 1503 """ 1504 1505 typo in ElementTree.findtext 1506 1507 >>> elem = ET.XML(SAMPLE_XML) 1508 >>> tree = ET.ElementTree(elem) 1509 >>> tree.findtext("tag") 1510 'text' 1511 >>> tree.findtext("section/tag") 1512 'subtext' 1513 1514 """ 1515 1516 def bug_xmltoolkit28(): 1517 """ 
1518 1519 .//tag causes exceptions 1520 1521 >>> tree = ET.XML("<doc><table><tbody/></table></doc>") 1522 >>> summarize_list(tree.findall(".//thead")) 1523 [] 1524 >>> summarize_list(tree.findall(".//tbody")) 1525 ['tbody'] 1526 1527 """ 1528 1529 def bug_xmltoolkitX1(): 1530 """ 1531 1532 dump() doesn't flush the output buffer 1533 1534 >>> tree = ET.XML("<doc><table><tbody/></table></doc>") 1535 >>> ET.dump(tree); sys.stdout.write("tail") 1536 <doc><table><tbody /></table></doc> 1537 tail 1538 1539 """ 1540 1541 def bug_xmltoolkit39(): 1542 """ 1543 1544 non-ascii element and attribute names doesn't work 1545 1546 >>> tree = ET.XML("<?xml version='1.0' encoding='iso-8859-1'?><t\xe4g />") 1547 >>> ET.tostring(tree, "utf-8") 1548 '<t\\xc3\\xa4g />' 1549 1550 >>> tree = ET.XML("<?xml version='1.0' encoding='iso-8859-1'?><tag \xe4ttr='välue' />") 1551 >>> tree.attrib 1552 {u'\\xe4ttr': u'v\\xe4lue'} 1553 >>> ET.tostring(tree, "utf-8") 1554 '<tag \\xc3\\xa4ttr="v\\xc3\\xa4lue" />' 1555 1556 >>> tree = ET.XML("<?xml version='1.0' encoding='iso-8859-1'?><t\xe4g>text</t\xe4g>") 1557 >>> ET.tostring(tree, "utf-8") 1558 '<t\\xc3\\xa4g>text</t\\xc3\\xa4g>' 1559 1560 >>> tree = ET.Element(u"t\u00e4g") 1561 >>> ET.tostring(tree, "utf-8") 1562 '<t\\xc3\\xa4g />' 1563 1564 >>> tree = ET.Element("tag") 1565 >>> tree.set(u"\u00e4ttr", u"v\u00e4lue") 1566 >>> ET.tostring(tree, "utf-8") 1567 '<tag \\xc3\\xa4ttr="v\\xc3\\xa4lue" />' 1568 1569 """ 1570 1571 def bug_xmltoolkit54(): 1572 """ 1573 1574 problems handling internally defined entities 1575 1576 >>> e = ET.XML("<!DOCTYPE doc [<!ENTITY ldots '舰'>]><doc>&ldots;</doc>") 1577 >>> serialize(e) 1578 '<doc>舰</doc>' 1579 1580 """ 1581 1582 def bug_xmltoolkit55(): 1583 """ 1584 1585 make sure we're reporting the first error, not the last 1586 1587 >>> e = ET.XML("<!DOCTYPE doc SYSTEM 'doc.dtd'><doc>&ldots;&ndots;&rdots;</doc>") 1588 Traceback (most recent call last): 1589 ParseError: undefined entity &ldots;: line 1, column 36 1590 
1591 """ 1592 1593 class ExceptionFile: 1594 def read(self, x): 1595 raise IOError 1596 1597 def xmltoolkit60(): 1598 """ 1599 1600 Handle crash in stream source. 1601 >>> tree = ET.parse(ExceptionFile()) 1602 Traceback (most recent call last): 1603 IOError 1604 1605 """ 1606 1607 XMLTOOLKIT62_DOC = """<?xml version="1.0" encoding="UTF-8"?> 1608 <!DOCTYPE patent-application-publication SYSTEM "pap-v15-2001-01-31.dtd" []> 1609 <patent-application-publication> 1610 <subdoc-abstract> 1611 <paragraph id="A-0001" lvl="0">A new cultivar of Begonia plant named ‘BCT9801BEG’.</paragraph> 1612 </subdoc-abstract> 1613 </patent-application-publication>""" 1614 1615 1616 def xmltoolkit62(): 1617 """ 1618 1619 Don't crash when using custom entities. 1620 1621 >>> xmltoolkit62() 1622 u'A new cultivar of Begonia plant named \u2018BCT9801BEG\u2019.' 1623 1624 """ 1625 ENTITIES = {u'rsquo': u'\u2019', u'lsquo': u'\u2018'} 1626 parser = ET.XMLTreeBuilder() 1627 parser.entity.update(ENTITIES) 1628 parser.feed(XMLTOOLKIT62_DOC) 1629 t = parser.close() 1630 return t.find('.//paragraph').text 1631 1632 def xmltoolkit63(): 1633 """ 1634 1635 Check reference leak. 1636 >>> xmltoolkit63() 1637 >>> count = sys.getrefcount(None) 1638 >>> for i in range(1000): 1639 ... xmltoolkit63() 1640 >>> sys.getrefcount(None) - count 1641 0 1642 1643 """ 1644 tree = ET.TreeBuilder() 1645 tree.start("tag", {}) 1646 tree.data("text") 1647 tree.end("tag") 1648 1649 # -------------------------------------------------------------------- 1650 1651 1652 def bug_200708_newline(): 1653 r""" 1654 1655 Preserve newlines in attributes. 
1656 1657 >>> e = ET.Element('SomeTag', text="def _f():\n return 3\n") 1658 >>> ET.tostring(e) 1659 '<SomeTag text="def _f(): return 3 " />' 1660 >>> ET.XML(ET.tostring(e)).get("text") 1661 'def _f():\n return 3\n' 1662 >>> ET.tostring(ET.XML(ET.tostring(e))) 1663 '<SomeTag text="def _f(): return 3 " />' 1664 1665 """ 1666 1667 def bug_200708_close(): 1668 """ 1669 1670 Test default builder. 1671 >>> parser = ET.XMLParser() # default 1672 >>> parser.feed("<element>some text</element>") 1673 >>> summarize(parser.close()) 1674 'element' 1675 1676 Test custom builder. 1677 >>> class EchoTarget: 1678 ... def close(self): 1679 ... return ET.Element("element") # simulate root 1680 >>> parser = ET.XMLParser(EchoTarget()) 1681 >>> parser.feed("<element>some text</element>") 1682 >>> summarize(parser.close()) 1683 'element' 1684 1685 """ 1686 1687 def bug_200709_default_namespace(): 1688 """ 1689 1690 >>> e = ET.Element("{default}elem") 1691 >>> s = ET.SubElement(e, "{default}elem") 1692 >>> serialize(e, default_namespace="default") # 1 1693 '<elem xmlns="default"><elem /></elem>' 1694 1695 >>> e = ET.Element("{default}elem") 1696 >>> s = ET.SubElement(e, "{default}elem") 1697 >>> s = ET.SubElement(e, "{not-default}elem") 1698 >>> serialize(e, default_namespace="default") # 2 1699 '<elem xmlns="default" xmlns:ns1="not-default"><elem /><ns1:elem /></elem>' 1700 1701 >>> e = ET.Element("{default}elem") 1702 >>> s = ET.SubElement(e, "{default}elem") 1703 >>> s = ET.SubElement(e, "elem") # unprefixed name 1704 >>> serialize(e, default_namespace="default") # 3 1705 Traceback (most recent call last): 1706 ValueError: cannot use non-qualified names with default_namespace option 1707 1708 """ 1709 1710 def bug_200709_register_namespace(): 1711 """ 1712 1713 >>> ET.tostring(ET.Element("{http://namespace.invalid/does/not/exist/}title")) 1714 '<ns0:title xmlns:ns0="http://namespace.invalid/does/not/exist/" />' 1715 >>> ET.register_namespace("foo", 
"http://namespace.invalid/does/not/exist/") 1716 >>> ET.tostring(ET.Element("{http://namespace.invalid/does/not/exist/}title")) 1717 '<foo:title xmlns:foo="http://namespace.invalid/does/not/exist/" />' 1718 1719 And the Dublin Core namespace is in the default list: 1720 1721 >>> ET.tostring(ET.Element("{http://purl.org/dc/elements/1.1/}title")) 1722 '<dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" />' 1723 1724 """ 1725 1726 def bug_200709_element_comment(): 1727 """ 1728 1729 Not sure if this can be fixed, really (since the serializer needs 1730 ET.Comment, not cET.comment). 1731 1732 >>> a = ET.Element('a') 1733 >>> a.append(ET.Comment('foo')) 1734 >>> a[0].tag == ET.Comment 1735 True 1736 1737 >>> a = ET.Element('a') 1738 >>> a.append(ET.PI('foo')) 1739 >>> a[0].tag == ET.PI 1740 True 1741 1742 """ 1743 1744 def bug_200709_element_insert(): 1745 """ 1746 1747 >>> a = ET.Element('a') 1748 >>> b = ET.SubElement(a, 'b') 1749 >>> c = ET.SubElement(a, 'c') 1750 >>> d = ET.Element('d') 1751 >>> a.insert(0, d) 1752 >>> summarize_list(a) 1753 ['d', 'b', 'c'] 1754 >>> a.insert(-1, d) 1755 >>> summarize_list(a) 1756 ['d', 'b', 'd', 'c'] 1757 1758 """ 1759 1760 def bug_200709_iter_comment(): 1761 """ 1762 1763 >>> a = ET.Element('a') 1764 >>> b = ET.SubElement(a, 'b') 1765 >>> comment_b = ET.Comment("TEST-b") 1766 >>> b.append(comment_b) 1767 >>> summarize_list(a.iter(ET.Comment)) 1768 ['<Comment>'] 1769 1770 """ 1771 1772 # -------------------------------------------------------------------- 1773 # reported on bugs.python.org 1774 1775 def bug_1534630(): 1776 """ 1777 1778 >>> bob = ET.TreeBuilder() 1779 >>> e = bob.data("data") 1780 >>> e = bob.start("tag", {}) 1781 >>> e = bob.end("tag") 1782 >>> e = bob.close() 1783 >>> serialize(e) 1784 '<tag />' 1785 1786 """ 1787 1788 def check_issue6233(): 1789 """ 1790 1791 >>> e = ET.XML("<?xml version='1.0' encoding='utf-8'?><body>t\\xc3\\xa3g</body>") 1792 >>> ET.tostring(e, 'ascii') 1793 "<?xml version='1.0' 
encoding='ascii'?>\\n<body>tãg</body>" 1794 >>> e = ET.XML("<?xml version='1.0' encoding='iso-8859-1'?><body>t\\xe3g</body>") 1795 >>> ET.tostring(e, 'ascii') 1796 "<?xml version='1.0' encoding='ascii'?>\\n<body>tãg</body>" 1797 1798 """ 1799 1800 def check_issue3151(): 1801 """ 1802 1803 >>> e = ET.XML('<prefix:localname xmlns:prefix="${stuff}"/>') 1804 >>> e.tag 1805 '{${stuff}}localname' 1806 >>> t = ET.ElementTree(e) 1807 >>> ET.tostring(e) 1808 '<ns0:localname xmlns:ns0="${stuff}" />' 1809 1810 """ 1811 1812 def check_issue6565(): 1813 """ 1814 1815 >>> elem = ET.XML("<body><tag/></body>") 1816 >>> summarize_list(elem) 1817 ['tag'] 1818 >>> newelem = ET.XML(SAMPLE_XML) 1819 >>> elem[:] = newelem[:] 1820 >>> summarize_list(elem) 1821 ['tag', 'tag', 'section'] 1822 1823 """ 1824 1825 def check_html_empty_elems_serialization(self): 1826 # issue 15970 1827 # from http://www.w3.org/TR/html401/index/elements.html 1828 """ 1829 1830 >>> empty_elems = ['AREA', 'BASE', 'BASEFONT', 'BR', 'COL', 'FRAME', 'HR', 1831 ... 
'IMG', 'INPUT', 'ISINDEX', 'LINK', 'META', 'PARAM'] 1832 >>> elems = ''.join('<%s />' % elem for elem in empty_elems) 1833 >>> serialize(ET.XML('<html>%s</html>' % elems), method='html') 1834 '<html><AREA><BASE><BASEFONT><BR><COL><FRAME><HR><IMG><INPUT><ISINDEX><LINK><META><PARAM></html>' 1835 >>> serialize(ET.XML('<html>%s</html>' % elems.lower()), method='html') 1836 '<html><area><base><basefont><br><col><frame><hr><img><input><isindex><link><meta><param></html>' 1837 >>> elems = ''.join('<%s></%s>' % (elem, elem) for elem in empty_elems) 1838 >>> serialize(ET.XML('<html>%s</html>' % elems), method='html') 1839 '<html><AREA><BASE><BASEFONT><BR><COL><FRAME><HR><IMG><INPUT><ISINDEX><LINK><META><PARAM></html>' 1840 >>> serialize(ET.XML('<html>%s</html>' % elems.lower()), method='html') 1841 '<html><area><base><basefont><br><col><frame><hr><img><input><isindex><link><meta><param></html>' 1842 1843 """ 1844 1845 # -------------------------------------------------------------------- 1846 1847 1848 class CleanContext(object): 1849 """Provide default namespace mapping and path cache.""" 1850 checkwarnings = None 1851 1852 def __init__(self, quiet=False): 1853 if sys.flags.optimize >= 2: 1854 # under -OO, doctests cannot be run and therefore not all warnings 1855 # will be emitted 1856 quiet = True 1857 deprecations = ( 1858 # Search behaviour is broken if search path starts with "/". 1859 ("This search is broken in 1.3 and earlier, and will be fixed " 1860 "in a future version. If you rely on the current behaviour, " 1861 "change it to '.+'", FutureWarning), 1862 # Element.getchildren() and Element.getiterator() are deprecated. 1863 ("This method will be removed in future versions. " 1864 "Use .+ instead.", DeprecationWarning), 1865 ("This method will be removed in future versions. " 1866 "Use .+ instead.", PendingDeprecationWarning), 1867 # XMLParser.doctype() is deprecated. 1868 ("This method of XMLParser is deprecated. Define doctype.. 
" 1869 "method on the TreeBuilder target.", DeprecationWarning)) 1870 self.checkwarnings = test_support.check_warnings(*deprecations, 1871 quiet=quiet) 1872 1873 def __enter__(self): 1874 from xml.etree import ElementTree 1875 self._nsmap = ElementTree._namespace_map 1876 self._path_cache = ElementTree.ElementPath._cache 1877 # Copy the default namespace mapping 1878 ElementTree._namespace_map = self._nsmap.copy() 1879 # Copy the path cache (should be empty) 1880 ElementTree.ElementPath._cache = self._path_cache.copy() 1881 self.checkwarnings.__enter__() 1882 1883 def __exit__(self, *args): 1884 from xml.etree import ElementTree 1885 # Restore mapping and path cache 1886 ElementTree._namespace_map = self._nsmap 1887 ElementTree.ElementPath._cache = self._path_cache 1888 self.checkwarnings.__exit__(*args) 1889 1890 1891 def test_main(module_name='xml.etree.ElementTree'): 1892 from test import test_xml_etree 1893 1894 use_py_module = (module_name == 'xml.etree.ElementTree') 1895 1896 # The same doctests are used for both the Python and the C implementations 1897 assert test_xml_etree.ET.__name__ == module_name 1898 1899 # XXX the C module should give the same warnings as the Python module 1900 with CleanContext(quiet=not use_py_module): 1901 test_support.run_doctest(test_xml_etree, verbosity=True) 1902 1903 # The module should not be changed by the tests 1904 assert test_xml_etree.ET.__name__ == module_name 1905 1906 if __name__ == '__main__': 1907 test_main() 1908