Home | History | Annotate | Download | only in test
      1 # xml.etree test.  This file contains enough tests to make sure that

      2 # all included components work as they should.

      3 # Large parts are extracted from the upstream test suite.

      4 
      5 # IMPORTANT: the same doctests are run from "test_xml_etree_c" in

      6 # order to ensure consistency between the C implementation and the

      7 # Python implementation.

      8 #

      9 # For this purpose, the module-level "ET" symbol is temporarily

     10 # monkey-patched when running the "test_xml_etree_c" test suite.

     11 # Don't re-import "xml.etree.ElementTree" module in the docstring,

     12 # except if the test is specific to the Python implementation.

     13 
     14 import sys
     15 import cgi
     16 
     17 from test import test_support
     18 from test.test_support import findfile
     19 
     20 from xml.etree import ElementTree as ET
     21 
# Paths of the XML fixture files, looked up with test_support.findfile in the
# "xmltestdata" subdirectory.
SIMPLE_XMLFILE = findfile("simple.xml", subdir="xmltestdata")
SIMPLE_NS_XMLFILE = findfile("simple-ns.xml", subdir="xmltestdata")

# Un-namespaced sample document: a <body> holding two <tag> children and a
# <section> that wraps a third <tag>.  The trailing backslash after the
# opening quotes keeps the string from starting with a newline.
SAMPLE_XML = """\
<body>
  <tag class='a'>text</tag>
  <tag class='b' />
  <section>
    <tag class='b' id='inner'>subtext</tag>
  </section>
</body>
"""

# Stand-alone <section> fragment with <tag>, <nexttag> and a nested
# <nextsection>.
SAMPLE_SECTION = """\
<section>
  <tag class='b' id='inner'>subtext</tag>
  <nexttag />
  <nextsection>
    <tag />
  </nextsection>
</section>
"""

# Same layout as SAMPLE_XML but without attributes and with a default
# namespace declared on <body>.  Unlike the two constants above, this string
# begins with a newline (there is no backslash after the opening quotes).
SAMPLE_XML_NS = """
<body xmlns="http://effbot.org/ns">
  <tag>text</tag>
  <tag />
  <section>
    <tag>subtext</tag>
  </section>
</body>
"""
     54 
     55 
# Doctest-only function: it contains no statements, and its docstring imports
# the ElementTree, ElementInclude and ElementPath modules from xml.etree.
def sanity():
    """
    Import sanity.

    >>> from xml.etree import ElementTree
    >>> from xml.etree import ElementInclude
    >>> from xml.etree import ElementPath
    """
     64 
     65 def check_method(method):
     66     if not hasattr(method, '__call__'):
     67         print method, "not callable"
     68 
     69 def serialize(elem, to_string=True, **options):
     70     import StringIO
     71     file = StringIO.StringIO()
     72     tree = ET.ElementTree(elem)
     73     tree.write(file, **options)
     74     if to_string:
     75         return file.getvalue()
     76     else:
     77         file.seek(0)
     78         return file
     79 
     80 def summarize(elem):
     81     if elem.tag == ET.Comment:
     82         return "<Comment>"
     83     return elem.tag
     84 
     85 def summarize_list(seq):
     86     return [summarize(elem) for elem in seq]
     87 
     88 def normalize_crlf(tree):
     89     for elem in tree.iter():
     90         if elem.text:
     91             elem.text = elem.text.replace("\r\n", "\n")
     92         if elem.tail:
     93             elem.tail = elem.tail.replace("\r\n", "\n")
     94 
     95 def check_string(string):
     96     len(string)
     97     for char in string:
     98         if len(char) != 1:
     99             print "expected one-character string, got %r" % char
    100     new_string = string + ""
    101     new_string = string + " "
    102     string[:0]
    103 
    104 def check_mapping(mapping):
    105     len(mapping)
    106     keys = mapping.keys()
    107     items = mapping.items()
    108     for key in keys:
    109         item = mapping[key]
    110     mapping["key"] = "value"
    111     if mapping["key"] != "value":
    112         print "expected value string, got %r" % mapping["key"]
    113 
    114 def check_element(element):
    115     if not ET.iselement(element):
    116         print "not an element"
    117     if not hasattr(element, "tag"):
    118         print "no tag member"
    119     if not hasattr(element, "attrib"):
    120         print "no attrib member"
    121     if not hasattr(element, "text"):
    122         print "no text member"
    123     if not hasattr(element, "tail"):
    124         print "no tail member"
    125 
    126     check_string(element.tag)
    127     check_mapping(element.attrib)
    128     if element.text is not None:
    129         check_string(element.text)
    130     if element.tail is not None:
    131         check_string(element.tail)
    132     for elem in element:
    133         check_element(elem)
    134 
    135 # --------------------------------------------------------------------

    136 # element tree tests

    137 
# Doctest-only function: the raw docstring is the test body.  It runs
# check_element on a fresh Element and on an ElementTree root, runs
# check_method on the standard Element methods and on the .next attribute of
# the iter()/iterfind() results, and asserts the module-level aliases.
def interface():
    r"""
    Test element tree interface.

    >>> element = ET.Element("tag")
    >>> check_element(element)
    >>> tree = ET.ElementTree(element)
    >>> check_element(tree.getroot())

    >>> element = ET.Element("t\xe4g", key="value")
    >>> tree = ET.ElementTree(element)
    >>> repr(element)   # doctest: +ELLIPSIS
    "<Element 't\\xe4g' at 0x...>"
    >>> element = ET.Element("tag", key="value")

    Make sure all standard element methods exist.

    >>> check_method(element.append)
    >>> check_method(element.extend)
    >>> check_method(element.insert)
    >>> check_method(element.remove)
    >>> check_method(element.getchildren)
    >>> check_method(element.find)
    >>> check_method(element.iterfind)
    >>> check_method(element.findall)
    >>> check_method(element.findtext)
    >>> check_method(element.clear)
    >>> check_method(element.get)
    >>> check_method(element.set)
    >>> check_method(element.keys)
    >>> check_method(element.items)
    >>> check_method(element.iter)
    >>> check_method(element.itertext)
    >>> check_method(element.getiterator)

    These methods return an iterable. See bug 6472.

    >>> check_method(element.iter("tag").next)
    >>> check_method(element.iterfind("tag").next)
    >>> check_method(element.iterfind("*").next)
    >>> check_method(tree.iter("tag").next)
    >>> check_method(tree.iterfind("tag").next)
    >>> check_method(tree.iterfind("*").next)

    These aliases are provided:

    >>> assert ET.XML == ET.fromstring
    >>> assert ET.PI == ET.ProcessingInstruction
    >>> assert ET.XMLParser == ET.XMLTreeBuilder
    """
    188 
# Doctest-only function: the docstring exercises append/remove/insert/extend
# plus slice assignment, slice reads and slice deletion on an element, and
# compares the result of serialize() after each step.
def simpleops():
    """
    Basic method sanity checks.

    >>> elem = ET.XML("<body><tag/></body>")
    >>> serialize(elem)
    '<body><tag /></body>'
    >>> e = ET.Element("tag2")
    >>> elem.append(e)
    >>> serialize(elem)
    '<body><tag /><tag2 /></body>'
    >>> elem.remove(e)
    >>> serialize(elem)
    '<body><tag /></body>'
    >>> elem.insert(0, e)
    >>> serialize(elem)
    '<body><tag2 /><tag /></body>'
    >>> elem.remove(e)
    >>> elem.extend([e])
    >>> serialize(elem)
    '<body><tag /><tag2 /></body>'
    >>> elem.remove(e)

    >>> element = ET.Element("tag", key="value")
    >>> serialize(element) # 1
    '<tag key="value" />'
    >>> subelement = ET.Element("subtag")
    >>> element.append(subelement)
    >>> serialize(element) # 2
    '<tag key="value"><subtag /></tag>'
    >>> element.insert(0, subelement)
    >>> serialize(element) # 3
    '<tag key="value"><subtag /><subtag /></tag>'
    >>> element.remove(subelement)
    >>> serialize(element) # 4
    '<tag key="value"><subtag /></tag>'
    >>> element.remove(subelement)
    >>> serialize(element) # 5
    '<tag key="value" />'
    >>> element.remove(subelement)
    Traceback (most recent call last):
    ValueError: list.remove(x): x not in list
    >>> serialize(element) # 6
    '<tag key="value" />'
    >>> element[0:0] = [subelement, subelement, subelement]
    >>> serialize(element[1])
    '<subtag />'
    >>> element[1:9] == [element[1], element[2]]
    True
    >>> element[:9:2] == [element[0], element[2]]
    True
    >>> del element[1:2]
    >>> serialize(element)
    '<tag key="value"><subtag /><subtag /></tag>'
    """
    244 
    245 def cdata():
    246     """
    247     Test CDATA handling (etc).
    248 
    249     >>> serialize(ET.XML("<tag>hello</tag>"))
    250     '<tag>hello</tag>'
    251     >>> serialize(ET.XML("<tag>&#104;&#101;&#108;&#108;&#111;</tag>"))

    252     '<tag>hello</tag>'
    253     >>> serialize(ET.XML("<tag><![CDATA[hello]]></tag>"))
    254     '<tag>hello</tag>'
    255     """
    256 
# Only with Python implementation
# Doctest-only function: the docstring replaces ElementTree.ElementPath with
# ElementTree._SimpleElementPath() for the duration of the test and puts the
# saved module back in its last example.
def simplefind():
    """
    Test find methods using the elementpath fallback.

    >>> from xml.etree import ElementTree

    >>> CurrentElementPath = ElementTree.ElementPath
    >>> ElementTree.ElementPath = ElementTree._SimpleElementPath()
    >>> elem = ElementTree.XML(SAMPLE_XML)
    >>> elem.find("tag").tag
    'tag'
    >>> ElementTree.ElementTree(elem).find("tag").tag
    'tag'
    >>> elem.findtext("tag")
    'text'
    >>> elem.findtext("tog")
    >>> elem.findtext("tog", "default")
    'default'
    >>> ElementTree.ElementTree(elem).findtext("tag")
    'text'
    >>> summarize_list(elem.findall("tag"))
    ['tag', 'tag']
    >>> summarize_list(elem.findall(".//tag"))
    ['tag', 'tag', 'tag']

    Path syntax doesn't work in this case.

    >>> elem.find("section/tag")
    >>> elem.findtext("section/tag")
    >>> summarize_list(elem.findall("section/tag"))
    []

    >>> ElementTree.ElementPath = CurrentElementPath
    """
    292 
# Doctest-only function: the docstring runs find/findtext/findall with path
# expressions against SAMPLE_XML (whose third child is replaced by the parsed
# SAMPLE_SECTION part-way through) and then against the namespaced
# SAMPLE_XML_NS document.
def find():
    """
    Test find methods (including xpath syntax).

    >>> elem = ET.XML(SAMPLE_XML)
    >>> elem.find("tag").tag
    'tag'
    >>> ET.ElementTree(elem).find("tag").tag
    'tag'
    >>> elem.find("section/tag").tag
    'tag'
    >>> elem.find("./tag").tag
    'tag'
    >>> ET.ElementTree(elem).find("./tag").tag
    'tag'
    >>> ET.ElementTree(elem).find("/tag").tag
    'tag'
    >>> elem[2] = ET.XML(SAMPLE_SECTION)
    >>> elem.find("section/nexttag").tag
    'nexttag'
    >>> ET.ElementTree(elem).find("section/tag").tag
    'tag'
    >>> ET.ElementTree(elem).find("tog")
    >>> ET.ElementTree(elem).find("tog/foo")
    >>> elem.findtext("tag")
    'text'
    >>> elem.findtext("section/nexttag")
    ''
    >>> elem.findtext("section/nexttag", "default")
    ''
    >>> elem.findtext("tog")
    >>> elem.findtext("tog", "default")
    'default'
    >>> ET.ElementTree(elem).findtext("tag")
    'text'
    >>> ET.ElementTree(elem).findtext("tog/foo")
    >>> ET.ElementTree(elem).findtext("tog/foo", "default")
    'default'
    >>> ET.ElementTree(elem).findtext("./tag")
    'text'
    >>> ET.ElementTree(elem).findtext("/tag")
    'text'
    >>> elem.findtext("section/tag")
    'subtext'
    >>> ET.ElementTree(elem).findtext("section/tag")
    'subtext'
    >>> summarize_list(elem.findall("."))
    ['body']
    >>> summarize_list(elem.findall("tag"))
    ['tag', 'tag']
    >>> summarize_list(elem.findall("tog"))
    []
    >>> summarize_list(elem.findall("tog/foo"))
    []
    >>> summarize_list(elem.findall("*"))
    ['tag', 'tag', 'section']
    >>> summarize_list(elem.findall(".//tag"))
    ['tag', 'tag', 'tag', 'tag']
    >>> summarize_list(elem.findall("section/tag"))
    ['tag']
    >>> summarize_list(elem.findall("section//tag"))
    ['tag', 'tag']
    >>> summarize_list(elem.findall("section/*"))
    ['tag', 'nexttag', 'nextsection']
    >>> summarize_list(elem.findall("section//*"))
    ['tag', 'nexttag', 'nextsection', 'tag']
    >>> summarize_list(elem.findall("section/.//*"))
    ['tag', 'nexttag', 'nextsection', 'tag']
    >>> summarize_list(elem.findall("*/*"))
    ['tag', 'nexttag', 'nextsection']
    >>> summarize_list(elem.findall("*//*"))
    ['tag', 'nexttag', 'nextsection', 'tag']
    >>> summarize_list(elem.findall("*/tag"))
    ['tag']
    >>> summarize_list(elem.findall("*/./tag"))
    ['tag']
    >>> summarize_list(elem.findall("./tag"))
    ['tag', 'tag']
    >>> summarize_list(elem.findall(".//tag"))
    ['tag', 'tag', 'tag', 'tag']
    >>> summarize_list(elem.findall("././tag"))
    ['tag', 'tag']
    >>> summarize_list(elem.findall(".//tag[@class]"))
    ['tag', 'tag', 'tag']
    >>> summarize_list(elem.findall(".//tag[@class='a']"))
    ['tag']
    >>> summarize_list(elem.findall(".//tag[@class='b']"))
    ['tag', 'tag']
    >>> summarize_list(elem.findall(".//tag[@id]"))
    ['tag']
    >>> summarize_list(elem.findall(".//section[tag]"))
    ['section']
    >>> summarize_list(elem.findall(".//section[element]"))
    []
    >>> summarize_list(elem.findall("../tag"))
    []
    >>> summarize_list(elem.findall("section/../tag"))
    ['tag', 'tag']
    >>> summarize_list(ET.ElementTree(elem).findall("./tag"))
    ['tag', 'tag']

    Following example is invalid in 1.2.
    A leading '*' is assumed in 1.3.

    >>> elem.findall("section//") == elem.findall("section//*")
    True

    ET's Path module handles this case incorrectly; this gives
    a warning in 1.3, and the behaviour will be modified in 1.4.

    >>> summarize_list(ET.ElementTree(elem).findall("/tag"))
    ['tag', 'tag']

    >>> elem = ET.XML(SAMPLE_XML_NS)
    >>> summarize_list(elem.findall("tag"))
    []
    >>> summarize_list(elem.findall("{http://effbot.org/ns}tag"))
    ['{http://effbot.org/ns}tag', '{http://effbot.org/ns}tag']
    >>> summarize_list(elem.findall(".//{http://effbot.org/ns}tag"))
    ['{http://effbot.org/ns}tag', '{http://effbot.org/ns}tag', '{http://effbot.org/ns}tag']
    """
    414 
# Doctest-only function: the docstring builds an ET.ElementTree through the
# file= argument, first from a StringIO object wrapping SAMPLE_XML and then
# from the SIMPLE_XMLFILE path, and runs find() on each tree.
def file_init():
    """
    >>> import StringIO

    >>> stringfile = StringIO.StringIO(SAMPLE_XML)
    >>> tree = ET.ElementTree(file=stringfile)
    >>> tree.find("tag").tag
    'tag'
    >>> tree.find("section/tag").tag
    'tag'

    >>> tree = ET.ElementTree(file=SIMPLE_XMLFILE)
    >>> tree.find("element").tag
    'element'
    >>> tree.find("element/../empty-element").tag
    'empty-element'
    """
    432 
# Doctest-only function: the docstring expects findall() with an absolute
# path on an element (not a tree) to raise SyntaxError.
def bad_find():
    """
    Check bad or unsupported path expressions.

    >>> elem = ET.XML(SAMPLE_XML)
    >>> elem.findall("/tag")
    Traceback (most recent call last):
    SyntaxError: cannot use absolute path on element
    """
    442 
# Doctest-only function: the docstring watches len(ET.ElementPath._cache)
# while looking up numbered paths.  Repeating the same 10 paths must not
# change the size, 20 paths must exceed the 10-path size, and after 600
# distinct paths the size must still be below 500.
def path_cache():
    """
    Check that the path cache behaves sanely.

    >>> elem = ET.XML(SAMPLE_XML)
    >>> for i in range(10): ET.ElementTree(elem).find('./'+str(i))
    >>> cache_len_10 = len(ET.ElementPath._cache)
    >>> for i in range(10): ET.ElementTree(elem).find('./'+str(i))
    >>> len(ET.ElementPath._cache) == cache_len_10
    True
    >>> for i in range(20): ET.ElementTree(elem).find('./'+str(i))
    >>> len(ET.ElementPath._cache) > cache_len_10
    True
    >>> for i in range(600): ET.ElementTree(elem).find('./'+str(i))
    >>> len(ET.ElementPath._cache) < 500
    True
    """
    460 
# Doctest-only function: after renaming the child of e1, the docstring
# expects the copy.copy() result to show the new child tag and the
# copy.deepcopy() result to keep the old one.
def copy():
    """
    Test copy handling (etc).

    >>> import copy
    >>> e1 = ET.XML("<tag>hello<foo/></tag>")
    >>> e2 = copy.copy(e1)
    >>> e3 = copy.deepcopy(e1)
    >>> e1.find("foo").tag = "bar"
    >>> serialize(e1)
    '<tag>hello<bar /></tag>'
    >>> serialize(e2)
    '<tag>hello<bar /></tag>'
    >>> serialize(e3)
    '<tag>hello<foo /></tag>'

    """
    478 
    479 def attrib():
    480     """
    481     Test attribute handling.
    482 
    483     >>> elem = ET.Element("tag")
    484     >>> elem.get("key") # 1.1

    485     >>> elem.get("key", "default") # 1.2

    486     'default'
    487     >>> elem.set("key", "value")
    488     >>> elem.get("key") # 1.3

    489     'value'
    490 
    491     >>> elem = ET.Element("tag", key="value")
    492     >>> elem.get("key") # 2.1

    493     'value'
    494     >>> elem.attrib # 2.2

    495     {'key': 'value'}
    496 
    497     >>> attrib = {"key": "value"}
    498     >>> elem = ET.Element("tag", attrib)
    499     >>> attrib.clear() # check for aliasing issues

    500     >>> elem.get("key") # 3.1

    501     'value'
    502     >>> elem.attrib # 3.2

    503     {'key': 'value'}
    504 
    505     >>> attrib = {"key": "value"}
    506     >>> elem = ET.Element("tag", **attrib)
    507     >>> attrib.clear() # check for aliasing issues

    508     >>> elem.get("key") # 4.1

    509     'value'
    510     >>> elem.attrib # 4.2

    511     {'key': 'value'}
    512 
    513     >>> elem = ET.Element("tag", {"key": "other"}, key="value")
    514     >>> elem.get("key") # 5.1

    515     'value'
    516     >>> elem.attrib # 5.2

    517     {'key': 'value'}
    518 
    519     >>> elem = ET.Element('test')
    520     >>> elem.text = "aa"
    521     >>> elem.set('testa', 'testval')
    522     >>> elem.set('testb', 'test2')
    523     >>> ET.tostring(elem)
    524     '<test testa="testval" testb="test2">aa</test>'
    525     >>> sorted(elem.keys())
    526     ['testa', 'testb']
    527     >>> sorted(elem.items())
    528     [('testa', 'testval'), ('testb', 'test2')]
    529     >>> elem.attrib['testb']
    530     'test2'
    531     >>> elem.attrib['testb'] = 'test1'
    532     >>> elem.attrib['testc'] = 'test2'
    533     >>> ET.tostring(elem)
    534     '<test testa="testval" testb="test1" testc="test2">aa</test>'
    535     """
    536 
# Doctest-only function: the docstring creates a sub-element through
# Element.makeelement(), prints a message if the new element's attrib is the
# very dict that was passed in, and then checks append/clear/extend and
# whole-slice assignment (from a list and from a tuple) through serialize().
def makeelement():
    """
    Test makeelement handling.

    >>> elem = ET.Element("tag")
    >>> attrib = {"key": "value"}
    >>> subelem = elem.makeelement("subtag", attrib)
    >>> if subelem.attrib is attrib:
    ...     print "attrib aliasing"
    >>> elem.append(subelem)
    >>> serialize(elem)
    '<tag><subtag key="value" /></tag>'

    >>> elem.clear()
    >>> serialize(elem)
    '<tag />'
    >>> elem.append(subelem)
    >>> serialize(elem)
    '<tag><subtag key="value" /></tag>'
    >>> elem.extend([subelem, subelem])
    >>> serialize(elem)
    '<tag><subtag key="value" /><subtag key="value" /><subtag key="value" /></tag>'
    >>> elem[:] = [subelem]
    >>> serialize(elem)
    '<tag><subtag key="value" /></tag>'
    >>> elem[:] = tuple([subelem])
    >>> serialize(elem)
    '<tag><subtag key="value" /></tag>'

    """
    567 
    568 def parsefile():
    569     """
    570     Test parsing from file.
    571 
    572     >>> tree = ET.parse(SIMPLE_XMLFILE)
    573     >>> normalize_crlf(tree)
    574     >>> tree.write(sys.stdout)
    575     <root>
    576        <element key="value">text</element>
    577        <element>text</element>tail
    578        <empty-element />
    579     </root>
    580     >>> tree = ET.parse(SIMPLE_NS_XMLFILE)
    581     >>> normalize_crlf(tree)
    582     >>> tree.write(sys.stdout)
    583     <ns0:root xmlns:ns0="namespace">
    584        <ns0:element key="value">text</ns0:element>
    585        <ns0:element>text</ns0:element>tail
    586        <ns0:empty-element />
    587     </ns0:root>
    588 
    589     >>> with open(SIMPLE_XMLFILE) as f:
    590     ...     data = f.read()
    591 
    592     >>> parser = ET.XMLParser()
    593     >>> parser.version  # doctest: +ELLIPSIS

    594     'Expat ...'
    595     >>> parser.feed(data)
    596     >>> print serialize(parser.close())
    597     <root>
    598        <element key="value">text</element>
    599        <element>text</element>tail
    600        <empty-element />
    601     </root>
    602 
    603     >>> parser = ET.XMLTreeBuilder() # 1.2 compatibility

    604     >>> parser.feed(data)
    605     >>> print serialize(parser.close())
    606     <root>
    607        <element key="value">text</element>
    608        <element>text</element>tail
    609        <empty-element />
    610     </root>
    611 
    612     >>> target = ET.TreeBuilder()
    613     >>> parser = ET.XMLParser(target=target)
    614     >>> parser.feed(data)
    615     >>> print serialize(parser.close())
    616     <root>
    617        <element key="value">text</element>
    618        <element>text</element>tail
    619        <empty-element />
    620     </root>
    621     """
    622 
# Doctest-only function: the docstring parses string literals with ET.XML,
# ET.fromstring and ET.fromstringlist, serializes them with
# ElementTree.write/ET.tostring/ET.tostringlist, and checks the id mapping
# returned by ET.XMLID.
def parseliteral():
    """
    >>> element = ET.XML("<html><body>text</body></html>")
    >>> ET.ElementTree(element).write(sys.stdout)
    <html><body>text</body></html>
    >>> element = ET.fromstring("<html><body>text</body></html>")
    >>> ET.ElementTree(element).write(sys.stdout)
    <html><body>text</body></html>
    >>> sequence = ["<html><body>", "text</bo", "dy></html>"]
    >>> element = ET.fromstringlist(sequence)
    >>> print ET.tostring(element)
    <html><body>text</body></html>
    >>> print "".join(ET.tostringlist(element))
    <html><body>text</body></html>
    >>> ET.tostring(element, "ascii")
    "<?xml version='1.0' encoding='ascii'?>\\n<html><body>text</body></html>"
    >>> _, ids = ET.XMLID("<html><body>text</body></html>")
    >>> len(ids)
    0
    >>> _, ids = ET.XMLID("<html><body id='body'>text</body></html>")
    >>> len(ids)
    1
    >>> ids["body"].tag
    'body'
    """
    648 
# Doctest-only function: the docstring drives ET.iterparse over the fixture
# files with the default events, an empty event tuple, start/end events and
# namespace events; expects ValueError for an unknown event name; and checks
# the ParseError message for text that follows the document element.
def iterparse():
    """
    Test iterparse interface.

    >>> iterparse = ET.iterparse

    >>> context = iterparse(SIMPLE_XMLFILE)
    >>> action, elem = next(context)
    >>> print action, elem.tag
    end element
    >>> for action, elem in context:
    ...   print action, elem.tag
    end element
    end empty-element
    end root
    >>> context.root.tag
    'root'

    >>> context = iterparse(SIMPLE_NS_XMLFILE)
    >>> for action, elem in context:
    ...   print action, elem.tag
    end {namespace}element
    end {namespace}element
    end {namespace}empty-element
    end {namespace}root

    >>> events = ()
    >>> context = iterparse(SIMPLE_XMLFILE, events)
    >>> for action, elem in context:
    ...   print action, elem.tag

    >>> events = ()
    >>> context = iterparse(SIMPLE_XMLFILE, events=events)
    >>> for action, elem in context:
    ...   print action, elem.tag

    >>> events = ("start", "end")
    >>> context = iterparse(SIMPLE_XMLFILE, events)
    >>> for action, elem in context:
    ...   print action, elem.tag
    start root
    start element
    end element
    start element
    end element
    start empty-element
    end empty-element
    end root

    >>> events = ("start", "end", "start-ns", "end-ns")
    >>> context = iterparse(SIMPLE_NS_XMLFILE, events)
    >>> for action, elem in context:
    ...   if action in ("start", "end"):
    ...     print action, elem.tag
    ...   else:
    ...     print action, elem
    start-ns ('', 'namespace')
    start {namespace}root
    start {namespace}element
    end {namespace}element
    start {namespace}element
    end {namespace}element
    start {namespace}empty-element
    end {namespace}empty-element
    end {namespace}root
    end-ns None

    >>> events = ("start", "end", "bogus")
    >>> with open(SIMPLE_XMLFILE, "rb") as f:
    ...     iterparse(f, events)
    Traceback (most recent call last):
    ValueError: unknown event 'bogus'

    >>> import StringIO

    >>> source = StringIO.StringIO(
    ...     "<?xml version='1.0' encoding='iso-8859-1'?>\\n"
    ...     "<body xmlns='http://&#233;ffbot.org/ns'\\n"
    ...     "      xmlns:cl\\xe9='http://effbot.org/ns'>text</body>\\n")
    >>> events = ("start-ns",)
    >>> context = iterparse(source, events)
    >>> for action, elem in context:
    ...     print action, elem
    start-ns ('', u'http://\\xe9ffbot.org/ns')
    start-ns (u'cl\\xe9', 'http://effbot.org/ns')

    >>> source = StringIO.StringIO("<document />junk")
    >>> try:
    ...   for action, elem in iterparse(source):
    ...     print action, elem.tag
    ... except ET.ParseError, v:
    ...   print v
    junk after document element: line 1, column 12
    """
    743 
    744 def writefile():
    745     """
    746     >>> elem = ET.Element("tag")
    747     >>> elem.text = "text"
    748     >>> serialize(elem)
    749     '<tag>text</tag>'
    750     >>> ET.SubElement(elem, "subtag").text = "subtext"
    751     >>> serialize(elem)
    752     '<tag>text<subtag>subtext</subtag></tag>'
    753 
    754     Test tag suppression
    755     >>> elem.tag = None
    756     >>> serialize(elem)
    757     'text<subtag>subtext</subtag>'
    758     >>> elem.insert(0, ET.Comment("comment"))
    759     >>> serialize(elem)     # assumes 1.3

    760     'text<!--comment--><subtag>subtext</subtag>'
    761     >>> elem[0] = ET.PI("key", "value")
    762     >>> serialize(elem)
    763     'text<?key value?><subtag>subtext</subtag>'
    764     """
    765 
# Doctest-only function: the docstring passes hand-written target objects to
# ET.XMLParser.  The first Builder only has start/end/data; the second also
# defines pi and comment, and the expected output includes the lines those
# two methods print for SIMPLE_NS_XMLFILE.
def custom_builder():
    """
    Test parser w. custom builder.

    >>> with open(SIMPLE_XMLFILE) as f:
    ...     data = f.read()
    >>> class Builder:
    ...     def start(self, tag, attrib):
    ...         print "start", tag
    ...     def end(self, tag):
    ...         print "end", tag
    ...     def data(self, text):
    ...         pass
    >>> builder = Builder()
    >>> parser = ET.XMLParser(target=builder)
    >>> parser.feed(data)
    start root
    start element
    end element
    start element
    end element
    start empty-element
    end empty-element
    end root

    >>> with open(SIMPLE_NS_XMLFILE) as f:
    ...     data = f.read()
    >>> class Builder:
    ...     def start(self, tag, attrib):
    ...         print "start", tag
    ...     def end(self, tag):
    ...         print "end", tag
    ...     def data(self, text):
    ...         pass
    ...     def pi(self, target, data):
    ...         print "pi", target, repr(data)
    ...     def comment(self, data):
    ...         print "comment", repr(data)
    >>> builder = Builder()
    >>> parser = ET.XMLParser(target=builder)
    >>> parser.feed(data)
    pi pi 'data'
    comment ' comment '
    start {namespace}root
    start {namespace}element
    end {namespace}element
    start {namespace}element
    end {namespace}element
    start {namespace}empty-element
    end {namespace}empty-element
    end {namespace}root

    """
    819 
# Doctest-only function: the docstring lists getchildren() for every element
# reached through iter() and through getiterator(), then checks that child
# identity and order survive slice deletion, slice assignment and clear().
def getchildren():
    """
    Test Element.getchildren()

    >>> with open(SIMPLE_XMLFILE, "r") as f:
    ...     tree = ET.parse(f)
    >>> for elem in tree.getroot().iter():
    ...     summarize_list(elem.getchildren())
    ['element', 'element', 'empty-element']
    []
    []
    []
    >>> for elem in tree.getiterator():
    ...     summarize_list(elem.getchildren())
    ['element', 'element', 'empty-element']
    []
    []
    []

    >>> elem = ET.XML(SAMPLE_XML)
    >>> len(elem.getchildren())
    3
    >>> len(elem[2].getchildren())
    1
    >>> elem[:] == elem.getchildren()
    True
    >>> child1 = elem[0]
    >>> child2 = elem[2]
    >>> del elem[1:2]
    >>> len(elem.getchildren())
    2
    >>> child1 == elem[0]
    True
    >>> child2 == elem[1]
    True
    >>> elem[0:2] = [child2, child1]
    >>> child2 == elem[0]
    True
    >>> child1 == elem[1]
    True
    >>> child1 == elem[0]
    False
    >>> elem.clear()
    >>> elem.getchildren()
    []
    """
    866 
# Doctest-only function: the docstring checks that ET.tostring() reproduces
# the input markup for elements parsed with ET.XML and with ET.fromstring.
def writestring():
    """
    >>> elem = ET.XML("<html><body>text</body></html>")
    >>> ET.tostring(elem)
    '<html><body>text</body></html>'
    >>> elem = ET.fromstring("<html><body>text</body></html>")
    >>> ET.tostring(elem)
    '<html><body>text</body></html>'
    """
    876 
    877 def check_encoding(encoding):
    878     """
    879     >>> check_encoding("ascii")
    880     >>> check_encoding("us-ascii")
    881     >>> check_encoding("iso-8859-1")
    882     >>> check_encoding("iso-8859-15")
    883     >>> check_encoding("cp437")
    884     >>> check_encoding("mac-roman")
    885     """
    886     ET.XML("<?xml version='1.0' encoding='%s'?><xml />" % encoding)
    887 
    888 def encoding():
    889     r"""
    890     Test encoding issues.
    891 
    892     >>> elem = ET.Element("tag")
    893     >>> elem.text = u"abc"
    894     >>> serialize(elem)
    895     '<tag>abc</tag>'
    896     >>> serialize(elem, encoding="utf-8")
    897     '<tag>abc</tag>'
    898     >>> serialize(elem, encoding="us-ascii")
    899     '<tag>abc</tag>'
    900     >>> serialize(elem, encoding="iso-8859-1")
    901     "<?xml version='1.0' encoding='iso-8859-1'?>\n<tag>abc</tag>"
    902 
    903     >>> elem.text = "<&\"\'>"
    904     >>> serialize(elem)
    905     '<tag>&lt;&amp;"\'&gt;</tag>'
    906     >>> serialize(elem, encoding="utf-8")
    907     '<tag>&lt;&amp;"\'&gt;</tag>'
    908     >>> serialize(elem, encoding="us-ascii") # cdata characters

    909     '<tag>&lt;&amp;"\'&gt;</tag>'
    910     >>> serialize(elem, encoding="iso-8859-1")
    911     '<?xml version=\'1.0\' encoding=\'iso-8859-1\'?>\n<tag>&lt;&amp;"\'&gt;</tag>'
    912 
    913     >>> elem.attrib["key"] = "<&\"\'>"
    914     >>> elem.text = None
    915     >>> serialize(elem)
    916     '<tag key="&lt;&amp;&quot;\'&gt;" />'
    917     >>> serialize(elem, encoding="utf-8")
    918     '<tag key="&lt;&amp;&quot;\'&gt;" />'
    919     >>> serialize(elem, encoding="us-ascii")
    920     '<tag key="&lt;&amp;&quot;\'&gt;" />'
    921     >>> serialize(elem, encoding="iso-8859-1")
    922     '<?xml version=\'1.0\' encoding=\'iso-8859-1\'?>\n<tag key="&lt;&amp;&quot;\'&gt;" />'
    923 
    924     >>> elem.text = u'\xe5\xf6\xf6<>'
    925     >>> elem.attrib.clear()
    926     >>> serialize(elem)
    927     '<tag>&#229;&#246;&#246;&lt;&gt;</tag>'
    928     >>> serialize(elem, encoding="utf-8")
    929     '<tag>\xc3\xa5\xc3\xb6\xc3\xb6&lt;&gt;</tag>'
    930     >>> serialize(elem, encoding="us-ascii")
    931     '<tag>&#229;&#246;&#246;&lt;&gt;</tag>'
    932     >>> serialize(elem, encoding="iso-8859-1")
    933     "<?xml version='1.0' encoding='iso-8859-1'?>\n<tag>\xe5\xf6\xf6&lt;&gt;</tag>"
    934 
    935     >>> elem.attrib["key"] = u'\xe5\xf6\xf6<>'
    936     >>> elem.text = None
    937     >>> serialize(elem)
    938     '<tag key="&#229;&#246;&#246;&lt;&gt;" />'
    939     >>> serialize(elem, encoding="utf-8")
    940     '<tag key="\xc3\xa5\xc3\xb6\xc3\xb6&lt;&gt;" />'
    941     >>> serialize(elem, encoding="us-ascii")
    942     '<tag key="&#229;&#246;&#246;&lt;&gt;" />'
    943     >>> serialize(elem, encoding="iso-8859-1")
    944     '<?xml version=\'1.0\' encoding=\'iso-8859-1\'?>\n<tag key="\xe5\xf6\xf6&lt;&gt;" />'
    945     """
    946 
def methods():
    r"""
    Test serialization methods.

    As the expected outputs show, method=None and method="xml" match the
    default; "html" leaves the empty <link> unclosed and does not escape
    the <script> text; "text" emits only the text content and the tail.

    >>> e = ET.XML("<html><link/><script>1 &lt; 2</script></html>")
    >>> e.tail = "\n"
    >>> serialize(e)
    '<html><link /><script>1 &lt; 2</script></html>\n'
    >>> serialize(e, method=None)
    '<html><link /><script>1 &lt; 2</script></html>\n'
    >>> serialize(e, method="xml")
    '<html><link /><script>1 &lt; 2</script></html>\n'
    >>> serialize(e, method="html")
    '<html><link><script>1 < 2</script></html>\n'
    >>> serialize(e, method="text")
    '1 < 2\n'
    """
    964 
    965 def iterators():
    966     """
    967     Test iterators.
    968 
    969     >>> e = ET.XML("<html><body>this is a <i>paragraph</i>.</body>..</html>")
    970     >>> summarize_list(e.iter())
    971     ['html', 'body', 'i']
    972     >>> summarize_list(e.find("body").iter())
    973     ['body', 'i']
    974     >>> summarize(next(e.iter()))
    975     'html'
    976     >>> "".join(e.itertext())
    977     'this is a paragraph...'
    978     >>> "".join(e.find("body").itertext())
    979     'this is a paragraph.'
    980     >>> next(e.itertext())
    981     'this is a '
    982 
    983     Method iterparse should return an iterator. See bug 6472.
    984 
    985     >>> sourcefile = serialize(e, to_string=False)
    986     >>> next(ET.iterparse(sourcefile))  # doctest: +ELLIPSIS

    987     ('end', <Element 'i' at 0x...>)
    988 
    989     >>> tree = ET.ElementTree(None)
    990     >>> tree.iter()
    991     Traceback (most recent call last):
    992     AttributeError: 'NoneType' object has no attribute 'iter'
    993     """
    994 
# Document that references &entity;.  The DOCTYPE only declares an external
# parameter entity, so &entity; itself is never defined inline.
ENTITY_XML = """\
<!DOCTYPE points [
<!ENTITY % user-entities SYSTEM 'user-entities.xml'>
%user-entities;
]>
<document>&entity;</document>
"""

def entity():
    """
    Test entity handling.

    Numeric character references are resolved by the parser, an undefined
    named entity raises ParseError, and a name registered in the parser's
    "entity" mapping is substituted when parsing ENTITY_XML.

    1) good entities

    >>> e = ET.XML("<document title='&#x8230;'>test</document>")
    >>> serialize(e)
    '<document title="&#33328;">test</document>'

    2) bad entities

    >>> ET.XML("<document>&entity;</document>")
    Traceback (most recent call last):
    ParseError: undefined entity: line 1, column 10

    >>> ET.XML(ENTITY_XML)
    Traceback (most recent call last):
    ParseError: undefined entity &entity;: line 5, column 10

    3) custom entity

    >>> parser = ET.XMLParser()
    >>> parser.entity["entity"] = "text"
    >>> parser.feed(ENTITY_XML)
    >>> root = parser.close()
    >>> serialize(root)
    '<document>text</document>'
    """
   1032 
   1033 def error(xml):
   1034     """
   1035 
   1036     Test error handling.
   1037 
   1038     >>> issubclass(ET.ParseError, SyntaxError)
   1039     True
   1040     >>> error("foo").position
   1041     (1, 0)
   1042     >>> error("<tag>&foo;</tag>").position
   1043     (1, 5)
   1044     >>> error("foobar<").position
   1045     (1, 6)
   1046 
   1047     """
   1048     try:
   1049         ET.XML(xml)
   1050     except ET.ParseError:
   1051         return sys.exc_value
   1052 
   1053 def namespace():
   1054     """
   1055     Test namespace issues.
   1056 
   1057     1) xml namespace
   1058 
   1059     >>> elem = ET.XML("<tag xml:lang='en' />")
   1060     >>> serialize(elem) # 1.1

   1061     '<tag xml:lang="en" />'
   1062 
   1063     2) other "well-known" namespaces
   1064 
   1065     >>> elem = ET.XML("<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#' />")
   1066     >>> serialize(elem) # 2.1

   1067     '<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" />'
   1068 
   1069     >>> elem = ET.XML("<html:html xmlns:html='http://www.w3.org/1999/xhtml' />")
   1070     >>> serialize(elem) # 2.2

   1071     '<html:html xmlns:html="http://www.w3.org/1999/xhtml" />'
   1072 
   1073     >>> elem = ET.XML("<soap:Envelope xmlns:soap='http://schemas.xmlsoap.org/soap/envelope' />")
   1074     >>> serialize(elem) # 2.3

   1075     '<ns0:Envelope xmlns:ns0="http://schemas.xmlsoap.org/soap/envelope" />'
   1076 
   1077     3) unknown namespaces
   1078     >>> elem = ET.XML(SAMPLE_XML_NS)
   1079     >>> print serialize(elem)
   1080     <ns0:body xmlns:ns0="http://effbot.org/ns">
   1081       <ns0:tag>text</ns0:tag>
   1082       <ns0:tag />
   1083       <ns0:section>
   1084         <ns0:tag>subtext</ns0:tag>
   1085       </ns0:section>
   1086     </ns0:body>
   1087     """
   1088 
   1089 def qname():
   1090     """
   1091     Test QName handling.
   1092 
   1093     1) decorated tags
   1094 
   1095     >>> elem = ET.Element("{uri}tag")
   1096     >>> serialize(elem) # 1.1

   1097     '<ns0:tag xmlns:ns0="uri" />'
   1098     >>> elem = ET.Element(ET.QName("{uri}tag"))
   1099     >>> serialize(elem) # 1.2

   1100     '<ns0:tag xmlns:ns0="uri" />'
   1101     >>> elem = ET.Element(ET.QName("uri", "tag"))
   1102     >>> serialize(elem) # 1.3

   1103     '<ns0:tag xmlns:ns0="uri" />'
   1104     >>> elem = ET.Element(ET.QName("uri", "tag"))
   1105     >>> subelem = ET.SubElement(elem, ET.QName("uri", "tag1"))
   1106     >>> subelem = ET.SubElement(elem, ET.QName("uri", "tag2"))
   1107     >>> serialize(elem) # 1.4

   1108     '<ns0:tag xmlns:ns0="uri"><ns0:tag1 /><ns0:tag2 /></ns0:tag>'
   1109 
   1110     2) decorated attributes
   1111 
   1112     >>> elem.clear()
   1113     >>> elem.attrib["{uri}key"] = "value"
   1114     >>> serialize(elem) # 2.1

   1115     '<ns0:tag xmlns:ns0="uri" ns0:key="value" />'
   1116 
   1117     >>> elem.clear()
   1118     >>> elem.attrib[ET.QName("{uri}key")] = "value"
   1119     >>> serialize(elem) # 2.2

   1120     '<ns0:tag xmlns:ns0="uri" ns0:key="value" />'
   1121 
   1122     3) decorated values are not converted by default, but the
   1123        QName wrapper can be used for values
   1124 
   1125     >>> elem.clear()
   1126     >>> elem.attrib["{uri}key"] = "{uri}value"
   1127     >>> serialize(elem) # 3.1

   1128     '<ns0:tag xmlns:ns0="uri" ns0:key="{uri}value" />'
   1129 
   1130     >>> elem.clear()
   1131     >>> elem.attrib["{uri}key"] = ET.QName("{uri}value")
   1132     >>> serialize(elem) # 3.2

   1133     '<ns0:tag xmlns:ns0="uri" ns0:key="ns0:value" />'
   1134 
   1135     >>> elem.clear()
   1136     >>> subelem = ET.Element("tag")
   1137     >>> subelem.attrib["{uri1}key"] = ET.QName("{uri2}value")
   1138     >>> elem.append(subelem)
   1139     >>> elem.append(subelem)
   1140     >>> serialize(elem) # 3.3

   1141     '<ns0:tag xmlns:ns0="uri" xmlns:ns1="uri1" xmlns:ns2="uri2"><tag ns1:key="ns2:value" /><tag ns1:key="ns2:value" /></ns0:tag>'
   1142 
   1143     4) Direct QName tests
   1144 
   1145     >>> str(ET.QName('ns', 'tag'))
   1146     '{ns}tag'
   1147     >>> str(ET.QName('{ns}tag'))
   1148     '{ns}tag'
   1149     >>> q1 = ET.QName('ns', 'tag')
   1150     >>> q2 = ET.QName('ns', 'tag')
   1151     >>> q1 == q2
   1152     True
   1153     >>> q2 = ET.QName('ns', 'other-tag')
   1154     >>> q1 == q2
   1155     False
   1156     >>> q1 == 'ns:tag'
   1157     False
   1158     >>> q1 == '{ns}tag'
   1159     True
   1160     """
   1161 
def doctype_public():
    """
    Test PUBLIC doctype.

    The document only has to parse without raising; nothing is printed.

    >>> elem = ET.XML('<!DOCTYPE html PUBLIC'
    ...   ' "-//W3C//DTD XHTML 1.0 Transitional//EN"'
    ...   ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
    ...   '<html>text</html>')

    """
   1172 
   1173 def xpath_tokenizer(p):
   1174     """
   1175     Test the XPath tokenizer.
   1176 
   1177     >>> # tests from the xml specification

   1178     >>> xpath_tokenizer("*")
   1179     ['*']
   1180     >>> xpath_tokenizer("text()")
   1181     ['text', '()']
   1182     >>> xpath_tokenizer("@name")
   1183     ['@', 'name']
   1184     >>> xpath_tokenizer("@*")
   1185     ['@', '*']
   1186     >>> xpath_tokenizer("para[1]")
   1187     ['para', '[', '1', ']']
   1188     >>> xpath_tokenizer("para[last()]")
   1189     ['para', '[', 'last', '()', ']']
   1190     >>> xpath_tokenizer("*/para")
   1191     ['*', '/', 'para']
   1192     >>> xpath_tokenizer("/doc/chapter[5]/section[2]")
   1193     ['/', 'doc', '/', 'chapter', '[', '5', ']', '/', 'section', '[', '2', ']']
   1194     >>> xpath_tokenizer("chapter//para")
   1195     ['chapter', '//', 'para']
   1196     >>> xpath_tokenizer("//para")
   1197     ['//', 'para']
   1198     >>> xpath_tokenizer("//olist/item")
   1199     ['//', 'olist', '/', 'item']
   1200     >>> xpath_tokenizer(".")
   1201     ['.']
   1202     >>> xpath_tokenizer(".//para")
   1203     ['.', '//', 'para']
   1204     >>> xpath_tokenizer("..")
   1205     ['..']
   1206     >>> xpath_tokenizer("../@lang")
   1207     ['..', '/', '@', 'lang']
   1208     >>> xpath_tokenizer("chapter[title]")
   1209     ['chapter', '[', 'title', ']']
   1210     >>> xpath_tokenizer("employee[@secretary and @assistant]")
   1211     ['employee', '[', '@', 'secretary', '', 'and', '', '@', 'assistant', ']']
   1212 
   1213     >>> # additional tests

   1214     >>> xpath_tokenizer("{http://spam}egg")
   1215     ['{http://spam}egg']
   1216     >>> xpath_tokenizer("./spam.egg")
   1217     ['.', '/', 'spam.egg']
   1218     >>> xpath_tokenizer(".//{http://spam}egg")
   1219     ['.', '//', '{http://spam}egg']
   1220     """
   1221     from xml.etree import ElementPath
   1222     out = []
   1223     for op, tag in ElementPath.xpath_tokenizer(p):
   1224         out.append(op or tag)
   1225     return out
   1226 
def processinginstruction():
    """
    Test ProcessingInstruction directly

    ET.PI is used interchangeably with ET.ProcessingInstruction here.

    >>> ET.tostring(ET.ProcessingInstruction('test', 'instruction'))
    '<?test instruction?>'
    >>> ET.tostring(ET.PI('test', 'instruction'))
    '<?test instruction?>'

    Issue #2746

    The instruction text is expected to be written without escaping
    "<" and "&".

    >>> ET.tostring(ET.PI('test', '<testing&>'))
    '<?test <testing&>?>'
    >>> ET.tostring(ET.PI('test', u'<testing&>\xe3'), 'latin1')
    "<?xml version='1.0' encoding='latin1'?>\\n<?test <testing&>\\xe3?>"
    """
   1243 
   1244 #
   1245 # xinclude tests (samples from appendix C of the xinclude specification)
   1246 
   1247 XINCLUDE = {}
   1248 
   1249 XINCLUDE["C1.xml"] = """\
   1250 <?xml version='1.0'?>
   1251 <document xmlns:xi="http://www.w3.org/2001/XInclude">
   1252   <p>120 Mz is adequate for an average home user.</p>
   1253   <xi:include href="disclaimer.xml"/>
   1254 </document>
   1255 """
   1256 
   1257 XINCLUDE["disclaimer.xml"] = """\
   1258 <?xml version='1.0'?>
   1259 <disclaimer>
   1260   <p>The opinions represented herein represent those of the individual
   1261   and should not be interpreted as official policy endorsed by this
   1262   organization.</p>
   1263 </disclaimer>
   1264 """
   1265 
   1266 XINCLUDE["C2.xml"] = """\
   1267 <?xml version='1.0'?>
   1268 <document xmlns:xi="http://www.w3.org/2001/XInclude">
   1269   <p>This document has been accessed
   1270   <xi:include href="count.txt" parse="text"/> times.</p>
   1271 </document>
   1272 """
   1273 
   1274 XINCLUDE["count.txt"] = "324387"
   1275 
   1276 XINCLUDE["C2b.xml"] = """\
   1277 <?xml version='1.0'?>
   1278 <document xmlns:xi="http://www.w3.org/2001/XInclude">
   1279   <p>This document has been <em>accessed</em>
   1280   <xi:include href="count.txt" parse="text"/> times.</p>
   1281 </document>
   1282 """
   1283 
   1284 XINCLUDE["C3.xml"] = """\
   1285 <?xml version='1.0'?>
   1286 <document xmlns:xi="http://www.w3.org/2001/XInclude">
   1287   <p>The following is the source of the "data.xml" resource:</p>
   1288   <example><xi:include href="data.xml" parse="text"/></example>
   1289 </document>
   1290 """
   1291 
   1292 XINCLUDE["data.xml"] = """\
   1293 <?xml version='1.0'?>
   1294 <data>
   1295   <item><![CDATA[Brooks & Shields]]></item>
   1296 </data>
   1297 """
   1298 
   1299 XINCLUDE["C5.xml"] = """\
   1300 <?xml version='1.0'?>
   1301 <div xmlns:xi="http://www.w3.org/2001/XInclude">
   1302   <xi:include href="example.txt" parse="text">
   1303     <xi:fallback>
   1304       <xi:include href="fallback-example.txt" parse="text">
   1305         <xi:fallback><a href="mailto:bob (at] example.org">Report error</a></xi:fallback>
   1306       </xi:include>
   1307     </xi:fallback>
   1308   </xi:include>
   1309 </div>
   1310 """
   1311 
# Document whose xi:include href is the path stored in SIMPLE_XMLFILE.
# The path is passed through cgi.escape(..., True) so it is safe inside a
# double-quoted attribute value.  xinclude_default() loads this entry.
XINCLUDE["default.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>Example.</p>
  <xi:include href="{}"/>
</document>
""".format(cgi.escape(SIMPLE_XMLFILE, True))
   1319 
   1320 def xinclude_loader(href, parse="xml", encoding=None):
   1321     try:
   1322         data = XINCLUDE[href]
   1323     except KeyError:
   1324         raise IOError("resource not found")
   1325     if parse == "xml":
   1326         from xml.etree.ElementTree import XML
   1327         return XML(data)
   1328     return data
   1329 
   1330 def xinclude():
   1331     r"""
   1332     Basic inclusion example (XInclude C.1)
   1333 
   1334     >>> from xml.etree import ElementTree as ET
   1335     >>> from xml.etree import ElementInclude
   1336 
   1337     >>> document = xinclude_loader("C1.xml")
   1338     >>> ElementInclude.include(document, xinclude_loader)
   1339     >>> print serialize(document) # C1

   1340     <document>
   1341       <p>120 Mz is adequate for an average home user.</p>
   1342       <disclaimer>
   1343       <p>The opinions represented herein represent those of the individual
   1344       and should not be interpreted as official policy endorsed by this
   1345       organization.</p>
   1346     </disclaimer>
   1347     </document>
   1348 
   1349     Textual inclusion example (XInclude C.2)
   1350 
   1351     >>> document = xinclude_loader("C2.xml")
   1352     >>> ElementInclude.include(document, xinclude_loader)
   1353     >>> print serialize(document) # C2

   1354     <document>
   1355       <p>This document has been accessed
   1356       324387 times.</p>
   1357     </document>
   1358 
   1359     Textual inclusion after sibling element (based on modified XInclude C.2)
   1360 
   1361     >>> document = xinclude_loader("C2b.xml")
   1362     >>> ElementInclude.include(document, xinclude_loader)
   1363     >>> print(serialize(document)) # C2b

   1364     <document>
   1365       <p>This document has been <em>accessed</em>
   1366       324387 times.</p>
   1367     </document>
   1368 
   1369     Textual inclusion of XML example (XInclude C.3)
   1370 
   1371     >>> document = xinclude_loader("C3.xml")
   1372     >>> ElementInclude.include(document, xinclude_loader)
   1373     >>> print serialize(document) # C3

   1374     <document>
   1375       <p>The following is the source of the "data.xml" resource:</p>
   1376       <example>&lt;?xml version='1.0'?&gt;
   1377     &lt;data&gt;
   1378       &lt;item&gt;&lt;![CDATA[Brooks &amp; Shields]]&gt;&lt;/item&gt;
   1379     &lt;/data&gt;
   1380     </example>
   1381     </document>
   1382 
   1383     Fallback example (XInclude C.5)
   1384     Note! Fallback support is not yet implemented
   1385 
   1386     >>> document = xinclude_loader("C5.xml")
   1387     >>> ElementInclude.include(document, xinclude_loader)
   1388     Traceback (most recent call last):
   1389     IOError: resource not found
   1390     >>> # print serialize(document) # C5

   1391     """
   1392 
   1393 def xinclude_default():
   1394     """
   1395     >>> from xml.etree import ElementInclude
   1396 
   1397     >>> document = xinclude_loader("default.xml")
   1398     >>> ElementInclude.include(document)
   1399     >>> print serialize(document) # default

   1400     <document>
   1401       <p>Example.</p>
   1402       <root>
   1403        <element key="value">text</element>
   1404        <element>text</element>tail
   1405        <empty-element />
   1406     </root>
   1407     </document>
   1408     """
   1409 
   1410 #
   1411 # badly formatted xi:include tags
   1412 
# Malformed XInclude documents used by xinclude_failures().
XINCLUDE_BAD = {}

# xi:include with an unsupported parse attribute value.
XINCLUDE_BAD["B1.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>120 Mz is adequate for an average home user.</p>
  <xi:include href="disclaimer.xml" parse="BAD_TYPE"/>
</document>
"""

# xi:fallback that is not a child of xi:include.
XINCLUDE_BAD["B2.xml"] = """\
<?xml version='1.0'?>
<div xmlns:xi="http://www.w3.org/2001/XInclude">
    <xi:fallback></xi:fallback>
</div>
"""

def xinclude_failures():
    r"""
    Check the FatalIncludeError messages raised by ElementInclude.include()
    when the loader returns None or the xi: markup is malformed.

    Test failure to locate included XML file.

    >>> from xml.etree import ElementInclude

    >>> def none_loader(href, parser, encoding=None):
    ...     return None

    >>> document = ET.XML(XINCLUDE["C1.xml"])
    >>> ElementInclude.include(document, loader=none_loader)
    Traceback (most recent call last):
    FatalIncludeError: cannot load 'disclaimer.xml' as 'xml'

    Test failure to locate included text file.

    >>> document = ET.XML(XINCLUDE["C2.xml"])
    >>> ElementInclude.include(document, loader=none_loader)
    Traceback (most recent call last):
    FatalIncludeError: cannot load 'count.txt' as 'text'

    Test bad parse type.

    >>> document = ET.XML(XINCLUDE_BAD["B1.xml"])
    >>> ElementInclude.include(document, loader=none_loader)
    Traceback (most recent call last):
    FatalIncludeError: unknown parse type in xi:include tag ('BAD_TYPE')

    Test xi:fallback outside xi:include.

    >>> document = ET.XML(XINCLUDE_BAD["B2.xml"])
    >>> ElementInclude.include(document, loader=none_loader)
    Traceback (most recent call last):
    FatalIncludeError: xi:fallback tag must be child of xi:include ('{http://www.w3.org/2001/XInclude}fallback')
    """
   1465 
   1466 # --------------------------------------------------------------------
   1467 # reported bugs
   1468 
   1469 def bug_xmltoolkit21():
   1470     """
   1471 
   1472     marshaller gives obscure errors for non-string values
   1473 
   1474     >>> elem = ET.Element(123)
   1475     >>> serialize(elem) # tag

   1476     Traceback (most recent call last):
   1477     TypeError: cannot serialize 123 (type int)
   1478     >>> elem = ET.Element("elem")
   1479     >>> elem.text = 123
   1480     >>> serialize(elem) # text

   1481     Traceback (most recent call last):
   1482     TypeError: cannot serialize 123 (type int)
   1483     >>> elem = ET.Element("elem")
   1484     >>> elem.tail = 123
   1485     >>> serialize(elem) # tail

   1486     Traceback (most recent call last):
   1487     TypeError: cannot serialize 123 (type int)
   1488     >>> elem = ET.Element("elem")
   1489     >>> elem.set(123, "123")
   1490     >>> serialize(elem) # attribute key

   1491     Traceback (most recent call last):
   1492     TypeError: cannot serialize 123 (type int)
   1493     >>> elem = ET.Element("elem")
   1494     >>> elem.set("123", 123)
   1495     >>> serialize(elem) # attribute value

   1496     Traceback (most recent call last):
   1497     TypeError: cannot serialize 123 (type int)
   1498 
   1499     """
   1500 
def bug_xmltoolkit25():
    """

    typo in ElementTree.findtext

    Regression check: ElementTree.findtext() is called with a direct
    child path and with a nested path on SAMPLE_XML.

    >>> elem = ET.XML(SAMPLE_XML)
    >>> tree = ET.ElementTree(elem)
    >>> tree.findtext("tag")
    'text'
    >>> tree.findtext("section/tag")
    'subtext'

    """
   1514 
def bug_xmltoolkit28():
    """

    .//tag causes exceptions

    Regression check: a ".//" search returns an empty list for a tag that
    is absent and the matching element for a tag that is present.

    >>> tree = ET.XML("<doc><table><tbody/></table></doc>")
    >>> summarize_list(tree.findall(".//thead"))
    []
    >>> summarize_list(tree.findall(".//tbody"))
    ['tbody']

    """
   1527 
def bug_xmltoolkitX1():
    """

    dump() doesn't flush the output buffer

    Regression check: text written to sys.stdout right after ET.dump()
    must appear after the dumped document.

    >>> tree = ET.XML("<doc><table><tbody/></table></doc>")
    >>> ET.dump(tree); sys.stdout.write("tail")
    <doc><table><tbody /></table></doc>
    tail

    """
   1539 
def bug_xmltoolkit39():
    """

    non-ascii element and attribute names doesn't work

    Regression check: non-ASCII tag and attribute names, both parsed from
    iso-8859-1 input and created through the API, serialize to UTF-8.

    >>> tree = ET.XML("<?xml version='1.0' encoding='iso-8859-1'?><t\xe4g />")
    >>> ET.tostring(tree, "utf-8")
    '<t\\xc3\\xa4g />'

    >>> tree = ET.XML("<?xml version='1.0' encoding='iso-8859-1'?><tag \xe4ttr='v&#228;lue' />")
    >>> tree.attrib
    {u'\\xe4ttr': u'v\\xe4lue'}
    >>> ET.tostring(tree, "utf-8")
    '<tag \\xc3\\xa4ttr="v\\xc3\\xa4lue" />'

    >>> tree = ET.XML("<?xml version='1.0' encoding='iso-8859-1'?><t\xe4g>text</t\xe4g>")
    >>> ET.tostring(tree, "utf-8")
    '<t\\xc3\\xa4g>text</t\\xc3\\xa4g>'

    >>> tree = ET.Element(u"t\u00e4g")
    >>> ET.tostring(tree, "utf-8")
    '<t\\xc3\\xa4g />'

    >>> tree = ET.Element("tag")
    >>> tree.set(u"\u00e4ttr", u"v\u00e4lue")
    >>> ET.tostring(tree, "utf-8")
    '<tag \\xc3\\xa4ttr="v\\xc3\\xa4lue" />'

    """
   1569 
def bug_xmltoolkit54():
    """

    problems handling internally defined entities

    Regression check: an entity declared in the internal DTD subset is
    expanded; its value comes back as a numeric character reference.

    >>> e = ET.XML("<!DOCTYPE doc [<!ENTITY ldots '&#x8230;'>]><doc>&ldots;</doc>")
    >>> serialize(e)
    '<doc>&#33328;</doc>'

    """
   1580 
def bug_xmltoolkit55():
    """

    make sure we're reporting the first error, not the last

    Of the three undefined entities in the document, the ParseError must
    name the first one (&ldots;).

    >>> e = ET.XML("<!DOCTYPE doc SYSTEM 'doc.dtd'><doc>&ldots;&ndots;&rdots;</doc>")
    Traceback (most recent call last):
    ParseError: undefined entity &ldots;: line 1, column 36

    """
   1591 
   1592 class ExceptionFile:
   1593     def read(self, x):
   1594         raise IOError
   1595 
def xmltoolkit60():
    """

    Handle crash in stream source.

    The IOError raised by ExceptionFile.read() must propagate out of
    ET.parse().

    >>> tree = ET.parse(ExceptionFile())
    Traceback (most recent call last):
    IOError

    """
   1605 
   1606 XMLTOOLKIT62_DOC = """<?xml version="1.0" encoding="UTF-8"?>
   1607 <!DOCTYPE patent-application-publication SYSTEM "pap-v15-2001-01-31.dtd" []>
   1608 <patent-application-publication>
   1609 <subdoc-abstract>
   1610 <paragraph id="A-0001" lvl="0">A new cultivar of Begonia plant named &lsquo;BCT9801BEG&rsquo;.</paragraph>
   1611 </subdoc-abstract>
   1612 </patent-application-publication>"""
   1613 
   1614 
   1615 def xmltoolkit62():
   1616     """
   1617 
   1618     Don't crash when using custom entities.
   1619 
   1620     >>> xmltoolkit62()
   1621     u'A new cultivar of Begonia plant named \u2018BCT9801BEG\u2019.'
   1622 
   1623     """
   1624     ENTITIES = {u'rsquo': u'\u2019', u'lsquo': u'\u2018'}
   1625     parser = ET.XMLTreeBuilder()
   1626     parser.entity.update(ENTITIES)
   1627     parser.feed(XMLTOOLKIT62_DOC)
   1628     t = parser.close()
   1629     return t.find('.//paragraph').text
   1630 
   1631 def xmltoolkit63():
   1632     """
   1633 
   1634     Check reference leak.
   1635     >>> xmltoolkit63()
   1636     >>> count = sys.getrefcount(None)
   1637     >>> for i in range(1000):
   1638     ...     xmltoolkit63()
   1639     >>> sys.getrefcount(None) - count
   1640     0
   1641 
   1642     """
   1643     tree = ET.TreeBuilder()
   1644     tree.start("tag", {})
   1645     tree.data("text")
   1646     tree.end("tag")
   1647 
   1648 # --------------------------------------------------------------------
   1649 
   1650 
def bug_200708_newline():
    r"""

    Preserve newlines in attributes.

    Newlines in an attribute value are written as &#10; so that they
    survive a serialize/parse round trip.

    >>> e = ET.Element('SomeTag', text="def _f():\n  return 3\n")
    >>> ET.tostring(e)
    '<SomeTag text="def _f():&#10;  return 3&#10;" />'
    >>> ET.XML(ET.tostring(e)).get("text")
    'def _f():\n  return 3\n'
    >>> ET.tostring(ET.XML(ET.tostring(e)))
    '<SomeTag text="def _f():&#10;  return 3&#10;" />'

    """
   1665 
def bug_200708_close():
    """

    XMLParser.close() must return the parsed root with the default
    builder, and whatever the target's own close() returns with a custom
    target.

    Test default builder.
    >>> parser = ET.XMLParser() # default
    >>> parser.feed("<element>some text</element>")
    >>> summarize(parser.close())
    'element'

    Test custom builder.
    >>> class EchoTarget:
    ...     def close(self):
    ...         return ET.Element("element") # simulate root
    >>> parser = ET.XMLParser(EchoTarget())
    >>> parser.feed("<element>some text</element>")
    >>> summarize(parser.close())
    'element'

    """
   1685 
   1686 def bug_200709_default_namespace():
   1687     """
   1688 
   1689     >>> e = ET.Element("{default}elem")
   1690     >>> s = ET.SubElement(e, "{default}elem")
   1691     >>> serialize(e, default_namespace="default") # 1

   1692     '<elem xmlns="default"><elem /></elem>'
   1693 
   1694     >>> e = ET.Element("{default}elem")
   1695     >>> s = ET.SubElement(e, "{default}elem")
   1696     >>> s = ET.SubElement(e, "{not-default}elem")
   1697     >>> serialize(e, default_namespace="default") # 2

   1698     '<elem xmlns="default" xmlns:ns1="not-default"><elem /><ns1:elem /></elem>'
   1699 
   1700     >>> e = ET.Element("{default}elem")
   1701     >>> s = ET.SubElement(e, "{default}elem")
   1702     >>> s = ET.SubElement(e, "elem") # unprefixed name

   1703     >>> serialize(e, default_namespace="default") # 3

   1704     Traceback (most recent call last):
   1705     ValueError: cannot use non-qualified names with default_namespace option
   1706 
   1707     """
   1708 
def bug_200709_register_namespace():
    """

    A namespace URI gets a generated prefix until ET.register_namespace()
    assigns one; the registered prefix is then used on serialization.

    >>> ET.tostring(ET.Element("{http://namespace.invalid/does/not/exist/}title"))
    '<ns0:title xmlns:ns0="http://namespace.invalid/does/not/exist/" />'
    >>> ET.register_namespace("foo", "http://namespace.invalid/does/not/exist/")
    >>> ET.tostring(ET.Element("{http://namespace.invalid/does/not/exist/}title"))
    '<foo:title xmlns:foo="http://namespace.invalid/does/not/exist/" />'

    And the Dublin Core namespace is in the default list:

    >>> ET.tostring(ET.Element("{http://purl.org/dc/elements/1.1/}title"))
    '<dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" />'

    """
   1724 
def bug_200709_element_comment():
    """

    Not sure if this can be fixed, really (since the serializer needs
    ET.Comment, not cET.comment).

    The tag of an appended comment or processing instruction must compare
    equal to the ET.Comment / ET.PI factory.

    >>> a = ET.Element('a')
    >>> a.append(ET.Comment('foo'))
    >>> a[0].tag == ET.Comment
    True

    >>> a = ET.Element('a')
    >>> a.append(ET.PI('foo'))
    >>> a[0].tag == ET.PI
    True

    """
   1742 
def bug_200709_element_insert():
    """

    Element.insert() with index 0 prepends; with index -1 the new child
    lands just before the last existing child.

    >>> a = ET.Element('a')
    >>> b = ET.SubElement(a, 'b')
    >>> c = ET.SubElement(a, 'c')
    >>> d = ET.Element('d')
    >>> a.insert(0, d)
    >>> summarize_list(a)
    ['d', 'b', 'c']
    >>> a.insert(-1, d)
    >>> summarize_list(a)
    ['d', 'b', 'd', 'c']

    """
   1758 
def bug_200709_iter_comment():
    """

    Element.iter(ET.Comment) must find a comment nested below the element
    it is called on.

    >>> a = ET.Element('a')
    >>> b = ET.SubElement(a, 'b')
    >>> comment_b = ET.Comment("TEST-b")
    >>> b.append(comment_b)
    >>> summarize_list(a.iter(ET.Comment))
    ['<Comment>']

    """
   1770 
   1771 # --------------------------------------------------------------------
   1772 # reported on bugs.python.org
   1773 
def bug_1534630():
    """

    TreeBuilder must cope with data() being called before the first
    start(); the resulting element serializes as an empty tag.

    >>> bob = ET.TreeBuilder()
    >>> e = bob.data("data")
    >>> e = bob.start("tag", {})
    >>> e = bob.end("tag")
    >>> e = bob.close()
    >>> serialize(e)
    '<tag />'

    """
   1786 
def check_issue6233():
    """

    Non-ASCII text parsed from utf-8 or iso-8859-1 input is written as a
    numeric character reference when serializing to 'ascii'.

    >>> e = ET.XML("<?xml version='1.0' encoding='utf-8'?><body>t\\xc3\\xa3g</body>")
    >>> ET.tostring(e, 'ascii')
    "<?xml version='1.0' encoding='ascii'?>\\n<body>t&#227;g</body>"
    >>> e = ET.XML("<?xml version='1.0' encoding='iso-8859-1'?><body>t\\xe3g</body>")
    >>> ET.tostring(e, 'ascii')
    "<?xml version='1.0' encoding='ascii'?>\\n<body>t&#227;g</body>"

    """
   1798 
def check_issue3151():
    """
    Check that a namespace URI which itself contains braces ('${stuff}')
    is kept intact in the parsed tag and in the serialized output.

    >>> e = ET.XML('<prefix:localname xmlns:prefix="${stuff}"/>')
    >>> e.tag
    '{${stuff}}localname'
    >>> t = ET.ElementTree(e)
    >>> ET.tostring(e)
    '<ns0:localname xmlns:ns0="${stuff}" />'

    """
   1810 
def check_issue6565():
    """
    Check full-slice assignment on an element: elem[:] = newelem[:]
    replaces all children of elem with the children of newelem.

    >>> elem = ET.XML("<body><tag/></body>")
    >>> summarize_list(elem)
    ['tag']
    >>> newelem = ET.XML(SAMPLE_XML)
    >>> elem[:] = newelem[:]
    >>> summarize_list(elem)
    ['tag', 'tag', 'section']

    """
   1823 
   1824 # --------------------------------------------------------------------
   1825 
   1826 
   1827 class CleanContext(object):
   1828     """Provide default namespace mapping and path cache."""
   1829     checkwarnings = None
   1830 
   1831     def __init__(self, quiet=False):
   1832         if sys.flags.optimize >= 2:
   1833             # under -OO, doctests cannot be run and therefore not all warnings
   1834             # will be emitted
   1835             quiet = True
   1836         deprecations = (
   1837             # Search behaviour is broken if search path starts with "/".
   1838             ("This search is broken in 1.3 and earlier, and will be fixed "
   1839              "in a future version.  If you rely on the current behaviour, "
   1840              "change it to '.+'", FutureWarning),
   1841             # Element.getchildren() and Element.getiterator() are deprecated.
   1842             ("This method will be removed in future versions.  "
   1843              "Use .+ instead.", DeprecationWarning),
   1844             ("This method will be removed in future versions.  "
   1845              "Use .+ instead.", PendingDeprecationWarning),
   1846             # XMLParser.doctype() is deprecated.
   1847             ("This method of XMLParser is deprecated.  Define doctype.. "
   1848              "method on the TreeBuilder target.", DeprecationWarning))
   1849         self.checkwarnings = test_support.check_warnings(*deprecations,
   1850                                                          quiet=quiet)
   1851 
   1852     def __enter__(self):
   1853         from xml.etree import ElementTree
   1854         self._nsmap = ElementTree._namespace_map
   1855         self._path_cache = ElementTree.ElementPath._cache
   1856         # Copy the default namespace mapping
   1857         ElementTree._namespace_map = self._nsmap.copy()
   1858         # Copy the path cache (should be empty)
   1859         ElementTree.ElementPath._cache = self._path_cache.copy()
   1860         self.checkwarnings.__enter__()
   1861 
   1862     def __exit__(self, *args):
   1863         from xml.etree import ElementTree
   1864         # Restore mapping and path cache
   1865         ElementTree._namespace_map = self._nsmap
   1866         ElementTree.ElementPath._cache = self._path_cache
   1867         self.checkwarnings.__exit__(*args)
   1868 
   1869 
   1870 def test_main(module_name='xml.etree.ElementTree'):
   1871     from test import test_xml_etree
   1872 
   1873     use_py_module = (module_name == 'xml.etree.ElementTree')
   1874 
   1875     # The same doctests are used for both the Python and the C implementations
   1876     assert test_xml_etree.ET.__name__ == module_name
   1877 
   1878     # XXX the C module should give the same warnings as the Python module
   1879     with CleanContext(quiet=not use_py_module):
   1880         test_support.run_doctest(test_xml_etree, verbosity=True)
   1881 
   1882     # The module should not be changed by the tests
   1883     assert test_xml_etree.ET.__name__ == module_name
   1884 
# Run the doctests with the default (pure-Python) module name when this
# file is executed directly as a script.
if __name__ == '__main__':
    test_main()
   1887